From 774cf23ee556cac320fb68fd553e78100a8a9855 Mon Sep 17 00:00:00 2001 From: ddh0 Date: Wed, 10 Dec 2025 22:13:58 -0600 Subject: [PATCH 01/38] initial commit for branch --- include/llama.h | 9 +++ src/llama-sampling.cpp | 134 +++++++++++++++++++++++++++++++++++++++++ 2 files changed, 143 insertions(+) diff --git a/include/llama.h b/include/llama.h index b52eaacfa7..7e1e65523b 100644 --- a/include/llama.h +++ b/include/llama.h @@ -1289,6 +1289,15 @@ extern "C" { const char ** seq_breakers, size_t num_breakers); + /// @details power law sampler, reshapes probability distribution to target specific probability ranges + /// ref: https://github.com/MrJackSpade/llama.cpp + /// ref: [PR] + LLAMA_API struct llama_sampler * llama_sampler_init_power_law( + float target, // target probability (0.0 to 1.0) + float target_range, // adaptive target range (±range from target) + int32_t queue_size, // rolling history window size for adaptation + uint32_t seed); // RNG seed + LLAMA_API struct llama_sampler * llama_sampler_init_logit_bias( int32_t n_vocab, int32_t n_logit_bias, diff --git a/src/llama-sampling.cpp b/src/llama-sampling.cpp index 3f4a729bc3..6ef8121d7c 100644 --- a/src/llama-sampling.cpp +++ b/src/llama-sampling.cpp @@ -2313,6 +2313,140 @@ struct llama_sampler * llama_sampler_init_dry_testing(int32_t context_size, floa return result; } +// power-law +// ref: https://github.com/MrJackSpade/llama.cpp/tree/master +// ref: [PR] + +struct llama_sampler_power_law { + const float target; + const float target_range; + const int32_t queue_size; + const uint32_t seed; + + std::mt19937 rng; + ring_buffer history; +}; + +static const char * llama_sampler_power_law_name(const struct llama_sampler * /*smpl*/) { + return "power-law"; +} + +static void llama_sampler_power_law_apply(struct llama_sampler * smpl, llama_token_data_array * cur_p) { + auto * ctx = (llama_sampler_power_law *) smpl->ctx; + + // these don't need to be modified or exposed to the user + const float peak_logit_value = 3.0f; + const float tail_heaviness = 3.0f; + + const float min_target = ctx->target - ctx->target_range; + const float max_target = ctx->target + ctx->target_range; + + // compute probabilities to get the "original" values + llama_sampler_softmax_impl(cur_p, false); + + // store original probabilities (needed for history update) + std::vector original_probs; + original_probs.reserve(cur_p->size); + for (size_t i = 0; i < cur_p->size; ++i) { + original_probs.push_back(cur_p->data[i].p); + } + + // calculate adaptive target + float computed_target = ctx->target; + if (ctx->history.size() > 0) { + float sum_excluding_oldest = 0.0f; + size_t sz = ctx->history.size(); + + // sum all except the oldest element + for (size_t i = 0; i < sz - 1; ++i) { + sum_excluding_oldest += ctx->history.rat(i); + } + + float next_value = (ctx->target * ctx->queue_size) - sum_excluding_oldest; + computed_target = std::max(min_target, std::min(next_value, max_target)); + } + + // find closest token (for degenerate width ~ 0 case) + float min_distance = FLT_MAX; + int closest_token_idx = -1; + + for (size_t i = 0; i < cur_p->size; ++i) { + float distance = std::abs(cur_p->data[i].p - computed_target); + if (distance < min_distance) { + min_distance = distance; + closest_token_idx = (int) i; + } + } + + // apply power law transformation + for (size_t i = 0; i < cur_p->size; ++i) { + float p = cur_p->data[i].p; + + float distance = std::abs(p - computed_target); + float normalized_distance = distance / 0.2f; + cur_p->data[i].logit = peak_logit_value / 
(1.0f + std::pow(normalized_distance, tail_heaviness)); + } + + llama_sampler_softmax_impl(cur_p, false); + + // sample from distribution + const int idx = llama_sample_dist(cur_p, ctx->rng); + + // set sampled token + cur_p->selected = idx; + + // update history with ORIGINAL probability + ctx->history.push_back(original_probs[idx]); +} + +static void llama_sampler_power_law_reset(struct llama_sampler * smpl) { + auto * ctx = (llama_sampler_power_law *) smpl->ctx; + ctx->history = ring_buffer(ctx->queue_size); +} + +static struct llama_sampler * llama_sampler_power_law_clone(const struct llama_sampler * smpl) { + const auto * ctx = (const llama_sampler_power_law *) smpl->ctx; + auto * result = llama_sampler_init_power_law(ctx->target, ctx->target_range, ctx->queue_size, ctx->seed); + auto * result_ctx = (llama_sampler_power_law *) result->ctx; + + result_ctx->history = ctx->history; + + return result; +} + +static void llama_sampler_power_law_free(struct llama_sampler * smpl) { + delete (llama_sampler_power_law *) smpl->ctx; +} + +static struct llama_sampler_i llama_sampler_power_law_i = { + /* .name = */ llama_sampler_power_law_name, + /* .accept = */ nullptr, + /* .apply = */ llama_sampler_power_law_apply, + /* .reset = */ llama_sampler_power_law_reset, + /* .clone = */ llama_sampler_power_law_clone, + /* .free = */ llama_sampler_power_law_free, +}; + +struct llama_sampler * llama_sampler_init_power_law( + float target, + float target_range, + int32_t queue_size, + uint32_t seed +) { + auto seed_cur = get_rng_seed(seed); + return llama_sampler_init( + /* .iface = */ &llama_sampler_power_law_i, + /* .ctx = */ new llama_sampler_power_law { + /* .target = */ target, + /* .target_range = */ target_range, + /* .queue_size = */ queue_size, + /* .seed = */ seed_cur, + /* .rng = */ std::mt19937(seed_cur), + /* .history = */ ring_buffer(queue_size), + } + ); +} + // logit-bias struct llama_sampler_logit_bias { From 5ab4ff7e445266f63929617c4f77cb518d24e7ae Mon Sep 17 00:00:00 2001 From: ddh0 Date: Wed, 10 Dec 2025 22:30:14 -0600 Subject: [PATCH 02/38] simplify constants --- src/llama-sampling.cpp | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/src/llama-sampling.cpp b/src/llama-sampling.cpp index 6ef8121d7c..173f660c73 100644 --- a/src/llama-sampling.cpp +++ b/src/llama-sampling.cpp @@ -2334,10 +2334,6 @@ static const char * llama_sampler_power_law_name(const struct llama_sampler * /* static void llama_sampler_power_law_apply(struct llama_sampler * smpl, llama_token_data_array * cur_p) { auto * ctx = (llama_sampler_power_law *) smpl->ctx; - // these don't need to be modified or exposed to the user - const float peak_logit_value = 3.0f; - const float tail_heaviness = 3.0f; - const float min_target = ctx->target - ctx->target_range; const float max_target = ctx->target + ctx->target_range; @@ -2382,9 +2378,8 @@ static void llama_sampler_power_law_apply(struct llama_sampler * smpl, llama_tok for (size_t i = 0; i < cur_p->size; ++i) { float p = cur_p->data[i].p; - float distance = std::abs(p - computed_target); - float normalized_distance = distance / 0.2f; - cur_p->data[i].logit = peak_logit_value / (1.0f + std::pow(normalized_distance, tail_heaviness)); + float normalized_distance = std::abs(p - computed_target) / 0.2f; + cur_p->data[i].logit = 3.0f / (1.0f + std::pow(normalized_distance, 3.0f)); } llama_sampler_softmax_impl(cur_p, false); From 88fb0f3f3288724eada8b1212ed6b8bd4552ac33 Mon Sep 17 00:00:00 2001 From: ddh0 Date: Thu, 11 Dec 2025 13:47:51 -0600 Subject: 
[PATCH 03/38] add params to `struct common_params_sampling`, add reference to PR --- common/common.h | 58 ++++++++++++++++++++++-------------------- include/llama.h | 10 ++++---- src/llama-sampling.cpp | 5 ++-- 3 files changed, 39 insertions(+), 34 deletions(-) diff --git a/common/common.h b/common/common.h index 2fd83f0cf9..e6d8af4b73 100644 --- a/common/common.h +++ b/common/common.h @@ -116,6 +116,7 @@ enum common_sampler_type { COMMON_SAMPLER_TYPE_INFILL = 9, COMMON_SAMPLER_TYPE_PENALTIES = 10, COMMON_SAMPLER_TYPE_TOP_N_SIGMA = 11, + COMMON_SAMPLER_TYPE_POWER_LAW = 12, }; // dimensionality reduction methods, used by cvector-generator @@ -163,33 +164,36 @@ enum common_params_sampling_config : uint64_t { struct common_params_sampling { uint32_t seed = LLAMA_DEFAULT_SEED; // the seed used to initialize llama_sampler - int32_t n_prev = 64; // number of previous tokens to remember - int32_t n_probs = 0; // if greater than 0, output the probabilities of top n_probs tokens. - int32_t min_keep = 0; // 0 = disabled, otherwise samplers should return at least min_keep tokens - int32_t top_k = 40; // <= 0 to use vocab size - float top_p = 0.95f; // 1.0 = disabled - float min_p = 0.05f; // 0.0 = disabled - float xtc_probability = 0.00f; // 0.0 = disabled - float xtc_threshold = 0.10f; // > 0.5 disables XTC - float typ_p = 1.00f; // typical_p, 1.0 = disabled - float temp = 0.80f; // <= 0.0 to sample greedily, 0.0 to not output probabilities - float dynatemp_range = 0.00f; // 0.0 = disabled - float dynatemp_exponent = 1.00f; // controls how entropy maps to temperature in dynamic temperature sampler - int32_t penalty_last_n = 64; // last n tokens to penalize (0 = disable penalty, -1 = context size) - float penalty_repeat = 1.00f; // 1.0 = disabled - float penalty_freq = 0.00f; // 0.0 = disabled - float penalty_present = 0.00f; // 0.0 = disabled - float dry_multiplier = 0.0f; // 0.0 = disabled; DRY repetition penalty for tokens extending repetition: - float dry_base = 1.75f; // 0.0 = disabled; multiplier * base ^ (length of sequence before token - allowed length) - int32_t dry_allowed_length = 2; // tokens extending repetitions beyond this receive penalty - int32_t dry_penalty_last_n = -1; // how many tokens to scan for repetitions (0 = disable penalty, -1 = context size) - int32_t mirostat = 0; // 0 = disabled, 1 = mirostat, 2 = mirostat 2.0 - float top_n_sigma = -1.00f;// -1.0 = disabled - float mirostat_tau = 5.00f; // target entropy - float mirostat_eta = 0.10f; // learning rate - bool ignore_eos = false; - bool no_perf = false; // disable performance metrics - bool timing_per_token = false; + int32_t n_prev = 64; // number of previous tokens to remember + int32_t n_probs = 0; // if greater than 0, output the probabilities of top n_probs tokens. 
+ int32_t min_keep = 0; // 0 = disabled, otherwise samplers should return at least min_keep tokens + int32_t top_k = 40; // <= 0 to use vocab size + float top_p = 0.95f; // 1.0 = disabled + float min_p = 0.05f; // 0.0 = disabled + float xtc_probability = 0.00f; // 0.0 = disabled + float xtc_threshold = 0.10f; // > 0.5 disables XTC + float typ_p = 1.00f; // typical_p, 1.0 = disabled + float temp = 0.80f; // <= 0.0 to sample greedily, 0.0 to not output probabilities + float dynatemp_range = 0.00f; // 0.0 = disabled + float dynatemp_exponent = 1.00f; // controls how entropy maps to temperature in dynamic temperature sampler + int32_t penalty_last_n = 64; // last n tokens to penalize (0 = disable penalty, -1 = context size) + float penalty_repeat = 1.00f; // 1.0 = disabled + float penalty_freq = 0.00f; // 0.0 = disabled + float penalty_present = 0.00f; // 0.0 = disabled + float dry_multiplier = 0.0f; // 0.0 = disabled; DRY repetition penalty for tokens extending repetition: + float dry_base = 1.75f; // 0.0 = disabled; multiplier * base ^ (length of sequence before token - allowed length) + int32_t dry_allowed_length = 2; // tokens extending repetitions beyond this receive penalty + int32_t dry_penalty_last_n = -1; // how many tokens to scan for repetitions (0 = disable penalty, -1 = context size) + float power_law_target = 0.5; // target probability (0.0 to 1.0) + float power_law_target_range = 0.5; // adapt the target within this range (target +/- range) + int32_t power_law_queue_size = 10; // rolling history window size for adaptation + int32_t mirostat = 0; // 0 = disabled, 1 = mirostat, 2 = mirostat 2.0 + float top_n_sigma = -1.00f; // -1.0 = disabled + float mirostat_tau = 5.00f; // target entropy + float mirostat_eta = 0.10f; // learning rate + bool ignore_eos = false; + bool no_perf = false; // disable performance metrics + bool timing_per_token = false; uint64_t user_sampling_config = 0; // bitfield to track user-specified samplers diff --git a/include/llama.h b/include/llama.h index 7e1e65523b..3adfdb9993 100644 --- a/include/llama.h +++ b/include/llama.h @@ -1291,12 +1291,12 @@ extern "C" { /// @details power law sampler, reshapes probability distribution to target specific probability ranges /// ref: https://github.com/MrJackSpade/llama.cpp - /// ref: [PR] + /// ref: https://github.com/ggml-org/llama.cpp/pull/17927 LLAMA_API struct llama_sampler * llama_sampler_init_power_law( - float target, // target probability (0.0 to 1.0) - float target_range, // adaptive target range (±range from target) - int32_t queue_size, // rolling history window size for adaptation - uint32_t seed); // RNG seed + float target, // target probability (0.0 to 1.0) + float target_range, // adaptive target range (±range from target) + int32_t queue_size, // rolling history window size for adaptation + uint32_t seed); // RNG seed LLAMA_API struct llama_sampler * llama_sampler_init_logit_bias( int32_t n_vocab, diff --git a/src/llama-sampling.cpp b/src/llama-sampling.cpp index 173f660c73..fb488acffe 100644 --- a/src/llama-sampling.cpp +++ b/src/llama-sampling.cpp @@ -2315,7 +2315,7 @@ struct llama_sampler * llama_sampler_init_dry_testing(int32_t context_size, floa // power-law // ref: https://github.com/MrJackSpade/llama.cpp/tree/master -// ref: [PR] +// ref: https://github.com/ggml-org/llama.cpp/pull/17927 struct llama_sampler_power_law { const float target; @@ -2404,7 +2404,8 @@ static struct llama_sampler * llama_sampler_power_law_clone(const struct llama_s auto * result = 
llama_sampler_init_power_law(ctx->target, ctx->target_range, ctx->queue_size, ctx->seed); auto * result_ctx = (llama_sampler_power_law *) result->ctx; - result_ctx->history = ctx->history; + result_ctx->rng = ctx->rng; + result_ctx->history = ctx->history; return result; } From 374bfd43634e2ab2b42957243fa0a8295dd8de99 Mon Sep 17 00:00:00 2001 From: ddh0 Date: Thu, 11 Dec 2025 14:22:58 -0600 Subject: [PATCH 04/38] explicitly clamp `min_target` and `max_target` to `[0.0, 1.0]` --- src/llama-sampling.cpp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/llama-sampling.cpp b/src/llama-sampling.cpp index fb488acffe..eacad79448 100644 --- a/src/llama-sampling.cpp +++ b/src/llama-sampling.cpp @@ -2334,8 +2334,9 @@ static const char * llama_sampler_power_law_name(const struct llama_sampler * /* static void llama_sampler_power_law_apply(struct llama_sampler * smpl, llama_token_data_array * cur_p) { auto * ctx = (llama_sampler_power_law *) smpl->ctx; - const float min_target = ctx->target - ctx->target_range; - const float max_target = ctx->target + ctx->target_range; + // clamp the target range to [0.0, 1.0] + const float min_target = std::max(ctx->target - ctx->target_range, 0.0f); + const float max_target = std::min(ctx->target + ctx->target_range, 1.0f); // compute probabilities to get the "original" values llama_sampler_softmax_impl(cur_p, false); From ffe163911be3201f303c40cf18df431ce14e6e71 Mon Sep 17 00:00:00 2001 From: ddh0 Date: Thu, 11 Dec 2025 15:16:11 -0600 Subject: [PATCH 05/38] add args, rename `queue_size` -> `window_size` --- common/arg.cpp | 23 +++++++++++++++++++++++ common/common.h | 2 +- include/llama.h | 4 ++-- src/llama-sampling.cpp | 14 +++++++------- 4 files changed, 33 insertions(+), 10 deletions(-) diff --git a/common/arg.cpp b/common/arg.cpp index a31dcbc689..4210633398 100644 --- a/common/arg.cpp +++ b/common/arg.cpp @@ -1501,6 +1501,29 @@ common_params_context common_params_parser_init(common_params & params, llama_ex } } ).set_sparam()); + add_opt(common_arg( + {"--power-law-target"}, "N", + string_format("Power Law sampler target probability (default: %.2f, 0.0 to 1.0)\n" + "[(more info)](https://github.com/ggml-org/llama.cpp/pull/17927)", + (double)params.sampling.power_law_target), + [](common_params & params, const std::string & value) { + params.sampling.power_law_target = std::stof(value); + } + ).set_sparam()); + add_opt(common_arg( + {"--power-law-target-range"}, "N", + string_format("Power Law sampler adaptive range +/- from target (default: %.2f, 0.0 = no adaptation)", (double)params.sampling.power_law_target_range), + [](common_params & params, const std::string & value) { + params.sampling.power_law_target_range = std::stof(value); + } + ).set_sparam()); + add_opt(common_arg( + {"--power-law-window-size"}, "N", + string_format("Power Law sampler rolling window size, in tokens (default: %d)", params.sampling.power_law_window_size), + [](common_params & params, int value) { + params.sampling.power_law_window_size = value; + } + ).set_sparam()); add_opt(common_arg( {"--dynatemp-range"}, "N", string_format("dynamic temperature range (default: %.1f, 0.0 = disabled)", (double)params.sampling.dynatemp_range), diff --git a/common/common.h b/common/common.h index e6d8af4b73..d4f1229a7e 100644 --- a/common/common.h +++ b/common/common.h @@ -186,7 +186,7 @@ struct common_params_sampling { int32_t dry_penalty_last_n = -1; // how many tokens to scan for repetitions (0 = disable penalty, -1 = context size) float power_law_target = 0.5; // target 
probability (0.0 to 1.0) float power_law_target_range = 0.5; // adapt the target within this range (target +/- range) - int32_t power_law_queue_size = 10; // rolling history window size for adaptation + int32_t power_law_window_size = 10; // rolling history window size for target adaptation int32_t mirostat = 0; // 0 = disabled, 1 = mirostat, 2 = mirostat 2.0 float top_n_sigma = -1.00f; // -1.0 = disabled float mirostat_tau = 5.00f; // target entropy diff --git a/include/llama.h b/include/llama.h index 3adfdb9993..1aace655d0 100644 --- a/include/llama.h +++ b/include/llama.h @@ -1294,8 +1294,8 @@ extern "C" { /// ref: https://github.com/ggml-org/llama.cpp/pull/17927 LLAMA_API struct llama_sampler * llama_sampler_init_power_law( float target, // target probability (0.0 to 1.0) - float target_range, // adaptive target range (±range from target) - int32_t queue_size, // rolling history window size for adaptation + float target_range, // adaptive target range (+/- range from target) + int32_t window_size, // rolling history window size for target adaptation uint32_t seed); // RNG seed LLAMA_API struct llama_sampler * llama_sampler_init_logit_bias( diff --git a/src/llama-sampling.cpp b/src/llama-sampling.cpp index eacad79448..e2c229cd9f 100644 --- a/src/llama-sampling.cpp +++ b/src/llama-sampling.cpp @@ -2320,7 +2320,7 @@ struct llama_sampler * llama_sampler_init_dry_testing(int32_t context_size, floa struct llama_sampler_power_law { const float target; const float target_range; - const int32_t queue_size; + const int32_t window_size; const uint32_t seed; std::mt19937 rng; @@ -2359,7 +2359,7 @@ static void llama_sampler_power_law_apply(struct llama_sampler * smpl, llama_tok sum_excluding_oldest += ctx->history.rat(i); } - float next_value = (ctx->target * ctx->queue_size) - sum_excluding_oldest; + float next_value = (ctx->target * ctx->window_size) - sum_excluding_oldest; computed_target = std::max(min_target, std::min(next_value, max_target)); } @@ -2397,12 +2397,12 @@ static void llama_sampler_power_law_apply(struct llama_sampler * smpl, llama_tok static void llama_sampler_power_law_reset(struct llama_sampler * smpl) { auto * ctx = (llama_sampler_power_law *) smpl->ctx; - ctx->history = ring_buffer(ctx->queue_size); + ctx->history = ring_buffer(ctx->window_size); } static struct llama_sampler * llama_sampler_power_law_clone(const struct llama_sampler * smpl) { const auto * ctx = (const llama_sampler_power_law *) smpl->ctx; - auto * result = llama_sampler_init_power_law(ctx->target, ctx->target_range, ctx->queue_size, ctx->seed); + auto * result = llama_sampler_init_power_law(ctx->target, ctx->target_range, ctx->window_size, ctx->seed); auto * result_ctx = (llama_sampler_power_law *) result->ctx; result_ctx->rng = ctx->rng; @@ -2427,7 +2427,7 @@ static struct llama_sampler_i llama_sampler_power_law_i = { struct llama_sampler * llama_sampler_init_power_law( float target, float target_range, - int32_t queue_size, + int32_t window_size, uint32_t seed ) { auto seed_cur = get_rng_seed(seed); @@ -2436,10 +2436,10 @@ struct llama_sampler * llama_sampler_init_power_law( /* .ctx = */ new llama_sampler_power_law { /* .target = */ target, /* .target_range = */ target_range, - /* .queue_size = */ queue_size, + /* .window_size = */ window_size, /* .seed = */ seed_cur, /* .rng = */ std::mt19937(seed_cur), - /* .history = */ ring_buffer(queue_size), + /* .history = */ ring_buffer(window_size), } ); } From 4959878a748be461f0bf1e7fecfe93694d5eaba4 Mon Sep 17 00:00:00 2001 From: ddh0 Date: Thu, 11 Dec 2025 
16:27:14 -0600 Subject: [PATCH 06/38] improved comments --- common/arg.cpp | 2 +- include/llama.h | 15 +++++++++++---- src/llama-sampling.cpp | 11 +++++++++-- 3 files changed, 21 insertions(+), 7 deletions(-) diff --git a/common/arg.cpp b/common/arg.cpp index 4210633398..eac7454768 100644 --- a/common/arg.cpp +++ b/common/arg.cpp @@ -1512,7 +1512,7 @@ common_params_context common_params_parser_init(common_params & params, llama_ex ).set_sparam()); add_opt(common_arg( {"--power-law-target-range"}, "N", - string_format("Power Law sampler adaptive range +/- from target (default: %.2f, 0.0 = no adaptation)", (double)params.sampling.power_law_target_range), + string_format("Power Law sampler adaptive target range (target±range) (default: %.2f, 0.0 = fixed target)", (double)params.sampling.power_law_target_range), [](common_params & params, const std::string & value) { params.sampling.power_law_target_range = std::stof(value); } diff --git a/include/llama.h b/include/llama.h index 1aace655d0..95df1058cc 100644 --- a/include/llama.h +++ b/include/llama.h @@ -1289,12 +1289,19 @@ extern "C" { const char ** seq_breakers, size_t num_breakers); - /// @details power law sampler, reshapes probability distribution to target specific probability ranges - /// ref: https://github.com/MrJackSpade/llama.cpp - /// ref: https://github.com/ggml-org/llama.cpp/pull/17927 + /// @details power-law sampler - reshapes probability distribution to target specific probability ranges + /// + /// this sampler is like `greedy`, `dist`, and `mirostat` in that it actually selects a token ID + /// rather than just transforming logits. therefore it must always be the last sampler in the + /// sampler chain. + /// + /// it is recommended to only perform minimal truncation before this sampler. + /// + /// ref: https://github.com/MrJackSpade/llama.cpp/tree/master (original impl, documentation) + /// ref: https://github.com/ggml-org/llama.cpp/pull/17927 (llama.cpp PR) LLAMA_API struct llama_sampler * llama_sampler_init_power_law( float target, // target probability (0.0 to 1.0) - float target_range, // adaptive target range (+/- range from target) + float target_range, // adaptive target range (target±range) int32_t window_size, // rolling history window size for target adaptation uint32_t seed); // RNG seed diff --git a/src/llama-sampling.cpp b/src/llama-sampling.cpp index e2c229cd9f..0b591d60a8 100644 --- a/src/llama-sampling.cpp +++ b/src/llama-sampling.cpp @@ -2314,8 +2314,15 @@ struct llama_sampler * llama_sampler_init_dry_testing(int32_t context_size, floa } // power-law -// ref: https://github.com/MrJackSpade/llama.cpp/tree/master -// ref: https://github.com/ggml-org/llama.cpp/pull/17927 +// +// this sampler is like `greedy`, `dist`, and `mirostat` in that it actually selects a token ID +// rather than just transforming logits. therefore it must always be the last sampler in the +// sampler chain. +// +// it is recommended to only perform minimal truncation before this sampler. 
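(To make the ordering constraint above concrete: a minimal sketch of a chain built against the llama.h API as of this patch. The min-p step and all parameter values are illustrative only, and `lctx` is assumed to be an existing `llama_context *`.)

    // power-law selects the token itself, so it must terminate the chain
    llama_sampler * chain = llama_sampler_chain_init(llama_sampler_chain_default_params());

    // minimal truncation first, e.g. a light min-p pass
    llama_sampler_chain_add(chain, llama_sampler_init_min_p(0.05f, 1));

    // power-law last: target = 0.5, target_range = 0.5, window_size = 10
    llama_sampler_chain_add(chain, llama_sampler_init_power_law(0.5f, 0.5f, 10, LLAMA_DEFAULT_SEED));

    // note: no llama_sampler_init_dist() after this - power-law already picks the token
    llama_token id = llama_sampler_sample(chain, lctx, -1);
    llama_sampler_free(chain);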
+// +// ref: https://github.com/MrJackSpade/llama.cpp/tree/master (original impl, documentation) +// ref: https://github.com/ggml-org/llama.cpp/pull/17927 (llama.cpp PR) struct llama_sampler_power_law { const float target; From f3457a83e653b85074dff573ee723069f7cf1fed Mon Sep 17 00:00:00 2001 From: ddh0 Date: Thu, 11 Dec 2025 16:36:00 -0600 Subject: [PATCH 07/38] minor --- common/arg.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/common/arg.cpp b/common/arg.cpp index eac7454768..18259c72c2 100644 --- a/common/arg.cpp +++ b/common/arg.cpp @@ -1503,7 +1503,7 @@ common_params_context common_params_parser_init(common_params & params, llama_ex ).set_sparam()); add_opt(common_arg( {"--power-law-target"}, "N", - string_format("Power Law sampler target probability (default: %.2f, 0.0 to 1.0)\n" + string_format("Power Law sampler target probability (default: %.2f; allowed range 0.0 to 1.0)\n" "[(more info)](https://github.com/ggml-org/llama.cpp/pull/17927)", (double)params.sampling.power_law_target), [](common_params & params, const std::string & value) { params.sampling.power_law_target = std::stof(value); } ).set_sparam()); add_opt(common_arg( {"--power-law-target-range"}, "N", - string_format("Power Law sampler adaptive target range (target±range) (default: %.2f, 0.0 = fixed target)", (double)params.sampling.power_law_target_range), + string_format("Power Law sampler adaptive target range (target±range) (default: %.2f; 0.0 = fixed target)", (double)params.sampling.power_law_target_range), [](common_params & params, const std::string & value) { params.sampling.power_law_target_range = std::stof(value); } From 93169593b8f4571df120f6e8dbf8c21185a589ff Mon Sep 17 00:00:00 2001 From: ddh0 Date: Thu, 11 Dec 2025 16:46:17 -0600 Subject: [PATCH 08/38] remove old unused code from algorithm --- src/llama-sampling.cpp | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/src/llama-sampling.cpp b/src/llama-sampling.cpp index 0b591d60a8..b61202c636 100644 --- a/src/llama-sampling.cpp +++ b/src/llama-sampling.cpp @@ -2370,24 +2370,12 @@ static void llama_sampler_power_law_apply(struct llama_sampler * smpl, llama_tok computed_target = std::max(min_target, std::min(next_value, max_target)); } - // find closest token (for degenerate width ~ 0 case) - float min_distance = FLT_MAX; - int closest_token_idx = -1; - - for (size_t i = 0; i < cur_p->size; ++i) { - float distance = std::abs(cur_p->data[i].p - computed_target); - if (distance < min_distance) { - min_distance = distance; - closest_token_idx = (int) i; - } - } - // apply power law transformation for (size_t i = 0; i < cur_p->size; ++i) { float p = cur_p->data[i].p; float normalized_distance = std::abs(p - computed_target) / 0.2f; cur_p->data[i].logit = 3.0f / (1.0f + std::pow(normalized_distance, 3.0f)); } llama_sampler_softmax_impl(cur_p, false); From b3aea5776865d09bda4f35729ee367b70cb47f64 Mon Sep 17 00:00:00 2001 From: ddh0 Date: Thu, 11 Dec 2025 16:48:52 -0600 Subject: [PATCH 09/38] minor --- src/llama-sampling.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/llama-sampling.cpp b/src/llama-sampling.cpp index b61202c636..06a1eef148 100644 --- a/src/llama-sampling.cpp +++ b/src/llama-sampling.cpp @@ -2390,8 +2390,8 @@ static void llama_sampler_power_law_apply(struct llama_sampler * smpl, llama_tok } static void
llama_sampler_power_law_reset(struct llama_sampler * smpl) { - auto * ctx = (llama_sampler_power_law *) smpl->ctx; - ctx->history = ring_buffer(ctx->window_size); + auto * ctx = (llama_sampler_power_law *) smpl->ctx; + ctx->history = ring_buffer(ctx->window_size); } static struct llama_sampler * llama_sampler_power_law_clone(const struct llama_sampler * smpl) { From cd7de7c7a8fc30ec45737df428a09e2b80c30289 Mon Sep 17 00:00:00 2001 From: ddh0 Date: Thu, 11 Dec 2025 17:23:27 -0600 Subject: [PATCH 10/38] add power law case to `common_sampler_init`, add sampler name mappings --- common/sampling.cpp | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/common/sampling.cpp b/common/sampling.cpp index 7a6b7be1e0..07d7153384 100644 --- a/common/sampling.cpp +++ b/common/sampling.cpp @@ -243,6 +243,9 @@ struct common_sampler * common_sampler_init(const struct llama_model * model, co params.logit_bias.data())); if (params.mirostat == 0) { + // if this flag is set, we will not need to add `dist` at the end of the sampler chain + bool has_distribution_sampler = false; + for (const auto & cnstr : params.samplers) { switch (cnstr) { case COMMON_SAMPLER_TYPE_DRY: @@ -253,7 +256,7 @@ struct common_sampler * common_sampler_init(const struct llama_model * model, co c_breakers.push_back(str.c_str()); } - llama_sampler_chain_add(result->chain, llama_sampler_init_dry (vocab, llama_model_n_ctx_train(model), params.dry_multiplier, params.dry_base, params.dry_allowed_length, params.dry_penalty_last_n, c_breakers.data(), c_breakers.size())); + llama_sampler_chain_add(result->chain, llama_sampler_init_dry (vocab, llama_model_n_ctx_train(model), params.dry_multiplier, params.dry_base, params.dry_allowed_length, params.dry_penalty_last_n, c_breakers.data(), c_breakers.size())); } break; case COMMON_SAMPLER_TYPE_TOP_K: @@ -283,11 +286,18 @@ struct common_sampler * common_sampler_init(const struct llama_model * model, co case COMMON_SAMPLER_TYPE_PENALTIES: llama_sampler_chain_add(result->chain, llama_sampler_init_penalties (params.penalty_last_n, params.penalty_repeat, params.penalty_freq, params.penalty_present)); break; + case COMMON_SAMPLER_TYPE_POWER_LAW: + llama_sampler_chain_add(result->chain, llama_sampler_init_power_law (params.power_law_target, params.power_law_target_range, params.power_law_window_size, params.seed)); + has_distribution_sampler = true; + break; default: GGML_ASSERT(false && "unknown sampler type"); } } - llama_sampler_chain_add(result->chain, llama_sampler_init_dist(params.seed)); + // only add `dist` to the end of the chain if no other distribution samplers were added + if (!has_distribution_sampler) { + llama_sampler_chain_add(result->chain, llama_sampler_init_dist(params.seed)); + } } else if (params.mirostat == 1) { llama_sampler_chain_add(result->chain, llama_sampler_init_temp(params.temp)); llama_sampler_chain_add(result->chain, llama_sampler_init_mirostat(llama_vocab_n_tokens(vocab), params.seed, params.mirostat_tau, params.mirostat_eta, 100)); @@ -586,6 +596,7 @@ std::vector common_sampler_types_from_names(const std::vect { "xtc", COMMON_SAMPLER_TYPE_XTC }, { "infill", COMMON_SAMPLER_TYPE_INFILL }, { "penalties", COMMON_SAMPLER_TYPE_PENALTIES }, + { "power_law", COMMON_SAMPLER_TYPE_POWER_LAW }, }; // since samplers names are written multiple ways @@ -601,6 +612,7 @@ std::vector common_sampler_types_from_names(const std::vect { "typ", COMMON_SAMPLER_TYPE_TYPICAL_P }, { "min-p", COMMON_SAMPLER_TYPE_MIN_P }, { "temp", COMMON_SAMPLER_TYPE_TEMPERATURE }, + { 
"power-law", COMMON_SAMPLER_TYPE_POWER_LAW }, }; std::vector samplers; From 534cb4fbba8782cef4b40f3a789811d801d72db5 Mon Sep 17 00:00:00 2001 From: ddh0 Date: Thu, 11 Dec 2025 17:29:04 -0600 Subject: [PATCH 11/38] clarify behaviour when `window_size = 0` --- common/arg.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/common/arg.cpp b/common/arg.cpp index 18259c72c2..31f67627f6 100644 --- a/common/arg.cpp +++ b/common/arg.cpp @@ -1519,7 +1519,7 @@ common_params_context common_params_parser_init(common_params & params, llama_ex ).set_sparam()); add_opt(common_arg( {"--power-law-window-size"}, "N", - string_format("Power Law sampler rolling window size, in tokens (default: %d)", params.sampling.power_law_window_size), + string_format("Power Law sampler rolling window size, in tokens (default: %d; 0 = fixed target)", params.sampling.power_law_window_size), [](common_params & params, int value) { params.sampling.power_law_window_size = value; } From dcada035b4d18702cce3135a052c7c5dea71e478 Mon Sep 17 00:00:00 2001 From: ddh0 Date: Thu, 11 Dec 2025 17:49:47 -0600 Subject: [PATCH 12/38] add missing enums --- common/sampling.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/common/sampling.cpp b/common/sampling.cpp index 07d7153384..90f48c5a05 100644 --- a/common/sampling.cpp +++ b/common/sampling.cpp @@ -564,6 +564,7 @@ char common_sampler_type_to_chr(enum common_sampler_type cnstr) { case COMMON_SAMPLER_TYPE_XTC: return 'x'; case COMMON_SAMPLER_TYPE_INFILL: return 'i'; case COMMON_SAMPLER_TYPE_PENALTIES: return 'e'; + case COMMON_SAMPLER_TYPE_POWER_LAW: return 'w'; default : return '?'; } } @@ -580,6 +581,7 @@ std::string common_sampler_type_to_str(enum common_sampler_type cnstr) { case COMMON_SAMPLER_TYPE_XTC: return "xtc"; case COMMON_SAMPLER_TYPE_INFILL: return "infill"; case COMMON_SAMPLER_TYPE_PENALTIES: return "penalties"; + case COMMON_SAMPLER_TYPE_POWER_LAW: return "power_law"; default : return ""; } } From 2d62bbea9fcdb3cb40b7a437680f3a5c716bebd6 Mon Sep 17 00:00:00 2001 From: ddh0 Date: Thu, 11 Dec 2025 22:43:10 -0600 Subject: [PATCH 13/38] remove `target_range` param, make `target == 1` no-op, cleanup code --- common/arg.cpp | 13 ++------- common/common.h | 60 +++++++++++++++++++--------------------- include/llama.h | 11 +++++--- src/llama-sampling.cpp | 63 +++++++++++++++++++++++++----------------- 4 files changed, 77 insertions(+), 70 deletions(-) diff --git a/common/arg.cpp b/common/arg.cpp index 31f67627f6..a8ea0caf33 100644 --- a/common/arg.cpp +++ b/common/arg.cpp @@ -1503,23 +1503,16 @@ common_params_context common_params_parser_init(common_params & params, llama_ex ).set_sparam()); add_opt(common_arg( {"--power-law-target"}, "N", - string_format("Power Law sampler target probability (default: %.2f; allowed range 0.0 to 1.0)\n" - "[(more info)](https://github.com/ggml-org/llama.cpp/pull/17927)", + string_format("target probability for Power Law sampling (valid range 0.0 to 1.0; <0 = disabled) " + "(%.1f = default)\n[(more info)](https://github.com/ggml-org/llama.cpp/pull/17927)", (double)params.sampling.power_law_target), [](common_params & params, const std::string & value) { params.sampling.power_law_target = std::stof(value); } ).set_sparam()); - add_opt(common_arg( - {"--power-law-target-range"}, "N", - string_format("Power Law sampler adaptive target range (target±range) (default: %.2f; 0.0 = fixed target)", (double)params.sampling.power_law_target_range), - [](common_params & params, const std::string & value) { - 
params.sampling.power_law_target_range = std::stof(value); - } - ).set_sparam()); add_opt(common_arg( {"--power-law-window-size"}, "N", - string_format("Power Law sampler rolling window size, in tokens (default: %d; 0 = fixed target)", params.sampling.power_law_window_size), + string_format("rolling window size for target adaptation in Power Law sampling (≤0 = fixed target; %d = default)", params.sampling.power_law_window_size), [](common_params & params, int value) { params.sampling.power_law_window_size = value; } diff --git a/common/common.h b/common/common.h index d4f1229a7e..ba3d776bdc 100644 --- a/common/common.h +++ b/common/common.h @@ -164,42 +164,40 @@ enum common_params_sampling_config : uint64_t { struct common_params_sampling { uint32_t seed = LLAMA_DEFAULT_SEED; // the seed used to initialize llama_sampler - int32_t n_prev = 64; // number of previous tokens to remember - int32_t n_probs = 0; // if greater than 0, output the probabilities of top n_probs tokens. - int32_t min_keep = 0; // 0 = disabled, otherwise samplers should return at least min_keep tokens - int32_t top_k = 40; // <= 0 to use vocab size - float top_p = 0.95f; // 1.0 = disabled - float min_p = 0.05f; // 0.0 = disabled - float xtc_probability = 0.00f; // 0.0 = disabled - float xtc_threshold = 0.10f; // > 0.5 disables XTC - float typ_p = 1.00f; // typical_p, 1.0 = disabled - float temp = 0.80f; // <= 0.0 to sample greedily, 0.0 to not output probabilities - float dynatemp_range = 0.00f; // 0.0 = disabled - float dynatemp_exponent = 1.00f; // controls how entropy maps to temperature in dynamic temperature sampler - int32_t penalty_last_n = 64; // last n tokens to penalize (0 = disable penalty, -1 = context size) - float penalty_repeat = 1.00f; // 1.0 = disabled - float penalty_freq = 0.00f; // 0.0 = disabled - float penalty_present = 0.00f; // 0.0 = disabled - float dry_multiplier = 0.0f; // 0.0 = disabled; DRY repetition penalty for tokens extending repetition: - float dry_base = 1.75f; // 0.0 = disabled; multiplier * base ^ (length of sequence before token - allowed length) - int32_t dry_allowed_length = 2; // tokens extending repetitions beyond this receive penalty - int32_t dry_penalty_last_n = -1; // how many tokens to scan for repetitions (0 = disable penalty, -1 = context size) - float power_law_target = 0.5; // target probability (0.0 to 1.0) - float power_law_target_range = 0.5; // adapt the target within this range (target +/- range) - int32_t power_law_window_size = 10; // rolling history window size for target adaptation - int32_t mirostat = 0; // 0 = disabled, 1 = mirostat, 2 = mirostat 2.0 - float top_n_sigma = -1.00f; // -1.0 = disabled - float mirostat_tau = 5.00f; // target entropy - float mirostat_eta = 0.10f; // learning rate - bool ignore_eos = false; - bool no_perf = false; // disable performance metrics - bool timing_per_token = false; + int32_t n_prev = 64; // number of previous tokens to remember + int32_t n_probs = 0; // if greater than 0, output the probabilities of top n_probs tokens. 
+ int32_t min_keep = 0; // 0 = disabled, otherwise samplers should return at least min_keep tokens + int32_t top_k = 40; // <= 0 to use vocab size + float top_p = 0.95f; // 1.0 = disabled + float min_p = 0.05f; // 0.0 = disabled + float xtc_probability = 0.00f; // 0.0 = disabled + float xtc_threshold = 0.10f; // > 0.5 disables XTC + float typ_p = 1.00f; // typical_p, 1.0 = disabled + float temp = 0.80f; // <= 0.0 to sample greedily, 0.0 to not output probabilities + float dynatemp_range = 0.00f; // 0.0 = disabled + float dynatemp_exponent = 1.00f; // controls how entropy maps to temperature in dynamic temperature sampler + int32_t penalty_last_n = 64; // last n tokens to penalize (0 = disable penalty, -1 = context size) + float penalty_repeat = 1.00f; // 1.0 = disabled + float penalty_freq = 0.00f; // 0.0 = disabled + float penalty_present = 0.00f; // 0.0 = disabled + float dry_multiplier = 0.0f; // 0.0 = disabled; DRY repetition penalty for tokens extending repetition: + float dry_base = 1.75f; // 0.0 = disabled; multiplier * base ^ (length of sequence before token - allowed length) + int32_t dry_allowed_length = 2; // tokens extending repetitions beyond this receive penalty + int32_t dry_penalty_last_n = -1; // how many tokens to scan for repetitions (0 = disable penalty, -1 = context size) + float power_law_target = -1.0f; // target probability for Power Law sampling (valid range 0.0 to 1.0; <0 = disabled) + int32_t power_law_window_size = 10; // rolling window size for target adaptation in Power Law sampling (≤0 = fixed target) + int32_t mirostat = 0; // 0 = disabled, 1 = mirostat, 2 = mirostat 2.0 + float top_n_sigma = -1.00f; // -1.0 = disabled + float mirostat_tau = 5.00f; // target entropy + float mirostat_eta = 0.10f; // learning rate + bool ignore_eos = false; + bool no_perf = false; // disable performance metrics + bool timing_per_token = false; uint64_t user_sampling_config = 0; // bitfield to track user-specified samplers std::vector dry_sequence_breakers = {"\n", ":", "\"", "*"}; // default sequence breakers for DRY - std::vector samplers = { COMMON_SAMPLER_TYPE_PENALTIES, COMMON_SAMPLER_TYPE_DRY, diff --git a/include/llama.h b/include/llama.h index 95df1058cc..ce1308d2bd 100644 --- a/include/llama.h +++ b/include/llama.h @@ -1297,13 +1297,16 @@ extern "C" { /// /// it is recommended to only perform minimal truncation before this sampler. 
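(The adaptive-target feedback described above is easiest to see with numbers. A standalone sketch of the windowed update used at this point in the series; the window contents are invented for illustration.)

    #include <algorithm>
    #include <cstdio>

    int main() {
        const float target      = 0.5f;
        const int   window_size = 4;

        // original probabilities of the last window_size selections, oldest first
        const float window[window_size] = {0.9f, 0.8f, 0.6f, 0.3f};

        // sum all except the oldest element (it is about to be evicted)
        float sum_excluding_oldest = 0.0f;
        for (int i = 1; i < window_size; ++i) {
            sum_excluding_oldest += window[i];
        }

        // choose the next target so the window average returns to `target`:
        // 0.5 * 4 - (0.8 + 0.6 + 0.3) = 2.0 - 1.7 = 0.3
        float next_value      = target * window_size - sum_excluding_oldest;
        float computed_target = std::max(0.0f, std::min(next_value, 1.0f));

        printf("computed_target = %.2f\n", computed_target); // prints 0.30
    }

After a run of high-probability selections, the computed target drops, steering the sampler toward lower-probability tokens until the window average settles back on the configured target.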
/// + /// @param target target probability (valid range 0.0 to 1.0; <0 = disabled) + /// @param window_size rolling window size for target adaptation (≤0 = fixed target) + /// @param seed RNG seed + /// /// ref: https://github.com/MrJackSpade/llama.cpp/tree/master (original impl, documentation) /// ref: https://github.com/ggml-org/llama.cpp/pull/17927 (llama.cpp PR) LLAMA_API struct llama_sampler * llama_sampler_init_power_law( - float target, // target probability (0.0 to 1.0) - float target_range, // adaptive target range (target±range) - int32_t window_size, // rolling history window size for target adaptation - uint32_t seed); // RNG seed + float target, + int32_t window_size, + uint32_t seed); LLAMA_API struct llama_sampler * llama_sampler_init_logit_bias( int32_t n_vocab, diff --git a/src/llama-sampling.cpp b/src/llama-sampling.cpp index 06a1eef148..d5f485f846 100644 --- a/src/llama-sampling.cpp +++ b/src/llama-sampling.cpp @@ -2326,12 +2326,11 @@ struct llama_sampler * llama_sampler_init_dry_testing(int32_t context_size, floa struct llama_sampler_power_law { const float target; - const float target_range; const int32_t window_size; - const uint32_t seed; + const uint32_t seed; std::mt19937 rng; - ring_buffer history; + ring_buffer window; }; static const char * llama_sampler_power_law_name(const struct llama_sampler * /*smpl*/) { @@ -2341,66 +2340,82 @@ static const char * llama_sampler_power_law_name(const struct llama_sampler * /* static void llama_sampler_power_law_apply(struct llama_sampler * smpl, llama_token_data_array * cur_p) { auto * ctx = (llama_sampler_power_law *) smpl->ctx; - // clamp the target range to [0.0, 1.0] - const float min_target = std::max(ctx->target - ctx->target_range, 0.0f); - const float max_target = std::min(ctx->target + ctx->target_range, 1.0f); + if (ctx->target < 0.0f) { + // no-op: just sample from the distribution as-is + llama_sampler_softmax_impl(cur_p, false); + const int idx = llama_sample_dist(cur_p, ctx->rng); + cur_p->selected = idx; + return; + } + + // fixed power law transform parameters (from original implementation) + const float distribution_width = 0.2f; + const float peak_logit_value = 3.0f; + const float tail_heaviness = 3.0f; // compute probabilities to get the "original" values llama_sampler_softmax_impl(cur_p, false); - // store original probabilities (needed for history update) + // store original probabilities (used for future target adaptation) std::vector original_probs; original_probs.reserve(cur_p->size); for (size_t i = 0; i < cur_p->size; ++i) { original_probs.push_back(cur_p->data[i].p); } + // // calculate adaptive target + // + + const float min_target = 0.0f; + const float max_target = 1.0f; + float computed_target = ctx->target; - if (ctx->history.size() > 0) { + if (ctx->window.size() > 0) { float sum_excluding_oldest = 0.0f; - size_t sz = ctx->history.size(); + size_t sz = ctx->window.size(); // sum all except the oldest element for (size_t i = 0; i < sz - 1; ++i) { - sum_excluding_oldest += ctx->history.rat(i); + sum_excluding_oldest += ctx->window.rat(i); } float next_value = (ctx->target * ctx->window_size) - sum_excluding_oldest; computed_target = std::max(min_target, std::min(next_value, max_target)); } - // apply power law transformation + // + // power law transform + // + for (size_t i = 0; i < cur_p->size; ++i) { float p = cur_p->data[i].p; - float normalized_distance = std::abs(p - computed_target) / 0.2f; - cur_p->data[i].logit = 3.0f / (1.0f + std::pow(normalized_distance, 3.0f)); + float 
normalized_distance = std::abs(p - computed_target) / distribution_width; + cur_p->data[i].logit = peak_logit_value / (1.0f + std::pow(normalized_distance, tail_heaviness)); } llama_sampler_softmax_impl(cur_p, false); - // sample from distribution + // sample from the transformed distribution const int idx = llama_sample_dist(cur_p, ctx->rng); - - // set sampled token cur_p->selected = idx; - // update history with ORIGINAL probability - ctx->history.push_back(original_probs[idx]); + // add the ORIGINAL probability to the rolling window + ctx->window.push_back(original_probs[idx]); } static void llama_sampler_power_law_reset(struct llama_sampler * smpl) { - auto * ctx = (llama_sampler_power_law *) smpl->ctx; - ctx->history = ring_buffer(ctx->window_size); + auto * ctx = (llama_sampler_power_law *) smpl->ctx; + ctx->window = ring_buffer(ctx->window_size); } static struct llama_sampler * llama_sampler_power_law_clone(const struct llama_sampler * smpl) { const auto * ctx = (const llama_sampler_power_law *) smpl->ctx; - auto * result = llama_sampler_init_power_law(ctx->target, ctx->target_range, ctx->window_size, ctx->seed); + auto * result = llama_sampler_init_power_law(ctx->target, ctx->window_size, ctx->seed); auto * result_ctx = (llama_sampler_power_law *) result->ctx; result_ctx->rng = ctx->rng; - result_ctx->history = ctx->history; + result_ctx->window = ctx->window; return result; } @@ -2420,7 +2435,6 @@ static struct llama_sampler_i llama_sampler_power_law_i = { struct llama_sampler * llama_sampler_init_power_law( float target, - float target_range, int32_t window_size, uint32_t seed ) { @@ -2429,11 +2443,10 @@ struct llama_sampler * llama_sampler_init_power_law( /* .iface = */ &llama_sampler_power_law_i, /* .ctx = */ new llama_sampler_power_law { /* .target = */ target, - /* .target_range = */ target_range, /* .window_size = */ window_size, /* .seed = */ seed_cur, /* .rng = */ std::mt19937(seed_cur), - /* .history = */ ring_buffer(window_size), + /* .window = */ ring_buffer(window_size), } ); } From 5c78b7927fed36512538539d8ff7518c0d23d8cb Mon Sep 17 00:00:00 2001 From: ddh0 Date: Thu, 11 Dec 2025 22:47:36 -0600 Subject: [PATCH 14/38] oops, straggler --- common/sampling.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/common/sampling.cpp b/common/sampling.cpp index 90f48c5a05..63a17287dc 100644 --- a/common/sampling.cpp +++ b/common/sampling.cpp @@ -287,7 +287,7 @@ struct common_sampler * common_sampler_init(const struct llama_model * model, co llama_sampler_chain_add(result->chain, llama_sampler_init_penalties (params.penalty_last_n, params.penalty_repeat, params.penalty_freq, params.penalty_present)); break; case COMMON_SAMPLER_TYPE_POWER_LAW: - llama_sampler_chain_add(result->chain, llama_sampler_init_power_law (params.power_law_target, params.power_law_target_range, params.power_law_window_size, params.seed)); + llama_sampler_chain_add(result->chain, llama_sampler_init_power_law (params.power_law_target, params.power_law_window_size, params.seed)); has_distribution_sampler = true; break; default: From 53380c183f225a63ab788cf00e0a0188da073e47 Mon Sep 17 00:00:00 2001 From: ddh0 Date: Fri, 12 Dec 2025 22:39:51 -0600 Subject: [PATCH 15/38] add missing parameters in `server-task.cpp` --- tools/server/server-task.cpp | 52 +++++++++++++++++++----------------- 1 file changed, 27 insertions(+), 25 deletions(-) diff --git a/tools/server/server-task.cpp b/tools/server/server-task.cpp index 360826062b..c3ac98f13f 100644 --- a/tools/server/server-task.cpp +++ 
b/tools/server/server-task.cpp @@ -182,31 +182,33 @@ task_params server_task::params_from_json_cmpl( params.t_max_predict_ms = json_value(data, "t_max_predict_ms", defaults.t_max_predict_ms); params.response_fields = json_value(data, "response_fields", std::vector()); - params.sampling.top_k = json_value(data, "top_k", defaults.sampling.top_k); - params.sampling.top_p = json_value(data, "top_p", defaults.sampling.top_p); - params.sampling.min_p = json_value(data, "min_p", defaults.sampling.min_p); - params.sampling.top_n_sigma = json_value(data, "top_n_sigma", defaults.sampling.top_n_sigma); - params.sampling.xtc_probability = json_value(data, "xtc_probability", defaults.sampling.xtc_probability); - params.sampling.xtc_threshold = json_value(data, "xtc_threshold", defaults.sampling.xtc_threshold); - params.sampling.typ_p = json_value(data, "typical_p", defaults.sampling.typ_p); - params.sampling.temp = json_value(data, "temperature", defaults.sampling.temp); - params.sampling.dynatemp_range = json_value(data, "dynatemp_range", defaults.sampling.dynatemp_range); - params.sampling.dynatemp_exponent = json_value(data, "dynatemp_exponent", defaults.sampling.dynatemp_exponent); - params.sampling.penalty_last_n = json_value(data, "repeat_last_n", defaults.sampling.penalty_last_n); - params.sampling.penalty_repeat = json_value(data, "repeat_penalty", defaults.sampling.penalty_repeat); - params.sampling.penalty_freq = json_value(data, "frequency_penalty", defaults.sampling.penalty_freq); - params.sampling.penalty_present = json_value(data, "presence_penalty", defaults.sampling.penalty_present); - params.sampling.dry_multiplier = json_value(data, "dry_multiplier", defaults.sampling.dry_multiplier); - params.sampling.dry_base = json_value(data, "dry_base", defaults.sampling.dry_base); - params.sampling.dry_allowed_length = json_value(data, "dry_allowed_length", defaults.sampling.dry_allowed_length); - params.sampling.dry_penalty_last_n = json_value(data, "dry_penalty_last_n", defaults.sampling.dry_penalty_last_n); - params.sampling.mirostat = json_value(data, "mirostat", defaults.sampling.mirostat); - params.sampling.mirostat_tau = json_value(data, "mirostat_tau", defaults.sampling.mirostat_tau); - params.sampling.mirostat_eta = json_value(data, "mirostat_eta", defaults.sampling.mirostat_eta); - params.sampling.seed = json_value(data, "seed", defaults.sampling.seed); - params.sampling.n_probs = json_value(data, "n_probs", defaults.sampling.n_probs); - params.sampling.min_keep = json_value(data, "min_keep", defaults.sampling.min_keep); - params.post_sampling_probs = json_value(data, "post_sampling_probs", defaults.post_sampling_probs); + params.sampling.top_k = json_value(data, "top_k", defaults.sampling.top_k); + params.sampling.top_p = json_value(data, "top_p", defaults.sampling.top_p); + params.sampling.min_p = json_value(data, "min_p", defaults.sampling.min_p); + params.sampling.top_n_sigma = json_value(data, "top_n_sigma", defaults.sampling.top_n_sigma); + params.sampling.xtc_probability = json_value(data, "xtc_probability", defaults.sampling.xtc_probability); + params.sampling.xtc_threshold = json_value(data, "xtc_threshold", defaults.sampling.xtc_threshold); + params.sampling.typ_p = json_value(data, "typical_p", defaults.sampling.typ_p); + params.sampling.temp = json_value(data, "temperature", defaults.sampling.temp); + params.sampling.dynatemp_range = json_value(data, "dynatemp_range", defaults.sampling.dynatemp_range); + params.sampling.dynatemp_exponent = json_value(data, 
"dynatemp_exponent", defaults.sampling.dynatemp_exponent); + params.sampling.penalty_last_n = json_value(data, "repeat_last_n", defaults.sampling.penalty_last_n); + params.sampling.penalty_repeat = json_value(data, "repeat_penalty", defaults.sampling.penalty_repeat); + params.sampling.penalty_freq = json_value(data, "frequency_penalty", defaults.sampling.penalty_freq); + params.sampling.penalty_present = json_value(data, "presence_penalty", defaults.sampling.penalty_present); + params.sampling.dry_multiplier = json_value(data, "dry_multiplier", defaults.sampling.dry_multiplier); + params.sampling.dry_base = json_value(data, "dry_base", defaults.sampling.dry_base); + params.sampling.dry_allowed_length = json_value(data, "dry_allowed_length", defaults.sampling.dry_allowed_length); + params.sampling.dry_penalty_last_n = json_value(data, "dry_penalty_last_n", defaults.sampling.dry_penalty_last_n); + params.sampling.mirostat = json_value(data, "mirostat", defaults.sampling.mirostat); + params.sampling.mirostat_tau = json_value(data, "mirostat_tau", defaults.sampling.mirostat_tau); + params.sampling.mirostat_eta = json_value(data, "mirostat_eta", defaults.sampling.mirostat_eta); + params.sampling.power_law_target = json_value(data, "power_law_target", defaults.sampling.power_law_target); + params.sampling.power_law_window_size = json_value(data, "power_law_window_size", defaults.sampling.power_law_window_size); + params.sampling.seed = json_value(data, "seed", defaults.sampling.seed); + params.sampling.n_probs = json_value(data, "n_probs", defaults.sampling.n_probs); + params.sampling.min_keep = json_value(data, "min_keep", defaults.sampling.min_keep); + params.post_sampling_probs = json_value(data, "post_sampling_probs", defaults.post_sampling_probs); params.speculative.n_min = json_value(data, "speculative.n_min", defaults.speculative.n_min); params.speculative.n_max = json_value(data, "speculative.n_max", defaults.speculative.n_max); From 94cb883ed9184ac96a838566b0cbbb7918237b64 Mon Sep 17 00:00:00 2001 From: ddh0 Date: Fri, 12 Dec 2025 23:19:08 -0600 Subject: [PATCH 16/38] copy from author ref: https://gist.github.com/MrJackSpade/9be99c7efbba7b95a41377e123b7b069 --- src/llama-sampling.cpp | 156 +++++++++++++++++++++++++++++++++-------- 1 file changed, 125 insertions(+), 31 deletions(-) diff --git a/src/llama-sampling.cpp b/src/llama-sampling.cpp index d5f485f846..738fd05caa 100644 --- a/src/llama-sampling.cpp +++ b/src/llama-sampling.cpp @@ -2337,21 +2337,134 @@ static const char * llama_sampler_power_law_name(const struct llama_sampler * /* return "power-law"; } +// Computes the target probability for the current sampling step. +// +// The target determines which token probabilities the power law distribution +// will favor. This function implements a dynamic feedback mechanism to maintain +// an average selection probability close to the base target over time. +// +// When the window is empty: +// - Returns the base target value (ctx->target) +// +// When the window has entries: +// - Calculates what the next target should be to keep the weighted average +// of selected token probabilities equal to ctx->target +// - Uses exponential decay weighting: newer values have more influence +// +// Exponential Decay Weighting: +// After inserting the new value, the weights will be: +// new_value: weight = 1 (age 0, newest) +// rat(0): weight = decay (age 1) +// rat(1): weight = decay^2 (age 2) +// ... 
+// rat(sz-2): weight = decay^(sz-1) +// rat(sz-1): evicted (oldest) +// +// The "effective window size" is approximately 1/(1-decay): +// decay=0.9 → effective window ≈ 10 tokens +// decay=0.95 → effective window ≈ 20 tokens +// decay=1.0 → no decay, equivalent to simple average (original behavior) +// +// Formula derivation: +// We want the weighted average after insertion to equal target: +// +// (new_value * 1 + Σ rat(i) * decay^(i+1)) / total_weight = target +// +// Where total_weight = 1 + decay + decay^2 + ... + decay^(sz-1) +// = (1 - decay^sz) / (1 - decay) [geometric series] +// +// Solving for new_value: +// new_value = target * total_weight - decay * Σ rat(i) * decay^i +// +// The factor of 'decay' on the sum accounts for all existing values +// shifting one position older when the new value is inserted. +// +// The exponential decay helps prevent "fishtailing" - a phenomenon where +// forced high-probability selections (when the model is very confident) +// cause the algorithm to overcorrect with many low-probability selections, +// then swing back the other way. By decaying old values, the influence of +// forced selections fades faster, reducing oscillation amplitude and +// recovery time. +// +// Finally, the computed target is clamped to [min_target, max_target] to +// prevent extreme values that could destabilize sampling. +// +static float llama_sampler_power_law_compute_target(const llama_sampler_power_law * ctx, + float min_target, + float max_target, + float tail_decay) { + float computed_target = ctx->target; + size_t sz = ctx->window.size(); + + if (sz > 0) { + // Check if window is at capacity (oldest element will be evicted on next push) + // Use the window_size parameter from context, not a capacity() method + const bool window_full = (sz == ctx->window_size); + + // Compute weighted sum with exponential decay + // rat(0) = newest in buffer, gets weight 1 + // rat(i) gets weight decay^i + // + // When window is full: exclude oldest element (it will be evicted) + // When window is not full: include all elements (nothing evicted) + float weighted_sum = 0.0f; + float weight = 1.0f; + size_t elements_to_sum = window_full ? (sz - 1) : sz; + + for (size_t i = 0; i < elements_to_sum; ++i) { + weighted_sum += ctx->window.rat(i) * weight; + weight *= tail_decay; + } + + // Compute total weight after new value is inserted + // When full: sz elements remain (oldest evicted, new added) + // When not full: sz + 1 elements (new added, nothing evicted) + size_t final_element_count = window_full ? 
sz : (sz + 1); + + float total_weight; + if (std::abs(tail_decay - 1.0f) < FLT_EPSILON) { + total_weight = (float) final_element_count; + } else { + total_weight = (1.0f - std::pow(tail_decay, (float) final_element_count)) / (1.0f - tail_decay); + } + + // Shift weights to account for new value taking position 0 + // All existing values age by 1, so multiply their weights by decay + float shifted_weighted_sum = weighted_sum * tail_decay; + + // Solve for the new value that achieves target weighted average + float next_value = (ctx->target * total_weight) - shifted_weighted_sum; + + // Clamp to allowed range + computed_target = std::max(min_target, std::min(next_value, max_target)); + } + + return computed_target; +} + static void llama_sampler_power_law_apply(struct llama_sampler * smpl, llama_token_data_array * cur_p) { auto * ctx = (llama_sampler_power_law *) smpl->ctx; if (ctx->target < 0.0f) { + fprintf(stderr, "Target below zero, sampling from distribution\n"); // no-op: just sample from the distribution as-is llama_sampler_softmax_impl(cur_p, false); - const int idx = llama_sample_dist(cur_p, ctx->rng); + const int idx = llama_sample_dist(cur_p, ctx->rng); cur_p->selected = idx; return; } - // fixed power law transform parameters (from original implementation) - const float distribution_width = 0.2f; - const float peak_logit_value = 3.0f; - const float tail_heaviness = 3.0f; + // fixed power law transform parameters + const float distribution_width = 0.3f; + const float peak_logit_value = 5.0f; + const float tail_heaviness = 2.0f; + + // target computation parameters + const float min_target = 0.0f; + const float max_target = 1.0f; + const float tail_decay = 0.50f; // Exponential decay factor for history weighting + // Lower = faster response, higher = more stability + // Effective window ≈ 1/(1-decay) ≈ 20 tokens // compute probabilities to get the "original" values llama_sampler_softmax_impl(cur_p, false); @@ -2363,45 +2476,26 @@ static void llama_sampler_power_law_apply(struct llama_sampler * smpl, llama_tok original_probs.push_back(cur_p->data[i].p); } - // // calculate adaptive target - // + float computed_target = llama_sampler_power_law_compute_target(ctx, min_target, max_target, tail_decay); - const float min_target = 0.0f; - const float max_target = 1.0f; - - float computed_target = ctx->target; - if (ctx->window.size() > 0) { - float sum_excluding_oldest = 0.0f; - size_t sz = ctx->window.size(); - - // sum all except the oldest element - for (size_t i = 0; i < sz - 1; ++i) { - sum_excluding_oldest += ctx->window.rat(i); - } - - float next_value = (ctx->target * ctx->window_size) - sum_excluding_oldest; - computed_target = std::max(min_target, std::min(next_value, max_target)); - } - - // // power law transform - // - for (size_t i = 0; i < cur_p->size; ++i) { - float p = cur_p->data[i].p; + float p = cur_p->data[i].p; float normalized_distance = std::abs(p - computed_target) / distribution_width; - cur_p->data[i].logit = peak_logit_value / (1.0f + std::pow(normalized_distance, tail_heaviness)); + cur_p->data[i].logit = peak_logit_value / (1.0f + std::pow(normalized_distance, tail_heaviness)); } llama_sampler_softmax_impl(cur_p, false); // sample from the transformed distribution - const int idx = llama_sample_dist(cur_p, ctx->rng); + const int idx = llama_sample_dist(cur_p, ctx->rng); cur_p->selected = idx; // add the ORIGINAL probability to the rolling window - ctx->window.push_back(original_probs[idx]); + float original_p = original_probs[idx]; + + 
ctx->window.push_back(original_p); } static void llama_sampler_power_law_reset(struct llama_sampler * smpl) { From 0a19a3fd6c179d0e2761130a86cf945acc838c83 Mon Sep 17 00:00:00 2001 From: ddh0 Date: Fri, 12 Dec 2025 23:32:57 -0600 Subject: [PATCH 17/38] remove old debug log, style nit --- src/llama-sampling.cpp | 28 ++++++++++++++++------------ 1 file changed, 16 insertions(+), 12 deletions(-) diff --git a/src/llama-sampling.cpp b/src/llama-sampling.cpp index 738fd05caa..5871668d96 100644 --- a/src/llama-sampling.cpp +++ b/src/llama-sampling.cpp @@ -2389,10 +2389,12 @@ static const char * llama_sampler_power_law_name(const struct llama_sampler * /* // Finally, the computed target is clamped to [min_target, max_target] to // prevent extreme values that could destabilize sampling. // -static float llama_sampler_power_law_compute_target(const llama_sampler_power_law * ctx, - float min_target, - float max_target, - float tail_decay) { +static float llama_sampler_power_law_compute_target( + const llama_sampler_power_law * ctx, + float min_target, + float max_target, + float tail_decay) { + float computed_target = ctx->target; size_t sz = ctx->window.size(); @@ -2416,6 +2418,10 @@ static float llama_sampler_power_law_compute_target(const llama_sampler_power_la weight *= tail_decay; } + // Shift weights to account for new value taking position 0 + // All existing values age by 1, so multiply their weights by decay + float shifted_weighted_sum = weighted_sum * tail_decay; + // Compute total weight after new value is inserted // When full: sz elements remain (oldest evicted, new added) // When not full: sz + 1 elements (new added, nothing evicted) @@ -2428,10 +2434,6 @@ static float llama_sampler_power_law_compute_target(const llama_sampler_power_la total_weight = (1.0f - std::pow(tail_decay, (float) final_element_count)) / (1.0f - tail_decay); } - // Shift weights to account for new value taking position 0 - // All existing values age by 1, so multiply their weights by decay - float shifted_weighted_sum = weighted_sum * tail_decay; - // Solve for the new value that achieves target weighted average float next_value = (ctx->target * total_weight) - shifted_weighted_sum; @@ -2446,7 +2448,6 @@ static void llama_sampler_power_law_apply(struct llama_sampler * smpl, llama_tok auto * ctx = (llama_sampler_power_law *) smpl->ctx; if (ctx->target < 0.0f) { - fprintf(stderr, "Target below zero, sampling from distribution\n"); // no-op: just sample from the distribution as-is llama_sampler_softmax_impl(cur_p, false); const int idx = llama_sample_dist(cur_p, ctx->rng); @@ -2462,9 +2463,9 @@ static void llama_sampler_power_law_apply(struct llama_sampler * smpl, llama_tok // target computation parameters const float min_target = 0.0f; const float max_target = 1.0f; - const float tail_decay = 0.50f; // Exponential decay factor for history weighting - // Lower = faster response, higher = more stability - // Effective window ≈ 1/(1-decay) ≈ 20 tokens + const float tail_decay = 0.50f; // exponential decay factor for history weighting + // lower = faster response, higher = more stability + // effective window ≈ 1/(1-decay) ≈ 20 tokens // compute probabilities to get the "original" values llama_sampler_softmax_impl(cur_p, false); @@ -2479,7 +2480,10 @@ static void llama_sampler_power_law_apply(struct llama_sampler * smpl, llama_tok // calculate adaptive target float computed_target = llama_sampler_power_law_compute_target(ctx, min_target, max_target, tail_decay); + // // power law transform + // + for (size_t i = 0; i < 
cur_p->size; ++i) { float p = cur_p->data[i].p; float normalized_distance = std::abs(p - computed_target) / distribution_width; From 824bb3aa6ebc14e5bf9c2bb5d0959841100f10fd Mon Sep 17 00:00:00 2001 From: ddh0 Date: Sat, 13 Dec 2025 00:23:15 -0600 Subject: [PATCH 18/38] fix compiler warning, add commented-out logging per token --- src/llama-sampling.cpp | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/src/llama-sampling.cpp b/src/llama-sampling.cpp index 5871668d96..7686f59148 100644 --- a/src/llama-sampling.cpp +++ b/src/llama-sampling.cpp @@ -2401,7 +2401,7 @@ static float llama_sampler_power_law_compute_target( if (sz > 0) { // Check if window is at capacity (oldest element will be evicted on next push) // Use the window_size parameter from context, not a capacity() method - const bool window_full = (sz == ctx->window_size); + const bool window_full = (sz == (size_t)ctx->window_size); // Compute weighted sum with exponential decay // rat(0) = newest in buffer, gets weight 1 @@ -2496,6 +2496,18 @@ static void llama_sampler_power_law_apply(struct llama_sampler * smpl, llama_tok const int idx = llama_sample_dist(cur_p, ctx->rng); cur_p->selected = idx; + // uncomment this to log the target values and history window contents for every token + // + // fprintf(stderr, "power_law: window_size=%zu/%d values=[", + // ctx->window.size(), ctx->window_size); + // for (size_t i = 0; i < ctx->window.size(); ++i) { + // fprintf(stderr, "%.1f", ctx->window.rat(i)); + // if (i < ctx->window.size() - 1) fprintf(stderr, ","); + // } + // fprintf(stderr, "] computed_target=%.4f selected_token=%d orig_prob=%.4f\n", + // computed_target, cur_p->data[idx].id, original_probs[idx]); + // fflush(stderr); + // add the ORIGINAL probability to the rolling window float original_p = original_probs[idx]; From a96ddd743a8badf058a31edf893ce5c660a02eee Mon Sep 17 00:00:00 2001 From: ddh0 Date: Sat, 13 Dec 2025 22:15:03 -0600 Subject: [PATCH 19/38] re-write + change parameters + simplify --- common/common.h | 58 +++++----- include/llama.h | 22 ++-- src/llama-sampling.cpp | 207 +++++++++++------------------------ tools/server/server-task.cpp | 54 ++++----- 4 files changed, 130 insertions(+), 211 deletions(-) diff --git a/common/common.h b/common/common.h index ba3d776bdc..66a6ca96b3 100644 --- a/common/common.h +++ b/common/common.h @@ -164,35 +164,35 @@ enum common_params_sampling_config : uint64_t { struct common_params_sampling { uint32_t seed = LLAMA_DEFAULT_SEED; // the seed used to initialize llama_sampler - int32_t n_prev = 64; // number of previous tokens to remember - int32_t n_probs = 0; // if greater than 0, output the probabilities of top n_probs tokens. 
- int32_t min_keep = 0; // 0 = disabled, otherwise samplers should return at least min_keep tokens - int32_t top_k = 40; // <= 0 to use vocab size - float top_p = 0.95f; // 1.0 = disabled - float min_p = 0.05f; // 0.0 = disabled - float xtc_probability = 0.00f; // 0.0 = disabled - float xtc_threshold = 0.10f; // > 0.5 disables XTC - float typ_p = 1.00f; // typical_p, 1.0 = disabled - float temp = 0.80f; // <= 0.0 to sample greedily, 0.0 to not output probabilities - float dynatemp_range = 0.00f; // 0.0 = disabled - float dynatemp_exponent = 1.00f; // controls how entropy maps to temperature in dynamic temperature sampler - int32_t penalty_last_n = 64; // last n tokens to penalize (0 = disable penalty, -1 = context size) - float penalty_repeat = 1.00f; // 1.0 = disabled - float penalty_freq = 0.00f; // 0.0 = disabled - float penalty_present = 0.00f; // 0.0 = disabled - float dry_multiplier = 0.0f; // 0.0 = disabled; DRY repetition penalty for tokens extending repetition: - float dry_base = 1.75f; // 0.0 = disabled; multiplier * base ^ (length of sequence before token - allowed length) - int32_t dry_allowed_length = 2; // tokens extending repetitions beyond this receive penalty - int32_t dry_penalty_last_n = -1; // how many tokens to scan for repetitions (0 = disable penalty, -1 = context size) - float power_law_target = -1.0f; // target probability for Power Law sampling (valid range 0.0 to 1.0; <0 = disabled) - int32_t power_law_window_size = 10; // rolling window size for target adaptation in Power Law sampling (≤0 = fixed target) - int32_t mirostat = 0; // 0 = disabled, 1 = mirostat, 2 = mirostat 2.0 - float top_n_sigma = -1.00f; // -1.0 = disabled - float mirostat_tau = 5.00f; // target entropy - float mirostat_eta = 0.10f; // learning rate - bool ignore_eos = false; - bool no_perf = false; // disable performance metrics - bool timing_per_token = false; + int32_t n_prev = 64; // number of previous tokens to remember + int32_t n_probs = 0; // if greater than 0, output the probabilities of top n_probs tokens. + int32_t min_keep = 0; // 0 = disabled, otherwise samplers should return at least min_keep tokens + int32_t top_k = 40; // <= 0 to use vocab size + float top_p = 0.95f; // 1.0 = disabled + float min_p = 0.05f; // 0.0 = disabled + float xtc_probability = 0.00f; // 0.0 = disabled + float xtc_threshold = 0.10f; // > 0.5 disables XTC + float typ_p = 1.00f; // typical_p, 1.0 = disabled + float temp = 0.80f; // <= 0.0 to sample greedily, 0.0 to not output probabilities + float dynatemp_range = 0.00f; // 0.0 = disabled + float dynatemp_exponent = 1.00f; // controls how entropy maps to temperature in dynamic temperature sampler + int32_t penalty_last_n = 64; // last n tokens to penalize (0 = disable penalty, -1 = context size) + float penalty_repeat = 1.00f; // 1.0 = disabled + float penalty_freq = 0.00f; // 0.0 = disabled + float penalty_present = 0.00f; // 0.0 = disabled + float dry_multiplier = 0.0f; // 0.0 = disabled; DRY repetition penalty for tokens extending repetition: + float dry_base = 1.75f; // 0.0 = disabled; multiplier * base ^ (length of sequence before token - allowed length) + int32_t dry_allowed_length = 2; // tokens extending repetitions beyond this receive penalty + int32_t dry_penalty_last_n = -1; // how many tokens to scan for repetitions (0 = disable penalty, -1 = context size) + float power_law_target = -1.0f; // select tokens near this probability (valid range 0.0 to 1.0; <0 = disabled) + float power_law_decay = 0.9f; // decay rate for target adaptation over time. 
lower values -> faster but less stable adaptation. (valid range 0.0 to 1.0; ≤0 = no adaptation) + int32_t mirostat = 0; // 0 = disabled, 1 = mirostat, 2 = mirostat 2.0 + float top_n_sigma = -1.00f; // -1.0 = disabled + float mirostat_tau = 5.00f; // target entropy + float mirostat_eta = 0.10f; // learning rate + bool ignore_eos = false; + bool no_perf = false; // disable performance metrics + bool timing_per_token = false; uint64_t user_sampling_config = 0; // bitfield to track user-specified samplers diff --git a/include/llama.h b/include/llama.h index ce1308d2bd..f3867c6988 100644 --- a/include/llama.h +++ b/include/llama.h @@ -1289,24 +1289,28 @@ extern "C" { const char ** seq_breakers, size_t num_breakers); - /// @details power-law sampler - reshapes probability distribution to target specific probability ranges + /// power-law + /// + /// this sampler implements a power law probability transformation with adaptive + /// target tracking. it reshapes token probability distributions to favor tokens near a + /// configurable target probability, rather than always selecting from the highest probability + /// candidates. it is ideal for creative, unpredictable text generation. /// /// this sampler is like `greedy`, `dist`, and `mirostat` in that it actually selects a token ID /// rather than just transforming logits. therefore it must always be the last sampler in the /// sampler chain. /// - /// it is recommended to only perform minimal truncation before this sampler. + /// minimal truncation before this sampler is recommended. /// - /// @param target target probability (valid range 0.0 to 1.0; <0 = disabled) - /// @param window_size rolling window size for target adaptation (≤0 = fixed target) - /// @param seed RNG seed + /// @param target select tokens near this probability (valid range 0.0 to 1.0; <0 = disabled) + /// @param decay decay rate for target adaptation over time. lower values -> faster but less stable adaptation. (valid range 0.0 to 1.0; ≤0 = no adaptation) /// - /// ref: https://github.com/MrJackSpade/llama.cpp/tree/master (original impl, documentation) + /// ref: https://github.com/MrJackSpade/llama.cpp/tree/master (original impl) /// ref: https://github.com/ggml-org/llama.cpp/pull/17927 (llama.cpp PR) LLAMA_API struct llama_sampler * llama_sampler_init_power_law( - float target, - int32_t window_size, - uint32_t seed); + float target, + float decay, + uint32_t seed); LLAMA_API struct llama_sampler * llama_sampler_init_logit_bias( int32_t n_vocab, diff --git a/src/llama-sampling.cpp b/src/llama-sampling.cpp index 7686f59148..db126a18d5 100644 --- a/src/llama-sampling.cpp +++ b/src/llama-sampling.cpp @@ -2315,133 +2315,62 @@ struct llama_sampler * llama_sampler_init_dry_testing(int32_t context_size, floa // power-law // +// this sampler implements a power law probability transformation with adaptive +// target tracking. it reshapes token probability distributions to favor tokens near a +// configurable target probability, rather than always selecting from the highest probability +// candidates. it is ideal for creative, unpredictable text generation. +// // this sampler is like `greedy`, `dist`, and `mirostat` in that it actually selects a token ID // rather than just transforming logits. therefore it must always be the last sampler in the // sampler chain. // -// it is recommended to only perform minimal truncation before this sampler. +// minimal truncation before this sampler is recommended. 
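+//
+// as a sketch of intended usage (illustrative only; `lctx` is assumed to be an
+// existing llama_context, and the truncation step and parameter values are
+// arbitrary examples, not recommendations):
+//
+//     llama_sampler * chain = llama_sampler_chain_init(llama_sampler_chain_default_params());
+//     llama_sampler_chain_add(chain, llama_sampler_init_min_p(0.05f, 1));                             // minimal truncation
+//     llama_sampler_chain_add(chain, llama_sampler_init_power_law(0.50f, 0.90f, LLAMA_DEFAULT_SEED)); // must be last
+//     llama_token tok = llama_sampler_sample(chain, lctx, -1);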
// -// ref: https://github.com/MrJackSpade/llama.cpp/tree/master (original impl, documentation) +// ref: https://github.com/MrJackSpade/llama.cpp/tree/master (original impl) // ref: https://github.com/ggml-org/llama.cpp/pull/17927 (llama.cpp PR) struct llama_sampler_power_law { - const float target; - const int32_t window_size; - const uint32_t seed; - std::mt19937 rng; - ring_buffer window; + // the desired average probability for selected tokens (0.0 to 1.0) + // higher values favor more probable tokens (more deterministic) + // lower values favor less probable tokens (more creative) + // negative values disable Power Law sampling (sample from distribution as-is) + const float target; + + // controls how quickly history influence fades (0.0 to 0.99) + // lower values = faster adaptation, more reactive to recent tokens + // higher values = slower adaptation, more stable over time + // effective history length ≈ 1/(1-decay) tokens + // examples: decay=0.5 → ~2 tokens, decay=0.9 → ~10, decay=0.95 → ~20 + // internally clamped to <= 0.99 to prevent unbounded accumulation + const float decay; + + const uint32_t seed; + std::mt19937 rng; + + // historical token probabilities weighted by recency + float weighted_sum; + // sum of weights, converges to 1/(1-decay) + float total_weight; }; static const char * llama_sampler_power_law_name(const struct llama_sampler * /*smpl*/) { return "power-law"; } -// Computes the target probability for the current sampling step. -// -// The target determines which token probabilities the power law distribution -// will favor. This function implements a dynamic feedback mechanism to maintain -// an average selection probability close to the base target over time. -// -// When the window is empty: -// - Returns the base target value (ctx->target) -// -// When the window has entries: -// - Calculates what the next target should be to keep the weighted average -// of selected token probabilities equal to ctx->target -// - Uses exponential decay weighting: newer values have more influence -// -// Exponential Decay Weighting: -// After inserting the new value, the weights will be: -// new_value: weight = 1 (age 0, newest) -// rat(0): weight = decay (age 1) -// rat(1): weight = decay^2 (age 2) -// ... -// rat(sz-2): weight = decay^(sz-1) -// rat(sz-1): evicted (oldest) -// -// The "effective window size" is approximately 1/(1-decay): -// decay=0.9 → effective window ≈ 10 tokens -// decay=0.95 → effective window ≈ 20 tokens -// decay=1.0 → no decay, equivalent to simple average (original behavior) -// -// Formula derivation: -// We want the weighted average after insertion to equal target: -// -// (new_value * 1 + Σ rat(i) * decay^(i+1)) / total_weight = target -// -// Where total_weight = 1 + decay + decay^2 + ... + decay^(sz-1) -// = (1 - decay^sz) / (1 - decay) [geometric series] -// -// Solving for new_value: -// new_value = target * total_weight - decay * Σ rat(i) * decay^i -// -// The factor of 'decay' on the sum accounts for all existing values -// shifting one position older when the new value is inserted. -// -// The exponential decay helps prevent "fishtailing" - a phenomenon where -// forced high-probability selections (when the model is very confident) -// cause the algorithm to overcorrect with many low-probability selections, -// then swing back the other way. By decaying old values, the influence of -// forced selections fades faster, reducing oscillation amplitude and -// recovery time. 
-// -// Finally, the computed target is clamped to [min_target, max_target] to -// prevent extreme values that could destabilize sampling. -// -static float llama_sampler_power_law_compute_target( - const llama_sampler_power_law * ctx, - float min_target, - float max_target, - float tail_decay) { - - float computed_target = ctx->target; - size_t sz = ctx->window.size(); - - if (sz > 0) { - // Check if window is at capacity (oldest element will be evicted on next push) - // Use the window_size parameter from context, not a capacity() method - const bool window_full = (sz == (size_t)ctx->window_size); - - // Compute weighted sum with exponential decay - // rat(0) = newest in buffer, gets weight 1 - // rat(i) gets weight decay^i - // - // When window is full: exclude oldest element (it will be evicted) - // When window is not full: include all elements (nothing evicted) - float weighted_sum = 0.0f; - float weight = 1.0f; - size_t elements_to_sum = window_full ? (sz - 1) : sz; - - for (size_t i = 0; i < elements_to_sum; ++i) { - weighted_sum += ctx->window.rat(i) * weight; - weight *= tail_decay; - } - - // Shift weights to account for new value taking position 0 - // All existing values age by 1, so multiply their weights by decay - float shifted_weighted_sum = weighted_sum * tail_decay; - - // Compute total weight after new value is inserted - // When full: sz elements remain (oldest evicted, new added) - // When not full: sz + 1 elements (new added, nothing evicted) - size_t final_element_count = window_full ? sz : (sz + 1); - - float total_weight; - if (std::abs(tail_decay - 1.0f) < FLT_EPSILON) { - total_weight = (float) final_element_count; - } else { - total_weight = (1.0f - std::pow(tail_decay, (float) final_element_count)) / (1.0f - tail_decay); - } - - // Solve for the new value that achieves target weighted average - float next_value = (ctx->target * total_weight) - shifted_weighted_sum; - - // Clamp to allowed range - computed_target = std::max(min_target, std::min(next_value, max_target)); +// compute the adaptive target probability for the current sampling step +static float llama_sampler_power_law_compute_target(const llama_sampler_power_law * ctx, float decay) { + if (ctx->total_weight == 0.0f) { + // if there is no history, just use base target + return ctx->target; } - return computed_target; + // maintain a running weighted sum with exponential decay + float new_total_weight = 1.0f + decay * ctx->total_weight; + float next_value = ctx->target * new_total_weight - decay * ctx->weighted_sum; + + // clamp to [0.0, 1.0] + return std::max(0.0f, std::min(next_value, 1.0f)); } static void llama_sampler_power_law_apply(struct llama_sampler * smpl, llama_token_data_array * cur_p) { @@ -2455,30 +2384,25 @@ static void llama_sampler_power_law_apply(struct llama_sampler * smpl, llama_tok return; } + // clamp decay to avoid degenerate case at 1.0 (unbounded accumulation) + const float decay = std::min(ctx->decay, 0.99f); + // fixed power law transform parameters const float distribution_width = 0.3f; const float peak_logit_value = 5.0f; const float tail_heaviness = 2.0f; - // target computation parameters - const float min_target = 0.0f; - const float max_target = 1.0f; - const float tail_decay = 0.50f; // exponential decay factor for history weighting - // lower = faster response, higher = more stability - // effective window ≈ 1/(1-decay) ≈ 20 tokens - - // compute probabilities to get the "original" values + // get the original probabilities llama_sampler_softmax_impl(cur_p, false); - 
// store original probabilities (used for future target adaptation) + // store the original probabilities (needed for history update after selection) std::vector original_probs; original_probs.reserve(cur_p->size); for (size_t i = 0; i < cur_p->size; ++i) { original_probs.push_back(cur_p->data[i].p); } - // calculate adaptive target - float computed_target = llama_sampler_power_law_compute_target(ctx, min_target, max_target, tail_decay); + float computed_target = llama_sampler_power_law_compute_target(ctx, decay); // // power law transform @@ -2492,40 +2416,30 @@ static void llama_sampler_power_law_apply(struct llama_sampler * smpl, llama_tok llama_sampler_softmax_impl(cur_p, false); - // sample from the transformed distribution + // sample from transformed distribution const int idx = llama_sample_dist(cur_p, ctx->rng); cur_p->selected = idx; - // uncomment this to log the target values and history window contents for every token - // - // fprintf(stderr, "power_law: window_size=%zu/%d values=[", - // ctx->window.size(), ctx->window_size); - // for (size_t i = 0; i < ctx->window.size(); ++i) { - // fprintf(stderr, "%.1f", ctx->window.rat(i)); - // if (i < ctx->window.size() - 1) fprintf(stderr, ","); - // } - // fprintf(stderr, "] computed_target=%.4f selected_token=%d orig_prob=%.4f\n", - // computed_target, cur_p->data[idx].id, original_probs[idx]); - // fflush(stderr); - - // add the ORIGINAL probability to the rolling window - float original_p = original_probs[idx]; - - ctx->window.push_back(original_p); + // update running history with the original probability of the selected token + float original_p = original_probs[idx]; + ctx->weighted_sum = original_p + decay * ctx->weighted_sum; + ctx->total_weight = 1.0f + decay * ctx->total_weight; } static void llama_sampler_power_law_reset(struct llama_sampler * smpl) { - auto * ctx = (llama_sampler_power_law *) smpl->ctx; - ctx->window = ring_buffer(ctx->window_size); + auto * ctx = (llama_sampler_power_law *) smpl->ctx; + ctx->weighted_sum = 0.0f; + ctx->total_weight = 0.0f; } static struct llama_sampler * llama_sampler_power_law_clone(const struct llama_sampler * smpl) { const auto * ctx = (const llama_sampler_power_law *) smpl->ctx; - auto * result = llama_sampler_init_power_law(ctx->target, ctx->window_size, ctx->seed); + auto * result = llama_sampler_init_power_law(ctx->target, ctx->decay, ctx->seed); auto * result_ctx = (llama_sampler_power_law *) result->ctx; - result_ctx->rng = ctx->rng; - result_ctx->window = ctx->window; + result_ctx->rng = ctx->rng; + result_ctx->weighted_sum = ctx->weighted_sum; + result_ctx->total_weight = ctx->total_weight; return result; } @@ -2545,7 +2459,7 @@ static struct llama_sampler_i llama_sampler_power_law_i = { struct llama_sampler * llama_sampler_init_power_law( float target, - int32_t window_size, + float decay, uint32_t seed ) { auto seed_cur = get_rng_seed(seed); @@ -2553,10 +2467,11 @@ struct llama_sampler * llama_sampler_init_power_law( /* .iface = */ &llama_sampler_power_law_i, /* .ctx = */ new llama_sampler_power_law { /* .target = */ target, - /* .window_size = */ window_size, + /* .decay = */ decay, /* .seed = */ seed_cur, /* .rng = */ std::mt19937(seed_cur), - /* .window = */ ring_buffer(window_size), + /* .weighted_sum = */ 0.0f, + /* .total_weight = */ 0.0f, } ); } diff --git a/tools/server/server-task.cpp b/tools/server/server-task.cpp index c3ac98f13f..6c083e6624 100644 --- a/tools/server/server-task.cpp +++ b/tools/server/server-task.cpp @@ -182,33 +182,33 @@ task_params 
server_task::params_from_json_cmpl( params.t_max_predict_ms = json_value(data, "t_max_predict_ms", defaults.t_max_predict_ms); params.response_fields = json_value(data, "response_fields", std::vector()); - params.sampling.top_k = json_value(data, "top_k", defaults.sampling.top_k); - params.sampling.top_p = json_value(data, "top_p", defaults.sampling.top_p); - params.sampling.min_p = json_value(data, "min_p", defaults.sampling.min_p); - params.sampling.top_n_sigma = json_value(data, "top_n_sigma", defaults.sampling.top_n_sigma); - params.sampling.xtc_probability = json_value(data, "xtc_probability", defaults.sampling.xtc_probability); - params.sampling.xtc_threshold = json_value(data, "xtc_threshold", defaults.sampling.xtc_threshold); - params.sampling.typ_p = json_value(data, "typical_p", defaults.sampling.typ_p); - params.sampling.temp = json_value(data, "temperature", defaults.sampling.temp); - params.sampling.dynatemp_range = json_value(data, "dynatemp_range", defaults.sampling.dynatemp_range); - params.sampling.dynatemp_exponent = json_value(data, "dynatemp_exponent", defaults.sampling.dynatemp_exponent); - params.sampling.penalty_last_n = json_value(data, "repeat_last_n", defaults.sampling.penalty_last_n); - params.sampling.penalty_repeat = json_value(data, "repeat_penalty", defaults.sampling.penalty_repeat); - params.sampling.penalty_freq = json_value(data, "frequency_penalty", defaults.sampling.penalty_freq); - params.sampling.penalty_present = json_value(data, "presence_penalty", defaults.sampling.penalty_present); - params.sampling.dry_multiplier = json_value(data, "dry_multiplier", defaults.sampling.dry_multiplier); - params.sampling.dry_base = json_value(data, "dry_base", defaults.sampling.dry_base); - params.sampling.dry_allowed_length = json_value(data, "dry_allowed_length", defaults.sampling.dry_allowed_length); - params.sampling.dry_penalty_last_n = json_value(data, "dry_penalty_last_n", defaults.sampling.dry_penalty_last_n); - params.sampling.mirostat = json_value(data, "mirostat", defaults.sampling.mirostat); - params.sampling.mirostat_tau = json_value(data, "mirostat_tau", defaults.sampling.mirostat_tau); - params.sampling.mirostat_eta = json_value(data, "mirostat_eta", defaults.sampling.mirostat_eta); - params.sampling.power_law_target = json_value(data, "power_law_target", defaults.sampling.power_law_target); - params.sampling.power_law_window_size = json_value(data, "power_law_window_size", defaults.sampling.power_law_window_size); - params.sampling.seed = json_value(data, "seed", defaults.sampling.seed); - params.sampling.n_probs = json_value(data, "n_probs", defaults.sampling.n_probs); - params.sampling.min_keep = json_value(data, "min_keep", defaults.sampling.min_keep); - params.post_sampling_probs = json_value(data, "post_sampling_probs", defaults.post_sampling_probs); + params.sampling.top_k = json_value(data, "top_k", defaults.sampling.top_k); + params.sampling.top_p = json_value(data, "top_p", defaults.sampling.top_p); + params.sampling.min_p = json_value(data, "min_p", defaults.sampling.min_p); + params.sampling.top_n_sigma = json_value(data, "top_n_sigma", defaults.sampling.top_n_sigma); + params.sampling.xtc_probability = json_value(data, "xtc_probability", defaults.sampling.xtc_probability); + params.sampling.xtc_threshold = json_value(data, "xtc_threshold", defaults.sampling.xtc_threshold); + params.sampling.typ_p = json_value(data, "typical_p", defaults.sampling.typ_p); + params.sampling.temp = json_value(data, "temperature", defaults.sampling.temp); + 
params.sampling.dynatemp_range = json_value(data, "dynatemp_range", defaults.sampling.dynatemp_range); + params.sampling.dynatemp_exponent = json_value(data, "dynatemp_exponent", defaults.sampling.dynatemp_exponent); + params.sampling.penalty_last_n = json_value(data, "repeat_last_n", defaults.sampling.penalty_last_n); + params.sampling.penalty_repeat = json_value(data, "repeat_penalty", defaults.sampling.penalty_repeat); + params.sampling.penalty_freq = json_value(data, "frequency_penalty", defaults.sampling.penalty_freq); + params.sampling.penalty_present = json_value(data, "presence_penalty", defaults.sampling.penalty_present); + params.sampling.dry_multiplier = json_value(data, "dry_multiplier", defaults.sampling.dry_multiplier); + params.sampling.dry_base = json_value(data, "dry_base", defaults.sampling.dry_base); + params.sampling.dry_allowed_length = json_value(data, "dry_allowed_length", defaults.sampling.dry_allowed_length); + params.sampling.dry_penalty_last_n = json_value(data, "dry_penalty_last_n", defaults.sampling.dry_penalty_last_n); + params.sampling.mirostat = json_value(data, "mirostat", defaults.sampling.mirostat); + params.sampling.mirostat_tau = json_value(data, "mirostat_tau", defaults.sampling.mirostat_tau); + params.sampling.mirostat_eta = json_value(data, "mirostat_eta", defaults.sampling.mirostat_eta); + params.sampling.power_law_target = json_value(data, "power_law_target", defaults.sampling.power_law_target); + params.sampling.power_law_decay = json_value(data, "power_law_decay", defaults.sampling.power_law_decay); + params.sampling.seed = json_value(data, "seed", defaults.sampling.seed); + params.sampling.n_probs = json_value(data, "n_probs", defaults.sampling.n_probs); + params.sampling.min_keep = json_value(data, "min_keep", defaults.sampling.min_keep); + params.post_sampling_probs = json_value(data, "post_sampling_probs", defaults.post_sampling_probs); params.speculative.n_min = json_value(data, "speculative.n_min", defaults.speculative.n_min); params.speculative.n_max = json_value(data, "speculative.n_max", defaults.speculative.n_max); From b8a9626a739541cc6f65cd07ced19b12c364bf48 Mon Sep 17 00:00:00 2001 From: ddh0 Date: Sat, 13 Dec 2025 22:17:08 -0600 Subject: [PATCH 20/38] oops forgot args.cpp --- common/arg.cpp | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/common/arg.cpp b/common/arg.cpp index 0226a6e644..919e37b7f8 100644 --- a/common/arg.cpp +++ b/common/arg.cpp @@ -1559,18 +1559,22 @@ common_params_context common_params_parser_init(common_params & params, llama_ex ).set_sparam()); add_opt(common_arg( {"--power-law-target"}, "N", - string_format("target probability for Power Law sampling (valid range 0.0 to 1.0; <0 = disabled) " - "(%.1f = default)\n[(more info)](https://github.com/ggml-org/llama.cpp/pull/17927)", + string_format("power law sampler: select tokens near this probability (valid range 0.0 " + "to 1.0; <0 = disabled) (default: %.2f)\n" + "[(more info)]""(https://github.com/ggml-org/llama.cpp/pull/17927)", (double)params.sampling.power_law_target), [](common_params & params, const std::string & value) { params.sampling.power_law_target = std::stof(value); } ).set_sparam()); add_opt(common_arg( - {"--power-law-window-size"}, "N", - string_format("rolling window size for target adaptation in Power Law sampling (≤0 = fixed target; %d = default)", params.sampling.power_law_window_size), + {"--power-law-decay"}, "N", + string_format("power law sampler: decay rate for target adaptation over time. 
lower " + "values -> faster but less stable adaptation. " + "(valid range 0.0 to 1.0; ≤0 = no adaptation) (default: %.2f)", + (double)params.sampling.power_law_decay), [](common_params & params, int value) { - params.sampling.power_law_window_size = value; + params.sampling.power_law_decay = value; } ).set_sparam()); add_opt(common_arg( From 965bcc9dc4675432d37340647a6916adbe79f184 Mon Sep 17 00:00:00 2001 From: ddh0 Date: Sat, 13 Dec 2025 22:19:15 -0600 Subject: [PATCH 21/38] fix leftover `window_size` --- common/sampling.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/common/sampling.cpp b/common/sampling.cpp index 63a17287dc..8bfdae3be1 100644 --- a/common/sampling.cpp +++ b/common/sampling.cpp @@ -287,7 +287,7 @@ struct common_sampler * common_sampler_init(const struct llama_model * model, co llama_sampler_chain_add(result->chain, llama_sampler_init_penalties (params.penalty_last_n, params.penalty_repeat, params.penalty_freq, params.penalty_present)); break; case COMMON_SAMPLER_TYPE_POWER_LAW: - llama_sampler_chain_add(result->chain, llama_sampler_init_power_law (params.power_law_target, params.power_law_window_size, params.seed)); + llama_sampler_chain_add(result->chain, llama_sampler_init_power_law (params.power_law_target, params.power_law_decay, params.seed)); has_distribution_sampler = true; break; default: From d1e5c60442aebfc788e5096eac8d810efea3c1df Mon Sep 17 00:00:00 2001 From: ddh0 Date: Sat, 13 Dec 2025 23:26:03 -0600 Subject: [PATCH 22/38] add missing values to `common_params_sampling::print()` --- common/sampling.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/common/sampling.cpp b/common/sampling.cpp index 8bfdae3be1..a8494a679d 100644 --- a/common/sampling.cpp +++ b/common/sampling.cpp @@ -151,11 +151,11 @@ std::string common_params_sampling::print() const { "\trepeat_last_n = %d, repeat_penalty = %.3f, frequency_penalty = %.3f, presence_penalty = %.3f\n" "\tdry_multiplier = %.3f, dry_base = %.3f, dry_allowed_length = %d, dry_penalty_last_n = %d\n" "\ttop_k = %d, top_p = %.3f, min_p = %.3f, xtc_probability = %.3f, xtc_threshold = %.3f, typical_p = %.3f, top_n_sigma = %.3f, temp = %.3f\n" - "\tmirostat = %d, mirostat_lr = %.3f, mirostat_ent = %.3f", + "\tmirostat = %d, mirostat_lr = %.3f, mirostat_ent = %.3f, power_law_target = %.3f, power_law_decay = %.3f", penalty_last_n, penalty_repeat, penalty_freq, penalty_present, dry_multiplier, dry_base, dry_allowed_length, dry_penalty_last_n, top_k, top_p, min_p, xtc_probability, xtc_threshold, typ_p, top_n_sigma, temp, - mirostat, mirostat_eta, mirostat_tau); + mirostat, mirostat_eta, mirostat_tau, power_law_target, power_law_decay); return std::string(result); } From 9613c481725a0fb39784db5b292cdc3de446156f Mon Sep 17 00:00:00 2001 From: ddh0 Date: Sun, 14 Dec 2025 00:36:59 -0600 Subject: [PATCH 23/38] with logging --- src/llama-sampling.cpp | 20 ++++++++++++++++---- 1 file changed, 16 insertions(+), 4 deletions(-) diff --git a/src/llama-sampling.cpp b/src/llama-sampling.cpp index db126a18d5..ae3e269ea2 100644 --- a/src/llama-sampling.cpp +++ b/src/llama-sampling.cpp @@ -2362,12 +2362,16 @@ static const char * llama_sampler_power_law_name(const struct llama_sampler * /* static float llama_sampler_power_law_compute_target(const llama_sampler_power_law * ctx, float decay) { if (ctx->total_weight == 0.0f) { // if there is no history, just use base target + fprintf(stderr, "power-law: compute_target: total_weight == 0.0 (target fixed at %.3f)\n", ctx->target); + fflush(stderr); 
return ctx->target; } // maintain a running weighted sum with exponential decay float new_total_weight = 1.0f + decay * ctx->total_weight; + fprintf(stderr, "power-law: compute_target: new_total_weight = %.3f\n", new_total_weight); fflush(stderr); float next_value = ctx->target * new_total_weight - decay * ctx->weighted_sum; + fprintf(stderr, "power-law: compute_target: next_value = %.3f\n", next_value); fflush(stderr); // clamp to [0.0, 1.0] return std::max(0.0f, std::min(next_value, 1.0f)); @@ -2378,14 +2382,16 @@ static void llama_sampler_power_law_apply(struct llama_sampler * smpl, llama_tok if (ctx->target < 0.0f) { // no-op: just sample from the distribution as-is + fprintf(stderr, "power-law: no-op!"); fflush(stderr); llama_sampler_softmax_impl(cur_p, false); - const int idx = llama_sample_dist(cur_p, ctx->rng); + const int idx = llama_sample_dist(cur_p, ctx->rng); cur_p->selected = idx; return; } // clamp decay to avoid degenerate case at 1.0 (unbounded accumulation) const float decay = std::min(ctx->decay, 0.99f); + fprintf(stderr, "power-law: decay = %.3f\n", decay); fflush(stderr); // fixed power law transform parameters const float distribution_width = 0.3f; @@ -2403,15 +2409,20 @@ static void llama_sampler_power_law_apply(struct llama_sampler * smpl, llama_tok } float computed_target = llama_sampler_power_law_compute_target(ctx, decay); + fprintf(stderr, "power-law: computed_target = %.3f\n", computed_target); fflush(stderr); // // power law transform // for (size_t i = 0; i < cur_p->size; ++i) { - float p = cur_p->data[i].p; - float normalized_distance = std::abs(p - computed_target) / distribution_width; - cur_p->data[i].logit = peak_logit_value / (1.0f + std::pow(normalized_distance, tail_heaviness)); + float p = cur_p->data[i].p; + fprintf(stderr, "power-law: transform: p = %.3f\n", p); fflush(stderr); + float normed_distance = std::abs(p - computed_target) / distribution_width; + fprintf(stderr, "power-law: transform: normed_distance = %.3f\n", normed_distance); fflush(stderr); + float new_p = peak_logit_value / (1.0f + std::pow(normed_distance, tail_heaviness)); + fprintf(stderr, "power-law: transform: new_p = %.3f\n", new_p); fflush(stderr); + cur_p->data[i].logit = new_p; } llama_sampler_softmax_impl(cur_p, false); @@ -2419,6 +2430,7 @@ static void llama_sampler_power_law_apply(struct llama_sampler * smpl, llama_tok // sample from transformed distribution const int idx = llama_sample_dist(cur_p, ctx->rng); cur_p->selected = idx; + fprintf(stderr, "power-law: selected token %d\n", idx); fflush(stderr); // update running history with the original probability of the selected token float original_p = original_probs[idx]; From 2a3f579d1ffcd2dffeb60ea21e7a4ceba6d15e22 Mon Sep 17 00:00:00 2001 From: ddh0 Date: Sun, 14 Dec 2025 01:55:02 -0600 Subject: [PATCH 24/38] does this fix it? 
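The rule being tried here is easiest to see in isolation: keep exponentially decayed running sums of the selected tokens' original probabilities, then mirror their weighted average around the base target and clamp. A minimal standalone sketch of that rule follows; the pl_* names are illustrative only and not part of the patch.

    #include <algorithm>

    // illustrative stand-in for the sampler's running history
    struct pl_history {
        float weighted_sum = 0.0f; // decayed sum of selected original probabilities
        float total_weight = 0.0f; // decayed count; converges to 1/(1 - decay)
    };

    // mirror the recent weighted average around the base target,
    // clamped to a valid probability
    static float pl_next_target(const pl_history & h, float base_target) {
        if (h.total_weight == 0.0f) {
            return base_target; // no history yet
        }
        const float avg = h.weighted_sum / h.total_weight;
        return std::clamp(2.0f * base_target - avg, 0.0f, 1.0f);
    }

    // fold the selected token's ORIGINAL (pre-transform) probability
    // into the running sums
    static void pl_observe(pl_history & h, float selected_p, float decay) {
        h.weighted_sum = selected_p + decay * h.weighted_sum;
        h.total_weight = 1.0f + decay * h.total_weight;
    }

For example, with base_target = 0.5 and a single observed selection at p = 0.8, the next target is clamp(2*0.5 - 0.8, 0.0, 1.0) = 0.2: the sampler compensates by aiming at less probable tokens until the running average settles back near the base target.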
--- src/llama-sampling.cpp | 49 ++++++++++++++++++------------------------ 1 file changed, 21 insertions(+), 28 deletions(-) diff --git a/src/llama-sampling.cpp b/src/llama-sampling.cpp index ae3e269ea2..26135a4f82 100644 --- a/src/llama-sampling.cpp +++ b/src/llama-sampling.cpp @@ -2358,23 +2358,20 @@ static const char * llama_sampler_power_law_name(const struct llama_sampler * /* return "power-law"; } -// compute the adaptive target probability for the current sampling step -static float llama_sampler_power_law_compute_target(const llama_sampler_power_law * ctx, float decay) { +// compute the adapted target probability for the current sampling step +static float llama_sampler_power_law_compute_target(const llama_sampler_power_law * ctx) { + const float base_target = ctx->target; if (ctx->total_weight == 0.0f) { - // if there is no history, just use base target - fprintf(stderr, "power-law: compute_target: total_weight == 0.0 (target fixed at %.3f)\n", ctx->target); - fflush(stderr); - return ctx->target; + fprintf(stderr, "power-law: compute_target: total_weight == 0.0 (target fixed at %.3f)\n", base_target); + return base_target; } + float target = 2.0f * base_target - (ctx->weighted_sum / ctx->total_weight); + fprintf(stderr, "power-law: compute_target: target = %.3f\n", target); - // maintain a running weighted sum with exponential decay - float new_total_weight = 1.0f + decay * ctx->total_weight; - fprintf(stderr, "power-law: compute_target: new_total_weight = %.3f\n", new_total_weight); fflush(stderr); - float next_value = ctx->target * new_total_weight - decay * ctx->weighted_sum; - fprintf(stderr, "power-law: compute_target: next_value = %.3f\n", next_value); fflush(stderr); - - // clamp to [0.0, 1.0] - return std::max(0.0f, std::min(next_value, 1.0f)); + // clamp result to [0.0, 1.0] + target = std::max(0.0f, std::min(target, 1.0f)); + fprintf(stderr, "power-law: compute_target: target (post-clamp) = %.3f\n", target); fflush(stderr); + return target; } static void llama_sampler_power_law_apply(struct llama_sampler * smpl, llama_token_data_array * cur_p) { @@ -2393,11 +2390,6 @@ static void llama_sampler_power_law_apply(struct llama_sampler * smpl, llama_tok const float decay = std::min(ctx->decay, 0.99f); fprintf(stderr, "power-law: decay = %.3f\n", decay); fflush(stderr); - // fixed power law transform parameters - const float distribution_width = 0.3f; - const float peak_logit_value = 5.0f; - const float tail_heaviness = 2.0f; - // get the original probabilities llama_sampler_softmax_impl(cur_p, false); @@ -2408,21 +2400,22 @@ static void llama_sampler_power_law_apply(struct llama_sampler * smpl, llama_tok original_probs.push_back(cur_p->data[i].p); } - float computed_target = llama_sampler_power_law_compute_target(ctx, decay); + float computed_target = llama_sampler_power_law_compute_target(ctx); fprintf(stderr, "power-law: computed_target = %.3f\n", computed_target); fflush(stderr); // // power law transform // + // transformation constants + const float distribution_width = 0.3f; + const float peak_logit_value = 5.0f; + + const float inv_width = 1.0f / distribution_width; + for (size_t i = 0; i < cur_p->size; ++i) { - float p = cur_p->data[i].p; - fprintf(stderr, "power-law: transform: p = %.3f\n", p); fflush(stderr); - float normed_distance = std::abs(p - computed_target) / distribution_width; - fprintf(stderr, "power-law: transform: normed_distance = %.3f\n", normed_distance); fflush(stderr); - float new_p = peak_logit_value / (1.0f + std::pow(normed_distance, 
tail_heaviness)); - fprintf(stderr, "power-law: transform: new_p = %.3f\n", new_p); fflush(stderr); - cur_p->data[i].logit = new_p; + float dist = (cur_p->data[i].p - computed_target) * inv_width; + cur_p->data[i].logit = peak_logit_value / (1.0f + dist * dist); } llama_sampler_softmax_impl(cur_p, false); @@ -2430,7 +2423,7 @@ static void llama_sampler_power_law_apply(struct llama_sampler * smpl, llama_tok // sample from transformed distribution const int idx = llama_sample_dist(cur_p, ctx->rng); cur_p->selected = idx; - fprintf(stderr, "power-law: selected token %d\n", idx); fflush(stderr); + fprintf(stderr, "power-law: selected token at index %d\n", idx); fflush(stderr); // update running history with the original probability of the selected token float original_p = original_probs[idx]; From ec54fe5f1445e982e68b6a9c05975de1310719e8 Mon Sep 17 00:00:00 2001 From: ddh0 Date: Sun, 14 Dec 2025 02:54:14 -0600 Subject: [PATCH 25/38] no, but does this? --- common/arg.cpp | 10 ++++------ src/llama-sampling.cpp | 3 +++ 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/common/arg.cpp b/common/arg.cpp index 919e37b7f8..e7bb44f8f5 100644 --- a/common/arg.cpp +++ b/common/arg.cpp @@ -1569,12 +1569,10 @@ common_params_context common_params_parser_init(common_params & params, llama_ex ).set_sparam()); add_opt(common_arg( {"--power-law-decay"}, "N", - string_format("power law sampler: decay rate for target adaptation over time. lower " - "values -> faster but less stable adaptation. " - "(valid range 0.0 to 1.0; ≤0 = no adaptation) (default: %.2f)", - (double)params.sampling.power_law_decay), - [](common_params & params, int value) { - params.sampling.power_law_decay = value; + string_format("decay rate for target adaptation over time. lower values -> faster but less stable adaptation.\n" + "(valid range 0.0 to 1.0; ≤0 = no adaptation) (default: %.2f)", (double)params.sampling.power_law_decay), + [](common_params & params, const std::string & value) { + params.sampling.power_law_decay = std::stof(value); } ).set_sparam()); add_opt(common_arg( diff --git a/src/llama-sampling.cpp b/src/llama-sampling.cpp index 26135a4f82..6beb927a6c 100644 --- a/src/llama-sampling.cpp +++ b/src/llama-sampling.cpp @@ -2427,8 +2427,11 @@ static void llama_sampler_power_law_apply(struct llama_sampler * smpl, llama_tok // update running history with the original probability of the selected token float original_p = original_probs[idx]; + fprintf(stderr, "power-law: original prob was %.3f\n", original_p); fflush(stderr); ctx->weighted_sum = original_p + decay * ctx->weighted_sum; + fprintf(stderr, "power-law: updated ctx->weighted_sum = %.3f\n", ctx->weighted_sum); fflush(stderr); ctx->total_weight = 1.0f + decay * ctx->total_weight; + fprintf(stderr, "power-law: updated ctx->total_weight = %.3f\n", ctx->total_weight); fflush(stderr); } static void llama_sampler_power_law_reset(struct llama_sampler * smpl) { From 667b70fdac1054401f6ab278fba99a90bcf5253b Mon Sep 17 00:00:00 2001 From: ddh0 Date: Sun, 14 Dec 2025 03:41:28 -0600 Subject: [PATCH 26/38] update default decay --- common/common.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/common/common.h b/common/common.h index 66a6ca96b3..7fe62b4111 100644 --- a/common/common.h +++ b/common/common.h @@ -185,7 +185,7 @@ struct common_params_sampling { int32_t dry_allowed_length = 2; // tokens extending repetitions beyond this receive penalty int32_t dry_penalty_last_n = -1; // how many tokens to scan for repetitions (0 = disable penalty, -1 = context 
size) float power_law_target = -1.0f; // select tokens near this probability (valid range 0.0 to 1.0; <0 = disabled) - float power_law_decay = 0.9f; // decay rate for target adaptation over time. lower values -> faster but less stable adaptation. (valid range 0.0 to 1.0; ≤0 = no adaptation) + float power_law_decay = 0.50f; // decay rate for target adaptation over time. lower values -> faster but less stable adaptation. (valid range 0.0 to 1.0; ≤0 = no adaptation) int32_t mirostat = 0; // 0 = disabled, 1 = mirostat, 2 = mirostat 2.0 float top_n_sigma = -1.00f; // -1.0 = disabled float mirostat_tau = 5.00f; // target entropy From 693478066981b41f3e3b7a714c9327310a87dfc2 Mon Sep 17 00:00:00 2001 From: ddh0 Date: Sun, 14 Dec 2025 16:26:15 -0600 Subject: [PATCH 27/38] optimize --- src/llama-sampling.cpp | 53 ++++++++++++++++++++---------------------- 1 file changed, 25 insertions(+), 28 deletions(-) diff --git a/src/llama-sampling.cpp b/src/llama-sampling.cpp index 6beb927a6c..78fe7706b9 100644 --- a/src/llama-sampling.cpp +++ b/src/llama-sampling.cpp @@ -2349,11 +2349,18 @@ struct llama_sampler_power_law { std::mt19937 rng; // historical token probabilities weighted by recency - float weighted_sum; + float weighted_sum; // sum of weights, converges to 1/(1-decay) - float total_weight; + float total_weight; + // used to store original token probabilities (needed for history update after selection) + std::vector original_probs; }; +// transformation constants +static constexpr float DISTRIBUTION_WIDTH = 0.3f; +static constexpr float PEAK_LOGIT_VALUE = 5.0f; +static constexpr float INV_WIDTH = 1.0f / DISTRIBUTION_WIDTH; + static const char * llama_sampler_power_law_name(const struct llama_sampler * /*smpl*/) { return "power-law"; } @@ -2369,7 +2376,7 @@ static float llama_sampler_power_law_compute_target(const llama_sampler_power_la fprintf(stderr, "power-law: compute_target: target = %.3f\n", target); // clamp result to [0.0, 1.0] - target = std::max(0.0f, std::min(target, 1.0f)); + target = std::clamp(target, 0.0f, 1.0f); fprintf(stderr, "power-law: compute_target: target (post-clamp) = %.3f\n", target); fflush(stderr); return target; } @@ -2379,43 +2386,32 @@ static void llama_sampler_power_law_apply(struct llama_sampler * smpl, llama_tok if (ctx->target < 0.0f) { // no-op: just sample from the distribution as-is - fprintf(stderr, "power-law: no-op!"); fflush(stderr); + fprintf(stderr, "power-law: no-op!"); llama_sampler_softmax_impl(cur_p, false); const int idx = llama_sample_dist(cur_p, ctx->rng); cur_p->selected = idx; return; } - // clamp decay to avoid degenerate case at 1.0 (unbounded accumulation) - const float decay = std::min(ctx->decay, 0.99f); - fprintf(stderr, "power-law: decay = %.3f\n", decay); fflush(stderr); - // get the original probabilities llama_sampler_softmax_impl(cur_p, false); - // store the original probabilities (needed for history update after selection) - std::vector original_probs; - original_probs.reserve(cur_p->size); + // store the original probabilities + ctx->original_probs.resize(cur_p->size); for (size_t i = 0; i < cur_p->size; ++i) { - original_probs.push_back(cur_p->data[i].p); + ctx->original_probs[i] = cur_p->data[i].p; } float computed_target = llama_sampler_power_law_compute_target(ctx); - fprintf(stderr, "power-law: computed_target = %.3f\n", computed_target); fflush(stderr); + fprintf(stderr, "power-law: computed_target = %.3f\n", computed_target); // // power law transform // - // transformation constants - const float distribution_width = 0.3f; - 
const float peak_logit_value = 5.0f; - - const float inv_width = 1.0f / distribution_width; - for (size_t i = 0; i < cur_p->size; ++i) { - float dist = (cur_p->data[i].p - computed_target) * inv_width; - cur_p->data[i].logit = peak_logit_value / (1.0f + dist * dist); + float dist = (cur_p->data[i].p - computed_target) * INV_WIDTH; + cur_p->data[i].logit = PEAK_LOGIT_VALUE / (1.0f + dist * dist); } llama_sampler_softmax_impl(cur_p, false); @@ -2423,14 +2419,14 @@ static void llama_sampler_power_law_apply(struct llama_sampler * smpl, llama_tok // sample from transformed distribution const int idx = llama_sample_dist(cur_p, ctx->rng); cur_p->selected = idx; - fprintf(stderr, "power-law: selected token at index %d\n", idx); fflush(stderr); + fprintf(stderr, "power-law: selected token at index %d\n", idx); // update running history with the original probability of the selected token - float original_p = original_probs[idx]; - fprintf(stderr, "power-law: original prob was %.3f\n", original_p); fflush(stderr); - ctx->weighted_sum = original_p + decay * ctx->weighted_sum; - fprintf(stderr, "power-law: updated ctx->weighted_sum = %.3f\n", ctx->weighted_sum); fflush(stderr); - ctx->total_weight = 1.0f + decay * ctx->total_weight; + float original_p = ctx->original_probs[idx]; + fprintf(stderr, "power-law: original prob was %.3f\n", original_p); + ctx->weighted_sum = original_p + ctx->decay * ctx->weighted_sum; + fprintf(stderr, "power-law: updated ctx->weighted_sum = %.3f\n", ctx->weighted_sum); + ctx->total_weight = 1.0f + ctx->decay * ctx->total_weight; fprintf(stderr, "power-law: updated ctx->total_weight = %.3f\n", ctx->total_weight); fflush(stderr); } @@ -2448,6 +2444,7 @@ static struct llama_sampler * llama_sampler_power_law_clone(const struct llama_s result_ctx->rng = ctx->rng; result_ctx->weighted_sum = ctx->weighted_sum; result_ctx->total_weight = ctx->total_weight; + result_ctx->original_probs.reserve(ctx->original_probs.capacity()); return result; } @@ -2475,7 +2472,7 @@ struct llama_sampler * llama_sampler_init_power_law( /* .iface = */ &llama_sampler_power_law_i, /* .ctx = */ new llama_sampler_power_law { /* .target = */ target, - /* .decay = */ decay, + /* .decay = */ std::min(decay, 0.99f), /* .seed = */ seed_cur, /* .rng = */ std::mt19937(seed_cur), /* .weighted_sum = */ 0.0f, From f5d08724e75d3f41d4737c333349e03b21baa704 Mon Sep 17 00:00:00 2001 From: ddh0 Date: Sun, 14 Dec 2025 21:51:59 -0600 Subject: [PATCH 28/38] fix bad merge my git skills are lacking --- common/sampling.cpp | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/common/sampling.cpp b/common/sampling.cpp index ee58aa50b3..1e26f44a6c 100644 --- a/common/sampling.cpp +++ b/common/sampling.cpp @@ -253,8 +253,6 @@ struct common_sampler * common_sampler_init(const struct llama_model * model, co for (const auto & str : params.dry_sequence_breakers) { c_breakers.push_back(str.c_str()); } - - llama_sampler_chain_add(result->chain, llama_sampler_init_dry (vocab, llama_model_n_ctx_train(model), params.dry_multiplier, params.dry_base, params.dry_allowed_length, params.dry_penalty_last_n, c_breakers.data(), c_breakers.size())); samplers.push_back(llama_sampler_init_dry (vocab, llama_model_n_ctx_train(model), params.dry_multiplier, params.dry_base, params.dry_allowed_length, params.dry_penalty_last_n, c_breakers.data(), c_breakers.size())); } break; @@ -286,7 +284,6 @@ struct common_sampler * common_sampler_init(const struct llama_model * model, co samplers.push_back(llama_sampler_init_penalties 
(params.penalty_last_n, params.penalty_repeat, params.penalty_freq, params.penalty_present)); break; case COMMON_SAMPLER_TYPE_POWER_LAW: - llama_sampler_chain_add(result->chain, llama_sampler_init_power_law (params.power_law_target, params.power_law_decay, params.seed)); has_distribution_sampler = true; break; default: @@ -295,10 +292,8 @@ struct common_sampler * common_sampler_init(const struct llama_model * model, co } // only add `dist` to the end of the chain if no other distribution samplers were added if (!has_distribution_sampler) { - llama_sampler_chain_add(result->chain, llama_sampler_init_dist(params.seed)); + samplers.push_back(llama_sampler_init_dist(params.seed)); } - - samplers.push_back(llama_sampler_init_dist(params.seed)); } else if (params.mirostat == 1) { samplers.push_back(llama_sampler_init_temp(params.temp)); samplers.push_back(llama_sampler_init_mirostat(llama_vocab_n_tokens(vocab), params.seed, params.mirostat_tau, params.mirostat_eta, 100)); From 493bf301ff861cc1ce52dc86e8204954c98f8f80 Mon Sep 17 00:00:00 2001 From: ddh0 Date: Sun, 14 Dec 2025 21:55:45 -0600 Subject: [PATCH 29/38] silence `missing initializer for member` --- src/llama-sampling.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/llama-sampling.cpp b/src/llama-sampling.cpp index 78fe7706b9..e044ef5898 100644 --- a/src/llama-sampling.cpp +++ b/src/llama-sampling.cpp @@ -2477,6 +2477,7 @@ struct llama_sampler * llama_sampler_init_power_law( /* .rng = */ std::mt19937(seed_cur), /* .weighted_sum = */ 0.0f, /* .total_weight = */ 0.0f, + /* .original_probs = */ {}, } ); } From 68543257e944acf75f2483619c54638ee46a3901 Mon Sep 17 00:00:00 2001 From: ddh0 Date: Sun, 14 Dec 2025 22:03:17 -0600 Subject: [PATCH 30/38] update default decay to 0.9 --- common/common.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/common/common.h b/common/common.h index 7231cbc5b8..4cc909beeb 100644 --- a/common/common.h +++ b/common/common.h @@ -185,7 +185,7 @@ struct common_params_sampling { int32_t dry_allowed_length = 2; // tokens extending repetitions beyond this receive penalty int32_t dry_penalty_last_n = -1; // how many tokens to scan for repetitions (0 = disable penalty, -1 = context size) float power_law_target = -1.0f; // select tokens near this probability (valid range 0.0 to 1.0; <0 = disabled) - float power_law_decay = 0.50f; // decay rate for target adaptation over time. lower values -> faster but less stable adaptation. (valid range 0.0 to 1.0; ≤0 = no adaptation) + float power_law_decay = 0.90f; // decay rate for target adaptation over time. lower values -> faster but less stable adaptation. 
(valid range 0.0 to 1.0; ≤0 = no adaptation) int32_t mirostat = 0; // 0 = disabled, 1 = mirostat, 2 = mirostat 2.0 float top_n_sigma = -1.00f; // -1.0 = disabled float mirostat_tau = 5.00f; // target entropy From b5ed673ce92fdc9753679742ef28a218b5df1e68 Mon Sep 17 00:00:00 2001 From: ddh0 Date: Sun, 14 Dec 2025 22:08:36 -0600 Subject: [PATCH 31/38] fix logging --- src/llama-sampling.cpp | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/src/llama-sampling.cpp b/src/llama-sampling.cpp index e044ef5898..1c1febee2d 100644 --- a/src/llama-sampling.cpp +++ b/src/llama-sampling.cpp @@ -2386,17 +2386,15 @@ static void llama_sampler_power_law_apply(struct llama_sampler * smpl, llama_tok if (ctx->target < 0.0f) { // no-op: just sample from the distribution as-is - fprintf(stderr, "power-law: no-op!"); + fprintf(stderr, "power-law: no-op!\n"); fflush(stderr); llama_sampler_softmax_impl(cur_p, false); const int idx = llama_sample_dist(cur_p, ctx->rng); cur_p->selected = idx; return; } - // get the original probabilities + // softmax and store the original probabilities llama_sampler_softmax_impl(cur_p, false); - - // store the original probabilities ctx->original_probs.resize(cur_p->size); for (size_t i = 0; i < cur_p->size; ++i) { ctx->original_probs[i] = cur_p->data[i].p; @@ -2409,6 +2407,7 @@ static void llama_sampler_power_law_apply(struct llama_sampler * smpl, llama_tok // power law transform // + fprintf(stderr, "power-law: transform: cur_p->size = %.3f\n", cur_p->size); for (size_t i = 0; i < cur_p->size; ++i) { float dist = (cur_p->data[i].p - computed_target) * INV_WIDTH; cur_p->data[i].logit = PEAK_LOGIT_VALUE / (1.0f + dist * dist); From 4e28eb2ffe9d052132f9daa4e5b0d73dec27bb0a Mon Sep 17 00:00:00 2001 From: ddh0 Date: Sun, 14 Dec 2025 22:11:34 -0600 Subject: [PATCH 32/38] format (double) --- src/llama-sampling.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/llama-sampling.cpp b/src/llama-sampling.cpp index 1c1febee2d..f255340837 100644 --- a/src/llama-sampling.cpp +++ b/src/llama-sampling.cpp @@ -2407,7 +2407,7 @@ static void llama_sampler_power_law_apply(struct llama_sampler * smpl, llama_tok // power law transform // - fprintf(stderr, "power-law: transform: cur_p->size = %.3f\n", cur_p->size); + fprintf(stderr, "power-law: transform: cur_p->size = %.3f\n", (double)cur_p->size); for (size_t i = 0; i < cur_p->size; ++i) { float dist = (cur_p->data[i].p - computed_target) * INV_WIDTH; cur_p->data[i].logit = PEAK_LOGIT_VALUE / (1.0f + dist * dist); From 1c58e9a96a3060e907a60cfae41c837b6f46e2ea Mon Sep 17 00:00:00 2001 From: ddh0 Date: Sun, 14 Dec 2025 22:32:27 -0600 Subject: [PATCH 33/38] add power law to the new `samplers` vector --- common/sampling.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/common/sampling.cpp b/common/sampling.cpp index 1e26f44a6c..05e44170e4 100644 --- a/common/sampling.cpp +++ b/common/sampling.cpp @@ -285,6 +285,7 @@ struct common_sampler * common_sampler_init(const struct llama_model * model, co break; case COMMON_SAMPLER_TYPE_POWER_LAW: has_distribution_sampler = true; + samplers.push_back(llama_sampler_init_power_law (params.power_law_target, params.power_law_decay, params.seed)); break; default: GGML_ASSERT(false && "unknown sampler type"); From 4e04bd1ce21f6ec85897e89058866f18d4214b3a Mon Sep 17 00:00:00 2001 From: ddh0 Date: Sun, 14 Dec 2025 23:14:51 -0600 Subject: [PATCH 34/38] log sampler init values --- src/llama-sampling.cpp | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff 
From 4e04bd1ce21f6ec85897e89058866f18d4214b3a Mon Sep 17 00:00:00 2001
From: ddh0
Date: Sun, 14 Dec 2025 23:14:51 -0600
Subject: [PATCH 34/38] log sampler init values

---
 src/llama-sampling.cpp | 15 +++++++++------
 1 file changed, 9 insertions(+), 6 deletions(-)

diff --git a/src/llama-sampling.cpp b/src/llama-sampling.cpp
index f255340837..cf235b57d4 100644
--- a/src/llama-sampling.cpp
+++ b/src/llama-sampling.cpp
@@ -2466,16 +2466,19 @@ struct llama_sampler * llama_sampler_init_power_law(
     float decay,
     uint32_t seed
 ) {
+    const float _decay = std::min(decay, 0.99f);
+    fprintf(stderr, "power-law: init: target %.3f, decay %.3f\n", (double)target, (double)_decay);
+    fflush(stderr);
     auto seed_cur = get_rng_seed(seed);
     return llama_sampler_init(
         /* .iface = */ &llama_sampler_power_law_i,
         /* .ctx   = */ new llama_sampler_power_law {
-            /* .target = */ target,
-            /* .decay = */ std::min(decay, 0.99f),
-            /* .seed = */ seed_cur,
-            /* .rng = */ std::mt19937(seed_cur),
-            /* .weighted_sum = */ 0.0f,
-            /* .total_weight = */ 0.0f,
+            /* .target       = */ target,
+            /* .decay        = */ _decay,
+            /* .seed         = */ seed_cur,
+            /* .rng          = */ std::mt19937(seed_cur),
+            /* .weighted_sum = */ 0.0f,
+            /* .total_weight = */ 0.0f,
             /* .original_probs = */ {},
         }
     );

From 9c50b573f5e316037700d2fd548adc8a81074d6c Mon Sep 17 00:00:00 2001
From: ddh0
Date: Mon, 15 Dec 2025 09:25:05 -0600
Subject: [PATCH 35/38] improve logging messages in llama_sampler_power_law

---
 src/llama-sampling.cpp | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/src/llama-sampling.cpp b/src/llama-sampling.cpp
index cf235b57d4..dc827fe219 100644
--- a/src/llama-sampling.cpp
+++ b/src/llama-sampling.cpp
@@ -2369,15 +2369,15 @@ static const char * llama_sampler_power_law_name(const struct llama_sampler * /*
 static float llama_sampler_power_law_compute_target(const llama_sampler_power_law * ctx) {
     const float base_target = ctx->target;
     if (ctx->total_weight == 0.0f) {
-        fprintf(stderr, "power-law: compute_target: total_weight == 0.0 (target fixed at %.3f)\n", base_target);
+        fprintf(stderr, "power-law: compute_target: total_weight == 0.0 (target fixed at %.3f)\n", base_target); fflush(stderr);
         return base_target;
     }
     float target = 2.0f * base_target - (ctx->weighted_sum / ctx->total_weight);
-    fprintf(stderr, "power-law: compute_target: target = %.3f\n", target);
+    fprintf(stderr, "power-law: compute_target: raw target = %.3f\n", target);
 
     // clamp result to [0.0, 1.0]
     target = std::clamp(target, 0.0f, 1.0f);
-    fprintf(stderr, "power-law: compute_target: target (post-clamp) = %.3f\n", target); fflush(stderr);
+    fprintf(stderr, "power-law: compute_target: clamped target = %.3f\n", target); fflush(stderr);
     return target;
 }
 
@@ -2407,7 +2407,7 @@ static void llama_sampler_power_law_apply(struct llama_sampler * smpl, llama_tok
     // power law transform
     //
 
-    fprintf(stderr, "power-law: transform: cur_p->size = %.3f\n", (double)cur_p->size);
+    fprintf(stderr, "power-law: transform: cur_p->size = %d\n", (size_t)cur_p->size);
     for (size_t i = 0; i < cur_p->size; ++i) {
         float dist = (cur_p->data[i].p - computed_target) * INV_WIDTH;
         cur_p->data[i].logit = PEAK_LOGIT_VALUE / (1.0f + dist * dist);

From 0344068cf112e524eb3fbdbd58c171870b63e56c Mon Sep 17 00:00:00 2001
From: ddh0
Date: Mon, 15 Dec 2025 09:35:44 -0600
Subject: [PATCH 36/38] remove extraneous logging

---
 src/llama-sampling.cpp | 1 -
 1 file changed, 1 deletion(-)

diff --git a/src/llama-sampling.cpp b/src/llama-sampling.cpp
index dc827fe219..7b48e5d970 100644
--- a/src/llama-sampling.cpp
+++ b/src/llama-sampling.cpp
@@ -2401,7 +2401,6 @@ static void llama_sampler_power_law_apply(struct llama_sampler * smpl, llama_tok
     }
 
     float computed_target = llama_sampler_power_law_compute_target(ctx);
-    fprintf(stderr, "power-law: computed_target = %.3f\n", computed_target);
 
     //
     // power law transform
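At this point `llama_sampler_power_law_compute_target` implements a negative-feedback reflection: the next target is the base target mirrored around the decayed mean of previously selected probabilities, clamp(2 * base - mean, 0, 1), so overshoot on recent tokens is paid back on the following steps. For example, with a base target of 0.50 and a running mean of 0.62, the next step aims at 2 * 0.50 - 0.62 = 0.38; the clamp only keeps the result a valid probability. A hypothetical standalone restatement of the same rule:

    #include <algorithm>

    // hypothetical helper mirroring llama_sampler_power_law_compute_target
    float adapted_target(float base, float weighted_sum, float total_weight) {
        if (total_weight == 0.0f) {
            return base; // no history yet: aim at the base target
        }
        // reflect the decayed mean around the base target
        return std::clamp(2.0f * base - weighted_sum / total_weight, 0.0f, 1.0f);
    }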
     // power law transform

From 1c2d2e900d487d70f704441bafe9ac87afd89d6f Mon Sep 17 00:00:00 2001
From: ddh0
Date: Mon, 15 Dec 2025 21:02:11 -0600
Subject: [PATCH 37/38] simplify target computation

last commit with debug logging!
---
 src/llama-sampling.cpp | 26 +++++------------------
 1 file changed, 7 insertions(+), 19 deletions(-)

diff --git a/src/llama-sampling.cpp b/src/llama-sampling.cpp
index 7b48e5d970..7684c8f38c 100644
--- a/src/llama-sampling.cpp
+++ b/src/llama-sampling.cpp
@@ -2365,22 +2365,6 @@ static const char * llama_sampler_power_law_name(const struct llama_sampler * /*
     return "power-law";
 }
 
-// compute the adapted target probability for the current sampling step
-static float llama_sampler_power_law_compute_target(const llama_sampler_power_law * ctx) {
-    const float base_target = ctx->target;
-    if (ctx->total_weight == 0.0f) {
-        fprintf(stderr, "power-law: compute_target: total_weight == 0.0 (target fixed at %.3f)\n", base_target); fflush(stderr);
-        return base_target;
-    }
-    float target = 2.0f * base_target - (ctx->weighted_sum / ctx->total_weight);
-    fprintf(stderr, "power-law: compute_target: raw target = %.3f\n", target);
-
-    // clamp result to [0.0, 1.0]
-    target = std::clamp(target, 0.0f, 1.0f);
-    fprintf(stderr, "power-law: compute_target: clamped target = %.3f\n", target); fflush(stderr);
-    return target;
-}
-
 static void llama_sampler_power_law_apply(struct llama_sampler * smpl, llama_token_data_array * cur_p) {
     auto * ctx = (llama_sampler_power_law *) smpl->ctx;
 
@@ -2400,13 +2384,18 @@ static void llama_sampler_power_law_apply(struct llama_sampler * smpl, llama_tok
         ctx->original_probs[i] = cur_p->data[i].p;
     }
 
-    float computed_target = llama_sampler_power_law_compute_target(ctx);
+    // compute the adapted target probability for the current sampling step
+    float computed_target = std::clamp(
+        ctx->total_weight == 0.0f ? ctx->target : 2.0f * ctx->target - (ctx->weighted_sum / ctx->total_weight),
+        0.0f, 1.0f
+    );
+    fprintf(stderr, "power-law: computed target = %.3f\n", computed_target);
 
     //
     // power law transform
     //
 
-    fprintf(stderr, "power-law: transform: cur_p->size = %d\n", (size_t)cur_p->size);
+    fprintf(stderr, "power-law: cur_p->size = %d\n", (int)cur_p->size);
     for (size_t i = 0; i < cur_p->size; ++i) {
         float dist = (cur_p->data[i].p - computed_target) * INV_WIDTH;
         cur_p->data[i].logit = PEAK_LOGIT_VALUE / (1.0f + dist * dist);
@@ -2421,7 +2410,6 @@ static void llama_sampler_power_law_apply(struct llama_sampler * smpl, llama_tok
 
     // update running history with the original probability of the selected token
     float original_p = ctx->original_probs[idx];
-    fprintf(stderr, "power-law: original prob was %.3f\n", original_p);
    ctx->weighted_sum = original_p + ctx->decay * ctx->weighted_sum;
     fprintf(stderr, "power-law: updated ctx->weighted_sum = %.3f\n", ctx->weighted_sum);
     ctx->total_weight = 1.0f + ctx->decay * ctx->total_weight;
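With the helper inlined, the whole adaptation is one clamped expression plus two lines of state update, and it behaves like a small control loop. A toy simulation (the +0.10 bias on the selected probability is an invented stand-in for model behavior) shows the loop splitting a constant bias between target and mean instead of letting the mean drift:

    #include <algorithm>
    #include <cstdio>

    int main() {
        const float base = 0.50f, decay = 0.90f;
        float weighted_sum = 0.0f, total_weight = 0.0f;

        for (int step = 0; step < 20; ++step) {
            const float target = total_weight == 0.0f
                ? base
                : std::clamp(2.0f * base - weighted_sum / total_weight, 0.0f, 1.0f);
            const float selected_p = std::min(target + 0.10f, 1.0f); // invented bias
            weighted_sum = selected_p + decay * weighted_sum;
            total_weight = 1.0f + decay * total_weight;
            printf("step %2d: target %.3f, mean %.3f\n", step, target, weighted_sum / total_weight);
        }
        return 0;
    }

In steady state the mean settles near base + 0.05 and the target near base - 0.05, each compensating for half of the bias.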
From fcb512908630db298337c3ad13361e4493f1fb8b Mon Sep 17 00:00:00 2001
From: ddh0
Date: Mon, 15 Dec 2025 21:42:29 -0600
Subject: [PATCH 38/38] remove debug logging, explicitly clamp params at init

---
 src/llama-sampling.cpp | 24 +++++-------------------
 1 file changed, 5 insertions(+), 19 deletions(-)

diff --git a/src/llama-sampling.cpp b/src/llama-sampling.cpp
index 7684c8f38c..77ec141a56 100644
--- a/src/llama-sampling.cpp
+++ b/src/llama-sampling.cpp
@@ -2370,10 +2370,8 @@ static void llama_sampler_power_law_apply(struct llama_sampler * smpl, llama_tok
 
     if (ctx->target < 0.0f) {
         // no-op: just sample from the distribution as-is
-        fprintf(stderr, "power-law: no-op!\n"); fflush(stderr);
         llama_sampler_softmax_impl(cur_p, false);
-        const int idx = llama_sample_dist(cur_p, ctx->rng);
-        cur_p->selected = idx;
+        cur_p->selected = llama_sample_dist(cur_p, ctx->rng);
         return;
     }
 
@@ -2389,13 +2387,8 @@ static void llama_sampler_power_law_apply(struct llama_sampler * smpl, llama_tok
         ctx->total_weight == 0.0f ? ctx->target : 2.0f * ctx->target - (ctx->weighted_sum / ctx->total_weight),
         0.0f, 1.0f
     );
-    fprintf(stderr, "power-law: computed target = %.3f\n", computed_target);
 
-    //
     // power law transform
-    //
-
-    fprintf(stderr, "power-law: cur_p->size = %d\n", (int)cur_p->size);
     for (size_t i = 0; i < cur_p->size; ++i) {
         float dist = (cur_p->data[i].p - computed_target) * INV_WIDTH;
         cur_p->data[i].logit = PEAK_LOGIT_VALUE / (1.0f + dist * dist);
@@ -2406,14 +2399,10 @@ static void llama_sampler_power_law_apply(struct llama_sampler * smpl, llama_tok
     // sample from transformed distribution
     const int idx = llama_sample_dist(cur_p, ctx->rng);
     cur_p->selected = idx;
-    fprintf(stderr, "power-law: selected token at index %d\n", idx);
 
     // update running history with the original probability of the selected token
-    float original_p = ctx->original_probs[idx];
-    ctx->weighted_sum = original_p + ctx->decay * ctx->weighted_sum;
-    fprintf(stderr, "power-law: updated ctx->weighted_sum = %.3f\n", ctx->weighted_sum);
-    ctx->total_weight = 1.0f + ctx->decay * ctx->total_weight;
-    fprintf(stderr, "power-law: updated ctx->total_weight = %.3f\n", ctx->total_weight); fflush(stderr);
+    ctx->weighted_sum = ctx->original_probs[idx] + ctx->decay * ctx->weighted_sum;
+    ctx->total_weight = 1.0f + ctx->decay * ctx->total_weight; // history fades over time
 }
 
 static void llama_sampler_power_law_reset(struct llama_sampler * smpl) {
@@ -2453,15 +2442,12 @@ struct llama_sampler * llama_sampler_init_power_law(
     float decay,
     uint32_t seed
 ) {
-    const float _decay = std::min(decay, 0.99f);
-    fprintf(stderr, "power-law: init: target %.3f, decay %.3f\n", (double)target, (double)_decay);
-    fflush(stderr);
     auto seed_cur = get_rng_seed(seed);
     return llama_sampler_init(
         /* .iface = */ &llama_sampler_power_law_i,
         /* .ctx   = */ new llama_sampler_power_law {
-            /* .target       = */ target,
-            /* .decay        = */ _decay,
+            /* .target       = */ target < 0.0f ? target : std::clamp(target, 0.0f, 1.0f), // keep target < 0 as the documented "disabled" sentinel
+            /* .decay        = */ std::clamp(decay, 0.0f, 0.99f),
             /* .seed         = */ seed_cur,
             /* .rng          = */ std::mt19937(seed_cur),
             /* .weighted_sum = */ 0.0f,