add params to `struct common_params_sampling`, add reference to PR

2025-12-11 13:47:51 -06:00 · 2025-12-11 13:47:51 -06:00 · 88fb0f3f32
parent 66e2d17c7f
commit 88fb0f3f32
3 changed files with 39 additions and 34 deletions
--- a/common/common.h
+++ b/common/common.h
@ -116,6 +116,7 @@ enum common_sampler_type {
    COMMON_SAMPLER_TYPE_INFILL      = 9,
    COMMON_SAMPLER_TYPE_PENALTIES   = 10,
    COMMON_SAMPLER_TYPE_TOP_N_SIGMA = 11,
+    COMMON_SAMPLER_TYPE_POWER_LAW   = 12,
 };

 // dimensionality reduction methods, used by cvector-generator
@ -163,33 +164,36 @@ enum common_params_sampling_config : uint64_t {
 struct common_params_sampling {
    uint32_t seed = LLAMA_DEFAULT_SEED; // the seed used to initialize llama_sampler

-    int32_t n_prev             = 64;    // number of previous tokens to remember
-    int32_t n_probs            = 0;     // if greater than 0, output the probabilities of top n_probs tokens.
-    int32_t min_keep           = 0;     // 0 = disabled, otherwise samplers should return at least min_keep tokens
-    int32_t top_k              = 40;    // <= 0 to use vocab size
-    float   top_p              = 0.95f; // 1.0 = disabled
-    float   min_p              = 0.05f; // 0.0 = disabled
-    float   xtc_probability    = 0.00f; // 0.0 = disabled
-    float   xtc_threshold      = 0.10f; // > 0.5 disables XTC
-    float   typ_p              = 1.00f; // typical_p, 1.0 = disabled
-    float   temp               = 0.80f; // <= 0.0 to sample greedily, 0.0 to not output probabilities
-    float   dynatemp_range     = 0.00f; // 0.0 = disabled
-    float   dynatemp_exponent  = 1.00f; // controls how entropy maps to temperature in dynamic temperature sampler
-    int32_t penalty_last_n     = 64;    // last n tokens to penalize (0 = disable penalty, -1 = context size)
-    float   penalty_repeat     = 1.00f; // 1.0 = disabled
-    float   penalty_freq       = 0.00f; // 0.0 = disabled
-    float   penalty_present    = 0.00f; // 0.0 = disabled
-    float   dry_multiplier     = 0.0f;  // 0.0 = disabled;      DRY repetition penalty for tokens extending repetition:
-    float   dry_base           = 1.75f; // 0.0 = disabled;      multiplier * base ^ (length of sequence before token - allowed length)
-    int32_t dry_allowed_length = 2;     // tokens extending repetitions beyond this receive penalty
-    int32_t dry_penalty_last_n = -1;    // how many tokens to scan for repetitions (0 = disable penalty, -1 = context size)
-    int32_t mirostat           = 0;     // 0 = disabled, 1 = mirostat, 2 = mirostat 2.0
-    float   top_n_sigma        = -1.00f;// -1.0 = disabled
-    float   mirostat_tau       = 5.00f; // target entropy
-    float   mirostat_eta       = 0.10f; // learning rate
-    bool    ignore_eos         = false;
-    bool    no_perf            = false; // disable performance metrics
-    bool    timing_per_token   = false;
+    int32_t n_prev                 = 64;     // number of previous tokens to remember
+    int32_t n_probs                = 0;      // if greater than 0, output the probabilities of top n_probs tokens.
+    int32_t min_keep               = 0;      // 0 = disabled, otherwise samplers should return at least min_keep tokens
+    int32_t top_k                  = 40;     // <= 0 to use vocab size
+    float   top_p                  = 0.95f;  // 1.0 = disabled
+    float   min_p                  = 0.05f;  // 0.0 = disabled
+    float   xtc_probability        = 0.00f;  // 0.0 = disabled
+    float   xtc_threshold          = 0.10f;  // > 0.5 disables XTC
+    float   typ_p                  = 1.00f;  // typical_p, 1.0 = disabled
+    float   temp                   = 0.80f;  // <= 0.0 to sample greedily, 0.0 to not output probabilities
+    float   dynatemp_range         = 0.00f;  // 0.0 = disabled
+    float   dynatemp_exponent      = 1.00f;  // controls how entropy maps to temperature in dynamic temperature sampler
+    int32_t penalty_last_n         = 64;     // last n tokens to penalize (0 = disable penalty, -1 = context size)
+    float   penalty_repeat         = 1.00f;  // 1.0 = disabled
+    float   penalty_freq           = 0.00f;  // 0.0 = disabled
+    float   penalty_present        = 0.00f;  // 0.0 = disabled
+    float   dry_multiplier         = 0.0f;   // 0.0 = disabled;      DRY repetition penalty for tokens extending repetition:
+    float   dry_base               = 1.75f;  // 0.0 = disabled;      multiplier * base ^ (length of sequence before token - allowed length)
+    int32_t dry_allowed_length     = 2;      // tokens extending repetitions beyond this receive penalty
+    int32_t dry_penalty_last_n     = -1;     // how many tokens to scan for repetitions (0 = disable penalty, -1 = context size)
+    float   power_law_target       = 0.5;    // target probability (0.0 to 1.0)
+    float   power_law_target_range = 0.5;    // adapt the target within this range (target +/- range)
+    int32_t power_law_queue_size   = 10;     // rolling history window size for adaptation
+    int32_t mirostat               = 0;      // 0 = disabled, 1 = mirostat, 2 = mirostat 2.0
+    float   top_n_sigma            = -1.00f; // -1.0 = disabled
+    float   mirostat_tau           = 5.00f;  // target entropy
+    float   mirostat_eta           = 0.10f;  // learning rate
+    bool    ignore_eos             = false;
+    bool    no_perf                = false;  // disable performance metrics
+    bool    timing_per_token       = false;

    uint64_t user_sampling_config = 0; // bitfield to track user-specified samplers

--- a/include/llama.h
+++ b/include/llama.h
@ -1291,12 +1291,12 @@ extern "C" {

    /// @details power law sampler, reshapes probability distribution to target specific probability ranges
    /// ref: https://github.com/MrJackSpade/llama.cpp
-    /// ref: [PR]
+    /// ref: https://github.com/ggml-org/llama.cpp/pull/17927
    LLAMA_API struct llama_sampler * llama_sampler_init_power_law(
-                                float    target,       // target probability (0.0 to 1.0)
-                                float    target_range, // adaptive target range (±range from target)
-                                int32_t  queue_size,   // rolling history window size for adaptation
-                                uint32_t seed);        // RNG seed
+                               float    target,       // target probability (0.0 to 1.0)
+                               float    target_range, // adaptive target range (±range from target)
+                             int32_t    queue_size,   // rolling history window size for adaptation
+                            uint32_t    seed);        // RNG seed

    LLAMA_API struct llama_sampler * llama_sampler_init_logit_bias(
                             int32_t   n_vocab,
--- a/src/llama-sampling.cpp
+++ b/src/llama-sampling.cpp
@ -2315,7 +2315,7 @@ struct llama_sampler * llama_sampler_init_dry_testing(int32_t context_size, floa

 // power-law
 // ref: https://github.com/MrJackSpade/llama.cpp/tree/master
-// ref: [PR]
+// ref: https://github.com/ggml-org/llama.cpp/pull/17927

 struct llama_sampler_power_law {
    const float    target;
@ -2404,7 +2404,8 @@ static struct llama_sampler * llama_sampler_power_law_clone(const struct llama_s
    auto * result     = llama_sampler_init_power_law(ctx->target, ctx->target_range, ctx->queue_size, ctx->seed);
    auto * result_ctx = (llama_sampler_power_law *) result->ctx;

-    result_ctx->history  = ctx->history;
+    result_ctx->rng     = ctx->rng;
+    result_ctx->history = ctx->history;

    return result;
 }