From 88fb0f3f3288724eada8b1212ed6b8bd4552ac33 Mon Sep 17 00:00:00 2001 From: ddh0 Date: Thu, 11 Dec 2025 13:47:51 -0600 Subject: [PATCH] add params to `struct common_params_sampling`, add reference to PR --- common/common.h | 58 ++++++++++++++++++++++-------------------- include/llama.h | 10 ++++---- src/llama-sampling.cpp | 5 ++-- 3 files changed, 39 insertions(+), 34 deletions(-) diff --git a/common/common.h b/common/common.h index 2fd83f0cf9..e6d8af4b73 100644 --- a/common/common.h +++ b/common/common.h @@ -116,6 +116,7 @@ enum common_sampler_type { COMMON_SAMPLER_TYPE_INFILL = 9, COMMON_SAMPLER_TYPE_PENALTIES = 10, COMMON_SAMPLER_TYPE_TOP_N_SIGMA = 11, + COMMON_SAMPLER_TYPE_POWER_LAW = 12, }; // dimensionality reduction methods, used by cvector-generator @@ -163,33 +164,36 @@ enum common_params_sampling_config : uint64_t { struct common_params_sampling { uint32_t seed = LLAMA_DEFAULT_SEED; // the seed used to initialize llama_sampler - int32_t n_prev = 64; // number of previous tokens to remember - int32_t n_probs = 0; // if greater than 0, output the probabilities of top n_probs tokens. 
- int32_t min_keep = 0; // 0 = disabled, otherwise samplers should return at least min_keep tokens - int32_t top_k = 40; // <= 0 to use vocab size - float top_p = 0.95f; // 1.0 = disabled - float min_p = 0.05f; // 0.0 = disabled - float xtc_probability = 0.00f; // 0.0 = disabled - float xtc_threshold = 0.10f; // > 0.5 disables XTC - float typ_p = 1.00f; // typical_p, 1.0 = disabled - float temp = 0.80f; // <= 0.0 to sample greedily, 0.0 to not output probabilities - float dynatemp_range = 0.00f; // 0.0 = disabled - float dynatemp_exponent = 1.00f; // controls how entropy maps to temperature in dynamic temperature sampler - int32_t penalty_last_n = 64; // last n tokens to penalize (0 = disable penalty, -1 = context size) - float penalty_repeat = 1.00f; // 1.0 = disabled - float penalty_freq = 0.00f; // 0.0 = disabled - float penalty_present = 0.00f; // 0.0 = disabled - float dry_multiplier = 0.0f; // 0.0 = disabled; DRY repetition penalty for tokens extending repetition: - float dry_base = 1.75f; // 0.0 = disabled; multiplier * base ^ (length of sequence before token - allowed length) - int32_t dry_allowed_length = 2; // tokens extending repetitions beyond this receive penalty - int32_t dry_penalty_last_n = -1; // how many tokens to scan for repetitions (0 = disable penalty, -1 = context size) - int32_t mirostat = 0; // 0 = disabled, 1 = mirostat, 2 = mirostat 2.0 - float top_n_sigma = -1.00f;// -1.0 = disabled - float mirostat_tau = 5.00f; // target entropy - float mirostat_eta = 0.10f; // learning rate - bool ignore_eos = false; - bool no_perf = false; // disable performance metrics - bool timing_per_token = false; + int32_t n_prev = 64; // number of previous tokens to remember + int32_t n_probs = 0; // if greater than 0, output the probabilities of top n_probs tokens. 
+ int32_t min_keep = 0; // 0 = disabled, otherwise samplers should return at least min_keep tokens + int32_t top_k = 40; // <= 0 to use vocab size + float top_p = 0.95f; // 1.0 = disabled + float min_p = 0.05f; // 0.0 = disabled + float xtc_probability = 0.00f; // 0.0 = disabled + float xtc_threshold = 0.10f; // > 0.5 disables XTC + float typ_p = 1.00f; // typical_p, 1.0 = disabled + float temp = 0.80f; // <= 0.0 to sample greedily, 0.0 to not output probabilities + float dynatemp_range = 0.00f; // 0.0 = disabled + float dynatemp_exponent = 1.00f; // controls how entropy maps to temperature in dynamic temperature sampler + int32_t penalty_last_n = 64; // last n tokens to penalize (0 = disable penalty, -1 = context size) + float penalty_repeat = 1.00f; // 1.0 = disabled + float penalty_freq = 0.00f; // 0.0 = disabled + float penalty_present = 0.00f; // 0.0 = disabled + float dry_multiplier = 0.0f; // 0.0 = disabled; DRY repetition penalty for tokens extending repetition: + float dry_base = 1.75f; // 0.0 = disabled; multiplier * base ^ (length of sequence before token - allowed length) + int32_t dry_allowed_length = 2; // tokens extending repetitions beyond this receive penalty + int32_t dry_penalty_last_n = -1; // how many tokens to scan for repetitions (0 = disable penalty, -1 = context size) + float power_law_target = 0.50f; // target probability (0.0 to 1.0) + float power_law_target_range = 0.50f; // adapt the target within this range (target +/- range) + int32_t power_law_queue_size = 10; // rolling history window size for adaptation + int32_t mirostat = 0; // 0 = disabled, 1 = mirostat, 2 = mirostat 2.0 + float top_n_sigma = -1.00f; // -1.0 = disabled + float mirostat_tau = 5.00f; // target entropy + float mirostat_eta = 0.10f; // learning rate + bool ignore_eos = false; + bool no_perf = false; // disable performance metrics + bool timing_per_token = false; uint64_t user_sampling_config = 0; // bitfield to track user-specified samplers diff --git a/include/llama.h 
b/include/llama.h index 7e1e65523b..3adfdb9993 100644 --- a/include/llama.h +++ b/include/llama.h @@ -1291,12 +1291,12 @@ extern "C" { /// @details power law sampler, reshapes probability distribution to target specific probability ranges /// ref: https://github.com/MrJackSpade/llama.cpp - /// ref: [PR] + /// ref: https://github.com/ggml-org/llama.cpp/pull/17927 LLAMA_API struct llama_sampler * llama_sampler_init_power_law( - float target, // target probability (0.0 to 1.0) - float target_range, // adaptive target range (±range from target) - int32_t queue_size, // rolling history window size for adaptation - uint32_t seed); // RNG seed + float target, // target probability (0.0 to 1.0) + float target_range, // adaptive target range (±range from target) + int32_t queue_size, // rolling history window size for adaptation + uint32_t seed); // RNG seed LLAMA_API struct llama_sampler * llama_sampler_init_logit_bias( int32_t n_vocab, diff --git a/src/llama-sampling.cpp b/src/llama-sampling.cpp index 173f660c73..fb488acffe 100644 --- a/src/llama-sampling.cpp +++ b/src/llama-sampling.cpp @@ -2315,7 +2315,7 @@ struct llama_sampler * llama_sampler_init_dry_testing(int32_t context_size, floa // power-law // ref: https://github.com/MrJackSpade/llama.cpp/tree/master -// ref: [PR] +// ref: https://github.com/ggml-org/llama.cpp/pull/17927 struct llama_sampler_power_law { const float target; @@ -2404,7 +2404,8 @@ static struct llama_sampler * llama_sampler_power_law_clone(const struct llama_s auto * result = llama_sampler_init_power_law(ctx->target, ctx->target_range, ctx->queue_size, ctx->seed); auto * result_ctx = (llama_sampler_power_law *) result->ctx; - result_ctx->history = ctx->history; + result_ctx->rng = ctx->rng; + result_ctx->history = ctx->history; return result; }