From ffe163911be3201f303c40cf18df431ce14e6e71 Mon Sep 17 00:00:00 2001 From: ddh0 Date: Thu, 11 Dec 2025 15:16:11 -0600 Subject: [PATCH] add args, rename `queue_size` -> `window_size` --- common/arg.cpp | 23 +++++++++++++++++++++++ common/common.h | 2 +- include/llama.h | 4 ++-- src/llama-sampling.cpp | 14 +++++++------- 4 files changed, 33 insertions(+), 10 deletions(-) diff --git a/common/arg.cpp b/common/arg.cpp index a31dcbc689..4210633398 100644 --- a/common/arg.cpp +++ b/common/arg.cpp @@ -1501,6 +1501,29 @@ common_params_context common_params_parser_init(common_params & params, llama_ex } } ).set_sparam()); + add_opt(common_arg( + {"--power-law-target"}, "N", + string_format("Power Law sampler target probability (default: %.2f, 0.0 to 1.0)\n" + "[(more info)](https://github.com/ggml-org/llama.cpp/pull/17927)", + (double)params.sampling.power_law_target), + [](common_params & params, const std::string & value) { + params.sampling.power_law_target = std::stof(value); + } + ).set_sparam()); + add_opt(common_arg( + {"--power-law-target-range"}, "N", + string_format("Power Law sampler adaptive range +/- from target (default: %.2f, 0.0 = no adaptation)", (double)params.sampling.power_law_target_range), + [](common_params & params, const std::string & value) { + params.sampling.power_law_target_range = std::stof(value); + } + ).set_sparam()); + add_opt(common_arg( + {"--power-law-window-size"}, "N", + string_format("Power Law sampler rolling window size, in tokens (default: %d)", params.sampling.power_law_window_size), + [](common_params & params, int value) { + params.sampling.power_law_window_size = value; + } + ).set_sparam()); add_opt(common_arg( {"--dynatemp-range"}, "N", string_format("dynamic temperature range (default: %.1f, 0.0 = disabled)", (double)params.sampling.dynatemp_range), diff --git a/common/common.h b/common/common.h index e6d8af4b73..d4f1229a7e 100644 --- a/common/common.h +++ b/common/common.h @@ -186,7 +186,7 @@ struct common_params_sampling { int32_t dry_penalty_last_n = -1; // how many tokens to scan for repetitions (0 = disable penalty, -1 = context size) float power_law_target = 0.5; // target probability (0.0 to 1.0) float power_law_target_range = 0.5; // adapt the target within this range (target +/- range) - int32_t power_law_queue_size = 10; // rolling history window size for adaptation + int32_t power_law_window_size = 10; // rolling history window size for target adaptation int32_t mirostat = 0; // 0 = disabled, 1 = mirostat, 2 = mirostat 2.0 float top_n_sigma = -1.00f; // -1.0 = disabled float mirostat_tau = 5.00f; // target entropy diff --git a/include/llama.h b/include/llama.h index 3adfdb9993..1aace655d0 100644 --- a/include/llama.h +++ b/include/llama.h @@ -1294,8 +1294,8 @@ extern "C" { /// ref: https://github.com/ggml-org/llama.cpp/pull/17927 LLAMA_API struct llama_sampler * llama_sampler_init_power_law( float target, // target probability (0.0 to 1.0) - float target_range, // adaptive target range (±range from target) - int32_t queue_size, // rolling history window size for adaptation + float target_range, // adaptive target range (+/- range from target) + int32_t window_size, // rolling history window size for target adaptation uint32_t seed); // RNG seed LLAMA_API struct llama_sampler * llama_sampler_init_logit_bias( diff --git a/src/llama-sampling.cpp b/src/llama-sampling.cpp index eacad79448..e2c229cd9f 100644 --- a/src/llama-sampling.cpp +++ b/src/llama-sampling.cpp @@ -2320,7 +2320,7 @@ struct llama_sampler * llama_sampler_init_dry_testing(int32_t context_size, floa struct llama_sampler_power_law { const float target; const float target_range; - const int32_t queue_size; + const int32_t window_size; const uint32_t seed; std::mt19937 rng; @@ -2359,7 +2359,7 @@ static void llama_sampler_power_law_apply(struct llama_sampler * smpl, llama_tok sum_excluding_oldest += ctx->history.rat(i); } - float next_value = (ctx->target * ctx->queue_size) - sum_excluding_oldest; + float next_value = (ctx->target * ctx->window_size) - sum_excluding_oldest; computed_target = std::max(min_target, std::min(next_value, max_target)); } @@ -2397,12 +2397,12 @@ static void llama_sampler_power_law_apply(struct llama_sampler * smpl, llama_tok static void llama_sampler_power_law_reset(struct llama_sampler * smpl) { auto * ctx = (llama_sampler_power_law *) smpl->ctx; - ctx->history = ring_buffer(ctx->queue_size); + ctx->history = ring_buffer(ctx->window_size); } static struct llama_sampler * llama_sampler_power_law_clone(const struct llama_sampler * smpl) { const auto * ctx = (const llama_sampler_power_law *) smpl->ctx; - auto * result = llama_sampler_init_power_law(ctx->target, ctx->target_range, ctx->queue_size, ctx->seed); + auto * result = llama_sampler_init_power_law(ctx->target, ctx->target_range, ctx->window_size, ctx->seed); auto * result_ctx = (llama_sampler_power_law *) result->ctx; result_ctx->rng = ctx->rng; @@ -2427,7 +2427,7 @@ static struct llama_sampler_i llama_sampler_power_law_i = { struct llama_sampler * llama_sampler_init_power_law( float target, float target_range, - int32_t queue_size, + int32_t window_size, uint32_t seed ) { auto seed_cur = get_rng_seed(seed); @@ -2436,10 +2436,10 @@ struct llama_sampler * llama_sampler_init_power_law( /* .ctx = */ new llama_sampler_power_law { /* .target = */ target, /* .target_range = */ target_range, - /* .queue_size = */ queue_size, + /* .window_size = */ window_size, /* .seed = */ seed_cur, /* .rng = */ std::mt19937(seed_cur), - /* .history = */ ring_buffer(queue_size), + /* .history = */ ring_buffer(window_size), } ); }