diff --git a/common/arg.cpp b/common/arg.cpp index 919e37b7f8..e7bb44f8f5 100644 --- a/common/arg.cpp +++ b/common/arg.cpp @@ -1569,12 +1569,10 @@ common_params_context common_params_parser_init(common_params & params, llama_ex ).set_sparam()); add_opt(common_arg( {"--power-law-decay"}, "N", - string_format("power law sampler: decay rate for target adaptation over time. lower " - "values -> faster but less stable adaptation. " - "(valid range 0.0 to 1.0; ≤0 = no adaptation) (default: %.2f)", - (double)params.sampling.power_law_decay), - [](common_params & params, int value) { - params.sampling.power_law_decay = value; + string_format("decay rate for target adaptation over time. lower values -> faster but less stable adaptation.\n" + "(valid range 0.0 to 1.0; ≤0 = no adaptation) (default: %.2f)", (double)params.sampling.power_law_decay), + [](common_params & params, const std::string & value) { + params.sampling.power_law_decay = std::stof(value); } ).set_sparam()); add_opt(common_arg( diff --git a/src/llama-sampling.cpp b/src/llama-sampling.cpp index 26135a4f82..6beb927a6c 100644 --- a/src/llama-sampling.cpp +++ b/src/llama-sampling.cpp @@ -2427,8 +2427,11 @@ static void llama_sampler_power_law_apply(struct llama_sampler * smpl, llama_tok // update running history with the original probability of the selected token float original_p = original_probs[idx]; + fprintf(stderr, "power-law: original prob was %.3f\n", original_p); fflush(stderr); ctx->weighted_sum = original_p + decay * ctx->weighted_sum; + fprintf(stderr, "power-law: updated ctx->weighted_sum = %.3f\n", ctx->weighted_sum); fflush(stderr); ctx->total_weight = 1.0f + decay * ctx->total_weight; + fprintf(stderr, "power-law: updated ctx->total_weight = %.3f\n", ctx->total_weight); fflush(stderr); } static void llama_sampler_power_law_reset(struct llama_sampler * smpl) {