remove `target_range` param, make `target < 0` a no-op, cleanup code
This commit is contained in:
parent
dcada035b4
commit
2d62bbea9f
|
|
@ -1503,23 +1503,16 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
|
|||
).set_sparam());
|
||||
add_opt(common_arg(
|
||||
{"--power-law-target"}, "N",
|
||||
string_format("Power Law sampler target probability (default: %.2f; allowed range 0.0 to 1.0)\n"
|
||||
"[(more info)](https://github.com/ggml-org/llama.cpp/pull/17927)",
|
||||
string_format("target probability for Power Law sampling (valid range 0.0 to 1.0; <0 = disabled) "
|
||||
"(%.1f = default)\n[(more info)](https://github.com/ggml-org/llama.cpp/pull/17927)",
|
||||
(double)params.sampling.power_law_target),
|
||||
[](common_params & params, const std::string & value) {
|
||||
params.sampling.power_law_target = std::stof(value);
|
||||
}
|
||||
).set_sparam());
|
||||
add_opt(common_arg(
|
||||
{"--power-law-target-range"}, "N",
|
||||
string_format("Power Law sampler adaptive target range (target±range) (default: %.2f; 0.0 = fixed target)", (double)params.sampling.power_law_target_range),
|
||||
[](common_params & params, const std::string & value) {
|
||||
params.sampling.power_law_target_range = std::stof(value);
|
||||
}
|
||||
).set_sparam());
|
||||
add_opt(common_arg(
|
||||
{"--power-law-window-size"}, "N",
|
||||
string_format("Power Law sampler rolling window size, in tokens (default: %d; 0 = fixed target)", params.sampling.power_law_window_size),
|
||||
string_format("rolling window size for target adaptation in Power Law sampling (≤0 = fixed target; %d = default)", params.sampling.power_law_window_size),
|
||||
[](common_params & params, int value) {
|
||||
params.sampling.power_law_window_size = value;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -164,42 +164,40 @@ enum common_params_sampling_config : uint64_t {
|
|||
struct common_params_sampling {
|
||||
uint32_t seed = LLAMA_DEFAULT_SEED; // the seed used to initialize llama_sampler
|
||||
|
||||
int32_t n_prev = 64; // number of previous tokens to remember
|
||||
int32_t n_probs = 0; // if greater than 0, output the probabilities of top n_probs tokens.
|
||||
int32_t min_keep = 0; // 0 = disabled, otherwise samplers should return at least min_keep tokens
|
||||
int32_t top_k = 40; // <= 0 to use vocab size
|
||||
float top_p = 0.95f; // 1.0 = disabled
|
||||
float min_p = 0.05f; // 0.0 = disabled
|
||||
float xtc_probability = 0.00f; // 0.0 = disabled
|
||||
float xtc_threshold = 0.10f; // > 0.5 disables XTC
|
||||
float typ_p = 1.00f; // typical_p, 1.0 = disabled
|
||||
float temp = 0.80f; // <= 0.0 to sample greedily, 0.0 to not output probabilities
|
||||
float dynatemp_range = 0.00f; // 0.0 = disabled
|
||||
float dynatemp_exponent = 1.00f; // controls how entropy maps to temperature in dynamic temperature sampler
|
||||
int32_t penalty_last_n = 64; // last n tokens to penalize (0 = disable penalty, -1 = context size)
|
||||
float penalty_repeat = 1.00f; // 1.0 = disabled
|
||||
float penalty_freq = 0.00f; // 0.0 = disabled
|
||||
float penalty_present = 0.00f; // 0.0 = disabled
|
||||
float dry_multiplier = 0.0f; // 0.0 = disabled; DRY repetition penalty for tokens extending repetition:
|
||||
float dry_base = 1.75f; // 0.0 = disabled; multiplier * base ^ (length of sequence before token - allowed length)
|
||||
int32_t dry_allowed_length = 2; // tokens extending repetitions beyond this receive penalty
|
||||
int32_t dry_penalty_last_n = -1; // how many tokens to scan for repetitions (0 = disable penalty, -1 = context size)
|
||||
float power_law_target = 0.5; // target probability (0.0 to 1.0)
|
||||
float power_law_target_range = 0.5; // adapt the target within this range (target +/- range)
|
||||
int32_t power_law_window_size = 10; // rolling history window size for target adaptation
|
||||
int32_t mirostat = 0; // 0 = disabled, 1 = mirostat, 2 = mirostat 2.0
|
||||
float top_n_sigma = -1.00f; // -1.0 = disabled
|
||||
float mirostat_tau = 5.00f; // target entropy
|
||||
float mirostat_eta = 0.10f; // learning rate
|
||||
bool ignore_eos = false;
|
||||
bool no_perf = false; // disable performance metrics
|
||||
bool timing_per_token = false;
|
||||
int32_t n_prev = 64; // number of previous tokens to remember
|
||||
int32_t n_probs = 0; // if greater than 0, output the probabilities of top n_probs tokens.
|
||||
int32_t min_keep = 0; // 0 = disabled, otherwise samplers should return at least min_keep tokens
|
||||
int32_t top_k = 40; // <= 0 to use vocab size
|
||||
float top_p = 0.95f; // 1.0 = disabled
|
||||
float min_p = 0.05f; // 0.0 = disabled
|
||||
float xtc_probability = 0.00f; // 0.0 = disabled
|
||||
float xtc_threshold = 0.10f; // > 0.5 disables XTC
|
||||
float typ_p = 1.00f; // typical_p, 1.0 = disabled
|
||||
float temp = 0.80f; // <= 0.0 to sample greedily, 0.0 to not output probabilities
|
||||
float dynatemp_range = 0.00f; // 0.0 = disabled
|
||||
float dynatemp_exponent = 1.00f; // controls how entropy maps to temperature in dynamic temperature sampler
|
||||
int32_t penalty_last_n = 64; // last n tokens to penalize (0 = disable penalty, -1 = context size)
|
||||
float penalty_repeat = 1.00f; // 1.0 = disabled
|
||||
float penalty_freq = 0.00f; // 0.0 = disabled
|
||||
float penalty_present = 0.00f; // 0.0 = disabled
|
||||
float dry_multiplier = 0.0f; // 0.0 = disabled; DRY repetition penalty for tokens extending repetition:
|
||||
float dry_base = 1.75f; // 0.0 = disabled; multiplier * base ^ (length of sequence before token - allowed length)
|
||||
int32_t dry_allowed_length = 2; // tokens extending repetitions beyond this receive penalty
|
||||
int32_t dry_penalty_last_n = -1; // how many tokens to scan for repetitions (0 = disable penalty, -1 = context size)
|
||||
float power_law_target = -1.0f; // target probability for Power Law sampling (valid range 0.0 to 1.0; <0 = disabled)
|
||||
int32_t power_law_window_size = 10; // rolling window size for target adaptation in Power Law sampling (≤0 = fixed target)
|
||||
int32_t mirostat = 0; // 0 = disabled, 1 = mirostat, 2 = mirostat 2.0
|
||||
float top_n_sigma = -1.00f; // -1.0 = disabled
|
||||
float mirostat_tau = 5.00f; // target entropy
|
||||
float mirostat_eta = 0.10f; // learning rate
|
||||
bool ignore_eos = false;
|
||||
bool no_perf = false; // disable performance metrics
|
||||
bool timing_per_token = false;
|
||||
|
||||
uint64_t user_sampling_config = 0; // bitfield to track user-specified samplers
|
||||
|
||||
std::vector<std::string> dry_sequence_breakers = {"\n", ":", "\"", "*"}; // default sequence breakers for DRY
|
||||
|
||||
|
||||
std::vector<enum common_sampler_type> samplers = {
|
||||
COMMON_SAMPLER_TYPE_PENALTIES,
|
||||
COMMON_SAMPLER_TYPE_DRY,
|
||||
|
|
|
|||
|
|
@ -1297,13 +1297,16 @@ extern "C" {
|
|||
///
|
||||
/// it is recommended to only perform minimal truncation before this sampler.
|
||||
///
|
||||
/// @param target target probability (valid range 0.0 to 1.0; <0 = disabled)
|
||||
/// @param window_size rolling window size for target adaptation (≤0 = fixed target)
|
||||
/// @param seed RNG seed
|
||||
///
|
||||
/// ref: https://github.com/MrJackSpade/llama.cpp/tree/master (original impl, documentation)
|
||||
/// ref: https://github.com/ggml-org/llama.cpp/pull/17927 (llama.cpp PR)
|
||||
LLAMA_API struct llama_sampler * llama_sampler_init_power_law(
|
||||
float target, // target probability (0.0 to 1.0)
|
||||
float target_range, // adaptive target range (target±range)
|
||||
int32_t window_size, // rolling history window size for target adaptation
|
||||
uint32_t seed); // RNG seed
|
||||
float target,
|
||||
int32_t window_size,
|
||||
uint32_t seed);
|
||||
|
||||
LLAMA_API struct llama_sampler * llama_sampler_init_logit_bias(
|
||||
int32_t n_vocab,
|
||||
|
|
|
|||
|
|
@ -2326,12 +2326,11 @@ struct llama_sampler * llama_sampler_init_dry_testing(int32_t context_size, floa
|
|||
|
||||
// Per-instance state of the power-law sampler: fixed configuration, the RNG used for
// sampling, and a ring buffer of values recorded by the apply step.
// NOTE(review): this is a rendered diff hunk with both sides interleaved. `target_range`,
// the first `seed` line and `history` appear to be the pre-change side; the second `seed`
// line and `window` appear to be the post-change side (the commit removes `target_range`).
struct llama_sampler_power_law {
|
||||
// target probability; the apply step skips the transform when this is negative
const float target;
|
||||
// half-width used by the apply step to derive min/max target (pre-change side only)
const float target_range;
|
||||
// capacity handed to the ring buffer on init and reset; also the multiplier in the
// adaptive-target formula of the apply step
const int32_t window_size;
|
||||
const uint32_t seed;
|
||||
|
||||
// seed value stored at construction; clone passes it back to the init function
const uint32_t seed;
|
||||
// generator passed to llama_sample_dist by the apply step; copied verbatim by clone
std::mt19937 rng;
|
||||
// receives the pre-transform probability of each sampled token (pre-change name)
ring_buffer<float> history;
|
||||
// receives the pre-transform probability of each sampled token (post-change name)
ring_buffer<float> window;
|
||||
};
|
||||
|
||||
static const char * llama_sampler_power_law_name(const struct llama_sampler * /*smpl*/) {
|
||||
|
|
@ -2341,66 +2340,82 @@ static const char * llama_sampler_power_law_name(const struct llama_sampler * /*
|
|||
// Apply step of the power-law sampler: picks one candidate from cur_p and stores its
// index in cur_p->selected.
//
// - When ctx->target < 0 the candidates are passed through llama_sampler_softmax_impl
//   and sampled as-is; nothing is recorded in the rolling buffer.
// - Otherwise a per-call target is derived from ctx->target and the rolling buffer,
//   every logit is rewritten as peak / (1 + (|p - target| / width)^tail), the
//   candidates are passed through llama_sampler_softmax_impl again, one is sampled, and
//   its pre-transform probability is pushed into the rolling buffer.
//
// NOTE(review): this is a rendered diff hunk with both sides interleaved. The
// target_range-based min/max lines, the ctx->history lines and the literal 0.2f / 3.0f
// transform lines appear to be the pre-change side; their neighbours using constants,
// ctx->window and named parameters appear to be the post-change side.
static void llama_sampler_power_law_apply(struct llama_sampler * smpl, llama_token_data_array * cur_p) {
|
||||
auto * ctx = (llama_sampler_power_law *) smpl->ctx;
|
||||
|
||||
// clamp the target range to [0.0, 1.0]
|
||||
const float min_target = std::max(ctx->target - ctx->target_range, 0.0f);
|
||||
const float max_target = std::min(ctx->target + ctx->target_range, 1.0f);
|
||||
// negative target = sampler disabled: early return before any window bookkeeping
if (ctx->target < 0.0f) {
|
||||
// no-op: just sample from the distribution as-is
|
||||
llama_sampler_softmax_impl(cur_p, false);
|
||||
const int idx = llama_sample_dist(cur_p, ctx->rng);
|
||||
cur_p->selected = idx;
|
||||
return;
|
||||
}
|
||||
|
||||
// fixed power law transform parameters (from original implementation)
|
||||
// divisor applied to |p - target| before the power is taken
const float distribution_width = 0.2f;
|
||||
// logit assigned to a candidate whose probability equals the target exactly
const float peak_logit_value = 3.0f;
|
||||
// exponent applied to the normalized distance
const float tail_heaviness = 3.0f;
|
||||
|
||||
// compute probabilities to get the "original" values
|
||||
llama_sampler_softmax_impl(cur_p, false);
|
||||
|
||||
// store original probabilities (needed for history update)
|
||||
// store original probabilities (used for future target adaptation)
|
||||
// copied now because the transform loop below overwrites the logits and the second
// llama_sampler_softmax_impl call follows it
std::vector<float> original_probs;
|
||||
original_probs.reserve(cur_p->size);
|
||||
for (size_t i = 0; i < cur_p->size; ++i) {
|
||||
original_probs.push_back(cur_p->data[i].p);
|
||||
}
|
||||
|
||||
//
|
||||
// calculate adaptive target
|
||||
//
|
||||
|
||||
const float min_target = 0.0f;
|
||||
const float max_target = 1.0f;
|
||||
|
||||
// with an empty buffer the configured target is used unchanged
float computed_target = ctx->target;
|
||||
if (ctx->history.size() > 0) {
|
||||
if (ctx->window.size() > 0) {
|
||||
float sum_excluding_oldest = 0.0f;
|
||||
size_t sz = ctx->history.size();
|
||||
size_t sz = ctx->window.size();
|
||||
|
||||
// sum all except the oldest element
|
||||
// sz >= 1 is guaranteed by the enclosing check, so sz - 1 cannot wrap around
// presumably rat(i) indexes from the newest entry backwards — confirm in ring_buffer
for (size_t i = 0; i < sz - 1; ++i) {
|
||||
sum_excluding_oldest += ctx->history.rat(i);
|
||||
sum_excluding_oldest += ctx->window.rat(i);
|
||||
}
|
||||
|
||||
// value the next pick would need so that it plus the summed entries equals
// target * window_size; clamped to [min_target, max_target] on the next line
// NOTE(review): the formula uses window_size, not sz, so while the buffer holds
// fewer than window_size entries the missing ones effectively count as 0 — confirm intended
float next_value = (ctx->target * ctx->window_size) - sum_excluding_oldest;
|
||||
computed_target = std::max(min_target, std::min(next_value, max_target));
|
||||
}
|
||||
|
||||
// apply power law transformation
|
||||
//
|
||||
// power law transform
|
||||
//
|
||||
|
||||
// logit is largest where p == computed_target and decays with distance from it
for (size_t i = 0; i < cur_p->size; ++i) {
|
||||
float p = cur_p->data[i].p;
|
||||
float normalized_distance = std::abs(p - computed_target) / 0.2f;
|
||||
cur_p->data[i].logit = 3.0f / (1.0f + std::pow(normalized_distance, 3.0f));
|
||||
float normalized_distance = std::abs(p - computed_target) / distribution_width;
|
||||
cur_p->data[i].logit = peak_logit_value / (1.0f + std::pow(normalized_distance, tail_heaviness));
|
||||
}
|
||||
|
||||
llama_sampler_softmax_impl(cur_p, false);
|
||||
|
||||
// sample from distribution
|
||||
// sample from the transformed distribution
|
||||
const int idx = llama_sample_dist(cur_p, ctx->rng);
|
||||
|
||||
// set sampled token
|
||||
cur_p->selected = idx;
|
||||
|
||||
// update history with ORIGINAL probability
|
||||
// NOTE(review): original_probs[idx] is only the picked token's probability if the second
// llama_sampler_softmax_impl call kept cur_p->data in the same order — presumably what
// the `false` argument requests; confirm
ctx->history.push_back(original_probs[idx]);
|
||||
// add the ORIGINAL probability to the rolling window
|
||||
// NOTE(review): this push is unconditional, while the option/API docs advertise
// window_size <= 0 as "fixed target" — confirm ring_buffer tolerates a non-positive
// capacity on construction and on push_back
ctx->window.push_back(original_probs[idx]);
|
||||
}
|
||||
|
||||
// Reset hook of the power-law sampler: replaces the rolling buffer with a newly
// constructed ring_buffer of capacity ctx->window_size. No other member is assigned
// here, so target, seed and the RNG state are left as they are.
// NOTE(review): rendered diff hunk — the `history` pair of lines appears to be the
// pre-change side, the `window` pair the post-change side.
static void llama_sampler_power_law_reset(struct llama_sampler * smpl) {
|
||||
auto * ctx = (llama_sampler_power_law *) smpl->ctx;
|
||||
ctx->history = ring_buffer<float>(ctx->window_size);
|
||||
auto * ctx = (llama_sampler_power_law *) smpl->ctx;
|
||||
ctx->window = ring_buffer<float>(ctx->window_size);
|
||||
}
|
||||
|
||||
// Clone hook of the power-law sampler: builds a new sampler through
// llama_sampler_init_power_law with the source's stored parameters, then overwrites the
// new sampler's RNG and rolling buffer with copies of the source's, so the clone carries
// the same RNG state and recorded values as the source at the time of the call.
// Returns the newly created sampler.
// NOTE(review): rendered diff hunk — the four-argument init call and the `history` copy
// appear to be the pre-change side; the three-argument call and the `window` copy the
// post-change side.
static struct llama_sampler * llama_sampler_power_law_clone(const struct llama_sampler * smpl) {
|
||||
const auto * ctx = (const llama_sampler_power_law *) smpl->ctx;
|
||||
auto * result = llama_sampler_init_power_law(ctx->target, ctx->target_range, ctx->window_size, ctx->seed);
|
||||
auto * result = llama_sampler_init_power_law(ctx->target, ctx->window_size, ctx->seed);
|
||||
auto * result_ctx = (llama_sampler_power_law *) result->ctx;
|
||||
|
||||
// init seeds a fresh generator; copy the live one so the clone does not restart from the seed
result_ctx->rng = ctx->rng;
|
||||
result_ctx->history = ctx->history;
|
||||
result_ctx->window = ctx->window;
|
||||
|
||||
return result;
|
||||
}
|
||||
|
|
@ -2420,7 +2435,6 @@ static struct llama_sampler_i llama_sampler_power_law_i = {
|
|||
|
||||
struct llama_sampler * llama_sampler_init_power_law(
|
||||
float target,
|
||||
float target_range,
|
||||
int32_t window_size,
|
||||
uint32_t seed
|
||||
) {
|
||||
|
|
@ -2429,11 +2443,10 @@ struct llama_sampler * llama_sampler_init_power_law(
|
|||
/* .iface = */ &llama_sampler_power_law_i,
|
||||
/* .ctx = */ new llama_sampler_power_law {
|
||||
/* .target = */ target,
|
||||
/* .target_range = */ target_range,
|
||||
/* .window_size = */ window_size,
|
||||
/* .seed = */ seed_cur,
|
||||
/* .rng = */ std::mt19937(seed_cur),
|
||||
/* .history = */ ring_buffer<float>(window_size),
|
||||
/* .window = */ ring_buffer<float>(window_size),
|
||||
}
|
||||
);
|
||||
}
|
||||
|
|
|
|||
Loading…
Reference in New Issue