Update comments
This commit is contained in:
parent
5aceb9e3ae
commit
ee05d6bc0b
|
|
@ -596,13 +596,13 @@ static std::unordered_map<std::string, ggml_type> target_bpw_type(
|
||||||
ggml_type type;
|
ggml_type type;
|
||||||
float bpw;
|
float bpw;
|
||||||
size_t bytes;
|
size_t bytes;
|
||||||
float error; // lower is better
|
float error;
|
||||||
};
|
};
|
||||||
|
|
||||||
struct tensor_info {
|
struct tensor_info {
|
||||||
const llama_model_loader::llama_tensor_weight * w;
|
const llama_model_loader::llama_tensor_weight * w;
|
||||||
std::vector<candidate_types> candidate; // sorted by bpw ascending
|
std::vector<candidate_types> candidate;
|
||||||
int choice = -1; // index into cand
|
int choice = -1;
|
||||||
float min_bpw = 0.0;
|
float min_bpw = 0.0;
|
||||||
float max_bpw = 0.0;
|
float max_bpw = 0.0;
|
||||||
size_t n_elements = 0;
|
size_t n_elements = 0;
|
||||||
|
|
@ -610,7 +610,6 @@ static std::unordered_map<std::string, ggml_type> target_bpw_type(
|
||||||
|
|
||||||
auto name_tn = LLM_TN(model.arch);
|
auto name_tn = LLM_TN(model.arch);
|
||||||
|
|
||||||
// The candidate types we consider; adjust as needed
|
|
||||||
const ggml_type base_candidates[] = {
|
const ggml_type base_candidates[] = {
|
||||||
// Model's
|
// Model's
|
||||||
GGML_TYPE_IQ1_S,
|
GGML_TYPE_IQ1_S,
|
||||||
|
|
@ -639,8 +638,6 @@ static std::unordered_map<std::string, ggml_type> target_bpw_type(
|
||||||
bool q = name.rfind("weight") == name.size() - 6;
|
bool q = name.rfind("weight") == name.size() - 6;
|
||||||
q &= (ggml_n_dims(t) >= 2);
|
q &= (ggml_n_dims(t) >= 2);
|
||||||
q &= name.find("_norm.weight") == std::string::npos;
|
q &= name.find("_norm.weight") == std::string::npos;
|
||||||
//q &= name != name_tn(LLM_TENSOR_TOKEN_EMBD, "weight");
|
|
||||||
//q &= name != name_tn(LLM_TENSOR_OUTPUT, "weight");
|
|
||||||
q &= name.find("ffn_gate_inp.weight") == std::string::npos;
|
q &= name.find("ffn_gate_inp.weight") == std::string::npos;
|
||||||
q &= name.find("altup") == std::string::npos;
|
q &= name.find("altup") == std::string::npos;
|
||||||
q &= name.find("laurel") == std::string::npos;
|
q &= name.find("laurel") == std::string::npos;
|
||||||
|
|
@ -719,7 +716,7 @@ static std::unordered_map<std::string, ggml_type> target_bpw_type(
|
||||||
|
|
||||||
const ggml_type_traits * traits = ggml_get_type_traits(typ);
|
const ggml_type_traits * traits = ggml_get_type_traits(typ);
|
||||||
if (!traits || !traits->to_float) {
|
if (!traits || !traits->to_float) {
|
||||||
// cannot dequantize candidate -> assign very high error
|
// Cannot dequantize candidate -> assign very high error
|
||||||
return 1e35f;
|
return 1e35f;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -842,12 +839,10 @@ static std::unordered_map<std::string, ggml_type> target_bpw_type(
|
||||||
info.w = tw;
|
info.w = tw;
|
||||||
info.n_elements = nelem;
|
info.n_elements = nelem;
|
||||||
|
|
||||||
// Candidate build with compatibility handling and availability checks
|
// Build per-tensor candidate list
|
||||||
for (ggml_type ts_type : base_candidates) {
|
for (ggml_type ts_type : base_candidates) {
|
||||||
// Skip IQ* without imatrix
|
|
||||||
if (is_iq(ts_type) && !values) { continue; }
|
if (is_iq(ts_type) && !values) { continue; }
|
||||||
ggml_type tt = make_compatible(t, ts_type);
|
ggml_type tt = make_compatible(t, ts_type);
|
||||||
// After fallback, if still incompatible, skip
|
|
||||||
if (!is_compatible(t, tt)) { continue; }
|
if (!is_compatible(t, tt)) { continue; }
|
||||||
|
|
||||||
// Compute bpw and bytes
|
// Compute bpw and bytes
|
||||||
|
|
@ -861,19 +856,18 @@ static std::unordered_map<std::string, ggml_type> target_bpw_type(
|
||||||
}
|
}
|
||||||
|
|
||||||
if (info.candidate.empty()) {
|
if (info.candidate.empty()) {
|
||||||
// as a last resort, keep original type
|
// As a last resort, keep original type
|
||||||
float bpw = ggml_nbytes(t) * 8.0f / nelem;
|
float bpw = ggml_nbytes(t) * 8.0f / nelem;
|
||||||
info.candidate.push_back(candidate_types{t->type, bpw, ggml_nbytes(t), 0.0});
|
info.candidate.push_back(candidate_types{t->type, bpw, ggml_nbytes(t), 0.0});
|
||||||
}
|
}
|
||||||
|
|
||||||
// Sort by bpw ascending
|
|
||||||
std::sort(info.candidate.begin(), info.candidate.end(), [](const candidate_types &a, const candidate_types &b) {
|
std::sort(info.candidate.begin(), info.candidate.end(), [](const candidate_types &a, const candidate_types &b) {
|
||||||
if (a.bpw != b.bpw) { return a.bpw < b.bpw; }
|
if (a.bpw != b.bpw) { return a.bpw < b.bpw; }
|
||||||
if (a.error != b.error) { return a.error < b.error; }
|
if (a.error != b.error) { return a.error < b.error; }
|
||||||
return a.bytes < b.bytes;
|
return a.bytes < b.bytes;
|
||||||
});
|
});
|
||||||
|
|
||||||
// collapse candidates with identical storage size (bytes)
|
// Collapse candidates with identical storage size (bytes)
|
||||||
{
|
{
|
||||||
std::vector<candidate_types> uniq;
|
std::vector<candidate_types> uniq;
|
||||||
uniq.reserve(info.candidate.size());
|
uniq.reserve(info.candidate.size());
|
||||||
|
|
@ -903,7 +897,6 @@ static std::unordered_map<std::string, ggml_type> target_bpw_type(
|
||||||
if (all.empty()) { return {}; }
|
if (all.empty()) { return {}; }
|
||||||
|
|
||||||
// Greedy allocation from minimum bpw upward to reach target_bpw
|
// Greedy allocation from minimum bpw upward to reach target_bpw
|
||||||
// Start with minimal bpw assignment
|
|
||||||
auto current_total_bytes = [&]() -> size_t {
|
auto current_total_bytes = [&]() -> size_t {
|
||||||
size_t b = 0;
|
size_t b = 0;
|
||||||
for (const auto & ti : all) {
|
for (const auto & ti : all) {
|
||||||
|
|
@ -938,11 +931,11 @@ static std::unordered_map<std::string, ggml_type> target_bpw_type(
|
||||||
}
|
}
|
||||||
|
|
||||||
struct upgrade {
|
struct upgrade {
|
||||||
int idx; // tensor index
|
int idx;
|
||||||
int next; // next candidate index (strictly larger bytes)
|
int next;
|
||||||
double err; // error reduction
|
double err;
|
||||||
size_t delta_bytes; // increase in bytes
|
size_t delta_bytes;
|
||||||
double ratio; // err per added bit
|
double ratio;
|
||||||
};
|
};
|
||||||
|
|
||||||
// Find next strictly-larger candidate index for a tensor
|
// Find next strictly-larger candidate index for a tensor
|
||||||
|
|
@ -998,6 +991,7 @@ static std::unordered_map<std::string, ggml_type> target_bpw_type(
|
||||||
}
|
}
|
||||||
|
|
||||||
// We might still be below target but taking any single upgrade overshoots.
|
// We might still be below target but taking any single upgrade overshoots.
|
||||||
|
// Try to find the best upgrade that overshoots the target_bpw by the least and has the best error-to-size ratio.
|
||||||
{
|
{
|
||||||
double under_gap = (double)target_bpw - bpw_now;
|
double under_gap = (double)target_bpw - bpw_now;
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue