Refactor pareto pruning and convexification
This commit is contained in:
parent
6b8cedf3bc
commit
c466c53808
@@ -1146,8 +1146,7 @@ static std::unordered_map<std::string, ggml_type> target_bpw_type(
     for (size_t i = 0; i < base_sz; ++i) {
         ggml_type ts_type = base_arr[i];
         if (is_iq(ts_type) && !has_valid_imatrix) {
-            LLAMA_LOG_WARN("%s: skipping %s quantization for %s, no or mismatched imatrix provided\n",
-                __func__, ggml_type_name(ts_type), name.c_str());
+            LLAMA_LOG_WARN("%s: skipping %s quantization for %s, no or mismatched imatrix provided\n", __func__, ggml_type_name(ts_type), name.c_str());
             continue;
         }
 
@@ -1214,60 +1213,54 @@ static std::unordered_map<std::string, ggml_type> target_bpw_type(
         info.candidate.push_back(candidate_types{ tensor->type, bpw, ggml_nbytes(tensor), 0.0 });
     }
 
-    // Keep only the pareto‑optimal candidates: if A has >= bytes and >= error than B, drop A.
+    // Keep only the pareto‑optimal candidates and enforce convexity in (bytes, error) curve
     {
-        std::vector<candidate_types> pruned;
-        pruned.reserve(info.candidate.size());
-
-        // Sort by bytes ascending, error ascending
-        std::sort(info.candidate.begin(), info.candidate.end(), [](const candidate_types & a, const candidate_types & b) {
-            if (a.bytes != b.bytes) { return a.bytes < b.bytes; }
-            return a.error < b.error;
-        });
+        auto & candidates = info.candidate;
+        if (!candidates.empty()) {
+            std::sort(candidates.begin(), candidates.end(), [](const candidate_types & a, const candidate_types & b) {
+                if (a.bytes != b.bytes) { return a.bytes < b.bytes; }
+                return a.error < b.error;
+            });
 
-        double best_err = infinity;
-        size_t last_bytes = std::numeric_limits<size_t>::max();
-        for (const auto & c : info.candidate) {
-            // Only keep the best error seen so far at strictly larger byte sizes
-            if (c.bytes != last_bytes) {
-                // first time we see this byte size
-                last_bytes = c.bytes;
-                if (c.error < best_err) {
-                    pruned.push_back(c);
-                    best_err = c.error;
-                }
-            } else {
-                // same bytes: we already sorted by error; skip
-            }
-        }
-
-        info.candidate.swap(pruned);
-    }
-
-    // Enforce convexity in (bytes, error) curve
-    {
-        const auto & c = info.candidate;
-        if (c.size() >= 3) {
-            std::vector<candidate_types> convex;
-            convex.reserve(c.size());
-            auto slope = [](const candidate_types & a, const candidate_types & b) -> double {
-                const double dx = (double)b.bytes - (double)a.bytes;
-                if (dx <= 0.0) { return infinity; }
-
-                return ((double)b.error - (double)a.error) / dx;
-            };
-
-            for (const auto & p : c) {
-                while (convex.size() >= 2) {
-                    double s1 = slope(convex[convex.size() - 2], convex[convex.size() - 1]);
-                    double s2 = slope(convex[convex.size() - 1], p);
-                    if (s2 + epsilon < s1) { convex.pop_back(); }
-                    else { break; }
-                }
-                convex.push_back(p);
-            }
-
-            info.candidate.swap(convex);
+            std::vector<candidate_types> pareto;
+            pareto.reserve(candidates.size());
+            double best_err = infinity;
+            size_t last_bytes = std::numeric_limits<size_t>::max();
+            for (const auto & c : candidates) {
+                if (c.bytes != last_bytes) {
+                    last_bytes = c.bytes;
+                    if (c.error < best_err) {
+                        best_err = c.error;
+                        pareto.push_back(c);
+                    }
+                }
+            }
+
+            candidates.swap(pareto);
+
+            if (candidates.size() >= 3) {
+                std::vector<candidate_types> hull;
+                hull.reserve(candidates.size());
+                auto slope = [](const candidate_types & a, const candidate_types & b) {
+                    const double dx = b.bytes - a.bytes;
+
+                    return dx <= 0.0 ? infinity : (b.error - a.error) / dx;
+                };
+
+                for (const auto & p : candidates) {
+                    while (hull.size() >= 2) {
+                        double s1 = slope(hull[hull.size() - 2], hull[hull.size() - 1]);
+                        double s2 = slope(hull[hull.size() - 1], p);
+                        if (s2 + epsilon < s1) { hull.pop_back(); }
+                        else { break; }
+                    }
+
+                    hull.push_back(p);
+                }
+
+                candidates.swap(hull);
+            }
         }
     }
 
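Note on the merged pass: the refactor folds the two former blocks (Pareto pruning, then a separate convexification block) into one scope, so the lower-hull sweep runs directly on the freshly pruned list and both passes share a single emptiness guard. Below is a minimal self-contained sketch of the same technique, not the llama.cpp source: the `candidate` struct, the `prune_candidates` helper, the sample data, and the `epsilon` value are illustrative stand-ins for `candidate_types` and the file-level constants in the real code.

#include <algorithm>
#include <cstddef>
#include <cstdio>
#include <limits>
#include <vector>

// Stand-in for candidate_types: one (size, error) point per quantization type
struct candidate {
    size_t bytes;  // tensor size at this quantization type
    double error;  // estimated quantization error
};

// Keep only pareto-optimal candidates, then enforce convexity of the
// (bytes, error) curve (a lower convex hull, monotone-chain style)
static void prune_candidates(std::vector<candidate> & candidates) {
    const double infinity = std::numeric_limits<double>::infinity();
    const double epsilon  = 1e-12; // slope tolerance; value is an assumption
    if (candidates.empty()) { return; }

    // Sort by bytes ascending, error ascending
    std::sort(candidates.begin(), candidates.end(), [](const candidate & a, const candidate & b) {
        if (a.bytes != b.bytes) { return a.bytes < b.bytes; }
        return a.error < b.error;
    });

    // Pareto front: keep a point only if it improves on the best error seen
    // at any smaller byte size; duplicates of a byte size are skipped
    std::vector<candidate> pareto;
    pareto.reserve(candidates.size());
    double best_err   = infinity;
    size_t last_bytes = std::numeric_limits<size_t>::max();
    for (const auto & c : candidates) {
        if (c.bytes != last_bytes) {
            last_bytes = c.bytes;
            if (c.error < best_err) {
                best_err = c.error;
                pareto.push_back(c);
            }
        }
    }
    candidates.swap(pareto);

    // Lower hull: pop the previous point when the segment to the new point
    // is steeper (more negative slope) than the segment that reached it
    if (candidates.size() >= 3) {
        auto slope = [infinity](const candidate & a, const candidate & b) {
            const double dx = (double) b.bytes - (double) a.bytes;
            return dx <= 0.0 ? infinity : (b.error - a.error) / dx;
        };
        std::vector<candidate> hull;
        hull.reserve(candidates.size());
        for (const auto & p : candidates) {
            while (hull.size() >= 2) {
                const double s1 = slope(hull[hull.size() - 2], hull[hull.size() - 1]);
                const double s2 = slope(hull[hull.size() - 1], p);
                if (s2 + epsilon < s1) { hull.pop_back(); }
                else { break; }
            }
            hull.push_back(p);
        }
        candidates.swap(hull);
    }
}

int main() {
    // {30, 0.12} and {40, 0.30} are dominated; {40, 0.10} breaks convexity
    std::vector<candidate> cands = {
        {20, 0.50}, {30, 0.11}, {30, 0.12}, {40, 0.30}, {40, 0.10}, {60, 0.01},
    };
    prune_candidates(cands);
    for (const auto & c : cands) {
        printf("%zu bytes -> %.2f error\n", c.bytes, c.error);
    }
    return 0;
}

Running this prints the three surviving points (20, 0.50), (30, 0.11) and (60, 0.01): the duplicate byte sizes fall to the Pareto pass, and {40, 0.10} is then dropped by the hull pass because the step from 30 to 40 bytes reduces error at a worse rate per byte than continuing from 30 to 60 bytes, so it can never be the best use of the byte budget.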