Refactor pareto optimise and convexify

This commit is contained in:
Ed Addario 2025-09-21 16:21:35 +01:00
parent 1a3e9ea4c8
commit 9a1656eb97
No known key found for this signature in database
GPG Key ID: E7875815A3230993
1 changed file with 42 additions and 44 deletions

View File

@ -1179,22 +1179,22 @@ static std::unordered_map<std::string, ggml_type> target_bpw_type(
} }
// Keep only the pareto-optimal candidates and enforce convexity in (bytes, error) curve // Keep only the pareto-optimal candidates and enforce convexity in (bytes, error) curve
{ auto pareto_convex = [](std::vector<candidate_types> & candidates) {
auto & candidates = info.candidate; if (candidates.empty()) return;
if (!candidates.empty()) {
std::sort(candidates.begin(), candidates.end(), [](const candidate_types & a, const candidate_types & b) { std::sort(candidates.begin(), candidates.end(), [](const candidate_types & a, const candidate_types & b) {
if (a.bytes != b.bytes) { return a.bytes < b.bytes; } if (a.bytes != b.bytes) { return a.bytes < b.bytes; }
return a.error < b.error; return a.error < b.error;
}); });
// Pareto by bytes -> error
std::vector<candidate_types> pareto; std::vector<candidate_types> pareto;
pareto.reserve(candidates.size()); pareto.reserve(candidates.size());
double best_err = infinity; double best_err = std::numeric_limits<double>::infinity();
size_t last_bytes = std::numeric_limits<size_t>::max(); size_t last_b = std::numeric_limits<size_t>::max();
for (const auto & c : candidates) { for (const auto & c : candidates) {
if (c.bytes != last_bytes) { if (c.bytes != last_b) {
last_bytes = c.bytes; last_b = c.bytes;
if (c.error < best_err) { if (c.error < best_err) {
best_err = c.error; best_err = c.error;
pareto.push_back(c); pareto.push_back(c);
@ -1203,31 +1203,29 @@ static std::unordered_map<std::string, ggml_type> target_bpw_type(
} }
candidates.swap(pareto); candidates.swap(pareto);
if (candidates.size() < 3) { return; } // need at least 3 points to do convex hull
if (candidates.size() >= 3) { // Convex hull (lower envelope)
std::vector<candidate_types> hull;
hull.reserve(candidates.size());
auto slope = [](const candidate_types & a, const candidate_types & b) { auto slope = [](const candidate_types & a, const candidate_types & b) {
const double dx = b.bytes - a.bytes; const double dx = b.bytes - a.bytes;
return dx <= 0.0 ? infinity : (b.error - a.error) / dx; return dx <= 0.0 ? infinity : (b.error - a.error) / dx;
}; };
std::vector<candidate_types> hull; hull.reserve(candidates.size());
for (const auto & p : candidates) { for (const auto & p : candidates) {
while (hull.size() >= 2) { while (hull.size() >= 2) {
double s1 = slope(hull[hull.size() - 2], hull[hull.size() - 1]); const double s1 = slope(hull[hull.size() - 2], hull[hull.size() - 1]);
double s2 = slope(hull[hull.size() - 1], p); const double s2 = slope(hull[hull.size() - 1], p);
if (s2 + epsilon < s1) { hull.pop_back(); } if (s2 + epsilon < s1) hull.pop_back();
else { break; } else { break; }
} }
hull.push_back(p); hull.push_back(p);
} }
candidates.swap(hull); candidates.swap(hull);
} };
}
} pareto_convex(info.candidate);
// Initialize choice at the smallest bpw candidate // Initialize choice at the smallest bpw candidate
info.choice = 0; info.choice = 0;