Refactor pareto optimise and convexify

2025-09-21 16:21:35 +01:00 · 2025-09-21 16:21:35 +01:00 · 9a1656eb97
parent 1a3e9ea4c8
commit 9a1656eb97
1 changed file with 42 additions and 44 deletions
--- a/src/llama-quant.cpp
+++ b/src/llama-quant.cpp
@@ -1179,55 +1179,53 @@ static std::unordered_map<std::string, ggml_type> target_bpw_type(
        }
        // Keep only the pareto‑optimal candidates and enforce convexity in (bytes, error) curve
-        {
+        auto pareto_convex = [](std::vector<candidate_types> & candidates) {
-            auto & candidates = info.candidate;
+            if (candidates.empty()) return;
            if (!candidates.empty()) {
                std::sort(candidates.begin(), candidates.end(), [](const candidate_types & a, const candidate_types & b) {
                    if (a.bytes != b.bytes) { return a.bytes < b.bytes; }
-                    return a.error < b.error;
+            std::sort(candidates.begin(), candidates.end(), [](const candidate_types & a, const candidate_types & b) {
-                });
+                if (a.bytes != b.bytes) { return a.bytes < b.bytes; }
                return a.error < b.error;
            });
-                std::vector<candidate_types> pareto;
+            // Pareto by bytes -> error
-                pareto.reserve(candidates.size());
+            std::vector<candidate_types> pareto;
-                double best_err = infinity;
+            pareto.reserve(candidates.size());
-                size_t last_bytes = std::numeric_limits<size_t>::max();
+            double best_err = std::numeric_limits<double>::infinity();
-                for (const auto & c : candidates) {
+            size_t last_b = std::numeric_limits<size_t>::max();
-                    if (c.bytes != last_bytes) {
+            for (const auto & c : candidates) {
-                        last_bytes = c.bytes;
+                if (c.bytes != last_b) {
-                        if (c.error < best_err) {
+                    last_b = c.bytes;
-                            best_err = c.error;
+                    if (c.error < best_err) {
-                            pareto.push_back(c);
+                        best_err = c.error;
-                        }
+                        pareto.push_back(c);
                    }
                }
                candidates.swap(pareto);
                if (candidates.size() >= 3) {
                    std::vector<candidate_types> hull;
                    hull.reserve(candidates.size());
                    auto slope = [](const candidate_types & a, const candidate_types & b) {
                        const double dx = b.bytes - a.bytes;
                        return dx <= 0.0 ? infinity : (b.error - a.error) / dx;
                    };
                    for (const auto & p : candidates) {
                        while (hull.size() >= 2) {
                            double s1 = slope(hull[hull.size() - 2], hull[hull.size() - 1]);
                            double s2 = slope(hull[hull.size() - 1], p);
                            if (s2 + epsilon < s1) { hull.pop_back(); }
                            else { break; }
                        }
                        hull.push_back(p);
                    }
                    candidates.swap(hull);
                }
            }
-        }
+
            candidates.swap(pareto);
            if (candidates.size() < 3) { return; } // need at least 3 points to do convex hull
            // Convex hull (lower envelope)
            auto slope = [](const candidate_types & a, const candidate_types & b) {
                const double dx = b.bytes - a.bytes;
                return dx <= 0.0 ? infinity : (b.error - a.error) / dx;
            };
            std::vector<candidate_types> hull; hull.reserve(candidates.size());
            for (const auto & p : candidates) {
                while (hull.size() >= 2) {
                    const double s1 = slope(hull[hull.size() - 2], hull[hull.size() - 1]);
                    const double s2 = slope(hull[hull.size() - 1], p);
                    if (s2 + epsilon < s1) hull.pop_back();
                    else { break; }
                }
                hull.push_back(p);
            }
            candidates.swap(hull);
        };
        pareto_convex(info.candidate);
        // Initialize choice at the smallest bpw candidate
        info.choice = 0;