Update comments

2025-08-19 22:32:53 +01:00 · 2025-08-19 22:32:53 +01:00 · ee05d6bc0b
parent 5aceb9e3ae
commit ee05d6bc0b
1 changed files with 13 additions and 19 deletions
--- a/src/llama-quant.cpp
+++ b/src/llama-quant.cpp
@ -596,13 +596,13 @@ static std::unordered_map<std::string, ggml_type> target_bpw_type(
        ggml_type type;
        float bpw;
        size_t bytes;
-        float error;  // lower is better
+        float error;
    };

    struct tensor_info {
        const llama_model_loader::llama_tensor_weight * w;
-        std::vector<candidate_types> candidate; // sorted by bpw ascending
-        int choice = -1;             // index into cand
+        std::vector<candidate_types> candidate;
+        int choice = -1;
        float min_bpw = 0.0;
        float max_bpw = 0.0;
        size_t n_elements = 0;
@ -610,7 +610,6 @@ static std::unordered_map<std::string, ggml_type> target_bpw_type(

    auto name_tn = LLM_TN(model.arch);

-    // The candidate types we consider; adjust as needed
    const ggml_type base_candidates[] = {
        // Model's
        GGML_TYPE_IQ1_S,
@ -639,8 +638,6 @@ static std::unordered_map<std::string, ggml_type> target_bpw_type(
        bool q = name.rfind("weight") == name.size() - 6;
        q &= (ggml_n_dims(t) >= 2);
        q &= name.find("_norm.weight") == std::string::npos;
-        //q &= name != name_tn(LLM_TENSOR_TOKEN_EMBD, "weight");
-        //q &= name != name_tn(LLM_TENSOR_OUTPUT, "weight");
        q &= name.find("ffn_gate_inp.weight") == std::string::npos;
        q &= name.find("altup") == std::string::npos;
        q &= name.find("laurel") == std::string::npos;
@ -719,7 +716,7 @@ static std::unordered_map<std::string, ggml_type> target_bpw_type(

        const ggml_type_traits * traits = ggml_get_type_traits(typ);
        if (!traits || !traits->to_float) {
-            // cannot dequantize candidate -> assign very high error
+            // Cannot dequantize candidate -> assign very high error
            return 1e35f;
        }

@ -842,12 +839,10 @@ static std::unordered_map<std::string, ggml_type> target_bpw_type(
        info.w = tw;
        info.n_elements = nelem;

-        // Candidate build with compatibility handling and availability checks
+        // Build per-tensor candidate list
        for (ggml_type ts_type : base_candidates) {
-            // Skip IQ* without imatrix
            if (is_iq(ts_type) && !values) { continue; }
            ggml_type tt = make_compatible(t, ts_type);
-            // After fallback, if still incompatible, skip
            if (!is_compatible(t, tt)) { continue; }

            // Compute bpw and bytes
@ -861,19 +856,18 @@ static std::unordered_map<std::string, ggml_type> target_bpw_type(
        }

        if (info.candidate.empty()) {
-            // as a last resort, keep original type
+            // As a last resort, keep original type
            float bpw = ggml_nbytes(t) * 8.0f / nelem;
            info.candidate.push_back(candidate_types{t->type, bpw, ggml_nbytes(t), 0.0});
        }

-        // Sort by bpw ascending
        std::sort(info.candidate.begin(), info.candidate.end(), [](const candidate_types &a, const candidate_types &b) {
            if (a.bpw != b.bpw) { return a.bpw < b.bpw; }
            if (a.error != b.error) { return a.error < b.error; }
            return a.bytes < b.bytes;
        });

-        // collapse candidates with identical storage size (bytes)
+        // Collapse candidates with identical storage size (bytes)
        {
            std::vector<candidate_types> uniq;
            uniq.reserve(info.candidate.size());
@ -903,7 +897,6 @@ static std::unordered_map<std::string, ggml_type> target_bpw_type(
    if (all.empty()) { return {}; }

    // Greedy allocation from minimum bpw upward to reach target_bpw
-    // Start with minimal bpw assignment
    auto current_total_bytes = [&]() -> size_t {
        size_t b = 0;
        for (const auto & ti : all) {
@ -938,11 +931,11 @@ static std::unordered_map<std::string, ggml_type> target_bpw_type(
    }

    struct upgrade {
-        int idx;            // tensor index
-        int next;           // next candidate index (strictly larger bytes)
-        double err;         // error reduction
-        size_t delta_bytes; // increase in bytes
-        double ratio;       // err per added bit
+        int idx;
+        int next;
+        double err;
+        size_t delta_bytes;
+        double ratio;
    };

    // Find next strictly-larger candidate index for a tensor
@ -998,6 +991,7 @@ static std::unordered_map<std::string, ggml_type> target_bpw_type(
    }

    // We might still be below target but taking any single upgrade overshoots.
+    // Try to find the best upgrade that overshoots the target_bpw by the least and has the best error-to-size ratio.
    {
        double under_gap = (double)target_bpw - bpw_now;