Add save_bpw_state()

2025-10-05 20:17:27 +01:00 · 2025-10-05 20:17:27 +01:00 · e48ca32f19
parent 533cda3076
commit e48ca32f19
1 changed files with 50 additions and 0 deletions
--- a/src/llama-quant.cpp
+++ b/src/llama-quant.cpp
@ -734,6 +734,56 @@ static std::unordered_map<std::string, ggml_type> target_bpw_type(
        });
    };
    // Per-tensor snapshot of the bpw search state.
    // The fields have the same names as the tensor_info members that save_bpw_state() serializes.
    struct saved_info {
        std::vector<candidate_types> candidate;  // candidate quantization entries recorded for the tensor
        int    choice{-1};                       // presumably an index into `candidate`; -1 = nothing chosen yet (confirm against loader)
        float  min_bpw{0.0f};                    // presumably the lowest bpw among the candidates (confirm)
        float  max_bpw{0.0f};                    // presumably the highest bpw among the candidates (confirm)
        size_t n_elements{0};                    // element count recorded for the tensor
    };
    // Persist the bpw search state of every tensor in `all_vec` to `checkpoint_file`.
    //
    // Checkpointing is best-effort: any failure is logged and the function returns without
    // throwing, leaving the previous checkpoint (if any) in place whenever possible.
    //
    // Binary layout (native endianness, fields written back to back without padding):
    //   header:        file_magic | float target_bpw | uint8 bias_mode | uint64 tensor count
    //   per tensor:    uint32 name length | name bytes (no terminator) | uint64 candidate count |
    //                  choice | min_bpw | max_bpw | uint64 n_elements
    //   per candidate: int32 type | bpw | uint64 bytes | error
    // Fields listed without an explicit width are written with the size of the corresponding
    // tensor_info / candidate member.
    //
    // The data is first written to "<checkpoint_file>.tmp" and only moved over the existing
    // checkpoint after every write has succeeded, so a failed or interrupted save cannot
    // destroy the last good checkpoint.
    auto save_bpw_state = [&](const std::vector<tensor_info> & all_vec) {
        const std::string tmp = checkpoint_file + ".tmp";
        std::ofstream ofs(tmp, std::ios::binary | std::ios::trunc);
        if (!ofs) {
            // logged at INFO level because checkpointing is optional and never aborts the run
            LLAMA_LOG_INFO("%s: failed to open %s for writing, bpw progress not saved\n", func, tmp.c_str());
            return;
        }

        // Writes the raw object representation of a trivially copyable value
        auto put = [&ofs](const auto & v) {
            ofs.write((const char *)&v, sizeof(v));
        };

        const float   target_bpw = params->target_bpw;
        const uint8_t bias_mode  = params->no_bias ? 1 : 0;
        put(file_magic);
        put(target_bpw);
        put(bias_mode);
        const uint64_t n = all_vec.size();
        put(n);

        for (const auto & ti : all_vec) {
            const std::string name = ggml_get_name(ti.w->tensor);
            const uint32_t len = (uint32_t)name.size();
            put(len);
            ofs.write(name.data(), len);

            const uint64_t cn = ti.candidate.size();
            put(cn);
            put(ti.choice);
            put(ti.min_bpw);
            put(ti.max_bpw);
            const uint64_t ne = ti.n_elements;
            put(ne);

            for (const auto & c : ti.candidate) {
                // fixed-width copies keep the on-disk size independent of the in-memory member types
                const int32_t  t = c.type;
                const uint64_t b = c.bytes;
                put(t);
                put(c.bpw);
                put(b);
                put(c.error);
            }
        }

        // close() flushes the buffer; a failed write or flush leaves the stream in a failed state
        ofs.close();
        if (!ofs) {
            LLAMA_LOG_INFO("%s: failed to write %s, keeping previous checkpoint\n", func, tmp.c_str());
            std::remove(tmp.c_str());
            return;
        }

        // Try a plain rename first: where rename() replaces an existing destination this swaps the
        // checkpoint in one step. Some platforms refuse to overwrite, so fall back to removing the
        // old checkpoint and renaming again.
        if (std::rename(tmp.c_str(), checkpoint_file.c_str()) != 0) {
            std::remove(checkpoint_file.c_str());
            if (std::rename(tmp.c_str(), checkpoint_file.c_str()) != 0) {
                // the temporary file is left on disk since it may now be the only copy of the state
                LLAMA_LOG_INFO("%s: failed to move %s to %s, bpw progress not saved\n", func, tmp.c_str(), checkpoint_file.c_str());
                return;
            }
        }

        LLAMA_LOG_INFO("%s: saved bpw progress for %zu tensors to %s\n", func, all_vec.size(), checkpoint_file.c_str());
    };
    // Estimate error for a given type using a sampled subset of rows
    auto estimate_error = [&](const ggml_tensor * t,
        const ggml_type quant_type,