Add code comments

Ed Addario 2025-12-25 15:47:44 +00:00
parent 5f7bba7828
commit b6d718a4a6
GPG Key ID: E7875815A3230993
1 changed file with 26 additions and 11 deletions


@@ -623,7 +623,7 @@ static void signal_handler(int) {
bpw_stop.store(true, std::memory_order_relaxed);
}
// Returns tensor type overrides to meet a global bpw target
// Returns tensor type overrides that meet a global bpw target
static std::unordered_map<std::string, ggml_type> target_bpw_type(
llama_model_loader & ml,
const llama_model & model,
@@ -650,6 +650,7 @@ static std::unordered_map<std::string, ggml_type> target_bpw_type(
}
} signal_guard;
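
This hunk shows the standard pattern for interruptible batch work: the handler only sets an atomic flag, and the work loop polls it at safe points. A minimal sketch of that shape; the guard's body falls outside the hunk, so the assumption that signal_guard installs the handler on construction and restores the previous one on destruction, and the name sigint_guard, are illustrative:

#include <atomic>
#include <csignal>

static std::atomic<bool> bpw_stop{false};

// Async-signal-safe handler: set a flag and return; the worker polls it later.
static void signal_handler(int) {
    bpw_stop.store(true, std::memory_order_relaxed);
}

// Hypothetical guard: install the handler on construction,
// restore the previous handler on destruction.
struct sigint_guard {
    void (*prev)(int);
    sigint_guard()  { prev = std::signal(SIGINT, signal_handler); }
    ~sigint_guard() { std::signal(SIGINT, prev); }
};
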
// Error and bias projection per GGML_TYPE per tensor
struct candidate_types {
ggml_type type = GGML_TYPE_COUNT;
float bpw = 0.0f;
@@ -659,6 +660,7 @@ static std::unordered_map<std::string, ggml_type> target_bpw_type(
double proj = 0.0;
};
// Per-tensor quantization mix that satisfies a global bpw target
struct tensor_info {
const llama_model_loader::llama_tensor_weight * w = nullptr;
std::vector<candidate_types> candidate;
@@ -697,22 +699,33 @@ static std::unordered_map<std::string, ggml_type> target_bpw_type(
constexpr uint64_t arbitrary_magic = 0xeabada55cafed00d;
const char * func = __func__;
// Tensor size in bytes for a given type
auto tensor_bytes = [](const ggml_tensor * t, const ggml_type typ) -> size_t {
const int64_t n_per_row = t->ne[0];
const size_t row_sz = ggml_row_size(typ, n_per_row);
return (size_t)ggml_nrows(t) * row_sz;
};
// Tensor bpw for a given type
auto tensor_bpw = [&](const ggml_tensor * t, const ggml_type typ) -> double {
const size_t bytes = tensor_bytes(t, typ);
return (double)bytes * 8.0 / (double)ggml_nelements(t);
};
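
For intuition, the arithmetic behind these two helpers can be reproduced with plain numbers. A self-contained sketch using hypothetical figures, a 4096 x 4096 tensor and Q4_K's 256-element, 144-byte super-blocks, in place of the ggml calls:

#include <cstdint>
#include <cstdio>

int main() {
    const int64_t ne0 = 4096, ne1 = 4096; // hypothetical weight matrix: ne0 per row, ne1 rows
    const int64_t blck_elems = 256;       // elements per Q4_K super-block
    const size_t  blck_bytes = 144;       // bytes per Q4_K super-block

    // ggml_row_size(GGML_TYPE_Q4_K, ne0) == (ne0 / blck_elems) * blck_bytes
    const size_t row_sz = (size_t)(ne0 / blck_elems) * blck_bytes;
    const size_t bytes  = (size_t)ne1 * row_sz;                      // tensor_bytes
    const double bpw    = (double)bytes * 8.0 / (double)(ne0 * ne1); // tensor_bpw

    printf("bytes = %zu, bpw = %.2f\n", bytes, bpw); // 9437184 bytes, 4.50 bpw
    return 0;
}

The 144 bytes per 256 weights is exactly where Q4_K's nominal 4.5 bpw comes from.
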
// Check if tensor is compatible with quantization type
auto is_compatible = [](const ggml_tensor * t, const ggml_type typ) -> bool {
const int64_t blck = ggml_blck_size(typ);
return blck <= 1 || (t->ne[0] % blck) == 0;
};
// Get suitable fallback for type
auto make_compatible = [&](const ggml_tensor * t, const ggml_type typ) -> ggml_type {
if (is_compatible(t, typ)) { return typ; }
const ggml_type fb = fallback_type(typ);
return is_compatible(t, fb) ? fb : GGML_TYPE_F16;
};
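
The divisibility check exists because blocked quant types store whole blocks per row: a row of, say, 4736 elements cannot be cut into 256-element k-quant super-blocks, but does split into 32-element legacy blocks. A standalone sketch of the same check and fallback chain, with a hypothetical two-step fallback table standing in for the real fallback_type:

#include <cstdint>

enum class qtype { q4_k, q4_0, f16 };

// Elements per block: 256 for k-quants, 32 for legacy quants, 1 (unblocked) for f16.
static int64_t blck_size(qtype t) {
    switch (t) {
        case qtype::q4_k: return 256;
        case qtype::q4_0: return 32;
        default:          return 1;
    }
}

// Hypothetical fallback table: k-quant -> legacy quant -> f16.
static qtype fallback_type(qtype t) { return t == qtype::q4_k ? qtype::q4_0 : qtype::f16; }

static bool is_compatible(int64_t ne0, qtype t) {
    const int64_t b = blck_size(t);
    return b <= 1 || ne0 % b == 0;
}

static qtype make_compatible(int64_t ne0, qtype t) {
    if (is_compatible(ne0, t)) { return t; }
    const qtype fb = fallback_type(t);
    return is_compatible(ne0, fb) ? fb : qtype::f16; // f16 has no block constraint
}

// make_compatible(4736, qtype::q4_k) == qtype::q4_0: 4736 % 256 != 0, but 4736 % 32 == 0
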
// Check if tensor is an IQ type
auto is_iq = [](const enum ggml_type t) {
switch (t) {
case GGML_TYPE_IQ1_S:
@@ -730,12 +743,7 @@ static std::unordered_map<std::string, ggml_type> target_bpw_type(
}
};
auto make_compatible = [&](const ggml_tensor * t, const ggml_type typ) -> ggml_type {
if (is_compatible(t, typ)) { return typ; }
const ggml_type fb = fallback_type(typ);
return is_compatible(t, fb) ? fb : GGML_TYPE_F16;
};
// Check if tensor can be quantized
auto can_quantize = [&](const ggml_tensor * t) -> bool {
if (ggml_n_dims(t) < 2) { return false; } // skip 1D tensors
return is_quantizable(ggml_get_name(t), model.arch, params);
@@ -750,6 +758,7 @@ static std::unordered_map<std::string, ggml_type> target_bpw_type(
size_t n_elements = 0;
};
// DJB2 hashing algorithm
auto djb2_hash = [&](const uint8_t * data, const size_t n) -> uint64_t {
uint64_t h = 5381;
for (size_t i = 0; i < n; ++i) {
@@ -758,6 +767,7 @@ static std::unordered_map<std::string, ggml_type> target_bpw_type(
return h ? h : arbitrary_magic;
};
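
The loop body falls between the two hunks above, but DJB2 itself is standard: seed with 5381, then fold each byte in with h = h * 33 + c. A self-contained version of the classic additive variant (the actual update line is not shown in this diff):

#include <cstddef>
#include <cstdint>

// Classic DJB2: (h << 5) + h is the usual strength-reduced form of h * 33.
static uint64_t djb2(const uint8_t * data, size_t n) {
    uint64_t h = 5381;
    for (size_t i = 0; i < n; ++i) {
        h = ((h << 5) + h) + data[i];
    }
    return h;
}

The guard on the last visible line, return h ? h : arbitrary_magic;, presumably keeps 0 free as a "no hash" sentinel by remapping a zero result to a fixed nonzero constant.
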
// Get model ID from metadata hash
auto metadata_id = [&](const gguf_context * ctx) -> uint64_t {
const size_t sz = gguf_get_meta_size(ctx);
std::vector<uint8_t> buf(sz);
@@ -794,6 +804,7 @@ static std::unordered_map<std::string, ggml_type> target_bpw_type(
}
}
// Serializes vector<tensor_info> to disk
auto save_bpw_state = [&](const std::vector<tensor_info> & all_vec) {
const std::string tmp = checkpoint_file + ".tmp";
std::ofstream ofs(tmp, std::ios::binary | std::ios::trunc);
@@ -832,6 +843,7 @@ static std::unordered_map<std::string, ggml_type> target_bpw_type(
LLAMA_LOG_INFO("%s: saved progress for %zu tensors to %s\n", func, all_vec.size(), checkpoint_file.c_str());
};
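
The .tmp suffix above points at the usual write-then-rename recipe for crash-safe checkpoints; the rename itself falls outside this hunk. A minimal sketch of that pattern, with a placeholder payload rather than the real tensor_info serialization:

#include <filesystem>
#include <fstream>
#include <string>
#include <system_error>

// Write payload to path via a sibling .tmp file: a crash mid-write leaves
// either the previous checkpoint or none, never a truncated one.
static bool save_atomic(const std::string & path, const std::string & payload) {
    const std::string tmp = path + ".tmp";
    {
        std::ofstream ofs(tmp, std::ios::binary | std::ios::trunc);
        if (!ofs.write(payload.data(), (std::streamsize)payload.size())) {
            return false;
        }
    } // close (and flush) before renaming
    std::error_code ec;
    std::filesystem::rename(tmp, path, ec); // atomic replace on POSIX filesystems
    return !ec;
}
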
// Deserializes vector<tensor_info> from disk
auto load_bpw_state = [&]() -> std::unordered_map<std::string, saved_info> {
std::unordered_map<std::string, saved_info> out;
std::ifstream ifs(checkpoint_file, std::ios::binary);
@@ -890,6 +902,7 @@ static std::unordered_map<std::string, ggml_type> target_bpw_type(
return out;
};
// Deletes checkpoint file unless --keep-bpw-state is set
auto delete_bpw_state = [&] {
std::ifstream ifs(checkpoint_file);
if (ifs.good() && !params->keep_bpw_state) {
@@ -898,6 +911,7 @@ static std::unordered_map<std::string, ggml_type> target_bpw_type(
}
};
// Check for user interrupt and save progress
auto check_signal_handler = [&](const std::vector<tensor_info> & all_vec) {
if (bpw_stop.load(std::memory_order_relaxed)) {
LLAMA_LOG_INFO("\n%s: saving progress for %zu tensors to %s\n", func, all_vec.size(), checkpoint_file.c_str());
@@ -1161,7 +1175,7 @@ static std::unordered_map<std::string, ggml_type> target_bpw_type(
const auto bpw_data = load_bpw_state();
// Parallelize tensor processing - courtesy of https://github.com/ddh0
// Parallelize tensor processing (courtesy of https://github.com/ddh0)
auto process_tensor = [&](const llama_model_loader::llama_tensor_weight * tw,
std::vector<no_init<uint8_t>> & thread_local_buffer,
std::mutex & loader_mutex,
@@ -1555,6 +1569,7 @@ static std::unordered_map<std::string, ggml_type> target_bpw_type(
size_t target_total_bytes = std::llround(target_bpw * (double)nq_elements / 8.0);
size_t budget_bytes = target_total_bytes >= nq_bytes ? target_total_bytes - nq_bytes : min_bytes;
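
To make the budget arithmetic concrete, here is the same expression with hypothetical figures; the variable roles are inferred from the expression above. A 4.0 bpw target over 7e9 elements yields 3.5e9 bytes, from which whatever nq_bytes already accounts for is subtracted:

#include <cmath>
#include <cstdio>

int main() {
    const double target_bpw  = 4.0;       // hypothetical global target
    const double nq_elements = 7.0e9;     // elements the bpw target is computed over
    const size_t nq_bytes    = 200000000; // bytes already committed elsewhere
    const size_t min_bytes   = 1;         // hypothetical floor

    const size_t target_total_bytes = (size_t)std::llround(target_bpw * nq_elements / 8.0);
    const size_t budget_bytes = target_total_bytes >= nq_bytes
                              ? target_total_bytes - nq_bytes
                              : min_bytes;
    printf("budget = %zu bytes\n", budget_bytes); // 3.3e9 for these figures
    return 0;
}
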
// Emit the per-tensor type overrides
auto emit_overrides = [&]() -> std::unordered_map<std::string, ggml_type> {
std::unordered_map<std::string, ggml_type> overrides;
LLAMA_LOG_INFO("%s: - estimated tensor quantization mix:\n", func);
@@ -1592,7 +1607,7 @@ static std::unordered_map<std::string, ggml_type> target_bpw_type(
return important;
};
// Lagrangian relaxation to minimise error subject to a bpw target constraint
// Lagrangian relaxation to minimize error subject to a bpw target constraint
auto lagrange_penalty = [&](const double mu, std::vector<int> & choice, size_t & bytes, double & err) {
choice.resize(all.size());
bytes = 0;
@@ -1636,7 +1651,7 @@ static std::unordered_map<std::string, ggml_type> target_bpw_type(
lagrange_penalty(mu_lo, choice_lo, bytes_lo, err_lo);
// increase mu until we get under budget or hit a safety cap
// Increase mu until we get under budget or hit a safety cap
{
int expand = 0;
size_t prev_bytes_hi = std::numeric_limits<size_t>::max();
@@ -1741,7 +1756,7 @@ static std::unordered_map<std::string, ggml_type> target_bpw_type(
}
}
delete_bpw_state(); // we're done, clear any checkpoint
delete_bpw_state();
return emit_overrides();
}
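
For readers unfamiliar with the technique named in the comment above: under a penalty mu, each tensor independently picks the candidate minimizing err + mu * bytes; mu is first expanded until the mix fits the budget, then bisected within the mu_lo/mu_hi bracket, matching the control flow in the last hunks. A toy sketch under those assumptions, with made-up candidate tables in place of the real error projections:

#include <cstddef>
#include <vector>

struct cand { size_t bytes; double err; };

// For a given penalty mu, each tensor (assumed to have at least one candidate)
// independently picks the candidate minimizing err + mu * bytes;
// returns the total size of that mix.
static size_t pick(const std::vector<std::vector<cand>> & tensors,
                   double mu, std::vector<int> & choice, double & err) {
    size_t bytes = 0;
    err = 0.0;
    choice.assign(tensors.size(), 0);
    for (size_t i = 0; i < tensors.size(); ++i) {
        int best = 0;
        double best_cost = tensors[i][0].err + mu * (double)tensors[i][0].bytes;
        for (int j = 1; j < (int)tensors[i].size(); ++j) {
            const double cost = tensors[i][j].err + mu * (double)tensors[i][j].bytes;
            if (cost < best_cost) { best_cost = cost; best = j; }
        }
        choice[i] = best;
        bytes += tensors[i][best].bytes;
        err   += tensors[i][best].err;
    }
    return bytes;
}

// Expand mu until the mix fits the budget, then bisect the bracket.
static std::vector<int> solve(const std::vector<std::vector<cand>> & tensors, size_t budget) {
    std::vector<int> choice;
    double err = 0.0;
    double mu_lo = 0.0, mu_hi = 1e-6;
    while (pick(tensors, mu_hi, choice, err) > budget && mu_hi < 1e12) {
        mu_hi *= 2.0; // grow the penalty until under budget (or a safety cap)
    }
    for (int it = 0; it < 64; ++it) {
        const double mu = 0.5 * (mu_lo + mu_hi);
        if (pick(tensors, mu, choice, err) > budget) { mu_lo = mu; } else { mu_hi = mu; }
    }
    pick(tensors, mu_hi, choice, err); // mu_hi is feasible by construction
    return choice;
}

Because the relaxation may settle slightly under budget rather than exactly on it, the real code presumably keeps the separate lo/hi bookkeeping visible above (choice_lo, bytes_lo, err_lo) while narrowing the bracket.
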