diff --git a/include/llama.h b/include/llama.h
index 3515ee1a13..c82a4147f4 100644
--- a/include/llama.h
+++ b/include/llama.h
@@ -369,7 +369,6 @@ extern "C" {
         float target_bpw;    // target bits per weight (bpw)
         bool keep_bpw_state; // keep bpw state file
         void * bpw_state;    // pointer to bpw state file
-        void * statistics;   // pointer to statistics data
     } llama_model_quantize_params;
 
     typedef struct llama_logit_bias {
diff --git a/src/llama-quant.cpp b/src/llama-quant.cpp
index 86ca165b6c..99759a27c8 100644
--- a/src/llama-quant.cpp
+++ b/src/llama-quant.cpp
@@ -631,7 +631,6 @@ static std::unordered_map<std::string, ggml_type> target_bpw_type(
         const std::map & mapped,
         const std::unordered_map<std::string, std::vector<float>> * values_data,
         const std::unordered_map<std::string, std::vector<float>> * activations_data,
-        const std::unordered_map<std::string, std::vector<float>> * statistics_data,
         const llama_model_quantize_params * params,
         int nthread
 ) {
@@ -1840,7 +1839,6 @@ static void llama_model_quantize_impl(const std::string & fname_inp, const std:
     }
     const std::unordered_map<std::string, std::vector<float>> * values_data = nullptr;
     const std::unordered_map<std::string, std::vector<float>> * activations_data = nullptr;
-    const std::unordered_map<std::string, std::vector<float>> * statistics_data = nullptr;
     if (params->imatrix) {
         values_data = static_cast<const std::unordered_map<std::string, std::vector<float>>*>(params->imatrix);
         if (values_data) {
@@ -1871,12 +1869,6 @@ static void llama_model_quantize_impl(const std::string & fname_inp, const std:
             }
         }
     }
-    if (params->statistics) {
-        statistics_data = static_cast<const std::unordered_map<std::string, std::vector<float>>*>(params->statistics);
-        if (statistics_data) {
-            LLAMA_LOG_INFO(" and %d statistics",int(statistics_data->size()));
-        }
-    }
     LLAMA_LOG_INFO("\n");
 
     gguf_context_ptr ctx_out { gguf_init_empty() };
@@ -2031,16 +2023,10 @@ static void llama_model_quantize_impl(const std::string & fname_inp, const std:
 
     std::unordered_map<std::string, ggml_type> bpw_overrides = {};
     if (params->target_bpw != -1.0f && !params->only_copy) {
         if (params->imatrix) {
-            const char* base_msg = params->activations
-                ? (params->statistics
-                    ? "imatrix with activations and statistics provided, process will be more accurate\n"
-                    : "imatrix with activations provided, process will be accurate\n")
-                : "imatrix without activations provided, process will be less accurate\n";
-            if (params->activations) { LLAMA_LOG_INFO("%s: %s", __func__, base_msg); }
-            else { LLAMA_LOG_WARN("%s: %s", __func__, base_msg); }
             LLAMA_LOG_INFO("%s: computing tensor quantization mix to achieve %.4f bpw\n", __func__, params->target_bpw);
-            bpw_overrides = target_bpw_type(ml, model, tensors, mapped, values_data, activations_data, statistics_data, params, nthread);
+
+            bpw_overrides = target_bpw_type(ml, model, tensors, mapped, values_data, activations_data, params, nthread);
         } else {
             LLAMA_LOG_WARN("%s: --target-bpw requires an imatrix but none was provided, option will be ignored\n", __func__);
         }
@@ -2305,7 +2291,6 @@ llama_model_quantize_params llama_model_quantize_default_params() {
         /*.target_bpw     =*/ -1.0f,
         /*.keep_bpw_state =*/ false,
         /*.bpw_state      =*/ nullptr,
-        /*.statistics     =*/ nullptr
     };
 
     return result;
diff --git a/tools/quantize/quantize.cpp b/tools/quantize/quantize.cpp
index 0b2b05b60a..aabcd73986 100644
--- a/tools/quantize/quantize.cpp
+++ b/tools/quantize/quantize.cpp
@@ -221,8 +221,7 @@ static int load_legacy_imatrix(const std::string & imatrix_file, std::vector<st
 static int load_imatrix(const std::string & imatrix_file,
                         std::vector<std::string> & imatrix_datasets,
                         std::unordered_map<std::string, std::vector<float>> & values_data,
-                        std::unordered_map<std::string, std::vector<float>> & activations_data,
-                        std::unordered_map<std::string, std::vector<float>> & statistics_data) {
+                        std::unordered_map<std::string, std::vector<float>> & activations_data) {
 
     struct ggml_context * ctx = nullptr;
     struct gguf_init_params meta_gguf_params = {
@@ -257,10 +256,9 @@ static int load_imatrix(const std::string & imatrix_file,
     const std::string sums_suffix{ ".in_sum" };
     const std::string sums2_suffix{ ".in_sum2" };
     const std::string counts_suffix{ ".counts" };
-    const std::string stats_suffix{ ".stats" };
 
     // Using an ordered map to get a deterministic iteration order.
-    std::map<std::string, std::tuple<struct ggml_tensor *, struct ggml_tensor *, struct ggml_tensor *, struct ggml_tensor *>> sums_counts_for;
+    std::map<std::string, std::tuple<struct ggml_tensor *, struct ggml_tensor *, struct ggml_tensor *>> sums_counts_for;
 
     for (struct ggml_tensor * cur = ggml_get_first_tensor(ctx); cur; cur = ggml_get_next_tensor(ctx, cur)) {
         std::string name = cur->name;
@@ -276,11 +274,7 @@ static int load_imatrix(const std::string & imatrix_file,
         } else if (string_remove_suffix(name, counts_suffix)) {
             // counts
             std::get<2>(sums_counts_for[std::move(name)]) = cur;
-        } else if (string_remove_suffix(name, stats_suffix)) {
-            // stats
-            std::get<3>(sums_counts_for[std::move(name)]) = cur;
-        }
-        else {
+        } else {
             // ignore other tensors
         }
     }
@@ -290,7 +284,6 @@ static int load_imatrix(const std::string & imatrix_file,
         const struct ggml_tensor * sums = std::get<0>(sc.second);
         const struct ggml_tensor * sums2 = std::get<1>(sc.second);
         const struct ggml_tensor * counts = std::get<2>(sc.second);
-        const struct ggml_tensor * stats = std::get<3>(sc.second);
 
         // check sums2 and counts are present, and that sums and sums2 have the same shape
         if (!sums2 || !counts || (sums != nullptr && ggml_nelements(sums) != ggml_nelements(sums2))) {
@@ -308,19 +301,6 @@ static int load_imatrix(const std::string & imatrix_file,
         if (sums) {
             activations.resize(ggml_nelements(sums));
         }
-        if (stats) {
-            auto & statistics = statistics_data[name];
-            statistics.resize(ggml_nelements(stats));
-            if (stats->type == GGML_TYPE_F32) {
-                std::memcpy(statistics.data(), stats->data, ggml_nelements(stats) * sizeof(float));
-            } else {
-                fprintf(stderr, "%s: unsupported .stats type '%s' for '%s' - ignoring entry\n",
-                        __func__, ggml_type_name(stats->type), name.c_str());
-                statistics.clear();
-                statistics_data.erase(name);
-            }
-
-        }
         values.resize(ggml_nelements(sums2));
         float max_count = 0.0f;
         for (int64_t j = 0; j < ne1; ++j) {
@@ -373,23 +353,22 @@ static int prepare_imatrix(const std::string & imatrix_file,
                            const std::vector<std::string> & included_weights,
                            const std::vector<std::string> & excluded_weights,
                            std::unordered_map<std::string, std::vector<float>> & values_data,
-                           std::unordered_map<std::string, std::vector<float>> & activations_data,
-                           std::unordered_map<std::string, std::vector<float>> & statistics_data) {
+                           std::unordered_map<std::string, std::vector<float>> & activations_data) {
     int m_last_call = -1;
     if (!imatrix_file.empty()) {
-        m_last_call = load_imatrix(imatrix_file, imatrix_dataset, values_data, activations_data, statistics_data);
+        m_last_call = load_imatrix(imatrix_file, imatrix_dataset, values_data, activations_data);
     }
     if (values_data.empty()) {
         return m_last_call;
     }
     if (!excluded_weights.empty()) {
         for (const auto & name : excluded_weights) {
-            for (auto it = values_data.begin(); it != values_data.end();) {
-                auto pos = it->first.find(name);
+            for (auto vt = values_data.begin(); vt != values_data.end();) {
+                auto pos = vt->first.find(name);
                 if (pos != std::string::npos) {
-                    it = values_data.erase(it);
+                    vt = values_data.erase(vt);
                 } else {
-                    ++it;
+                    ++vt;
                 }
             }
             for (auto at = activations_data.begin(); at != activations_data.end();) {
@@ -400,20 +379,11 @@ static int prepare_imatrix(const std::string & imatrix_file,
                     ++at;
                 }
             }
-            for (auto st = statistics_data.begin(); st != statistics_data.end();) {
-                auto pos = st->first.find(name);
-                if (pos != std::string::npos) {
-                    st = activations_data.erase(st);
-                } else {
-                    ++st;
-                }
-            }
         }
     }
     if (!included_weights.empty()) {
         std::unordered_map<std::string, std::vector<float>> tmp_values;
         std::unordered_map<std::string, std::vector<float>> tmp_activations;
-        std::unordered_map<std::string, std::vector<float>> tmp_statistics;
         for (const auto & name : included_weights) {
             for (auto & e : values_data) {
                 auto pos = e.first.find(name);
@@ -427,16 +397,9 @@ static int prepare_imatrix(const std::string & imatrix_file,
                     tmp_activations.emplace(std::move(a));
                 }
             }
-            for (auto & s : statistics_data) {
-                auto pos = s.first.find(name);
-                if (pos != std::string::npos) {
-                    tmp_statistics.emplace(std::move(s));
-                }
-            }
         }
         values_data = std::move(tmp_values);
         activations_data = std::move(tmp_activations);
-        statistics_data = std::move(tmp_statistics);
     }
 
     return m_last_call;
@@ -653,8 +616,7 @@ int main(int argc, char ** argv) {
     std::vector<std::string> imatrix_datasets;
     std::unordered_map<std::string, std::vector<float>> values_data;
    std::unordered_map<std::string, std::vector<float>> activations_data;
-    std::unordered_map<std::string, std::vector<float>> statistics_data;
-    int m_last_call = prepare_imatrix(imatrix_file, imatrix_datasets, included_weights, excluded_weights, values_data, activations_data, statistics_data);
+    int m_last_call = prepare_imatrix(imatrix_file, imatrix_datasets, included_weights, excluded_weights, values_data, activations_data);
    if (!values_data.empty()) {
         params.imatrix = &values_data;
         {
@@ -694,9 +656,6 @@ int main(int argc, char ** argv) {
     if (!activations_data.empty()) {
         params.activations = &activations_data;
     }
-    if (!statistics_data.empty()) {
-        params.statistics = &statistics_data;
-    }
     if (!kv_overrides.empty()) {
         kv_overrides.emplace_back();
         kv_overrides.back().key[0] = 0;
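
For reference, a minimal sketch (not part of the patch) of the caller side after this change: imatrix values and activations are passed as opaque pointers to `std::unordered_map<std::string, std::vector<float>>`, and the removed `statistics` pointer has no counterpart to set. The `activations` and `target_bpw` fields are specific to this branch's `llama_model_quantize_params` (see the llama.h hunk above); the ftype and bpw target below are purely illustrative.

    // Sketch only: drive llama_model_quantize() with imatrix data after this patch.
    #include <cstdint>
    #include <string>
    #include <unordered_map>
    #include <vector>

    #include "llama.h"

    static uint32_t quantize_with_imatrix(
            const std::string & fname_inp,
            const std::string & fname_out,
            std::unordered_map<std::string, std::vector<float>> & values_data,        // keyed by tensor name
            std::unordered_map<std::string, std::vector<float>> & activations_data) { // optional, may be empty
        llama_model_quantize_params params = llama_model_quantize_default_params();
        params.ftype   = LLAMA_FTYPE_MOSTLY_Q4_K_M; // illustrative choice
        params.imatrix = &values_data;              // importance values (from .in_sum2 / .counts)
        if (!activations_data.empty()) {
            params.activations = &activations_data; // mean activations (from .in_sum), branch-specific field
        }
        params.target_bpw = 5.5f;                   // branch-specific: request a tensor mix near 5.5 bpw

        // Returns 0 on success; there is no params.statistics to set anymore.
        return llama_model_quantize(fname_inp.c_str(), fname_out.c_str(), &params);
    }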