Improve list of candidate types

This commit is contained in:
Ed Addario 2025-08-23 02:50:55 +01:00
parent 73124a9921
commit 68ae5e66ce
No known key found for this signature in database
GPG Key ID: E7875815A3230993
1 changed file with 9 additions and 10 deletions

View File

@ -1023,21 +1023,20 @@ static std::unordered_map<std::string, ggml_type> target_bpw_type(
size_t total_sampled_rows = f32_sample.size() / n_per_row; size_t total_sampled_rows = f32_sample.size() / n_per_row;
// Build list of candidate types first (compatible ones) // Build list of candidate types first (compatible ones)
std::vector<ggml_type> quant_candidates; const ggml_type * base_arr = is_iq(params->ftype) ? iq_quants : k_quants;
if (is_iq(params->ftype)) { const size_t base_sz = is_iq(params->ftype) ? sizeof(iq_quants) / sizeof(iq_quants[0]) : sizeof(k_quants) / sizeof(k_quants[0]);
quant_candidates.assign(std::begin(iq_quants), std::end(iq_quants));
} else {
quant_candidates.assign(std::begin(k_quants), std::end(k_quants));
}
// Compute maximum row size among compatible candidates (to size quantized_buffer once)
size_t max_row_sz = 0; size_t max_row_sz = 0;
const bool has_valid_imatrix = !values_sample.empty() && values_sample.size() == (size_t)ne2 * (size_t)n_per_row; const bool has_valid_imatrix = !values_sample.empty() && values_sample.size() == (size_t)ne2 * (size_t)n_per_row;
std::vector<ggml_type> compatible_candidates; std::vector<ggml_type> compatible_candidates;
compatible_candidates.reserve(quant_candidates.size()); compatible_candidates.reserve(base_sz);
for (ggml_type ts_type : quant_candidates) {
for (size_t i = 0; i < base_sz; ++i) {
ggml_type ts_type = base_arr[i];
if (is_iq(ts_type) && !has_valid_imatrix) { if (is_iq(ts_type) && !has_valid_imatrix) {
LLAMA_LOG_WARN("%s: skipping %s quantization for %s, no or mismatched imatrix provided\n", __func__, ggml_type_name(ts_type) , name.c_str()); LLAMA_LOG_WARN("%s: skipping %s quantization for %s, no or mismatched imatrix provided\n",
__func__, ggml_type_name(ts_type), name.c_str());
continue; continue;
} }
ggml_type tt = make_compatible(t, ts_type); ggml_type tt = make_compatible(t, ts_type);