From 6d17889addf3aa18000334e1dd958111104cdf3e Mon Sep 17 00:00:00 2001 From: Ed Addario Date: Fri, 22 Aug 2025 16:58:46 +0100 Subject: [PATCH] Log if override is from tensor-type or from bpw-target --- src/llama-quant.cpp | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/src/llama-quant.cpp b/src/llama-quant.cpp index 10993e89c6..721deaddad 100644 --- a/src/llama-quant.cpp +++ b/src/llama-quant.cpp @@ -1049,8 +1049,8 @@ static std::unordered_map target_bpw_type( // Now evaluate candidates std::vector eval_candidates(compatible_candidates.size()); - const float *values = values_sample.empty() ? nullptr : values_sample.data(); - const float *activations = activations_sample.empty() ? nullptr : activations_sample.data(); + const float * values = values_sample.empty() ? nullptr : values_sample.data(); + const float * activations = activations_sample.empty() ? nullptr : activations_sample.data(); std::vector quantized_buffer(max_row_sz * total_sampled_rows); std::vector dequantised_buffer(f32_sample.size()); int n_eval_threads = std::max(1, std::min(nthread, (int)compatible_candidates.size())); @@ -1656,15 +1656,18 @@ static void llama_model_quantize_impl(const std::string & fname_inp, const std:: new_type = llama_tensor_get_type(qs, new_type, tensor, ftype); // get bpw override const auto override = bpw_overrides.find(name); - if (override != bpw_overrides.end()) { new_type = override->second; } - // unless the user specifies a type, and the tensor geometry will not require fallback quantisation + if (override != bpw_overrides.end() && override->second != new_type) { + LLAMA_LOG_DEBUG("(bpw overriding %s) ", ggml_type_name(new_type)); + new_type = override->second; + } + // unless the user specifies a type, and the tensor shape will not require fallback quantisation if (params->tensor_types && qs.n_fallback - fallback == 0) { const std::vector & tensor_types = *static_cast *>(params->tensor_types); const std::string tensor_name(tensor->name); for (const auto & [tname, qtype] : tensor_types) { if (std::regex pattern(tname); std::regex_search(tensor_name, pattern)) { if (qtype != new_type) { - LLAMA_LOG_DEBUG("(overriding %s) ", ggml_type_name(new_type)); + LLAMA_LOG_DEBUG("(type overriding %s) ", ggml_type_name(new_type)); new_type = qtype; // if two or more types are specified for the same tensor, the last match wins } }