correct fallback logic

This commit is contained in:
ddh0 2026-02-16 12:34:51 -06:00
parent aaf010edeb
commit 521a13e6c6
1 changed file with 4 additions and 5 deletions

View File

@ -175,8 +175,8 @@ static void llama_tensor_dequantize_impl(
workers.clear();
}
// internal logic for selecting the target tensor type for a given quantization
// and model arch
// internal standard logic for selecting the target tensor type for a specific
// quantization mixture & model architecture
static ggml_type llama_tensor_get_type_impl(
quantize_state_impl & qs,
ggml_type new_type,
@ -422,7 +422,6 @@ static ggml_type llama_tensor_get_type_impl(
new_type = GGML_TYPE_IQ3_XXS;
}
}
return new_type;
}
@ -468,7 +467,7 @@ static ggml_type llama_tensor_get_type(
const int64_t qk_k = ggml_blck_size(new_type);
if (nx % qk_k != 0) {
LLAMA_LOG_WARN("\n\n%s : tensor cols %" PRId64 " x %" PRId64 " are not divisible by %" PRId64 ", required for %s", __func__, nx, ny, qk_k, ggml_type_name(new_type));
LLAMA_LOG_WARN("\n%s: tensor cols %" PRId64 " x %" PRId64 " are not divisible by %" PRId64 ", required for %s", __func__, nx, ny, qk_k, ggml_type_name(new_type));
convert_incompatible_tensor = true;
}
@ -489,7 +488,7 @@ static ggml_type llama_tensor_get_type(
case GGML_TYPE_Q4_K: new_type = GGML_TYPE_Q5_0; break;
case GGML_TYPE_Q5_K: new_type = GGML_TYPE_Q5_1; break;
case GGML_TYPE_Q6_K: new_type = GGML_TYPE_Q8_0; break;
default: throw std::runtime_error("\nUnsupported tensor size encountered\n");
default: throw std::runtime_error("unsupported tensor size");
}
if (tensor->ne[0] % ggml_blck_size(new_type) != 0) {
new_type = GGML_TYPE_F16;