correct fallback logic
This commit is contained in:
parent
aaf010edeb
commit
521a13e6c6
|
|
@ -175,8 +175,8 @@ static void llama_tensor_dequantize_impl(
|
|||
workers.clear();
|
||||
}
|
||||
|
||||
// internal logic for selecting the target tensor type for a given quantization
|
||||
// and model arch
|
||||
// internal standard logic for selecting the target tensor type for a specific
|
||||
// quantization mixture & model architecture
|
||||
static ggml_type llama_tensor_get_type_impl(
|
||||
quantize_state_impl & qs,
|
||||
ggml_type new_type,
|
||||
|
|
@ -422,7 +422,6 @@ static ggml_type llama_tensor_get_type_impl(
|
|||
new_type = GGML_TYPE_IQ3_XXS;
|
||||
}
|
||||
}
|
||||
|
||||
return new_type;
|
||||
}
|
||||
|
||||
|
|
@ -468,7 +467,7 @@ static ggml_type llama_tensor_get_type(
|
|||
const int64_t qk_k = ggml_blck_size(new_type);
|
||||
|
||||
if (nx % qk_k != 0) {
|
||||
LLAMA_LOG_WARN("\n\n%s : tensor cols %" PRId64 " x %" PRId64 " are not divisible by %" PRId64 ", required for %s", __func__, nx, ny, qk_k, ggml_type_name(new_type));
|
||||
LLAMA_LOG_WARN("\n%s: tensor cols %" PRId64 " x %" PRId64 " are not divisible by %" PRId64 ", required for %s", __func__, nx, ny, qk_k, ggml_type_name(new_type));
|
||||
convert_incompatible_tensor = true;
|
||||
}
|
||||
|
||||
|
|
@ -489,7 +488,7 @@ static ggml_type llama_tensor_get_type(
|
|||
case GGML_TYPE_Q4_K: new_type = GGML_TYPE_Q5_0; break;
|
||||
case GGML_TYPE_Q5_K: new_type = GGML_TYPE_Q5_1; break;
|
||||
case GGML_TYPE_Q6_K: new_type = GGML_TYPE_Q8_0; break;
|
||||
default: throw std::runtime_error("\nUnsupported tensor size encountered\n");
|
||||
default: throw std::runtime_error("unsupported tensor size");
|
||||
}
|
||||
if (tensor->ne[0] % ggml_blck_size(new_type) != 0) {
|
||||
new_type = GGML_TYPE_F16;
|
||||
|
|
|
|||
Loading…
Reference in New Issue