correct fallback logic

2026-02-16 12:34:51 -06:00 · 2026-02-16 12:34:51 -06:00 · 521a13e6c6
parent aaf010edeb
commit 521a13e6c6
1 changed file with 4 additions and 5 deletions
--- a/src/llama-quant.cpp
+++ b/src/llama-quant.cpp
@@ -175,8 +175,8 @@ static void llama_tensor_dequantize_impl(
    workers.clear();
 }

-// internal logic for selecting the target tensor type for a given quantization
-// and model arch
+// internal standard logic for selecting the target tensor type for a specific
+// quantization mixture & model architecture
 static ggml_type llama_tensor_get_type_impl(
            quantize_state_impl & qs,
                      ggml_type   new_type,
@@ -422,7 +422,6 @@ static ggml_type llama_tensor_get_type_impl(
            new_type = GGML_TYPE_IQ3_XXS;
        }
    }
-
    return new_type;
 }

@@ -468,7 +467,7 @@ static ggml_type llama_tensor_get_type(
            const int64_t qk_k = ggml_blck_size(new_type);

            if (nx % qk_k != 0) {
-                LLAMA_LOG_WARN("\n\n%s : tensor cols %" PRId64 " x %" PRId64 " are not divisible by %" PRId64 ", required for %s", __func__, nx, ny, qk_k, ggml_type_name(new_type));
+                LLAMA_LOG_WARN("\n%s: tensor cols %" PRId64 " x %" PRId64 " are not divisible by %" PRId64 ", required for %s", __func__, nx, ny, qk_k, ggml_type_name(new_type));
                convert_incompatible_tensor = true;
            }

@@ -489,7 +488,7 @@ static ggml_type llama_tensor_get_type(
                    case GGML_TYPE_Q4_K:   new_type = GGML_TYPE_Q5_0;   break;
                    case GGML_TYPE_Q5_K:   new_type = GGML_TYPE_Q5_1;   break;
                    case GGML_TYPE_Q6_K:   new_type = GGML_TYPE_Q8_0;   break;
-                    default: throw std::runtime_error("\nUnsupported tensor size encountered\n");
+                    default: throw std::runtime_error("unsupported tensor size");
                }
                if (tensor->ne[0] % ggml_blck_size(new_type) != 0) {
                    new_type = GGML_TYPE_F16;