diff --git a/src/llama-quant.cpp b/src/llama-quant.cpp
index 9e7c58b167..5bce2bf221 100644
--- a/src/llama-quant.cpp
+++ b/src/llama-quant.cpp
@@ -175,8 +175,8 @@ static void llama_tensor_dequantize_impl(
     workers.clear();
 }
 
-// internal logic for selecting the target tensor type for a given quantization
-// and model arch
+// internal standard logic for selecting the target tensor type for a specific
+// quantization mixture & model architecture
 static ggml_type llama_tensor_get_type_impl(
     quantize_state_impl & qs,
     ggml_type new_type,
@@ -422,7 +422,6 @@ static ggml_type llama_tensor_get_type_impl(
             new_type = GGML_TYPE_IQ3_XXS;
         }
     }
-
     return new_type;
 }
 
@@ -468,7 +467,7 @@ static ggml_type llama_tensor_get_type(
     const int64_t qk_k = ggml_blck_size(new_type);
 
     if (nx % qk_k != 0) {
-        LLAMA_LOG_WARN("\n\n%s : tensor cols %" PRId64 " x %" PRId64 " are not divisible by %" PRId64 ", required for %s", __func__, nx, ny, qk_k, ggml_type_name(new_type));
+        LLAMA_LOG_WARN("\n%s: tensor cols %" PRId64 " x %" PRId64 " are not divisible by %" PRId64 ", required for %s", __func__, nx, ny, qk_k, ggml_type_name(new_type));
         convert_incompatible_tensor = true;
     }
@@ -489,7 +488,7 @@ static ggml_type llama_tensor_get_type(
             case GGML_TYPE_Q4_K: new_type = GGML_TYPE_Q5_0; break;
             case GGML_TYPE_Q5_K: new_type = GGML_TYPE_Q5_1; break;
             case GGML_TYPE_Q6_K: new_type = GGML_TYPE_Q8_0; break;
-            default: throw std::runtime_error("\nUnsupported tensor size encountered\n");
+            default: throw std::runtime_error("unsupported tensor size");
         }
         if (tensor->ne[0] % ggml_blck_size(new_type) != 0) {
             new_type = GGML_TYPE_F16;