diff --git a/src/llama-quant.cpp b/src/llama-quant.cpp index e062b2dc6a..d31552ea23 100644 --- a/src/llama-quant.cpp +++ b/src/llama-quant.cpp @@ -665,28 +665,23 @@ static std::unordered_map target_bpw_type( auto tensor_bytes = [](const ggml_tensor * t, const ggml_type typ) -> size_t { const int64_t n_per_row = t->ne[0]; const size_t row_sz = ggml_row_size(typ, n_per_row); - const int64_t nrows = ggml_nrows(t); - return (size_t)nrows * row_sz; + return (size_t)ggml_nrows(t) * row_sz; }; auto tensor_bpw = [&](const ggml_tensor * t, const ggml_type typ) -> double { - const int64_t nelem = ggml_nelements(t); const size_t bytes = tensor_bytes(t, typ); - return (double)bytes * 8.0 / (double)nelem; + return (double)bytes * 8.0 / (double)ggml_nelements(t); }; auto is_compatible = [&](const ggml_tensor * t, const ggml_type typ) -> bool { - const int64_t n_per_row = t->ne[0]; const int64_t blck = ggml_blck_size(typ); - if (blck <= 1) { return true; } - return n_per_row % blck == 0; + return blck <= 1 || (t->ne[0] % blck) == 0; }; auto make_compatible = [&](const ggml_tensor * t, const ggml_type typ) -> ggml_type { - if (is_compatible(t, typ)) { return typ; } + if (is_compatible(t, typ)) return typ; ggml_type fb = fallback_type(typ); - if (is_compatible(t, fb)) { return fb; } - return GGML_TYPE_F16; + return is_compatible(t, fb) ? fb : GGML_TYPE_F16; }; auto name_tn = LLM_TN(model.arch); @@ -1080,7 +1075,7 @@ static std::unordered_map target_bpw_type( ++current_sampled_rows; } - + rows_sample[slice] = current_sampled_rows; }