diff --git a/src/llama-quant.cpp b/src/llama-quant.cpp
index 3544653a56..8a709ddfdd 100644
--- a/src/llama-quant.cpp
+++ b/src/llama-quant.cpp
@@ -1165,7 +1165,7 @@ static std::unordered_map<std::string, ggml_type> target_bpw_type(
     // Evaluate candidates
     std::vector<candidate_types> eval_candidates(compatible_candidates.size());
     std::vector<uint8_t> quantized_buffer(max_row_sz * total_sampled_rows);
-    std::vector<float> dequantised_buffer(f32_sample.size());
+    std::vector<float> dequantized_buffer(f32_sample.size());
     const float * slice_lambda = lambdas.empty() ? nullptr : lambdas.data();
     int n_eval_threads = std::max(1, std::min(nthread, (int)compatible_candidates.size()));
     std::atomic<size_t> cidx{0};
@@ -1175,7 +1175,7 @@ static std::unordered_map<std::string, ggml_type> target_bpw_type(
         eval_workers.emplace_back([&] {
             // thread-local scratch
             std::vector<uint8_t> tl_quantized_buffer(quantized_buffer.size());
-            std::vector<float> tl_dequantised_buffer(dequantised_buffer.size());
+            std::vector<float> tl_dequantized_buffer(dequantized_buffer.size());
             for (;;) {
                 const size_t i = cidx.fetch_add(1, std::memory_order_relaxed);
                 if (i >= compatible_candidates.size()) { break; }
@@ -1184,7 +1184,7 @@ static std::unordered_map<std::string, ggml_type> target_bpw_type(
                 const auto bpw = (float)tensor_bpw(tensor, tensor_types);
                 const size_t bytes = tensor_bytes(tensor, tensor_types);
                 const auto err = estimate_error(tensor, tensor_types, f32_sample, rows_sample, values, activations,
-                                                tl_quantized_buffer, tl_dequantised_buffer, tensor_lambda, slice_lambda);
+                                                tl_quantized_buffer, tl_dequantized_buffer, tensor_lambda, slice_lambda);
                 eval_candidates[i] = candidate_types{ tensor_types, bpw, bytes, err };
             }
         });
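
For context, the hunks above parcel out candidate evaluation to worker threads through a shared atomic counter (`cidx.fetch_add`) instead of pre-partitioned index ranges, so faster threads naturally pick up more work. Below is a minimal, self-contained sketch of that same pattern; it is illustrative only, and all names in it (`results`, `next`, the squaring stand-in for `estimate_error`) are hypothetical, not part of the patch.

```cpp
#include <algorithm>
#include <atomic>
#include <cstdio>
#include <thread>
#include <vector>

int main() {
    std::vector<int> results(100, 0);
    std::atomic<size_t> next{0};  // shared work cursor, analogous to cidx

    const int n_threads = std::max(1u, std::thread::hardware_concurrency());
    std::vector<std::thread> workers;
    for (int t = 0; t < n_threads; ++t) {
        workers.emplace_back([&] {
            for (;;) {
                // Claim the next unprocessed index; relaxed ordering suffices
                // because each index is written by exactly one thread and the
                // join below synchronizes before the results are read.
                const size_t i = next.fetch_add(1, std::memory_order_relaxed);
                if (i >= results.size()) { break; }
                results[i] = (int)(i * i);  // stand-in for per-candidate work
            }
        });
    }
    for (auto & w : workers) { w.join(); }

    std::printf("results[99] = %d\n", results[99]);
    return 0;
}
```

Writing into `eval_candidates[i]` by claimed index, as the patch does, keeps the workers free of locks: no two threads ever receive the same `i`, so no further synchronization is needed until the join.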