Add quant types

Ed Addario 2025-10-12 16:30:35 +01:00
parent 12e0524f3a
commit b6094a97bf
1 changed file with 8 additions and 5 deletions

@@ -655,8 +655,11 @@ static std::unordered_map<std::string, ggml_type> target_bpw_type(
GGML_TYPE_IQ1_S,
GGML_TYPE_IQ1_M,
GGML_TYPE_IQ2_XXS,
GGML_TYPE_IQ2_XS,
GGML_TYPE_IQ2_S,
GGML_TYPE_Q2_K,
GGML_TYPE_IQ3_XXS,
GGML_TYPE_IQ3_S,
GGML_TYPE_Q3_K,
GGML_TYPE_IQ4_XS,
GGML_TYPE_IQ4_NL,
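
For context, the effective bits-per-weight of each candidate in the list above can be derived from ggml's block metadata. The sketch below is not part of the patch; it assumes only the public ggml.h helpers ggml_blck_size(), ggml_type_size() and ggml_type_name(), and that it is compiled and linked against ggml:

// Minimal sketch (not from the patch): print the bits-per-weight of each
// candidate quant type using ggml's block metadata.
#include <cstdio>
#include <vector>
#include "ggml.h"

static double type_bpw(ggml_type type) {
    // bytes per block * 8 bits, divided by elements per block
    return 8.0 * (double) ggml_type_size(type) / (double) ggml_blck_size(type);
}

int main() {
    const std::vector<ggml_type> candidates = {
        GGML_TYPE_IQ1_S, GGML_TYPE_IQ1_M, GGML_TYPE_IQ2_XXS, GGML_TYPE_IQ2_XS,
        GGML_TYPE_IQ2_S, GGML_TYPE_Q2_K,  GGML_TYPE_IQ3_XXS, GGML_TYPE_IQ3_S,
        GGML_TYPE_Q3_K,  GGML_TYPE_IQ4_XS, GGML_TYPE_IQ4_NL,
    };
    for (ggml_type t : candidates) {
        printf("%-16s %.4f bpw\n", ggml_type_name(t), type_bpw(t));
    }
    return 0;
}
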
@@ -1155,7 +1158,7 @@ static std::unordered_map<std::string, ggml_type> target_bpw_type(
}
{
std::lock_guard<std::mutex> lock(log_mutex);
LLAMA_LOG_INFO("\ttarget_bpw_type: - processing tensor %45s \t(%12" PRId64 " elements)\n", name.c_str(), ggml_nelements(tensor));
LLAMA_LOG_INFO("\t%s: - processing tensor %45s \t(%12" PRId64 " elements)\n", func, name.c_str(), ggml_nelements(tensor));
}
if (!ml.use_mmap) {
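
The logging change above replaces the hard-coded "target_bpw_type:" prefix with a %s placeholder filled from func. A minimal sketch of the presumed pattern, assuming func is captured from __func__ earlier in the function (that line is not shown in the hunk):

// Sketch only: illustrative stand-in for the real LLAMA_LOG_INFO call.
#include <cinttypes>
#include <cstdio>

static void log_processing(const char * name, int64_t nelements) {
    const char * func = __func__; // assumption: the real code captures the enclosing function's name like this
    printf("\t%s: - processing tensor %45s \t(%12" PRId64 " elements)\n", func, name, nelements);
}
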
@@ -1457,19 +1460,19 @@ static std::unordered_map<std::string, ggml_type> target_bpw_type(
std::vector<tensor_info> all; // this vector will be populated by the parallel workers
{
std::atomic<size_t> tensor_idx{0}; // shared work queue index for all threads
const size_t num_tensors_to_process = tensors.size();
const size_t tensors_to_process = tensors.size();
std::mutex loader_mutex;
std::mutex log_mutex;
std::mutex results_mutex;
std::vector<std::thread> workers;
int num_threads_to_spawn = std::max(1, std::min<int>(nthread, (int)num_tensors_to_process));
int threads_to_spawn = std::max(1, std::min<int>(nthread, (int)tensors_to_process));
for (int i = 0; i < num_threads_to_spawn; ++i) {
for (int i = 0; i < threads_to_spawn; ++i) {
workers.emplace_back([&]() {
std::vector<no_init<uint8_t>> thread_local_buffer;
while (true) {
const size_t current_idx = tensor_idx.fetch_add(1);
if (current_idx >= num_tensors_to_process) { break; }
if (current_idx >= tensors_to_process) { break; }
const auto * tw = tensors[current_idx];
if (!can_quantize(tw->tensor)) { continue; }
// Execute the main processing logic for this tensor
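
The renamed counters above drive a simple atomic work queue: each worker claims the next tensor index with fetch_add until the queue is exhausted, and shared results are appended under a mutex. A stand-alone sketch of that pattern, with illustrative names (items, results, process loop) that are not from the patch:

// Stand-alone sketch of the work-queue pattern used above.
#include <algorithm>
#include <atomic>
#include <cstdio>
#include <mutex>
#include <thread>
#include <vector>

int main() {
    const std::vector<int> items = {1, 2, 3, 4, 5, 6, 7, 8};
    const size_t items_to_process = items.size();

    std::atomic<size_t> item_idx{0};   // shared work queue index for all threads
    std::mutex results_mutex;
    std::vector<int> results;

    const int nthread = 4;
    const int threads_to_spawn = std::max(1, std::min<int>(nthread, (int) items_to_process));

    std::vector<std::thread> workers;
    for (int i = 0; i < threads_to_spawn; ++i) {
        workers.emplace_back([&]() {
            while (true) {
                const size_t current_idx = item_idx.fetch_add(1); // claim the next slot
                if (current_idx >= items_to_process) { break; }   // queue drained
                const int value = items[current_idx] * items[current_idx];
                std::lock_guard<std::mutex> lock(results_mutex);  // guard the shared results vector
                results.push_back(value);
            }
        });
    }
    for (auto & w : workers) { w.join(); }

    printf("processed %zu items\n", results.size());
    return 0;
}
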