From 99119ceaf4037df585e62c6613a3001b0e03c811 Mon Sep 17 00:00:00 2001 From: Colin Kealty <3266127+bartowski1182@users.noreply.github.com> Date: Wed, 4 Mar 2026 13:31:57 -0500 Subject: [PATCH 01/21] Add unit test coverage for llama_tensor_get_type --- src/llama-quant.cpp | 163 +- src/llama-quant.h | 53 + tests/.gitignore | 1 + tests/gguf-model-data.cpp | 28 + tests/snapshots/glm-4.6v.schema | 2478 +++++++++++++++++++++++++++ tests/snapshots/qwen3-0.6b.schema | 1224 +++++++++++++ tests/test-gguf-model-data.cpp | 33 + tests/test-quant-type-selection.cpp | 520 ++++++ 8 files changed, 4439 insertions(+), 61 deletions(-) create mode 100644 tests/snapshots/glm-4.6v.schema create mode 100644 tests/snapshots/qwen3-0.6b.schema create mode 100644 tests/test-quant-type-selection.cpp diff --git a/src/llama-quant.cpp b/src/llama-quant.cpp index 8e8ce23124..7415328c24 100644 --- a/src/llama-quant.cpp +++ b/src/llama-quant.cpp @@ -1,11 +1,11 @@ -#include "llama.h" +#include "llama-quant.h" #include "llama-impl.h" #include "llama-model.h" #include "llama-model-loader.h" +#include #include #include -#include #include #include #include @@ -13,13 +13,6 @@ #include #include -// result of parsing --tensor-type option -// (changes to this struct must be reflected in tools/quantize/quantize.cpp) -struct tensor_type_option { - std::string name; - ggml_type type = GGML_TYPE_COUNT; -}; - // tensor categorization - used to avoid repeated string matching in quantization logic. // this is different from LLM_TN - we want broad categories, not specific tensor names per arch. enum class tensor_category { @@ -157,46 +150,6 @@ static bool category_is_attn_v(tensor_category cat) { cat == tensor_category::ATTENTION_KV_B; } -// -// quantization state -// - -struct quantize_state_impl { - const llama_model & model; - const llama_model_quantize_params * params; - - int n_attention_wv = 0; - int n_ffn_down = 0; - int n_ffn_gate = 0; - int n_ffn_up = 0; - int i_attention_wv = 0; - int i_ffn_down = 0; - int i_ffn_gate = 0; - int i_ffn_up = 0; - - int n_fallback = 0; - - bool has_imatrix = false; - - // used to figure out if a model has tied embeddings (tok_embd shares weights with output) - bool has_tied_embeddings = true; // assume tied until we see output.weight - - // tensor type override patterns (compiled once, used twice) - std::vector> tensor_type_patterns; - - quantize_state_impl(const llama_model & model, const llama_model_quantize_params * params): - model(model), params(params) - { - // compile regex patterns once - they are expensive - if (params->tensor_types) { - const auto & tensor_types = *static_cast *>(params->tensor_types); - for (const auto & [tname, qtype] : tensor_types) { - tensor_type_patterns.emplace_back(std::regex(tname), qtype); - } - } - } -}; - // per-tensor metadata, computed in the preliminary loop and used in the main loop struct tensor_metadata { ggml_type target_type; @@ -286,7 +239,7 @@ static void llama_tensor_dequantize_impl( // do we allow this tensor to be quantized? // -static bool tensor_allows_quantization(const llama_model_quantize_params * params, llm_arch arch, const ggml_tensor * tensor) { +bool tensor_allows_quantization(const llama_model_quantize_params * params, llm_arch arch, const ggml_tensor * tensor) { // trivial checks first -- no string ops needed if (params->only_copy) return false; @@ -402,8 +355,7 @@ static ggml_type tensor_type_fallback(quantize_state_impl & qs, const ggml_tenso return return_type; } -// internal standard logic for selecting the target tensor type based on tensor category, ftype, and model arch -static ggml_type llama_tensor_get_type_impl(quantize_state_impl & qs, ggml_type new_type, const ggml_tensor * tensor, llama_ftype ftype, tensor_category category) { +ggml_type llama_tensor_get_type_impl(quantize_state_impl & qs, ggml_type new_type, const ggml_tensor * tensor, llama_ftype ftype, tensor_category category) { const std::string name = ggml_get_name(tensor); // TODO: avoid hardcoded tensor names - use the TN_* constants @@ -652,8 +604,15 @@ static ggml_type llama_tensor_get_type_impl(quantize_state_impl & qs, ggml_type return new_type; } -// outer wrapper: determine the ggml_type that this tensor should be quantized to -static ggml_type llama_tensor_get_type(quantize_state_impl & qs, const llama_model_quantize_params * params, const ggml_tensor * tensor, ggml_type default_type, const tensor_metadata & tm) { +// public API: compute category from tensor name and delegate to _impl +ggml_type llama_tensor_get_type(quantize_state_impl & qs, ggml_type new_type, const ggml_tensor * tensor, llama_ftype ftype) { + const std::string name = ggml_get_name(tensor); + tensor_category category = tensor_get_category(name); + return llama_tensor_get_type_impl(qs, new_type, tensor, ftype, category); +} + +// outer wrapper: determine the ggml_type that this tensor should be quantized to (used internally by llama_model_quantize_impl) +static ggml_type llama_tensor_get_type_internal(quantize_state_impl & qs, const llama_model_quantize_params * params, const ggml_tensor * tensor, ggml_type default_type, const tensor_metadata & tm) { if (!tensor_allows_quantization(params, qs.model.arch, tensor)) { return tensor->type; } @@ -784,7 +743,7 @@ static bool tensor_requires_imatrix(const char * tensor_name, const ggml_type ds // given a file type, get the default tensor type // -static ggml_type llama_ftype_get_default_type(llama_ftype ftype) { +ggml_type llama_ftype_get_default_type(llama_ftype ftype) { switch (ftype) { case LLAMA_FTYPE_MOSTLY_Q4_0: return GGML_TYPE_Q4_0; case LLAMA_FTYPE_MOSTLY_Q4_1: return GGML_TYPE_Q4_1; @@ -827,12 +786,85 @@ static ggml_type llama_ftype_get_default_type(llama_ftype ftype) { } } +struct ftype_name_entry { + const char * name; + llama_ftype ftype; +}; + +static const ftype_name_entry ftype_name_table[] = { + { "F32", LLAMA_FTYPE_ALL_F32 }, + { "F16", LLAMA_FTYPE_MOSTLY_F16 }, + { "BF16", LLAMA_FTYPE_MOSTLY_BF16 }, + { "Q4_0", LLAMA_FTYPE_MOSTLY_Q4_0 }, + { "Q4_1", LLAMA_FTYPE_MOSTLY_Q4_1 }, + { "Q5_0", LLAMA_FTYPE_MOSTLY_Q5_0 }, + { "Q5_1", LLAMA_FTYPE_MOSTLY_Q5_1 }, + { "Q8_0", LLAMA_FTYPE_MOSTLY_Q8_0 }, + { "Q2_K", LLAMA_FTYPE_MOSTLY_Q2_K }, + { "Q2_K_S", LLAMA_FTYPE_MOSTLY_Q2_K_S }, + { "Q3_K_S", LLAMA_FTYPE_MOSTLY_Q3_K_S }, + { "Q3_K_M", LLAMA_FTYPE_MOSTLY_Q3_K_M }, + { "Q3_K_L", LLAMA_FTYPE_MOSTLY_Q3_K_L }, + { "Q4_K_S", LLAMA_FTYPE_MOSTLY_Q4_K_S }, + { "Q4_K_M", LLAMA_FTYPE_MOSTLY_Q4_K_M }, + { "Q5_K_S", LLAMA_FTYPE_MOSTLY_Q5_K_S }, + { "Q5_K_M", LLAMA_FTYPE_MOSTLY_Q5_K_M }, + { "Q6_K", LLAMA_FTYPE_MOSTLY_Q6_K }, + { "IQ1_S", LLAMA_FTYPE_MOSTLY_IQ1_S }, + { "IQ1_M", LLAMA_FTYPE_MOSTLY_IQ1_M }, + { "IQ2_XXS", LLAMA_FTYPE_MOSTLY_IQ2_XXS }, + { "IQ2_XS", LLAMA_FTYPE_MOSTLY_IQ2_XS }, + { "IQ2_S", LLAMA_FTYPE_MOSTLY_IQ2_S }, + { "IQ2_M", LLAMA_FTYPE_MOSTLY_IQ2_M }, + { "IQ3_XXS", LLAMA_FTYPE_MOSTLY_IQ3_XXS }, + { "IQ3_XS", LLAMA_FTYPE_MOSTLY_IQ3_XS }, + { "IQ3_S", LLAMA_FTYPE_MOSTLY_IQ3_S }, + { "IQ3_M", LLAMA_FTYPE_MOSTLY_IQ3_M }, + { "IQ4_NL", LLAMA_FTYPE_MOSTLY_IQ4_NL }, + { "IQ4_XS", LLAMA_FTYPE_MOSTLY_IQ4_XS }, + { "TQ1_0", LLAMA_FTYPE_MOSTLY_TQ1_0 }, + { "TQ2_0", LLAMA_FTYPE_MOSTLY_TQ2_0 }, + { "MXFP4_MOE", LLAMA_FTYPE_MOSTLY_MXFP4_MOE }, +}; + +llama_ftype llama_ftype_from_name(const char * name) { + for (const auto & e : ftype_name_table) { + if (strcmp(name, e.name) == 0) { + return e.ftype; + } + } + return (llama_ftype)-1; +} + +const char * llama_ftype_to_name(llama_ftype ftype) { + for (const auto & e : ftype_name_table) { + if (e.ftype == ftype) { + return e.name; + } + } + return nullptr; +} + +void init_quantize_state_counters(quantize_state_impl & qs, const std::vector & tensor_names) { + for (const auto & name : tensor_names) { + tensor_category cat = tensor_get_category(name); + + if (category_is_attn_v(cat)) { + ++qs.n_attention_wv; + } + + if (tensor_name_match_output_weight(name.c_str())) { + qs.has_tied_embeddings = false; + } + } + qs.n_ffn_down = qs.n_ffn_gate = qs.n_ffn_up = (int)qs.model.hparams.n_layer; +} + // // main quantization driver // static void llama_model_quantize_impl(const std::string & fname_inp, const std::string & fname_out, const llama_model_quantize_params * params) { - ggml_type default_type; llama_ftype ftype = params->ftype; int nthread = params->nthread; @@ -841,7 +873,7 @@ static void llama_model_quantize_impl(const std::string & fname_inp, const std:: nthread = std::thread::hardware_concurrency(); } - default_type = llama_ftype_get_default_type(ftype); + ggml_type default_type = llama_ftype_get_default_type(ftype); // mmap consistently increases speed on Linux, and also increases speed on Windows with // hot cache. It may cause a slowdown on macOS, possibly related to free memory. @@ -878,8 +910,7 @@ static void llama_model_quantize_impl(const std::string & fname_inp, const std:: imatrix_data = static_cast>*>(params->imatrix); if (imatrix_data) { LLAMA_LOG_INFO("\n%s: have importance matrix data with %d entries\n", - __func__, (int)imatrix_data->size()); - qs.has_imatrix = true; + __func__, (int)imatrix_data->size()); qs.has_imatrix = true; // check imatrix for nans or infs for (const auto & kv : *imatrix_data) { for (float f : kv.second) { @@ -961,7 +992,17 @@ static void llama_model_quantize_impl(const std::string & fname_inp, const std:: }); } + { // Based on old loop + std::vector tensor_names; + tensor_names.reserve(tensors.size()); + for (const auto * it : tensors) { + tensor_names.emplace_back(ggml_get_name(it->tensor)); + } + init_quantize_state_counters(qs, tensor_names); + } + int idx = 0; + uint16_t n_split = 1; // Assume split index is continuous @@ -1013,7 +1054,7 @@ static void llama_model_quantize_impl(const std::string & fname_inp, const std:: metadata[i].allows_quantization = tensor_allows_quantization(params, model.arch, tensor); if (metadata[i].allows_quantization) { - metadata[i].target_type = llama_tensor_get_type(qs, params, tensor, default_type, metadata[i]); + metadata[i].target_type = llama_tensor_get_type_internal(qs, params, tensor, default_type, metadata[i]); } else { metadata[i].target_type = tensor->type; } @@ -1045,7 +1086,7 @@ static void llama_model_quantize_impl(const std::string & fname_inp, const std:: gguf_set_val_i32(ctx_outs[i].get(), ml.llm_kv(LLM_KV_SPLIT_TENSORS_COUNT).c_str(), (int32_t)tensors.size()); } } - + size_t total_size_org = 0; size_t total_size_new = 0; diff --git a/src/llama-quant.h b/src/llama-quant.h index 6f70f09bee..059f68fd09 100644 --- a/src/llama-quant.h +++ b/src/llama-quant.h @@ -1 +1,54 @@ #pragma once + +#include "llama.h" + +#include "ggml.h" + +#include "llama-arch.h" + +#include +#include + +struct llama_model; + +struct quantize_state_impl { + const llama_model & model; + const llama_model_quantize_params * params; + + int n_attention_wv = 0; + int n_ffn_down = 0; + int n_ffn_gate = 0; + int n_ffn_up = 0; + int i_attention_wv = 0; + int i_ffn_down = 0; + int i_ffn_gate = 0; + int i_ffn_up = 0; + + int n_k_quantized = 0; + int n_fallback = 0; + + bool has_imatrix = false; + + // used to figure out if a model shares tok_embd with the output weight + bool has_output = false; + + quantize_state_impl(const llama_model & model, const llama_model_quantize_params * params) + : model(model) + , params(params) + {} +}; + +ggml_type llama_tensor_get_type(quantize_state_impl & qs, ggml_type new_type, const ggml_tensor * tensor, llama_ftype ftype); +ggml_type llama_ftype_default_type(llama_ftype ftype); + +// Ftype name <-> enum conversions. +// Returns (llama_ftype)-1 on failure. +llama_ftype llama_ftype_from_name(const char * name); +const char * llama_ftype_to_name(llama_ftype ftype); + +// Initialize quantize_state_impl counters by scanning tensor names. +// tensor_names: all quantizable weight tensor names in the model. +void init_quantize_state_counters(quantize_state_impl & qs, const std::vector & tensor_names); + +// Returns true if this tensor should be quantized (based on name, dims, params). +bool tensor_allows_quantization(const llama_model_quantize_params * params, llm_arch arch, const ggml_tensor * tensor); diff --git a/tests/.gitignore b/tests/.gitignore index ba2b164fac..52b292b1f8 100644 --- a/tests/.gitignore +++ b/tests/.gitignore @@ -1,5 +1,6 @@ * !*.* +!snapshots/ *.o ggml-common.h **/*.swp diff --git a/tests/gguf-model-data.cpp b/tests/gguf-model-data.cpp index 3bc82c88da..aa550c9538 100644 --- a/tests/gguf-model-data.cpp +++ b/tests/gguf-model-data.cpp @@ -124,6 +124,34 @@ static bool gguf_skip_value(gguf_buf_reader & r, int32_t vtype) { } static bool gguf_read_uint32_val(gguf_buf_reader & r, int32_t vtype, uint32_t & out) { + // Handle array-valued fields (e.g. per-layer head counts in hybrid models) + // by reading the first element as a representative value. + if (vtype == GGUF_TYPE_ARRAY) { + int32_t elem_type; + uint64_t count; + if (!r.read_val(elem_type)) { + return false; + } + if (!r.read_val(count)) { + return false; + } + if (count == 0) { + return false; + } + // Read first element, skip the rest + if (!gguf_read_uint32_val(r, elem_type, out)) { + return false; + } + for (uint64_t i = 1; i < count; i++) { + size_t sz = gguf_val_type_size(elem_type); + if (sz == 0) { + return false; + } + if (!r.skip(sz)) { + return false; + } + } + } if (vtype == GGUF_TYPE_UINT8) { uint8_t v; if (!r.read_val(v)) { diff --git a/tests/snapshots/glm-4.6v.schema b/tests/snapshots/glm-4.6v.schema new file mode 100644 index 0000000000..841667a1b3 --- /dev/null +++ b/tests/snapshots/glm-4.6v.schema @@ -0,0 +1,2478 @@ +# Model: GLM-4.6V +# n_embd=4096, n_ff=10944, n_vocab=151552, n_layer=46, n_head=96, n_head_kv=8, n_expert=128 + +[F32] f32 +output.weight q6_K + +[F16] f16 +output.weight q6_K + +[Q4_0] q4_0 +output.weight q6_K + +[Q4_1] q4_1 +output.weight q6_K + +[Q8_0] q8_0 + +[Q5_0] q5_0 +output.weight q6_K + +[Q5_1] q5_1 +output.weight q6_K + +[Q2_K] q2_K +blk.0.ffn_down.weight q3_K +blk.0.attn_output.weight q3_K +blk.0.attn_v.weight q4_K +blk.1.ffn_down_exps.weight q3_K +blk.1.ffn_down_shexp.weight q3_K +blk.1.attn_output.weight q3_K +blk.1.attn_v.weight q4_K +blk.2.ffn_down_exps.weight q3_K +blk.2.ffn_down_shexp.weight q3_K +blk.2.attn_output.weight q3_K +blk.2.attn_v.weight q4_K +blk.3.ffn_down_exps.weight q3_K +blk.3.ffn_down_shexp.weight q3_K +blk.3.attn_output.weight q3_K +blk.3.attn_v.weight q4_K +blk.4.ffn_down_exps.weight q3_K +blk.4.ffn_down_shexp.weight q3_K +blk.4.attn_output.weight q3_K +blk.4.attn_v.weight q4_K +blk.5.ffn_down_exps.weight q3_K +blk.5.ffn_down_shexp.weight q3_K +blk.5.attn_output.weight q3_K +blk.5.attn_v.weight q4_K +blk.6.ffn_down_exps.weight q3_K +blk.6.ffn_down_shexp.weight q3_K +blk.6.attn_output.weight q3_K +blk.6.attn_v.weight q4_K +blk.7.ffn_down_exps.weight q3_K +blk.7.ffn_down_shexp.weight q3_K +blk.7.attn_output.weight q3_K +blk.7.attn_v.weight q4_K +blk.8.ffn_down_exps.weight q3_K +blk.8.ffn_down_shexp.weight q3_K +blk.8.attn_output.weight q3_K +blk.8.attn_v.weight q4_K +blk.9.ffn_down_exps.weight q3_K +blk.9.ffn_down_shexp.weight q3_K +blk.9.attn_output.weight q3_K +blk.9.attn_v.weight q4_K +blk.10.ffn_down_exps.weight q3_K +blk.10.ffn_down_shexp.weight q3_K +blk.10.attn_output.weight q3_K +blk.10.attn_v.weight q4_K +blk.11.ffn_down_exps.weight q3_K +blk.11.ffn_down_shexp.weight q3_K +blk.11.attn_output.weight q3_K +blk.11.attn_v.weight q4_K +blk.12.ffn_down_exps.weight q3_K +blk.12.ffn_down_shexp.weight q3_K +blk.12.attn_output.weight q3_K +blk.12.attn_v.weight q4_K +blk.13.ffn_down_exps.weight q3_K +blk.13.ffn_down_shexp.weight q3_K +blk.13.attn_output.weight q3_K +blk.13.attn_v.weight q4_K +blk.14.ffn_down_exps.weight q3_K +blk.14.ffn_down_shexp.weight q3_K +blk.14.attn_output.weight q3_K +blk.14.attn_v.weight q4_K +blk.15.ffn_down_exps.weight q3_K +blk.15.ffn_down_shexp.weight q3_K +blk.15.attn_output.weight q3_K +blk.15.attn_v.weight q4_K +blk.16.ffn_down_exps.weight q3_K +blk.16.ffn_down_shexp.weight q3_K +blk.16.attn_output.weight q3_K +blk.16.attn_v.weight q4_K +blk.17.ffn_down_exps.weight q3_K +blk.17.ffn_down_shexp.weight q3_K +blk.17.attn_output.weight q3_K +blk.17.attn_v.weight q4_K +blk.18.ffn_down_exps.weight q3_K +blk.18.ffn_down_shexp.weight q3_K +blk.18.attn_output.weight q3_K +blk.18.attn_v.weight q4_K +blk.19.ffn_down_exps.weight q3_K +blk.19.ffn_down_shexp.weight q3_K +blk.19.attn_output.weight q3_K +blk.19.attn_v.weight q4_K +blk.20.ffn_down_exps.weight q3_K +blk.20.ffn_down_shexp.weight q3_K +blk.20.attn_output.weight q3_K +blk.20.attn_v.weight q4_K +blk.21.ffn_down_exps.weight q3_K +blk.21.ffn_down_shexp.weight q3_K +blk.21.attn_output.weight q3_K +blk.21.attn_v.weight q4_K +blk.22.ffn_down_exps.weight q3_K +blk.22.ffn_down_shexp.weight q3_K +blk.22.attn_output.weight q3_K +blk.22.attn_v.weight q4_K +blk.23.ffn_down_exps.weight q3_K +blk.23.ffn_down_shexp.weight q3_K +blk.23.attn_output.weight q3_K +blk.23.attn_v.weight q4_K +blk.24.ffn_down_exps.weight q3_K +blk.24.ffn_down_shexp.weight q3_K +blk.24.attn_output.weight q3_K +blk.24.attn_v.weight q4_K +blk.25.ffn_down_exps.weight q3_K +blk.25.ffn_down_shexp.weight q3_K +blk.25.attn_output.weight q3_K +blk.25.attn_v.weight q4_K +blk.26.ffn_down_exps.weight q3_K +blk.26.ffn_down_shexp.weight q3_K +blk.26.attn_output.weight q3_K +blk.26.attn_v.weight q4_K +blk.27.ffn_down_exps.weight q3_K +blk.27.ffn_down_shexp.weight q3_K +blk.27.attn_output.weight q3_K +blk.27.attn_v.weight q4_K +blk.28.ffn_down_exps.weight q3_K +blk.28.ffn_down_shexp.weight q3_K +blk.28.attn_output.weight q3_K +blk.28.attn_v.weight q4_K +blk.29.ffn_down_exps.weight q3_K +blk.29.ffn_down_shexp.weight q3_K +blk.29.attn_output.weight q3_K +blk.29.attn_v.weight q4_K +blk.30.ffn_down_exps.weight q3_K +blk.30.ffn_down_shexp.weight q3_K +blk.30.attn_output.weight q3_K +blk.30.attn_v.weight q4_K +blk.31.ffn_down_exps.weight q3_K +blk.31.ffn_down_shexp.weight q3_K +blk.31.attn_output.weight q3_K +blk.31.attn_v.weight q4_K +blk.32.ffn_down_exps.weight q3_K +blk.32.ffn_down_shexp.weight q3_K +blk.32.attn_output.weight q3_K +blk.32.attn_v.weight q4_K +blk.33.ffn_down_exps.weight q3_K +blk.33.ffn_down_shexp.weight q3_K +blk.33.attn_output.weight q3_K +blk.33.attn_v.weight q4_K +blk.34.ffn_down_exps.weight q3_K +blk.34.attn_output.weight q3_K +blk.34.ffn_down_shexp.weight q3_K +blk.34.attn_v.weight q4_K +blk.35.ffn_down_exps.weight q3_K +blk.35.ffn_down_shexp.weight q3_K +blk.35.attn_output.weight q3_K +blk.35.attn_v.weight q4_K +blk.36.ffn_down_exps.weight q3_K +blk.36.ffn_down_shexp.weight q3_K +blk.36.attn_output.weight q3_K +blk.36.attn_v.weight q4_K +blk.37.ffn_down_exps.weight q3_K +blk.37.ffn_down_shexp.weight q3_K +blk.37.attn_output.weight q3_K +blk.37.attn_v.weight q4_K +blk.38.ffn_down_exps.weight q3_K +blk.38.ffn_down_shexp.weight q3_K +blk.38.attn_output.weight q3_K +blk.38.attn_v.weight q4_K +blk.39.ffn_down_exps.weight q3_K +blk.39.ffn_down_shexp.weight q3_K +blk.39.attn_output.weight q3_K +blk.39.attn_v.weight q4_K +blk.40.ffn_down_exps.weight q3_K +blk.40.ffn_down_shexp.weight q3_K +blk.40.attn_output.weight q3_K +blk.40.attn_v.weight q4_K +blk.41.ffn_down_exps.weight q3_K +blk.41.ffn_down_shexp.weight q3_K +blk.41.attn_output.weight q3_K +blk.41.attn_v.weight q4_K +blk.42.ffn_down_exps.weight q3_K +blk.42.ffn_down_shexp.weight q3_K +blk.42.attn_v.weight q4_K +blk.42.attn_output.weight q3_K +blk.43.ffn_down_exps.weight q3_K +blk.43.ffn_down_shexp.weight q3_K +blk.43.attn_output.weight q3_K +blk.43.attn_v.weight q4_K +blk.44.ffn_down_exps.weight q3_K +blk.44.ffn_down_shexp.weight q3_K +blk.44.attn_output.weight q3_K +blk.44.attn_v.weight q4_K +output.weight q6_K +blk.45.ffn_down_exps.weight q3_K +blk.45.ffn_down_shexp.weight q3_K +blk.45.attn_output.weight q3_K +blk.45.attn_v.weight q4_K + +[Q3_K_S] q3_K +output.weight q6_K + +[Q3_K_M] q3_K +blk.0.ffn_down.weight q5_K +blk.0.attn_output.weight q4_K +blk.0.attn_v.weight q5_K +blk.1.ffn_down_exps.weight q5_K +blk.1.ffn_down_shexp.weight q5_K +blk.1.attn_output.weight q4_K +blk.1.attn_v.weight q5_K +blk.2.ffn_down_exps.weight q4_K +blk.2.ffn_down_shexp.weight q4_K +blk.2.attn_output.weight q4_K +blk.2.attn_v.weight q4_K +blk.3.ffn_down_exps.weight q4_K +blk.3.ffn_down_shexp.weight q4_K +blk.3.attn_output.weight q4_K +blk.3.attn_v.weight q4_K +blk.4.ffn_down_exps.weight q4_K +blk.4.ffn_down_shexp.weight q4_K +blk.4.attn_output.weight q4_K +blk.4.attn_v.weight q4_K +blk.5.ffn_down_exps.weight q4_K +blk.5.ffn_down_shexp.weight q4_K +blk.5.attn_output.weight q4_K +blk.5.attn_v.weight q4_K +blk.6.ffn_down_exps.weight q4_K +blk.6.ffn_down_shexp.weight q4_K +blk.6.attn_output.weight q4_K +blk.6.attn_v.weight q4_K +blk.7.ffn_down_exps.weight q4_K +blk.7.ffn_down_shexp.weight q4_K +blk.7.attn_output.weight q4_K +blk.7.attn_v.weight q4_K +blk.8.ffn_down_exps.weight q4_K +blk.8.ffn_down_shexp.weight q4_K +blk.8.attn_output.weight q4_K +blk.8.attn_v.weight q4_K +blk.9.ffn_down_exps.weight q4_K +blk.9.ffn_down_shexp.weight q4_K +blk.9.attn_output.weight q4_K +blk.9.attn_v.weight q4_K +blk.10.ffn_down_exps.weight q4_K +blk.10.ffn_down_shexp.weight q4_K +blk.10.attn_output.weight q4_K +blk.10.attn_v.weight q4_K +blk.11.ffn_down_exps.weight q4_K +blk.11.ffn_down_shexp.weight q4_K +blk.11.attn_output.weight q4_K +blk.11.attn_v.weight q4_K +blk.12.ffn_down_exps.weight q4_K +blk.12.ffn_down_shexp.weight q4_K +blk.12.attn_output.weight q4_K +blk.12.attn_v.weight q4_K +blk.13.ffn_down_exps.weight q4_K +blk.13.ffn_down_shexp.weight q4_K +blk.13.attn_output.weight q4_K +blk.13.attn_v.weight q4_K +blk.14.ffn_down_exps.weight q4_K +blk.14.ffn_down_shexp.weight q4_K +blk.14.attn_output.weight q4_K +blk.14.attn_v.weight q4_K +blk.15.ffn_down_exps.weight q4_K +blk.15.ffn_down_shexp.weight q4_K +blk.15.attn_output.weight q4_K +blk.15.attn_v.weight q4_K +blk.16.ffn_down_exps.weight q4_K +blk.16.ffn_down_shexp.weight q4_K +blk.16.attn_output.weight q4_K +blk.16.attn_v.weight q4_K +blk.17.ffn_down_exps.weight q4_K +blk.17.ffn_down_shexp.weight q4_K +blk.17.attn_output.weight q4_K +blk.17.attn_v.weight q4_K +blk.18.ffn_down_exps.weight q4_K +blk.18.ffn_down_shexp.weight q4_K +blk.18.attn_output.weight q4_K +blk.18.attn_v.weight q4_K +blk.19.ffn_down_exps.weight q4_K +blk.19.ffn_down_shexp.weight q4_K +blk.19.attn_output.weight q4_K +blk.19.attn_v.weight q4_K +blk.20.ffn_down_exps.weight q4_K +blk.20.ffn_down_shexp.weight q4_K +blk.20.attn_output.weight q4_K +blk.20.attn_v.weight q4_K +blk.21.ffn_down_exps.weight q4_K +blk.21.ffn_down_shexp.weight q4_K +blk.21.attn_output.weight q4_K +blk.21.attn_v.weight q4_K +blk.22.ffn_down_exps.weight q4_K +blk.22.ffn_down_shexp.weight q4_K +blk.22.attn_output.weight q4_K +blk.22.attn_v.weight q4_K +blk.23.ffn_down_exps.weight q4_K +blk.23.ffn_down_shexp.weight q4_K +blk.23.attn_output.weight q4_K +blk.23.attn_v.weight q4_K +blk.24.ffn_down_exps.weight q4_K +blk.24.ffn_down_shexp.weight q4_K +blk.24.attn_output.weight q4_K +blk.24.attn_v.weight q4_K +blk.25.ffn_down_exps.weight q4_K +blk.25.ffn_down_shexp.weight q4_K +blk.25.attn_output.weight q4_K +blk.25.attn_v.weight q4_K +blk.26.ffn_down_exps.weight q4_K +blk.26.ffn_down_shexp.weight q4_K +blk.26.attn_output.weight q4_K +blk.26.attn_v.weight q4_K +blk.27.ffn_down_exps.weight q4_K +blk.27.ffn_down_shexp.weight q4_K +blk.27.attn_output.weight q4_K +blk.27.attn_v.weight q4_K +blk.28.ffn_down_exps.weight q4_K +blk.28.ffn_down_shexp.weight q4_K +blk.28.attn_output.weight q4_K +blk.28.attn_v.weight q4_K +blk.29.ffn_down_exps.weight q4_K +blk.29.ffn_down_shexp.weight q4_K +blk.29.attn_output.weight q4_K +blk.29.attn_v.weight q4_K +blk.30.ffn_down_exps.weight q4_K +blk.30.ffn_down_shexp.weight q4_K +blk.30.attn_output.weight q4_K +blk.30.attn_v.weight q4_K +blk.31.ffn_down_exps.weight q4_K +blk.31.ffn_down_shexp.weight q4_K +blk.31.attn_output.weight q4_K +blk.31.attn_v.weight q4_K +blk.32.ffn_down_exps.weight q4_K +blk.32.ffn_down_shexp.weight q4_K +blk.32.attn_output.weight q4_K +blk.32.attn_v.weight q4_K +blk.33.ffn_down_exps.weight q4_K +blk.33.ffn_down_shexp.weight q4_K +blk.33.attn_output.weight q4_K +blk.33.attn_v.weight q4_K +blk.34.ffn_down_exps.weight q4_K +blk.34.attn_output.weight q4_K +blk.34.ffn_down_shexp.weight q4_K +blk.34.attn_v.weight q4_K +blk.35.ffn_down_exps.weight q4_K +blk.35.ffn_down_shexp.weight q4_K +blk.35.attn_output.weight q4_K +blk.35.attn_v.weight q4_K +blk.36.ffn_down_exps.weight q4_K +blk.36.ffn_down_shexp.weight q4_K +blk.36.attn_output.weight q4_K +blk.36.attn_v.weight q4_K +blk.37.ffn_down_exps.weight q4_K +blk.37.ffn_down_shexp.weight q4_K +blk.37.attn_output.weight q4_K +blk.37.attn_v.weight q4_K +blk.38.ffn_down_exps.weight q4_K +blk.38.ffn_down_shexp.weight q4_K +blk.38.attn_output.weight q4_K +blk.38.attn_v.weight q4_K +blk.39.ffn_down_exps.weight q4_K +blk.39.ffn_down_shexp.weight q4_K +blk.39.attn_output.weight q4_K +blk.39.attn_v.weight q4_K +blk.40.ffn_down_exps.weight q4_K +blk.40.ffn_down_shexp.weight q4_K +blk.40.attn_output.weight q4_K +blk.40.attn_v.weight q4_K +blk.41.ffn_down_exps.weight q4_K +blk.41.ffn_down_shexp.weight q4_K +blk.41.attn_output.weight q4_K +blk.41.attn_v.weight q4_K +blk.42.ffn_down_exps.weight q4_K +blk.42.ffn_down_shexp.weight q4_K +blk.42.attn_v.weight q4_K +blk.42.attn_output.weight q4_K +blk.43.ffn_down_exps.weight q4_K +blk.43.ffn_down_shexp.weight q4_K +blk.43.attn_output.weight q4_K +blk.43.attn_v.weight q4_K +blk.44.ffn_down_exps.weight q4_K +blk.44.ffn_down_shexp.weight q4_K +blk.44.attn_output.weight q4_K +blk.44.attn_v.weight q4_K +output.weight q6_K +blk.45.ffn_down_exps.weight q4_K +blk.45.ffn_down_shexp.weight q4_K +blk.45.attn_output.weight q4_K +blk.45.attn_v.weight q4_K + +[Q3_K_L] q3_K +blk.0.ffn_down.weight q5_K +blk.0.attn_output.weight q5_K +blk.0.attn_v.weight q5_K +blk.1.ffn_down_exps.weight q5_K +blk.1.ffn_down_shexp.weight q5_K +blk.1.attn_output.weight q5_K +blk.1.attn_v.weight q5_K +blk.2.ffn_down_exps.weight q5_K +blk.2.ffn_down_shexp.weight q5_K +blk.2.attn_output.weight q5_K +blk.2.attn_v.weight q5_K +blk.3.ffn_down_exps.weight q5_K +blk.3.ffn_down_shexp.weight q5_K +blk.3.attn_output.weight q5_K +blk.3.attn_v.weight q5_K +blk.4.ffn_down_exps.weight q5_K +blk.4.ffn_down_shexp.weight q5_K +blk.4.attn_output.weight q5_K +blk.4.attn_v.weight q5_K +blk.5.ffn_down_exps.weight q5_K +blk.5.ffn_down_shexp.weight q5_K +blk.5.attn_output.weight q5_K +blk.5.attn_v.weight q5_K +blk.6.ffn_down_exps.weight q5_K +blk.6.ffn_down_shexp.weight q5_K +blk.6.attn_output.weight q5_K +blk.6.attn_v.weight q5_K +blk.7.ffn_down_exps.weight q5_K +blk.7.ffn_down_shexp.weight q5_K +blk.7.attn_output.weight q5_K +blk.7.attn_v.weight q5_K +blk.8.ffn_down_exps.weight q5_K +blk.8.ffn_down_shexp.weight q5_K +blk.8.attn_output.weight q5_K +blk.8.attn_v.weight q5_K +blk.9.ffn_down_exps.weight q5_K +blk.9.ffn_down_shexp.weight q5_K +blk.9.attn_output.weight q5_K +blk.9.attn_v.weight q5_K +blk.10.ffn_down_exps.weight q5_K +blk.10.ffn_down_shexp.weight q5_K +blk.10.attn_output.weight q5_K +blk.10.attn_v.weight q5_K +blk.11.ffn_down_exps.weight q5_K +blk.11.ffn_down_shexp.weight q5_K +blk.11.attn_output.weight q5_K +blk.11.attn_v.weight q5_K +blk.12.ffn_down_exps.weight q5_K +blk.12.ffn_down_shexp.weight q5_K +blk.12.attn_output.weight q5_K +blk.12.attn_v.weight q5_K +blk.13.ffn_down_exps.weight q5_K +blk.13.ffn_down_shexp.weight q5_K +blk.13.attn_output.weight q5_K +blk.13.attn_v.weight q5_K +blk.14.ffn_down_exps.weight q5_K +blk.14.ffn_down_shexp.weight q5_K +blk.14.attn_output.weight q5_K +blk.14.attn_v.weight q5_K +blk.15.ffn_down_exps.weight q5_K +blk.15.ffn_down_shexp.weight q5_K +blk.15.attn_output.weight q5_K +blk.15.attn_v.weight q5_K +blk.16.ffn_down_exps.weight q5_K +blk.16.ffn_down_shexp.weight q5_K +blk.16.attn_output.weight q5_K +blk.16.attn_v.weight q5_K +blk.17.ffn_down_exps.weight q5_K +blk.17.ffn_down_shexp.weight q5_K +blk.17.attn_output.weight q5_K +blk.17.attn_v.weight q5_K +blk.18.ffn_down_exps.weight q5_K +blk.18.ffn_down_shexp.weight q5_K +blk.18.attn_output.weight q5_K +blk.18.attn_v.weight q5_K +blk.19.ffn_down_exps.weight q5_K +blk.19.ffn_down_shexp.weight q5_K +blk.19.attn_output.weight q5_K +blk.19.attn_v.weight q5_K +blk.20.ffn_down_exps.weight q5_K +blk.20.ffn_down_shexp.weight q5_K +blk.20.attn_output.weight q5_K +blk.20.attn_v.weight q5_K +blk.21.ffn_down_exps.weight q5_K +blk.21.ffn_down_shexp.weight q5_K +blk.21.attn_output.weight q5_K +blk.21.attn_v.weight q5_K +blk.22.ffn_down_exps.weight q5_K +blk.22.ffn_down_shexp.weight q5_K +blk.22.attn_output.weight q5_K +blk.22.attn_v.weight q5_K +blk.23.ffn_down_exps.weight q5_K +blk.23.ffn_down_shexp.weight q5_K +blk.23.attn_output.weight q5_K +blk.23.attn_v.weight q5_K +blk.24.ffn_down_exps.weight q5_K +blk.24.ffn_down_shexp.weight q5_K +blk.24.attn_output.weight q5_K +blk.24.attn_v.weight q5_K +blk.25.ffn_down_exps.weight q5_K +blk.25.ffn_down_shexp.weight q5_K +blk.25.attn_output.weight q5_K +blk.25.attn_v.weight q5_K +blk.26.ffn_down_exps.weight q5_K +blk.26.ffn_down_shexp.weight q5_K +blk.26.attn_output.weight q5_K +blk.26.attn_v.weight q5_K +blk.27.ffn_down_exps.weight q5_K +blk.27.ffn_down_shexp.weight q5_K +blk.27.attn_output.weight q5_K +blk.27.attn_v.weight q5_K +blk.28.ffn_down_exps.weight q5_K +blk.28.ffn_down_shexp.weight q5_K +blk.28.attn_output.weight q5_K +blk.28.attn_v.weight q5_K +blk.29.ffn_down_exps.weight q5_K +blk.29.ffn_down_shexp.weight q5_K +blk.29.attn_output.weight q5_K +blk.29.attn_v.weight q5_K +blk.30.ffn_down_exps.weight q5_K +blk.30.ffn_down_shexp.weight q5_K +blk.30.attn_output.weight q5_K +blk.30.attn_v.weight q5_K +blk.31.ffn_down_exps.weight q5_K +blk.31.ffn_down_shexp.weight q5_K +blk.31.attn_output.weight q5_K +blk.31.attn_v.weight q5_K +blk.32.ffn_down_exps.weight q5_K +blk.32.ffn_down_shexp.weight q5_K +blk.32.attn_output.weight q5_K +blk.32.attn_v.weight q5_K +blk.33.ffn_down_exps.weight q5_K +blk.33.ffn_down_shexp.weight q5_K +blk.33.attn_output.weight q5_K +blk.33.attn_v.weight q5_K +blk.34.ffn_down_exps.weight q5_K +blk.34.attn_output.weight q5_K +blk.34.ffn_down_shexp.weight q5_K +blk.34.attn_v.weight q5_K +blk.35.ffn_down_exps.weight q5_K +blk.35.ffn_down_shexp.weight q5_K +blk.35.attn_output.weight q5_K +blk.35.attn_v.weight q5_K +blk.36.ffn_down_exps.weight q5_K +blk.36.ffn_down_shexp.weight q5_K +blk.36.attn_output.weight q5_K +blk.36.attn_v.weight q5_K +blk.37.ffn_down_exps.weight q5_K +blk.37.ffn_down_shexp.weight q5_K +blk.37.attn_output.weight q5_K +blk.37.attn_v.weight q5_K +blk.38.ffn_down_exps.weight q5_K +blk.38.ffn_down_shexp.weight q5_K +blk.38.attn_output.weight q5_K +blk.38.attn_v.weight q5_K +blk.39.ffn_down_exps.weight q5_K +blk.39.ffn_down_shexp.weight q5_K +blk.39.attn_output.weight q5_K +blk.39.attn_v.weight q5_K +blk.40.ffn_down_exps.weight q5_K +blk.40.ffn_down_shexp.weight q5_K +blk.40.attn_output.weight q5_K +blk.40.attn_v.weight q5_K +blk.41.ffn_down_exps.weight q5_K +blk.41.ffn_down_shexp.weight q5_K +blk.41.attn_output.weight q5_K +blk.41.attn_v.weight q5_K +blk.42.ffn_down_exps.weight q5_K +blk.42.ffn_down_shexp.weight q5_K +blk.42.attn_v.weight q5_K +blk.42.attn_output.weight q5_K +blk.43.ffn_down_exps.weight q5_K +blk.43.ffn_down_shexp.weight q5_K +blk.43.attn_output.weight q5_K +blk.43.attn_v.weight q5_K +blk.44.ffn_down_exps.weight q5_K +blk.44.ffn_down_shexp.weight q5_K +blk.44.attn_output.weight q5_K +blk.44.attn_v.weight q5_K +output.weight q6_K +blk.45.ffn_down_exps.weight q5_K +blk.45.ffn_down_shexp.weight q5_K +blk.45.attn_output.weight q5_K +blk.45.attn_v.weight q5_K + +[Q4_K_S] q4_K +blk.0.ffn_down.weight q5_K +blk.0.attn_v.weight q5_K +blk.1.ffn_down_exps.weight q5_K +blk.1.ffn_down_shexp.weight q5_K +blk.1.attn_v.weight q5_K +blk.2.ffn_down_exps.weight q5_K +blk.2.ffn_down_shexp.weight q5_K +blk.2.attn_v.weight q5_K +blk.3.ffn_down_exps.weight q5_K +blk.3.ffn_down_shexp.weight q5_K +blk.3.attn_v.weight q5_K +blk.4.ffn_down_exps.weight q5_K +blk.4.ffn_down_shexp.weight q5_K +output.weight q6_K + +[Q4_K_M] q4_K +blk.0.ffn_down.weight q6_K +blk.0.attn_v.weight q6_K +blk.1.ffn_down_exps.weight q6_K +blk.1.ffn_down_shexp.weight q6_K +blk.1.attn_v.weight q6_K +blk.2.ffn_down_exps.weight q6_K +blk.2.ffn_down_shexp.weight q6_K +blk.2.attn_v.weight q6_K +blk.3.ffn_down_exps.weight q6_K +blk.3.ffn_down_shexp.weight q6_K +blk.3.attn_v.weight q6_K +blk.4.ffn_down_exps.weight q6_K +blk.4.ffn_down_shexp.weight q6_K +blk.4.attn_v.weight q6_K +blk.7.ffn_down_exps.weight q6_K +blk.7.ffn_down_shexp.weight q6_K +blk.7.attn_v.weight q6_K +blk.10.ffn_down_exps.weight q6_K +blk.10.ffn_down_shexp.weight q6_K +blk.10.attn_v.weight q6_K +blk.13.ffn_down_exps.weight q6_K +blk.13.ffn_down_shexp.weight q6_K +blk.13.attn_v.weight q6_K +blk.16.ffn_down_exps.weight q6_K +blk.16.ffn_down_shexp.weight q6_K +blk.16.attn_v.weight q6_K +blk.19.ffn_down_exps.weight q6_K +blk.19.ffn_down_shexp.weight q6_K +blk.19.attn_v.weight q6_K +blk.22.ffn_down_exps.weight q6_K +blk.22.ffn_down_shexp.weight q6_K +blk.22.attn_v.weight q6_K +blk.25.ffn_down_exps.weight q6_K +blk.25.ffn_down_shexp.weight q6_K +blk.25.attn_v.weight q6_K +blk.28.ffn_down_exps.weight q6_K +blk.28.ffn_down_shexp.weight q6_K +blk.28.attn_v.weight q6_K +blk.31.ffn_down_exps.weight q6_K +blk.31.ffn_down_shexp.weight q6_K +blk.31.attn_v.weight q6_K +blk.34.ffn_down_exps.weight q6_K +blk.34.ffn_down_shexp.weight q6_K +blk.34.attn_v.weight q6_K +blk.37.ffn_down_exps.weight q6_K +blk.37.ffn_down_shexp.weight q6_K +blk.37.attn_v.weight q6_K +blk.40.ffn_down_exps.weight q6_K +blk.40.ffn_down_shexp.weight q6_K +blk.40.attn_v.weight q6_K +blk.41.ffn_down_exps.weight q6_K +blk.41.ffn_down_shexp.weight q6_K +blk.41.attn_v.weight q6_K +blk.42.ffn_down_exps.weight q6_K +blk.42.ffn_down_shexp.weight q6_K +blk.42.attn_v.weight q6_K +blk.43.ffn_down_exps.weight q6_K +blk.43.ffn_down_shexp.weight q6_K +blk.43.attn_v.weight q6_K +blk.44.ffn_down_exps.weight q6_K +blk.44.ffn_down_shexp.weight q6_K +blk.44.attn_v.weight q6_K +output.weight q6_K +blk.45.ffn_down_exps.weight q6_K +blk.45.ffn_down_shexp.weight q6_K +blk.45.attn_v.weight q6_K + +[Q5_K_S] q5_K +output.weight q6_K + +[Q5_K_M] q5_K +blk.0.ffn_down.weight q6_K +blk.0.attn_v.weight q6_K +blk.1.ffn_down_exps.weight q6_K +blk.1.ffn_down_shexp.weight q6_K +blk.1.attn_v.weight q6_K +blk.2.ffn_down_exps.weight q6_K +blk.2.ffn_down_shexp.weight q6_K +blk.2.attn_v.weight q6_K +blk.3.ffn_down_exps.weight q6_K +blk.3.ffn_down_shexp.weight q6_K +blk.3.attn_v.weight q6_K +blk.4.ffn_down_exps.weight q6_K +blk.4.ffn_down_shexp.weight q6_K +blk.4.attn_v.weight q6_K +blk.7.ffn_down_exps.weight q6_K +blk.7.ffn_down_shexp.weight q6_K +blk.7.attn_v.weight q6_K +blk.10.ffn_down_exps.weight q6_K +blk.10.ffn_down_shexp.weight q6_K +blk.10.attn_v.weight q6_K +blk.13.ffn_down_exps.weight q6_K +blk.13.ffn_down_shexp.weight q6_K +blk.13.attn_v.weight q6_K +blk.16.ffn_down_exps.weight q6_K +blk.16.ffn_down_shexp.weight q6_K +blk.16.attn_v.weight q6_K +blk.19.ffn_down_exps.weight q6_K +blk.19.ffn_down_shexp.weight q6_K +blk.19.attn_v.weight q6_K +blk.22.ffn_down_exps.weight q6_K +blk.22.ffn_down_shexp.weight q6_K +blk.22.attn_v.weight q6_K +blk.25.ffn_down_exps.weight q6_K +blk.25.ffn_down_shexp.weight q6_K +blk.25.attn_v.weight q6_K +blk.28.ffn_down_exps.weight q6_K +blk.28.ffn_down_shexp.weight q6_K +blk.28.attn_v.weight q6_K +blk.31.ffn_down_exps.weight q6_K +blk.31.ffn_down_shexp.weight q6_K +blk.31.attn_v.weight q6_K +blk.34.ffn_down_exps.weight q6_K +blk.34.ffn_down_shexp.weight q6_K +blk.34.attn_v.weight q6_K +blk.37.ffn_down_exps.weight q6_K +blk.37.ffn_down_shexp.weight q6_K +blk.37.attn_v.weight q6_K +blk.40.ffn_down_exps.weight q6_K +blk.40.ffn_down_shexp.weight q6_K +blk.40.attn_v.weight q6_K +blk.41.ffn_down_exps.weight q6_K +blk.41.ffn_down_shexp.weight q6_K +blk.41.attn_v.weight q6_K +blk.42.ffn_down_exps.weight q6_K +blk.42.ffn_down_shexp.weight q6_K +blk.42.attn_v.weight q6_K +blk.43.ffn_down_exps.weight q6_K +blk.43.ffn_down_shexp.weight q6_K +blk.43.attn_v.weight q6_K +blk.44.ffn_down_exps.weight q6_K +blk.44.ffn_down_shexp.weight q6_K +blk.44.attn_v.weight q6_K +output.weight q6_K +blk.45.ffn_down_exps.weight q6_K +blk.45.ffn_down_shexp.weight q6_K +blk.45.attn_v.weight q6_K + +[Q6_K] q6_K + +[IQ2_XXS] iq2_xxs +token_embd.weight q2_K +blk.0.ffn_down.weight q2_K +blk.0.attn_v.weight q4_K +blk.1.ffn_down_exps.weight q2_K +blk.1.ffn_down_shexp.weight q2_K +blk.1.attn_v.weight q4_K +blk.2.ffn_down_exps.weight q2_K +blk.2.ffn_down_shexp.weight q2_K +blk.2.attn_v.weight q4_K +blk.3.attn_v.weight q4_K +blk.4.attn_v.weight q4_K +blk.5.attn_v.weight q4_K +blk.6.attn_v.weight q4_K +blk.7.attn_v.weight q4_K +blk.8.attn_v.weight q4_K +blk.9.attn_v.weight q4_K +blk.10.attn_v.weight q4_K +blk.11.attn_v.weight q4_K +blk.12.attn_v.weight q4_K +blk.13.attn_v.weight q4_K +blk.14.attn_v.weight q4_K +blk.15.attn_v.weight q4_K +blk.16.attn_v.weight q4_K +blk.17.attn_v.weight q4_K +blk.18.attn_v.weight q4_K +blk.19.attn_v.weight q4_K +blk.20.attn_v.weight q4_K +blk.21.attn_v.weight q4_K +blk.22.attn_v.weight q4_K +blk.23.attn_v.weight q4_K +blk.24.attn_v.weight q4_K +blk.25.attn_v.weight q4_K +blk.26.attn_v.weight q4_K +blk.27.attn_v.weight q4_K +blk.28.attn_v.weight q4_K +blk.29.attn_v.weight q4_K +blk.30.attn_v.weight q4_K +blk.31.attn_v.weight q4_K +blk.32.attn_v.weight q4_K +blk.33.attn_v.weight q4_K +blk.34.attn_v.weight q4_K +blk.35.attn_v.weight q4_K +blk.36.attn_v.weight q4_K +blk.37.attn_v.weight q4_K +blk.38.attn_v.weight q4_K +blk.39.attn_v.weight q4_K +blk.40.attn_v.weight q4_K +blk.41.attn_v.weight q4_K +blk.42.attn_v.weight q4_K +blk.43.attn_v.weight q4_K +blk.44.attn_v.weight q4_K +output.weight q5_K +blk.45.attn_v.weight q4_K + +[IQ2_XS] iq2_xs +token_embd.weight q2_K +blk.0.ffn_down.weight q2_K +blk.0.attn_v.weight q4_K +blk.1.ffn_down_exps.weight q2_K +blk.1.ffn_down_shexp.weight q2_K +blk.1.attn_v.weight q4_K +blk.2.ffn_down_exps.weight q2_K +blk.2.ffn_down_shexp.weight q2_K +blk.2.attn_v.weight q4_K +blk.3.attn_v.weight q4_K +blk.4.attn_v.weight q4_K +blk.5.attn_v.weight q4_K +blk.6.attn_v.weight q4_K +blk.7.attn_v.weight q4_K +blk.8.attn_v.weight q4_K +blk.9.attn_v.weight q4_K +blk.10.attn_v.weight q4_K +blk.11.attn_v.weight q4_K +blk.12.attn_v.weight q4_K +blk.13.attn_v.weight q4_K +blk.14.attn_v.weight q4_K +blk.15.attn_v.weight q4_K +blk.16.attn_v.weight q4_K +blk.17.attn_v.weight q4_K +blk.18.attn_v.weight q4_K +blk.19.attn_v.weight q4_K +blk.20.attn_v.weight q4_K +blk.21.attn_v.weight q4_K +blk.22.attn_v.weight q4_K +blk.23.attn_v.weight q4_K +blk.24.attn_v.weight q4_K +blk.25.attn_v.weight q4_K +blk.26.attn_v.weight q4_K +blk.27.attn_v.weight q4_K +blk.28.attn_v.weight q4_K +blk.29.attn_v.weight q4_K +blk.30.attn_v.weight q4_K +blk.31.attn_v.weight q4_K +blk.32.attn_v.weight q4_K +blk.33.attn_v.weight q4_K +blk.34.attn_v.weight q4_K +blk.35.attn_v.weight q4_K +blk.36.attn_v.weight q4_K +blk.37.attn_v.weight q4_K +blk.38.attn_v.weight q4_K +blk.39.attn_v.weight q4_K +blk.40.attn_v.weight q4_K +blk.41.attn_v.weight q4_K +blk.42.attn_v.weight q4_K +blk.43.attn_v.weight q4_K +blk.44.attn_v.weight q4_K +output.weight q5_K +blk.45.attn_v.weight q4_K + +[Q2_K_S] q2_K +blk.0.ffn_down.weight q4_K +blk.0.attn_v.weight q4_K +blk.1.ffn_down_exps.weight q4_K +blk.1.ffn_down_shexp.weight q4_K +blk.1.attn_v.weight q4_K +blk.2.ffn_down_exps.weight q4_K +blk.2.ffn_down_shexp.weight q4_K +blk.2.attn_v.weight q4_K +blk.3.ffn_down_exps.weight q4_K +blk.3.ffn_down_shexp.weight q4_K +blk.3.attn_v.weight q4_K +blk.4.ffn_down_exps.weight q4_K +blk.4.ffn_down_shexp.weight q4_K +blk.4.attn_v.weight q4_K +blk.5.attn_v.weight q4_K +blk.6.attn_v.weight q4_K +blk.7.attn_v.weight q4_K +blk.8.attn_v.weight q4_K +blk.9.attn_v.weight q4_K +blk.10.attn_v.weight q4_K +blk.11.attn_v.weight q4_K +blk.12.attn_v.weight q4_K +blk.13.attn_v.weight q4_K +blk.14.attn_v.weight q4_K +blk.15.attn_v.weight q4_K +blk.16.attn_v.weight q4_K +blk.17.attn_v.weight q4_K +blk.18.attn_v.weight q4_K +blk.19.attn_v.weight q4_K +blk.20.attn_v.weight q4_K +blk.21.attn_v.weight q4_K +blk.22.attn_v.weight q4_K +blk.23.attn_v.weight q4_K +blk.24.attn_v.weight q4_K +blk.25.attn_v.weight q4_K +blk.26.attn_v.weight q4_K +blk.27.attn_v.weight q4_K +blk.28.attn_v.weight q4_K +blk.29.attn_v.weight q4_K +blk.30.attn_v.weight q4_K +blk.31.attn_v.weight q4_K +blk.32.attn_v.weight q4_K +blk.33.attn_v.weight q4_K +blk.34.attn_v.weight q4_K +blk.35.attn_v.weight q4_K +blk.36.attn_v.weight q4_K +blk.37.attn_v.weight q4_K +blk.38.attn_v.weight q4_K +blk.39.attn_v.weight q4_K +blk.40.attn_v.weight q4_K +blk.41.attn_v.weight q4_K +blk.42.attn_v.weight q4_K +blk.43.attn_v.weight q4_K +blk.44.attn_v.weight q4_K +output.weight q6_K +blk.45.attn_v.weight q4_K + +[IQ3_XS] iq3_s +blk.0.attn_k.weight iq3_xxs +blk.0.attn_q.weight iq3_xxs +blk.0.attn_v.weight q4_K +blk.1.attn_k.weight iq3_xxs +blk.1.attn_q.weight iq3_xxs +blk.1.attn_v.weight q4_K +blk.2.attn_k.weight iq3_xxs +blk.2.attn_q.weight iq3_xxs +blk.2.attn_v.weight q4_K +blk.3.attn_k.weight iq3_xxs +blk.3.attn_q.weight iq3_xxs +blk.3.attn_v.weight q4_K +blk.4.attn_k.weight iq3_xxs +blk.4.attn_q.weight iq3_xxs +blk.4.attn_v.weight q4_K +blk.5.ffn_gate_exps.weight iq3_xxs +blk.5.ffn_up_exps.weight iq3_xxs +blk.5.ffn_gate_shexp.weight iq3_xxs +blk.5.ffn_up_shexp.weight iq3_xxs +blk.5.attn_k.weight iq3_xxs +blk.5.attn_q.weight iq3_xxs +blk.5.attn_v.weight q4_K +blk.6.ffn_gate_exps.weight iq3_xxs +blk.6.ffn_up_exps.weight iq3_xxs +blk.6.ffn_gate_shexp.weight iq3_xxs +blk.6.ffn_up_shexp.weight iq3_xxs +blk.6.attn_k.weight iq3_xxs +blk.6.attn_q.weight iq3_xxs +blk.6.attn_v.weight q4_K +blk.7.ffn_gate_exps.weight iq3_xxs +blk.7.ffn_up_exps.weight iq3_xxs +blk.7.ffn_gate_shexp.weight iq3_xxs +blk.7.ffn_up_shexp.weight iq3_xxs +blk.7.attn_k.weight iq3_xxs +blk.7.attn_q.weight iq3_xxs +blk.7.attn_v.weight q4_K +blk.8.ffn_gate_exps.weight iq3_xxs +blk.8.ffn_up_exps.weight iq3_xxs +blk.8.ffn_gate_shexp.weight iq3_xxs +blk.8.ffn_up_shexp.weight iq3_xxs +blk.8.attn_k.weight iq3_xxs +blk.8.attn_q.weight iq3_xxs +blk.8.attn_v.weight q4_K +blk.9.ffn_gate_exps.weight iq3_xxs +blk.9.ffn_up_exps.weight iq3_xxs +blk.9.ffn_gate_shexp.weight iq3_xxs +blk.9.ffn_up_shexp.weight iq3_xxs +blk.9.attn_k.weight iq3_xxs +blk.9.attn_q.weight iq3_xxs +blk.9.attn_v.weight q4_K +blk.10.ffn_gate_exps.weight iq3_xxs +blk.10.ffn_up_exps.weight iq3_xxs +blk.10.ffn_gate_shexp.weight iq3_xxs +blk.10.ffn_up_shexp.weight iq3_xxs +blk.10.attn_k.weight iq3_xxs +blk.10.attn_q.weight iq3_xxs +blk.10.attn_v.weight q4_K +blk.11.ffn_gate_exps.weight iq3_xxs +blk.11.ffn_up_exps.weight iq3_xxs +blk.11.ffn_gate_shexp.weight iq3_xxs +blk.11.ffn_up_shexp.weight iq3_xxs +blk.11.attn_k.weight iq3_xxs +blk.11.attn_q.weight iq3_xxs +blk.11.attn_v.weight q4_K +blk.12.ffn_gate_exps.weight iq3_xxs +blk.12.ffn_up_exps.weight iq3_xxs +blk.12.ffn_gate_shexp.weight iq3_xxs +blk.12.ffn_up_shexp.weight iq3_xxs +blk.12.attn_k.weight iq3_xxs +blk.12.attn_q.weight iq3_xxs +blk.12.attn_v.weight q4_K +blk.13.ffn_gate_exps.weight iq3_xxs +blk.13.ffn_up_exps.weight iq3_xxs +blk.13.ffn_gate_shexp.weight iq3_xxs +blk.13.ffn_up_shexp.weight iq3_xxs +blk.13.attn_k.weight iq3_xxs +blk.13.attn_q.weight iq3_xxs +blk.13.attn_v.weight q4_K +blk.14.ffn_gate_exps.weight iq3_xxs +blk.14.ffn_up_exps.weight iq3_xxs +blk.14.ffn_gate_shexp.weight iq3_xxs +blk.14.ffn_up_shexp.weight iq3_xxs +blk.14.attn_k.weight iq3_xxs +blk.14.attn_q.weight iq3_xxs +blk.14.attn_v.weight q4_K +blk.15.ffn_gate_exps.weight iq3_xxs +blk.15.ffn_up_exps.weight iq3_xxs +blk.15.ffn_gate_shexp.weight iq3_xxs +blk.15.ffn_up_shexp.weight iq3_xxs +blk.15.attn_k.weight iq3_xxs +blk.15.attn_q.weight iq3_xxs +blk.15.attn_v.weight q4_K +blk.16.ffn_gate_exps.weight iq3_xxs +blk.16.ffn_up_exps.weight iq3_xxs +blk.16.ffn_gate_shexp.weight iq3_xxs +blk.16.ffn_up_shexp.weight iq3_xxs +blk.16.attn_k.weight iq3_xxs +blk.16.attn_q.weight iq3_xxs +blk.16.attn_v.weight q4_K +blk.17.ffn_gate_exps.weight iq3_xxs +blk.17.ffn_up_exps.weight iq3_xxs +blk.17.ffn_gate_shexp.weight iq3_xxs +blk.17.ffn_up_shexp.weight iq3_xxs +blk.17.attn_k.weight iq3_xxs +blk.17.attn_q.weight iq3_xxs +blk.17.attn_v.weight q4_K +blk.18.ffn_gate_exps.weight iq3_xxs +blk.18.ffn_up_exps.weight iq3_xxs +blk.18.ffn_gate_shexp.weight iq3_xxs +blk.18.ffn_up_shexp.weight iq3_xxs +blk.18.attn_k.weight iq3_xxs +blk.18.attn_q.weight iq3_xxs +blk.18.attn_v.weight q4_K +blk.19.ffn_gate_exps.weight iq3_xxs +blk.19.ffn_up_exps.weight iq3_xxs +blk.19.ffn_gate_shexp.weight iq3_xxs +blk.19.ffn_up_shexp.weight iq3_xxs +blk.19.attn_k.weight iq3_xxs +blk.19.attn_q.weight iq3_xxs +blk.19.attn_v.weight q4_K +blk.20.ffn_gate_exps.weight iq3_xxs +blk.20.ffn_up_exps.weight iq3_xxs +blk.20.ffn_gate_shexp.weight iq3_xxs +blk.20.ffn_up_shexp.weight iq3_xxs +blk.20.attn_k.weight iq3_xxs +blk.20.attn_q.weight iq3_xxs +blk.20.attn_v.weight q4_K +blk.21.ffn_gate_exps.weight iq3_xxs +blk.21.ffn_up_exps.weight iq3_xxs +blk.21.ffn_gate_shexp.weight iq3_xxs +blk.21.ffn_up_shexp.weight iq3_xxs +blk.21.attn_k.weight iq3_xxs +blk.21.attn_q.weight iq3_xxs +blk.21.attn_v.weight q4_K +blk.22.ffn_gate_exps.weight iq3_xxs +blk.22.ffn_up_exps.weight iq3_xxs +blk.22.ffn_gate_shexp.weight iq3_xxs +blk.22.ffn_up_shexp.weight iq3_xxs +blk.22.attn_k.weight iq3_xxs +blk.22.attn_q.weight iq3_xxs +blk.22.attn_v.weight q4_K +blk.23.ffn_gate_exps.weight iq3_xxs +blk.23.ffn_up_exps.weight iq3_xxs +blk.23.ffn_gate_shexp.weight iq3_xxs +blk.23.ffn_up_shexp.weight iq3_xxs +blk.23.attn_k.weight iq3_xxs +blk.23.attn_q.weight iq3_xxs +blk.23.attn_v.weight q4_K +blk.24.ffn_gate_exps.weight iq3_xxs +blk.24.ffn_up_exps.weight iq3_xxs +blk.24.ffn_gate_shexp.weight iq3_xxs +blk.24.ffn_up_shexp.weight iq3_xxs +blk.24.attn_k.weight iq3_xxs +blk.24.attn_q.weight iq3_xxs +blk.24.attn_v.weight q4_K +blk.25.ffn_gate_exps.weight iq3_xxs +blk.25.ffn_up_exps.weight iq3_xxs +blk.25.ffn_gate_shexp.weight iq3_xxs +blk.25.ffn_up_shexp.weight iq3_xxs +blk.25.attn_k.weight iq3_xxs +blk.25.attn_q.weight iq3_xxs +blk.25.attn_v.weight q4_K +blk.26.ffn_gate_exps.weight iq3_xxs +blk.26.ffn_up_exps.weight iq3_xxs +blk.26.ffn_gate_shexp.weight iq3_xxs +blk.26.ffn_up_shexp.weight iq3_xxs +blk.26.attn_k.weight iq3_xxs +blk.26.attn_q.weight iq3_xxs +blk.26.attn_v.weight q4_K +blk.27.ffn_gate_exps.weight iq3_xxs +blk.27.ffn_up_exps.weight iq3_xxs +blk.27.ffn_gate_shexp.weight iq3_xxs +blk.27.ffn_up_shexp.weight iq3_xxs +blk.27.attn_k.weight iq3_xxs +blk.27.attn_q.weight iq3_xxs +blk.27.attn_v.weight q4_K +blk.28.ffn_gate_exps.weight iq3_xxs +blk.28.ffn_up_exps.weight iq3_xxs +blk.28.ffn_gate_shexp.weight iq3_xxs +blk.28.ffn_up_shexp.weight iq3_xxs +blk.28.attn_k.weight iq3_xxs +blk.28.attn_q.weight iq3_xxs +blk.28.attn_v.weight q4_K +blk.29.ffn_gate_exps.weight iq3_xxs +blk.29.ffn_up_exps.weight iq3_xxs +blk.29.ffn_gate_shexp.weight iq3_xxs +blk.29.ffn_up_shexp.weight iq3_xxs +blk.29.attn_k.weight iq3_xxs +blk.29.attn_q.weight iq3_xxs +blk.29.attn_v.weight q4_K +blk.30.ffn_gate_exps.weight iq3_xxs +blk.30.ffn_up_exps.weight iq3_xxs +blk.30.ffn_gate_shexp.weight iq3_xxs +blk.30.ffn_up_shexp.weight iq3_xxs +blk.30.attn_k.weight iq3_xxs +blk.30.attn_q.weight iq3_xxs +blk.30.attn_v.weight q4_K +blk.31.ffn_gate_exps.weight iq3_xxs +blk.31.ffn_up_exps.weight iq3_xxs +blk.31.ffn_gate_shexp.weight iq3_xxs +blk.31.ffn_up_shexp.weight iq3_xxs +blk.31.attn_k.weight iq3_xxs +blk.31.attn_q.weight iq3_xxs +blk.31.attn_v.weight q4_K +blk.32.ffn_gate_exps.weight iq3_xxs +blk.32.ffn_up_exps.weight iq3_xxs +blk.32.ffn_gate_shexp.weight iq3_xxs +blk.32.ffn_up_shexp.weight iq3_xxs +blk.32.attn_k.weight iq3_xxs +blk.32.attn_q.weight iq3_xxs +blk.32.attn_v.weight q4_K +blk.33.ffn_gate_exps.weight iq3_xxs +blk.33.ffn_up_exps.weight iq3_xxs +blk.33.ffn_gate_shexp.weight iq3_xxs +blk.33.ffn_up_shexp.weight iq3_xxs +blk.33.attn_k.weight iq3_xxs +blk.33.attn_q.weight iq3_xxs +blk.33.attn_v.weight q4_K +blk.34.ffn_gate_exps.weight iq3_xxs +blk.34.ffn_up_exps.weight iq3_xxs +blk.34.ffn_gate_shexp.weight iq3_xxs +blk.34.ffn_up_shexp.weight iq3_xxs +blk.34.attn_k.weight iq3_xxs +blk.34.attn_q.weight iq3_xxs +blk.34.attn_v.weight q4_K +blk.35.ffn_gate_exps.weight iq3_xxs +blk.35.ffn_up_exps.weight iq3_xxs +blk.35.ffn_gate_shexp.weight iq3_xxs +blk.35.ffn_up_shexp.weight iq3_xxs +blk.35.attn_k.weight iq3_xxs +blk.35.attn_q.weight iq3_xxs +blk.35.attn_v.weight q4_K +blk.36.ffn_gate_exps.weight iq3_xxs +blk.36.ffn_up_exps.weight iq3_xxs +blk.36.ffn_gate_shexp.weight iq3_xxs +blk.36.ffn_up_shexp.weight iq3_xxs +blk.36.attn_k.weight iq3_xxs +blk.36.attn_q.weight iq3_xxs +blk.36.attn_v.weight q4_K +blk.37.ffn_gate_exps.weight iq3_xxs +blk.37.ffn_up_exps.weight iq3_xxs +blk.37.ffn_gate_shexp.weight iq3_xxs +blk.37.ffn_up_shexp.weight iq3_xxs +blk.37.attn_k.weight iq3_xxs +blk.37.attn_q.weight iq3_xxs +blk.37.attn_v.weight q4_K +blk.38.ffn_gate_exps.weight iq3_xxs +blk.38.ffn_up_exps.weight iq3_xxs +blk.38.ffn_gate_shexp.weight iq3_xxs +blk.38.ffn_up_shexp.weight iq3_xxs +blk.38.attn_k.weight iq3_xxs +blk.38.attn_q.weight iq3_xxs +blk.38.attn_v.weight q4_K +blk.39.ffn_gate_exps.weight iq3_xxs +blk.39.ffn_up_exps.weight iq3_xxs +blk.39.ffn_gate_shexp.weight iq3_xxs +blk.39.ffn_up_shexp.weight iq3_xxs +blk.39.attn_k.weight iq3_xxs +blk.39.attn_q.weight iq3_xxs +blk.39.attn_v.weight q4_K +blk.40.attn_k.weight iq3_xxs +blk.40.attn_q.weight iq3_xxs +blk.40.attn_v.weight q4_K +blk.41.attn_k.weight iq3_xxs +blk.41.attn_q.weight iq3_xxs +blk.41.attn_v.weight q4_K +blk.42.attn_k.weight iq3_xxs +blk.42.attn_q.weight iq3_xxs +blk.42.attn_v.weight q4_K +blk.43.attn_k.weight iq3_xxs +blk.43.attn_q.weight iq3_xxs +blk.43.attn_v.weight q4_K +blk.44.attn_k.weight iq3_xxs +blk.44.attn_q.weight iq3_xxs +blk.44.attn_v.weight q4_K +output.weight q6_K +blk.45.attn_k.weight iq3_xxs +blk.45.attn_q.weight iq3_xxs +blk.45.attn_v.weight q4_K + +[IQ3_XXS] iq3_xxs +token_embd.weight iq3_s +blk.0.ffn_down.weight q4_K +blk.0.attn_k.weight iq2_s +blk.0.attn_output.weight iq3_s +blk.0.attn_q.weight iq2_s +blk.0.attn_v.weight q4_K +blk.1.ffn_down_exps.weight q4_K +blk.1.ffn_down_shexp.weight q4_K +blk.1.attn_k.weight iq2_s +blk.1.attn_output.weight iq3_s +blk.1.attn_q.weight iq2_s +blk.1.attn_v.weight q4_K +blk.2.ffn_down_exps.weight q4_K +blk.2.ffn_down_shexp.weight q4_K +blk.2.attn_k.weight iq2_s +blk.2.attn_output.weight iq3_s +blk.2.attn_q.weight iq2_s +blk.2.attn_v.weight q4_K +blk.3.ffn_down_exps.weight q4_K +blk.3.ffn_down_shexp.weight q4_K +blk.3.attn_k.weight iq2_s +blk.3.attn_output.weight iq3_s +blk.3.attn_q.weight iq2_s +blk.3.attn_v.weight q4_K +blk.4.ffn_down_exps.weight q4_K +blk.4.ffn_down_shexp.weight q4_K +blk.4.attn_k.weight iq2_s +blk.4.attn_output.weight iq3_s +blk.4.attn_q.weight iq2_s +blk.4.attn_v.weight q4_K +blk.5.ffn_down_exps.weight q3_K +blk.5.ffn_down_shexp.weight q3_K +blk.5.attn_k.weight iq2_s +blk.5.attn_output.weight iq3_s +blk.5.attn_q.weight iq2_s +blk.5.attn_v.weight q4_K +blk.6.ffn_down_exps.weight q3_K +blk.6.ffn_down_shexp.weight q3_K +blk.6.attn_k.weight iq2_s +blk.6.attn_output.weight iq3_s +blk.6.attn_q.weight iq2_s +blk.6.attn_v.weight q4_K +blk.7.ffn_down_exps.weight q3_K +blk.7.ffn_down_shexp.weight q3_K +blk.7.attn_k.weight iq2_s +blk.7.attn_output.weight iq3_s +blk.7.attn_q.weight iq2_s +blk.7.attn_v.weight q4_K +blk.8.ffn_down_exps.weight q3_K +blk.8.ffn_down_shexp.weight q3_K +blk.8.attn_k.weight iq2_s +blk.8.attn_output.weight iq3_s +blk.8.attn_q.weight iq2_s +blk.8.attn_v.weight q4_K +blk.9.ffn_down_exps.weight q3_K +blk.9.ffn_down_shexp.weight q3_K +blk.9.attn_k.weight iq2_s +blk.9.attn_output.weight iq3_s +blk.9.attn_q.weight iq2_s +blk.9.attn_v.weight q4_K +blk.10.ffn_down_exps.weight q3_K +blk.10.ffn_down_shexp.weight q3_K +blk.10.attn_k.weight iq2_s +blk.10.attn_output.weight iq3_s +blk.10.attn_q.weight iq2_s +blk.10.attn_v.weight q4_K +blk.11.ffn_down_exps.weight q3_K +blk.11.ffn_down_shexp.weight q3_K +blk.11.attn_k.weight iq2_s +blk.11.attn_output.weight iq3_s +blk.11.attn_q.weight iq2_s +blk.11.attn_v.weight q4_K +blk.12.ffn_down_exps.weight q3_K +blk.12.ffn_down_shexp.weight q3_K +blk.12.attn_k.weight iq2_s +blk.12.attn_output.weight iq3_s +blk.12.attn_q.weight iq2_s +blk.12.attn_v.weight q4_K +blk.13.ffn_down_exps.weight q3_K +blk.13.ffn_down_shexp.weight q3_K +blk.13.attn_k.weight iq2_s +blk.13.attn_output.weight iq3_s +blk.13.attn_q.weight iq2_s +blk.13.attn_v.weight q4_K +blk.14.ffn_down_exps.weight q3_K +blk.14.ffn_down_shexp.weight q3_K +blk.14.attn_k.weight iq2_s +blk.14.attn_output.weight iq3_s +blk.14.attn_q.weight iq2_s +blk.14.attn_v.weight q4_K +blk.15.ffn_down_exps.weight q3_K +blk.15.ffn_down_shexp.weight q3_K +blk.15.attn_k.weight iq2_s +blk.15.attn_output.weight iq3_s +blk.15.attn_q.weight iq2_s +blk.15.attn_v.weight q4_K +blk.16.ffn_down_exps.weight q3_K +blk.16.ffn_down_shexp.weight q3_K +blk.16.attn_k.weight iq2_s +blk.16.attn_output.weight iq3_s +blk.16.attn_q.weight iq2_s +blk.16.attn_v.weight q4_K +blk.17.ffn_down_exps.weight q3_K +blk.17.ffn_down_shexp.weight q3_K +blk.17.attn_k.weight iq2_s +blk.17.attn_output.weight iq3_s +blk.17.attn_q.weight iq2_s +blk.17.attn_v.weight q4_K +blk.18.ffn_down_exps.weight q3_K +blk.18.ffn_down_shexp.weight q3_K +blk.18.attn_k.weight iq2_s +blk.18.attn_output.weight iq3_s +blk.18.attn_q.weight iq2_s +blk.18.attn_v.weight q4_K +blk.19.ffn_down_exps.weight q3_K +blk.19.ffn_down_shexp.weight q3_K +blk.19.attn_k.weight iq2_s +blk.19.attn_output.weight iq3_s +blk.19.attn_q.weight iq2_s +blk.19.attn_v.weight q4_K +blk.20.ffn_down_exps.weight q3_K +blk.20.ffn_down_shexp.weight q3_K +blk.20.attn_k.weight iq2_s +blk.20.attn_output.weight iq3_s +blk.20.attn_q.weight iq2_s +blk.20.attn_v.weight q4_K +blk.21.ffn_down_exps.weight q3_K +blk.21.ffn_down_shexp.weight q3_K +blk.21.attn_k.weight iq2_s +blk.21.attn_output.weight iq3_s +blk.21.attn_q.weight iq2_s +blk.21.attn_v.weight q4_K +blk.22.ffn_down_exps.weight q3_K +blk.22.ffn_down_shexp.weight q3_K +blk.22.attn_k.weight iq2_s +blk.22.attn_output.weight iq3_s +blk.22.attn_q.weight iq2_s +blk.22.attn_v.weight q4_K +blk.23.ffn_down_exps.weight q3_K +blk.23.ffn_down_shexp.weight q3_K +blk.23.attn_k.weight iq2_s +blk.23.attn_output.weight iq3_s +blk.23.attn_q.weight iq2_s +blk.23.attn_v.weight q4_K +blk.24.ffn_down_exps.weight q3_K +blk.24.ffn_down_shexp.weight q3_K +blk.24.attn_k.weight iq2_s +blk.24.attn_output.weight iq3_s +blk.24.attn_q.weight iq2_s +blk.24.attn_v.weight q4_K +blk.25.ffn_down_exps.weight q3_K +blk.25.ffn_down_shexp.weight q3_K +blk.25.attn_k.weight iq2_s +blk.25.attn_output.weight iq3_s +blk.25.attn_q.weight iq2_s +blk.25.attn_v.weight q4_K +blk.26.ffn_down_exps.weight q3_K +blk.26.ffn_down_shexp.weight q3_K +blk.26.attn_k.weight iq2_s +blk.26.attn_output.weight iq3_s +blk.26.attn_q.weight iq2_s +blk.26.attn_v.weight q4_K +blk.27.ffn_down_exps.weight q3_K +blk.27.ffn_down_shexp.weight q3_K +blk.27.attn_k.weight iq2_s +blk.27.attn_output.weight iq3_s +blk.27.attn_q.weight iq2_s +blk.27.attn_v.weight q4_K +blk.28.ffn_down_exps.weight q3_K +blk.28.ffn_down_shexp.weight q3_K +blk.28.attn_k.weight iq2_s +blk.28.attn_output.weight iq3_s +blk.28.attn_q.weight iq2_s +blk.28.attn_v.weight q4_K +blk.29.ffn_down_exps.weight q3_K +blk.29.ffn_down_shexp.weight q3_K +blk.29.attn_k.weight iq2_s +blk.29.attn_output.weight iq3_s +blk.29.attn_q.weight iq2_s +blk.29.attn_v.weight q4_K +blk.30.ffn_down_exps.weight q3_K +blk.30.ffn_down_shexp.weight q3_K +blk.30.attn_k.weight iq2_s +blk.30.attn_output.weight iq3_s +blk.30.attn_q.weight iq2_s +blk.30.attn_v.weight q4_K +blk.31.ffn_down_exps.weight q3_K +blk.31.ffn_down_shexp.weight q3_K +blk.31.attn_k.weight iq2_s +blk.31.attn_output.weight iq3_s +blk.31.attn_q.weight iq2_s +blk.31.attn_v.weight q4_K +blk.32.ffn_down_exps.weight q3_K +blk.32.ffn_down_shexp.weight q3_K +blk.32.attn_k.weight iq2_s +blk.32.attn_output.weight iq3_s +blk.32.attn_q.weight iq2_s +blk.32.attn_v.weight q4_K +blk.33.ffn_down_exps.weight q3_K +blk.33.ffn_down_shexp.weight q3_K +blk.33.attn_k.weight iq2_s +blk.33.attn_output.weight iq3_s +blk.33.attn_q.weight iq2_s +blk.33.attn_v.weight q4_K +blk.34.ffn_down_exps.weight q3_K +blk.34.attn_output.weight iq3_s +blk.34.ffn_down_shexp.weight q3_K +blk.34.attn_k.weight iq2_s +blk.34.attn_q.weight iq2_s +blk.34.attn_v.weight q4_K +blk.35.ffn_down_exps.weight q3_K +blk.35.ffn_down_shexp.weight q3_K +blk.35.attn_k.weight iq2_s +blk.35.attn_output.weight iq3_s +blk.35.attn_q.weight iq2_s +blk.35.attn_v.weight q4_K +blk.36.ffn_down_exps.weight q3_K +blk.36.ffn_down_shexp.weight q3_K +blk.36.attn_k.weight iq2_s +blk.36.attn_output.weight iq3_s +blk.36.attn_q.weight iq2_s +blk.36.attn_v.weight q4_K +blk.37.ffn_down_exps.weight q3_K +blk.37.ffn_down_shexp.weight q3_K +blk.37.attn_k.weight iq2_s +blk.37.attn_output.weight iq3_s +blk.37.attn_q.weight iq2_s +blk.37.attn_v.weight q4_K +blk.38.ffn_down_exps.weight q3_K +blk.38.ffn_down_shexp.weight q3_K +blk.38.attn_k.weight iq2_s +blk.38.attn_output.weight iq3_s +blk.38.attn_q.weight iq2_s +blk.38.attn_v.weight q4_K +blk.39.ffn_down_exps.weight q3_K +blk.39.ffn_down_shexp.weight q3_K +blk.39.attn_k.weight iq2_s +blk.39.attn_output.weight iq3_s +blk.39.attn_q.weight iq2_s +blk.39.attn_v.weight q4_K +blk.40.ffn_down_exps.weight q3_K +blk.40.ffn_down_shexp.weight q3_K +blk.40.attn_k.weight iq2_s +blk.40.attn_output.weight iq3_s +blk.40.attn_q.weight iq2_s +blk.40.attn_v.weight q4_K +blk.41.ffn_down_exps.weight q3_K +blk.41.ffn_down_shexp.weight q3_K +blk.41.attn_k.weight iq2_s +blk.41.attn_output.weight iq3_s +blk.41.attn_q.weight iq2_s +blk.41.attn_v.weight q4_K +blk.42.ffn_down_exps.weight q3_K +blk.42.ffn_down_shexp.weight q3_K +blk.42.attn_k.weight iq2_s +blk.42.attn_q.weight iq2_s +blk.42.attn_v.weight q4_K +blk.42.attn_output.weight iq3_s +blk.43.ffn_down_exps.weight q3_K +blk.43.ffn_down_shexp.weight q3_K +blk.43.attn_k.weight iq2_s +blk.43.attn_output.weight iq3_s +blk.43.attn_q.weight iq2_s +blk.43.attn_v.weight q4_K +blk.44.ffn_down_exps.weight q3_K +blk.44.ffn_down_shexp.weight q3_K +blk.44.attn_k.weight iq2_s +blk.44.attn_output.weight iq3_s +blk.44.attn_q.weight iq2_s +blk.44.attn_v.weight q4_K +output.weight q5_K +blk.45.ffn_down_exps.weight q3_K +blk.45.ffn_down_shexp.weight q3_K +blk.45.attn_k.weight iq2_s +blk.45.attn_output.weight iq3_s +blk.45.attn_q.weight iq2_s +blk.45.attn_v.weight q4_K + +[IQ1_S] iq1_s +token_embd.weight q2_K +blk.0.ffn_down.weight q2_K +blk.0.attn_output.weight iq2_xxs +blk.0.attn_v.weight q4_K +blk.1.ffn_down_exps.weight q2_K +blk.1.ffn_down_shexp.weight q2_K +blk.1.attn_output.weight iq2_xxs +blk.1.attn_v.weight q4_K +blk.2.ffn_down_exps.weight q2_K +blk.2.ffn_down_shexp.weight q2_K +blk.2.attn_output.weight iq2_xxs +blk.2.attn_v.weight q4_K +blk.3.attn_output.weight iq2_xxs +blk.3.attn_v.weight q4_K +blk.4.attn_output.weight iq2_xxs +blk.4.attn_v.weight q4_K +blk.5.attn_output.weight iq2_xxs +blk.5.attn_v.weight q4_K +blk.6.attn_output.weight iq2_xxs +blk.6.attn_v.weight q4_K +blk.7.attn_output.weight iq2_xxs +blk.7.attn_v.weight q4_K +blk.8.attn_output.weight iq2_xxs +blk.8.attn_v.weight q4_K +blk.9.attn_output.weight iq2_xxs +blk.9.attn_v.weight q4_K +blk.10.attn_output.weight iq2_xxs +blk.10.attn_v.weight q4_K +blk.11.attn_output.weight iq2_xxs +blk.11.attn_v.weight q4_K +blk.12.attn_output.weight iq2_xxs +blk.12.attn_v.weight q4_K +blk.13.attn_output.weight iq2_xxs +blk.13.attn_v.weight q4_K +blk.14.attn_output.weight iq2_xxs +blk.14.attn_v.weight q4_K +blk.15.attn_output.weight iq2_xxs +blk.15.attn_v.weight q4_K +blk.16.attn_output.weight iq2_xxs +blk.16.attn_v.weight q4_K +blk.17.attn_output.weight iq2_xxs +blk.17.attn_v.weight q4_K +blk.18.attn_output.weight iq2_xxs +blk.18.attn_v.weight q4_K +blk.19.attn_output.weight iq2_xxs +blk.19.attn_v.weight q4_K +blk.20.attn_output.weight iq2_xxs +blk.20.attn_v.weight q4_K +blk.21.attn_output.weight iq2_xxs +blk.21.attn_v.weight q4_K +blk.22.attn_output.weight iq2_xxs +blk.22.attn_v.weight q4_K +blk.23.attn_output.weight iq2_xxs +blk.23.attn_v.weight q4_K +blk.24.attn_output.weight iq2_xxs +blk.24.attn_v.weight q4_K +blk.25.attn_output.weight iq2_xxs +blk.25.attn_v.weight q4_K +blk.26.attn_output.weight iq2_xxs +blk.26.attn_v.weight q4_K +blk.27.attn_output.weight iq2_xxs +blk.27.attn_v.weight q4_K +blk.28.attn_output.weight iq2_xxs +blk.28.attn_v.weight q4_K +blk.29.attn_output.weight iq2_xxs +blk.29.attn_v.weight q4_K +blk.30.attn_output.weight iq2_xxs +blk.30.attn_v.weight q4_K +blk.31.attn_output.weight iq2_xxs +blk.31.attn_v.weight q4_K +blk.32.attn_output.weight iq2_xxs +blk.32.attn_v.weight q4_K +blk.33.attn_output.weight iq2_xxs +blk.33.attn_v.weight q4_K +blk.34.attn_output.weight iq2_xxs +blk.34.attn_v.weight q4_K +blk.35.attn_output.weight iq2_xxs +blk.35.attn_v.weight q4_K +blk.36.attn_output.weight iq2_xxs +blk.36.attn_v.weight q4_K +blk.37.attn_output.weight iq2_xxs +blk.37.attn_v.weight q4_K +blk.38.attn_output.weight iq2_xxs +blk.38.attn_v.weight q4_K +blk.39.attn_output.weight iq2_xxs +blk.39.attn_v.weight q4_K +blk.40.attn_output.weight iq2_xxs +blk.40.attn_v.weight q4_K +blk.41.attn_output.weight iq2_xxs +blk.41.attn_v.weight q4_K +blk.42.attn_v.weight q4_K +blk.42.attn_output.weight iq2_xxs +blk.43.attn_output.weight iq2_xxs +blk.43.attn_v.weight q4_K +blk.44.attn_output.weight iq2_xxs +blk.44.attn_v.weight q4_K +output.weight q5_K +blk.45.attn_output.weight iq2_xxs +blk.45.attn_v.weight q4_K + +[IQ4_NL] iq4_nl +blk.0.ffn_down.weight q5_K +blk.0.attn_v.weight q5_K +blk.1.ffn_down_exps.weight q5_K +blk.1.ffn_down_shexp.weight q5_K +blk.1.attn_v.weight q5_K +blk.2.ffn_down_exps.weight q5_K +blk.2.ffn_down_shexp.weight q5_K +blk.2.attn_v.weight q5_K +blk.3.ffn_down_exps.weight q5_K +blk.3.ffn_down_shexp.weight q5_K +blk.3.attn_v.weight q5_K +blk.4.ffn_down_exps.weight q5_K +blk.4.ffn_down_shexp.weight q5_K +blk.4.attn_v.weight q5_K +blk.5.attn_v.weight q5_K +blk.6.attn_v.weight q5_K +blk.7.attn_v.weight q5_K +blk.8.attn_v.weight q5_K +blk.9.attn_v.weight q5_K +blk.10.attn_v.weight q5_K +blk.11.attn_v.weight q5_K +blk.12.attn_v.weight q5_K +blk.13.attn_v.weight q5_K +blk.14.attn_v.weight q5_K +blk.15.attn_v.weight q5_K +blk.16.attn_v.weight q5_K +blk.17.attn_v.weight q5_K +blk.18.attn_v.weight q5_K +blk.19.attn_v.weight q5_K +blk.20.attn_v.weight q5_K +blk.21.attn_v.weight q5_K +blk.22.attn_v.weight q5_K +blk.23.attn_v.weight q5_K +blk.24.attn_v.weight q5_K +blk.25.attn_v.weight q5_K +blk.26.attn_v.weight q5_K +blk.27.attn_v.weight q5_K +blk.28.attn_v.weight q5_K +blk.29.attn_v.weight q5_K +blk.30.attn_v.weight q5_K +blk.31.attn_v.weight q5_K +blk.32.attn_v.weight q5_K +blk.33.attn_v.weight q5_K +blk.34.attn_v.weight q5_K +blk.35.attn_v.weight q5_K +blk.36.attn_v.weight q5_K +blk.37.attn_v.weight q5_K +blk.38.attn_v.weight q5_K +blk.39.attn_v.weight q5_K +blk.40.attn_v.weight q5_K +blk.41.attn_v.weight q5_K +blk.42.attn_v.weight q5_K +blk.43.attn_v.weight q5_K +blk.44.attn_v.weight q5_K +output.weight q6_K +blk.45.attn_v.weight q5_K + +[IQ3_S] iq3_s +blk.0.attn_v.weight q4_K +blk.1.attn_v.weight q4_K +blk.2.attn_v.weight q4_K +blk.3.attn_v.weight q4_K +blk.4.attn_v.weight q4_K +blk.5.attn_v.weight q4_K +blk.6.attn_v.weight q4_K +blk.7.attn_v.weight q4_K +blk.8.attn_v.weight q4_K +blk.9.attn_v.weight q4_K +blk.10.attn_v.weight q4_K +blk.11.attn_v.weight q4_K +blk.12.attn_v.weight q4_K +blk.13.attn_v.weight q4_K +blk.14.attn_v.weight q4_K +blk.15.attn_v.weight q4_K +blk.16.attn_v.weight q4_K +blk.17.attn_v.weight q4_K +blk.18.attn_v.weight q4_K +blk.19.attn_v.weight q4_K +blk.20.attn_v.weight q4_K +blk.21.attn_v.weight q4_K +blk.22.attn_v.weight q4_K +blk.23.attn_v.weight q4_K +blk.24.attn_v.weight q4_K +blk.25.attn_v.weight q4_K +blk.26.attn_v.weight q4_K +blk.27.attn_v.weight q4_K +blk.28.attn_v.weight q4_K +blk.29.attn_v.weight q4_K +blk.30.attn_v.weight q4_K +blk.31.attn_v.weight q4_K +blk.32.attn_v.weight q4_K +blk.33.attn_v.weight q4_K +blk.34.attn_v.weight q4_K +blk.35.attn_v.weight q4_K +blk.36.attn_v.weight q4_K +blk.37.attn_v.weight q4_K +blk.38.attn_v.weight q4_K +blk.39.attn_v.weight q4_K +blk.40.attn_v.weight q4_K +blk.41.attn_v.weight q4_K +blk.42.attn_v.weight q4_K +blk.43.attn_v.weight q4_K +blk.44.attn_v.weight q4_K +output.weight q6_K +blk.45.attn_v.weight q4_K + +[IQ3_M] iq3_s +blk.0.ffn_down.weight q4_K +blk.0.attn_output.weight q4_K +blk.0.attn_v.weight q4_K +blk.1.ffn_down_exps.weight q4_K +blk.1.ffn_down_shexp.weight q4_K +blk.1.attn_output.weight q4_K +blk.1.attn_v.weight q4_K +blk.2.ffn_down_exps.weight q4_K +blk.2.ffn_down_shexp.weight q4_K +blk.2.attn_output.weight q4_K +blk.2.attn_v.weight q4_K +blk.3.ffn_down_exps.weight q4_K +blk.3.ffn_down_shexp.weight q4_K +blk.3.attn_output.weight q4_K +blk.3.attn_v.weight q4_K +blk.4.ffn_down_exps.weight q4_K +blk.4.ffn_down_shexp.weight q4_K +blk.4.attn_output.weight q4_K +blk.4.attn_v.weight q4_K +blk.5.attn_output.weight q4_K +blk.5.attn_v.weight q4_K +blk.6.attn_output.weight q4_K +blk.6.attn_v.weight q4_K +blk.7.attn_output.weight q4_K +blk.7.attn_v.weight q4_K +blk.8.attn_output.weight q4_K +blk.8.attn_v.weight q4_K +blk.9.attn_output.weight q4_K +blk.9.attn_v.weight q4_K +blk.10.attn_output.weight q4_K +blk.10.attn_v.weight q4_K +blk.11.attn_output.weight q4_K +blk.11.attn_v.weight q4_K +blk.12.attn_output.weight q4_K +blk.12.attn_v.weight q4_K +blk.13.attn_output.weight q4_K +blk.13.attn_v.weight q4_K +blk.14.attn_output.weight q4_K +blk.14.attn_v.weight q4_K +blk.15.attn_output.weight q4_K +blk.15.attn_v.weight q4_K +blk.16.attn_output.weight q4_K +blk.16.attn_v.weight q4_K +blk.17.attn_output.weight q4_K +blk.17.attn_v.weight q4_K +blk.18.attn_output.weight q4_K +blk.18.attn_v.weight q4_K +blk.19.attn_output.weight q4_K +blk.19.attn_v.weight q4_K +blk.20.attn_output.weight q4_K +blk.20.attn_v.weight q4_K +blk.21.attn_output.weight q4_K +blk.21.attn_v.weight q4_K +blk.22.attn_output.weight q4_K +blk.22.attn_v.weight q4_K +blk.23.attn_output.weight q4_K +blk.23.attn_v.weight q4_K +blk.24.attn_output.weight q4_K +blk.24.attn_v.weight q4_K +blk.25.attn_output.weight q4_K +blk.25.attn_v.weight q4_K +blk.26.attn_output.weight q4_K +blk.26.attn_v.weight q4_K +blk.27.attn_output.weight q4_K +blk.27.attn_v.weight q4_K +blk.28.attn_output.weight q4_K +blk.28.attn_v.weight q4_K +blk.29.attn_output.weight q4_K +blk.29.attn_v.weight q4_K +blk.30.attn_output.weight q4_K +blk.30.attn_v.weight q4_K +blk.31.attn_output.weight q4_K +blk.31.attn_v.weight q4_K +blk.32.attn_output.weight q4_K +blk.32.attn_v.weight q4_K +blk.33.attn_output.weight q4_K +blk.33.attn_v.weight q4_K +blk.34.attn_output.weight q4_K +blk.34.attn_v.weight q4_K +blk.35.attn_output.weight q4_K +blk.35.attn_v.weight q4_K +blk.36.attn_output.weight q4_K +blk.36.attn_v.weight q4_K +blk.37.attn_output.weight q4_K +blk.37.attn_v.weight q4_K +blk.38.attn_output.weight q4_K +blk.38.attn_v.weight q4_K +blk.39.attn_output.weight q4_K +blk.39.attn_v.weight q4_K +blk.40.attn_output.weight q4_K +blk.40.attn_v.weight q4_K +blk.41.attn_output.weight q4_K +blk.41.attn_v.weight q4_K +blk.42.attn_v.weight q4_K +blk.42.attn_output.weight q4_K +blk.43.attn_output.weight q4_K +blk.43.attn_v.weight q4_K +blk.44.attn_output.weight q4_K +blk.44.attn_v.weight q4_K +output.weight q6_K +blk.45.attn_output.weight q4_K +blk.45.attn_v.weight q4_K + +[IQ2_S] iq2_xs +token_embd.weight iq3_s +blk.0.ffn_down.weight iq3_s +blk.0.attn_output.weight iq3_s +blk.0.attn_v.weight q4_K +blk.1.ffn_down_exps.weight iq3_s +blk.1.ffn_down_shexp.weight iq3_s +blk.1.attn_output.weight iq3_s +blk.1.attn_v.weight q4_K +blk.2.ffn_down_exps.weight iq3_s +blk.2.ffn_down_shexp.weight iq3_s +blk.2.attn_output.weight iq3_s +blk.2.attn_v.weight q4_K +blk.3.attn_output.weight iq3_s +blk.3.attn_v.weight q4_K +blk.4.attn_output.weight iq3_s +blk.4.attn_v.weight q4_K +blk.5.attn_output.weight iq3_s +blk.5.attn_v.weight q4_K +blk.6.attn_output.weight iq3_s +blk.6.attn_v.weight q4_K +blk.7.attn_output.weight iq3_s +blk.7.attn_v.weight q4_K +blk.8.attn_output.weight iq3_s +blk.8.attn_v.weight q4_K +blk.9.attn_output.weight iq3_s +blk.9.attn_v.weight q4_K +blk.10.attn_output.weight iq3_s +blk.10.attn_v.weight q4_K +blk.11.attn_output.weight iq3_s +blk.11.attn_v.weight q4_K +blk.12.attn_output.weight iq3_s +blk.12.attn_v.weight q4_K +blk.13.attn_output.weight iq3_s +blk.13.attn_v.weight q4_K +blk.14.attn_output.weight iq3_s +blk.14.attn_v.weight q4_K +blk.15.attn_output.weight iq3_s +blk.15.attn_v.weight q4_K +blk.16.attn_output.weight iq3_s +blk.16.attn_v.weight q4_K +blk.17.attn_output.weight iq3_s +blk.17.attn_v.weight q4_K +blk.18.attn_output.weight iq3_s +blk.18.attn_v.weight q4_K +blk.19.attn_output.weight iq3_s +blk.19.attn_v.weight q4_K +blk.20.attn_output.weight iq3_s +blk.20.attn_v.weight q4_K +blk.21.attn_output.weight iq3_s +blk.21.attn_v.weight q4_K +blk.22.attn_output.weight iq3_s +blk.22.attn_v.weight q4_K +blk.23.attn_output.weight iq3_s +blk.23.attn_v.weight q4_K +blk.24.attn_output.weight iq3_s +blk.24.attn_v.weight q4_K +blk.25.attn_output.weight iq3_s +blk.25.attn_v.weight q4_K +blk.26.attn_output.weight iq3_s +blk.26.attn_v.weight q4_K +blk.27.attn_output.weight iq3_s +blk.27.attn_v.weight q4_K +blk.28.attn_output.weight iq3_s +blk.28.attn_v.weight q4_K +blk.29.attn_output.weight iq3_s +blk.29.attn_v.weight q4_K +blk.30.attn_output.weight iq3_s +blk.30.attn_v.weight q4_K +blk.31.attn_output.weight iq3_s +blk.31.attn_v.weight q4_K +blk.32.attn_output.weight iq3_s +blk.32.attn_v.weight q4_K +blk.33.attn_output.weight iq3_s +blk.33.attn_v.weight q4_K +blk.34.attn_output.weight iq3_s +blk.34.attn_v.weight q4_K +blk.35.attn_output.weight iq3_s +blk.35.attn_v.weight q4_K +blk.36.attn_output.weight iq3_s +blk.36.attn_v.weight q4_K +blk.37.attn_output.weight iq3_s +blk.37.attn_v.weight q4_K +blk.38.attn_output.weight iq3_s +blk.38.attn_v.weight q4_K +blk.39.attn_output.weight iq3_s +blk.39.attn_v.weight q4_K +blk.40.attn_output.weight iq3_s +blk.40.attn_v.weight q4_K +blk.41.attn_output.weight iq3_s +blk.41.attn_v.weight q4_K +blk.42.attn_v.weight q4_K +blk.42.attn_output.weight iq3_s +blk.43.attn_output.weight iq3_s +blk.43.attn_v.weight q4_K +blk.44.attn_output.weight iq3_s +blk.44.attn_v.weight q4_K +output.weight q5_K +blk.45.attn_output.weight iq3_s +blk.45.attn_v.weight q4_K + +[IQ2_M] iq2_s +token_embd.weight iq3_s +blk.0.ffn_down.weight iq3_s +blk.0.attn_output.weight iq3_s +blk.0.attn_v.weight q4_K +blk.1.ffn_down_exps.weight iq3_s +blk.1.ffn_down_shexp.weight iq3_s +blk.1.attn_output.weight iq3_s +blk.1.attn_v.weight q4_K +blk.2.ffn_down_exps.weight iq3_s +blk.2.ffn_down_shexp.weight iq3_s +blk.2.attn_output.weight iq3_s +blk.2.attn_v.weight q4_K +blk.3.attn_output.weight iq3_s +blk.3.attn_v.weight q4_K +blk.4.attn_output.weight iq3_s +blk.4.attn_v.weight q4_K +blk.5.attn_output.weight iq3_s +blk.5.attn_v.weight q4_K +blk.6.attn_output.weight iq3_s +blk.6.attn_v.weight q4_K +blk.7.attn_output.weight iq3_s +blk.7.attn_v.weight q4_K +blk.8.attn_output.weight iq3_s +blk.8.attn_v.weight q4_K +blk.9.attn_output.weight iq3_s +blk.9.attn_v.weight q4_K +blk.10.attn_output.weight iq3_s +blk.10.attn_v.weight q4_K +blk.11.attn_output.weight iq3_s +blk.11.attn_v.weight q4_K +blk.12.attn_output.weight iq3_s +blk.12.attn_v.weight q4_K +blk.13.attn_output.weight iq3_s +blk.13.attn_v.weight q4_K +blk.14.attn_output.weight iq3_s +blk.14.attn_v.weight q4_K +blk.15.attn_output.weight iq3_s +blk.15.attn_v.weight q4_K +blk.16.attn_output.weight iq3_s +blk.16.attn_v.weight q4_K +blk.17.attn_output.weight iq3_s +blk.17.attn_v.weight q4_K +blk.18.attn_output.weight iq3_s +blk.18.attn_v.weight q4_K +blk.19.attn_output.weight iq3_s +blk.19.attn_v.weight q4_K +blk.20.attn_output.weight iq3_s +blk.20.attn_v.weight q4_K +blk.21.attn_output.weight iq3_s +blk.21.attn_v.weight q4_K +blk.22.attn_output.weight iq3_s +blk.22.attn_v.weight q4_K +blk.23.attn_output.weight iq3_s +blk.23.attn_v.weight q4_K +blk.24.attn_output.weight iq3_s +blk.24.attn_v.weight q4_K +blk.25.attn_output.weight iq3_s +blk.25.attn_v.weight q4_K +blk.26.attn_output.weight iq3_s +blk.26.attn_v.weight q4_K +blk.27.attn_output.weight iq3_s +blk.27.attn_v.weight q4_K +blk.28.attn_output.weight iq3_s +blk.28.attn_v.weight q4_K +blk.29.attn_output.weight iq3_s +blk.29.attn_v.weight q4_K +blk.30.attn_output.weight iq3_s +blk.30.attn_v.weight q4_K +blk.31.attn_output.weight iq3_s +blk.31.attn_v.weight q4_K +blk.32.attn_output.weight iq3_s +blk.32.attn_v.weight q4_K +blk.33.attn_output.weight iq3_s +blk.33.attn_v.weight q4_K +blk.34.attn_output.weight iq3_s +blk.34.attn_v.weight q4_K +blk.35.attn_output.weight iq3_s +blk.35.attn_v.weight q4_K +blk.36.attn_output.weight iq3_s +blk.36.attn_v.weight q4_K +blk.37.attn_output.weight iq3_s +blk.37.attn_v.weight q4_K +blk.38.attn_output.weight iq3_s +blk.38.attn_v.weight q4_K +blk.39.attn_output.weight iq3_s +blk.39.attn_v.weight q4_K +blk.40.attn_output.weight iq3_s +blk.40.attn_v.weight q4_K +blk.41.attn_output.weight iq3_s +blk.41.attn_v.weight q4_K +blk.42.attn_v.weight q4_K +blk.42.attn_output.weight iq3_s +blk.43.attn_output.weight iq3_s +blk.43.attn_v.weight q4_K +blk.44.attn_output.weight iq3_s +blk.44.attn_v.weight q4_K +output.weight q5_K +blk.45.attn_output.weight iq3_s +blk.45.attn_v.weight q4_K + +[IQ4_XS] iq4_xs +blk.0.ffn_down.weight q5_K +blk.0.attn_v.weight q5_K +blk.1.ffn_down_exps.weight q5_K +blk.1.ffn_down_shexp.weight q5_K +blk.1.attn_v.weight q5_K +blk.2.ffn_down_exps.weight q5_K +blk.2.ffn_down_shexp.weight q5_K +blk.2.attn_v.weight q5_K +blk.3.ffn_down_exps.weight q5_K +blk.3.ffn_down_shexp.weight q5_K +blk.3.attn_v.weight q5_K +blk.4.ffn_down_exps.weight q5_K +blk.4.ffn_down_shexp.weight q5_K +blk.4.attn_v.weight q5_K +blk.5.attn_v.weight q5_K +blk.6.attn_v.weight q5_K +blk.7.attn_v.weight q5_K +blk.8.attn_v.weight q5_K +blk.9.attn_v.weight q5_K +blk.10.attn_v.weight q5_K +blk.11.attn_v.weight q5_K +blk.12.attn_v.weight q5_K +blk.13.attn_v.weight q5_K +blk.14.attn_v.weight q5_K +blk.15.attn_v.weight q5_K +blk.16.attn_v.weight q5_K +blk.17.attn_v.weight q5_K +blk.18.attn_v.weight q5_K +blk.19.attn_v.weight q5_K +blk.20.attn_v.weight q5_K +blk.21.attn_v.weight q5_K +blk.22.attn_v.weight q5_K +blk.23.attn_v.weight q5_K +blk.24.attn_v.weight q5_K +blk.25.attn_v.weight q5_K +blk.26.attn_v.weight q5_K +blk.27.attn_v.weight q5_K +blk.28.attn_v.weight q5_K +blk.29.attn_v.weight q5_K +blk.30.attn_v.weight q5_K +blk.31.attn_v.weight q5_K +blk.32.attn_v.weight q5_K +blk.33.attn_v.weight q5_K +blk.34.attn_v.weight q5_K +blk.35.attn_v.weight q5_K +blk.36.attn_v.weight q5_K +blk.37.attn_v.weight q5_K +blk.38.attn_v.weight q5_K +blk.39.attn_v.weight q5_K +blk.40.attn_v.weight q5_K +blk.41.attn_v.weight q5_K +blk.42.attn_v.weight q5_K +blk.43.attn_v.weight q5_K +blk.44.attn_v.weight q5_K +output.weight q6_K +blk.45.attn_v.weight q5_K + +[IQ1_M] iq1_m +token_embd.weight q2_K +blk.0.ffn_down.weight q2_K +blk.0.attn_output.weight iq2_xxs +blk.0.attn_v.weight q4_K +blk.1.ffn_down_exps.weight q2_K +blk.1.ffn_down_shexp.weight q2_K +blk.1.attn_output.weight iq2_xxs +blk.1.attn_v.weight q4_K +blk.2.ffn_down_exps.weight q2_K +blk.2.ffn_down_shexp.weight q2_K +blk.2.attn_output.weight iq2_xxs +blk.2.attn_v.weight q4_K +blk.3.attn_output.weight iq2_xxs +blk.3.attn_v.weight q4_K +blk.4.attn_output.weight iq2_xxs +blk.4.attn_v.weight q4_K +blk.5.attn_output.weight iq2_xxs +blk.5.attn_v.weight q4_K +blk.6.attn_output.weight iq2_xxs +blk.6.attn_v.weight q4_K +blk.7.attn_output.weight iq2_xxs +blk.7.attn_v.weight q4_K +blk.8.attn_output.weight iq2_xxs +blk.8.attn_v.weight q4_K +blk.9.attn_output.weight iq2_xxs +blk.9.attn_v.weight q4_K +blk.10.attn_output.weight iq2_xxs +blk.10.attn_v.weight q4_K +blk.11.attn_output.weight iq2_xxs +blk.11.attn_v.weight q4_K +blk.12.attn_output.weight iq2_xxs +blk.12.attn_v.weight q4_K +blk.13.attn_output.weight iq2_xxs +blk.13.attn_v.weight q4_K +blk.14.attn_output.weight iq2_xxs +blk.14.attn_v.weight q4_K +blk.15.attn_output.weight iq2_xxs +blk.15.attn_v.weight q4_K +blk.16.attn_output.weight iq2_xxs +blk.16.attn_v.weight q4_K +blk.17.attn_output.weight iq2_xxs +blk.17.attn_v.weight q4_K +blk.18.attn_output.weight iq2_xxs +blk.18.attn_v.weight q4_K +blk.19.attn_output.weight iq2_xxs +blk.19.attn_v.weight q4_K +blk.20.attn_output.weight iq2_xxs +blk.20.attn_v.weight q4_K +blk.21.attn_output.weight iq2_xxs +blk.21.attn_v.weight q4_K +blk.22.attn_output.weight iq2_xxs +blk.22.attn_v.weight q4_K +blk.23.attn_output.weight iq2_xxs +blk.23.attn_v.weight q4_K +blk.24.attn_output.weight iq2_xxs +blk.24.attn_v.weight q4_K +blk.25.attn_output.weight iq2_xxs +blk.25.attn_v.weight q4_K +blk.26.attn_output.weight iq2_xxs +blk.26.attn_v.weight q4_K +blk.27.attn_output.weight iq2_xxs +blk.27.attn_v.weight q4_K +blk.28.attn_output.weight iq2_xxs +blk.28.attn_v.weight q4_K +blk.29.attn_output.weight iq2_xxs +blk.29.attn_v.weight q4_K +blk.30.attn_output.weight iq2_xxs +blk.30.attn_v.weight q4_K +blk.31.attn_output.weight iq2_xxs +blk.31.attn_v.weight q4_K +blk.32.attn_output.weight iq2_xxs +blk.32.attn_v.weight q4_K +blk.33.attn_output.weight iq2_xxs +blk.33.attn_v.weight q4_K +blk.34.attn_output.weight iq2_xxs +blk.34.attn_v.weight q4_K +blk.35.attn_output.weight iq2_xxs +blk.35.attn_v.weight q4_K +blk.36.attn_output.weight iq2_xxs +blk.36.attn_v.weight q4_K +blk.37.attn_output.weight iq2_xxs +blk.37.attn_v.weight q4_K +blk.38.attn_output.weight iq2_xxs +blk.38.attn_v.weight q4_K +blk.39.attn_output.weight iq2_xxs +blk.39.attn_v.weight q4_K +blk.40.attn_output.weight iq2_xxs +blk.40.attn_v.weight q4_K +blk.41.attn_output.weight iq2_xxs +blk.41.attn_v.weight q4_K +blk.42.attn_v.weight q4_K +blk.42.attn_output.weight iq2_xxs +blk.43.attn_output.weight iq2_xxs +blk.43.attn_v.weight q4_K +blk.44.attn_output.weight iq2_xxs +blk.44.attn_v.weight q4_K +output.weight q5_K +blk.45.attn_output.weight iq2_xxs +blk.45.attn_v.weight q4_K + +[BF16] bf16 +output.weight q6_K + +[TQ1_0] tq1_0 +token_embd.weight q4_K +output.weight q6_K + +[TQ2_0] tq2_0 +token_embd.weight q4_K +output.weight q6_K + +[MXFP4_MOE] mxfp4 +token_embd.weight q8_0 +blk.0.ffn_down.weight q8_0 +blk.0.ffn_gate.weight q8_0 +blk.0.ffn_up.weight q8_0 +blk.0.attn_k.weight q8_0 +blk.0.attn_output.weight q8_0 +blk.0.attn_q.weight q8_0 +blk.0.attn_v.weight q8_0 +blk.1.ffn_down_shexp.weight q8_0 +blk.1.ffn_gate_shexp.weight q8_0 +blk.1.ffn_up_shexp.weight q8_0 +blk.1.attn_k.weight q8_0 +blk.1.attn_output.weight q8_0 +blk.1.attn_q.weight q8_0 +blk.1.attn_v.weight q8_0 +blk.2.ffn_down_shexp.weight q8_0 +blk.2.ffn_gate_shexp.weight q8_0 +blk.2.ffn_up_shexp.weight q8_0 +blk.2.attn_k.weight q8_0 +blk.2.attn_output.weight q8_0 +blk.2.attn_q.weight q8_0 +blk.2.attn_v.weight q8_0 +blk.3.ffn_down_shexp.weight q8_0 +blk.3.ffn_gate_shexp.weight q8_0 +blk.3.ffn_up_shexp.weight q8_0 +blk.3.attn_k.weight q8_0 +blk.3.attn_output.weight q8_0 +blk.3.attn_q.weight q8_0 +blk.3.attn_v.weight q8_0 +blk.4.ffn_down_shexp.weight q8_0 +blk.4.ffn_gate_shexp.weight q8_0 +blk.4.ffn_up_shexp.weight q8_0 +blk.4.attn_k.weight q8_0 +blk.4.attn_output.weight q8_0 +blk.4.attn_q.weight q8_0 +blk.4.attn_v.weight q8_0 +blk.5.ffn_down_shexp.weight q8_0 +blk.5.ffn_gate_shexp.weight q8_0 +blk.5.ffn_up_shexp.weight q8_0 +blk.5.attn_k.weight q8_0 +blk.5.attn_output.weight q8_0 +blk.5.attn_q.weight q8_0 +blk.5.attn_v.weight q8_0 +blk.6.ffn_down_shexp.weight q8_0 +blk.6.ffn_gate_shexp.weight q8_0 +blk.6.ffn_up_shexp.weight q8_0 +blk.6.attn_k.weight q8_0 +blk.6.attn_output.weight q8_0 +blk.6.attn_q.weight q8_0 +blk.6.attn_v.weight q8_0 +blk.7.ffn_down_shexp.weight q8_0 +blk.7.ffn_gate_shexp.weight q8_0 +blk.7.ffn_up_shexp.weight q8_0 +blk.7.attn_k.weight q8_0 +blk.7.attn_output.weight q8_0 +blk.7.attn_q.weight q8_0 +blk.7.attn_v.weight q8_0 +blk.8.ffn_down_shexp.weight q8_0 +blk.8.ffn_gate_shexp.weight q8_0 +blk.8.ffn_up_shexp.weight q8_0 +blk.8.attn_k.weight q8_0 +blk.8.attn_output.weight q8_0 +blk.8.attn_q.weight q8_0 +blk.8.attn_v.weight q8_0 +blk.9.ffn_down_shexp.weight q8_0 +blk.9.ffn_gate_shexp.weight q8_0 +blk.9.ffn_up_shexp.weight q8_0 +blk.9.attn_k.weight q8_0 +blk.9.attn_output.weight q8_0 +blk.9.attn_q.weight q8_0 +blk.9.attn_v.weight q8_0 +blk.10.ffn_down_shexp.weight q8_0 +blk.10.ffn_gate_shexp.weight q8_0 +blk.10.ffn_up_shexp.weight q8_0 +blk.10.attn_k.weight q8_0 +blk.10.attn_output.weight q8_0 +blk.10.attn_q.weight q8_0 +blk.10.attn_v.weight q8_0 +blk.11.ffn_down_shexp.weight q8_0 +blk.11.ffn_gate_shexp.weight q8_0 +blk.11.ffn_up_shexp.weight q8_0 +blk.11.attn_k.weight q8_0 +blk.11.attn_output.weight q8_0 +blk.11.attn_q.weight q8_0 +blk.11.attn_v.weight q8_0 +blk.12.ffn_down_shexp.weight q8_0 +blk.12.ffn_gate_shexp.weight q8_0 +blk.12.ffn_up_shexp.weight q8_0 +blk.12.attn_k.weight q8_0 +blk.12.attn_output.weight q8_0 +blk.12.attn_q.weight q8_0 +blk.12.attn_v.weight q8_0 +blk.13.ffn_down_shexp.weight q8_0 +blk.13.ffn_gate_shexp.weight q8_0 +blk.13.ffn_up_shexp.weight q8_0 +blk.13.attn_k.weight q8_0 +blk.13.attn_output.weight q8_0 +blk.13.attn_q.weight q8_0 +blk.13.attn_v.weight q8_0 +blk.14.ffn_down_shexp.weight q8_0 +blk.14.ffn_gate_shexp.weight q8_0 +blk.14.ffn_up_shexp.weight q8_0 +blk.14.attn_k.weight q8_0 +blk.14.attn_output.weight q8_0 +blk.14.attn_q.weight q8_0 +blk.14.attn_v.weight q8_0 +blk.15.ffn_down_shexp.weight q8_0 +blk.15.ffn_gate_shexp.weight q8_0 +blk.15.ffn_up_shexp.weight q8_0 +blk.15.attn_k.weight q8_0 +blk.15.attn_output.weight q8_0 +blk.15.attn_q.weight q8_0 +blk.15.attn_v.weight q8_0 +blk.16.ffn_down_shexp.weight q8_0 +blk.16.ffn_gate_shexp.weight q8_0 +blk.16.ffn_up_shexp.weight q8_0 +blk.16.attn_k.weight q8_0 +blk.16.attn_output.weight q8_0 +blk.16.attn_q.weight q8_0 +blk.16.attn_v.weight q8_0 +blk.17.ffn_down_shexp.weight q8_0 +blk.17.ffn_gate_shexp.weight q8_0 +blk.17.ffn_up_shexp.weight q8_0 +blk.17.attn_k.weight q8_0 +blk.17.attn_output.weight q8_0 +blk.17.attn_q.weight q8_0 +blk.17.attn_v.weight q8_0 +blk.18.ffn_down_shexp.weight q8_0 +blk.18.ffn_gate_shexp.weight q8_0 +blk.18.ffn_up_shexp.weight q8_0 +blk.18.attn_k.weight q8_0 +blk.18.attn_output.weight q8_0 +blk.18.attn_q.weight q8_0 +blk.18.attn_v.weight q8_0 +blk.19.ffn_down_shexp.weight q8_0 +blk.19.ffn_gate_shexp.weight q8_0 +blk.19.ffn_up_shexp.weight q8_0 +blk.19.attn_k.weight q8_0 +blk.19.attn_output.weight q8_0 +blk.19.attn_q.weight q8_0 +blk.19.attn_v.weight q8_0 +blk.20.ffn_down_shexp.weight q8_0 +blk.20.ffn_gate_shexp.weight q8_0 +blk.20.ffn_up_shexp.weight q8_0 +blk.20.attn_k.weight q8_0 +blk.20.attn_output.weight q8_0 +blk.20.attn_q.weight q8_0 +blk.20.attn_v.weight q8_0 +blk.21.ffn_down_shexp.weight q8_0 +blk.21.ffn_gate_shexp.weight q8_0 +blk.21.ffn_up_shexp.weight q8_0 +blk.21.attn_k.weight q8_0 +blk.21.attn_output.weight q8_0 +blk.21.attn_q.weight q8_0 +blk.21.attn_v.weight q8_0 +blk.22.ffn_down_shexp.weight q8_0 +blk.22.ffn_gate_shexp.weight q8_0 +blk.22.ffn_up_shexp.weight q8_0 +blk.22.attn_k.weight q8_0 +blk.22.attn_output.weight q8_0 +blk.22.attn_q.weight q8_0 +blk.22.attn_v.weight q8_0 +blk.23.ffn_down_shexp.weight q8_0 +blk.23.ffn_gate_shexp.weight q8_0 +blk.23.ffn_up_shexp.weight q8_0 +blk.23.attn_k.weight q8_0 +blk.23.attn_output.weight q8_0 +blk.23.attn_q.weight q8_0 +blk.23.attn_v.weight q8_0 +blk.24.ffn_down_shexp.weight q8_0 +blk.24.ffn_gate_shexp.weight q8_0 +blk.24.ffn_up_shexp.weight q8_0 +blk.24.attn_k.weight q8_0 +blk.24.attn_output.weight q8_0 +blk.24.attn_q.weight q8_0 +blk.24.attn_v.weight q8_0 +blk.25.ffn_down_shexp.weight q8_0 +blk.25.ffn_gate_shexp.weight q8_0 +blk.25.ffn_up_shexp.weight q8_0 +blk.25.attn_k.weight q8_0 +blk.25.attn_output.weight q8_0 +blk.25.attn_q.weight q8_0 +blk.25.attn_v.weight q8_0 +blk.26.ffn_down_shexp.weight q8_0 +blk.26.ffn_gate_shexp.weight q8_0 +blk.26.ffn_up_shexp.weight q8_0 +blk.26.attn_k.weight q8_0 +blk.26.attn_output.weight q8_0 +blk.26.attn_q.weight q8_0 +blk.26.attn_v.weight q8_0 +blk.27.ffn_down_shexp.weight q8_0 +blk.27.ffn_gate_shexp.weight q8_0 +blk.27.ffn_up_shexp.weight q8_0 +blk.27.attn_k.weight q8_0 +blk.27.attn_output.weight q8_0 +blk.27.attn_q.weight q8_0 +blk.27.attn_v.weight q8_0 +blk.28.ffn_down_shexp.weight q8_0 +blk.28.ffn_gate_shexp.weight q8_0 +blk.28.ffn_up_shexp.weight q8_0 +blk.28.attn_k.weight q8_0 +blk.28.attn_output.weight q8_0 +blk.28.attn_q.weight q8_0 +blk.28.attn_v.weight q8_0 +blk.29.ffn_down_shexp.weight q8_0 +blk.29.ffn_gate_shexp.weight q8_0 +blk.29.ffn_up_shexp.weight q8_0 +blk.29.attn_k.weight q8_0 +blk.29.attn_output.weight q8_0 +blk.29.attn_q.weight q8_0 +blk.29.attn_v.weight q8_0 +blk.30.ffn_down_shexp.weight q8_0 +blk.30.ffn_gate_shexp.weight q8_0 +blk.30.ffn_up_shexp.weight q8_0 +blk.30.attn_k.weight q8_0 +blk.30.attn_output.weight q8_0 +blk.30.attn_q.weight q8_0 +blk.30.attn_v.weight q8_0 +blk.31.ffn_down_shexp.weight q8_0 +blk.31.ffn_gate_shexp.weight q8_0 +blk.31.ffn_up_shexp.weight q8_0 +blk.31.attn_k.weight q8_0 +blk.31.attn_output.weight q8_0 +blk.31.attn_q.weight q8_0 +blk.31.attn_v.weight q8_0 +blk.32.ffn_down_shexp.weight q8_0 +blk.32.ffn_gate_shexp.weight q8_0 +blk.32.ffn_up_shexp.weight q8_0 +blk.32.attn_k.weight q8_0 +blk.32.attn_output.weight q8_0 +blk.32.attn_q.weight q8_0 +blk.32.attn_v.weight q8_0 +blk.33.ffn_down_shexp.weight q8_0 +blk.33.ffn_gate_shexp.weight q8_0 +blk.33.ffn_up_shexp.weight q8_0 +blk.33.attn_k.weight q8_0 +blk.33.attn_output.weight q8_0 +blk.33.attn_q.weight q8_0 +blk.33.attn_v.weight q8_0 +blk.34.ffn_gate_shexp.weight q8_0 +blk.34.attn_output.weight q8_0 +blk.34.ffn_down_shexp.weight q8_0 +blk.34.ffn_up_shexp.weight q8_0 +blk.34.attn_k.weight q8_0 +blk.34.attn_q.weight q8_0 +blk.34.attn_v.weight q8_0 +blk.35.ffn_down_shexp.weight q8_0 +blk.35.ffn_gate_shexp.weight q8_0 +blk.35.ffn_up_shexp.weight q8_0 +blk.35.attn_k.weight q8_0 +blk.35.attn_output.weight q8_0 +blk.35.attn_q.weight q8_0 +blk.35.attn_v.weight q8_0 +blk.36.ffn_down_shexp.weight q8_0 +blk.36.ffn_gate_shexp.weight q8_0 +blk.36.ffn_up_shexp.weight q8_0 +blk.36.attn_k.weight q8_0 +blk.36.attn_output.weight q8_0 +blk.36.attn_q.weight q8_0 +blk.36.attn_v.weight q8_0 +blk.37.ffn_down_shexp.weight q8_0 +blk.37.ffn_gate_shexp.weight q8_0 +blk.37.ffn_up_shexp.weight q8_0 +blk.37.attn_k.weight q8_0 +blk.37.attn_output.weight q8_0 +blk.37.attn_q.weight q8_0 +blk.37.attn_v.weight q8_0 +blk.38.ffn_down_shexp.weight q8_0 +blk.38.ffn_gate_shexp.weight q8_0 +blk.38.ffn_up_shexp.weight q8_0 +blk.38.attn_k.weight q8_0 +blk.38.attn_output.weight q8_0 +blk.38.attn_q.weight q8_0 +blk.38.attn_v.weight q8_0 +blk.39.ffn_down_shexp.weight q8_0 +blk.39.ffn_gate_shexp.weight q8_0 +blk.39.ffn_up_shexp.weight q8_0 +blk.39.attn_k.weight q8_0 +blk.39.attn_output.weight q8_0 +blk.39.attn_q.weight q8_0 +blk.39.attn_v.weight q8_0 +blk.40.ffn_down_shexp.weight q8_0 +blk.40.ffn_gate_shexp.weight q8_0 +blk.40.ffn_up_shexp.weight q8_0 +blk.40.attn_k.weight q8_0 +blk.40.attn_output.weight q8_0 +blk.40.attn_q.weight q8_0 +blk.40.attn_v.weight q8_0 +blk.41.ffn_down_shexp.weight q8_0 +blk.41.ffn_gate_shexp.weight q8_0 +blk.41.ffn_up_shexp.weight q8_0 +blk.41.attn_k.weight q8_0 +blk.41.attn_output.weight q8_0 +blk.41.attn_q.weight q8_0 +blk.41.attn_v.weight q8_0 +blk.42.ffn_down_shexp.weight q8_0 +blk.42.ffn_gate_shexp.weight q8_0 +blk.42.ffn_up_shexp.weight q8_0 +blk.42.attn_k.weight q8_0 +blk.42.attn_q.weight q8_0 +blk.42.attn_v.weight q8_0 +blk.42.attn_output.weight q8_0 +blk.43.ffn_down_shexp.weight q8_0 +blk.43.ffn_gate_shexp.weight q8_0 +blk.43.ffn_up_shexp.weight q8_0 +blk.43.attn_k.weight q8_0 +blk.43.attn_output.weight q8_0 +blk.43.attn_q.weight q8_0 +blk.43.attn_v.weight q8_0 +blk.44.ffn_down_shexp.weight q8_0 +blk.44.ffn_gate_shexp.weight q8_0 +blk.44.ffn_up_shexp.weight q8_0 +blk.44.attn_k.weight q8_0 +blk.44.attn_output.weight q8_0 +blk.44.attn_q.weight q8_0 +blk.44.attn_v.weight q8_0 +output.weight q8_0 +blk.45.ffn_down_shexp.weight q8_0 +blk.45.ffn_gate_shexp.weight q8_0 +blk.45.ffn_up_shexp.weight q8_0 +blk.45.attn_k.weight q8_0 +blk.45.attn_output.weight q8_0 +blk.45.attn_q.weight q8_0 +blk.45.attn_v.weight q8_0 diff --git a/tests/snapshots/qwen3-0.6b.schema b/tests/snapshots/qwen3-0.6b.schema new file mode 100644 index 0000000000..5ada58c60e --- /dev/null +++ b/tests/snapshots/qwen3-0.6b.schema @@ -0,0 +1,1224 @@ +# Model: Qwen3-0.6B +# n_embd=1024, n_ff=3072, n_vocab=151936, n_layer=28, n_head=16, n_head_kv=8 + +[F32] f32 +output.weight q6_K + +[F16] f16 +output.weight q6_K + +[Q4_0] q4_0 +output.weight q6_K + +[Q4_1] q4_1 +output.weight q6_K + +[Q8_0] q8_0 + +[Q5_0] q5_0 +output.weight q6_K + +[Q5_1] q5_1 +output.weight q6_K + +[Q2_K] q2_K +output.weight q6_K +blk.0.attn_output.weight q3_K +blk.0.attn_v.weight q3_K +blk.0.ffn_down.weight q3_K +blk.1.attn_output.weight q3_K +blk.1.attn_v.weight q3_K +blk.1.ffn_down.weight q3_K +blk.2.attn_output.weight q3_K +blk.2.attn_v.weight q3_K +blk.2.ffn_down.weight q3_K +blk.3.attn_output.weight q3_K +blk.3.attn_v.weight q3_K +blk.3.ffn_down.weight q3_K +blk.4.attn_output.weight q3_K +blk.4.attn_v.weight q3_K +blk.4.ffn_down.weight q3_K +blk.5.attn_output.weight q3_K +blk.5.attn_v.weight q3_K +blk.5.ffn_down.weight q3_K +blk.6.attn_output.weight q3_K +blk.6.attn_v.weight q3_K +blk.6.ffn_down.weight q3_K +blk.7.attn_output.weight q3_K +blk.7.attn_v.weight q3_K +blk.7.ffn_down.weight q3_K +blk.8.attn_output.weight q3_K +blk.8.attn_v.weight q3_K +blk.8.ffn_down.weight q3_K +blk.9.attn_output.weight q3_K +blk.9.attn_v.weight q3_K +blk.9.ffn_down.weight q3_K +blk.10.attn_output.weight q3_K +blk.10.attn_v.weight q3_K +blk.10.ffn_down.weight q3_K +blk.11.attn_output.weight q3_K +blk.11.attn_v.weight q3_K +blk.11.ffn_down.weight q3_K +blk.12.attn_output.weight q3_K +blk.12.attn_v.weight q3_K +blk.12.ffn_down.weight q3_K +blk.13.attn_output.weight q3_K +blk.13.attn_v.weight q3_K +blk.13.ffn_down.weight q3_K +blk.14.attn_output.weight q3_K +blk.14.attn_v.weight q3_K +blk.14.ffn_down.weight q3_K +blk.15.attn_output.weight q3_K +blk.15.attn_v.weight q3_K +blk.15.ffn_down.weight q3_K +blk.16.attn_output.weight q3_K +blk.16.attn_v.weight q3_K +blk.16.ffn_down.weight q3_K +blk.17.attn_output.weight q3_K +blk.17.attn_v.weight q3_K +blk.17.ffn_down.weight q3_K +blk.18.attn_output.weight q3_K +blk.18.attn_v.weight q3_K +blk.18.ffn_down.weight q3_K +blk.19.attn_output.weight q3_K +blk.19.attn_v.weight q3_K +blk.19.ffn_down.weight q3_K +blk.20.attn_output.weight q3_K +blk.20.attn_v.weight q3_K +blk.20.ffn_down.weight q3_K +blk.21.attn_output.weight q3_K +blk.21.attn_v.weight q3_K +blk.21.ffn_down.weight q3_K +blk.22.attn_output.weight q3_K +blk.22.attn_v.weight q3_K +blk.22.ffn_down.weight q3_K +blk.23.attn_output.weight q3_K +blk.23.attn_v.weight q3_K +blk.23.ffn_down.weight q3_K +blk.24.attn_output.weight q3_K +blk.24.attn_v.weight q3_K +blk.24.ffn_down.weight q3_K +blk.25.attn_output.weight q3_K +blk.25.attn_v.weight q3_K +blk.25.ffn_down.weight q3_K +blk.26.attn_output.weight q3_K +blk.26.attn_v.weight q3_K +blk.26.ffn_down.weight q3_K +blk.27.attn_output.weight q3_K +blk.27.attn_v.weight q3_K +blk.27.ffn_down.weight q3_K + +[Q3_K_S] q3_K +output.weight q6_K + +[Q3_K_M] q3_K +output.weight q6_K +blk.0.attn_output.weight q4_K +blk.0.attn_v.weight q5_K +blk.0.ffn_down.weight q5_K +blk.1.attn_output.weight q4_K +blk.1.attn_v.weight q5_K +blk.1.ffn_down.weight q4_K +blk.2.attn_output.weight q4_K +blk.2.attn_v.weight q4_K +blk.2.ffn_down.weight q4_K +blk.3.attn_output.weight q4_K +blk.3.attn_v.weight q4_K +blk.3.ffn_down.weight q4_K +blk.4.attn_output.weight q4_K +blk.4.attn_v.weight q4_K +blk.4.ffn_down.weight q4_K +blk.5.attn_output.weight q4_K +blk.5.attn_v.weight q4_K +blk.5.ffn_down.weight q4_K +blk.6.attn_output.weight q4_K +blk.6.attn_v.weight q4_K +blk.6.ffn_down.weight q4_K +blk.7.attn_output.weight q4_K +blk.7.attn_v.weight q4_K +blk.7.ffn_down.weight q4_K +blk.8.attn_output.weight q4_K +blk.8.attn_v.weight q4_K +blk.8.ffn_down.weight q4_K +blk.9.attn_output.weight q4_K +blk.9.attn_v.weight q4_K +blk.9.ffn_down.weight q4_K +blk.10.attn_output.weight q4_K +blk.10.attn_v.weight q4_K +blk.10.ffn_down.weight q4_K +blk.11.attn_output.weight q4_K +blk.11.attn_v.weight q4_K +blk.11.ffn_down.weight q4_K +blk.12.attn_output.weight q4_K +blk.12.attn_v.weight q4_K +blk.12.ffn_down.weight q4_K +blk.13.attn_output.weight q4_K +blk.13.attn_v.weight q4_K +blk.13.ffn_down.weight q4_K +blk.14.attn_output.weight q4_K +blk.14.attn_v.weight q4_K +blk.14.ffn_down.weight q4_K +blk.15.attn_output.weight q4_K +blk.15.attn_v.weight q4_K +blk.15.ffn_down.weight q4_K +blk.16.attn_output.weight q4_K +blk.16.attn_v.weight q4_K +blk.16.ffn_down.weight q4_K +blk.17.attn_output.weight q4_K +blk.17.attn_v.weight q4_K +blk.17.ffn_down.weight q4_K +blk.18.attn_output.weight q4_K +blk.18.attn_v.weight q4_K +blk.18.ffn_down.weight q4_K +blk.19.attn_output.weight q4_K +blk.19.attn_v.weight q4_K +blk.19.ffn_down.weight q4_K +blk.20.attn_output.weight q4_K +blk.20.attn_v.weight q4_K +blk.20.ffn_down.weight q4_K +blk.21.attn_output.weight q4_K +blk.21.attn_v.weight q4_K +blk.21.ffn_down.weight q4_K +blk.22.attn_output.weight q4_K +blk.22.attn_v.weight q4_K +blk.22.ffn_down.weight q4_K +blk.23.attn_output.weight q4_K +blk.23.attn_v.weight q4_K +blk.23.ffn_down.weight q4_K +blk.24.attn_output.weight q4_K +blk.24.attn_v.weight q4_K +blk.24.ffn_down.weight q4_K +blk.25.attn_output.weight q4_K +blk.25.attn_v.weight q4_K +blk.25.ffn_down.weight q4_K +blk.26.attn_output.weight q4_K +blk.26.attn_v.weight q4_K +blk.26.ffn_down.weight q4_K +blk.27.attn_output.weight q4_K +blk.27.attn_v.weight q4_K +blk.27.ffn_down.weight q4_K + +[Q3_K_L] q3_K +output.weight q6_K +blk.0.attn_output.weight q5_K +blk.0.attn_v.weight q5_K +blk.0.ffn_down.weight q5_K +blk.1.attn_output.weight q5_K +blk.1.attn_v.weight q5_K +blk.1.ffn_down.weight q5_K +blk.2.attn_output.weight q5_K +blk.2.attn_v.weight q5_K +blk.2.ffn_down.weight q5_K +blk.3.attn_output.weight q5_K +blk.3.attn_v.weight q5_K +blk.3.ffn_down.weight q5_K +blk.4.attn_output.weight q5_K +blk.4.attn_v.weight q5_K +blk.4.ffn_down.weight q5_K +blk.5.attn_output.weight q5_K +blk.5.attn_v.weight q5_K +blk.5.ffn_down.weight q5_K +blk.6.attn_output.weight q5_K +blk.6.attn_v.weight q5_K +blk.6.ffn_down.weight q5_K +blk.7.attn_output.weight q5_K +blk.7.attn_v.weight q5_K +blk.7.ffn_down.weight q5_K +blk.8.attn_output.weight q5_K +blk.8.attn_v.weight q5_K +blk.8.ffn_down.weight q5_K +blk.9.attn_output.weight q5_K +blk.9.attn_v.weight q5_K +blk.9.ffn_down.weight q5_K +blk.10.attn_output.weight q5_K +blk.10.attn_v.weight q5_K +blk.10.ffn_down.weight q5_K +blk.11.attn_output.weight q5_K +blk.11.attn_v.weight q5_K +blk.11.ffn_down.weight q5_K +blk.12.attn_output.weight q5_K +blk.12.attn_v.weight q5_K +blk.12.ffn_down.weight q5_K +blk.13.attn_output.weight q5_K +blk.13.attn_v.weight q5_K +blk.13.ffn_down.weight q5_K +blk.14.attn_output.weight q5_K +blk.14.attn_v.weight q5_K +blk.14.ffn_down.weight q5_K +blk.15.attn_output.weight q5_K +blk.15.attn_v.weight q5_K +blk.15.ffn_down.weight q5_K +blk.16.attn_output.weight q5_K +blk.16.attn_v.weight q5_K +blk.16.ffn_down.weight q5_K +blk.17.attn_output.weight q5_K +blk.17.attn_v.weight q5_K +blk.17.ffn_down.weight q5_K +blk.18.attn_output.weight q5_K +blk.18.attn_v.weight q5_K +blk.18.ffn_down.weight q5_K +blk.19.attn_output.weight q5_K +blk.19.attn_v.weight q5_K +blk.19.ffn_down.weight q5_K +blk.20.attn_output.weight q5_K +blk.20.attn_v.weight q5_K +blk.20.ffn_down.weight q5_K +blk.21.attn_output.weight q5_K +blk.21.attn_v.weight q5_K +blk.21.ffn_down.weight q5_K +blk.22.attn_output.weight q5_K +blk.22.attn_v.weight q5_K +blk.22.ffn_down.weight q5_K +blk.23.attn_output.weight q5_K +blk.23.attn_v.weight q5_K +blk.23.ffn_down.weight q5_K +blk.24.attn_output.weight q5_K +blk.24.attn_v.weight q5_K +blk.24.ffn_down.weight q5_K +blk.25.attn_output.weight q5_K +blk.25.attn_v.weight q5_K +blk.25.ffn_down.weight q5_K +blk.26.attn_output.weight q5_K +blk.26.attn_v.weight q5_K +blk.26.ffn_down.weight q5_K +blk.27.attn_output.weight q5_K +blk.27.attn_v.weight q5_K +blk.27.ffn_down.weight q5_K + +[Q4_K_S] q4_K +output.weight q6_K +blk.0.attn_v.weight q5_K +blk.0.ffn_down.weight q5_K +blk.1.attn_v.weight q5_K +blk.1.ffn_down.weight q5_K +blk.2.attn_v.weight q5_K +blk.2.ffn_down.weight q5_K +blk.3.attn_v.weight q5_K + +[Q4_K_M] q4_K +output.weight q6_K +blk.0.attn_v.weight q6_K +blk.0.ffn_down.weight q6_K +blk.1.attn_v.weight q6_K +blk.1.ffn_down.weight q6_K +blk.2.attn_v.weight q6_K +blk.2.ffn_down.weight q6_K +blk.5.attn_v.weight q6_K +blk.5.ffn_down.weight q6_K +blk.8.attn_v.weight q6_K +blk.8.ffn_down.weight q6_K +blk.11.attn_v.weight q6_K +blk.11.ffn_down.weight q6_K +blk.14.attn_v.weight q6_K +blk.14.ffn_down.weight q6_K +blk.17.attn_v.weight q6_K +blk.17.ffn_down.weight q6_K +blk.20.attn_v.weight q6_K +blk.20.ffn_down.weight q6_K +blk.23.attn_v.weight q6_K +blk.23.ffn_down.weight q6_K +blk.24.attn_v.weight q6_K +blk.24.ffn_down.weight q6_K +blk.25.attn_v.weight q6_K +blk.25.ffn_down.weight q6_K +blk.26.attn_v.weight q6_K +blk.26.ffn_down.weight q6_K +blk.27.attn_v.weight q6_K +blk.27.ffn_down.weight q6_K + +[Q5_K_S] q5_K +output.weight q6_K + +[Q5_K_M] q5_K +output.weight q6_K +blk.0.attn_v.weight q6_K +blk.0.ffn_down.weight q6_K +blk.1.attn_v.weight q6_K +blk.1.ffn_down.weight q6_K +blk.2.attn_v.weight q6_K +blk.2.ffn_down.weight q6_K +blk.5.attn_v.weight q6_K +blk.5.ffn_down.weight q6_K +blk.8.attn_v.weight q6_K +blk.8.ffn_down.weight q6_K +blk.11.attn_v.weight q6_K +blk.11.ffn_down.weight q6_K +blk.14.attn_v.weight q6_K +blk.14.ffn_down.weight q6_K +blk.17.attn_v.weight q6_K +blk.17.ffn_down.weight q6_K +blk.20.attn_v.weight q6_K +blk.20.ffn_down.weight q6_K +blk.23.attn_v.weight q6_K +blk.23.ffn_down.weight q6_K +blk.24.attn_v.weight q6_K +blk.24.ffn_down.weight q6_K +blk.25.attn_v.weight q6_K +blk.25.ffn_down.weight q6_K +blk.26.attn_v.weight q6_K +blk.26.ffn_down.weight q6_K +blk.27.attn_v.weight q6_K +blk.27.ffn_down.weight q6_K + +[Q6_K] q6_K + +[IQ2_XXS] iq2_xxs +output.weight q5_K +token_embd.weight q2_K +blk.0.attn_v.weight q2_K +blk.0.ffn_down.weight q2_K +blk.1.attn_v.weight q2_K +blk.1.ffn_down.weight q2_K +blk.2.attn_v.weight q2_K +blk.2.ffn_down.weight q2_K +blk.3.attn_v.weight q2_K +blk.4.attn_v.weight q2_K +blk.5.attn_v.weight q2_K +blk.6.attn_v.weight q2_K +blk.7.attn_v.weight q2_K +blk.8.attn_v.weight q2_K +blk.9.attn_v.weight q2_K +blk.10.attn_v.weight q2_K +blk.11.attn_v.weight q2_K +blk.12.attn_v.weight q2_K +blk.13.attn_v.weight q2_K +blk.14.attn_v.weight q2_K +blk.15.attn_v.weight q2_K +blk.16.attn_v.weight q2_K +blk.17.attn_v.weight q2_K +blk.18.attn_v.weight q2_K +blk.19.attn_v.weight q2_K +blk.20.attn_v.weight q2_K +blk.21.attn_v.weight q2_K +blk.22.attn_v.weight q2_K +blk.23.attn_v.weight q2_K +blk.24.attn_v.weight q2_K +blk.25.attn_v.weight q2_K +blk.26.attn_v.weight q2_K +blk.27.attn_v.weight q2_K + +[IQ2_XS] iq2_xs +output.weight q5_K +token_embd.weight q2_K +blk.0.attn_v.weight q2_K +blk.0.ffn_down.weight q2_K +blk.1.attn_v.weight q2_K +blk.1.ffn_down.weight q2_K +blk.2.attn_v.weight q2_K +blk.2.ffn_down.weight q2_K +blk.3.attn_v.weight q2_K +blk.4.attn_v.weight q2_K +blk.5.attn_v.weight q2_K +blk.6.attn_v.weight q2_K +blk.7.attn_v.weight q2_K +blk.8.attn_v.weight q2_K +blk.9.attn_v.weight q2_K +blk.10.attn_v.weight q2_K +blk.11.attn_v.weight q2_K +blk.12.attn_v.weight q2_K +blk.13.attn_v.weight q2_K +blk.14.attn_v.weight q2_K +blk.15.attn_v.weight q2_K +blk.16.attn_v.weight q2_K +blk.17.attn_v.weight q2_K +blk.18.attn_v.weight q2_K +blk.19.attn_v.weight q2_K +blk.20.attn_v.weight q2_K +blk.21.attn_v.weight q2_K +blk.22.attn_v.weight q2_K +blk.23.attn_v.weight q2_K +blk.24.attn_v.weight q2_K +blk.25.attn_v.weight q2_K +blk.26.attn_v.weight q2_K +blk.27.attn_v.weight q2_K + +[Q2_K_S] q2_K +output.weight q6_K +blk.0.ffn_down.weight q4_K +blk.1.ffn_down.weight q4_K +blk.2.ffn_down.weight q4_K + +[IQ3_XS] iq3_s +output.weight q6_K +blk.0.attn_k.weight iq3_xxs +blk.0.attn_q.weight iq3_xxs +blk.1.attn_k.weight iq3_xxs +blk.1.attn_q.weight iq3_xxs +blk.2.attn_k.weight iq3_xxs +blk.2.attn_q.weight iq3_xxs +blk.3.attn_k.weight iq3_xxs +blk.3.attn_q.weight iq3_xxs +blk.3.ffn_gate.weight iq3_xxs +blk.3.ffn_up.weight iq3_xxs +blk.4.attn_k.weight iq3_xxs +blk.4.attn_q.weight iq3_xxs +blk.4.ffn_gate.weight iq3_xxs +blk.4.ffn_up.weight iq3_xxs +blk.5.attn_k.weight iq3_xxs +blk.5.attn_q.weight iq3_xxs +blk.5.ffn_gate.weight iq3_xxs +blk.5.ffn_up.weight iq3_xxs +blk.6.attn_k.weight iq3_xxs +blk.6.attn_q.weight iq3_xxs +blk.6.ffn_gate.weight iq3_xxs +blk.6.ffn_up.weight iq3_xxs +blk.7.attn_k.weight iq3_xxs +blk.7.attn_q.weight iq3_xxs +blk.7.ffn_gate.weight iq3_xxs +blk.7.ffn_up.weight iq3_xxs +blk.8.attn_k.weight iq3_xxs +blk.8.attn_q.weight iq3_xxs +blk.8.ffn_gate.weight iq3_xxs +blk.8.ffn_up.weight iq3_xxs +blk.9.attn_k.weight iq3_xxs +blk.9.attn_q.weight iq3_xxs +blk.9.ffn_gate.weight iq3_xxs +blk.9.ffn_up.weight iq3_xxs +blk.10.attn_k.weight iq3_xxs +blk.10.attn_q.weight iq3_xxs +blk.10.ffn_gate.weight iq3_xxs +blk.10.ffn_up.weight iq3_xxs +blk.11.attn_k.weight iq3_xxs +blk.11.attn_q.weight iq3_xxs +blk.11.ffn_gate.weight iq3_xxs +blk.11.ffn_up.weight iq3_xxs +blk.12.attn_k.weight iq3_xxs +blk.12.attn_q.weight iq3_xxs +blk.12.ffn_gate.weight iq3_xxs +blk.12.ffn_up.weight iq3_xxs +blk.13.attn_k.weight iq3_xxs +blk.13.attn_q.weight iq3_xxs +blk.13.ffn_gate.weight iq3_xxs +blk.13.ffn_up.weight iq3_xxs +blk.14.attn_k.weight iq3_xxs +blk.14.attn_q.weight iq3_xxs +blk.14.ffn_gate.weight iq3_xxs +blk.14.ffn_up.weight iq3_xxs +blk.15.attn_k.weight iq3_xxs +blk.15.attn_q.weight iq3_xxs +blk.15.ffn_gate.weight iq3_xxs +blk.15.ffn_up.weight iq3_xxs +blk.16.attn_k.weight iq3_xxs +blk.16.attn_q.weight iq3_xxs +blk.16.ffn_gate.weight iq3_xxs +blk.16.ffn_up.weight iq3_xxs +blk.17.attn_k.weight iq3_xxs +blk.17.attn_q.weight iq3_xxs +blk.17.ffn_gate.weight iq3_xxs +blk.17.ffn_up.weight iq3_xxs +blk.18.attn_k.weight iq3_xxs +blk.18.attn_q.weight iq3_xxs +blk.18.ffn_gate.weight iq3_xxs +blk.18.ffn_up.weight iq3_xxs +blk.19.attn_k.weight iq3_xxs +blk.19.attn_q.weight iq3_xxs +blk.19.ffn_gate.weight iq3_xxs +blk.19.ffn_up.weight iq3_xxs +blk.20.attn_k.weight iq3_xxs +blk.20.attn_q.weight iq3_xxs +blk.20.ffn_gate.weight iq3_xxs +blk.20.ffn_up.weight iq3_xxs +blk.21.attn_k.weight iq3_xxs +blk.21.attn_q.weight iq3_xxs +blk.21.ffn_gate.weight iq3_xxs +blk.21.ffn_up.weight iq3_xxs +blk.22.attn_k.weight iq3_xxs +blk.22.attn_q.weight iq3_xxs +blk.22.ffn_gate.weight iq3_xxs +blk.22.ffn_up.weight iq3_xxs +blk.23.attn_k.weight iq3_xxs +blk.23.attn_q.weight iq3_xxs +blk.23.ffn_gate.weight iq3_xxs +blk.23.ffn_up.weight iq3_xxs +blk.24.attn_k.weight iq3_xxs +blk.24.attn_q.weight iq3_xxs +blk.25.attn_k.weight iq3_xxs +blk.25.attn_q.weight iq3_xxs +blk.26.attn_k.weight iq3_xxs +blk.26.attn_q.weight iq3_xxs +blk.27.attn_k.weight iq3_xxs +blk.27.attn_q.weight iq3_xxs + +[IQ3_XXS] iq3_xxs +output.weight q5_K +token_embd.weight iq3_s +blk.0.attn_k.weight iq2_s +blk.0.attn_output.weight iq3_s +blk.0.attn_q.weight iq2_s +blk.0.attn_v.weight iq3_s +blk.0.ffn_down.weight q4_K +blk.1.attn_k.weight iq2_s +blk.1.attn_output.weight iq3_s +blk.1.attn_q.weight iq2_s +blk.1.attn_v.weight iq3_s +blk.1.ffn_down.weight q4_K +blk.2.attn_k.weight iq2_s +blk.2.attn_output.weight iq3_s +blk.2.attn_q.weight iq2_s +blk.2.attn_v.weight iq3_s +blk.2.ffn_down.weight q4_K +blk.3.attn_k.weight iq2_s +blk.3.attn_output.weight iq3_s +blk.3.attn_q.weight iq2_s +blk.3.attn_v.weight iq3_s +blk.3.ffn_down.weight q3_K +blk.4.attn_k.weight iq2_s +blk.4.attn_output.weight iq3_s +blk.4.attn_q.weight iq2_s +blk.4.attn_v.weight iq3_s +blk.4.ffn_down.weight q3_K +blk.5.attn_k.weight iq2_s +blk.5.attn_output.weight iq3_s +blk.5.attn_q.weight iq2_s +blk.5.attn_v.weight iq3_s +blk.5.ffn_down.weight q3_K +blk.6.attn_k.weight iq2_s +blk.6.attn_output.weight iq3_s +blk.6.attn_q.weight iq2_s +blk.6.attn_v.weight iq3_s +blk.6.ffn_down.weight q3_K +blk.7.attn_k.weight iq2_s +blk.7.attn_output.weight iq3_s +blk.7.attn_q.weight iq2_s +blk.7.attn_v.weight iq3_s +blk.7.ffn_down.weight q3_K +blk.8.attn_k.weight iq2_s +blk.8.attn_output.weight iq3_s +blk.8.attn_q.weight iq2_s +blk.8.attn_v.weight iq3_s +blk.8.ffn_down.weight q3_K +blk.9.attn_k.weight iq2_s +blk.9.attn_output.weight iq3_s +blk.9.attn_q.weight iq2_s +blk.9.attn_v.weight iq3_s +blk.9.ffn_down.weight q3_K +blk.10.attn_k.weight iq2_s +blk.10.attn_output.weight iq3_s +blk.10.attn_q.weight iq2_s +blk.10.attn_v.weight iq3_s +blk.10.ffn_down.weight q3_K +blk.11.attn_k.weight iq2_s +blk.11.attn_output.weight iq3_s +blk.11.attn_q.weight iq2_s +blk.11.attn_v.weight iq3_s +blk.11.ffn_down.weight q3_K +blk.12.attn_k.weight iq2_s +blk.12.attn_output.weight iq3_s +blk.12.attn_q.weight iq2_s +blk.12.attn_v.weight iq3_s +blk.12.ffn_down.weight q3_K +blk.13.attn_k.weight iq2_s +blk.13.attn_output.weight iq3_s +blk.13.attn_q.weight iq2_s +blk.13.attn_v.weight iq3_s +blk.13.ffn_down.weight q3_K +blk.14.attn_k.weight iq2_s +blk.14.attn_output.weight iq3_s +blk.14.attn_q.weight iq2_s +blk.14.attn_v.weight iq3_s +blk.14.ffn_down.weight q3_K +blk.15.attn_k.weight iq2_s +blk.15.attn_output.weight iq3_s +blk.15.attn_q.weight iq2_s +blk.15.attn_v.weight iq3_s +blk.15.ffn_down.weight q3_K +blk.16.attn_k.weight iq2_s +blk.16.attn_output.weight iq3_s +blk.16.attn_q.weight iq2_s +blk.16.attn_v.weight iq3_s +blk.16.ffn_down.weight q3_K +blk.17.attn_k.weight iq2_s +blk.17.attn_output.weight iq3_s +blk.17.attn_q.weight iq2_s +blk.17.attn_v.weight iq3_s +blk.17.ffn_down.weight q3_K +blk.18.attn_k.weight iq2_s +blk.18.attn_output.weight iq3_s +blk.18.attn_q.weight iq2_s +blk.18.attn_v.weight iq3_s +blk.18.ffn_down.weight q3_K +blk.19.attn_k.weight iq2_s +blk.19.attn_output.weight iq3_s +blk.19.attn_q.weight iq2_s +blk.19.attn_v.weight iq3_s +blk.19.ffn_down.weight q3_K +blk.20.attn_k.weight iq2_s +blk.20.attn_output.weight iq3_s +blk.20.attn_q.weight iq2_s +blk.20.attn_v.weight iq3_s +blk.20.ffn_down.weight q3_K +blk.21.attn_k.weight iq2_s +blk.21.attn_output.weight iq3_s +blk.21.attn_q.weight iq2_s +blk.21.attn_v.weight iq3_s +blk.21.ffn_down.weight q3_K +blk.22.attn_k.weight iq2_s +blk.22.attn_output.weight iq3_s +blk.22.attn_q.weight iq2_s +blk.22.attn_v.weight iq3_s +blk.22.ffn_down.weight q3_K +blk.23.attn_k.weight iq2_s +blk.23.attn_output.weight iq3_s +blk.23.attn_q.weight iq2_s +blk.23.attn_v.weight iq3_s +blk.23.ffn_down.weight q3_K +blk.24.attn_k.weight iq2_s +blk.24.attn_output.weight iq3_s +blk.24.attn_q.weight iq2_s +blk.24.attn_v.weight iq3_s +blk.24.ffn_down.weight q3_K +blk.25.attn_k.weight iq2_s +blk.25.attn_output.weight iq3_s +blk.25.attn_q.weight iq2_s +blk.25.attn_v.weight iq3_s +blk.25.ffn_down.weight q3_K +blk.26.attn_k.weight iq2_s +blk.26.attn_output.weight iq3_s +blk.26.attn_q.weight iq2_s +blk.26.attn_v.weight iq3_s +blk.26.ffn_down.weight q3_K +blk.27.attn_k.weight iq2_s +blk.27.attn_output.weight iq3_s +blk.27.attn_q.weight iq2_s +blk.27.attn_v.weight iq3_s +blk.27.ffn_down.weight q3_K + +[IQ1_S] iq1_s +output.weight q5_K +token_embd.weight q2_K +blk.0.attn_output.weight iq2_xxs +blk.0.attn_v.weight q2_K +blk.0.ffn_down.weight q2_K +blk.1.attn_output.weight iq2_xxs +blk.1.attn_v.weight q2_K +blk.1.ffn_down.weight q2_K +blk.2.attn_output.weight iq2_xxs +blk.2.attn_v.weight q2_K +blk.2.ffn_down.weight q2_K +blk.3.attn_output.weight iq2_xxs +blk.3.attn_v.weight q2_K +blk.4.attn_output.weight iq2_xxs +blk.4.attn_v.weight q2_K +blk.5.attn_output.weight iq2_xxs +blk.5.attn_v.weight q2_K +blk.6.attn_output.weight iq2_xxs +blk.6.attn_v.weight q2_K +blk.7.attn_output.weight iq2_xxs +blk.7.attn_v.weight q2_K +blk.8.attn_output.weight iq2_xxs +blk.8.attn_v.weight q2_K +blk.9.attn_output.weight iq2_xxs +blk.9.attn_v.weight q2_K +blk.10.attn_output.weight iq2_xxs +blk.10.attn_v.weight q2_K +blk.11.attn_output.weight iq2_xxs +blk.11.attn_v.weight q2_K +blk.12.attn_output.weight iq2_xxs +blk.12.attn_v.weight q2_K +blk.13.attn_output.weight iq2_xxs +blk.13.attn_v.weight q2_K +blk.14.attn_output.weight iq2_xxs +blk.14.attn_v.weight q2_K +blk.15.attn_output.weight iq2_xxs +blk.15.attn_v.weight q2_K +blk.16.attn_output.weight iq2_xxs +blk.16.attn_v.weight q2_K +blk.17.attn_output.weight iq2_xxs +blk.17.attn_v.weight q2_K +blk.18.attn_output.weight iq2_xxs +blk.18.attn_v.weight q2_K +blk.19.attn_output.weight iq2_xxs +blk.19.attn_v.weight q2_K +blk.20.attn_output.weight iq2_xxs +blk.20.attn_v.weight q2_K +blk.21.attn_output.weight iq2_xxs +blk.21.attn_v.weight q2_K +blk.22.attn_output.weight iq2_xxs +blk.22.attn_v.weight q2_K +blk.23.attn_output.weight iq2_xxs +blk.23.attn_v.weight q2_K +blk.24.attn_output.weight iq2_xxs +blk.24.attn_v.weight q2_K +blk.25.attn_output.weight iq2_xxs +blk.25.attn_v.weight q2_K +blk.26.attn_output.weight iq2_xxs +blk.26.attn_v.weight q2_K +blk.27.attn_output.weight iq2_xxs +blk.27.attn_v.weight q2_K + +[IQ4_NL] iq4_nl +output.weight q6_K +blk.0.ffn_down.weight q5_K +blk.1.ffn_down.weight q5_K +blk.2.ffn_down.weight q5_K + +[IQ3_S] iq3_s +output.weight q6_K + +[IQ3_M] iq3_s +output.weight q6_K +blk.0.attn_output.weight q4_K +blk.0.attn_v.weight q4_K +blk.0.ffn_down.weight q4_K +blk.1.attn_output.weight q4_K +blk.1.attn_v.weight q4_K +blk.1.ffn_down.weight q4_K +blk.2.attn_output.weight q4_K +blk.2.attn_v.weight q4_K +blk.2.ffn_down.weight q4_K +blk.3.attn_output.weight q4_K +blk.3.attn_v.weight q4_K +blk.4.attn_output.weight q4_K +blk.4.attn_v.weight q4_K +blk.5.attn_output.weight q4_K +blk.5.attn_v.weight q4_K +blk.6.attn_output.weight q4_K +blk.6.attn_v.weight q4_K +blk.7.attn_output.weight q4_K +blk.7.attn_v.weight q4_K +blk.8.attn_output.weight q4_K +blk.8.attn_v.weight q4_K +blk.9.attn_output.weight q4_K +blk.9.attn_v.weight q4_K +blk.10.attn_output.weight q4_K +blk.10.attn_v.weight q4_K +blk.11.attn_output.weight q4_K +blk.11.attn_v.weight q4_K +blk.12.attn_output.weight q4_K +blk.12.attn_v.weight q4_K +blk.13.attn_output.weight q4_K +blk.13.attn_v.weight q4_K +blk.14.attn_output.weight q4_K +blk.14.attn_v.weight q4_K +blk.15.attn_output.weight q4_K +blk.15.attn_v.weight q4_K +blk.16.attn_output.weight q4_K +blk.16.attn_v.weight q4_K +blk.17.attn_output.weight q4_K +blk.17.attn_v.weight q4_K +blk.18.attn_output.weight q4_K +blk.18.attn_v.weight q4_K +blk.19.attn_output.weight q4_K +blk.19.attn_v.weight q4_K +blk.20.attn_output.weight q4_K +blk.20.attn_v.weight q4_K +blk.21.attn_output.weight q4_K +blk.21.attn_v.weight q4_K +blk.22.attn_output.weight q4_K +blk.22.attn_v.weight q4_K +blk.23.attn_output.weight q4_K +blk.23.attn_v.weight q4_K +blk.24.attn_output.weight q4_K +blk.24.attn_v.weight q4_K +blk.25.attn_output.weight q4_K +blk.25.attn_v.weight q4_K +blk.26.attn_output.weight q4_K +blk.26.attn_v.weight q4_K +blk.27.attn_output.weight q4_K +blk.27.attn_v.weight q4_K + +[IQ2_S] iq2_xs +output.weight q5_K +token_embd.weight iq3_s +blk.0.attn_output.weight iq3_s +blk.0.attn_v.weight iq3_s +blk.0.ffn_down.weight iq3_s +blk.1.attn_output.weight iq3_s +blk.1.attn_v.weight iq3_s +blk.1.ffn_down.weight iq3_s +blk.2.attn_output.weight iq3_s +blk.2.attn_v.weight iq3_s +blk.2.ffn_down.weight iq3_s +blk.3.attn_output.weight iq3_s +blk.3.attn_v.weight iq3_s +blk.4.attn_output.weight iq3_s +blk.4.attn_v.weight iq3_s +blk.5.attn_output.weight iq3_s +blk.5.attn_v.weight iq3_s +blk.6.attn_output.weight iq3_s +blk.6.attn_v.weight iq3_s +blk.7.attn_output.weight iq3_s +blk.7.attn_v.weight iq3_s +blk.8.attn_output.weight iq3_s +blk.8.attn_v.weight iq3_s +blk.9.attn_output.weight iq3_s +blk.9.attn_v.weight iq3_s +blk.10.attn_output.weight iq3_s +blk.10.attn_v.weight iq3_s +blk.11.attn_output.weight iq3_s +blk.11.attn_v.weight iq3_s +blk.12.attn_output.weight iq3_s +blk.12.attn_v.weight iq3_s +blk.13.attn_output.weight iq3_s +blk.13.attn_v.weight iq3_s +blk.14.attn_output.weight iq3_s +blk.14.attn_v.weight iq3_s +blk.15.attn_output.weight iq3_s +blk.15.attn_v.weight iq3_s +blk.16.attn_output.weight iq3_s +blk.16.attn_v.weight iq3_s +blk.17.attn_output.weight iq3_s +blk.17.attn_v.weight iq3_s +blk.18.attn_output.weight iq3_s +blk.18.attn_v.weight iq3_s +blk.19.attn_output.weight iq3_s +blk.19.attn_v.weight iq3_s +blk.20.attn_output.weight iq3_s +blk.20.attn_v.weight iq3_s +blk.21.attn_output.weight iq3_s +blk.21.attn_v.weight iq3_s +blk.22.attn_output.weight iq3_s +blk.22.attn_v.weight iq3_s +blk.23.attn_output.weight iq3_s +blk.23.attn_v.weight iq3_s +blk.24.attn_output.weight iq3_s +blk.24.attn_v.weight iq3_s +blk.25.attn_output.weight iq3_s +blk.25.attn_v.weight iq3_s +blk.26.attn_output.weight iq3_s +blk.26.attn_v.weight iq3_s +blk.27.attn_output.weight iq3_s +blk.27.attn_v.weight iq3_s + +[IQ2_M] iq2_s +output.weight q5_K +token_embd.weight iq3_s +blk.0.attn_output.weight iq3_s +blk.0.attn_v.weight iq3_s +blk.0.ffn_down.weight iq3_s +blk.1.attn_output.weight iq3_s +blk.1.attn_v.weight iq3_s +blk.1.ffn_down.weight iq3_s +blk.2.attn_output.weight iq3_s +blk.2.attn_v.weight iq3_s +blk.2.ffn_down.weight iq3_s +blk.3.attn_output.weight iq3_s +blk.3.attn_v.weight iq3_s +blk.4.attn_output.weight iq3_s +blk.4.attn_v.weight iq3_s +blk.5.attn_output.weight iq3_s +blk.5.attn_v.weight iq3_s +blk.6.attn_output.weight iq3_s +blk.6.attn_v.weight iq3_s +blk.7.attn_output.weight iq3_s +blk.7.attn_v.weight iq3_s +blk.8.attn_output.weight iq3_s +blk.8.attn_v.weight iq3_s +blk.9.attn_output.weight iq3_s +blk.9.attn_v.weight iq3_s +blk.10.attn_output.weight iq3_s +blk.10.attn_v.weight iq3_s +blk.11.attn_output.weight iq3_s +blk.11.attn_v.weight iq3_s +blk.12.attn_output.weight iq3_s +blk.12.attn_v.weight iq3_s +blk.13.attn_output.weight iq3_s +blk.13.attn_v.weight iq3_s +blk.14.attn_output.weight iq3_s +blk.14.attn_v.weight iq3_s +blk.15.attn_output.weight iq3_s +blk.15.attn_v.weight iq3_s +blk.16.attn_output.weight iq3_s +blk.16.attn_v.weight iq3_s +blk.17.attn_output.weight iq3_s +blk.17.attn_v.weight iq3_s +blk.18.attn_output.weight iq3_s +blk.18.attn_v.weight iq3_s +blk.19.attn_output.weight iq3_s +blk.19.attn_v.weight iq3_s +blk.20.attn_output.weight iq3_s +blk.20.attn_v.weight iq3_s +blk.21.attn_output.weight iq3_s +blk.21.attn_v.weight iq3_s +blk.22.attn_output.weight iq3_s +blk.22.attn_v.weight iq3_s +blk.23.attn_output.weight iq3_s +blk.23.attn_v.weight iq3_s +blk.24.attn_output.weight iq3_s +blk.24.attn_v.weight iq3_s +blk.25.attn_output.weight iq3_s +blk.25.attn_v.weight iq3_s +blk.26.attn_output.weight iq3_s +blk.26.attn_v.weight iq3_s +blk.27.attn_output.weight iq3_s +blk.27.attn_v.weight iq3_s + +[IQ4_XS] iq4_xs +output.weight q6_K +blk.0.ffn_down.weight q5_K +blk.1.ffn_down.weight q5_K +blk.2.ffn_down.weight q5_K + +[IQ1_M] iq1_m +output.weight q5_K +token_embd.weight q2_K +blk.0.attn_output.weight iq2_xxs +blk.0.attn_v.weight q2_K +blk.0.ffn_down.weight q2_K +blk.1.attn_output.weight iq2_xxs +blk.1.attn_v.weight q2_K +blk.1.ffn_down.weight q2_K +blk.2.attn_output.weight iq2_xxs +blk.2.attn_v.weight q2_K +blk.2.ffn_down.weight q2_K +blk.3.attn_output.weight iq2_xxs +blk.3.attn_v.weight q2_K +blk.4.attn_output.weight iq2_xxs +blk.4.attn_v.weight q2_K +blk.5.attn_output.weight iq2_xxs +blk.5.attn_v.weight q2_K +blk.6.attn_output.weight iq2_xxs +blk.6.attn_v.weight q2_K +blk.7.attn_output.weight iq2_xxs +blk.7.attn_v.weight q2_K +blk.8.attn_output.weight iq2_xxs +blk.8.attn_v.weight q2_K +blk.9.attn_output.weight iq2_xxs +blk.9.attn_v.weight q2_K +blk.10.attn_output.weight iq2_xxs +blk.10.attn_v.weight q2_K +blk.11.attn_output.weight iq2_xxs +blk.11.attn_v.weight q2_K +blk.12.attn_output.weight iq2_xxs +blk.12.attn_v.weight q2_K +blk.13.attn_output.weight iq2_xxs +blk.13.attn_v.weight q2_K +blk.14.attn_output.weight iq2_xxs +blk.14.attn_v.weight q2_K +blk.15.attn_output.weight iq2_xxs +blk.15.attn_v.weight q2_K +blk.16.attn_output.weight iq2_xxs +blk.16.attn_v.weight q2_K +blk.17.attn_output.weight iq2_xxs +blk.17.attn_v.weight q2_K +blk.18.attn_output.weight iq2_xxs +blk.18.attn_v.weight q2_K +blk.19.attn_output.weight iq2_xxs +blk.19.attn_v.weight q2_K +blk.20.attn_output.weight iq2_xxs +blk.20.attn_v.weight q2_K +blk.21.attn_output.weight iq2_xxs +blk.21.attn_v.weight q2_K +blk.22.attn_output.weight iq2_xxs +blk.22.attn_v.weight q2_K +blk.23.attn_output.weight iq2_xxs +blk.23.attn_v.weight q2_K +blk.24.attn_output.weight iq2_xxs +blk.24.attn_v.weight q2_K +blk.25.attn_output.weight iq2_xxs +blk.25.attn_v.weight q2_K +blk.26.attn_output.weight iq2_xxs +blk.26.attn_v.weight q2_K +blk.27.attn_output.weight iq2_xxs +blk.27.attn_v.weight q2_K + +[BF16] bf16 +output.weight q6_K + +[TQ1_0] tq1_0 +output.weight q6_K +token_embd.weight q4_K + +[TQ2_0] tq2_0 +output.weight q6_K +token_embd.weight q4_K + +[MXFP4_MOE] mxfp4 +output.weight q8_0 +token_embd.weight q8_0 +blk.0.attn_k.weight q8_0 +blk.0.attn_output.weight q8_0 +blk.0.attn_q.weight q8_0 +blk.0.attn_v.weight q8_0 +blk.0.ffn_down.weight q8_0 +blk.0.ffn_gate.weight q8_0 +blk.0.ffn_up.weight q8_0 +blk.1.attn_k.weight q8_0 +blk.1.attn_output.weight q8_0 +blk.1.attn_q.weight q8_0 +blk.1.attn_v.weight q8_0 +blk.1.ffn_down.weight q8_0 +blk.1.ffn_gate.weight q8_0 +blk.1.ffn_up.weight q8_0 +blk.2.attn_k.weight q8_0 +blk.2.attn_output.weight q8_0 +blk.2.attn_q.weight q8_0 +blk.2.attn_v.weight q8_0 +blk.2.ffn_down.weight q8_0 +blk.2.ffn_gate.weight q8_0 +blk.2.ffn_up.weight q8_0 +blk.3.attn_k.weight q8_0 +blk.3.attn_output.weight q8_0 +blk.3.attn_q.weight q8_0 +blk.3.attn_v.weight q8_0 +blk.3.ffn_down.weight q8_0 +blk.3.ffn_gate.weight q8_0 +blk.3.ffn_up.weight q8_0 +blk.4.attn_k.weight q8_0 +blk.4.attn_output.weight q8_0 +blk.4.attn_q.weight q8_0 +blk.4.attn_v.weight q8_0 +blk.4.ffn_down.weight q8_0 +blk.4.ffn_gate.weight q8_0 +blk.4.ffn_up.weight q8_0 +blk.5.attn_k.weight q8_0 +blk.5.attn_output.weight q8_0 +blk.5.attn_q.weight q8_0 +blk.5.attn_v.weight q8_0 +blk.5.ffn_down.weight q8_0 +blk.5.ffn_gate.weight q8_0 +blk.5.ffn_up.weight q8_0 +blk.6.attn_k.weight q8_0 +blk.6.attn_output.weight q8_0 +blk.6.attn_q.weight q8_0 +blk.6.attn_v.weight q8_0 +blk.6.ffn_down.weight q8_0 +blk.6.ffn_gate.weight q8_0 +blk.6.ffn_up.weight q8_0 +blk.7.attn_k.weight q8_0 +blk.7.attn_output.weight q8_0 +blk.7.attn_q.weight q8_0 +blk.7.attn_v.weight q8_0 +blk.7.ffn_down.weight q8_0 +blk.7.ffn_gate.weight q8_0 +blk.7.ffn_up.weight q8_0 +blk.8.attn_k.weight q8_0 +blk.8.attn_output.weight q8_0 +blk.8.attn_q.weight q8_0 +blk.8.attn_v.weight q8_0 +blk.8.ffn_down.weight q8_0 +blk.8.ffn_gate.weight q8_0 +blk.8.ffn_up.weight q8_0 +blk.9.attn_k.weight q8_0 +blk.9.attn_output.weight q8_0 +blk.9.attn_q.weight q8_0 +blk.9.attn_v.weight q8_0 +blk.9.ffn_down.weight q8_0 +blk.9.ffn_gate.weight q8_0 +blk.9.ffn_up.weight q8_0 +blk.10.attn_k.weight q8_0 +blk.10.attn_output.weight q8_0 +blk.10.attn_q.weight q8_0 +blk.10.attn_v.weight q8_0 +blk.10.ffn_down.weight q8_0 +blk.10.ffn_gate.weight q8_0 +blk.10.ffn_up.weight q8_0 +blk.11.attn_k.weight q8_0 +blk.11.attn_output.weight q8_0 +blk.11.attn_q.weight q8_0 +blk.11.attn_v.weight q8_0 +blk.11.ffn_down.weight q8_0 +blk.11.ffn_gate.weight q8_0 +blk.11.ffn_up.weight q8_0 +blk.12.attn_k.weight q8_0 +blk.12.attn_output.weight q8_0 +blk.12.attn_q.weight q8_0 +blk.12.attn_v.weight q8_0 +blk.12.ffn_down.weight q8_0 +blk.12.ffn_gate.weight q8_0 +blk.12.ffn_up.weight q8_0 +blk.13.attn_k.weight q8_0 +blk.13.attn_output.weight q8_0 +blk.13.attn_q.weight q8_0 +blk.13.attn_v.weight q8_0 +blk.13.ffn_down.weight q8_0 +blk.13.ffn_gate.weight q8_0 +blk.13.ffn_up.weight q8_0 +blk.14.attn_k.weight q8_0 +blk.14.attn_output.weight q8_0 +blk.14.attn_q.weight q8_0 +blk.14.attn_v.weight q8_0 +blk.14.ffn_down.weight q8_0 +blk.14.ffn_gate.weight q8_0 +blk.14.ffn_up.weight q8_0 +blk.15.attn_k.weight q8_0 +blk.15.attn_output.weight q8_0 +blk.15.attn_q.weight q8_0 +blk.15.attn_v.weight q8_0 +blk.15.ffn_down.weight q8_0 +blk.15.ffn_gate.weight q8_0 +blk.15.ffn_up.weight q8_0 +blk.16.attn_k.weight q8_0 +blk.16.attn_output.weight q8_0 +blk.16.attn_q.weight q8_0 +blk.16.attn_v.weight q8_0 +blk.16.ffn_down.weight q8_0 +blk.16.ffn_gate.weight q8_0 +blk.16.ffn_up.weight q8_0 +blk.17.attn_k.weight q8_0 +blk.17.attn_output.weight q8_0 +blk.17.attn_q.weight q8_0 +blk.17.attn_v.weight q8_0 +blk.17.ffn_down.weight q8_0 +blk.17.ffn_gate.weight q8_0 +blk.17.ffn_up.weight q8_0 +blk.18.attn_k.weight q8_0 +blk.18.attn_output.weight q8_0 +blk.18.attn_q.weight q8_0 +blk.18.attn_v.weight q8_0 +blk.18.ffn_down.weight q8_0 +blk.18.ffn_gate.weight q8_0 +blk.18.ffn_up.weight q8_0 +blk.19.attn_k.weight q8_0 +blk.19.attn_output.weight q8_0 +blk.19.attn_q.weight q8_0 +blk.19.attn_v.weight q8_0 +blk.19.ffn_down.weight q8_0 +blk.19.ffn_gate.weight q8_0 +blk.19.ffn_up.weight q8_0 +blk.20.attn_k.weight q8_0 +blk.20.attn_output.weight q8_0 +blk.20.attn_q.weight q8_0 +blk.20.attn_v.weight q8_0 +blk.20.ffn_down.weight q8_0 +blk.20.ffn_gate.weight q8_0 +blk.20.ffn_up.weight q8_0 +blk.21.attn_k.weight q8_0 +blk.21.attn_output.weight q8_0 +blk.21.attn_q.weight q8_0 +blk.21.attn_v.weight q8_0 +blk.21.ffn_down.weight q8_0 +blk.21.ffn_gate.weight q8_0 +blk.21.ffn_up.weight q8_0 +blk.22.attn_k.weight q8_0 +blk.22.attn_output.weight q8_0 +blk.22.attn_q.weight q8_0 +blk.22.attn_v.weight q8_0 +blk.22.ffn_down.weight q8_0 +blk.22.ffn_gate.weight q8_0 +blk.22.ffn_up.weight q8_0 +blk.23.attn_k.weight q8_0 +blk.23.attn_output.weight q8_0 +blk.23.attn_q.weight q8_0 +blk.23.attn_v.weight q8_0 +blk.23.ffn_down.weight q8_0 +blk.23.ffn_gate.weight q8_0 +blk.23.ffn_up.weight q8_0 +blk.24.attn_k.weight q8_0 +blk.24.attn_output.weight q8_0 +blk.24.attn_q.weight q8_0 +blk.24.attn_v.weight q8_0 +blk.24.ffn_down.weight q8_0 +blk.24.ffn_gate.weight q8_0 +blk.24.ffn_up.weight q8_0 +blk.25.attn_k.weight q8_0 +blk.25.attn_output.weight q8_0 +blk.25.attn_q.weight q8_0 +blk.25.attn_v.weight q8_0 +blk.25.ffn_down.weight q8_0 +blk.25.ffn_gate.weight q8_0 +blk.25.ffn_up.weight q8_0 +blk.26.attn_k.weight q8_0 +blk.26.attn_output.weight q8_0 +blk.26.attn_q.weight q8_0 +blk.26.attn_v.weight q8_0 +blk.26.ffn_down.weight q8_0 +blk.26.ffn_gate.weight q8_0 +blk.26.ffn_up.weight q8_0 +blk.27.attn_k.weight q8_0 +blk.27.attn_output.weight q8_0 +blk.27.attn_q.weight q8_0 +blk.27.attn_v.weight q8_0 +blk.27.ffn_down.weight q8_0 +blk.27.ffn_gate.weight q8_0 +blk.27.ffn_up.weight q8_0 diff --git a/tests/test-gguf-model-data.cpp b/tests/test-gguf-model-data.cpp index cc0174961d..e05c4b8976 100644 --- a/tests/test-gguf-model-data.cpp +++ b/tests/test-gguf-model-data.cpp @@ -116,6 +116,39 @@ int main() { // Verify tensor count TEST_ASSERT(model3.tensors.size() == 780, "expected tensor count == 780"); + // Test a hybrid-attention model with array-valued head counts + auto result4 = gguf_fetch_model_meta("ggml-org/Step-3.5-Flash-GGUF", "Q4_K"); + if (!result4.has_value()) { + fprintf(stderr, "FAIL: could not fetch Step-3.5-Flash metadata\n"); + return 1; + } + const auto & model4 = result4.value(); + + fprintf(stderr, "Architecture: %s\n", model4.architecture.c_str()); + fprintf(stderr, "n_embd: %u\n", model4.n_embd); + fprintf(stderr, "n_ff: %u\n", model4.n_ff); + fprintf(stderr, "n_vocab: %u\n", model4.n_vocab); + fprintf(stderr, "n_layer: %u\n", model4.n_layer); + fprintf(stderr, "n_head: %u\n", model4.n_head); + fprintf(stderr, "n_head_kv: %u\n", model4.n_head_kv); + fprintf(stderr, "n_expert: %u\n", model4.n_expert); + fprintf(stderr, "n_embd_head_k: %u\n", model4.n_embd_head_k); + fprintf(stderr, "n_embd_head_v: %u\n", model4.n_embd_head_v); + fprintf(stderr, "tensors: %zu\n", model4.tensors.size()); + + TEST_ASSERT(model4.architecture == "step35", "expected architecture 'step35'"); + + TEST_ASSERT(model4.n_layer == 45, "expected n_layer == 45"); + TEST_ASSERT(model4.n_embd == 4096, "expected n_embd == 4096"); + TEST_ASSERT(model4.n_ff == 11264, "expected n_ff == 11264"); + TEST_ASSERT(model4.n_head == 64, "expected n_head == 64 (first element of per-layer array)"); + TEST_ASSERT(model4.n_head_kv == 8, "expected n_head_kv == 8 (first element of per-layer array)"); + TEST_ASSERT(model4.n_expert == 288, "expected n_expert == 288"); + TEST_ASSERT(model4.n_embd_head_k == 128, "expected n_embd_head_k == 128"); + TEST_ASSERT(model4.n_embd_head_v == 128, "expected n_embd_head_v == 128"); + TEST_ASSERT(model4.n_vocab == 128896, "expected n_vocab == 128896"); + TEST_ASSERT(model4.tensors.size() == 754, "expected tensor count == 754"); + fprintf(stderr, "=== ALL TESTS PASSED ===\n"); return 0; } diff --git a/tests/test-quant-type-selection.cpp b/tests/test-quant-type-selection.cpp new file mode 100644 index 0000000000..4ecb4c09ac --- /dev/null +++ b/tests/test-quant-type-selection.cpp @@ -0,0 +1,520 @@ +#include "ggml.h" +#include "ggml-cpp.h" +#include "llama.h" + +#include "../src/llama-arch.h" +#include "../src/llama-model.h" +#include "../src/llama-quant.h" + +#include "gguf-model-data.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +// --------------------------------------------------------------------------- +// Mock tensor construction - may be better to extract this in the future +// --------------------------------------------------------------------------- + +struct mock_tensor { + ggml_context_ptr ctx; + ggml_tensor * tensor; +}; + +static mock_tensor make_mock_tensor(const std::string & name, int64_t ne0, int64_t ne1, + int64_t ne2 = 1, int64_t ne3 = 1) { + struct ggml_init_params params = { + /*.mem_size =*/ 2 * ggml_tensor_overhead(), + /*.mem_buffer =*/ nullptr, + /*.no_alloc =*/ true, + }; + ggml_context_ptr ctx(ggml_init(params)); + ggml_tensor * t; + if (ne3 > 1) { + t = ggml_new_tensor_4d(ctx.get(), GGML_TYPE_F32, ne0, ne1, ne2, ne3); + } else if (ne2 > 1) { + t = ggml_new_tensor_3d(ctx.get(), GGML_TYPE_F32, ne0, ne1, ne2); + } else { + t = ggml_new_tensor_2d(ctx.get(), GGML_TYPE_F32, ne0, ne1); + } + ggml_set_name(t, name.c_str()); + return { std::move(ctx), t }; +} + +static ggml_type ggml_type_from_name(const std::string & name) { + for (int i = 0; i < GGML_TYPE_COUNT; i++) { + const char * tname = ggml_type_name((ggml_type)i); + if (tname && name == tname) { + return (ggml_type)i; + } + } + return GGML_TYPE_COUNT; +} + +// --------------------------------------------------------------------------- +// File parser for snapshot files (quant type schemas) +// --------------------------------------------------------------------------- + +struct snapshot_section { + llama_ftype ftype; + ggml_type default_type; + std::vector> overrides; +}; + +// This function is pretty ugly, but it's a trade-off of readable snapshot files +// versus readable parsing code +static bool parse_snapshot_file(const std::string & path, std::vector & sections) { + std::ifstream f(path); + if (!f.good()) { + return false; + } + + snapshot_section * cur = nullptr; + std::string line; + + while (std::getline(f, line)) { + if (line.empty() || line[0] == '#') { + continue; + } + + // section header: [FTYPE_NAME] default_type + if (line[0] == '[') { + auto close = line.find(']'); + if (close == std::string::npos) { + fprintf(stderr, "parse error: missing ] in '%s'\n", line.c_str()); + return false; + } + std::string ftype_str = line.substr(1, close - 1); + std::string default_str; + size_t pos = close + 1; + while (pos < line.size() && line[pos] == ' ') { pos++; } + default_str = line.substr(pos); + + llama_ftype ftype = llama_ftype_from_name(ftype_str.c_str()); + if ((int)ftype < 0) { + fprintf(stderr, "parse error: unknown ftype '%s'\n", ftype_str.c_str()); + return false; + } + + ggml_type dtype = ggml_type_from_name(default_str); + if (dtype == GGML_TYPE_COUNT) { + fprintf(stderr, "parse error: unknown default type '%s'\n", default_str.c_str()); + return false; + } + + sections.push_back({ftype, dtype, {}}); + cur = §ions.back(); + continue; + } + + if (!cur) { + fprintf(stderr, "parse error: tensor line before any section: '%s'\n", line.c_str()); + return false; + } + + auto sp = line.rfind(' '); + if (sp == std::string::npos) { + fprintf(stderr, "parse error: no space in tensor line: '%s'\n", line.c_str()); + return false; + } + + std::string tname = line.substr(0, sp); + std::string ttype = line.substr(sp + 1); + + ggml_type gt = ggml_type_from_name(ttype); + if (gt == GGML_TYPE_COUNT) { + fprintf(stderr, "parse error: unknown type '%s' for tensor '%s'\n", + ttype.c_str(), tname.c_str()); + return false; + } + + cur->overrides.push_back({tname, gt}); + } + + return true; +} + +// --------------------------------------------------------------------------- +// Remote model support using gguf-model-data.cpp +// --------------------------------------------------------------------------- + +struct remote_model_spec { + const char * repo; + const char * quant; +}; + +// Get model name from repo: strip org prefix, strip -GGUF suffix, +// and strip anything up to and including first '_' (e.g. "deepseek-ai_DeepSeek-V3.1"). +static std::string model_name_from_repo(const char * repo) { + std::string s(repo); + + auto slash = s.find('/'); + if (slash != std::string::npos) { + s = s.substr(slash + 1); + } + + const std::string suffix = "-GGUF"; + if (s.size() >= suffix.size() && s.compare(s.size() - suffix.size(), suffix.size(), suffix) == 0) { + s = s.substr(0, s.size() - suffix.size()); + } + + auto underscore = s.find('_'); + if (underscore != std::string::npos) { + s = s.substr(underscore + 1); + } + + return s; +} + +static std::string snapshot_file_from_name(const std::string & name) { + std::string lower = name; + for (auto & c : lower) { + c = std::tolower(c); + } + return lower; +} + +static const remote_model_spec model_specs[] = { + { "ggml-org/Qwen3-0.6B-GGUF", "Q8_0" }, + { "ggml-org/GLM-4.6V-GGUF", "Q8_0" }, + { "ggml-org/Step-3.5-Flash-GGUF", "Q4_K" }, + { "ggml-org/Qwen3-Coder-Next-GGUF", "Q8_0" }, + { "ggml-org/Qwen3-14B-GGUF", "Q8_0" }, + { "ggml-org/Nemotron-Nano-3-30B-A3B-GGUF", "Q8_0" }, + { "ggml-org/gpt-oss-120b-GGUF", "mxfp4" }, + { "ggml-org/gemma-3-4b-it-GGUF", "Q8_0" }, + { "bartowski/Meta-Llama-3.1-70B-Instruct-GGUF", "Q4_K_M" }, + { "bartowski/deepseek-ai_DeepSeek-V3.1-GGUF", "IQ1_S" }, + { "bartowski/Qwen_Qwen3.5-397B-A17B-GGUF", "IQ1_S" }, // TODO: swap with ggml-org if/when it's released + { "bartowski/Qwen_Qwen3.5-27B-GGUF", "Q8_0" }, // TODO: swap with ggml-org if/when it's released +}; + +static const int n_model_specs = (int)(sizeof(model_specs) / sizeof(model_specs[0])); + +// Determine llm_type from metadata. +// Only LLM_TYPE_70B matters -> probably can/should be dropped in the future +static llm_type infer_llm_type(llm_arch arch, const gguf_remote_model & remote) { + if (arch == LLM_ARCH_LLAMA && remote.n_layer == 80 && remote.n_head != remote.n_head_kv) { + return LLM_TYPE_70B; + } + return LLM_TYPE_UNKNOWN; +} + +static std::unique_ptr build_mock_model_from_remote(const gguf_remote_model & remote) { + struct llama_model_params mparams = llama_model_default_params(); + auto model = std::make_unique(mparams); + + model->arch = llm_arch_from_string(remote.architecture); + model->type = infer_llm_type(model->arch, remote); + + model->hparams.n_embd = remote.n_embd; + model->hparams.n_embd_head_k = remote.n_embd_head_k; + model->hparams.n_embd_head_v = remote.n_embd_head_v; + model->hparams.n_layer = remote.n_layer; + model->hparams.n_expert = remote.n_expert; + + for (uint32_t i = 0; i < remote.n_layer; i++) { + model->hparams.n_head_arr[i] = remote.n_head; + model->hparams.n_head_kv_arr[i] = remote.n_head_kv; + model->hparams.n_ff_arr[i] = remote.n_ff; + } + + return model; +} + +static std::vector build_mock_tensors( + const gguf_remote_model & remote, + llm_arch arch, + const llama_model_quantize_params & qparams) { + std::vector result; + + for (const auto & t : remote.tensors) { + auto mt = make_mock_tensor(t.name, t.ne[0], t.ne[1], t.ne[2], t.ne[3]); + if (tensor_allows_quantization(&qparams, arch, mt.tensor)) { + result.push_back(std::move(mt)); + } + } + + return result; +} + +static std::string read_file_contents(const std::string & path) { + std::ifstream f(path); + if (!f.good()) { + return ""; + } + std::ostringstream ss; + ss << f.rdbuf(); + return ss.str(); +} + +// --------------------------------------------------------------------------- +// Compute quantization type assignments per target ftype +// --------------------------------------------------------------------------- + +// Returns {tensor_name, assigned_type} for each tensor, in order. +static std::vector> compute_quant_types( + llama_model & mdl, + const std::vector & tensors, + llama_ftype ftype) { + llama_model_quantize_params qparams = llama_model_quantize_default_params(); + qparams.ftype = ftype; + + quantize_state_impl qs(mdl, &qparams); + + std::vector names; + names.reserve(tensors.size()); + for (const auto & mt : tensors) { + names.push_back(mt.tensor->name); + } + init_quantize_state_counters(qs, names); + + ggml_type default_type = llama_ftype_default_type(ftype); + + std::vector> result; + result.reserve(tensors.size()); + + for (const auto & mt : tensors) { + ggml_type got = llama_tensor_get_type(qs, default_type, mt.tensor, ftype); + result.push_back({mt.tensor->name, got}); + } + + return result; +} + +// --------------------------------------------------------------------------- +// Generate mode: regenerate all snapshot files +// Use this when either adding new models or modifying quants +// --------------------------------------------------------------------------- + +static std::string generate_snapshot(const std::string & name, + const gguf_remote_model & remote, + llama_model & mdl, + const std::vector & tensors) { + std::ostringstream out; + + out << "# Model: " << name << "\n"; + out << "# n_embd=" << remote.n_embd + << ", n_ff=" << remote.n_ff + << ", n_vocab=" << remote.n_vocab + << ", n_layer=" << remote.n_layer + << ", n_head=" << remote.n_head + << ", n_head_kv=" << remote.n_head_kv; + if (remote.n_expert > 0) { + out << ", n_expert=" << remote.n_expert; + } + out << "\n"; + + for (int i = 0; i < LLAMA_FTYPE_GUESSED; i++) { + llama_ftype ft = (llama_ftype)i; + ggml_type default_type = llama_ftype_default_type(ft); + if (default_type == GGML_TYPE_COUNT) { + continue; + } + const char * fname = llama_ftype_to_name(ft); + if (!fname) { + continue; + } + + auto types = compute_quant_types(mdl, tensors, ft); + + out << "\n[" << fname << "] " << ggml_type_name(default_type) << "\n"; + for (const auto & [name, type] : types) { + if (type != default_type) { + out << name << " " << ggml_type_name(type) << "\n"; + } + } + } + + return out.str(); +} + +static int run_generate(const std::string & snapshot_dir) { + fprintf(stderr, "This will overwrite all snapshot files in:\n %s\n", snapshot_dir.c_str()); + fprintf(stderr, "Continue? [y/N] "); + int ch = fgetc(stdin); + if (ch != 'y' && ch != 'Y') { + fprintf(stderr, "Aborted.\n"); + return 1; + } + + fprintf(stderr, "\n"); + + int n_written = 0; + + for (int m = 0; m < n_model_specs; m++) { + const auto & spec = model_specs[m]; + std::string name = model_name_from_repo(spec.repo); + + fprintf(stderr, "Fetching model metadata for %s from %s...\n", name.c_str(), spec.repo); + auto result = gguf_fetch_model_meta(spec.repo, spec.quant); + if (!result.has_value()) { + fprintf(stderr, "ERROR: could not fetch model metadata for %s\n", name.c_str()); + return 1; + } + + const auto & remote = result.value(); + auto model = build_mock_model_from_remote(remote); + llama_model_quantize_params qparams = llama_model_quantize_default_params(); + auto tensors = build_mock_tensors(remote, model->arch, qparams); + + std::string content = generate_snapshot(name, remote, *model, tensors); + std::string path = snapshot_dir + "/" + snapshot_file_from_name(name) + ".schema"; + + std::ofstream f(path); + if (!f.good()) { + fprintf(stderr, "ERROR: could not write %s\n", path.c_str()); + return 1; + } + f << content; + n_written++; + fprintf(stderr, " wrote %s\n", path.c_str()); + } + + fprintf(stderr, "%d files written\n", n_written); + return 0; +} + +// --------------------------------------------------------------------------- +// Test mode: compare against snapshot files +// --------------------------------------------------------------------------- + +static bool run_test_section(llama_model & mdl, + const std::vector & tensors, + const snapshot_section & section) { + // verify default_type matches what llama_ftype_default_type returns + ggml_type computed_default = llama_ftype_default_type(section.ftype); + if (computed_default != section.default_type) { + printf(" FAIL [%s] default type mismatch: file says %s, code says %s\n", + llama_ftype_to_name(section.ftype), + ggml_type_name(section.default_type), + ggml_type_name(computed_default)); + return false; + } + + auto types = compute_quant_types(mdl, tensors, section.ftype); + + std::map override_map(section.overrides.begin(), section.overrides.end()); + + bool all_pass = true; + int n_override_found = 0; + + for (const auto & [name, got] : types) { + ggml_type expected = section.default_type; + auto it = override_map.find(name); + if (it != override_map.end()) { + expected = it->second; + n_override_found++; + } + + if (got != expected) { + printf(" FAIL %-50s expected %s, got %s\n", + name.c_str(), ggml_type_name(expected), ggml_type_name(got)); + all_pass = false; + } + } + + if (n_override_found != (int)section.overrides.size()) { + printf(" FAIL [%s] override count mismatch: listed %d, matched %d\n", + llama_ftype_to_name(section.ftype), + (int)section.overrides.size(), n_override_found); + all_pass = false; + } + + return all_pass; +} + +static int run_remote_tests(const std::string & snapshot_dir, const char * argv0) { + int total_pass = 0; + int total_fail = 0; + int total_skip = 0; + + for (int m = 0; m < n_model_specs; m++) { + const auto & spec = model_specs[m]; + std::string name = model_name_from_repo(spec.repo); + printf("=== %s ===\n", name.c_str()); + + fprintf(stderr, "Fetching model metadata for %s from %s...\n", name.c_str(), spec.repo); + auto result = gguf_fetch_model_meta(spec.repo, spec.quant); + if (!result.has_value()) { + printf(" SKIP (could not fetch model metadata)\n\n"); + total_skip++; + continue; + } + + const auto & remote = result.value(); + auto model = build_mock_model_from_remote(remote); + llama_model_quantize_params qparams = llama_model_quantize_default_params(); + auto tensors = build_mock_tensors(remote, model->arch, qparams); + + std::string snapshot_path = snapshot_dir + "/" + snapshot_file_from_name(name) + ".schema"; + std::vector sections; + if (!parse_snapshot_file(snapshot_path, sections)) { + printf(" SKIP (could not read snapshot file: %s)\n\n", snapshot_path.c_str()); + total_skip++; + continue; + } + + int model_pass = 0; + int model_fail = 0; + + for (const auto & section : sections) { + bool pass = run_test_section(*model, tensors, section); + if (pass) { + model_pass++; + } else { + model_fail++; + } + } + + printf(" %s %s: %d/%d ftype sections passed (%d tensors)\n", + model_fail == 0 ? "PASS" : "FAIL", + name.c_str(), model_pass, model_pass + model_fail, + (int)tensors.size()); + printf("\n"); + + if (model_fail == 0) { + total_pass++; + } else { + total_fail++; + } + } + + printf("%d/%d models passed", total_pass, total_pass + total_fail); + if (total_skip > 0) { + printf(", %d skipped", total_skip); + } + printf("\n"); + + if (total_fail > 0) { + printf("\nIf these changes are intentional, regenerate snapshot files with:\n"); + printf(" %s --generate\n", argv0); + } + + return total_fail > 0 ? 1 : 0; +} + +int main(int argc, char ** argv) { + std::string snapshot_dir = SNAPSHOT_DIR; + bool generate = false; + + for (int i = 1; i < argc; i++) { + if (strcmp(argv[i], "--snapshot-dir") == 0 && i + 1 < argc) { + snapshot_dir = argv[++i]; + } else if (strcmp(argv[i], "--generate") == 0) { + generate = true; + } + } + + if (generate) { + return run_generate(snapshot_dir); + } + + return run_remote_tests(snapshot_dir, argv[0]); +} From a3ff1940e93d57b74e143b36423b320d8677ef5b Mon Sep 17 00:00:00 2001 From: Colin Kealty <3266127+bartowski1182@users.noreply.github.com> Date: Wed, 4 Mar 2026 13:42:59 -0500 Subject: [PATCH 02/21] Fix merge conflicts, add more schemas --- tests/CMakeLists.txt | 6 + tests/gguf-model-data.cpp | 1 + tests/snapshots/deepseek-v3.1.schema | 1956 +++++++++ tests/snapshots/gemma-3-4b-it.schema | 1455 ++++++ tests/snapshots/gpt-oss-120b.schema | 1616 +++++++ .../meta-llama-3.1-70b-instruct.schema | 3899 +++++++++++++++++ .../snapshots/nemotron-nano-3-30b-a3b.schema | 696 +++ tests/snapshots/qwen3-14b.schema | 1908 ++++++++ tests/snapshots/qwen3-coder-next.schema | 1713 ++++++++ tests/snapshots/qwen3.5-27b.schema | 1837 ++++++++ tests/snapshots/qwen3.5-397b-a17b.schema | 2148 +++++++++ tests/snapshots/step-3.5-flash.schema | 2453 +++++++++++ tests/test-quant-type-selection.cpp | 2 +- 13 files changed, 19689 insertions(+), 1 deletion(-) create mode 100644 tests/snapshots/deepseek-v3.1.schema create mode 100644 tests/snapshots/gemma-3-4b-it.schema create mode 100644 tests/snapshots/gpt-oss-120b.schema create mode 100644 tests/snapshots/meta-llama-3.1-70b-instruct.schema create mode 100644 tests/snapshots/nemotron-nano-3-30b-a3b.schema create mode 100644 tests/snapshots/qwen3-14b.schema create mode 100644 tests/snapshots/qwen3-coder-next.schema create mode 100644 tests/snapshots/qwen3.5-27b.schema create mode 100644 tests/snapshots/qwen3.5-397b-a17b.schema create mode 100644 tests/snapshots/step-3.5-flash.schema diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 9582164b58..ced1f3943c 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -274,6 +274,12 @@ if (TARGET cpp-httplib) add_executable(test-gguf-model-data test-gguf-model-data.cpp) target_link_libraries(test-gguf-model-data PRIVATE gguf-model-data common) llama_test(test-gguf-model-data LABEL "model") + + # test-quant-type-selection requires gguf-model-data for remote model metadata + llama_build_and_test(test-quant-type-selection.cpp LABEL "model") + target_link_libraries(test-quant-type-selection PRIVATE gguf-model-data) + target_compile_definitions(test-quant-type-selection PRIVATE + SNAPSHOT_DIR="${CMAKE_CURRENT_SOURCE_DIR}/snapshots") endif() endif() diff --git a/tests/gguf-model-data.cpp b/tests/gguf-model-data.cpp index aa550c9538..97de083568 100644 --- a/tests/gguf-model-data.cpp +++ b/tests/gguf-model-data.cpp @@ -151,6 +151,7 @@ static bool gguf_read_uint32_val(gguf_buf_reader & r, int32_t vtype, uint32_t & return false; } } + return true; } if (vtype == GGUF_TYPE_UINT8) { uint8_t v; diff --git a/tests/snapshots/deepseek-v3.1.schema b/tests/snapshots/deepseek-v3.1.schema new file mode 100644 index 0000000000..a5bce29b27 --- /dev/null +++ b/tests/snapshots/deepseek-v3.1.schema @@ -0,0 +1,1956 @@ +# Model: DeepSeek-V3.1 +# n_embd=7168, n_ff=18432, n_vocab=129280, n_layer=61, n_head=128, n_head_kv=1, n_expert=256 + +[F32] f32 +output.weight q6_K + +[F16] f16 +output.weight q6_K + +[Q4_0] q4_0 +output.weight q6_K + +[Q4_1] q4_1 +output.weight q6_K + +[Q8_0] q8_0 + +[Q5_0] q5_0 +output.weight q6_K + +[Q5_1] q5_1 +output.weight q6_K + +[Q2_K] q2_K +output.weight q6_K +blk.0.attn_output.weight q3_K +blk.0.ffn_down.weight q3_K +blk.1.attn_output.weight q3_K +blk.1.ffn_down.weight q3_K +blk.2.attn_output.weight q3_K +blk.2.ffn_down.weight q3_K +blk.3.attn_output.weight q3_K +blk.3.ffn_down_exps.weight q3_K +blk.3.ffn_down_shexp.weight q3_K +blk.4.attn_output.weight q3_K +blk.4.ffn_down_exps.weight q3_K +blk.4.ffn_down_shexp.weight q3_K +blk.5.attn_output.weight q3_K +blk.5.ffn_down_exps.weight q3_K +blk.5.ffn_down_shexp.weight q3_K +blk.6.attn_output.weight q3_K +blk.6.ffn_down_exps.weight q3_K +blk.6.ffn_down_shexp.weight q3_K +blk.7.attn_output.weight q3_K +blk.7.ffn_down_exps.weight q3_K +blk.7.ffn_down_shexp.weight q3_K +blk.8.attn_output.weight q3_K +blk.8.ffn_down_exps.weight q3_K +blk.8.ffn_down_shexp.weight q3_K +blk.9.attn_output.weight q3_K +blk.9.ffn_down_exps.weight q3_K +blk.9.ffn_down_shexp.weight q3_K +blk.10.attn_output.weight q3_K +blk.10.ffn_down_exps.weight q3_K +blk.10.ffn_down_shexp.weight q3_K +blk.11.attn_output.weight q3_K +blk.11.ffn_down_exps.weight q3_K +blk.11.ffn_down_shexp.weight q3_K +blk.12.attn_output.weight q3_K +blk.12.ffn_down_exps.weight q3_K +blk.12.ffn_down_shexp.weight q3_K +blk.13.attn_output.weight q3_K +blk.13.ffn_down_exps.weight q3_K +blk.13.ffn_down_shexp.weight q3_K +blk.14.attn_output.weight q3_K +blk.14.ffn_down_exps.weight q3_K +blk.14.ffn_down_shexp.weight q3_K +blk.15.attn_output.weight q3_K +blk.15.ffn_down_exps.weight q3_K +blk.15.ffn_down_shexp.weight q3_K +blk.16.attn_output.weight q3_K +blk.16.ffn_down_exps.weight q3_K +blk.16.ffn_down_shexp.weight q3_K +blk.17.attn_output.weight q3_K +blk.17.ffn_down_exps.weight q3_K +blk.17.ffn_down_shexp.weight q3_K +blk.18.attn_output.weight q3_K +blk.18.ffn_down_exps.weight q3_K +blk.18.ffn_down_shexp.weight q3_K +blk.19.attn_output.weight q3_K +blk.19.ffn_down_exps.weight q3_K +blk.19.ffn_down_shexp.weight q3_K +blk.20.attn_output.weight q3_K +blk.20.ffn_down_exps.weight q3_K +blk.20.ffn_down_shexp.weight q3_K +blk.21.attn_output.weight q3_K +blk.21.ffn_down_exps.weight q3_K +blk.21.ffn_down_shexp.weight q3_K +blk.22.attn_output.weight q3_K +blk.22.ffn_down_exps.weight q3_K +blk.22.ffn_down_shexp.weight q3_K +blk.23.attn_output.weight q3_K +blk.23.ffn_down_exps.weight q3_K +blk.23.ffn_down_shexp.weight q3_K +blk.24.attn_output.weight q3_K +blk.24.ffn_down_exps.weight q3_K +blk.24.ffn_down_shexp.weight q3_K +blk.25.attn_output.weight q3_K +blk.25.ffn_down_exps.weight q3_K +blk.25.ffn_down_shexp.weight q3_K +blk.26.attn_output.weight q3_K +blk.26.ffn_down_exps.weight q3_K +blk.26.ffn_down_shexp.weight q3_K +blk.27.attn_output.weight q3_K +blk.27.ffn_down_exps.weight q3_K +blk.27.ffn_down_shexp.weight q3_K +blk.28.attn_output.weight q3_K +blk.28.ffn_down_exps.weight q3_K +blk.28.ffn_down_shexp.weight q3_K +blk.29.attn_output.weight q3_K +blk.29.ffn_down_exps.weight q3_K +blk.29.ffn_down_shexp.weight q3_K +blk.30.attn_output.weight q3_K +blk.30.ffn_down_exps.weight q3_K +blk.30.ffn_down_shexp.weight q3_K +blk.31.attn_output.weight q3_K +blk.31.ffn_down_exps.weight q3_K +blk.31.ffn_down_shexp.weight q3_K +blk.32.attn_output.weight q3_K +blk.32.ffn_down_exps.weight q3_K +blk.32.ffn_down_shexp.weight q3_K +blk.33.attn_output.weight q3_K +blk.33.ffn_down_exps.weight q3_K +blk.33.ffn_down_shexp.weight q3_K +blk.34.attn_output.weight q3_K +blk.34.ffn_down_exps.weight q3_K +blk.34.ffn_down_shexp.weight q3_K +blk.35.attn_output.weight q3_K +blk.35.ffn_down_exps.weight q3_K +blk.35.ffn_down_shexp.weight q3_K +blk.36.attn_output.weight q3_K +blk.36.ffn_down_exps.weight q3_K +blk.36.ffn_down_shexp.weight q3_K +blk.37.attn_output.weight q3_K +blk.37.ffn_down_exps.weight q3_K +blk.37.ffn_down_shexp.weight q3_K +blk.38.attn_output.weight q3_K +blk.38.ffn_down_exps.weight q3_K +blk.38.ffn_down_shexp.weight q3_K +blk.39.attn_output.weight q3_K +blk.39.ffn_down_exps.weight q3_K +blk.39.ffn_down_shexp.weight q3_K +blk.40.attn_output.weight q3_K +blk.40.ffn_down_exps.weight q3_K +blk.40.ffn_down_shexp.weight q3_K +blk.41.attn_output.weight q3_K +blk.41.ffn_down_exps.weight q3_K +blk.41.ffn_down_shexp.weight q3_K +blk.42.attn_output.weight q3_K +blk.42.ffn_down_exps.weight q3_K +blk.42.ffn_down_shexp.weight q3_K +blk.43.attn_output.weight q3_K +blk.43.ffn_down_exps.weight q3_K +blk.43.ffn_down_shexp.weight q3_K +blk.44.attn_output.weight q3_K +blk.44.ffn_down_exps.weight q3_K +blk.44.ffn_down_shexp.weight q3_K +blk.45.attn_output.weight q3_K +blk.45.ffn_down_exps.weight q3_K +blk.45.ffn_down_shexp.weight q3_K +blk.46.attn_output.weight q3_K +blk.46.ffn_down_exps.weight q3_K +blk.46.ffn_down_shexp.weight q3_K +blk.47.attn_output.weight q3_K +blk.47.ffn_down_exps.weight q3_K +blk.47.ffn_down_shexp.weight q3_K +blk.48.attn_output.weight q3_K +blk.48.ffn_down_exps.weight q3_K +blk.48.ffn_down_shexp.weight q3_K +blk.49.attn_output.weight q3_K +blk.49.ffn_down_exps.weight q3_K +blk.49.ffn_down_shexp.weight q3_K +blk.50.attn_output.weight q3_K +blk.50.ffn_down_exps.weight q3_K +blk.50.ffn_down_shexp.weight q3_K +blk.51.attn_output.weight q3_K +blk.51.ffn_down_exps.weight q3_K +blk.51.ffn_down_shexp.weight q3_K +blk.52.attn_output.weight q3_K +blk.52.ffn_down_exps.weight q3_K +blk.52.ffn_down_shexp.weight q3_K +blk.53.attn_output.weight q3_K +blk.53.ffn_down_exps.weight q3_K +blk.53.ffn_down_shexp.weight q3_K +blk.54.attn_output.weight q3_K +blk.54.ffn_down_exps.weight q3_K +blk.54.ffn_down_shexp.weight q3_K +blk.55.attn_output.weight q3_K +blk.55.ffn_down_exps.weight q3_K +blk.55.ffn_down_shexp.weight q3_K +blk.56.attn_output.weight q3_K +blk.56.ffn_down_exps.weight q3_K +blk.56.ffn_down_shexp.weight q3_K +blk.57.attn_output.weight q3_K +blk.57.ffn_down_exps.weight q3_K +blk.57.ffn_down_shexp.weight q3_K +blk.58.attn_output.weight q3_K +blk.58.ffn_down_exps.weight q3_K +blk.58.ffn_down_shexp.weight q3_K +blk.59.attn_output.weight q3_K +blk.59.ffn_down_exps.weight q3_K +blk.59.ffn_down_shexp.weight q3_K +blk.60.attn_output.weight q3_K +blk.60.ffn_down_exps.weight q3_K +blk.60.ffn_down_shexp.weight q3_K + +[Q3_K_S] q3_K +output.weight q6_K + +[Q3_K_M] q3_K +output.weight q6_K +blk.0.attn_output.weight q4_K +blk.0.ffn_down.weight q5_K +blk.1.attn_output.weight q4_K +blk.1.ffn_down.weight q5_K +blk.2.attn_output.weight q4_K +blk.2.ffn_down.weight q5_K +blk.3.attn_output.weight q4_K +blk.3.ffn_down_exps.weight q4_K +blk.3.ffn_down_shexp.weight q4_K +blk.4.attn_output.weight q4_K +blk.4.ffn_down_exps.weight q4_K +blk.4.ffn_down_shexp.weight q4_K +blk.5.attn_output.weight q4_K +blk.5.ffn_down_exps.weight q4_K +blk.5.ffn_down_shexp.weight q4_K +blk.6.attn_output.weight q4_K +blk.6.ffn_down_exps.weight q4_K +blk.6.ffn_down_shexp.weight q4_K +blk.7.attn_output.weight q4_K +blk.7.ffn_down_exps.weight q4_K +blk.7.ffn_down_shexp.weight q4_K +blk.8.attn_output.weight q4_K +blk.8.ffn_down_exps.weight q4_K +blk.8.ffn_down_shexp.weight q4_K +blk.9.attn_output.weight q4_K +blk.9.ffn_down_exps.weight q4_K +blk.9.ffn_down_shexp.weight q4_K +blk.10.attn_output.weight q4_K +blk.10.ffn_down_exps.weight q4_K +blk.10.ffn_down_shexp.weight q4_K +blk.11.attn_output.weight q4_K +blk.11.ffn_down_exps.weight q4_K +blk.11.ffn_down_shexp.weight q4_K +blk.12.attn_output.weight q4_K +blk.12.ffn_down_exps.weight q4_K +blk.12.ffn_down_shexp.weight q4_K +blk.13.attn_output.weight q4_K +blk.13.ffn_down_exps.weight q4_K +blk.13.ffn_down_shexp.weight q4_K +blk.14.attn_output.weight q4_K +blk.14.ffn_down_exps.weight q4_K +blk.14.ffn_down_shexp.weight q4_K +blk.15.attn_output.weight q4_K +blk.15.ffn_down_exps.weight q4_K +blk.15.ffn_down_shexp.weight q4_K +blk.16.attn_output.weight q4_K +blk.16.ffn_down_exps.weight q4_K +blk.16.ffn_down_shexp.weight q4_K +blk.17.attn_output.weight q4_K +blk.17.ffn_down_exps.weight q4_K +blk.17.ffn_down_shexp.weight q4_K +blk.18.attn_output.weight q4_K +blk.18.ffn_down_exps.weight q4_K +blk.18.ffn_down_shexp.weight q4_K +blk.19.attn_output.weight q4_K +blk.19.ffn_down_exps.weight q4_K +blk.19.ffn_down_shexp.weight q4_K +blk.20.attn_output.weight q4_K +blk.20.ffn_down_exps.weight q4_K +blk.20.ffn_down_shexp.weight q4_K +blk.21.attn_output.weight q4_K +blk.21.ffn_down_exps.weight q4_K +blk.21.ffn_down_shexp.weight q4_K +blk.22.attn_output.weight q4_K +blk.22.ffn_down_exps.weight q4_K +blk.22.ffn_down_shexp.weight q4_K +blk.23.attn_output.weight q4_K +blk.23.ffn_down_exps.weight q4_K +blk.23.ffn_down_shexp.weight q4_K +blk.24.attn_output.weight q4_K +blk.24.ffn_down_exps.weight q4_K +blk.24.ffn_down_shexp.weight q4_K +blk.25.attn_output.weight q4_K +blk.25.ffn_down_exps.weight q4_K +blk.25.ffn_down_shexp.weight q4_K +blk.26.attn_output.weight q4_K +blk.26.ffn_down_exps.weight q4_K +blk.26.ffn_down_shexp.weight q4_K +blk.27.attn_output.weight q4_K +blk.27.ffn_down_exps.weight q4_K +blk.27.ffn_down_shexp.weight q4_K +blk.28.attn_output.weight q4_K +blk.28.ffn_down_exps.weight q4_K +blk.28.ffn_down_shexp.weight q4_K +blk.29.attn_output.weight q4_K +blk.29.ffn_down_exps.weight q4_K +blk.29.ffn_down_shexp.weight q4_K +blk.30.attn_output.weight q4_K +blk.30.ffn_down_exps.weight q4_K +blk.30.ffn_down_shexp.weight q4_K +blk.31.attn_output.weight q4_K +blk.31.ffn_down_exps.weight q4_K +blk.31.ffn_down_shexp.weight q4_K +blk.32.attn_output.weight q4_K +blk.32.ffn_down_exps.weight q4_K +blk.32.ffn_down_shexp.weight q4_K +blk.33.attn_output.weight q4_K +blk.33.ffn_down_exps.weight q4_K +blk.33.ffn_down_shexp.weight q4_K +blk.34.attn_output.weight q4_K +blk.34.ffn_down_exps.weight q4_K +blk.34.ffn_down_shexp.weight q4_K +blk.35.attn_output.weight q4_K +blk.35.ffn_down_exps.weight q4_K +blk.35.ffn_down_shexp.weight q4_K +blk.36.attn_output.weight q4_K +blk.36.ffn_down_exps.weight q4_K +blk.36.ffn_down_shexp.weight q4_K +blk.37.attn_output.weight q4_K +blk.37.ffn_down_exps.weight q4_K +blk.37.ffn_down_shexp.weight q4_K +blk.38.attn_output.weight q4_K +blk.38.ffn_down_exps.weight q4_K +blk.38.ffn_down_shexp.weight q4_K +blk.39.attn_output.weight q4_K +blk.39.ffn_down_exps.weight q4_K +blk.39.ffn_down_shexp.weight q4_K +blk.40.attn_output.weight q4_K +blk.40.ffn_down_exps.weight q4_K +blk.40.ffn_down_shexp.weight q4_K +blk.41.attn_output.weight q4_K +blk.41.ffn_down_exps.weight q4_K +blk.41.ffn_down_shexp.weight q4_K +blk.42.attn_output.weight q4_K +blk.42.ffn_down_exps.weight q4_K +blk.42.ffn_down_shexp.weight q4_K +blk.43.attn_output.weight q4_K +blk.43.ffn_down_exps.weight q4_K +blk.43.ffn_down_shexp.weight q4_K +blk.44.attn_output.weight q4_K +blk.44.ffn_down_exps.weight q4_K +blk.44.ffn_down_shexp.weight q4_K +blk.45.attn_output.weight q4_K +blk.45.ffn_down_exps.weight q4_K +blk.45.ffn_down_shexp.weight q4_K +blk.46.attn_output.weight q4_K +blk.46.ffn_down_exps.weight q4_K +blk.46.ffn_down_shexp.weight q4_K +blk.47.attn_output.weight q4_K +blk.47.ffn_down_exps.weight q4_K +blk.47.ffn_down_shexp.weight q4_K +blk.48.attn_output.weight q4_K +blk.48.ffn_down_exps.weight q4_K +blk.48.ffn_down_shexp.weight q4_K +blk.49.attn_output.weight q4_K +blk.49.ffn_down_exps.weight q4_K +blk.49.ffn_down_shexp.weight q4_K +blk.50.attn_output.weight q4_K +blk.50.ffn_down_exps.weight q4_K +blk.50.ffn_down_shexp.weight q4_K +blk.51.attn_output.weight q4_K +blk.51.ffn_down_exps.weight q4_K +blk.51.ffn_down_shexp.weight q4_K +blk.52.attn_output.weight q4_K +blk.52.ffn_down_exps.weight q4_K +blk.52.ffn_down_shexp.weight q4_K +blk.53.attn_output.weight q4_K +blk.53.ffn_down_exps.weight q4_K +blk.53.ffn_down_shexp.weight q4_K +blk.54.attn_output.weight q4_K +blk.54.ffn_down_exps.weight q4_K +blk.54.ffn_down_shexp.weight q4_K +blk.55.attn_output.weight q4_K +blk.55.ffn_down_exps.weight q4_K +blk.55.ffn_down_shexp.weight q4_K +blk.56.attn_output.weight q4_K +blk.56.ffn_down_exps.weight q4_K +blk.56.ffn_down_shexp.weight q4_K +blk.57.attn_output.weight q4_K +blk.57.ffn_down_exps.weight q4_K +blk.57.ffn_down_shexp.weight q4_K +blk.58.attn_output.weight q4_K +blk.58.ffn_down_exps.weight q4_K +blk.58.ffn_down_shexp.weight q4_K +blk.59.attn_output.weight q4_K +blk.59.ffn_down_exps.weight q4_K +blk.59.ffn_down_shexp.weight q4_K +blk.60.attn_output.weight q4_K +blk.60.ffn_down_exps.weight q4_K +blk.60.ffn_down_shexp.weight q4_K + +[Q3_K_L] q3_K +output.weight q6_K +blk.0.attn_output.weight q5_K +blk.0.ffn_down.weight q5_K +blk.1.attn_output.weight q5_K +blk.1.ffn_down.weight q5_K +blk.2.attn_output.weight q5_K +blk.2.ffn_down.weight q5_K +blk.3.attn_output.weight q5_K +blk.3.ffn_down_exps.weight q5_K +blk.3.ffn_down_shexp.weight q5_K +blk.4.attn_output.weight q5_K +blk.4.ffn_down_exps.weight q5_K +blk.4.ffn_down_shexp.weight q5_K +blk.5.attn_output.weight q5_K +blk.5.ffn_down_exps.weight q5_K +blk.5.ffn_down_shexp.weight q5_K +blk.6.attn_output.weight q5_K +blk.6.ffn_down_exps.weight q5_K +blk.6.ffn_down_shexp.weight q5_K +blk.7.attn_output.weight q5_K +blk.7.ffn_down_exps.weight q5_K +blk.7.ffn_down_shexp.weight q5_K +blk.8.attn_output.weight q5_K +blk.8.ffn_down_exps.weight q5_K +blk.8.ffn_down_shexp.weight q5_K +blk.9.attn_output.weight q5_K +blk.9.ffn_down_exps.weight q5_K +blk.9.ffn_down_shexp.weight q5_K +blk.10.attn_output.weight q5_K +blk.10.ffn_down_exps.weight q5_K +blk.10.ffn_down_shexp.weight q5_K +blk.11.attn_output.weight q5_K +blk.11.ffn_down_exps.weight q5_K +blk.11.ffn_down_shexp.weight q5_K +blk.12.attn_output.weight q5_K +blk.12.ffn_down_exps.weight q5_K +blk.12.ffn_down_shexp.weight q5_K +blk.13.attn_output.weight q5_K +blk.13.ffn_down_exps.weight q5_K +blk.13.ffn_down_shexp.weight q5_K +blk.14.attn_output.weight q5_K +blk.14.ffn_down_exps.weight q5_K +blk.14.ffn_down_shexp.weight q5_K +blk.15.attn_output.weight q5_K +blk.15.ffn_down_exps.weight q5_K +blk.15.ffn_down_shexp.weight q5_K +blk.16.attn_output.weight q5_K +blk.16.ffn_down_exps.weight q5_K +blk.16.ffn_down_shexp.weight q5_K +blk.17.attn_output.weight q5_K +blk.17.ffn_down_exps.weight q5_K +blk.17.ffn_down_shexp.weight q5_K +blk.18.attn_output.weight q5_K +blk.18.ffn_down_exps.weight q5_K +blk.18.ffn_down_shexp.weight q5_K +blk.19.attn_output.weight q5_K +blk.19.ffn_down_exps.weight q5_K +blk.19.ffn_down_shexp.weight q5_K +blk.20.attn_output.weight q5_K +blk.20.ffn_down_exps.weight q5_K +blk.20.ffn_down_shexp.weight q5_K +blk.21.attn_output.weight q5_K +blk.21.ffn_down_exps.weight q5_K +blk.21.ffn_down_shexp.weight q5_K +blk.22.attn_output.weight q5_K +blk.22.ffn_down_exps.weight q5_K +blk.22.ffn_down_shexp.weight q5_K +blk.23.attn_output.weight q5_K +blk.23.ffn_down_exps.weight q5_K +blk.23.ffn_down_shexp.weight q5_K +blk.24.attn_output.weight q5_K +blk.24.ffn_down_exps.weight q5_K +blk.24.ffn_down_shexp.weight q5_K +blk.25.attn_output.weight q5_K +blk.25.ffn_down_exps.weight q5_K +blk.25.ffn_down_shexp.weight q5_K +blk.26.attn_output.weight q5_K +blk.26.ffn_down_exps.weight q5_K +blk.26.ffn_down_shexp.weight q5_K +blk.27.attn_output.weight q5_K +blk.27.ffn_down_exps.weight q5_K +blk.27.ffn_down_shexp.weight q5_K +blk.28.attn_output.weight q5_K +blk.28.ffn_down_exps.weight q5_K +blk.28.ffn_down_shexp.weight q5_K +blk.29.attn_output.weight q5_K +blk.29.ffn_down_exps.weight q5_K +blk.29.ffn_down_shexp.weight q5_K +blk.30.attn_output.weight q5_K +blk.30.ffn_down_exps.weight q5_K +blk.30.ffn_down_shexp.weight q5_K +blk.31.attn_output.weight q5_K +blk.31.ffn_down_exps.weight q5_K +blk.31.ffn_down_shexp.weight q5_K +blk.32.attn_output.weight q5_K +blk.32.ffn_down_exps.weight q5_K +blk.32.ffn_down_shexp.weight q5_K +blk.33.attn_output.weight q5_K +blk.33.ffn_down_exps.weight q5_K +blk.33.ffn_down_shexp.weight q5_K +blk.34.attn_output.weight q5_K +blk.34.ffn_down_exps.weight q5_K +blk.34.ffn_down_shexp.weight q5_K +blk.35.attn_output.weight q5_K +blk.35.ffn_down_exps.weight q5_K +blk.35.ffn_down_shexp.weight q5_K +blk.36.attn_output.weight q5_K +blk.36.ffn_down_exps.weight q5_K +blk.36.ffn_down_shexp.weight q5_K +blk.37.attn_output.weight q5_K +blk.37.ffn_down_exps.weight q5_K +blk.37.ffn_down_shexp.weight q5_K +blk.38.attn_output.weight q5_K +blk.38.ffn_down_exps.weight q5_K +blk.38.ffn_down_shexp.weight q5_K +blk.39.attn_output.weight q5_K +blk.39.ffn_down_exps.weight q5_K +blk.39.ffn_down_shexp.weight q5_K +blk.40.attn_output.weight q5_K +blk.40.ffn_down_exps.weight q5_K +blk.40.ffn_down_shexp.weight q5_K +blk.41.attn_output.weight q5_K +blk.41.ffn_down_exps.weight q5_K +blk.41.ffn_down_shexp.weight q5_K +blk.42.attn_output.weight q5_K +blk.42.ffn_down_exps.weight q5_K +blk.42.ffn_down_shexp.weight q5_K +blk.43.attn_output.weight q5_K +blk.43.ffn_down_exps.weight q5_K +blk.43.ffn_down_shexp.weight q5_K +blk.44.attn_output.weight q5_K +blk.44.ffn_down_exps.weight q5_K +blk.44.ffn_down_shexp.weight q5_K +blk.45.attn_output.weight q5_K +blk.45.ffn_down_exps.weight q5_K +blk.45.ffn_down_shexp.weight q5_K +blk.46.attn_output.weight q5_K +blk.46.ffn_down_exps.weight q5_K +blk.46.ffn_down_shexp.weight q5_K +blk.47.attn_output.weight q5_K +blk.47.ffn_down_exps.weight q5_K +blk.47.ffn_down_shexp.weight q5_K +blk.48.attn_output.weight q5_K +blk.48.ffn_down_exps.weight q5_K +blk.48.ffn_down_shexp.weight q5_K +blk.49.attn_output.weight q5_K +blk.49.ffn_down_exps.weight q5_K +blk.49.ffn_down_shexp.weight q5_K +blk.50.attn_output.weight q5_K +blk.50.ffn_down_exps.weight q5_K +blk.50.ffn_down_shexp.weight q5_K +blk.51.attn_output.weight q5_K +blk.51.ffn_down_exps.weight q5_K +blk.51.ffn_down_shexp.weight q5_K +blk.52.attn_output.weight q5_K +blk.52.ffn_down_exps.weight q5_K +blk.52.ffn_down_shexp.weight q5_K +blk.53.attn_output.weight q5_K +blk.53.ffn_down_exps.weight q5_K +blk.53.ffn_down_shexp.weight q5_K +blk.54.attn_output.weight q5_K +blk.54.ffn_down_exps.weight q5_K +blk.54.ffn_down_shexp.weight q5_K +blk.55.attn_output.weight q5_K +blk.55.ffn_down_exps.weight q5_K +blk.55.ffn_down_shexp.weight q5_K +blk.56.attn_output.weight q5_K +blk.56.ffn_down_exps.weight q5_K +blk.56.ffn_down_shexp.weight q5_K +blk.57.attn_output.weight q5_K +blk.57.ffn_down_exps.weight q5_K +blk.57.ffn_down_shexp.weight q5_K +blk.58.attn_output.weight q5_K +blk.58.ffn_down_exps.weight q5_K +blk.58.ffn_down_shexp.weight q5_K +blk.59.attn_output.weight q5_K +blk.59.ffn_down_exps.weight q5_K +blk.59.ffn_down_shexp.weight q5_K +blk.60.attn_output.weight q5_K +blk.60.ffn_down_exps.weight q5_K +blk.60.ffn_down_shexp.weight q5_K + +[Q4_K_S] q4_K +output.weight q6_K +blk.0.ffn_down.weight q5_K +blk.1.ffn_down.weight q5_K +blk.2.ffn_down.weight q5_K +blk.3.ffn_down_exps.weight q5_K +blk.3.ffn_down_shexp.weight q5_K +blk.4.ffn_down_exps.weight q5_K +blk.4.ffn_down_shexp.weight q5_K +blk.5.ffn_down_exps.weight q5_K +blk.5.ffn_down_shexp.weight q5_K +blk.6.ffn_down_exps.weight q5_K +blk.6.ffn_down_shexp.weight q5_K + +[Q4_K_M] q4_K +output.weight q6_K +blk.0.ffn_down.weight q6_K +blk.1.ffn_down.weight q6_K +blk.2.ffn_down.weight q6_K +blk.3.ffn_down_exps.weight q6_K +blk.3.ffn_down_shexp.weight q6_K +blk.4.ffn_down_exps.weight q6_K +blk.4.ffn_down_shexp.weight q6_K +blk.5.ffn_down_exps.weight q6_K +blk.5.ffn_down_shexp.weight q6_K +blk.6.ffn_down_exps.weight q6_K +blk.6.ffn_down_shexp.weight q6_K +blk.9.ffn_down_exps.weight q6_K +blk.9.ffn_down_shexp.weight q6_K +blk.12.ffn_down_exps.weight q6_K +blk.12.ffn_down_shexp.weight q6_K +blk.15.ffn_down_exps.weight q6_K +blk.15.ffn_down_shexp.weight q6_K +blk.18.ffn_down_exps.weight q6_K +blk.18.ffn_down_shexp.weight q6_K +blk.21.ffn_down_exps.weight q6_K +blk.21.ffn_down_shexp.weight q6_K +blk.24.ffn_down_exps.weight q6_K +blk.24.ffn_down_shexp.weight q6_K +blk.27.ffn_down_exps.weight q6_K +blk.27.ffn_down_shexp.weight q6_K +blk.30.ffn_down_exps.weight q6_K +blk.30.ffn_down_shexp.weight q6_K +blk.33.ffn_down_exps.weight q6_K +blk.33.ffn_down_shexp.weight q6_K +blk.36.ffn_down_exps.weight q6_K +blk.36.ffn_down_shexp.weight q6_K +blk.39.ffn_down_exps.weight q6_K +blk.39.ffn_down_shexp.weight q6_K +blk.42.ffn_down_exps.weight q6_K +blk.42.ffn_down_shexp.weight q6_K +blk.45.ffn_down_exps.weight q6_K +blk.45.ffn_down_shexp.weight q6_K +blk.48.ffn_down_exps.weight q6_K +blk.48.ffn_down_shexp.weight q6_K +blk.51.ffn_down_exps.weight q6_K +blk.51.ffn_down_shexp.weight q6_K +blk.53.ffn_down_exps.weight q6_K +blk.53.ffn_down_shexp.weight q6_K +blk.54.ffn_down_exps.weight q6_K +blk.54.ffn_down_shexp.weight q6_K +blk.55.ffn_down_exps.weight q6_K +blk.55.ffn_down_shexp.weight q6_K +blk.56.ffn_down_exps.weight q6_K +blk.56.ffn_down_shexp.weight q6_K +blk.57.ffn_down_exps.weight q6_K +blk.57.ffn_down_shexp.weight q6_K +blk.58.ffn_down_exps.weight q6_K +blk.58.ffn_down_shexp.weight q6_K +blk.59.ffn_down_exps.weight q6_K +blk.59.ffn_down_shexp.weight q6_K +blk.60.ffn_down_exps.weight q6_K +blk.60.ffn_down_shexp.weight q6_K + +[Q5_K_S] q5_K +output.weight q6_K + +[Q5_K_M] q5_K +output.weight q6_K +blk.0.ffn_down.weight q6_K +blk.1.ffn_down.weight q6_K +blk.2.ffn_down.weight q6_K +blk.3.ffn_down_exps.weight q6_K +blk.3.ffn_down_shexp.weight q6_K +blk.4.ffn_down_exps.weight q6_K +blk.4.ffn_down_shexp.weight q6_K +blk.5.ffn_down_exps.weight q6_K +blk.5.ffn_down_shexp.weight q6_K +blk.6.ffn_down_exps.weight q6_K +blk.6.ffn_down_shexp.weight q6_K +blk.9.ffn_down_exps.weight q6_K +blk.9.ffn_down_shexp.weight q6_K +blk.12.ffn_down_exps.weight q6_K +blk.12.ffn_down_shexp.weight q6_K +blk.15.ffn_down_exps.weight q6_K +blk.15.ffn_down_shexp.weight q6_K +blk.18.ffn_down_exps.weight q6_K +blk.18.ffn_down_shexp.weight q6_K +blk.21.ffn_down_exps.weight q6_K +blk.21.ffn_down_shexp.weight q6_K +blk.24.ffn_down_exps.weight q6_K +blk.24.ffn_down_shexp.weight q6_K +blk.27.ffn_down_exps.weight q6_K +blk.27.ffn_down_shexp.weight q6_K +blk.30.ffn_down_exps.weight q6_K +blk.30.ffn_down_shexp.weight q6_K +blk.33.ffn_down_exps.weight q6_K +blk.33.ffn_down_shexp.weight q6_K +blk.36.ffn_down_exps.weight q6_K +blk.36.ffn_down_shexp.weight q6_K +blk.39.ffn_down_exps.weight q6_K +blk.39.ffn_down_shexp.weight q6_K +blk.42.ffn_down_exps.weight q6_K +blk.42.ffn_down_shexp.weight q6_K +blk.45.ffn_down_exps.weight q6_K +blk.45.ffn_down_shexp.weight q6_K +blk.48.ffn_down_exps.weight q6_K +blk.48.ffn_down_shexp.weight q6_K +blk.51.ffn_down_exps.weight q6_K +blk.51.ffn_down_shexp.weight q6_K +blk.53.ffn_down_exps.weight q6_K +blk.53.ffn_down_shexp.weight q6_K +blk.54.ffn_down_exps.weight q6_K +blk.54.ffn_down_shexp.weight q6_K +blk.55.ffn_down_exps.weight q6_K +blk.55.ffn_down_shexp.weight q6_K +blk.56.ffn_down_exps.weight q6_K +blk.56.ffn_down_shexp.weight q6_K +blk.57.ffn_down_exps.weight q6_K +blk.57.ffn_down_shexp.weight q6_K +blk.58.ffn_down_exps.weight q6_K +blk.58.ffn_down_shexp.weight q6_K +blk.59.ffn_down_exps.weight q6_K +blk.59.ffn_down_shexp.weight q6_K +blk.60.ffn_down_exps.weight q6_K +blk.60.ffn_down_shexp.weight q6_K + +[Q6_K] q6_K + +[IQ2_XXS] iq2_xxs +output.weight q5_K +token_embd.weight q2_K +blk.0.ffn_down.weight q2_K +blk.1.ffn_down.weight q2_K +blk.2.ffn_down.weight q2_K +blk.3.ffn_down_exps.weight q2_K +blk.3.ffn_down_shexp.weight q2_K +blk.4.ffn_down_exps.weight q2_K +blk.4.ffn_down_shexp.weight q2_K + +[IQ2_XS] iq2_xs +output.weight q5_K +token_embd.weight q2_K +blk.0.ffn_down.weight q2_K +blk.1.ffn_down.weight q2_K +blk.2.ffn_down.weight q2_K +blk.3.ffn_down_exps.weight q2_K +blk.3.ffn_down_shexp.weight q2_K +blk.4.ffn_down_exps.weight q2_K +blk.4.ffn_down_shexp.weight q2_K + +[Q2_K_S] q2_K +output.weight q6_K +blk.0.ffn_down.weight q4_K +blk.1.ffn_down.weight q4_K +blk.2.ffn_down.weight q4_K +blk.3.ffn_down_exps.weight q4_K +blk.3.ffn_down_shexp.weight q4_K +blk.4.ffn_down_exps.weight q4_K +blk.4.ffn_down_shexp.weight q4_K +blk.5.ffn_down_exps.weight q4_K +blk.5.ffn_down_shexp.weight q4_K +blk.6.ffn_down_exps.weight q4_K +blk.6.ffn_down_shexp.weight q4_K + +[IQ3_XS] iq3_s +output.weight q6_K +blk.7.ffn_gate_exps.weight iq3_xxs +blk.7.ffn_gate_shexp.weight iq3_xxs +blk.7.ffn_up_exps.weight iq3_xxs +blk.7.ffn_up_shexp.weight iq3_xxs +blk.8.ffn_gate_exps.weight iq3_xxs +blk.8.ffn_gate_shexp.weight iq3_xxs +blk.8.ffn_up_exps.weight iq3_xxs +blk.8.ffn_up_shexp.weight iq3_xxs +blk.9.ffn_gate_exps.weight iq3_xxs +blk.9.ffn_gate_shexp.weight iq3_xxs +blk.9.ffn_up_exps.weight iq3_xxs +blk.9.ffn_up_shexp.weight iq3_xxs +blk.10.ffn_gate_exps.weight iq3_xxs +blk.10.ffn_gate_shexp.weight iq3_xxs +blk.10.ffn_up_exps.weight iq3_xxs +blk.10.ffn_up_shexp.weight iq3_xxs +blk.11.ffn_gate_exps.weight iq3_xxs +blk.11.ffn_gate_shexp.weight iq3_xxs +blk.11.ffn_up_exps.weight iq3_xxs +blk.11.ffn_up_shexp.weight iq3_xxs +blk.12.ffn_gate_exps.weight iq3_xxs +blk.12.ffn_gate_shexp.weight iq3_xxs +blk.12.ffn_up_exps.weight iq3_xxs +blk.12.ffn_up_shexp.weight iq3_xxs +blk.13.ffn_gate_exps.weight iq3_xxs +blk.13.ffn_gate_shexp.weight iq3_xxs +blk.13.ffn_up_exps.weight iq3_xxs +blk.13.ffn_up_shexp.weight iq3_xxs +blk.14.ffn_gate_exps.weight iq3_xxs +blk.14.ffn_gate_shexp.weight iq3_xxs +blk.14.ffn_up_exps.weight iq3_xxs +blk.14.ffn_up_shexp.weight iq3_xxs +blk.15.ffn_gate_exps.weight iq3_xxs +blk.15.ffn_gate_shexp.weight iq3_xxs +blk.15.ffn_up_exps.weight iq3_xxs +blk.15.ffn_up_shexp.weight iq3_xxs +blk.16.ffn_gate_exps.weight iq3_xxs +blk.16.ffn_gate_shexp.weight iq3_xxs +blk.16.ffn_up_exps.weight iq3_xxs +blk.16.ffn_up_shexp.weight iq3_xxs +blk.17.ffn_gate_exps.weight iq3_xxs +blk.17.ffn_gate_shexp.weight iq3_xxs +blk.17.ffn_up_exps.weight iq3_xxs +blk.17.ffn_up_shexp.weight iq3_xxs +blk.18.ffn_gate_exps.weight iq3_xxs +blk.18.ffn_gate_shexp.weight iq3_xxs +blk.18.ffn_up_exps.weight iq3_xxs +blk.18.ffn_up_shexp.weight iq3_xxs +blk.19.ffn_gate_exps.weight iq3_xxs +blk.19.ffn_gate_shexp.weight iq3_xxs +blk.19.ffn_up_exps.weight iq3_xxs +blk.19.ffn_up_shexp.weight iq3_xxs +blk.20.ffn_gate_exps.weight iq3_xxs +blk.20.ffn_gate_shexp.weight iq3_xxs +blk.20.ffn_up_exps.weight iq3_xxs +blk.20.ffn_up_shexp.weight iq3_xxs +blk.21.ffn_gate_exps.weight iq3_xxs +blk.21.ffn_gate_shexp.weight iq3_xxs +blk.21.ffn_up_exps.weight iq3_xxs +blk.21.ffn_up_shexp.weight iq3_xxs +blk.22.ffn_gate_exps.weight iq3_xxs +blk.22.ffn_gate_shexp.weight iq3_xxs +blk.22.ffn_up_exps.weight iq3_xxs +blk.22.ffn_up_shexp.weight iq3_xxs +blk.23.ffn_gate_exps.weight iq3_xxs +blk.23.ffn_gate_shexp.weight iq3_xxs +blk.23.ffn_up_exps.weight iq3_xxs +blk.23.ffn_up_shexp.weight iq3_xxs +blk.24.ffn_gate_exps.weight iq3_xxs +blk.24.ffn_gate_shexp.weight iq3_xxs +blk.24.ffn_up_exps.weight iq3_xxs +blk.24.ffn_up_shexp.weight iq3_xxs +blk.25.ffn_gate_exps.weight iq3_xxs +blk.25.ffn_gate_shexp.weight iq3_xxs +blk.25.ffn_up_exps.weight iq3_xxs +blk.25.ffn_up_shexp.weight iq3_xxs +blk.26.ffn_gate_exps.weight iq3_xxs +blk.26.ffn_gate_shexp.weight iq3_xxs +blk.26.ffn_up_exps.weight iq3_xxs +blk.26.ffn_up_shexp.weight iq3_xxs +blk.27.ffn_gate_exps.weight iq3_xxs +blk.27.ffn_gate_shexp.weight iq3_xxs +blk.27.ffn_up_exps.weight iq3_xxs +blk.27.ffn_up_shexp.weight iq3_xxs +blk.28.ffn_gate_exps.weight iq3_xxs +blk.28.ffn_gate_shexp.weight iq3_xxs +blk.28.ffn_up_exps.weight iq3_xxs +blk.28.ffn_up_shexp.weight iq3_xxs +blk.29.ffn_gate_exps.weight iq3_xxs +blk.29.ffn_gate_shexp.weight iq3_xxs +blk.29.ffn_up_exps.weight iq3_xxs +blk.29.ffn_up_shexp.weight iq3_xxs +blk.30.ffn_gate_exps.weight iq3_xxs +blk.30.ffn_gate_shexp.weight iq3_xxs +blk.30.ffn_up_exps.weight iq3_xxs +blk.30.ffn_up_shexp.weight iq3_xxs +blk.31.ffn_gate_exps.weight iq3_xxs +blk.31.ffn_gate_shexp.weight iq3_xxs +blk.31.ffn_up_exps.weight iq3_xxs +blk.31.ffn_up_shexp.weight iq3_xxs +blk.32.ffn_gate_exps.weight iq3_xxs +blk.32.ffn_gate_shexp.weight iq3_xxs +blk.32.ffn_up_exps.weight iq3_xxs +blk.32.ffn_up_shexp.weight iq3_xxs +blk.33.ffn_gate_exps.weight iq3_xxs +blk.33.ffn_gate_shexp.weight iq3_xxs +blk.33.ffn_up_exps.weight iq3_xxs +blk.33.ffn_up_shexp.weight iq3_xxs +blk.34.ffn_gate_exps.weight iq3_xxs +blk.34.ffn_gate_shexp.weight iq3_xxs +blk.34.ffn_up_exps.weight iq3_xxs +blk.34.ffn_up_shexp.weight iq3_xxs +blk.35.ffn_gate_exps.weight iq3_xxs +blk.35.ffn_gate_shexp.weight iq3_xxs +blk.35.ffn_up_exps.weight iq3_xxs +blk.35.ffn_up_shexp.weight iq3_xxs +blk.36.ffn_gate_exps.weight iq3_xxs +blk.36.ffn_gate_shexp.weight iq3_xxs +blk.36.ffn_up_exps.weight iq3_xxs +blk.36.ffn_up_shexp.weight iq3_xxs +blk.37.ffn_gate_exps.weight iq3_xxs +blk.37.ffn_gate_shexp.weight iq3_xxs +blk.37.ffn_up_exps.weight iq3_xxs +blk.37.ffn_up_shexp.weight iq3_xxs +blk.38.ffn_gate_exps.weight iq3_xxs +blk.38.ffn_gate_shexp.weight iq3_xxs +blk.38.ffn_up_exps.weight iq3_xxs +blk.38.ffn_up_shexp.weight iq3_xxs +blk.39.ffn_gate_exps.weight iq3_xxs +blk.39.ffn_gate_shexp.weight iq3_xxs +blk.39.ffn_up_exps.weight iq3_xxs +blk.39.ffn_up_shexp.weight iq3_xxs +blk.40.ffn_gate_exps.weight iq3_xxs +blk.40.ffn_gate_shexp.weight iq3_xxs +blk.40.ffn_up_exps.weight iq3_xxs +blk.40.ffn_up_shexp.weight iq3_xxs +blk.41.ffn_gate_exps.weight iq3_xxs +blk.41.ffn_gate_shexp.weight iq3_xxs +blk.41.ffn_up_exps.weight iq3_xxs +blk.41.ffn_up_shexp.weight iq3_xxs +blk.42.ffn_gate_exps.weight iq3_xxs +blk.42.ffn_gate_shexp.weight iq3_xxs +blk.42.ffn_up_exps.weight iq3_xxs +blk.42.ffn_up_shexp.weight iq3_xxs +blk.43.ffn_gate_exps.weight iq3_xxs +blk.43.ffn_gate_shexp.weight iq3_xxs +blk.43.ffn_up_exps.weight iq3_xxs +blk.43.ffn_up_shexp.weight iq3_xxs +blk.44.ffn_gate_exps.weight iq3_xxs +blk.44.ffn_gate_shexp.weight iq3_xxs +blk.44.ffn_up_exps.weight iq3_xxs +blk.44.ffn_up_shexp.weight iq3_xxs +blk.45.ffn_gate_exps.weight iq3_xxs +blk.45.ffn_gate_shexp.weight iq3_xxs +blk.45.ffn_up_exps.weight iq3_xxs +blk.45.ffn_up_shexp.weight iq3_xxs +blk.46.ffn_gate_exps.weight iq3_xxs +blk.46.ffn_gate_shexp.weight iq3_xxs +blk.46.ffn_up_exps.weight iq3_xxs +blk.46.ffn_up_shexp.weight iq3_xxs +blk.47.ffn_gate_exps.weight iq3_xxs +blk.47.ffn_gate_shexp.weight iq3_xxs +blk.47.ffn_up_exps.weight iq3_xxs +blk.47.ffn_up_shexp.weight iq3_xxs +blk.48.ffn_gate_exps.weight iq3_xxs +blk.48.ffn_gate_shexp.weight iq3_xxs +blk.48.ffn_up_exps.weight iq3_xxs +blk.48.ffn_up_shexp.weight iq3_xxs +blk.49.ffn_gate_exps.weight iq3_xxs +blk.49.ffn_gate_shexp.weight iq3_xxs +blk.49.ffn_up_exps.weight iq3_xxs +blk.49.ffn_up_shexp.weight iq3_xxs +blk.50.ffn_gate_exps.weight iq3_xxs +blk.50.ffn_gate_shexp.weight iq3_xxs +blk.50.ffn_up_exps.weight iq3_xxs +blk.50.ffn_up_shexp.weight iq3_xxs +blk.51.ffn_gate_exps.weight iq3_xxs +blk.51.ffn_gate_shexp.weight iq3_xxs +blk.51.ffn_up_exps.weight iq3_xxs +blk.51.ffn_up_shexp.weight iq3_xxs +blk.52.ffn_gate_exps.weight iq3_xxs +blk.52.ffn_gate_shexp.weight iq3_xxs +blk.52.ffn_up_exps.weight iq3_xxs +blk.52.ffn_up_shexp.weight iq3_xxs + +[IQ3_XXS] iq3_xxs +output.weight q5_K +token_embd.weight iq3_s +blk.0.attn_output.weight iq3_s +blk.0.ffn_down.weight q4_K +blk.1.attn_output.weight iq3_s +blk.1.ffn_down.weight q4_K +blk.2.attn_output.weight iq3_s +blk.2.ffn_down.weight q4_K +blk.3.attn_output.weight iq3_s +blk.3.ffn_down_exps.weight q4_K +blk.3.ffn_down_shexp.weight q4_K +blk.4.attn_output.weight iq3_s +blk.4.ffn_down_exps.weight q4_K +blk.4.ffn_down_shexp.weight q4_K +blk.5.attn_output.weight iq3_s +blk.5.ffn_down_exps.weight q4_K +blk.5.ffn_down_shexp.weight q4_K +blk.6.attn_output.weight iq3_s +blk.6.ffn_down_exps.weight q4_K +blk.6.ffn_down_shexp.weight q4_K +blk.7.attn_output.weight iq3_s +blk.7.ffn_down_exps.weight q3_K +blk.7.ffn_down_shexp.weight q3_K +blk.8.attn_output.weight iq3_s +blk.8.ffn_down_exps.weight q3_K +blk.8.ffn_down_shexp.weight q3_K +blk.9.attn_output.weight iq3_s +blk.9.ffn_down_exps.weight q3_K +blk.9.ffn_down_shexp.weight q3_K +blk.10.attn_output.weight iq3_s +blk.10.ffn_down_exps.weight q3_K +blk.10.ffn_down_shexp.weight q3_K +blk.11.attn_output.weight iq3_s +blk.11.ffn_down_exps.weight q3_K +blk.11.ffn_down_shexp.weight q3_K +blk.12.attn_output.weight iq3_s +blk.12.ffn_down_exps.weight q3_K +blk.12.ffn_down_shexp.weight q3_K +blk.13.attn_output.weight iq3_s +blk.13.ffn_down_exps.weight q3_K +blk.13.ffn_down_shexp.weight q3_K +blk.14.attn_output.weight iq3_s +blk.14.ffn_down_exps.weight q3_K +blk.14.ffn_down_shexp.weight q3_K +blk.15.attn_output.weight iq3_s +blk.15.ffn_down_exps.weight q3_K +blk.15.ffn_down_shexp.weight q3_K +blk.16.attn_output.weight iq3_s +blk.16.ffn_down_exps.weight q3_K +blk.16.ffn_down_shexp.weight q3_K +blk.17.attn_output.weight iq3_s +blk.17.ffn_down_exps.weight q3_K +blk.17.ffn_down_shexp.weight q3_K +blk.18.attn_output.weight iq3_s +blk.18.ffn_down_exps.weight q3_K +blk.18.ffn_down_shexp.weight q3_K +blk.19.attn_output.weight iq3_s +blk.19.ffn_down_exps.weight q3_K +blk.19.ffn_down_shexp.weight q3_K +blk.20.attn_output.weight iq3_s +blk.20.ffn_down_exps.weight q3_K +blk.20.ffn_down_shexp.weight q3_K +blk.21.attn_output.weight iq3_s +blk.21.ffn_down_exps.weight q3_K +blk.21.ffn_down_shexp.weight q3_K +blk.22.attn_output.weight iq3_s +blk.22.ffn_down_exps.weight q3_K +blk.22.ffn_down_shexp.weight q3_K +blk.23.attn_output.weight iq3_s +blk.23.ffn_down_exps.weight q3_K +blk.23.ffn_down_shexp.weight q3_K +blk.24.attn_output.weight iq3_s +blk.24.ffn_down_exps.weight q3_K +blk.24.ffn_down_shexp.weight q3_K +blk.25.attn_output.weight iq3_s +blk.25.ffn_down_exps.weight q3_K +blk.25.ffn_down_shexp.weight q3_K +blk.26.attn_output.weight iq3_s +blk.26.ffn_down_exps.weight q3_K +blk.26.ffn_down_shexp.weight q3_K +blk.27.attn_output.weight iq3_s +blk.27.ffn_down_exps.weight q3_K +blk.27.ffn_down_shexp.weight q3_K +blk.28.attn_output.weight iq3_s +blk.28.ffn_down_exps.weight q3_K +blk.28.ffn_down_shexp.weight q3_K +blk.29.attn_output.weight iq3_s +blk.29.ffn_down_exps.weight q3_K +blk.29.ffn_down_shexp.weight q3_K +blk.30.attn_output.weight iq3_s +blk.30.ffn_down_exps.weight q3_K +blk.30.ffn_down_shexp.weight q3_K +blk.31.attn_output.weight iq3_s +blk.31.ffn_down_exps.weight q3_K +blk.31.ffn_down_shexp.weight q3_K +blk.32.attn_output.weight iq3_s +blk.32.ffn_down_exps.weight q3_K +blk.32.ffn_down_shexp.weight q3_K +blk.33.attn_output.weight iq3_s +blk.33.ffn_down_exps.weight q3_K +blk.33.ffn_down_shexp.weight q3_K +blk.34.attn_output.weight iq3_s +blk.34.ffn_down_exps.weight q3_K +blk.34.ffn_down_shexp.weight q3_K +blk.35.attn_output.weight iq3_s +blk.35.ffn_down_exps.weight q3_K +blk.35.ffn_down_shexp.weight q3_K +blk.36.attn_output.weight iq3_s +blk.36.ffn_down_exps.weight q3_K +blk.36.ffn_down_shexp.weight q3_K +blk.37.attn_output.weight iq3_s +blk.37.ffn_down_exps.weight q3_K +blk.37.ffn_down_shexp.weight q3_K +blk.38.attn_output.weight iq3_s +blk.38.ffn_down_exps.weight q3_K +blk.38.ffn_down_shexp.weight q3_K +blk.39.attn_output.weight iq3_s +blk.39.ffn_down_exps.weight q3_K +blk.39.ffn_down_shexp.weight q3_K +blk.40.attn_output.weight iq3_s +blk.40.ffn_down_exps.weight q3_K +blk.40.ffn_down_shexp.weight q3_K +blk.41.attn_output.weight iq3_s +blk.41.ffn_down_exps.weight q3_K +blk.41.ffn_down_shexp.weight q3_K +blk.42.attn_output.weight iq3_s +blk.42.ffn_down_exps.weight q3_K +blk.42.ffn_down_shexp.weight q3_K +blk.43.attn_output.weight iq3_s +blk.43.ffn_down_exps.weight q3_K +blk.43.ffn_down_shexp.weight q3_K +blk.44.attn_output.weight iq3_s +blk.44.ffn_down_exps.weight q3_K +blk.44.ffn_down_shexp.weight q3_K +blk.45.attn_output.weight iq3_s +blk.45.ffn_down_exps.weight q3_K +blk.45.ffn_down_shexp.weight q3_K +blk.46.attn_output.weight iq3_s +blk.46.ffn_down_exps.weight q3_K +blk.46.ffn_down_shexp.weight q3_K +blk.47.attn_output.weight iq3_s +blk.47.ffn_down_exps.weight q3_K +blk.47.ffn_down_shexp.weight q3_K +blk.48.attn_output.weight iq3_s +blk.48.ffn_down_exps.weight q3_K +blk.48.ffn_down_shexp.weight q3_K +blk.49.attn_output.weight iq3_s +blk.49.ffn_down_exps.weight q3_K +blk.49.ffn_down_shexp.weight q3_K +blk.50.attn_output.weight iq3_s +blk.50.ffn_down_exps.weight q3_K +blk.50.ffn_down_shexp.weight q3_K +blk.51.attn_output.weight iq3_s +blk.51.ffn_down_exps.weight q3_K +blk.51.ffn_down_shexp.weight q3_K +blk.52.attn_output.weight iq3_s +blk.52.ffn_down_exps.weight q3_K +blk.52.ffn_down_shexp.weight q3_K +blk.53.attn_output.weight iq3_s +blk.53.ffn_down_exps.weight q3_K +blk.53.ffn_down_shexp.weight q3_K +blk.54.attn_output.weight iq3_s +blk.54.ffn_down_exps.weight q3_K +blk.54.ffn_down_shexp.weight q3_K +blk.55.attn_output.weight iq3_s +blk.55.ffn_down_exps.weight q3_K +blk.55.ffn_down_shexp.weight q3_K +blk.56.attn_output.weight iq3_s +blk.56.ffn_down_exps.weight q3_K +blk.56.ffn_down_shexp.weight q3_K +blk.57.attn_output.weight iq3_s +blk.57.ffn_down_exps.weight q3_K +blk.57.ffn_down_shexp.weight q3_K +blk.58.attn_output.weight iq3_s +blk.58.ffn_down_exps.weight q3_K +blk.58.ffn_down_shexp.weight q3_K +blk.59.attn_output.weight iq3_s +blk.59.ffn_down_exps.weight q3_K +blk.59.ffn_down_shexp.weight q3_K +blk.60.attn_output.weight iq3_s +blk.60.ffn_down_exps.weight q3_K +blk.60.ffn_down_shexp.weight q3_K + +[IQ1_S] iq1_s +output.weight q5_K +token_embd.weight q2_K +blk.0.attn_output.weight iq2_xxs +blk.0.ffn_down.weight q2_K +blk.1.attn_output.weight iq2_xxs +blk.1.ffn_down.weight q2_K +blk.2.attn_output.weight iq2_xxs +blk.2.ffn_down.weight q2_K +blk.3.attn_output.weight iq2_xxs +blk.3.ffn_down_exps.weight q2_K +blk.3.ffn_down_shexp.weight q2_K +blk.4.attn_output.weight iq2_xxs +blk.4.ffn_down_exps.weight q2_K +blk.4.ffn_down_shexp.weight q2_K +blk.5.attn_output.weight iq2_xxs +blk.6.attn_output.weight iq2_xxs +blk.7.attn_output.weight iq2_xxs +blk.8.attn_output.weight iq2_xxs +blk.9.attn_output.weight iq2_xxs +blk.10.attn_output.weight iq2_xxs +blk.11.attn_output.weight iq2_xxs +blk.12.attn_output.weight iq2_xxs +blk.13.attn_output.weight iq2_xxs +blk.14.attn_output.weight iq2_xxs +blk.15.attn_output.weight iq2_xxs +blk.16.attn_output.weight iq2_xxs +blk.17.attn_output.weight iq2_xxs +blk.18.attn_output.weight iq2_xxs +blk.19.attn_output.weight iq2_xxs +blk.20.attn_output.weight iq2_xxs +blk.21.attn_output.weight iq2_xxs +blk.22.attn_output.weight iq2_xxs +blk.23.attn_output.weight iq2_xxs +blk.24.attn_output.weight iq2_xxs +blk.25.attn_output.weight iq2_xxs +blk.26.attn_output.weight iq2_xxs +blk.27.attn_output.weight iq2_xxs +blk.28.attn_output.weight iq2_xxs +blk.29.attn_output.weight iq2_xxs +blk.30.attn_output.weight iq2_xxs +blk.31.attn_output.weight iq2_xxs +blk.32.attn_output.weight iq2_xxs +blk.33.attn_output.weight iq2_xxs +blk.34.attn_output.weight iq2_xxs +blk.35.attn_output.weight iq2_xxs +blk.36.attn_output.weight iq2_xxs +blk.37.attn_output.weight iq2_xxs +blk.38.attn_output.weight iq2_xxs +blk.39.attn_output.weight iq2_xxs +blk.40.attn_output.weight iq2_xxs +blk.41.attn_output.weight iq2_xxs +blk.42.attn_output.weight iq2_xxs +blk.43.attn_output.weight iq2_xxs +blk.44.attn_output.weight iq2_xxs +blk.45.attn_output.weight iq2_xxs +blk.46.attn_output.weight iq2_xxs +blk.47.attn_output.weight iq2_xxs +blk.48.attn_output.weight iq2_xxs +blk.49.attn_output.weight iq2_xxs +blk.50.attn_output.weight iq2_xxs +blk.51.attn_output.weight iq2_xxs +blk.52.attn_output.weight iq2_xxs +blk.53.attn_output.weight iq2_xxs +blk.54.attn_output.weight iq2_xxs +blk.55.attn_output.weight iq2_xxs +blk.56.attn_output.weight iq2_xxs +blk.57.attn_output.weight iq2_xxs +blk.58.attn_output.weight iq2_xxs +blk.59.attn_output.weight iq2_xxs +blk.60.attn_output.weight iq2_xxs + +[IQ4_NL] iq4_nl +output.weight q6_K +blk.0.ffn_down.weight q5_K +blk.1.ffn_down.weight q5_K +blk.2.ffn_down.weight q5_K +blk.3.ffn_down_exps.weight q5_K +blk.3.ffn_down_shexp.weight q5_K +blk.4.ffn_down_exps.weight q5_K +blk.4.ffn_down_shexp.weight q5_K +blk.5.ffn_down_exps.weight q5_K +blk.5.ffn_down_shexp.weight q5_K +blk.6.ffn_down_exps.weight q5_K +blk.6.ffn_down_shexp.weight q5_K + +[IQ3_S] iq3_s +output.weight q6_K + +[IQ3_M] iq3_s +output.weight q6_K +blk.0.attn_output.weight q4_K +blk.0.ffn_down.weight q4_K +blk.1.attn_output.weight q4_K +blk.1.ffn_down.weight q4_K +blk.2.attn_output.weight q4_K +blk.2.ffn_down.weight q4_K +blk.3.attn_output.weight q4_K +blk.3.ffn_down_exps.weight q4_K +blk.3.ffn_down_shexp.weight q4_K +blk.4.attn_output.weight q4_K +blk.4.ffn_down_exps.weight q4_K +blk.4.ffn_down_shexp.weight q4_K +blk.5.attn_output.weight q4_K +blk.5.ffn_down_exps.weight q4_K +blk.5.ffn_down_shexp.weight q4_K +blk.6.attn_output.weight q4_K +blk.6.ffn_down_exps.weight q4_K +blk.6.ffn_down_shexp.weight q4_K +blk.7.attn_output.weight q4_K +blk.8.attn_output.weight q4_K +blk.9.attn_output.weight q4_K +blk.10.attn_output.weight q4_K +blk.11.attn_output.weight q4_K +blk.12.attn_output.weight q4_K +blk.13.attn_output.weight q4_K +blk.14.attn_output.weight q4_K +blk.15.attn_output.weight q4_K +blk.16.attn_output.weight q4_K +blk.17.attn_output.weight q4_K +blk.18.attn_output.weight q4_K +blk.19.attn_output.weight q4_K +blk.20.attn_output.weight q4_K +blk.21.attn_output.weight q4_K +blk.22.attn_output.weight q4_K +blk.23.attn_output.weight q4_K +blk.24.attn_output.weight q4_K +blk.25.attn_output.weight q4_K +blk.26.attn_output.weight q4_K +blk.27.attn_output.weight q4_K +blk.28.attn_output.weight q4_K +blk.29.attn_output.weight q4_K +blk.30.attn_output.weight q4_K +blk.31.attn_output.weight q4_K +blk.32.attn_output.weight q4_K +blk.33.attn_output.weight q4_K +blk.34.attn_output.weight q4_K +blk.35.attn_output.weight q4_K +blk.36.attn_output.weight q4_K +blk.37.attn_output.weight q4_K +blk.38.attn_output.weight q4_K +blk.39.attn_output.weight q4_K +blk.40.attn_output.weight q4_K +blk.41.attn_output.weight q4_K +blk.42.attn_output.weight q4_K +blk.43.attn_output.weight q4_K +blk.44.attn_output.weight q4_K +blk.45.attn_output.weight q4_K +blk.46.attn_output.weight q4_K +blk.47.attn_output.weight q4_K +blk.48.attn_output.weight q4_K +blk.49.attn_output.weight q4_K +blk.50.attn_output.weight q4_K +blk.51.attn_output.weight q4_K +blk.52.attn_output.weight q4_K +blk.53.attn_output.weight q4_K +blk.54.attn_output.weight q4_K +blk.55.attn_output.weight q4_K +blk.56.attn_output.weight q4_K +blk.57.attn_output.weight q4_K +blk.58.attn_output.weight q4_K +blk.59.attn_output.weight q4_K +blk.60.attn_output.weight q4_K + +[IQ2_S] iq2_xs +output.weight q5_K +token_embd.weight iq3_s +blk.0.attn_output.weight iq3_s +blk.0.ffn_down.weight iq3_s +blk.1.attn_output.weight iq3_s +blk.1.ffn_down.weight iq3_s +blk.2.attn_output.weight iq3_s +blk.2.ffn_down.weight iq3_s +blk.3.attn_output.weight iq3_s +blk.3.ffn_down_exps.weight iq3_s +blk.3.ffn_down_shexp.weight iq3_s +blk.4.attn_output.weight iq3_s +blk.4.ffn_down_exps.weight iq3_s +blk.4.ffn_down_shexp.weight iq3_s +blk.5.attn_output.weight iq3_s +blk.6.attn_output.weight iq3_s +blk.7.attn_output.weight iq3_s +blk.8.attn_output.weight iq3_s +blk.9.attn_output.weight iq3_s +blk.10.attn_output.weight iq3_s +blk.11.attn_output.weight iq3_s +blk.12.attn_output.weight iq3_s +blk.13.attn_output.weight iq3_s +blk.14.attn_output.weight iq3_s +blk.15.attn_output.weight iq3_s +blk.16.attn_output.weight iq3_s +blk.17.attn_output.weight iq3_s +blk.18.attn_output.weight iq3_s +blk.19.attn_output.weight iq3_s +blk.20.attn_output.weight iq3_s +blk.21.attn_output.weight iq3_s +blk.22.attn_output.weight iq3_s +blk.23.attn_output.weight iq3_s +blk.24.attn_output.weight iq3_s +blk.25.attn_output.weight iq3_s +blk.26.attn_output.weight iq3_s +blk.27.attn_output.weight iq3_s +blk.28.attn_output.weight iq3_s +blk.29.attn_output.weight iq3_s +blk.30.attn_output.weight iq3_s +blk.31.attn_output.weight iq3_s +blk.32.attn_output.weight iq3_s +blk.33.attn_output.weight iq3_s +blk.34.attn_output.weight iq3_s +blk.35.attn_output.weight iq3_s +blk.36.attn_output.weight iq3_s +blk.37.attn_output.weight iq3_s +blk.38.attn_output.weight iq3_s +blk.39.attn_output.weight iq3_s +blk.40.attn_output.weight iq3_s +blk.41.attn_output.weight iq3_s +blk.42.attn_output.weight iq3_s +blk.43.attn_output.weight iq3_s +blk.44.attn_output.weight iq3_s +blk.45.attn_output.weight iq3_s +blk.46.attn_output.weight iq3_s +blk.47.attn_output.weight iq3_s +blk.48.attn_output.weight iq3_s +blk.49.attn_output.weight iq3_s +blk.50.attn_output.weight iq3_s +blk.51.attn_output.weight iq3_s +blk.52.attn_output.weight iq3_s +blk.53.attn_output.weight iq3_s +blk.54.attn_output.weight iq3_s +blk.55.attn_output.weight iq3_s +blk.56.attn_output.weight iq3_s +blk.57.attn_output.weight iq3_s +blk.58.attn_output.weight iq3_s +blk.59.attn_output.weight iq3_s +blk.60.attn_output.weight iq3_s + +[IQ2_M] iq2_s +output.weight q5_K +token_embd.weight iq3_s +blk.0.attn_output.weight iq3_s +blk.0.ffn_down.weight iq3_s +blk.1.attn_output.weight iq3_s +blk.1.ffn_down.weight iq3_s +blk.2.attn_output.weight iq3_s +blk.2.ffn_down.weight iq3_s +blk.3.attn_output.weight iq3_s +blk.3.ffn_down_exps.weight iq3_s +blk.3.ffn_down_shexp.weight iq3_s +blk.4.attn_output.weight iq3_s +blk.4.ffn_down_exps.weight iq3_s +blk.4.ffn_down_shexp.weight iq3_s +blk.5.attn_output.weight iq3_s +blk.6.attn_output.weight iq3_s +blk.7.attn_output.weight iq3_s +blk.8.attn_output.weight iq3_s +blk.9.attn_output.weight iq3_s +blk.10.attn_output.weight iq3_s +blk.11.attn_output.weight iq3_s +blk.12.attn_output.weight iq3_s +blk.13.attn_output.weight iq3_s +blk.14.attn_output.weight iq3_s +blk.15.attn_output.weight iq3_s +blk.16.attn_output.weight iq3_s +blk.17.attn_output.weight iq3_s +blk.18.attn_output.weight iq3_s +blk.19.attn_output.weight iq3_s +blk.20.attn_output.weight iq3_s +blk.21.attn_output.weight iq3_s +blk.22.attn_output.weight iq3_s +blk.23.attn_output.weight iq3_s +blk.24.attn_output.weight iq3_s +blk.25.attn_output.weight iq3_s +blk.26.attn_output.weight iq3_s +blk.27.attn_output.weight iq3_s +blk.28.attn_output.weight iq3_s +blk.29.attn_output.weight iq3_s +blk.30.attn_output.weight iq3_s +blk.31.attn_output.weight iq3_s +blk.32.attn_output.weight iq3_s +blk.33.attn_output.weight iq3_s +blk.34.attn_output.weight iq3_s +blk.35.attn_output.weight iq3_s +blk.36.attn_output.weight iq3_s +blk.37.attn_output.weight iq3_s +blk.38.attn_output.weight iq3_s +blk.39.attn_output.weight iq3_s +blk.40.attn_output.weight iq3_s +blk.41.attn_output.weight iq3_s +blk.42.attn_output.weight iq3_s +blk.43.attn_output.weight iq3_s +blk.44.attn_output.weight iq3_s +blk.45.attn_output.weight iq3_s +blk.46.attn_output.weight iq3_s +blk.47.attn_output.weight iq3_s +blk.48.attn_output.weight iq3_s +blk.49.attn_output.weight iq3_s +blk.50.attn_output.weight iq3_s +blk.51.attn_output.weight iq3_s +blk.52.attn_output.weight iq3_s +blk.53.attn_output.weight iq3_s +blk.54.attn_output.weight iq3_s +blk.55.attn_output.weight iq3_s +blk.56.attn_output.weight iq3_s +blk.57.attn_output.weight iq3_s +blk.58.attn_output.weight iq3_s +blk.59.attn_output.weight iq3_s +blk.60.attn_output.weight iq3_s + +[IQ4_XS] iq4_xs +output.weight q6_K +blk.0.ffn_down.weight q5_K +blk.1.ffn_down.weight q5_K +blk.2.ffn_down.weight q5_K +blk.3.ffn_down_exps.weight q5_K +blk.3.ffn_down_shexp.weight q5_K +blk.4.ffn_down_exps.weight q5_K +blk.4.ffn_down_shexp.weight q5_K +blk.5.ffn_down_exps.weight q5_K +blk.5.ffn_down_shexp.weight q5_K +blk.6.ffn_down_exps.weight q5_K +blk.6.ffn_down_shexp.weight q5_K + +[IQ1_M] iq1_m +output.weight q5_K +token_embd.weight q2_K +blk.0.attn_output.weight iq2_xxs +blk.0.ffn_down.weight q2_K +blk.1.attn_output.weight iq2_xxs +blk.1.ffn_down.weight q2_K +blk.2.attn_output.weight iq2_xxs +blk.2.ffn_down.weight q2_K +blk.3.attn_output.weight iq2_xxs +blk.3.ffn_down_exps.weight q2_K +blk.3.ffn_down_shexp.weight q2_K +blk.4.attn_output.weight iq2_xxs +blk.4.ffn_down_exps.weight q2_K +blk.4.ffn_down_shexp.weight q2_K +blk.5.attn_output.weight iq2_xxs +blk.6.attn_output.weight iq2_xxs +blk.7.attn_output.weight iq2_xxs +blk.8.attn_output.weight iq2_xxs +blk.9.attn_output.weight iq2_xxs +blk.10.attn_output.weight iq2_xxs +blk.11.attn_output.weight iq2_xxs +blk.12.attn_output.weight iq2_xxs +blk.13.attn_output.weight iq2_xxs +blk.14.attn_output.weight iq2_xxs +blk.15.attn_output.weight iq2_xxs +blk.16.attn_output.weight iq2_xxs +blk.17.attn_output.weight iq2_xxs +blk.18.attn_output.weight iq2_xxs +blk.19.attn_output.weight iq2_xxs +blk.20.attn_output.weight iq2_xxs +blk.21.attn_output.weight iq2_xxs +blk.22.attn_output.weight iq2_xxs +blk.23.attn_output.weight iq2_xxs +blk.24.attn_output.weight iq2_xxs +blk.25.attn_output.weight iq2_xxs +blk.26.attn_output.weight iq2_xxs +blk.27.attn_output.weight iq2_xxs +blk.28.attn_output.weight iq2_xxs +blk.29.attn_output.weight iq2_xxs +blk.30.attn_output.weight iq2_xxs +blk.31.attn_output.weight iq2_xxs +blk.32.attn_output.weight iq2_xxs +blk.33.attn_output.weight iq2_xxs +blk.34.attn_output.weight iq2_xxs +blk.35.attn_output.weight iq2_xxs +blk.36.attn_output.weight iq2_xxs +blk.37.attn_output.weight iq2_xxs +blk.38.attn_output.weight iq2_xxs +blk.39.attn_output.weight iq2_xxs +blk.40.attn_output.weight iq2_xxs +blk.41.attn_output.weight iq2_xxs +blk.42.attn_output.weight iq2_xxs +blk.43.attn_output.weight iq2_xxs +blk.44.attn_output.weight iq2_xxs +blk.45.attn_output.weight iq2_xxs +blk.46.attn_output.weight iq2_xxs +blk.47.attn_output.weight iq2_xxs +blk.48.attn_output.weight iq2_xxs +blk.49.attn_output.weight iq2_xxs +blk.50.attn_output.weight iq2_xxs +blk.51.attn_output.weight iq2_xxs +blk.52.attn_output.weight iq2_xxs +blk.53.attn_output.weight iq2_xxs +blk.54.attn_output.weight iq2_xxs +blk.55.attn_output.weight iq2_xxs +blk.56.attn_output.weight iq2_xxs +blk.57.attn_output.weight iq2_xxs +blk.58.attn_output.weight iq2_xxs +blk.59.attn_output.weight iq2_xxs +blk.60.attn_output.weight iq2_xxs + +[BF16] bf16 +output.weight q6_K + +[TQ1_0] tq1_0 +output.weight q6_K +token_embd.weight q4_K + +[TQ2_0] tq2_0 +output.weight q6_K +token_embd.weight q4_K + +[MXFP4_MOE] mxfp4 +output.weight q8_0 +token_embd.weight q8_0 +blk.0.attn_kv_a_mqa.weight q8_0 +blk.0.attn_output.weight q8_0 +blk.0.attn_q_a.weight q8_0 +blk.0.attn_q_b.weight q8_0 +blk.0.ffn_down.weight q8_0 +blk.0.ffn_gate.weight q8_0 +blk.0.ffn_up.weight q8_0 +blk.1.attn_kv_a_mqa.weight q8_0 +blk.1.attn_output.weight q8_0 +blk.1.attn_q_a.weight q8_0 +blk.1.attn_q_b.weight q8_0 +blk.1.ffn_down.weight q8_0 +blk.1.ffn_gate.weight q8_0 +blk.1.ffn_up.weight q8_0 +blk.2.attn_kv_a_mqa.weight q8_0 +blk.2.attn_output.weight q8_0 +blk.2.attn_q_a.weight q8_0 +blk.2.attn_q_b.weight q8_0 +blk.2.ffn_down.weight q8_0 +blk.2.ffn_gate.weight q8_0 +blk.2.ffn_up.weight q8_0 +blk.3.attn_kv_a_mqa.weight q8_0 +blk.3.attn_output.weight q8_0 +blk.3.attn_q_a.weight q8_0 +blk.3.attn_q_b.weight q8_0 +blk.3.ffn_down_shexp.weight q8_0 +blk.3.ffn_gate_shexp.weight q8_0 +blk.3.ffn_up_shexp.weight q8_0 +blk.4.attn_kv_a_mqa.weight q8_0 +blk.4.attn_output.weight q8_0 +blk.4.attn_q_a.weight q8_0 +blk.4.attn_q_b.weight q8_0 +blk.4.ffn_down_shexp.weight q8_0 +blk.4.ffn_gate_shexp.weight q8_0 +blk.4.ffn_up_shexp.weight q8_0 +blk.5.attn_kv_a_mqa.weight q8_0 +blk.5.attn_output.weight q8_0 +blk.5.attn_q_a.weight q8_0 +blk.5.attn_q_b.weight q8_0 +blk.5.ffn_down_shexp.weight q8_0 +blk.5.ffn_gate_shexp.weight q8_0 +blk.5.ffn_up_shexp.weight q8_0 +blk.6.attn_kv_a_mqa.weight q8_0 +blk.6.attn_output.weight q8_0 +blk.6.attn_q_a.weight q8_0 +blk.6.attn_q_b.weight q8_0 +blk.6.ffn_down_shexp.weight q8_0 +blk.6.ffn_gate_shexp.weight q8_0 +blk.6.ffn_up_shexp.weight q8_0 +blk.7.attn_kv_a_mqa.weight q8_0 +blk.7.attn_output.weight q8_0 +blk.7.attn_q_a.weight q8_0 +blk.7.attn_q_b.weight q8_0 +blk.7.ffn_down_shexp.weight q8_0 +blk.7.ffn_gate_shexp.weight q8_0 +blk.7.ffn_up_shexp.weight q8_0 +blk.8.attn_kv_a_mqa.weight q8_0 +blk.8.attn_output.weight q8_0 +blk.8.attn_q_a.weight q8_0 +blk.8.attn_q_b.weight q8_0 +blk.8.ffn_down_shexp.weight q8_0 +blk.8.ffn_gate_shexp.weight q8_0 +blk.8.ffn_up_shexp.weight q8_0 +blk.9.attn_kv_a_mqa.weight q8_0 +blk.9.attn_output.weight q8_0 +blk.9.attn_q_a.weight q8_0 +blk.9.attn_q_b.weight q8_0 +blk.9.ffn_down_shexp.weight q8_0 +blk.9.ffn_gate_shexp.weight q8_0 +blk.9.ffn_up_shexp.weight q8_0 +blk.10.attn_kv_a_mqa.weight q8_0 +blk.10.attn_output.weight q8_0 +blk.10.attn_q_a.weight q8_0 +blk.10.attn_q_b.weight q8_0 +blk.10.ffn_down_shexp.weight q8_0 +blk.10.ffn_gate_shexp.weight q8_0 +blk.10.ffn_up_shexp.weight q8_0 +blk.11.attn_kv_a_mqa.weight q8_0 +blk.11.attn_output.weight q8_0 +blk.11.attn_q_a.weight q8_0 +blk.11.attn_q_b.weight q8_0 +blk.11.ffn_down_shexp.weight q8_0 +blk.11.ffn_gate_shexp.weight q8_0 +blk.11.ffn_up_shexp.weight q8_0 +blk.12.attn_kv_a_mqa.weight q8_0 +blk.12.attn_output.weight q8_0 +blk.12.attn_q_a.weight q8_0 +blk.12.attn_q_b.weight q8_0 +blk.12.ffn_down_shexp.weight q8_0 +blk.12.ffn_gate_shexp.weight q8_0 +blk.12.ffn_up_shexp.weight q8_0 +blk.13.attn_kv_a_mqa.weight q8_0 +blk.13.attn_output.weight q8_0 +blk.13.attn_q_a.weight q8_0 +blk.13.attn_q_b.weight q8_0 +blk.13.ffn_down_shexp.weight q8_0 +blk.13.ffn_gate_shexp.weight q8_0 +blk.13.ffn_up_shexp.weight q8_0 +blk.14.attn_kv_a_mqa.weight q8_0 +blk.14.attn_output.weight q8_0 +blk.14.attn_q_a.weight q8_0 +blk.14.attn_q_b.weight q8_0 +blk.14.ffn_down_shexp.weight q8_0 +blk.14.ffn_gate_shexp.weight q8_0 +blk.14.ffn_up_shexp.weight q8_0 +blk.15.attn_kv_a_mqa.weight q8_0 +blk.15.attn_output.weight q8_0 +blk.15.attn_q_a.weight q8_0 +blk.15.attn_q_b.weight q8_0 +blk.15.ffn_down_shexp.weight q8_0 +blk.15.ffn_gate_shexp.weight q8_0 +blk.15.ffn_up_shexp.weight q8_0 +blk.16.attn_kv_a_mqa.weight q8_0 +blk.16.attn_output.weight q8_0 +blk.16.attn_q_a.weight q8_0 +blk.16.attn_q_b.weight q8_0 +blk.16.ffn_down_shexp.weight q8_0 +blk.16.ffn_gate_shexp.weight q8_0 +blk.16.ffn_up_shexp.weight q8_0 +blk.17.attn_kv_a_mqa.weight q8_0 +blk.17.attn_output.weight q8_0 +blk.17.attn_q_a.weight q8_0 +blk.17.attn_q_b.weight q8_0 +blk.17.ffn_down_shexp.weight q8_0 +blk.17.ffn_gate_shexp.weight q8_0 +blk.17.ffn_up_shexp.weight q8_0 +blk.18.attn_kv_a_mqa.weight q8_0 +blk.18.attn_output.weight q8_0 +blk.18.attn_q_a.weight q8_0 +blk.18.attn_q_b.weight q8_0 +blk.18.ffn_down_shexp.weight q8_0 +blk.18.ffn_gate_shexp.weight q8_0 +blk.18.ffn_up_shexp.weight q8_0 +blk.19.attn_kv_a_mqa.weight q8_0 +blk.19.attn_output.weight q8_0 +blk.19.attn_q_a.weight q8_0 +blk.19.attn_q_b.weight q8_0 +blk.19.ffn_down_shexp.weight q8_0 +blk.19.ffn_gate_shexp.weight q8_0 +blk.19.ffn_up_shexp.weight q8_0 +blk.20.attn_kv_a_mqa.weight q8_0 +blk.20.attn_output.weight q8_0 +blk.20.attn_q_a.weight q8_0 +blk.20.attn_q_b.weight q8_0 +blk.20.ffn_down_shexp.weight q8_0 +blk.20.ffn_gate_shexp.weight q8_0 +blk.20.ffn_up_shexp.weight q8_0 +blk.21.attn_kv_a_mqa.weight q8_0 +blk.21.attn_output.weight q8_0 +blk.21.attn_q_a.weight q8_0 +blk.21.attn_q_b.weight q8_0 +blk.21.ffn_down_shexp.weight q8_0 +blk.21.ffn_gate_shexp.weight q8_0 +blk.21.ffn_up_shexp.weight q8_0 +blk.22.attn_kv_a_mqa.weight q8_0 +blk.22.attn_output.weight q8_0 +blk.22.attn_q_a.weight q8_0 +blk.22.attn_q_b.weight q8_0 +blk.22.ffn_down_shexp.weight q8_0 +blk.22.ffn_gate_shexp.weight q8_0 +blk.22.ffn_up_shexp.weight q8_0 +blk.23.attn_kv_a_mqa.weight q8_0 +blk.23.attn_output.weight q8_0 +blk.23.attn_q_a.weight q8_0 +blk.23.attn_q_b.weight q8_0 +blk.23.ffn_down_shexp.weight q8_0 +blk.23.ffn_gate_shexp.weight q8_0 +blk.23.ffn_up_shexp.weight q8_0 +blk.24.attn_kv_a_mqa.weight q8_0 +blk.24.attn_output.weight q8_0 +blk.24.attn_q_a.weight q8_0 +blk.24.attn_q_b.weight q8_0 +blk.24.ffn_down_shexp.weight q8_0 +blk.24.ffn_gate_shexp.weight q8_0 +blk.24.ffn_up_shexp.weight q8_0 +blk.25.attn_kv_a_mqa.weight q8_0 +blk.25.attn_output.weight q8_0 +blk.25.attn_q_a.weight q8_0 +blk.25.attn_q_b.weight q8_0 +blk.25.ffn_down_shexp.weight q8_0 +blk.25.ffn_gate_shexp.weight q8_0 +blk.25.ffn_up_shexp.weight q8_0 +blk.26.attn_kv_a_mqa.weight q8_0 +blk.26.attn_output.weight q8_0 +blk.26.attn_q_a.weight q8_0 +blk.26.attn_q_b.weight q8_0 +blk.26.ffn_down_shexp.weight q8_0 +blk.26.ffn_gate_shexp.weight q8_0 +blk.26.ffn_up_shexp.weight q8_0 +blk.27.attn_kv_a_mqa.weight q8_0 +blk.27.attn_output.weight q8_0 +blk.27.attn_q_a.weight q8_0 +blk.27.attn_q_b.weight q8_0 +blk.27.ffn_down_shexp.weight q8_0 +blk.27.ffn_gate_shexp.weight q8_0 +blk.27.ffn_up_shexp.weight q8_0 +blk.28.attn_kv_a_mqa.weight q8_0 +blk.28.attn_output.weight q8_0 +blk.28.attn_q_a.weight q8_0 +blk.28.attn_q_b.weight q8_0 +blk.28.ffn_down_shexp.weight q8_0 +blk.28.ffn_gate_shexp.weight q8_0 +blk.28.ffn_up_shexp.weight q8_0 +blk.29.attn_kv_a_mqa.weight q8_0 +blk.29.attn_output.weight q8_0 +blk.29.attn_q_a.weight q8_0 +blk.29.attn_q_b.weight q8_0 +blk.29.ffn_down_shexp.weight q8_0 +blk.29.ffn_gate_shexp.weight q8_0 +blk.29.ffn_up_shexp.weight q8_0 +blk.30.attn_kv_a_mqa.weight q8_0 +blk.30.attn_output.weight q8_0 +blk.30.attn_q_a.weight q8_0 +blk.30.attn_q_b.weight q8_0 +blk.30.ffn_down_shexp.weight q8_0 +blk.30.ffn_gate_shexp.weight q8_0 +blk.30.ffn_up_shexp.weight q8_0 +blk.31.attn_kv_a_mqa.weight q8_0 +blk.31.attn_output.weight q8_0 +blk.31.attn_q_a.weight q8_0 +blk.31.attn_q_b.weight q8_0 +blk.31.ffn_down_shexp.weight q8_0 +blk.31.ffn_gate_shexp.weight q8_0 +blk.31.ffn_up_shexp.weight q8_0 +blk.32.attn_kv_a_mqa.weight q8_0 +blk.32.attn_output.weight q8_0 +blk.32.attn_q_a.weight q8_0 +blk.32.attn_q_b.weight q8_0 +blk.32.ffn_down_shexp.weight q8_0 +blk.32.ffn_gate_shexp.weight q8_0 +blk.32.ffn_up_shexp.weight q8_0 +blk.33.attn_kv_a_mqa.weight q8_0 +blk.33.attn_output.weight q8_0 +blk.33.attn_q_a.weight q8_0 +blk.33.attn_q_b.weight q8_0 +blk.33.ffn_down_shexp.weight q8_0 +blk.33.ffn_gate_shexp.weight q8_0 +blk.33.ffn_up_shexp.weight q8_0 +blk.34.attn_kv_a_mqa.weight q8_0 +blk.34.attn_output.weight q8_0 +blk.34.attn_q_a.weight q8_0 +blk.34.attn_q_b.weight q8_0 +blk.34.ffn_down_shexp.weight q8_0 +blk.34.ffn_gate_shexp.weight q8_0 +blk.34.ffn_up_shexp.weight q8_0 +blk.35.attn_kv_a_mqa.weight q8_0 +blk.35.attn_output.weight q8_0 +blk.35.attn_q_a.weight q8_0 +blk.35.attn_q_b.weight q8_0 +blk.35.ffn_down_shexp.weight q8_0 +blk.35.ffn_gate_shexp.weight q8_0 +blk.35.ffn_up_shexp.weight q8_0 +blk.36.attn_kv_a_mqa.weight q8_0 +blk.36.attn_output.weight q8_0 +blk.36.attn_q_a.weight q8_0 +blk.36.attn_q_b.weight q8_0 +blk.36.ffn_down_shexp.weight q8_0 +blk.36.ffn_gate_shexp.weight q8_0 +blk.36.ffn_up_shexp.weight q8_0 +blk.37.attn_kv_a_mqa.weight q8_0 +blk.37.attn_output.weight q8_0 +blk.37.attn_q_a.weight q8_0 +blk.37.attn_q_b.weight q8_0 +blk.37.ffn_down_shexp.weight q8_0 +blk.37.ffn_gate_shexp.weight q8_0 +blk.37.ffn_up_shexp.weight q8_0 +blk.38.attn_kv_a_mqa.weight q8_0 +blk.38.attn_output.weight q8_0 +blk.38.attn_q_a.weight q8_0 +blk.38.attn_q_b.weight q8_0 +blk.38.ffn_down_shexp.weight q8_0 +blk.38.ffn_gate_shexp.weight q8_0 +blk.38.ffn_up_shexp.weight q8_0 +blk.39.attn_kv_a_mqa.weight q8_0 +blk.39.attn_output.weight q8_0 +blk.39.attn_q_a.weight q8_0 +blk.39.attn_q_b.weight q8_0 +blk.39.ffn_down_shexp.weight q8_0 +blk.39.ffn_gate_shexp.weight q8_0 +blk.39.ffn_up_shexp.weight q8_0 +blk.40.attn_kv_a_mqa.weight q8_0 +blk.40.attn_output.weight q8_0 +blk.40.attn_q_a.weight q8_0 +blk.40.attn_q_b.weight q8_0 +blk.40.ffn_down_shexp.weight q8_0 +blk.40.ffn_gate_shexp.weight q8_0 +blk.40.ffn_up_shexp.weight q8_0 +blk.41.attn_kv_a_mqa.weight q8_0 +blk.41.attn_output.weight q8_0 +blk.41.attn_q_a.weight q8_0 +blk.41.attn_q_b.weight q8_0 +blk.41.ffn_down_shexp.weight q8_0 +blk.41.ffn_gate_shexp.weight q8_0 +blk.41.ffn_up_shexp.weight q8_0 +blk.42.attn_kv_a_mqa.weight q8_0 +blk.42.attn_output.weight q8_0 +blk.42.attn_q_a.weight q8_0 +blk.42.attn_q_b.weight q8_0 +blk.42.ffn_down_shexp.weight q8_0 +blk.42.ffn_gate_shexp.weight q8_0 +blk.42.ffn_up_shexp.weight q8_0 +blk.43.attn_kv_a_mqa.weight q8_0 +blk.43.attn_output.weight q8_0 +blk.43.attn_q_a.weight q8_0 +blk.43.attn_q_b.weight q8_0 +blk.43.ffn_down_shexp.weight q8_0 +blk.43.ffn_gate_shexp.weight q8_0 +blk.43.ffn_up_shexp.weight q8_0 +blk.44.attn_kv_a_mqa.weight q8_0 +blk.44.attn_output.weight q8_0 +blk.44.attn_q_a.weight q8_0 +blk.44.attn_q_b.weight q8_0 +blk.44.ffn_down_shexp.weight q8_0 +blk.44.ffn_gate_shexp.weight q8_0 +blk.44.ffn_up_shexp.weight q8_0 +blk.45.attn_kv_a_mqa.weight q8_0 +blk.45.attn_output.weight q8_0 +blk.45.attn_q_a.weight q8_0 +blk.45.attn_q_b.weight q8_0 +blk.45.ffn_down_shexp.weight q8_0 +blk.45.ffn_gate_shexp.weight q8_0 +blk.45.ffn_up_shexp.weight q8_0 +blk.46.attn_kv_a_mqa.weight q8_0 +blk.46.attn_output.weight q8_0 +blk.46.attn_q_a.weight q8_0 +blk.46.attn_q_b.weight q8_0 +blk.46.ffn_down_shexp.weight q8_0 +blk.46.ffn_gate_shexp.weight q8_0 +blk.46.ffn_up_shexp.weight q8_0 +blk.47.attn_kv_a_mqa.weight q8_0 +blk.47.attn_output.weight q8_0 +blk.47.attn_q_a.weight q8_0 +blk.47.attn_q_b.weight q8_0 +blk.47.ffn_down_shexp.weight q8_0 +blk.47.ffn_gate_shexp.weight q8_0 +blk.47.ffn_up_shexp.weight q8_0 +blk.48.attn_kv_a_mqa.weight q8_0 +blk.48.attn_output.weight q8_0 +blk.48.attn_q_a.weight q8_0 +blk.48.attn_q_b.weight q8_0 +blk.48.ffn_down_shexp.weight q8_0 +blk.48.ffn_gate_shexp.weight q8_0 +blk.48.ffn_up_shexp.weight q8_0 +blk.49.attn_kv_a_mqa.weight q8_0 +blk.49.attn_output.weight q8_0 +blk.49.attn_q_a.weight q8_0 +blk.49.attn_q_b.weight q8_0 +blk.49.ffn_down_shexp.weight q8_0 +blk.49.ffn_gate_shexp.weight q8_0 +blk.49.ffn_up_shexp.weight q8_0 +blk.50.attn_kv_a_mqa.weight q8_0 +blk.50.attn_output.weight q8_0 +blk.50.attn_q_a.weight q8_0 +blk.50.attn_q_b.weight q8_0 +blk.50.ffn_down_shexp.weight q8_0 +blk.50.ffn_gate_shexp.weight q8_0 +blk.50.ffn_up_shexp.weight q8_0 +blk.51.attn_kv_a_mqa.weight q8_0 +blk.51.attn_output.weight q8_0 +blk.51.attn_q_a.weight q8_0 +blk.51.attn_q_b.weight q8_0 +blk.51.ffn_down_shexp.weight q8_0 +blk.51.ffn_gate_shexp.weight q8_0 +blk.51.ffn_up_shexp.weight q8_0 +blk.52.attn_kv_a_mqa.weight q8_0 +blk.52.attn_output.weight q8_0 +blk.52.attn_q_a.weight q8_0 +blk.52.attn_q_b.weight q8_0 +blk.52.ffn_down_shexp.weight q8_0 +blk.52.ffn_gate_shexp.weight q8_0 +blk.52.ffn_up_shexp.weight q8_0 +blk.53.attn_kv_a_mqa.weight q8_0 +blk.53.attn_output.weight q8_0 +blk.53.attn_q_a.weight q8_0 +blk.53.attn_q_b.weight q8_0 +blk.53.ffn_down_shexp.weight q8_0 +blk.53.ffn_gate_shexp.weight q8_0 +blk.53.ffn_up_shexp.weight q8_0 +blk.54.attn_kv_a_mqa.weight q8_0 +blk.54.attn_output.weight q8_0 +blk.54.attn_q_a.weight q8_0 +blk.54.attn_q_b.weight q8_0 +blk.54.ffn_down_shexp.weight q8_0 +blk.54.ffn_gate_shexp.weight q8_0 +blk.54.ffn_up_shexp.weight q8_0 +blk.55.attn_kv_a_mqa.weight q8_0 +blk.55.attn_output.weight q8_0 +blk.55.attn_q_a.weight q8_0 +blk.55.attn_q_b.weight q8_0 +blk.55.ffn_down_shexp.weight q8_0 +blk.55.ffn_gate_shexp.weight q8_0 +blk.55.ffn_up_shexp.weight q8_0 +blk.56.attn_kv_a_mqa.weight q8_0 +blk.56.attn_output.weight q8_0 +blk.56.attn_q_a.weight q8_0 +blk.56.attn_q_b.weight q8_0 +blk.56.ffn_down_shexp.weight q8_0 +blk.56.ffn_gate_shexp.weight q8_0 +blk.56.ffn_up_shexp.weight q8_0 +blk.57.attn_kv_a_mqa.weight q8_0 +blk.57.attn_output.weight q8_0 +blk.57.attn_q_a.weight q8_0 +blk.57.attn_q_b.weight q8_0 +blk.57.ffn_down_shexp.weight q8_0 +blk.57.ffn_gate_shexp.weight q8_0 +blk.57.ffn_up_shexp.weight q8_0 +blk.58.attn_kv_a_mqa.weight q8_0 +blk.58.attn_output.weight q8_0 +blk.58.attn_q_a.weight q8_0 +blk.58.attn_q_b.weight q8_0 +blk.58.ffn_down_shexp.weight q8_0 +blk.58.ffn_gate_shexp.weight q8_0 +blk.58.ffn_up_shexp.weight q8_0 +blk.59.attn_kv_a_mqa.weight q8_0 +blk.59.attn_output.weight q8_0 +blk.59.attn_q_a.weight q8_0 +blk.59.attn_q_b.weight q8_0 +blk.59.ffn_down_shexp.weight q8_0 +blk.59.ffn_gate_shexp.weight q8_0 +blk.59.ffn_up_shexp.weight q8_0 +blk.60.attn_kv_a_mqa.weight q8_0 +blk.60.attn_output.weight q8_0 +blk.60.attn_q_a.weight q8_0 +blk.60.attn_q_b.weight q8_0 +blk.60.ffn_down_shexp.weight q8_0 +blk.60.ffn_gate_shexp.weight q8_0 +blk.60.ffn_up_shexp.weight q8_0 diff --git a/tests/snapshots/gemma-3-4b-it.schema b/tests/snapshots/gemma-3-4b-it.schema new file mode 100644 index 0000000000..1bec284c87 --- /dev/null +++ b/tests/snapshots/gemma-3-4b-it.schema @@ -0,0 +1,1455 @@ +# Model: gemma-3-4b-it +# n_embd=2560, n_ff=10240, n_vocab=262144, n_layer=34, n_head=8, n_head_kv=4 + +[F32] f32 +token_embd.weight q6_K + +[F16] f16 +token_embd.weight q6_K + +[Q4_0] q4_0 +token_embd.weight q6_K + +[Q4_1] q4_1 +token_embd.weight q6_K + +[Q8_0] q8_0 + +[Q5_0] q5_0 +token_embd.weight q6_K + +[Q5_1] q5_1 +token_embd.weight q6_K + +[Q2_K] q2_K +token_embd.weight q6_K +blk.0.ffn_down.weight q3_K +blk.0.attn_output.weight q3_K +blk.0.attn_v.weight q3_K +blk.1.ffn_down.weight q3_K +blk.1.attn_output.weight q3_K +blk.1.attn_v.weight q3_K +blk.10.ffn_down.weight q3_K +blk.10.attn_output.weight q3_K +blk.10.attn_v.weight q3_K +blk.11.ffn_down.weight q3_K +blk.11.attn_output.weight q3_K +blk.11.attn_v.weight q3_K +blk.12.ffn_down.weight q3_K +blk.12.attn_output.weight q3_K +blk.12.attn_v.weight q3_K +blk.13.ffn_down.weight q3_K +blk.13.attn_output.weight q3_K +blk.13.attn_v.weight q3_K +blk.14.attn_output.weight q3_K +blk.14.attn_v.weight q3_K +blk.2.ffn_down.weight q3_K +blk.2.attn_output.weight q3_K +blk.2.attn_v.weight q3_K +blk.3.ffn_down.weight q3_K +blk.3.attn_output.weight q3_K +blk.3.attn_v.weight q3_K +blk.4.ffn_down.weight q3_K +blk.4.attn_output.weight q3_K +blk.4.attn_v.weight q3_K +blk.5.ffn_down.weight q3_K +blk.5.attn_output.weight q3_K +blk.5.attn_v.weight q3_K +blk.6.ffn_down.weight q3_K +blk.6.attn_output.weight q3_K +blk.6.attn_v.weight q3_K +blk.7.ffn_down.weight q3_K +blk.7.attn_output.weight q3_K +blk.7.attn_v.weight q3_K +blk.8.ffn_down.weight q3_K +blk.8.attn_output.weight q3_K +blk.8.attn_v.weight q3_K +blk.9.ffn_down.weight q3_K +blk.9.attn_output.weight q3_K +blk.9.attn_v.weight q3_K +blk.14.ffn_down.weight q3_K +blk.15.ffn_down.weight q3_K +blk.15.attn_output.weight q3_K +blk.15.attn_v.weight q3_K +blk.16.ffn_down.weight q3_K +blk.16.attn_output.weight q3_K +blk.16.attn_v.weight q3_K +blk.17.ffn_down.weight q3_K +blk.17.attn_output.weight q3_K +blk.17.attn_v.weight q3_K +blk.18.ffn_down.weight q3_K +blk.18.attn_output.weight q3_K +blk.18.attn_v.weight q3_K +blk.19.ffn_down.weight q3_K +blk.19.attn_output.weight q3_K +blk.19.attn_v.weight q3_K +blk.20.ffn_down.weight q3_K +blk.20.attn_output.weight q3_K +blk.20.attn_v.weight q3_K +blk.21.ffn_down.weight q3_K +blk.21.attn_output.weight q3_K +blk.21.attn_v.weight q3_K +blk.22.ffn_down.weight q3_K +blk.22.attn_output.weight q3_K +blk.22.attn_v.weight q3_K +blk.23.ffn_down.weight q3_K +blk.23.attn_output.weight q3_K +blk.23.attn_v.weight q3_K +blk.24.ffn_down.weight q3_K +blk.24.attn_output.weight q3_K +blk.24.attn_v.weight q3_K +blk.25.ffn_down.weight q3_K +blk.25.attn_output.weight q3_K +blk.25.attn_v.weight q3_K +blk.26.ffn_down.weight q3_K +blk.26.attn_output.weight q3_K +blk.26.attn_v.weight q3_K +blk.27.ffn_down.weight q3_K +blk.27.attn_output.weight q3_K +blk.27.attn_v.weight q3_K +blk.28.ffn_down.weight q3_K +blk.28.attn_output.weight q3_K +blk.28.attn_v.weight q3_K +blk.29.ffn_down.weight q3_K +blk.29.attn_output.weight q3_K +blk.29.attn_v.weight q3_K +blk.30.ffn_down.weight q3_K +blk.30.attn_output.weight q3_K +blk.30.attn_v.weight q3_K +blk.31.ffn_down.weight q3_K +blk.31.attn_output.weight q3_K +blk.31.attn_v.weight q3_K +blk.32.ffn_down.weight q3_K +blk.32.attn_output.weight q3_K +blk.32.attn_v.weight q3_K +blk.33.ffn_down.weight q3_K +blk.33.attn_output.weight q3_K +blk.33.attn_v.weight q3_K + +[Q3_K_S] q3_K +token_embd.weight q6_K + +[Q3_K_M] q3_K +token_embd.weight q6_K +blk.0.ffn_down.weight q5_K +blk.0.attn_output.weight q4_K +blk.0.attn_v.weight q5_K +blk.1.ffn_down.weight q5_K +blk.1.attn_output.weight q4_K +blk.1.attn_v.weight q5_K +blk.10.ffn_down.weight q4_K +blk.10.attn_output.weight q4_K +blk.10.attn_v.weight q4_K +blk.11.ffn_down.weight q4_K +blk.11.attn_output.weight q4_K +blk.11.attn_v.weight q4_K +blk.12.ffn_down.weight q4_K +blk.12.attn_output.weight q4_K +blk.12.attn_v.weight q4_K +blk.13.ffn_down.weight q4_K +blk.13.attn_output.weight q4_K +blk.13.attn_v.weight q4_K +blk.14.attn_output.weight q4_K +blk.14.attn_v.weight q4_K +blk.2.ffn_down.weight q4_K +blk.2.attn_output.weight q4_K +blk.2.attn_v.weight q4_K +blk.3.ffn_down.weight q4_K +blk.3.attn_output.weight q4_K +blk.3.attn_v.weight q4_K +blk.4.ffn_down.weight q4_K +blk.4.attn_output.weight q4_K +blk.4.attn_v.weight q4_K +blk.5.ffn_down.weight q4_K +blk.5.attn_output.weight q4_K +blk.5.attn_v.weight q4_K +blk.6.ffn_down.weight q4_K +blk.6.attn_output.weight q4_K +blk.6.attn_v.weight q4_K +blk.7.ffn_down.weight q4_K +blk.7.attn_output.weight q4_K +blk.7.attn_v.weight q4_K +blk.8.ffn_down.weight q4_K +blk.8.attn_output.weight q4_K +blk.8.attn_v.weight q4_K +blk.9.ffn_down.weight q4_K +blk.9.attn_output.weight q4_K +blk.9.attn_v.weight q4_K +blk.14.ffn_down.weight q4_K +blk.15.ffn_down.weight q4_K +blk.15.attn_output.weight q4_K +blk.15.attn_v.weight q4_K +blk.16.ffn_down.weight q4_K +blk.16.attn_output.weight q4_K +blk.16.attn_v.weight q4_K +blk.17.ffn_down.weight q4_K +blk.17.attn_output.weight q4_K +blk.17.attn_v.weight q4_K +blk.18.ffn_down.weight q4_K +blk.18.attn_output.weight q4_K +blk.18.attn_v.weight q4_K +blk.19.ffn_down.weight q4_K +blk.19.attn_output.weight q4_K +blk.19.attn_v.weight q4_K +blk.20.ffn_down.weight q4_K +blk.20.attn_output.weight q4_K +blk.20.attn_v.weight q4_K +blk.21.ffn_down.weight q4_K +blk.21.attn_output.weight q4_K +blk.21.attn_v.weight q4_K +blk.22.ffn_down.weight q4_K +blk.22.attn_output.weight q4_K +blk.22.attn_v.weight q4_K +blk.23.ffn_down.weight q4_K +blk.23.attn_output.weight q4_K +blk.23.attn_v.weight q4_K +blk.24.ffn_down.weight q4_K +blk.24.attn_output.weight q4_K +blk.24.attn_v.weight q4_K +blk.25.ffn_down.weight q4_K +blk.25.attn_output.weight q4_K +blk.25.attn_v.weight q4_K +blk.26.ffn_down.weight q4_K +blk.26.attn_output.weight q4_K +blk.26.attn_v.weight q4_K +blk.27.ffn_down.weight q4_K +blk.27.attn_output.weight q4_K +blk.27.attn_v.weight q4_K +blk.28.ffn_down.weight q4_K +blk.28.attn_output.weight q4_K +blk.28.attn_v.weight q4_K +blk.29.ffn_down.weight q4_K +blk.29.attn_output.weight q4_K +blk.29.attn_v.weight q4_K +blk.30.ffn_down.weight q4_K +blk.30.attn_output.weight q4_K +blk.30.attn_v.weight q4_K +blk.31.ffn_down.weight q4_K +blk.31.attn_output.weight q4_K +blk.31.attn_v.weight q4_K +blk.32.ffn_down.weight q4_K +blk.32.attn_output.weight q4_K +blk.32.attn_v.weight q4_K +blk.33.ffn_down.weight q4_K +blk.33.attn_output.weight q4_K +blk.33.attn_v.weight q4_K + +[Q3_K_L] q3_K +token_embd.weight q6_K +blk.0.ffn_down.weight q5_K +blk.0.attn_output.weight q5_K +blk.0.attn_v.weight q5_K +blk.1.ffn_down.weight q5_K +blk.1.attn_output.weight q5_K +blk.1.attn_v.weight q5_K +blk.10.ffn_down.weight q5_K +blk.10.attn_output.weight q5_K +blk.10.attn_v.weight q5_K +blk.11.ffn_down.weight q5_K +blk.11.attn_output.weight q5_K +blk.11.attn_v.weight q5_K +blk.12.ffn_down.weight q5_K +blk.12.attn_output.weight q5_K +blk.12.attn_v.weight q5_K +blk.13.ffn_down.weight q5_K +blk.13.attn_output.weight q5_K +blk.13.attn_v.weight q5_K +blk.14.attn_output.weight q5_K +blk.14.attn_v.weight q5_K +blk.2.ffn_down.weight q5_K +blk.2.attn_output.weight q5_K +blk.2.attn_v.weight q5_K +blk.3.ffn_down.weight q5_K +blk.3.attn_output.weight q5_K +blk.3.attn_v.weight q5_K +blk.4.ffn_down.weight q5_K +blk.4.attn_output.weight q5_K +blk.4.attn_v.weight q5_K +blk.5.ffn_down.weight q5_K +blk.5.attn_output.weight q5_K +blk.5.attn_v.weight q5_K +blk.6.ffn_down.weight q5_K +blk.6.attn_output.weight q5_K +blk.6.attn_v.weight q5_K +blk.7.ffn_down.weight q5_K +blk.7.attn_output.weight q5_K +blk.7.attn_v.weight q5_K +blk.8.ffn_down.weight q5_K +blk.8.attn_output.weight q5_K +blk.8.attn_v.weight q5_K +blk.9.ffn_down.weight q5_K +blk.9.attn_output.weight q5_K +blk.9.attn_v.weight q5_K +blk.14.ffn_down.weight q5_K +blk.15.ffn_down.weight q5_K +blk.15.attn_output.weight q5_K +blk.15.attn_v.weight q5_K +blk.16.ffn_down.weight q5_K +blk.16.attn_output.weight q5_K +blk.16.attn_v.weight q5_K +blk.17.ffn_down.weight q5_K +blk.17.attn_output.weight q5_K +blk.17.attn_v.weight q5_K +blk.18.ffn_down.weight q5_K +blk.18.attn_output.weight q5_K +blk.18.attn_v.weight q5_K +blk.19.ffn_down.weight q5_K +blk.19.attn_output.weight q5_K +blk.19.attn_v.weight q5_K +blk.20.ffn_down.weight q5_K +blk.20.attn_output.weight q5_K +blk.20.attn_v.weight q5_K +blk.21.ffn_down.weight q5_K +blk.21.attn_output.weight q5_K +blk.21.attn_v.weight q5_K +blk.22.ffn_down.weight q5_K +blk.22.attn_output.weight q5_K +blk.22.attn_v.weight q5_K +blk.23.ffn_down.weight q5_K +blk.23.attn_output.weight q5_K +blk.23.attn_v.weight q5_K +blk.24.ffn_down.weight q5_K +blk.24.attn_output.weight q5_K +blk.24.attn_v.weight q5_K +blk.25.ffn_down.weight q5_K +blk.25.attn_output.weight q5_K +blk.25.attn_v.weight q5_K +blk.26.ffn_down.weight q5_K +blk.26.attn_output.weight q5_K +blk.26.attn_v.weight q5_K +blk.27.ffn_down.weight q5_K +blk.27.attn_output.weight q5_K +blk.27.attn_v.weight q5_K +blk.28.ffn_down.weight q5_K +blk.28.attn_output.weight q5_K +blk.28.attn_v.weight q5_K +blk.29.ffn_down.weight q5_K +blk.29.attn_output.weight q5_K +blk.29.attn_v.weight q5_K +blk.30.ffn_down.weight q5_K +blk.30.attn_output.weight q5_K +blk.30.attn_v.weight q5_K +blk.31.ffn_down.weight q5_K +blk.31.attn_output.weight q5_K +blk.31.attn_v.weight q5_K +blk.32.ffn_down.weight q5_K +blk.32.attn_output.weight q5_K +blk.32.attn_v.weight q5_K +blk.33.ffn_down.weight q5_K +blk.33.attn_output.weight q5_K +blk.33.attn_v.weight q5_K + +[Q4_K_S] q4_K +token_embd.weight q6_K +blk.0.ffn_down.weight q5_K +blk.0.attn_v.weight q5_K +blk.1.ffn_down.weight q5_K +blk.1.attn_v.weight q5_K +blk.10.ffn_down.weight q5_K +blk.10.attn_v.weight q5_K +blk.11.ffn_down.weight q5_K +blk.11.attn_v.weight q5_K + +[Q4_K_M] q4_K +token_embd.weight q6_K +blk.0.ffn_down.weight q6_K +blk.0.attn_v.weight q6_K +blk.1.ffn_down.weight q6_K +blk.1.attn_v.weight q6_K +blk.10.ffn_down.weight q6_K +blk.10.attn_v.weight q6_K +blk.11.ffn_down.weight q6_K +blk.11.attn_v.weight q6_K +blk.14.attn_v.weight q6_K +blk.2.ffn_down.weight q6_K +blk.4.attn_v.weight q6_K +blk.5.ffn_down.weight q6_K +blk.7.attn_v.weight q6_K +blk.8.ffn_down.weight q6_K +blk.15.ffn_down.weight q6_K +blk.15.attn_v.weight q6_K +blk.18.ffn_down.weight q6_K +blk.18.attn_v.weight q6_K +blk.21.ffn_down.weight q6_K +blk.21.attn_v.weight q6_K +blk.24.ffn_down.weight q6_K +blk.24.attn_v.weight q6_K +blk.27.ffn_down.weight q6_K +blk.27.attn_v.weight q6_K +blk.29.ffn_down.weight q6_K +blk.29.attn_v.weight q6_K +blk.30.ffn_down.weight q6_K +blk.30.attn_v.weight q6_K +blk.31.ffn_down.weight q6_K +blk.31.attn_v.weight q6_K +blk.32.ffn_down.weight q6_K +blk.32.attn_v.weight q6_K +blk.33.ffn_down.weight q6_K +blk.33.attn_v.weight q6_K + +[Q5_K_S] q5_K +token_embd.weight q6_K + +[Q5_K_M] q5_K +token_embd.weight q6_K +blk.0.ffn_down.weight q6_K +blk.0.attn_v.weight q6_K +blk.1.ffn_down.weight q6_K +blk.1.attn_v.weight q6_K +blk.10.ffn_down.weight q6_K +blk.10.attn_v.weight q6_K +blk.11.ffn_down.weight q6_K +blk.11.attn_v.weight q6_K +blk.14.attn_v.weight q6_K +blk.2.ffn_down.weight q6_K +blk.4.attn_v.weight q6_K +blk.5.ffn_down.weight q6_K +blk.7.attn_v.weight q6_K +blk.8.ffn_down.weight q6_K +blk.15.ffn_down.weight q6_K +blk.15.attn_v.weight q6_K +blk.18.ffn_down.weight q6_K +blk.18.attn_v.weight q6_K +blk.21.ffn_down.weight q6_K +blk.21.attn_v.weight q6_K +blk.24.ffn_down.weight q6_K +blk.24.attn_v.weight q6_K +blk.27.ffn_down.weight q6_K +blk.27.attn_v.weight q6_K +blk.29.ffn_down.weight q6_K +blk.29.attn_v.weight q6_K +blk.30.ffn_down.weight q6_K +blk.30.attn_v.weight q6_K +blk.31.ffn_down.weight q6_K +blk.31.attn_v.weight q6_K +blk.32.ffn_down.weight q6_K +blk.32.attn_v.weight q6_K +blk.33.ffn_down.weight q6_K +blk.33.attn_v.weight q6_K + +[Q6_K] q6_K + +[IQ2_XXS] iq2_xxs +token_embd.weight q5_K +blk.0.ffn_down.weight q2_K +blk.0.attn_v.weight q2_K +blk.1.ffn_down.weight q2_K +blk.1.attn_v.weight q2_K +blk.10.ffn_down.weight q2_K +blk.10.attn_v.weight q2_K +blk.11.ffn_down.weight q2_K +blk.11.attn_v.weight q2_K +blk.12.attn_v.weight q2_K +blk.13.attn_v.weight q2_K +blk.14.attn_v.weight q2_K +blk.2.attn_v.weight q2_K +blk.3.attn_v.weight q2_K +blk.4.attn_v.weight q2_K +blk.5.attn_v.weight q2_K +blk.6.attn_v.weight q2_K +blk.7.attn_v.weight q2_K +blk.8.attn_v.weight q2_K +blk.9.attn_v.weight q2_K +blk.15.attn_v.weight q2_K +blk.16.attn_v.weight q2_K +blk.17.attn_v.weight q2_K +blk.18.attn_v.weight q2_K +blk.19.attn_v.weight q2_K +blk.20.attn_v.weight q2_K +blk.21.attn_v.weight q2_K +blk.22.attn_v.weight q2_K +blk.23.attn_v.weight q2_K +blk.24.attn_v.weight q2_K +blk.25.attn_v.weight q2_K +blk.26.attn_v.weight q2_K +blk.27.attn_v.weight q2_K +blk.28.attn_v.weight q2_K +blk.29.attn_v.weight q2_K +blk.30.attn_v.weight q2_K +blk.31.attn_v.weight q2_K +blk.32.attn_v.weight q2_K +blk.33.attn_v.weight q2_K + +[IQ2_XS] iq2_xs +token_embd.weight q5_K +blk.0.ffn_down.weight q2_K +blk.0.attn_v.weight q2_K +blk.1.ffn_down.weight q2_K +blk.1.attn_v.weight q2_K +blk.10.ffn_down.weight q2_K +blk.10.attn_v.weight q2_K +blk.11.ffn_down.weight q2_K +blk.11.attn_v.weight q2_K +blk.12.attn_v.weight q2_K +blk.13.attn_v.weight q2_K +blk.14.attn_v.weight q2_K +blk.2.attn_v.weight q2_K +blk.3.attn_v.weight q2_K +blk.4.attn_v.weight q2_K +blk.5.attn_v.weight q2_K +blk.6.attn_v.weight q2_K +blk.7.attn_v.weight q2_K +blk.8.attn_v.weight q2_K +blk.9.attn_v.weight q2_K +blk.15.attn_v.weight q2_K +blk.16.attn_v.weight q2_K +blk.17.attn_v.weight q2_K +blk.18.attn_v.weight q2_K +blk.19.attn_v.weight q2_K +blk.20.attn_v.weight q2_K +blk.21.attn_v.weight q2_K +blk.22.attn_v.weight q2_K +blk.23.attn_v.weight q2_K +blk.24.attn_v.weight q2_K +blk.25.attn_v.weight q2_K +blk.26.attn_v.weight q2_K +blk.27.attn_v.weight q2_K +blk.28.attn_v.weight q2_K +blk.29.attn_v.weight q2_K +blk.30.attn_v.weight q2_K +blk.31.attn_v.weight q2_K +blk.32.attn_v.weight q2_K +blk.33.attn_v.weight q2_K + +[Q2_K_S] q2_K +token_embd.weight q6_K +blk.0.ffn_down.weight q4_K +blk.1.ffn_down.weight q4_K +blk.10.ffn_down.weight q4_K +blk.11.ffn_down.weight q4_K + +[IQ3_XS] iq3_s +token_embd.weight q6_K +blk.0.attn_k.weight iq3_xxs +blk.0.attn_q.weight iq3_xxs +blk.1.attn_k.weight iq3_xxs +blk.1.attn_q.weight iq3_xxs +blk.10.attn_k.weight iq3_xxs +blk.10.attn_q.weight iq3_xxs +blk.11.attn_k.weight iq3_xxs +blk.11.attn_q.weight iq3_xxs +blk.12.ffn_gate.weight iq3_xxs +blk.12.ffn_up.weight iq3_xxs +blk.12.attn_k.weight iq3_xxs +blk.12.attn_q.weight iq3_xxs +blk.13.ffn_gate.weight iq3_xxs +blk.13.ffn_up.weight iq3_xxs +blk.13.attn_k.weight iq3_xxs +blk.13.attn_q.weight iq3_xxs +blk.14.ffn_gate.weight iq3_xxs +blk.14.ffn_up.weight iq3_xxs +blk.14.attn_k.weight iq3_xxs +blk.14.attn_q.weight iq3_xxs +blk.2.ffn_gate.weight iq3_xxs +blk.2.ffn_up.weight iq3_xxs +blk.2.attn_k.weight iq3_xxs +blk.2.attn_q.weight iq3_xxs +blk.3.ffn_gate.weight iq3_xxs +blk.3.ffn_up.weight iq3_xxs +blk.3.attn_k.weight iq3_xxs +blk.3.attn_q.weight iq3_xxs +blk.4.ffn_gate.weight iq3_xxs +blk.4.ffn_up.weight iq3_xxs +blk.4.attn_k.weight iq3_xxs +blk.4.attn_q.weight iq3_xxs +blk.5.ffn_gate.weight iq3_xxs +blk.5.ffn_up.weight iq3_xxs +blk.5.attn_k.weight iq3_xxs +blk.5.attn_q.weight iq3_xxs +blk.6.ffn_gate.weight iq3_xxs +blk.6.ffn_up.weight iq3_xxs +blk.6.attn_k.weight iq3_xxs +blk.6.attn_q.weight iq3_xxs +blk.7.ffn_gate.weight iq3_xxs +blk.7.ffn_up.weight iq3_xxs +blk.7.attn_k.weight iq3_xxs +blk.7.attn_q.weight iq3_xxs +blk.8.ffn_gate.weight iq3_xxs +blk.8.ffn_up.weight iq3_xxs +blk.8.attn_k.weight iq3_xxs +blk.8.attn_q.weight iq3_xxs +blk.9.ffn_gate.weight iq3_xxs +blk.9.ffn_up.weight iq3_xxs +blk.9.attn_k.weight iq3_xxs +blk.9.attn_q.weight iq3_xxs +blk.15.ffn_gate.weight iq3_xxs +blk.15.ffn_up.weight iq3_xxs +blk.15.attn_k.weight iq3_xxs +blk.15.attn_q.weight iq3_xxs +blk.16.ffn_gate.weight iq3_xxs +blk.16.ffn_up.weight iq3_xxs +blk.16.attn_k.weight iq3_xxs +blk.16.attn_q.weight iq3_xxs +blk.17.ffn_gate.weight iq3_xxs +blk.17.ffn_up.weight iq3_xxs +blk.17.attn_k.weight iq3_xxs +blk.17.attn_q.weight iq3_xxs +blk.18.ffn_gate.weight iq3_xxs +blk.18.ffn_up.weight iq3_xxs +blk.18.attn_k.weight iq3_xxs +blk.18.attn_q.weight iq3_xxs +blk.19.ffn_gate.weight iq3_xxs +blk.19.ffn_up.weight iq3_xxs +blk.19.attn_k.weight iq3_xxs +blk.19.attn_q.weight iq3_xxs +blk.20.ffn_gate.weight iq3_xxs +blk.20.ffn_up.weight iq3_xxs +blk.20.attn_k.weight iq3_xxs +blk.20.attn_q.weight iq3_xxs +blk.21.ffn_gate.weight iq3_xxs +blk.21.ffn_up.weight iq3_xxs +blk.21.attn_k.weight iq3_xxs +blk.21.attn_q.weight iq3_xxs +blk.22.ffn_gate.weight iq3_xxs +blk.22.ffn_up.weight iq3_xxs +blk.22.attn_k.weight iq3_xxs +blk.22.attn_q.weight iq3_xxs +blk.23.ffn_gate.weight iq3_xxs +blk.23.ffn_up.weight iq3_xxs +blk.23.attn_k.weight iq3_xxs +blk.23.attn_q.weight iq3_xxs +blk.24.ffn_gate.weight iq3_xxs +blk.24.ffn_up.weight iq3_xxs +blk.24.attn_k.weight iq3_xxs +blk.24.attn_q.weight iq3_xxs +blk.25.ffn_gate.weight iq3_xxs +blk.25.ffn_up.weight iq3_xxs +blk.25.attn_k.weight iq3_xxs +blk.25.attn_q.weight iq3_xxs +blk.26.ffn_gate.weight iq3_xxs +blk.26.ffn_up.weight iq3_xxs +blk.26.attn_k.weight iq3_xxs +blk.26.attn_q.weight iq3_xxs +blk.27.ffn_gate.weight iq3_xxs +blk.27.ffn_up.weight iq3_xxs +blk.27.attn_k.weight iq3_xxs +blk.27.attn_q.weight iq3_xxs +blk.28.ffn_gate.weight iq3_xxs +blk.28.ffn_up.weight iq3_xxs +blk.28.attn_k.weight iq3_xxs +blk.28.attn_q.weight iq3_xxs +blk.29.attn_k.weight iq3_xxs +blk.29.attn_q.weight iq3_xxs +blk.30.attn_k.weight iq3_xxs +blk.30.attn_q.weight iq3_xxs +blk.31.attn_k.weight iq3_xxs +blk.31.attn_q.weight iq3_xxs +blk.32.attn_k.weight iq3_xxs +blk.32.attn_q.weight iq3_xxs +blk.33.attn_k.weight iq3_xxs +blk.33.attn_q.weight iq3_xxs + +[IQ3_XXS] iq3_xxs +token_embd.weight q5_K +blk.0.ffn_down.weight q4_K +blk.0.attn_k.weight iq2_s +blk.0.attn_output.weight iq3_s +blk.0.attn_q.weight iq2_s +blk.0.attn_v.weight iq3_s +blk.1.ffn_down.weight q4_K +blk.1.attn_k.weight iq2_s +blk.1.attn_output.weight iq3_s +blk.1.attn_q.weight iq2_s +blk.1.attn_v.weight iq3_s +blk.10.ffn_down.weight q4_K +blk.10.attn_k.weight iq2_s +blk.10.attn_output.weight iq3_s +blk.10.attn_q.weight iq2_s +blk.10.attn_v.weight iq3_s +blk.11.ffn_down.weight q4_K +blk.11.attn_k.weight iq2_s +blk.11.attn_output.weight iq3_s +blk.11.attn_q.weight iq2_s +blk.11.attn_v.weight iq3_s +blk.12.ffn_down.weight q3_K +blk.12.attn_k.weight iq2_s +blk.12.attn_output.weight iq3_s +blk.12.attn_q.weight iq2_s +blk.12.attn_v.weight iq3_s +blk.13.ffn_down.weight q3_K +blk.13.attn_k.weight iq2_s +blk.13.attn_output.weight iq3_s +blk.13.attn_q.weight iq2_s +blk.13.attn_v.weight iq3_s +blk.14.attn_k.weight iq2_s +blk.14.attn_output.weight iq3_s +blk.14.attn_q.weight iq2_s +blk.14.attn_v.weight iq3_s +blk.2.ffn_down.weight q3_K +blk.2.attn_k.weight iq2_s +blk.2.attn_output.weight iq3_s +blk.2.attn_q.weight iq2_s +blk.2.attn_v.weight iq3_s +blk.3.ffn_down.weight q3_K +blk.3.attn_k.weight iq2_s +blk.3.attn_output.weight iq3_s +blk.3.attn_q.weight iq2_s +blk.3.attn_v.weight iq3_s +blk.4.ffn_down.weight q3_K +blk.4.attn_k.weight iq2_s +blk.4.attn_output.weight iq3_s +blk.4.attn_q.weight iq2_s +blk.4.attn_v.weight iq3_s +blk.5.ffn_down.weight q3_K +blk.5.attn_k.weight iq2_s +blk.5.attn_output.weight iq3_s +blk.5.attn_q.weight iq2_s +blk.5.attn_v.weight iq3_s +blk.6.ffn_down.weight q3_K +blk.6.attn_k.weight iq2_s +blk.6.attn_output.weight iq3_s +blk.6.attn_q.weight iq2_s +blk.6.attn_v.weight iq3_s +blk.7.ffn_down.weight q3_K +blk.7.attn_k.weight iq2_s +blk.7.attn_output.weight iq3_s +blk.7.attn_q.weight iq2_s +blk.7.attn_v.weight iq3_s +blk.8.ffn_down.weight q3_K +blk.8.attn_k.weight iq2_s +blk.8.attn_output.weight iq3_s +blk.8.attn_q.weight iq2_s +blk.8.attn_v.weight iq3_s +blk.9.ffn_down.weight q3_K +blk.9.attn_k.weight iq2_s +blk.9.attn_output.weight iq3_s +blk.9.attn_q.weight iq2_s +blk.9.attn_v.weight iq3_s +blk.14.ffn_down.weight q3_K +blk.15.ffn_down.weight q3_K +blk.15.attn_k.weight iq2_s +blk.15.attn_output.weight iq3_s +blk.15.attn_q.weight iq2_s +blk.15.attn_v.weight iq3_s +blk.16.ffn_down.weight q3_K +blk.16.attn_k.weight iq2_s +blk.16.attn_output.weight iq3_s +blk.16.attn_q.weight iq2_s +blk.16.attn_v.weight iq3_s +blk.17.ffn_down.weight q3_K +blk.17.attn_k.weight iq2_s +blk.17.attn_output.weight iq3_s +blk.17.attn_q.weight iq2_s +blk.17.attn_v.weight iq3_s +blk.18.ffn_down.weight q3_K +blk.18.attn_k.weight iq2_s +blk.18.attn_output.weight iq3_s +blk.18.attn_q.weight iq2_s +blk.18.attn_v.weight iq3_s +blk.19.ffn_down.weight q3_K +blk.19.attn_k.weight iq2_s +blk.19.attn_output.weight iq3_s +blk.19.attn_q.weight iq2_s +blk.19.attn_v.weight iq3_s +blk.20.ffn_down.weight q3_K +blk.20.attn_k.weight iq2_s +blk.20.attn_output.weight iq3_s +blk.20.attn_q.weight iq2_s +blk.20.attn_v.weight iq3_s +blk.21.ffn_down.weight q3_K +blk.21.attn_k.weight iq2_s +blk.21.attn_output.weight iq3_s +blk.21.attn_q.weight iq2_s +blk.21.attn_v.weight iq3_s +blk.22.ffn_down.weight q3_K +blk.22.attn_k.weight iq2_s +blk.22.attn_output.weight iq3_s +blk.22.attn_q.weight iq2_s +blk.22.attn_v.weight iq3_s +blk.23.ffn_down.weight q3_K +blk.23.attn_k.weight iq2_s +blk.23.attn_output.weight iq3_s +blk.23.attn_q.weight iq2_s +blk.23.attn_v.weight iq3_s +blk.24.ffn_down.weight q3_K +blk.24.attn_k.weight iq2_s +blk.24.attn_output.weight iq3_s +blk.24.attn_q.weight iq2_s +blk.24.attn_v.weight iq3_s +blk.25.ffn_down.weight q3_K +blk.25.attn_k.weight iq2_s +blk.25.attn_output.weight iq3_s +blk.25.attn_q.weight iq2_s +blk.25.attn_v.weight iq3_s +blk.26.ffn_down.weight q3_K +blk.26.attn_k.weight iq2_s +blk.26.attn_output.weight iq3_s +blk.26.attn_q.weight iq2_s +blk.26.attn_v.weight iq3_s +blk.27.ffn_down.weight q3_K +blk.27.attn_k.weight iq2_s +blk.27.attn_output.weight iq3_s +blk.27.attn_q.weight iq2_s +blk.27.attn_v.weight iq3_s +blk.28.ffn_down.weight q3_K +blk.28.attn_k.weight iq2_s +blk.28.attn_output.weight iq3_s +blk.28.attn_q.weight iq2_s +blk.28.attn_v.weight iq3_s +blk.29.ffn_down.weight q3_K +blk.29.attn_k.weight iq2_s +blk.29.attn_output.weight iq3_s +blk.29.attn_q.weight iq2_s +blk.29.attn_v.weight iq3_s +blk.30.ffn_down.weight q3_K +blk.30.attn_k.weight iq2_s +blk.30.attn_output.weight iq3_s +blk.30.attn_q.weight iq2_s +blk.30.attn_v.weight iq3_s +blk.31.ffn_down.weight q3_K +blk.31.attn_k.weight iq2_s +blk.31.attn_output.weight iq3_s +blk.31.attn_q.weight iq2_s +blk.31.attn_v.weight iq3_s +blk.32.ffn_down.weight q3_K +blk.32.attn_k.weight iq2_s +blk.32.attn_output.weight iq3_s +blk.32.attn_q.weight iq2_s +blk.32.attn_v.weight iq3_s +blk.33.ffn_down.weight q3_K +blk.33.attn_k.weight iq2_s +blk.33.attn_output.weight iq3_s +blk.33.attn_q.weight iq2_s +blk.33.attn_v.weight iq3_s + +[IQ1_S] iq1_s +token_embd.weight q5_K +blk.0.ffn_down.weight q2_K +blk.0.attn_output.weight iq2_xxs +blk.0.attn_v.weight q2_K +blk.1.ffn_down.weight q2_K +blk.1.attn_output.weight iq2_xxs +blk.1.attn_v.weight q2_K +blk.10.ffn_down.weight q2_K +blk.10.attn_output.weight iq2_xxs +blk.10.attn_v.weight q2_K +blk.11.ffn_down.weight q2_K +blk.11.attn_output.weight iq2_xxs +blk.11.attn_v.weight q2_K +blk.12.attn_output.weight iq2_xxs +blk.12.attn_v.weight q2_K +blk.13.attn_output.weight iq2_xxs +blk.13.attn_v.weight q2_K +blk.14.attn_output.weight iq2_xxs +blk.14.attn_v.weight q2_K +blk.2.attn_output.weight iq2_xxs +blk.2.attn_v.weight q2_K +blk.3.attn_output.weight iq2_xxs +blk.3.attn_v.weight q2_K +blk.4.attn_output.weight iq2_xxs +blk.4.attn_v.weight q2_K +blk.5.attn_output.weight iq2_xxs +blk.5.attn_v.weight q2_K +blk.6.attn_output.weight iq2_xxs +blk.6.attn_v.weight q2_K +blk.7.attn_output.weight iq2_xxs +blk.7.attn_v.weight q2_K +blk.8.attn_output.weight iq2_xxs +blk.8.attn_v.weight q2_K +blk.9.attn_output.weight iq2_xxs +blk.9.attn_v.weight q2_K +blk.15.attn_output.weight iq2_xxs +blk.15.attn_v.weight q2_K +blk.16.attn_output.weight iq2_xxs +blk.16.attn_v.weight q2_K +blk.17.attn_output.weight iq2_xxs +blk.17.attn_v.weight q2_K +blk.18.attn_output.weight iq2_xxs +blk.18.attn_v.weight q2_K +blk.19.attn_output.weight iq2_xxs +blk.19.attn_v.weight q2_K +blk.20.attn_output.weight iq2_xxs +blk.20.attn_v.weight q2_K +blk.21.attn_output.weight iq2_xxs +blk.21.attn_v.weight q2_K +blk.22.attn_output.weight iq2_xxs +blk.22.attn_v.weight q2_K +blk.23.attn_output.weight iq2_xxs +blk.23.attn_v.weight q2_K +blk.24.attn_output.weight iq2_xxs +blk.24.attn_v.weight q2_K +blk.25.attn_output.weight iq2_xxs +blk.25.attn_v.weight q2_K +blk.26.attn_output.weight iq2_xxs +blk.26.attn_v.weight q2_K +blk.27.attn_output.weight iq2_xxs +blk.27.attn_v.weight q2_K +blk.28.attn_output.weight iq2_xxs +blk.28.attn_v.weight q2_K +blk.29.attn_output.weight iq2_xxs +blk.29.attn_v.weight q2_K +blk.30.attn_output.weight iq2_xxs +blk.30.attn_v.weight q2_K +blk.31.attn_output.weight iq2_xxs +blk.31.attn_v.weight q2_K +blk.32.attn_output.weight iq2_xxs +blk.32.attn_v.weight q2_K +blk.33.attn_output.weight iq2_xxs +blk.33.attn_v.weight q2_K + +[IQ4_NL] iq4_nl +token_embd.weight q6_K +blk.0.ffn_down.weight q5_K +blk.1.ffn_down.weight q5_K +blk.10.ffn_down.weight q5_K +blk.11.ffn_down.weight q5_K + +[IQ3_S] iq3_s +token_embd.weight q6_K + +[IQ3_M] iq3_s +token_embd.weight q6_K +blk.0.ffn_down.weight q4_K +blk.0.attn_output.weight q4_K +blk.0.attn_v.weight q4_K +blk.1.ffn_down.weight q4_K +blk.1.attn_output.weight q4_K +blk.1.attn_v.weight q4_K +blk.10.ffn_down.weight q4_K +blk.10.attn_output.weight q4_K +blk.10.attn_v.weight q4_K +blk.11.ffn_down.weight q4_K +blk.11.attn_output.weight q4_K +blk.11.attn_v.weight q4_K +blk.12.attn_output.weight q4_K +blk.12.attn_v.weight q4_K +blk.13.attn_output.weight q4_K +blk.13.attn_v.weight q4_K +blk.14.attn_output.weight q4_K +blk.14.attn_v.weight q4_K +blk.2.attn_output.weight q4_K +blk.2.attn_v.weight q4_K +blk.3.attn_output.weight q4_K +blk.3.attn_v.weight q4_K +blk.4.attn_output.weight q4_K +blk.4.attn_v.weight q4_K +blk.5.attn_output.weight q4_K +blk.5.attn_v.weight q4_K +blk.6.attn_output.weight q4_K +blk.6.attn_v.weight q4_K +blk.7.attn_output.weight q4_K +blk.7.attn_v.weight q4_K +blk.8.attn_output.weight q4_K +blk.8.attn_v.weight q4_K +blk.9.attn_output.weight q4_K +blk.9.attn_v.weight q4_K +blk.15.attn_output.weight q4_K +blk.15.attn_v.weight q4_K +blk.16.attn_output.weight q4_K +blk.16.attn_v.weight q4_K +blk.17.attn_output.weight q4_K +blk.17.attn_v.weight q4_K +blk.18.attn_output.weight q4_K +blk.18.attn_v.weight q4_K +blk.19.attn_output.weight q4_K +blk.19.attn_v.weight q4_K +blk.20.attn_output.weight q4_K +blk.20.attn_v.weight q4_K +blk.21.attn_output.weight q4_K +blk.21.attn_v.weight q4_K +blk.22.attn_output.weight q4_K +blk.22.attn_v.weight q4_K +blk.23.attn_output.weight q4_K +blk.23.attn_v.weight q4_K +blk.24.attn_output.weight q4_K +blk.24.attn_v.weight q4_K +blk.25.attn_output.weight q4_K +blk.25.attn_v.weight q4_K +blk.26.attn_output.weight q4_K +blk.26.attn_v.weight q4_K +blk.27.attn_output.weight q4_K +blk.27.attn_v.weight q4_K +blk.28.attn_output.weight q4_K +blk.28.attn_v.weight q4_K +blk.29.attn_output.weight q4_K +blk.29.attn_v.weight q4_K +blk.30.attn_output.weight q4_K +blk.30.attn_v.weight q4_K +blk.31.attn_output.weight q4_K +blk.31.attn_v.weight q4_K +blk.32.attn_output.weight q4_K +blk.32.attn_v.weight q4_K +blk.33.attn_output.weight q4_K +blk.33.attn_v.weight q4_K + +[IQ2_S] iq2_xs +token_embd.weight q5_K +blk.0.ffn_down.weight iq3_s +blk.0.attn_output.weight iq3_s +blk.0.attn_v.weight iq3_s +blk.1.ffn_down.weight iq3_s +blk.1.attn_output.weight iq3_s +blk.1.attn_v.weight iq3_s +blk.10.ffn_down.weight iq3_s +blk.10.attn_output.weight iq3_s +blk.10.attn_v.weight iq3_s +blk.11.ffn_down.weight iq3_s +blk.11.attn_output.weight iq3_s +blk.11.attn_v.weight iq3_s +blk.12.attn_output.weight iq3_s +blk.12.attn_v.weight iq3_s +blk.13.attn_output.weight iq3_s +blk.13.attn_v.weight iq3_s +blk.14.attn_output.weight iq3_s +blk.14.attn_v.weight iq3_s +blk.2.attn_output.weight iq3_s +blk.2.attn_v.weight iq3_s +blk.3.attn_output.weight iq3_s +blk.3.attn_v.weight iq3_s +blk.4.attn_output.weight iq3_s +blk.4.attn_v.weight iq3_s +blk.5.attn_output.weight iq3_s +blk.5.attn_v.weight iq3_s +blk.6.attn_output.weight iq3_s +blk.6.attn_v.weight iq3_s +blk.7.attn_output.weight iq3_s +blk.7.attn_v.weight iq3_s +blk.8.attn_output.weight iq3_s +blk.8.attn_v.weight iq3_s +blk.9.attn_output.weight iq3_s +blk.9.attn_v.weight iq3_s +blk.15.attn_output.weight iq3_s +blk.15.attn_v.weight iq3_s +blk.16.attn_output.weight iq3_s +blk.16.attn_v.weight iq3_s +blk.17.attn_output.weight iq3_s +blk.17.attn_v.weight iq3_s +blk.18.attn_output.weight iq3_s +blk.18.attn_v.weight iq3_s +blk.19.attn_output.weight iq3_s +blk.19.attn_v.weight iq3_s +blk.20.attn_output.weight iq3_s +blk.20.attn_v.weight iq3_s +blk.21.attn_output.weight iq3_s +blk.21.attn_v.weight iq3_s +blk.22.attn_output.weight iq3_s +blk.22.attn_v.weight iq3_s +blk.23.attn_output.weight iq3_s +blk.23.attn_v.weight iq3_s +blk.24.attn_output.weight iq3_s +blk.24.attn_v.weight iq3_s +blk.25.attn_output.weight iq3_s +blk.25.attn_v.weight iq3_s +blk.26.attn_output.weight iq3_s +blk.26.attn_v.weight iq3_s +blk.27.attn_output.weight iq3_s +blk.27.attn_v.weight iq3_s +blk.28.attn_output.weight iq3_s +blk.28.attn_v.weight iq3_s +blk.29.attn_output.weight iq3_s +blk.29.attn_v.weight iq3_s +blk.30.attn_output.weight iq3_s +blk.30.attn_v.weight iq3_s +blk.31.attn_output.weight iq3_s +blk.31.attn_v.weight iq3_s +blk.32.attn_output.weight iq3_s +blk.32.attn_v.weight iq3_s +blk.33.attn_output.weight iq3_s +blk.33.attn_v.weight iq3_s + +[IQ2_M] iq2_s +token_embd.weight q5_K +blk.0.ffn_down.weight iq3_s +blk.0.attn_output.weight iq3_s +blk.0.attn_v.weight iq3_s +blk.1.ffn_down.weight iq3_s +blk.1.attn_output.weight iq3_s +blk.1.attn_v.weight iq3_s +blk.10.ffn_down.weight iq3_s +blk.10.attn_output.weight iq3_s +blk.10.attn_v.weight iq3_s +blk.11.ffn_down.weight iq3_s +blk.11.attn_output.weight iq3_s +blk.11.attn_v.weight iq3_s +blk.12.attn_output.weight iq3_s +blk.12.attn_v.weight iq3_s +blk.13.attn_output.weight iq3_s +blk.13.attn_v.weight iq3_s +blk.14.attn_output.weight iq3_s +blk.14.attn_v.weight iq3_s +blk.2.attn_output.weight iq3_s +blk.2.attn_v.weight iq3_s +blk.3.attn_output.weight iq3_s +blk.3.attn_v.weight iq3_s +blk.4.attn_output.weight iq3_s +blk.4.attn_v.weight iq3_s +blk.5.attn_output.weight iq3_s +blk.5.attn_v.weight iq3_s +blk.6.attn_output.weight iq3_s +blk.6.attn_v.weight iq3_s +blk.7.attn_output.weight iq3_s +blk.7.attn_v.weight iq3_s +blk.8.attn_output.weight iq3_s +blk.8.attn_v.weight iq3_s +blk.9.attn_output.weight iq3_s +blk.9.attn_v.weight iq3_s +blk.15.attn_output.weight iq3_s +blk.15.attn_v.weight iq3_s +blk.16.attn_output.weight iq3_s +blk.16.attn_v.weight iq3_s +blk.17.attn_output.weight iq3_s +blk.17.attn_v.weight iq3_s +blk.18.attn_output.weight iq3_s +blk.18.attn_v.weight iq3_s +blk.19.attn_output.weight iq3_s +blk.19.attn_v.weight iq3_s +blk.20.attn_output.weight iq3_s +blk.20.attn_v.weight iq3_s +blk.21.attn_output.weight iq3_s +blk.21.attn_v.weight iq3_s +blk.22.attn_output.weight iq3_s +blk.22.attn_v.weight iq3_s +blk.23.attn_output.weight iq3_s +blk.23.attn_v.weight iq3_s +blk.24.attn_output.weight iq3_s +blk.24.attn_v.weight iq3_s +blk.25.attn_output.weight iq3_s +blk.25.attn_v.weight iq3_s +blk.26.attn_output.weight iq3_s +blk.26.attn_v.weight iq3_s +blk.27.attn_output.weight iq3_s +blk.27.attn_v.weight iq3_s +blk.28.attn_output.weight iq3_s +blk.28.attn_v.weight iq3_s +blk.29.attn_output.weight iq3_s +blk.29.attn_v.weight iq3_s +blk.30.attn_output.weight iq3_s +blk.30.attn_v.weight iq3_s +blk.31.attn_output.weight iq3_s +blk.31.attn_v.weight iq3_s +blk.32.attn_output.weight iq3_s +blk.32.attn_v.weight iq3_s +blk.33.attn_output.weight iq3_s +blk.33.attn_v.weight iq3_s + +[IQ4_XS] iq4_xs +token_embd.weight q6_K +blk.0.ffn_down.weight q5_K +blk.1.ffn_down.weight q5_K +blk.10.ffn_down.weight q5_K +blk.11.ffn_down.weight q5_K + +[IQ1_M] iq1_m +token_embd.weight q5_K +blk.0.ffn_down.weight q2_K +blk.0.attn_output.weight iq2_xxs +blk.0.attn_v.weight q2_K +blk.1.ffn_down.weight q2_K +blk.1.attn_output.weight iq2_xxs +blk.1.attn_v.weight q2_K +blk.10.ffn_down.weight q2_K +blk.10.attn_output.weight iq2_xxs +blk.10.attn_v.weight q2_K +blk.11.ffn_down.weight q2_K +blk.11.attn_output.weight iq2_xxs +blk.11.attn_v.weight q2_K +blk.12.attn_output.weight iq2_xxs +blk.12.attn_v.weight q2_K +blk.13.attn_output.weight iq2_xxs +blk.13.attn_v.weight q2_K +blk.14.attn_output.weight iq2_xxs +blk.14.attn_v.weight q2_K +blk.2.attn_output.weight iq2_xxs +blk.2.attn_v.weight q2_K +blk.3.attn_output.weight iq2_xxs +blk.3.attn_v.weight q2_K +blk.4.attn_output.weight iq2_xxs +blk.4.attn_v.weight q2_K +blk.5.attn_output.weight iq2_xxs +blk.5.attn_v.weight q2_K +blk.6.attn_output.weight iq2_xxs +blk.6.attn_v.weight q2_K +blk.7.attn_output.weight iq2_xxs +blk.7.attn_v.weight q2_K +blk.8.attn_output.weight iq2_xxs +blk.8.attn_v.weight q2_K +blk.9.attn_output.weight iq2_xxs +blk.9.attn_v.weight q2_K +blk.15.attn_output.weight iq2_xxs +blk.15.attn_v.weight q2_K +blk.16.attn_output.weight iq2_xxs +blk.16.attn_v.weight q2_K +blk.17.attn_output.weight iq2_xxs +blk.17.attn_v.weight q2_K +blk.18.attn_output.weight iq2_xxs +blk.18.attn_v.weight q2_K +blk.19.attn_output.weight iq2_xxs +blk.19.attn_v.weight q2_K +blk.20.attn_output.weight iq2_xxs +blk.20.attn_v.weight q2_K +blk.21.attn_output.weight iq2_xxs +blk.21.attn_v.weight q2_K +blk.22.attn_output.weight iq2_xxs +blk.22.attn_v.weight q2_K +blk.23.attn_output.weight iq2_xxs +blk.23.attn_v.weight q2_K +blk.24.attn_output.weight iq2_xxs +blk.24.attn_v.weight q2_K +blk.25.attn_output.weight iq2_xxs +blk.25.attn_v.weight q2_K +blk.26.attn_output.weight iq2_xxs +blk.26.attn_v.weight q2_K +blk.27.attn_output.weight iq2_xxs +blk.27.attn_v.weight q2_K +blk.28.attn_output.weight iq2_xxs +blk.28.attn_v.weight q2_K +blk.29.attn_output.weight iq2_xxs +blk.29.attn_v.weight q2_K +blk.30.attn_output.weight iq2_xxs +blk.30.attn_v.weight q2_K +blk.31.attn_output.weight iq2_xxs +blk.31.attn_v.weight q2_K +blk.32.attn_output.weight iq2_xxs +blk.32.attn_v.weight q2_K +blk.33.attn_output.weight iq2_xxs +blk.33.attn_v.weight q2_K + +[BF16] bf16 +token_embd.weight q6_K + +[TQ1_0] tq1_0 +token_embd.weight q6_K + +[TQ2_0] tq2_0 +token_embd.weight q6_K + +[MXFP4_MOE] mxfp4 +token_embd.weight q8_0 +blk.0.ffn_down.weight q8_0 +blk.0.ffn_gate.weight q8_0 +blk.0.ffn_up.weight q8_0 +blk.0.attn_k.weight q8_0 +blk.0.attn_output.weight q8_0 +blk.0.attn_q.weight q8_0 +blk.0.attn_v.weight q8_0 +blk.1.ffn_down.weight q8_0 +blk.1.ffn_gate.weight q8_0 +blk.1.ffn_up.weight q8_0 +blk.1.attn_k.weight q8_0 +blk.1.attn_output.weight q8_0 +blk.1.attn_q.weight q8_0 +blk.1.attn_v.weight q8_0 +blk.10.ffn_down.weight q8_0 +blk.10.ffn_gate.weight q8_0 +blk.10.ffn_up.weight q8_0 +blk.10.attn_k.weight q8_0 +blk.10.attn_output.weight q8_0 +blk.10.attn_q.weight q8_0 +blk.10.attn_v.weight q8_0 +blk.11.ffn_down.weight q8_0 +blk.11.ffn_gate.weight q8_0 +blk.11.ffn_up.weight q8_0 +blk.11.attn_k.weight q8_0 +blk.11.attn_output.weight q8_0 +blk.11.attn_q.weight q8_0 +blk.11.attn_v.weight q8_0 +blk.12.ffn_down.weight q8_0 +blk.12.ffn_gate.weight q8_0 +blk.12.ffn_up.weight q8_0 +blk.12.attn_k.weight q8_0 +blk.12.attn_output.weight q8_0 +blk.12.attn_q.weight q8_0 +blk.12.attn_v.weight q8_0 +blk.13.ffn_down.weight q8_0 +blk.13.ffn_gate.weight q8_0 +blk.13.ffn_up.weight q8_0 +blk.13.attn_k.weight q8_0 +blk.13.attn_output.weight q8_0 +blk.13.attn_q.weight q8_0 +blk.13.attn_v.weight q8_0 +blk.14.ffn_gate.weight q8_0 +blk.14.ffn_up.weight q8_0 +blk.14.attn_k.weight q8_0 +blk.14.attn_output.weight q8_0 +blk.14.attn_q.weight q8_0 +blk.14.attn_v.weight q8_0 +blk.2.ffn_down.weight q8_0 +blk.2.ffn_gate.weight q8_0 +blk.2.ffn_up.weight q8_0 +blk.2.attn_k.weight q8_0 +blk.2.attn_output.weight q8_0 +blk.2.attn_q.weight q8_0 +blk.2.attn_v.weight q8_0 +blk.3.ffn_down.weight q8_0 +blk.3.ffn_gate.weight q8_0 +blk.3.ffn_up.weight q8_0 +blk.3.attn_k.weight q8_0 +blk.3.attn_output.weight q8_0 +blk.3.attn_q.weight q8_0 +blk.3.attn_v.weight q8_0 +blk.4.ffn_down.weight q8_0 +blk.4.ffn_gate.weight q8_0 +blk.4.ffn_up.weight q8_0 +blk.4.attn_k.weight q8_0 +blk.4.attn_output.weight q8_0 +blk.4.attn_q.weight q8_0 +blk.4.attn_v.weight q8_0 +blk.5.ffn_down.weight q8_0 +blk.5.ffn_gate.weight q8_0 +blk.5.ffn_up.weight q8_0 +blk.5.attn_k.weight q8_0 +blk.5.attn_output.weight q8_0 +blk.5.attn_q.weight q8_0 +blk.5.attn_v.weight q8_0 +blk.6.ffn_down.weight q8_0 +blk.6.ffn_gate.weight q8_0 +blk.6.ffn_up.weight q8_0 +blk.6.attn_k.weight q8_0 +blk.6.attn_output.weight q8_0 +blk.6.attn_q.weight q8_0 +blk.6.attn_v.weight q8_0 +blk.7.ffn_down.weight q8_0 +blk.7.ffn_gate.weight q8_0 +blk.7.ffn_up.weight q8_0 +blk.7.attn_k.weight q8_0 +blk.7.attn_output.weight q8_0 +blk.7.attn_q.weight q8_0 +blk.7.attn_v.weight q8_0 +blk.8.ffn_down.weight q8_0 +blk.8.ffn_gate.weight q8_0 +blk.8.ffn_up.weight q8_0 +blk.8.attn_k.weight q8_0 +blk.8.attn_output.weight q8_0 +blk.8.attn_q.weight q8_0 +blk.8.attn_v.weight q8_0 +blk.9.ffn_down.weight q8_0 +blk.9.ffn_gate.weight q8_0 +blk.9.ffn_up.weight q8_0 +blk.9.attn_k.weight q8_0 +blk.9.attn_output.weight q8_0 +blk.9.attn_q.weight q8_0 +blk.9.attn_v.weight q8_0 +blk.14.ffn_down.weight q8_0 +blk.15.ffn_down.weight q8_0 +blk.15.ffn_gate.weight q8_0 +blk.15.ffn_up.weight q8_0 +blk.15.attn_k.weight q8_0 +blk.15.attn_output.weight q8_0 +blk.15.attn_q.weight q8_0 +blk.15.attn_v.weight q8_0 +blk.16.ffn_down.weight q8_0 +blk.16.ffn_gate.weight q8_0 +blk.16.ffn_up.weight q8_0 +blk.16.attn_k.weight q8_0 +blk.16.attn_output.weight q8_0 +blk.16.attn_q.weight q8_0 +blk.16.attn_v.weight q8_0 +blk.17.ffn_down.weight q8_0 +blk.17.ffn_gate.weight q8_0 +blk.17.ffn_up.weight q8_0 +blk.17.attn_k.weight q8_0 +blk.17.attn_output.weight q8_0 +blk.17.attn_q.weight q8_0 +blk.17.attn_v.weight q8_0 +blk.18.ffn_down.weight q8_0 +blk.18.ffn_gate.weight q8_0 +blk.18.ffn_up.weight q8_0 +blk.18.attn_k.weight q8_0 +blk.18.attn_output.weight q8_0 +blk.18.attn_q.weight q8_0 +blk.18.attn_v.weight q8_0 +blk.19.ffn_down.weight q8_0 +blk.19.ffn_gate.weight q8_0 +blk.19.ffn_up.weight q8_0 +blk.19.attn_k.weight q8_0 +blk.19.attn_output.weight q8_0 +blk.19.attn_q.weight q8_0 +blk.19.attn_v.weight q8_0 +blk.20.ffn_down.weight q8_0 +blk.20.ffn_gate.weight q8_0 +blk.20.ffn_up.weight q8_0 +blk.20.attn_k.weight q8_0 +blk.20.attn_output.weight q8_0 +blk.20.attn_q.weight q8_0 +blk.20.attn_v.weight q8_0 +blk.21.ffn_down.weight q8_0 +blk.21.ffn_gate.weight q8_0 +blk.21.ffn_up.weight q8_0 +blk.21.attn_k.weight q8_0 +blk.21.attn_output.weight q8_0 +blk.21.attn_q.weight q8_0 +blk.21.attn_v.weight q8_0 +blk.22.ffn_down.weight q8_0 +blk.22.ffn_gate.weight q8_0 +blk.22.ffn_up.weight q8_0 +blk.22.attn_k.weight q8_0 +blk.22.attn_output.weight q8_0 +blk.22.attn_q.weight q8_0 +blk.22.attn_v.weight q8_0 +blk.23.ffn_down.weight q8_0 +blk.23.ffn_gate.weight q8_0 +blk.23.ffn_up.weight q8_0 +blk.23.attn_k.weight q8_0 +blk.23.attn_output.weight q8_0 +blk.23.attn_q.weight q8_0 +blk.23.attn_v.weight q8_0 +blk.24.ffn_down.weight q8_0 +blk.24.ffn_gate.weight q8_0 +blk.24.ffn_up.weight q8_0 +blk.24.attn_k.weight q8_0 +blk.24.attn_output.weight q8_0 +blk.24.attn_q.weight q8_0 +blk.24.attn_v.weight q8_0 +blk.25.ffn_down.weight q8_0 +blk.25.ffn_gate.weight q8_0 +blk.25.ffn_up.weight q8_0 +blk.25.attn_k.weight q8_0 +blk.25.attn_output.weight q8_0 +blk.25.attn_q.weight q8_0 +blk.25.attn_v.weight q8_0 +blk.26.ffn_down.weight q8_0 +blk.26.ffn_gate.weight q8_0 +blk.26.ffn_up.weight q8_0 +blk.26.attn_k.weight q8_0 +blk.26.attn_output.weight q8_0 +blk.26.attn_q.weight q8_0 +blk.26.attn_v.weight q8_0 +blk.27.ffn_down.weight q8_0 +blk.27.ffn_gate.weight q8_0 +blk.27.ffn_up.weight q8_0 +blk.27.attn_k.weight q8_0 +blk.27.attn_output.weight q8_0 +blk.27.attn_q.weight q8_0 +blk.27.attn_v.weight q8_0 +blk.28.ffn_down.weight q8_0 +blk.28.ffn_gate.weight q8_0 +blk.28.ffn_up.weight q8_0 +blk.28.attn_k.weight q8_0 +blk.28.attn_output.weight q8_0 +blk.28.attn_q.weight q8_0 +blk.28.attn_v.weight q8_0 +blk.29.ffn_down.weight q8_0 +blk.29.ffn_gate.weight q8_0 +blk.29.ffn_up.weight q8_0 +blk.29.attn_k.weight q8_0 +blk.29.attn_output.weight q8_0 +blk.29.attn_q.weight q8_0 +blk.29.attn_v.weight q8_0 +blk.30.ffn_down.weight q8_0 +blk.30.ffn_gate.weight q8_0 +blk.30.ffn_up.weight q8_0 +blk.30.attn_k.weight q8_0 +blk.30.attn_output.weight q8_0 +blk.30.attn_q.weight q8_0 +blk.30.attn_v.weight q8_0 +blk.31.ffn_down.weight q8_0 +blk.31.ffn_gate.weight q8_0 +blk.31.ffn_up.weight q8_0 +blk.31.attn_k.weight q8_0 +blk.31.attn_output.weight q8_0 +blk.31.attn_q.weight q8_0 +blk.31.attn_v.weight q8_0 +blk.32.ffn_down.weight q8_0 +blk.32.ffn_gate.weight q8_0 +blk.32.ffn_up.weight q8_0 +blk.32.attn_k.weight q8_0 +blk.32.attn_output.weight q8_0 +blk.32.attn_q.weight q8_0 +blk.32.attn_v.weight q8_0 +blk.33.ffn_down.weight q8_0 +blk.33.ffn_gate.weight q8_0 +blk.33.ffn_up.weight q8_0 +blk.33.attn_k.weight q8_0 +blk.33.attn_output.weight q8_0 +blk.33.attn_q.weight q8_0 +blk.33.attn_v.weight q8_0 diff --git a/tests/snapshots/gpt-oss-120b.schema b/tests/snapshots/gpt-oss-120b.schema new file mode 100644 index 0000000000..e66fbcfd2b --- /dev/null +++ b/tests/snapshots/gpt-oss-120b.schema @@ -0,0 +1,1616 @@ +# Model: gpt-oss-120b +# n_embd=2880, n_ff=2880, n_vocab=0, n_layer=36, n_head=64, n_head_kv=8, n_expert=128 + +[F32] f32 +output.weight q6_K + +[F16] f16 +output.weight q6_K + +[Q4_0] q4_0 +output.weight q6_K + +[Q4_1] q4_1 +output.weight q6_K + +[Q8_0] q8_0 + +[Q5_0] q5_0 +output.weight q6_K + +[Q5_1] q5_1 +output.weight q6_K + +[Q2_K] q2_K +output.weight q8_0 +blk.0.attn_output.weight q3_K +blk.0.attn_v.weight q4_K +blk.0.ffn_down_exps.weight q3_K +blk.1.attn_output.weight q3_K +blk.1.attn_v.weight q4_K +blk.1.ffn_down_exps.weight q3_K +blk.2.attn_output.weight q3_K +blk.2.attn_v.weight q4_K +blk.2.ffn_down_exps.weight q3_K +blk.3.attn_output.weight q3_K +blk.3.attn_v.weight q4_K +blk.3.ffn_down_exps.weight q3_K +blk.4.attn_output.weight q3_K +blk.4.attn_v.weight q4_K +blk.4.ffn_down_exps.weight q3_K +blk.5.attn_output.weight q3_K +blk.5.attn_v.weight q4_K +blk.5.ffn_down_exps.weight q3_K +blk.6.attn_output.weight q3_K +blk.6.attn_v.weight q4_K +blk.6.ffn_down_exps.weight q3_K +blk.7.attn_output.weight q3_K +blk.7.attn_v.weight q4_K +blk.7.ffn_down_exps.weight q3_K +blk.8.attn_output.weight q3_K +blk.8.attn_v.weight q4_K +blk.8.ffn_down_exps.weight q3_K +blk.9.attn_output.weight q3_K +blk.9.attn_v.weight q4_K +blk.9.ffn_down_exps.weight q3_K +blk.10.attn_output.weight q3_K +blk.10.attn_v.weight q4_K +blk.10.ffn_down_exps.weight q3_K +blk.11.attn_output.weight q3_K +blk.11.attn_v.weight q4_K +blk.11.ffn_down_exps.weight q3_K +blk.12.attn_output.weight q3_K +blk.12.attn_v.weight q4_K +blk.12.ffn_down_exps.weight q3_K +blk.13.attn_output.weight q3_K +blk.13.attn_v.weight q4_K +blk.13.ffn_down_exps.weight q3_K +blk.14.attn_output.weight q3_K +blk.14.attn_v.weight q4_K +blk.14.ffn_down_exps.weight q3_K +blk.15.attn_output.weight q3_K +blk.15.attn_v.weight q4_K +blk.15.ffn_down_exps.weight q3_K +blk.16.attn_output.weight q3_K +blk.16.attn_v.weight q4_K +blk.16.ffn_down_exps.weight q3_K +blk.17.attn_output.weight q3_K +blk.17.attn_v.weight q4_K +blk.17.ffn_down_exps.weight q3_K +blk.18.attn_output.weight q3_K +blk.18.attn_v.weight q4_K +blk.18.ffn_down_exps.weight q3_K +blk.19.attn_output.weight q3_K +blk.19.attn_v.weight q4_K +blk.19.ffn_down_exps.weight q3_K +blk.20.attn_output.weight q3_K +blk.20.attn_v.weight q4_K +blk.20.ffn_down_exps.weight q3_K +blk.21.attn_output.weight q3_K +blk.21.attn_v.weight q4_K +blk.21.ffn_down_exps.weight q3_K +blk.22.attn_output.weight q3_K +blk.22.attn_v.weight q4_K +blk.22.ffn_down_exps.weight q3_K +blk.23.attn_output.weight q3_K +blk.23.attn_v.weight q4_K +blk.23.ffn_down_exps.weight q3_K +blk.24.attn_output.weight q3_K +blk.24.attn_v.weight q4_K +blk.24.ffn_down_exps.weight q3_K +blk.25.attn_output.weight q3_K +blk.25.attn_v.weight q4_K +blk.25.ffn_down_exps.weight q3_K +blk.26.attn_output.weight q3_K +blk.26.attn_v.weight q4_K +blk.26.ffn_down_exps.weight q3_K +blk.27.attn_output.weight q3_K +blk.27.attn_v.weight q4_K +blk.27.ffn_down_exps.weight q3_K +blk.28.attn_output.weight q3_K +blk.28.attn_v.weight q4_K +blk.28.ffn_down_exps.weight q3_K +blk.29.attn_output.weight q3_K +blk.29.attn_v.weight q4_K +blk.29.ffn_down_exps.weight q3_K +blk.30.attn_output.weight q3_K +blk.30.attn_v.weight q4_K +blk.30.ffn_down_exps.weight q3_K +blk.31.attn_output.weight q3_K +blk.31.attn_v.weight q4_K +blk.31.ffn_down_exps.weight q3_K +blk.32.attn_output.weight q3_K +blk.32.attn_v.weight q4_K +blk.32.ffn_down_exps.weight q3_K +blk.33.attn_output.weight q3_K +blk.33.attn_v.weight q4_K +blk.33.ffn_down_exps.weight q3_K +blk.34.attn_output.weight q3_K +blk.34.attn_v.weight q4_K +blk.34.ffn_down_exps.weight q3_K +blk.35.attn_output.weight q3_K +blk.35.attn_v.weight q4_K +blk.35.ffn_down_exps.weight q3_K + +[Q3_K_S] q3_K +output.weight q8_0 + +[Q3_K_M] q3_K +output.weight q8_0 +blk.0.attn_output.weight q4_K +blk.0.attn_v.weight q5_K +blk.0.ffn_down_exps.weight q5_K +blk.1.attn_output.weight q4_K +blk.1.attn_v.weight q5_K +blk.1.ffn_down_exps.weight q5_K +blk.2.attn_output.weight q4_K +blk.2.attn_v.weight q4_K +blk.2.ffn_down_exps.weight q4_K +blk.3.attn_output.weight q4_K +blk.3.attn_v.weight q4_K +blk.3.ffn_down_exps.weight q4_K +blk.4.attn_output.weight q4_K +blk.4.attn_v.weight q4_K +blk.4.ffn_down_exps.weight q4_K +blk.5.attn_output.weight q4_K +blk.5.attn_v.weight q4_K +blk.5.ffn_down_exps.weight q4_K +blk.6.attn_output.weight q4_K +blk.6.attn_v.weight q4_K +blk.6.ffn_down_exps.weight q4_K +blk.7.attn_output.weight q4_K +blk.7.attn_v.weight q4_K +blk.7.ffn_down_exps.weight q4_K +blk.8.attn_output.weight q4_K +blk.8.attn_v.weight q4_K +blk.8.ffn_down_exps.weight q4_K +blk.9.attn_output.weight q4_K +blk.9.attn_v.weight q4_K +blk.9.ffn_down_exps.weight q4_K +blk.10.attn_output.weight q4_K +blk.10.attn_v.weight q4_K +blk.10.ffn_down_exps.weight q4_K +blk.11.attn_output.weight q4_K +blk.11.attn_v.weight q4_K +blk.11.ffn_down_exps.weight q4_K +blk.12.attn_output.weight q4_K +blk.12.attn_v.weight q4_K +blk.12.ffn_down_exps.weight q4_K +blk.13.attn_output.weight q4_K +blk.13.attn_v.weight q4_K +blk.13.ffn_down_exps.weight q4_K +blk.14.attn_output.weight q4_K +blk.14.attn_v.weight q4_K +blk.14.ffn_down_exps.weight q4_K +blk.15.attn_output.weight q4_K +blk.15.attn_v.weight q4_K +blk.15.ffn_down_exps.weight q4_K +blk.16.attn_output.weight q4_K +blk.16.attn_v.weight q4_K +blk.16.ffn_down_exps.weight q4_K +blk.17.attn_output.weight q4_K +blk.17.attn_v.weight q4_K +blk.17.ffn_down_exps.weight q4_K +blk.18.attn_output.weight q4_K +blk.18.attn_v.weight q4_K +blk.18.ffn_down_exps.weight q4_K +blk.19.attn_output.weight q4_K +blk.19.attn_v.weight q4_K +blk.19.ffn_down_exps.weight q4_K +blk.20.attn_output.weight q4_K +blk.20.attn_v.weight q4_K +blk.20.ffn_down_exps.weight q4_K +blk.21.attn_output.weight q4_K +blk.21.attn_v.weight q4_K +blk.21.ffn_down_exps.weight q4_K +blk.22.attn_output.weight q4_K +blk.22.attn_v.weight q4_K +blk.22.ffn_down_exps.weight q4_K +blk.23.attn_output.weight q4_K +blk.23.attn_v.weight q4_K +blk.23.ffn_down_exps.weight q4_K +blk.24.attn_output.weight q4_K +blk.24.attn_v.weight q4_K +blk.24.ffn_down_exps.weight q4_K +blk.25.attn_output.weight q4_K +blk.25.attn_v.weight q4_K +blk.25.ffn_down_exps.weight q4_K +blk.26.attn_output.weight q4_K +blk.26.attn_v.weight q4_K +blk.26.ffn_down_exps.weight q4_K +blk.27.attn_output.weight q4_K +blk.27.attn_v.weight q4_K +blk.27.ffn_down_exps.weight q4_K +blk.28.attn_output.weight q4_K +blk.28.attn_v.weight q4_K +blk.28.ffn_down_exps.weight q4_K +blk.29.attn_output.weight q4_K +blk.29.attn_v.weight q4_K +blk.29.ffn_down_exps.weight q4_K +blk.30.attn_output.weight q4_K +blk.30.attn_v.weight q4_K +blk.30.ffn_down_exps.weight q4_K +blk.31.attn_output.weight q4_K +blk.31.attn_v.weight q4_K +blk.31.ffn_down_exps.weight q4_K +blk.32.attn_output.weight q4_K +blk.32.attn_v.weight q4_K +blk.32.ffn_down_exps.weight q4_K +blk.33.attn_output.weight q4_K +blk.33.attn_v.weight q4_K +blk.33.ffn_down_exps.weight q4_K +blk.34.attn_output.weight q4_K +blk.34.attn_v.weight q4_K +blk.34.ffn_down_exps.weight q4_K +blk.35.attn_output.weight q4_K +blk.35.attn_v.weight q4_K +blk.35.ffn_down_exps.weight q4_K + +[Q3_K_L] q3_K +output.weight q8_0 +blk.0.attn_output.weight q5_K +blk.0.attn_v.weight q5_K +blk.0.ffn_down_exps.weight q5_K +blk.1.attn_output.weight q5_K +blk.1.attn_v.weight q5_K +blk.1.ffn_down_exps.weight q5_K +blk.2.attn_output.weight q5_K +blk.2.attn_v.weight q5_K +blk.2.ffn_down_exps.weight q5_K +blk.3.attn_output.weight q5_K +blk.3.attn_v.weight q5_K +blk.3.ffn_down_exps.weight q5_K +blk.4.attn_output.weight q5_K +blk.4.attn_v.weight q5_K +blk.4.ffn_down_exps.weight q5_K +blk.5.attn_output.weight q5_K +blk.5.attn_v.weight q5_K +blk.5.ffn_down_exps.weight q5_K +blk.6.attn_output.weight q5_K +blk.6.attn_v.weight q5_K +blk.6.ffn_down_exps.weight q5_K +blk.7.attn_output.weight q5_K +blk.7.attn_v.weight q5_K +blk.7.ffn_down_exps.weight q5_K +blk.8.attn_output.weight q5_K +blk.8.attn_v.weight q5_K +blk.8.ffn_down_exps.weight q5_K +blk.9.attn_output.weight q5_K +blk.9.attn_v.weight q5_K +blk.9.ffn_down_exps.weight q5_K +blk.10.attn_output.weight q5_K +blk.10.attn_v.weight q5_K +blk.10.ffn_down_exps.weight q5_K +blk.11.attn_output.weight q5_K +blk.11.attn_v.weight q5_K +blk.11.ffn_down_exps.weight q5_K +blk.12.attn_output.weight q5_K +blk.12.attn_v.weight q5_K +blk.12.ffn_down_exps.weight q5_K +blk.13.attn_output.weight q5_K +blk.13.attn_v.weight q5_K +blk.13.ffn_down_exps.weight q5_K +blk.14.attn_output.weight q5_K +blk.14.attn_v.weight q5_K +blk.14.ffn_down_exps.weight q5_K +blk.15.attn_output.weight q5_K +blk.15.attn_v.weight q5_K +blk.15.ffn_down_exps.weight q5_K +blk.16.attn_output.weight q5_K +blk.16.attn_v.weight q5_K +blk.16.ffn_down_exps.weight q5_K +blk.17.attn_output.weight q5_K +blk.17.attn_v.weight q5_K +blk.17.ffn_down_exps.weight q5_K +blk.18.attn_output.weight q5_K +blk.18.attn_v.weight q5_K +blk.18.ffn_down_exps.weight q5_K +blk.19.attn_output.weight q5_K +blk.19.attn_v.weight q5_K +blk.19.ffn_down_exps.weight q5_K +blk.20.attn_output.weight q5_K +blk.20.attn_v.weight q5_K +blk.20.ffn_down_exps.weight q5_K +blk.21.attn_output.weight q5_K +blk.21.attn_v.weight q5_K +blk.21.ffn_down_exps.weight q5_K +blk.22.attn_output.weight q5_K +blk.22.attn_v.weight q5_K +blk.22.ffn_down_exps.weight q5_K +blk.23.attn_output.weight q5_K +blk.23.attn_v.weight q5_K +blk.23.ffn_down_exps.weight q5_K +blk.24.attn_output.weight q5_K +blk.24.attn_v.weight q5_K +blk.24.ffn_down_exps.weight q5_K +blk.25.attn_output.weight q5_K +blk.25.attn_v.weight q5_K +blk.25.ffn_down_exps.weight q5_K +blk.26.attn_output.weight q5_K +blk.26.attn_v.weight q5_K +blk.26.ffn_down_exps.weight q5_K +blk.27.attn_output.weight q5_K +blk.27.attn_v.weight q5_K +blk.27.ffn_down_exps.weight q5_K +blk.28.attn_output.weight q5_K +blk.28.attn_v.weight q5_K +blk.28.ffn_down_exps.weight q5_K +blk.29.attn_output.weight q5_K +blk.29.attn_v.weight q5_K +blk.29.ffn_down_exps.weight q5_K +blk.30.attn_output.weight q5_K +blk.30.attn_v.weight q5_K +blk.30.ffn_down_exps.weight q5_K +blk.31.attn_output.weight q5_K +blk.31.attn_v.weight q5_K +blk.31.ffn_down_exps.weight q5_K +blk.32.attn_output.weight q5_K +blk.32.attn_v.weight q5_K +blk.32.ffn_down_exps.weight q5_K +blk.33.attn_output.weight q5_K +blk.33.attn_v.weight q5_K +blk.33.ffn_down_exps.weight q5_K +blk.34.attn_output.weight q5_K +blk.34.attn_v.weight q5_K +blk.34.ffn_down_exps.weight q5_K +blk.35.attn_output.weight q5_K +blk.35.attn_v.weight q5_K +blk.35.ffn_down_exps.weight q5_K + +[Q4_K_S] q4_K +output.weight q8_0 +blk.0.attn_v.weight q5_K +blk.0.ffn_down_exps.weight q5_K +blk.1.attn_v.weight q5_K +blk.1.ffn_down_exps.weight q5_K +blk.2.attn_v.weight q5_K +blk.2.ffn_down_exps.weight q5_K +blk.3.attn_v.weight q5_K +blk.3.ffn_down_exps.weight q5_K + +[Q4_K_M] q4_K +output.weight q8_0 +blk.0.attn_v.weight q6_K +blk.0.ffn_down_exps.weight q6_K +blk.1.attn_v.weight q6_K +blk.1.ffn_down_exps.weight q6_K +blk.2.attn_v.weight q6_K +blk.2.ffn_down_exps.weight q6_K +blk.3.attn_v.weight q6_K +blk.3.ffn_down_exps.weight q6_K +blk.6.attn_v.weight q6_K +blk.6.ffn_down_exps.weight q6_K +blk.9.attn_v.weight q6_K +blk.9.ffn_down_exps.weight q6_K +blk.12.attn_v.weight q6_K +blk.12.ffn_down_exps.weight q6_K +blk.15.attn_v.weight q6_K +blk.15.ffn_down_exps.weight q6_K +blk.18.attn_v.weight q6_K +blk.18.ffn_down_exps.weight q6_K +blk.21.attn_v.weight q6_K +blk.21.ffn_down_exps.weight q6_K +blk.24.attn_v.weight q6_K +blk.24.ffn_down_exps.weight q6_K +blk.27.attn_v.weight q6_K +blk.27.ffn_down_exps.weight q6_K +blk.30.attn_v.weight q6_K +blk.30.ffn_down_exps.weight q6_K +blk.31.attn_v.weight q6_K +blk.31.ffn_down_exps.weight q6_K +blk.32.attn_v.weight q6_K +blk.32.ffn_down_exps.weight q6_K +blk.33.attn_v.weight q6_K +blk.33.ffn_down_exps.weight q6_K +blk.34.attn_v.weight q6_K +blk.34.ffn_down_exps.weight q6_K +blk.35.attn_v.weight q6_K +blk.35.ffn_down_exps.weight q6_K + +[Q5_K_S] q5_K +output.weight q8_0 + +[Q5_K_M] q5_K +output.weight q8_0 +blk.0.attn_v.weight q6_K +blk.0.ffn_down_exps.weight q6_K +blk.1.attn_v.weight q6_K +blk.1.ffn_down_exps.weight q6_K +blk.2.attn_v.weight q6_K +blk.2.ffn_down_exps.weight q6_K +blk.3.attn_v.weight q6_K +blk.3.ffn_down_exps.weight q6_K +blk.6.attn_v.weight q6_K +blk.6.ffn_down_exps.weight q6_K +blk.9.attn_v.weight q6_K +blk.9.ffn_down_exps.weight q6_K +blk.12.attn_v.weight q6_K +blk.12.ffn_down_exps.weight q6_K +blk.15.attn_v.weight q6_K +blk.15.ffn_down_exps.weight q6_K +blk.18.attn_v.weight q6_K +blk.18.ffn_down_exps.weight q6_K +blk.21.attn_v.weight q6_K +blk.21.ffn_down_exps.weight q6_K +blk.24.attn_v.weight q6_K +blk.24.ffn_down_exps.weight q6_K +blk.27.attn_v.weight q6_K +blk.27.ffn_down_exps.weight q6_K +blk.30.attn_v.weight q6_K +blk.30.ffn_down_exps.weight q6_K +blk.31.attn_v.weight q6_K +blk.31.ffn_down_exps.weight q6_K +blk.32.attn_v.weight q6_K +blk.32.ffn_down_exps.weight q6_K +blk.33.attn_v.weight q6_K +blk.33.ffn_down_exps.weight q6_K +blk.34.attn_v.weight q6_K +blk.34.ffn_down_exps.weight q6_K +blk.35.attn_v.weight q6_K +blk.35.ffn_down_exps.weight q6_K + +[Q6_K] q6_K +output.weight q8_0 + +[IQ2_XXS] iq2_xxs +output.weight q8_0 +token_embd.weight q2_K +blk.0.attn_v.weight q4_K +blk.0.ffn_down_exps.weight q2_K +blk.1.attn_v.weight q4_K +blk.1.ffn_down_exps.weight q2_K +blk.2.attn_v.weight q4_K +blk.2.ffn_down_exps.weight q2_K +blk.3.attn_v.weight q4_K +blk.3.ffn_down_exps.weight q2_K +blk.4.attn_v.weight q4_K +blk.5.attn_v.weight q4_K +blk.6.attn_v.weight q4_K +blk.7.attn_v.weight q4_K +blk.8.attn_v.weight q4_K +blk.9.attn_v.weight q4_K +blk.10.attn_v.weight q4_K +blk.11.attn_v.weight q4_K +blk.12.attn_v.weight q4_K +blk.13.attn_v.weight q4_K +blk.14.attn_v.weight q4_K +blk.15.attn_v.weight q4_K +blk.16.attn_v.weight q4_K +blk.17.attn_v.weight q4_K +blk.18.attn_v.weight q4_K +blk.19.attn_v.weight q4_K +blk.20.attn_v.weight q4_K +blk.21.attn_v.weight q4_K +blk.22.attn_v.weight q4_K +blk.23.attn_v.weight q4_K +blk.24.attn_v.weight q4_K +blk.25.attn_v.weight q4_K +blk.26.attn_v.weight q4_K +blk.27.attn_v.weight q4_K +blk.28.attn_v.weight q4_K +blk.29.attn_v.weight q4_K +blk.30.attn_v.weight q4_K +blk.31.attn_v.weight q4_K +blk.32.attn_v.weight q4_K +blk.33.attn_v.weight q4_K +blk.34.attn_v.weight q4_K +blk.35.attn_v.weight q4_K + +[IQ2_XS] iq2_xs +output.weight q8_0 +token_embd.weight q2_K +blk.0.attn_v.weight q4_K +blk.0.ffn_down_exps.weight q2_K +blk.1.attn_v.weight q4_K +blk.1.ffn_down_exps.weight q2_K +blk.2.attn_v.weight q4_K +blk.2.ffn_down_exps.weight q2_K +blk.3.attn_v.weight q4_K +blk.3.ffn_down_exps.weight q2_K +blk.4.attn_v.weight q4_K +blk.5.attn_v.weight q4_K +blk.6.attn_v.weight q4_K +blk.7.attn_v.weight q4_K +blk.8.attn_v.weight q4_K +blk.9.attn_v.weight q4_K +blk.10.attn_v.weight q4_K +blk.11.attn_v.weight q4_K +blk.12.attn_v.weight q4_K +blk.13.attn_v.weight q4_K +blk.14.attn_v.weight q4_K +blk.15.attn_v.weight q4_K +blk.16.attn_v.weight q4_K +blk.17.attn_v.weight q4_K +blk.18.attn_v.weight q4_K +blk.19.attn_v.weight q4_K +blk.20.attn_v.weight q4_K +blk.21.attn_v.weight q4_K +blk.22.attn_v.weight q4_K +blk.23.attn_v.weight q4_K +blk.24.attn_v.weight q4_K +blk.25.attn_v.weight q4_K +blk.26.attn_v.weight q4_K +blk.27.attn_v.weight q4_K +blk.28.attn_v.weight q4_K +blk.29.attn_v.weight q4_K +blk.30.attn_v.weight q4_K +blk.31.attn_v.weight q4_K +blk.32.attn_v.weight q4_K +blk.33.attn_v.weight q4_K +blk.34.attn_v.weight q4_K +blk.35.attn_v.weight q4_K + +[Q2_K_S] q2_K +output.weight q8_0 +blk.0.attn_v.weight q4_K +blk.0.ffn_down_exps.weight q4_K +blk.1.attn_v.weight q4_K +blk.1.ffn_down_exps.weight q4_K +blk.2.attn_v.weight q4_K +blk.2.ffn_down_exps.weight q4_K +blk.3.attn_v.weight q4_K +blk.3.ffn_down_exps.weight q4_K +blk.4.attn_v.weight q4_K +blk.5.attn_v.weight q4_K +blk.6.attn_v.weight q4_K +blk.7.attn_v.weight q4_K +blk.8.attn_v.weight q4_K +blk.9.attn_v.weight q4_K +blk.10.attn_v.weight q4_K +blk.11.attn_v.weight q4_K +blk.12.attn_v.weight q4_K +blk.13.attn_v.weight q4_K +blk.14.attn_v.weight q4_K +blk.15.attn_v.weight q4_K +blk.16.attn_v.weight q4_K +blk.17.attn_v.weight q4_K +blk.18.attn_v.weight q4_K +blk.19.attn_v.weight q4_K +blk.20.attn_v.weight q4_K +blk.21.attn_v.weight q4_K +blk.22.attn_v.weight q4_K +blk.23.attn_v.weight q4_K +blk.24.attn_v.weight q4_K +blk.25.attn_v.weight q4_K +blk.26.attn_v.weight q4_K +blk.27.attn_v.weight q4_K +blk.28.attn_v.weight q4_K +blk.29.attn_v.weight q4_K +blk.30.attn_v.weight q4_K +blk.31.attn_v.weight q4_K +blk.32.attn_v.weight q4_K +blk.33.attn_v.weight q4_K +blk.34.attn_v.weight q4_K +blk.35.attn_v.weight q4_K + +[IQ3_XS] iq3_s +output.weight q8_0 +blk.0.attn_k.weight iq3_xxs +blk.0.attn_q.weight iq3_xxs +blk.0.attn_v.weight q4_K +blk.1.attn_k.weight iq3_xxs +blk.1.attn_q.weight iq3_xxs +blk.1.attn_v.weight q4_K +blk.2.attn_k.weight iq3_xxs +blk.2.attn_q.weight iq3_xxs +blk.2.attn_v.weight q4_K +blk.3.attn_k.weight iq3_xxs +blk.3.attn_q.weight iq3_xxs +blk.3.attn_v.weight q4_K +blk.4.attn_k.weight iq3_xxs +blk.4.attn_q.weight iq3_xxs +blk.4.attn_v.weight q4_K +blk.4.ffn_gate_exps.weight iq3_xxs +blk.4.ffn_up_exps.weight iq3_xxs +blk.5.attn_k.weight iq3_xxs +blk.5.attn_q.weight iq3_xxs +blk.5.attn_v.weight q4_K +blk.5.ffn_gate_exps.weight iq3_xxs +blk.5.ffn_up_exps.weight iq3_xxs +blk.6.attn_k.weight iq3_xxs +blk.6.attn_q.weight iq3_xxs +blk.6.attn_v.weight q4_K +blk.6.ffn_gate_exps.weight iq3_xxs +blk.6.ffn_up_exps.weight iq3_xxs +blk.7.attn_k.weight iq3_xxs +blk.7.attn_q.weight iq3_xxs +blk.7.attn_v.weight q4_K +blk.7.ffn_gate_exps.weight iq3_xxs +blk.7.ffn_up_exps.weight iq3_xxs +blk.8.attn_k.weight iq3_xxs +blk.8.attn_q.weight iq3_xxs +blk.8.attn_v.weight q4_K +blk.8.ffn_gate_exps.weight iq3_xxs +blk.8.ffn_up_exps.weight iq3_xxs +blk.9.attn_k.weight iq3_xxs +blk.9.attn_q.weight iq3_xxs +blk.9.attn_v.weight q4_K +blk.9.ffn_gate_exps.weight iq3_xxs +blk.9.ffn_up_exps.weight iq3_xxs +blk.10.attn_k.weight iq3_xxs +blk.10.attn_q.weight iq3_xxs +blk.10.attn_v.weight q4_K +blk.10.ffn_gate_exps.weight iq3_xxs +blk.10.ffn_up_exps.weight iq3_xxs +blk.11.attn_k.weight iq3_xxs +blk.11.attn_q.weight iq3_xxs +blk.11.attn_v.weight q4_K +blk.11.ffn_gate_exps.weight iq3_xxs +blk.11.ffn_up_exps.weight iq3_xxs +blk.12.attn_k.weight iq3_xxs +blk.12.attn_q.weight iq3_xxs +blk.12.attn_v.weight q4_K +blk.12.ffn_gate_exps.weight iq3_xxs +blk.12.ffn_up_exps.weight iq3_xxs +blk.13.attn_k.weight iq3_xxs +blk.13.attn_q.weight iq3_xxs +blk.13.attn_v.weight q4_K +blk.13.ffn_gate_exps.weight iq3_xxs +blk.13.ffn_up_exps.weight iq3_xxs +blk.14.attn_k.weight iq3_xxs +blk.14.attn_q.weight iq3_xxs +blk.14.attn_v.weight q4_K +blk.14.ffn_gate_exps.weight iq3_xxs +blk.14.ffn_up_exps.weight iq3_xxs +blk.15.attn_k.weight iq3_xxs +blk.15.attn_q.weight iq3_xxs +blk.15.attn_v.weight q4_K +blk.15.ffn_gate_exps.weight iq3_xxs +blk.15.ffn_up_exps.weight iq3_xxs +blk.16.attn_k.weight iq3_xxs +blk.16.attn_q.weight iq3_xxs +blk.16.attn_v.weight q4_K +blk.16.ffn_gate_exps.weight iq3_xxs +blk.16.ffn_up_exps.weight iq3_xxs +blk.17.attn_k.weight iq3_xxs +blk.17.attn_q.weight iq3_xxs +blk.17.attn_v.weight q4_K +blk.17.ffn_gate_exps.weight iq3_xxs +blk.17.ffn_up_exps.weight iq3_xxs +blk.18.attn_k.weight iq3_xxs +blk.18.attn_q.weight iq3_xxs +blk.18.attn_v.weight q4_K +blk.18.ffn_gate_exps.weight iq3_xxs +blk.18.ffn_up_exps.weight iq3_xxs +blk.19.attn_k.weight iq3_xxs +blk.19.attn_q.weight iq3_xxs +blk.19.attn_v.weight q4_K +blk.19.ffn_gate_exps.weight iq3_xxs +blk.19.ffn_up_exps.weight iq3_xxs +blk.20.attn_k.weight iq3_xxs +blk.20.attn_q.weight iq3_xxs +blk.20.attn_v.weight q4_K +blk.20.ffn_gate_exps.weight iq3_xxs +blk.20.ffn_up_exps.weight iq3_xxs +blk.21.attn_k.weight iq3_xxs +blk.21.attn_q.weight iq3_xxs +blk.21.attn_v.weight q4_K +blk.21.ffn_gate_exps.weight iq3_xxs +blk.21.ffn_up_exps.weight iq3_xxs +blk.22.attn_k.weight iq3_xxs +blk.22.attn_q.weight iq3_xxs +blk.22.attn_v.weight q4_K +blk.22.ffn_gate_exps.weight iq3_xxs +blk.22.ffn_up_exps.weight iq3_xxs +blk.23.attn_k.weight iq3_xxs +blk.23.attn_q.weight iq3_xxs +blk.23.attn_v.weight q4_K +blk.23.ffn_gate_exps.weight iq3_xxs +blk.23.ffn_up_exps.weight iq3_xxs +blk.24.attn_k.weight iq3_xxs +blk.24.attn_q.weight iq3_xxs +blk.24.attn_v.weight q4_K +blk.24.ffn_gate_exps.weight iq3_xxs +blk.24.ffn_up_exps.weight iq3_xxs +blk.25.attn_k.weight iq3_xxs +blk.25.attn_q.weight iq3_xxs +blk.25.attn_v.weight q4_K +blk.25.ffn_gate_exps.weight iq3_xxs +blk.25.ffn_up_exps.weight iq3_xxs +blk.26.attn_k.weight iq3_xxs +blk.26.attn_q.weight iq3_xxs +blk.26.attn_v.weight q4_K +blk.26.ffn_gate_exps.weight iq3_xxs +blk.26.ffn_up_exps.weight iq3_xxs +blk.27.attn_k.weight iq3_xxs +blk.27.attn_q.weight iq3_xxs +blk.27.attn_v.weight q4_K +blk.27.ffn_gate_exps.weight iq3_xxs +blk.27.ffn_up_exps.weight iq3_xxs +blk.28.attn_k.weight iq3_xxs +blk.28.attn_q.weight iq3_xxs +blk.28.attn_v.weight q4_K +blk.28.ffn_gate_exps.weight iq3_xxs +blk.28.ffn_up_exps.weight iq3_xxs +blk.29.attn_k.weight iq3_xxs +blk.29.attn_q.weight iq3_xxs +blk.29.attn_v.weight q4_K +blk.29.ffn_gate_exps.weight iq3_xxs +blk.29.ffn_up_exps.weight iq3_xxs +blk.30.attn_k.weight iq3_xxs +blk.30.attn_q.weight iq3_xxs +blk.30.attn_v.weight q4_K +blk.30.ffn_gate_exps.weight iq3_xxs +blk.30.ffn_up_exps.weight iq3_xxs +blk.31.attn_k.weight iq3_xxs +blk.31.attn_q.weight iq3_xxs +blk.31.attn_v.weight q4_K +blk.32.attn_k.weight iq3_xxs +blk.32.attn_q.weight iq3_xxs +blk.32.attn_v.weight q4_K +blk.33.attn_k.weight iq3_xxs +blk.33.attn_q.weight iq3_xxs +blk.33.attn_v.weight q4_K +blk.34.attn_k.weight iq3_xxs +blk.34.attn_q.weight iq3_xxs +blk.34.attn_v.weight q4_K +blk.35.attn_k.weight iq3_xxs +blk.35.attn_q.weight iq3_xxs +blk.35.attn_v.weight q4_K + +[IQ3_XXS] iq3_xxs +output.weight q8_0 +token_embd.weight iq3_s +blk.0.attn_k.weight iq2_s +blk.0.attn_output.weight iq3_s +blk.0.attn_q.weight iq2_s +blk.0.attn_v.weight q4_K +blk.0.ffn_down_exps.weight q4_K +blk.1.attn_k.weight iq2_s +blk.1.attn_output.weight iq3_s +blk.1.attn_q.weight iq2_s +blk.1.attn_v.weight q4_K +blk.1.ffn_down_exps.weight q4_K +blk.2.attn_k.weight iq2_s +blk.2.attn_output.weight iq3_s +blk.2.attn_q.weight iq2_s +blk.2.attn_v.weight q4_K +blk.2.ffn_down_exps.weight q4_K +blk.3.attn_k.weight iq2_s +blk.3.attn_output.weight iq3_s +blk.3.attn_q.weight iq2_s +blk.3.attn_v.weight q4_K +blk.3.ffn_down_exps.weight q4_K +blk.4.attn_k.weight iq2_s +blk.4.attn_output.weight iq3_s +blk.4.attn_q.weight iq2_s +blk.4.attn_v.weight q4_K +blk.4.ffn_down_exps.weight q3_K +blk.5.attn_k.weight iq2_s +blk.5.attn_output.weight iq3_s +blk.5.attn_q.weight iq2_s +blk.5.attn_v.weight q4_K +blk.5.ffn_down_exps.weight q3_K +blk.6.attn_k.weight iq2_s +blk.6.attn_output.weight iq3_s +blk.6.attn_q.weight iq2_s +blk.6.attn_v.weight q4_K +blk.6.ffn_down_exps.weight q3_K +blk.7.attn_k.weight iq2_s +blk.7.attn_output.weight iq3_s +blk.7.attn_q.weight iq2_s +blk.7.attn_v.weight q4_K +blk.7.ffn_down_exps.weight q3_K +blk.8.attn_k.weight iq2_s +blk.8.attn_output.weight iq3_s +blk.8.attn_q.weight iq2_s +blk.8.attn_v.weight q4_K +blk.8.ffn_down_exps.weight q3_K +blk.9.attn_k.weight iq2_s +blk.9.attn_output.weight iq3_s +blk.9.attn_q.weight iq2_s +blk.9.attn_v.weight q4_K +blk.9.ffn_down_exps.weight q3_K +blk.10.attn_k.weight iq2_s +blk.10.attn_output.weight iq3_s +blk.10.attn_q.weight iq2_s +blk.10.attn_v.weight q4_K +blk.10.ffn_down_exps.weight q3_K +blk.11.attn_k.weight iq2_s +blk.11.attn_output.weight iq3_s +blk.11.attn_q.weight iq2_s +blk.11.attn_v.weight q4_K +blk.11.ffn_down_exps.weight q3_K +blk.12.attn_k.weight iq2_s +blk.12.attn_output.weight iq3_s +blk.12.attn_q.weight iq2_s +blk.12.attn_v.weight q4_K +blk.12.ffn_down_exps.weight q3_K +blk.13.attn_k.weight iq2_s +blk.13.attn_output.weight iq3_s +blk.13.attn_q.weight iq2_s +blk.13.attn_v.weight q4_K +blk.13.ffn_down_exps.weight q3_K +blk.14.attn_k.weight iq2_s +blk.14.attn_output.weight iq3_s +blk.14.attn_q.weight iq2_s +blk.14.attn_v.weight q4_K +blk.14.ffn_down_exps.weight q3_K +blk.15.attn_k.weight iq2_s +blk.15.attn_output.weight iq3_s +blk.15.attn_q.weight iq2_s +blk.15.attn_v.weight q4_K +blk.15.ffn_down_exps.weight q3_K +blk.16.attn_k.weight iq2_s +blk.16.attn_output.weight iq3_s +blk.16.attn_q.weight iq2_s +blk.16.attn_v.weight q4_K +blk.16.ffn_down_exps.weight q3_K +blk.17.attn_k.weight iq2_s +blk.17.attn_output.weight iq3_s +blk.17.attn_q.weight iq2_s +blk.17.attn_v.weight q4_K +blk.17.ffn_down_exps.weight q3_K +blk.18.attn_k.weight iq2_s +blk.18.attn_output.weight iq3_s +blk.18.attn_q.weight iq2_s +blk.18.attn_v.weight q4_K +blk.18.ffn_down_exps.weight q3_K +blk.19.attn_k.weight iq2_s +blk.19.attn_output.weight iq3_s +blk.19.attn_q.weight iq2_s +blk.19.attn_v.weight q4_K +blk.19.ffn_down_exps.weight q3_K +blk.20.attn_k.weight iq2_s +blk.20.attn_output.weight iq3_s +blk.20.attn_q.weight iq2_s +blk.20.attn_v.weight q4_K +blk.20.ffn_down_exps.weight q3_K +blk.21.attn_k.weight iq2_s +blk.21.attn_output.weight iq3_s +blk.21.attn_q.weight iq2_s +blk.21.attn_v.weight q4_K +blk.21.ffn_down_exps.weight q3_K +blk.22.attn_k.weight iq2_s +blk.22.attn_output.weight iq3_s +blk.22.attn_q.weight iq2_s +blk.22.attn_v.weight q4_K +blk.22.ffn_down_exps.weight q3_K +blk.23.attn_k.weight iq2_s +blk.23.attn_output.weight iq3_s +blk.23.attn_q.weight iq2_s +blk.23.attn_v.weight q4_K +blk.23.ffn_down_exps.weight q3_K +blk.24.attn_k.weight iq2_s +blk.24.attn_output.weight iq3_s +blk.24.attn_q.weight iq2_s +blk.24.attn_v.weight q4_K +blk.24.ffn_down_exps.weight q3_K +blk.25.attn_k.weight iq2_s +blk.25.attn_output.weight iq3_s +blk.25.attn_q.weight iq2_s +blk.25.attn_v.weight q4_K +blk.25.ffn_down_exps.weight q3_K +blk.26.attn_k.weight iq2_s +blk.26.attn_output.weight iq3_s +blk.26.attn_q.weight iq2_s +blk.26.attn_v.weight q4_K +blk.26.ffn_down_exps.weight q3_K +blk.27.attn_k.weight iq2_s +blk.27.attn_output.weight iq3_s +blk.27.attn_q.weight iq2_s +blk.27.attn_v.weight q4_K +blk.27.ffn_down_exps.weight q3_K +blk.28.attn_k.weight iq2_s +blk.28.attn_output.weight iq3_s +blk.28.attn_q.weight iq2_s +blk.28.attn_v.weight q4_K +blk.28.ffn_down_exps.weight q3_K +blk.29.attn_k.weight iq2_s +blk.29.attn_output.weight iq3_s +blk.29.attn_q.weight iq2_s +blk.29.attn_v.weight q4_K +blk.29.ffn_down_exps.weight q3_K +blk.30.attn_k.weight iq2_s +blk.30.attn_output.weight iq3_s +blk.30.attn_q.weight iq2_s +blk.30.attn_v.weight q4_K +blk.30.ffn_down_exps.weight q3_K +blk.31.attn_k.weight iq2_s +blk.31.attn_output.weight iq3_s +blk.31.attn_q.weight iq2_s +blk.31.attn_v.weight q4_K +blk.31.ffn_down_exps.weight q3_K +blk.32.attn_k.weight iq2_s +blk.32.attn_output.weight iq3_s +blk.32.attn_q.weight iq2_s +blk.32.attn_v.weight q4_K +blk.32.ffn_down_exps.weight q3_K +blk.33.attn_k.weight iq2_s +blk.33.attn_output.weight iq3_s +blk.33.attn_q.weight iq2_s +blk.33.attn_v.weight q4_K +blk.33.ffn_down_exps.weight q3_K +blk.34.attn_k.weight iq2_s +blk.34.attn_output.weight iq3_s +blk.34.attn_q.weight iq2_s +blk.34.attn_v.weight q4_K +blk.34.ffn_down_exps.weight q3_K +blk.35.attn_k.weight iq2_s +blk.35.attn_output.weight iq3_s +blk.35.attn_q.weight iq2_s +blk.35.attn_v.weight q4_K +blk.35.ffn_down_exps.weight q3_K + +[IQ1_S] iq1_s +output.weight q8_0 +token_embd.weight q2_K +blk.0.attn_output.weight iq2_xxs +blk.0.attn_v.weight q4_K +blk.0.ffn_down_exps.weight q2_K +blk.1.attn_output.weight iq2_xxs +blk.1.attn_v.weight q4_K +blk.1.ffn_down_exps.weight q2_K +blk.2.attn_output.weight iq2_xxs +blk.2.attn_v.weight q4_K +blk.2.ffn_down_exps.weight q2_K +blk.3.attn_output.weight iq2_xxs +blk.3.attn_v.weight q4_K +blk.3.ffn_down_exps.weight q2_K +blk.4.attn_output.weight iq2_xxs +blk.4.attn_v.weight q4_K +blk.5.attn_output.weight iq2_xxs +blk.5.attn_v.weight q4_K +blk.6.attn_output.weight iq2_xxs +blk.6.attn_v.weight q4_K +blk.7.attn_output.weight iq2_xxs +blk.7.attn_v.weight q4_K +blk.8.attn_output.weight iq2_xxs +blk.8.attn_v.weight q4_K +blk.9.attn_output.weight iq2_xxs +blk.9.attn_v.weight q4_K +blk.10.attn_output.weight iq2_xxs +blk.10.attn_v.weight q4_K +blk.11.attn_output.weight iq2_xxs +blk.11.attn_v.weight q4_K +blk.12.attn_output.weight iq2_xxs +blk.12.attn_v.weight q4_K +blk.13.attn_output.weight iq2_xxs +blk.13.attn_v.weight q4_K +blk.14.attn_output.weight iq2_xxs +blk.14.attn_v.weight q4_K +blk.15.attn_output.weight iq2_xxs +blk.15.attn_v.weight q4_K +blk.16.attn_output.weight iq2_xxs +blk.16.attn_v.weight q4_K +blk.17.attn_output.weight iq2_xxs +blk.17.attn_v.weight q4_K +blk.18.attn_output.weight iq2_xxs +blk.18.attn_v.weight q4_K +blk.19.attn_output.weight iq2_xxs +blk.19.attn_v.weight q4_K +blk.20.attn_output.weight iq2_xxs +blk.20.attn_v.weight q4_K +blk.21.attn_output.weight iq2_xxs +blk.21.attn_v.weight q4_K +blk.22.attn_output.weight iq2_xxs +blk.22.attn_v.weight q4_K +blk.23.attn_output.weight iq2_xxs +blk.23.attn_v.weight q4_K +blk.24.attn_output.weight iq2_xxs +blk.24.attn_v.weight q4_K +blk.25.attn_output.weight iq2_xxs +blk.25.attn_v.weight q4_K +blk.26.attn_output.weight iq2_xxs +blk.26.attn_v.weight q4_K +blk.27.attn_output.weight iq2_xxs +blk.27.attn_v.weight q4_K +blk.28.attn_output.weight iq2_xxs +blk.28.attn_v.weight q4_K +blk.29.attn_output.weight iq2_xxs +blk.29.attn_v.weight q4_K +blk.30.attn_output.weight iq2_xxs +blk.30.attn_v.weight q4_K +blk.31.attn_output.weight iq2_xxs +blk.31.attn_v.weight q4_K +blk.32.attn_output.weight iq2_xxs +blk.32.attn_v.weight q4_K +blk.33.attn_output.weight iq2_xxs +blk.33.attn_v.weight q4_K +blk.34.attn_output.weight iq2_xxs +blk.34.attn_v.weight q4_K +blk.35.attn_output.weight iq2_xxs +blk.35.attn_v.weight q4_K + +[IQ4_NL] iq4_nl +output.weight q6_K +blk.0.attn_v.weight q5_K +blk.0.ffn_down_exps.weight q5_K +blk.1.attn_v.weight q5_K +blk.1.ffn_down_exps.weight q5_K +blk.2.attn_v.weight q5_K +blk.2.ffn_down_exps.weight q5_K +blk.3.attn_v.weight q5_K +blk.3.ffn_down_exps.weight q5_K +blk.4.attn_v.weight q5_K +blk.5.attn_v.weight q5_K +blk.6.attn_v.weight q5_K +blk.7.attn_v.weight q5_K +blk.8.attn_v.weight q5_K +blk.9.attn_v.weight q5_K +blk.10.attn_v.weight q5_K +blk.11.attn_v.weight q5_K +blk.12.attn_v.weight q5_K +blk.13.attn_v.weight q5_K +blk.14.attn_v.weight q5_K +blk.15.attn_v.weight q5_K +blk.16.attn_v.weight q5_K +blk.17.attn_v.weight q5_K +blk.18.attn_v.weight q5_K +blk.19.attn_v.weight q5_K +blk.20.attn_v.weight q5_K +blk.21.attn_v.weight q5_K +blk.22.attn_v.weight q5_K +blk.23.attn_v.weight q5_K +blk.24.attn_v.weight q5_K +blk.25.attn_v.weight q5_K +blk.26.attn_v.weight q5_K +blk.27.attn_v.weight q5_K +blk.28.attn_v.weight q5_K +blk.29.attn_v.weight q5_K +blk.30.attn_v.weight q5_K +blk.31.attn_v.weight q5_K +blk.32.attn_v.weight q5_K +blk.33.attn_v.weight q5_K +blk.34.attn_v.weight q5_K +blk.35.attn_v.weight q5_K + +[IQ3_S] iq3_s +output.weight q8_0 +blk.0.attn_v.weight q4_K +blk.1.attn_v.weight q4_K +blk.2.attn_v.weight q4_K +blk.3.attn_v.weight q4_K +blk.4.attn_v.weight q4_K +blk.5.attn_v.weight q4_K +blk.6.attn_v.weight q4_K +blk.7.attn_v.weight q4_K +blk.8.attn_v.weight q4_K +blk.9.attn_v.weight q4_K +blk.10.attn_v.weight q4_K +blk.11.attn_v.weight q4_K +blk.12.attn_v.weight q4_K +blk.13.attn_v.weight q4_K +blk.14.attn_v.weight q4_K +blk.15.attn_v.weight q4_K +blk.16.attn_v.weight q4_K +blk.17.attn_v.weight q4_K +blk.18.attn_v.weight q4_K +blk.19.attn_v.weight q4_K +blk.20.attn_v.weight q4_K +blk.21.attn_v.weight q4_K +blk.22.attn_v.weight q4_K +blk.23.attn_v.weight q4_K +blk.24.attn_v.weight q4_K +blk.25.attn_v.weight q4_K +blk.26.attn_v.weight q4_K +blk.27.attn_v.weight q4_K +blk.28.attn_v.weight q4_K +blk.29.attn_v.weight q4_K +blk.30.attn_v.weight q4_K +blk.31.attn_v.weight q4_K +blk.32.attn_v.weight q4_K +blk.33.attn_v.weight q4_K +blk.34.attn_v.weight q4_K +blk.35.attn_v.weight q4_K + +[IQ3_M] iq3_s +output.weight q8_0 +blk.0.attn_output.weight q4_K +blk.0.attn_v.weight q4_K +blk.0.ffn_down_exps.weight q4_K +blk.1.attn_output.weight q4_K +blk.1.attn_v.weight q4_K +blk.1.ffn_down_exps.weight q4_K +blk.2.attn_output.weight q4_K +blk.2.attn_v.weight q4_K +blk.2.ffn_down_exps.weight q4_K +blk.3.attn_output.weight q4_K +blk.3.attn_v.weight q4_K +blk.3.ffn_down_exps.weight q4_K +blk.4.attn_output.weight q4_K +blk.4.attn_v.weight q4_K +blk.5.attn_output.weight q4_K +blk.5.attn_v.weight q4_K +blk.6.attn_output.weight q4_K +blk.6.attn_v.weight q4_K +blk.7.attn_output.weight q4_K +blk.7.attn_v.weight q4_K +blk.8.attn_output.weight q4_K +blk.8.attn_v.weight q4_K +blk.9.attn_output.weight q4_K +blk.9.attn_v.weight q4_K +blk.10.attn_output.weight q4_K +blk.10.attn_v.weight q4_K +blk.11.attn_output.weight q4_K +blk.11.attn_v.weight q4_K +blk.12.attn_output.weight q4_K +blk.12.attn_v.weight q4_K +blk.13.attn_output.weight q4_K +blk.13.attn_v.weight q4_K +blk.14.attn_output.weight q4_K +blk.14.attn_v.weight q4_K +blk.15.attn_output.weight q4_K +blk.15.attn_v.weight q4_K +blk.16.attn_output.weight q4_K +blk.16.attn_v.weight q4_K +blk.17.attn_output.weight q4_K +blk.17.attn_v.weight q4_K +blk.18.attn_output.weight q4_K +blk.18.attn_v.weight q4_K +blk.19.attn_output.weight q4_K +blk.19.attn_v.weight q4_K +blk.20.attn_output.weight q4_K +blk.20.attn_v.weight q4_K +blk.21.attn_output.weight q4_K +blk.21.attn_v.weight q4_K +blk.22.attn_output.weight q4_K +blk.22.attn_v.weight q4_K +blk.23.attn_output.weight q4_K +blk.23.attn_v.weight q4_K +blk.24.attn_output.weight q4_K +blk.24.attn_v.weight q4_K +blk.25.attn_output.weight q4_K +blk.25.attn_v.weight q4_K +blk.26.attn_output.weight q4_K +blk.26.attn_v.weight q4_K +blk.27.attn_output.weight q4_K +blk.27.attn_v.weight q4_K +blk.28.attn_output.weight q4_K +blk.28.attn_v.weight q4_K +blk.29.attn_output.weight q4_K +blk.29.attn_v.weight q4_K +blk.30.attn_output.weight q4_K +blk.30.attn_v.weight q4_K +blk.31.attn_output.weight q4_K +blk.31.attn_v.weight q4_K +blk.32.attn_output.weight q4_K +blk.32.attn_v.weight q4_K +blk.33.attn_output.weight q4_K +blk.33.attn_v.weight q4_K +blk.34.attn_output.weight q4_K +blk.34.attn_v.weight q4_K +blk.35.attn_output.weight q4_K +blk.35.attn_v.weight q4_K + +[IQ2_S] iq2_xs +output.weight q8_0 +token_embd.weight iq3_s +blk.0.attn_output.weight iq3_s +blk.0.attn_v.weight q4_K +blk.0.ffn_down_exps.weight iq3_s +blk.1.attn_output.weight iq3_s +blk.1.attn_v.weight q4_K +blk.1.ffn_down_exps.weight iq3_s +blk.2.attn_output.weight iq3_s +blk.2.attn_v.weight q4_K +blk.2.ffn_down_exps.weight iq3_s +blk.3.attn_output.weight iq3_s +blk.3.attn_v.weight q4_K +blk.3.ffn_down_exps.weight iq3_s +blk.4.attn_output.weight iq3_s +blk.4.attn_v.weight q4_K +blk.5.attn_output.weight iq3_s +blk.5.attn_v.weight q4_K +blk.6.attn_output.weight iq3_s +blk.6.attn_v.weight q4_K +blk.7.attn_output.weight iq3_s +blk.7.attn_v.weight q4_K +blk.8.attn_output.weight iq3_s +blk.8.attn_v.weight q4_K +blk.9.attn_output.weight iq3_s +blk.9.attn_v.weight q4_K +blk.10.attn_output.weight iq3_s +blk.10.attn_v.weight q4_K +blk.11.attn_output.weight iq3_s +blk.11.attn_v.weight q4_K +blk.12.attn_output.weight iq3_s +blk.12.attn_v.weight q4_K +blk.13.attn_output.weight iq3_s +blk.13.attn_v.weight q4_K +blk.14.attn_output.weight iq3_s +blk.14.attn_v.weight q4_K +blk.15.attn_output.weight iq3_s +blk.15.attn_v.weight q4_K +blk.16.attn_output.weight iq3_s +blk.16.attn_v.weight q4_K +blk.17.attn_output.weight iq3_s +blk.17.attn_v.weight q4_K +blk.18.attn_output.weight iq3_s +blk.18.attn_v.weight q4_K +blk.19.attn_output.weight iq3_s +blk.19.attn_v.weight q4_K +blk.20.attn_output.weight iq3_s +blk.20.attn_v.weight q4_K +blk.21.attn_output.weight iq3_s +blk.21.attn_v.weight q4_K +blk.22.attn_output.weight iq3_s +blk.22.attn_v.weight q4_K +blk.23.attn_output.weight iq3_s +blk.23.attn_v.weight q4_K +blk.24.attn_output.weight iq3_s +blk.24.attn_v.weight q4_K +blk.25.attn_output.weight iq3_s +blk.25.attn_v.weight q4_K +blk.26.attn_output.weight iq3_s +blk.26.attn_v.weight q4_K +blk.27.attn_output.weight iq3_s +blk.27.attn_v.weight q4_K +blk.28.attn_output.weight iq3_s +blk.28.attn_v.weight q4_K +blk.29.attn_output.weight iq3_s +blk.29.attn_v.weight q4_K +blk.30.attn_output.weight iq3_s +blk.30.attn_v.weight q4_K +blk.31.attn_output.weight iq3_s +blk.31.attn_v.weight q4_K +blk.32.attn_output.weight iq3_s +blk.32.attn_v.weight q4_K +blk.33.attn_output.weight iq3_s +blk.33.attn_v.weight q4_K +blk.34.attn_output.weight iq3_s +blk.34.attn_v.weight q4_K +blk.35.attn_output.weight iq3_s +blk.35.attn_v.weight q4_K + +[IQ2_M] iq2_s +output.weight q8_0 +token_embd.weight iq3_s +blk.0.attn_output.weight iq3_s +blk.0.attn_v.weight q4_K +blk.0.ffn_down_exps.weight iq3_s +blk.1.attn_output.weight iq3_s +blk.1.attn_v.weight q4_K +blk.1.ffn_down_exps.weight iq3_s +blk.2.attn_output.weight iq3_s +blk.2.attn_v.weight q4_K +blk.2.ffn_down_exps.weight iq3_s +blk.3.attn_output.weight iq3_s +blk.3.attn_v.weight q4_K +blk.3.ffn_down_exps.weight iq3_s +blk.4.attn_output.weight iq3_s +blk.4.attn_v.weight q4_K +blk.5.attn_output.weight iq3_s +blk.5.attn_v.weight q4_K +blk.6.attn_output.weight iq3_s +blk.6.attn_v.weight q4_K +blk.7.attn_output.weight iq3_s +blk.7.attn_v.weight q4_K +blk.8.attn_output.weight iq3_s +blk.8.attn_v.weight q4_K +blk.9.attn_output.weight iq3_s +blk.9.attn_v.weight q4_K +blk.10.attn_output.weight iq3_s +blk.10.attn_v.weight q4_K +blk.11.attn_output.weight iq3_s +blk.11.attn_v.weight q4_K +blk.12.attn_output.weight iq3_s +blk.12.attn_v.weight q4_K +blk.13.attn_output.weight iq3_s +blk.13.attn_v.weight q4_K +blk.14.attn_output.weight iq3_s +blk.14.attn_v.weight q4_K +blk.15.attn_output.weight iq3_s +blk.15.attn_v.weight q4_K +blk.16.attn_output.weight iq3_s +blk.16.attn_v.weight q4_K +blk.17.attn_output.weight iq3_s +blk.17.attn_v.weight q4_K +blk.18.attn_output.weight iq3_s +blk.18.attn_v.weight q4_K +blk.19.attn_output.weight iq3_s +blk.19.attn_v.weight q4_K +blk.20.attn_output.weight iq3_s +blk.20.attn_v.weight q4_K +blk.21.attn_output.weight iq3_s +blk.21.attn_v.weight q4_K +blk.22.attn_output.weight iq3_s +blk.22.attn_v.weight q4_K +blk.23.attn_output.weight iq3_s +blk.23.attn_v.weight q4_K +blk.24.attn_output.weight iq3_s +blk.24.attn_v.weight q4_K +blk.25.attn_output.weight iq3_s +blk.25.attn_v.weight q4_K +blk.26.attn_output.weight iq3_s +blk.26.attn_v.weight q4_K +blk.27.attn_output.weight iq3_s +blk.27.attn_v.weight q4_K +blk.28.attn_output.weight iq3_s +blk.28.attn_v.weight q4_K +blk.29.attn_output.weight iq3_s +blk.29.attn_v.weight q4_K +blk.30.attn_output.weight iq3_s +blk.30.attn_v.weight q4_K +blk.31.attn_output.weight iq3_s +blk.31.attn_v.weight q4_K +blk.32.attn_output.weight iq3_s +blk.32.attn_v.weight q4_K +blk.33.attn_output.weight iq3_s +blk.33.attn_v.weight q4_K +blk.34.attn_output.weight iq3_s +blk.34.attn_v.weight q4_K +blk.35.attn_output.weight iq3_s +blk.35.attn_v.weight q4_K + +[IQ4_XS] iq4_xs +output.weight q8_0 +blk.0.attn_v.weight q5_K +blk.0.ffn_down_exps.weight q5_K +blk.1.attn_v.weight q5_K +blk.1.ffn_down_exps.weight q5_K +blk.2.attn_v.weight q5_K +blk.2.ffn_down_exps.weight q5_K +blk.3.attn_v.weight q5_K +blk.3.ffn_down_exps.weight q5_K +blk.4.attn_v.weight q5_K +blk.5.attn_v.weight q5_K +blk.6.attn_v.weight q5_K +blk.7.attn_v.weight q5_K +blk.8.attn_v.weight q5_K +blk.9.attn_v.weight q5_K +blk.10.attn_v.weight q5_K +blk.11.attn_v.weight q5_K +blk.12.attn_v.weight q5_K +blk.13.attn_v.weight q5_K +blk.14.attn_v.weight q5_K +blk.15.attn_v.weight q5_K +blk.16.attn_v.weight q5_K +blk.17.attn_v.weight q5_K +blk.18.attn_v.weight q5_K +blk.19.attn_v.weight q5_K +blk.20.attn_v.weight q5_K +blk.21.attn_v.weight q5_K +blk.22.attn_v.weight q5_K +blk.23.attn_v.weight q5_K +blk.24.attn_v.weight q5_K +blk.25.attn_v.weight q5_K +blk.26.attn_v.weight q5_K +blk.27.attn_v.weight q5_K +blk.28.attn_v.weight q5_K +blk.29.attn_v.weight q5_K +blk.30.attn_v.weight q5_K +blk.31.attn_v.weight q5_K +blk.32.attn_v.weight q5_K +blk.33.attn_v.weight q5_K +blk.34.attn_v.weight q5_K +blk.35.attn_v.weight q5_K + +[IQ1_M] iq1_m +output.weight q8_0 +token_embd.weight q2_K +blk.0.attn_output.weight iq2_xxs +blk.0.attn_v.weight q4_K +blk.0.ffn_down_exps.weight q2_K +blk.1.attn_output.weight iq2_xxs +blk.1.attn_v.weight q4_K +blk.1.ffn_down_exps.weight q2_K +blk.2.attn_output.weight iq2_xxs +blk.2.attn_v.weight q4_K +blk.2.ffn_down_exps.weight q2_K +blk.3.attn_output.weight iq2_xxs +blk.3.attn_v.weight q4_K +blk.3.ffn_down_exps.weight q2_K +blk.4.attn_output.weight iq2_xxs +blk.4.attn_v.weight q4_K +blk.5.attn_output.weight iq2_xxs +blk.5.attn_v.weight q4_K +blk.6.attn_output.weight iq2_xxs +blk.6.attn_v.weight q4_K +blk.7.attn_output.weight iq2_xxs +blk.7.attn_v.weight q4_K +blk.8.attn_output.weight iq2_xxs +blk.8.attn_v.weight q4_K +blk.9.attn_output.weight iq2_xxs +blk.9.attn_v.weight q4_K +blk.10.attn_output.weight iq2_xxs +blk.10.attn_v.weight q4_K +blk.11.attn_output.weight iq2_xxs +blk.11.attn_v.weight q4_K +blk.12.attn_output.weight iq2_xxs +blk.12.attn_v.weight q4_K +blk.13.attn_output.weight iq2_xxs +blk.13.attn_v.weight q4_K +blk.14.attn_output.weight iq2_xxs +blk.14.attn_v.weight q4_K +blk.15.attn_output.weight iq2_xxs +blk.15.attn_v.weight q4_K +blk.16.attn_output.weight iq2_xxs +blk.16.attn_v.weight q4_K +blk.17.attn_output.weight iq2_xxs +blk.17.attn_v.weight q4_K +blk.18.attn_output.weight iq2_xxs +blk.18.attn_v.weight q4_K +blk.19.attn_output.weight iq2_xxs +blk.19.attn_v.weight q4_K +blk.20.attn_output.weight iq2_xxs +blk.20.attn_v.weight q4_K +blk.21.attn_output.weight iq2_xxs +blk.21.attn_v.weight q4_K +blk.22.attn_output.weight iq2_xxs +blk.22.attn_v.weight q4_K +blk.23.attn_output.weight iq2_xxs +blk.23.attn_v.weight q4_K +blk.24.attn_output.weight iq2_xxs +blk.24.attn_v.weight q4_K +blk.25.attn_output.weight iq2_xxs +blk.25.attn_v.weight q4_K +blk.26.attn_output.weight iq2_xxs +blk.26.attn_v.weight q4_K +blk.27.attn_output.weight iq2_xxs +blk.27.attn_v.weight q4_K +blk.28.attn_output.weight iq2_xxs +blk.28.attn_v.weight q4_K +blk.29.attn_output.weight iq2_xxs +blk.29.attn_v.weight q4_K +blk.30.attn_output.weight iq2_xxs +blk.30.attn_v.weight q4_K +blk.31.attn_output.weight iq2_xxs +blk.31.attn_v.weight q4_K +blk.32.attn_output.weight iq2_xxs +blk.32.attn_v.weight q4_K +blk.33.attn_output.weight iq2_xxs +blk.33.attn_v.weight q4_K +blk.34.attn_output.weight iq2_xxs +blk.34.attn_v.weight q4_K +blk.35.attn_output.weight iq2_xxs +blk.35.attn_v.weight q4_K + +[BF16] bf16 +output.weight q6_K + +[TQ1_0] tq1_0 +output.weight q8_0 +token_embd.weight q4_K + +[TQ2_0] tq2_0 +output.weight q8_0 +token_embd.weight q4_K + +[MXFP4_MOE] mxfp4 +output.weight q8_0 +token_embd.weight q8_0 +blk.0.attn_k.weight q8_0 +blk.0.attn_output.weight q8_0 +blk.0.attn_q.weight q8_0 +blk.0.attn_v.weight q8_0 +blk.1.attn_k.weight q8_0 +blk.1.attn_output.weight q8_0 +blk.1.attn_q.weight q8_0 +blk.1.attn_v.weight q8_0 +blk.2.attn_k.weight q8_0 +blk.2.attn_output.weight q8_0 +blk.2.attn_q.weight q8_0 +blk.2.attn_v.weight q8_0 +blk.3.attn_k.weight q8_0 +blk.3.attn_output.weight q8_0 +blk.3.attn_q.weight q8_0 +blk.3.attn_v.weight q8_0 +blk.4.attn_k.weight q8_0 +blk.4.attn_output.weight q8_0 +blk.4.attn_q.weight q8_0 +blk.4.attn_v.weight q8_0 +blk.5.attn_k.weight q8_0 +blk.5.attn_output.weight q8_0 +blk.5.attn_q.weight q8_0 +blk.5.attn_v.weight q8_0 +blk.6.attn_k.weight q8_0 +blk.6.attn_output.weight q8_0 +blk.6.attn_q.weight q8_0 +blk.6.attn_v.weight q8_0 +blk.7.attn_k.weight q8_0 +blk.7.attn_output.weight q8_0 +blk.7.attn_q.weight q8_0 +blk.7.attn_v.weight q8_0 +blk.8.attn_k.weight q8_0 +blk.8.attn_output.weight q8_0 +blk.8.attn_q.weight q8_0 +blk.8.attn_v.weight q8_0 +blk.9.attn_k.weight q8_0 +blk.9.attn_output.weight q8_0 +blk.9.attn_q.weight q8_0 +blk.9.attn_v.weight q8_0 +blk.10.attn_k.weight q8_0 +blk.10.attn_output.weight q8_0 +blk.10.attn_q.weight q8_0 +blk.10.attn_v.weight q8_0 +blk.11.attn_k.weight q8_0 +blk.11.attn_output.weight q8_0 +blk.11.attn_q.weight q8_0 +blk.11.attn_v.weight q8_0 +blk.12.attn_k.weight q8_0 +blk.12.attn_output.weight q8_0 +blk.12.attn_q.weight q8_0 +blk.12.attn_v.weight q8_0 +blk.13.attn_k.weight q8_0 +blk.13.attn_output.weight q8_0 +blk.13.attn_q.weight q8_0 +blk.13.attn_v.weight q8_0 +blk.14.attn_k.weight q8_0 +blk.14.attn_output.weight q8_0 +blk.14.attn_q.weight q8_0 +blk.14.attn_v.weight q8_0 +blk.15.attn_k.weight q8_0 +blk.15.attn_output.weight q8_0 +blk.15.attn_q.weight q8_0 +blk.15.attn_v.weight q8_0 +blk.16.attn_k.weight q8_0 +blk.16.attn_output.weight q8_0 +blk.16.attn_q.weight q8_0 +blk.16.attn_v.weight q8_0 +blk.17.attn_k.weight q8_0 +blk.17.attn_output.weight q8_0 +blk.17.attn_q.weight q8_0 +blk.17.attn_v.weight q8_0 +blk.18.attn_k.weight q8_0 +blk.18.attn_output.weight q8_0 +blk.18.attn_q.weight q8_0 +blk.18.attn_v.weight q8_0 +blk.19.attn_k.weight q8_0 +blk.19.attn_output.weight q8_0 +blk.19.attn_q.weight q8_0 +blk.19.attn_v.weight q8_0 +blk.20.attn_k.weight q8_0 +blk.20.attn_output.weight q8_0 +blk.20.attn_q.weight q8_0 +blk.20.attn_v.weight q8_0 +blk.21.attn_k.weight q8_0 +blk.21.attn_output.weight q8_0 +blk.21.attn_q.weight q8_0 +blk.21.attn_v.weight q8_0 +blk.22.attn_k.weight q8_0 +blk.22.attn_output.weight q8_0 +blk.22.attn_q.weight q8_0 +blk.22.attn_v.weight q8_0 +blk.23.attn_k.weight q8_0 +blk.23.attn_output.weight q8_0 +blk.23.attn_q.weight q8_0 +blk.23.attn_v.weight q8_0 +blk.24.attn_k.weight q8_0 +blk.24.attn_output.weight q8_0 +blk.24.attn_q.weight q8_0 +blk.24.attn_v.weight q8_0 +blk.25.attn_k.weight q8_0 +blk.25.attn_output.weight q8_0 +blk.25.attn_q.weight q8_0 +blk.25.attn_v.weight q8_0 +blk.26.attn_k.weight q8_0 +blk.26.attn_output.weight q8_0 +blk.26.attn_q.weight q8_0 +blk.26.attn_v.weight q8_0 +blk.27.attn_k.weight q8_0 +blk.27.attn_output.weight q8_0 +blk.27.attn_q.weight q8_0 +blk.27.attn_v.weight q8_0 +blk.28.attn_k.weight q8_0 +blk.28.attn_output.weight q8_0 +blk.28.attn_q.weight q8_0 +blk.28.attn_v.weight q8_0 +blk.29.attn_k.weight q8_0 +blk.29.attn_output.weight q8_0 +blk.29.attn_q.weight q8_0 +blk.29.attn_v.weight q8_0 +blk.30.attn_k.weight q8_0 +blk.30.attn_output.weight q8_0 +blk.30.attn_q.weight q8_0 +blk.30.attn_v.weight q8_0 +blk.31.attn_k.weight q8_0 +blk.31.attn_output.weight q8_0 +blk.31.attn_q.weight q8_0 +blk.31.attn_v.weight q8_0 +blk.32.attn_k.weight q8_0 +blk.32.attn_output.weight q8_0 +blk.32.attn_q.weight q8_0 +blk.32.attn_v.weight q8_0 +blk.33.attn_k.weight q8_0 +blk.33.attn_output.weight q8_0 +blk.33.attn_q.weight q8_0 +blk.33.attn_v.weight q8_0 +blk.34.attn_k.weight q8_0 +blk.34.attn_output.weight q8_0 +blk.34.attn_q.weight q8_0 +blk.34.attn_v.weight q8_0 +blk.35.attn_k.weight q8_0 +blk.35.attn_output.weight q8_0 +blk.35.attn_q.weight q8_0 +blk.35.attn_v.weight q8_0 diff --git a/tests/snapshots/meta-llama-3.1-70b-instruct.schema b/tests/snapshots/meta-llama-3.1-70b-instruct.schema new file mode 100644 index 0000000000..b26755d6f7 --- /dev/null +++ b/tests/snapshots/meta-llama-3.1-70b-instruct.schema @@ -0,0 +1,3899 @@ +# Model: Meta-Llama-3.1-70B-Instruct +# n_embd=8192, n_ff=28672, n_vocab=128256, n_layer=80, n_head=64, n_head_kv=8 + +[F32] f32 +output.weight q6_K + +[F16] f16 +output.weight q6_K + +[Q4_0] q4_0 +output.weight q6_K + +[Q4_1] q4_1 +output.weight q6_K + +[Q8_0] q8_0 + +[Q5_0] q5_0 +output.weight q6_K + +[Q5_1] q5_1 +output.weight q6_K + +[Q2_K] q2_K +blk.0.ffn_down.weight q3_K +blk.0.attn_output.weight q3_K +blk.0.attn_v.weight q5_K +blk.1.attn_output.weight q3_K +blk.1.attn_v.weight q5_K +blk.1.ffn_down.weight q3_K +blk.2.ffn_down.weight q3_K +blk.2.attn_output.weight q3_K +blk.2.attn_v.weight q5_K +blk.3.ffn_down.weight q3_K +blk.3.attn_output.weight q3_K +blk.3.attn_v.weight q5_K +blk.4.attn_output.weight q3_K +blk.4.attn_v.weight q5_K +blk.4.ffn_down.weight q3_K +blk.5.ffn_down.weight q3_K +blk.5.attn_output.weight q3_K +blk.5.attn_v.weight q5_K +blk.6.ffn_down.weight q3_K +blk.6.attn_output.weight q3_K +blk.6.attn_v.weight q5_K +blk.7.attn_v.weight q5_K +blk.7.ffn_down.weight q3_K +blk.7.attn_output.weight q3_K +blk.8.ffn_down.weight q3_K +blk.8.attn_output.weight q3_K +blk.8.attn_v.weight q5_K +blk.9.ffn_down.weight q3_K +blk.9.attn_output.weight q3_K +blk.9.attn_v.weight q5_K +blk.10.ffn_down.weight q3_K +blk.10.attn_output.weight q3_K +blk.10.attn_v.weight q5_K +blk.11.ffn_down.weight q3_K +blk.11.attn_output.weight q3_K +blk.11.attn_v.weight q5_K +blk.12.attn_output.weight q3_K +blk.12.attn_v.weight q5_K +blk.12.ffn_down.weight q3_K +blk.13.ffn_down.weight q3_K +blk.13.attn_output.weight q3_K +blk.13.attn_v.weight q5_K +blk.14.ffn_down.weight q3_K +blk.14.attn_output.weight q3_K +blk.14.attn_v.weight q5_K +blk.15.attn_output.weight q3_K +blk.15.attn_v.weight q5_K +blk.15.ffn_down.weight q3_K +blk.16.ffn_down.weight q3_K +blk.16.attn_output.weight q3_K +blk.16.attn_v.weight q5_K +blk.17.ffn_down.weight q3_K +blk.17.attn_output.weight q3_K +blk.17.attn_v.weight q5_K +blk.18.attn_output.weight q3_K +blk.18.attn_v.weight q5_K +blk.18.ffn_down.weight q3_K +blk.19.ffn_down.weight q3_K +blk.19.attn_output.weight q3_K +blk.19.attn_v.weight q5_K +blk.20.ffn_down.weight q3_K +blk.20.attn_output.weight q3_K +blk.20.attn_v.weight q5_K +blk.21.attn_v.weight q5_K +blk.21.ffn_down.weight q3_K +blk.21.attn_output.weight q3_K +blk.22.ffn_down.weight q3_K +blk.22.attn_output.weight q3_K +blk.22.attn_v.weight q5_K +blk.23.ffn_down.weight q3_K +blk.23.attn_output.weight q3_K +blk.23.attn_v.weight q5_K +blk.24.ffn_down.weight q3_K +blk.24.attn_output.weight q3_K +blk.24.attn_v.weight q5_K +blk.25.ffn_down.weight q3_K +blk.25.attn_output.weight q3_K +blk.25.attn_v.weight q5_K +blk.26.attn_output.weight q3_K +blk.26.attn_v.weight q5_K +blk.26.ffn_down.weight q3_K +blk.27.ffn_down.weight q3_K +blk.27.attn_output.weight q3_K +blk.27.attn_v.weight q5_K +blk.28.ffn_down.weight q3_K +blk.28.attn_output.weight q3_K +blk.28.attn_v.weight q5_K +blk.29.attn_output.weight q3_K +blk.29.attn_v.weight q5_K +blk.29.ffn_down.weight q3_K +blk.30.ffn_down.weight q3_K +blk.30.attn_output.weight q3_K +blk.30.attn_v.weight q5_K +blk.31.ffn_down.weight q3_K +blk.31.attn_output.weight q3_K +blk.31.attn_v.weight q5_K +blk.32.attn_output.weight q3_K +blk.32.attn_v.weight q5_K +blk.32.ffn_down.weight q3_K +blk.33.ffn_down.weight q3_K +blk.33.attn_output.weight q3_K +blk.33.attn_v.weight q5_K +blk.34.ffn_down.weight q3_K +blk.34.attn_output.weight q3_K +blk.34.attn_v.weight q5_K +blk.35.attn_v.weight q5_K +blk.35.ffn_down.weight q3_K +blk.35.attn_output.weight q3_K +blk.36.ffn_down.weight q3_K +blk.36.attn_output.weight q3_K +blk.36.attn_v.weight q5_K +blk.37.ffn_down.weight q3_K +blk.37.attn_output.weight q3_K +blk.37.attn_v.weight q5_K +blk.38.ffn_down.weight q3_K +blk.38.attn_output.weight q3_K +blk.38.attn_v.weight q5_K +blk.39.ffn_down.weight q3_K +blk.39.attn_output.weight q3_K +blk.39.attn_v.weight q5_K +blk.40.attn_output.weight q3_K +blk.40.attn_v.weight q5_K +blk.40.ffn_down.weight q3_K +blk.41.ffn_down.weight q3_K +blk.41.attn_output.weight q3_K +blk.41.attn_v.weight q5_K +blk.42.ffn_down.weight q3_K +blk.42.attn_output.weight q3_K +blk.42.attn_v.weight q5_K +blk.43.attn_output.weight q3_K +blk.43.attn_v.weight q5_K +blk.43.ffn_down.weight q3_K +blk.44.ffn_down.weight q3_K +blk.44.attn_output.weight q3_K +blk.44.attn_v.weight q5_K +blk.45.ffn_down.weight q3_K +blk.45.attn_output.weight q3_K +blk.45.attn_v.weight q5_K +blk.46.attn_output.weight q3_K +blk.46.attn_v.weight q5_K +blk.46.ffn_down.weight q3_K +blk.47.ffn_down.weight q3_K +blk.47.attn_output.weight q3_K +blk.47.attn_v.weight q5_K +blk.48.ffn_down.weight q3_K +blk.48.attn_output.weight q3_K +blk.48.attn_v.weight q5_K +blk.49.attn_v.weight q5_K +blk.49.ffn_down.weight q3_K +blk.49.attn_output.weight q3_K +blk.50.ffn_down.weight q3_K +blk.50.attn_output.weight q3_K +blk.50.attn_v.weight q5_K +blk.51.ffn_down.weight q3_K +blk.51.attn_output.weight q3_K +blk.51.attn_v.weight q5_K +blk.52.ffn_down.weight q3_K +blk.52.attn_output.weight q3_K +blk.52.attn_v.weight q5_K +blk.53.ffn_down.weight q3_K +blk.53.attn_output.weight q3_K +blk.53.attn_v.weight q5_K +blk.54.attn_output.weight q3_K +blk.54.attn_v.weight q5_K +blk.54.ffn_down.weight q3_K +blk.55.ffn_down.weight q3_K +blk.55.attn_output.weight q3_K +blk.55.attn_v.weight q5_K +blk.56.ffn_down.weight q3_K +blk.56.attn_output.weight q3_K +blk.56.attn_v.weight q5_K +blk.57.attn_output.weight q3_K +blk.57.attn_v.weight q5_K +blk.57.ffn_down.weight q3_K +blk.58.ffn_down.weight q3_K +blk.58.attn_output.weight q3_K +blk.58.attn_v.weight q5_K +blk.59.ffn_down.weight q3_K +blk.59.attn_output.weight q3_K +blk.59.attn_v.weight q5_K +blk.60.attn_output.weight q3_K +blk.60.attn_v.weight q5_K +blk.60.ffn_down.weight q3_K +blk.61.ffn_down.weight q3_K +blk.61.attn_output.weight q3_K +blk.61.attn_v.weight q5_K +blk.62.ffn_down.weight q3_K +blk.62.attn_output.weight q3_K +blk.62.attn_v.weight q5_K +blk.63.attn_v.weight q5_K +blk.63.ffn_down.weight q3_K +blk.63.attn_output.weight q3_K +blk.64.ffn_down.weight q3_K +blk.64.attn_output.weight q3_K +blk.64.attn_v.weight q5_K +blk.65.ffn_down.weight q3_K +blk.65.attn_output.weight q3_K +blk.65.attn_v.weight q5_K +blk.66.ffn_down.weight q3_K +blk.66.attn_output.weight q3_K +blk.66.attn_v.weight q5_K +blk.67.ffn_down.weight q3_K +blk.67.attn_output.weight q3_K +blk.67.attn_v.weight q5_K +blk.68.attn_output.weight q3_K +blk.68.attn_v.weight q5_K +blk.68.ffn_down.weight q3_K +blk.69.ffn_down.weight q3_K +blk.69.attn_output.weight q3_K +blk.69.attn_v.weight q5_K +blk.70.ffn_down.weight q3_K +blk.70.attn_output.weight q3_K +blk.70.attn_v.weight q5_K +blk.71.attn_output.weight q3_K +blk.71.attn_v.weight q5_K +blk.71.ffn_down.weight q3_K +blk.72.ffn_down.weight q3_K +blk.72.attn_output.weight q3_K +blk.72.attn_v.weight q5_K +blk.73.ffn_down.weight q3_K +blk.73.attn_output.weight q3_K +blk.73.attn_v.weight q5_K +blk.74.attn_output.weight q3_K +blk.74.attn_v.weight q5_K +blk.74.ffn_down.weight q3_K +blk.75.ffn_down.weight q3_K +blk.75.attn_output.weight q3_K +blk.75.attn_v.weight q5_K +blk.76.ffn_down.weight q3_K +blk.76.attn_output.weight q3_K +blk.76.attn_v.weight q5_K +blk.77.attn_v.weight q5_K +blk.77.ffn_down.weight q3_K +blk.77.attn_output.weight q3_K +blk.78.ffn_down.weight q3_K +blk.78.attn_output.weight q3_K +blk.78.attn_v.weight q5_K +blk.79.ffn_down.weight q3_K +blk.79.attn_output.weight q3_K +blk.79.attn_v.weight q5_K +output.weight q6_K + +[Q3_K_S] q3_K +blk.0.attn_v.weight q5_K +blk.1.attn_v.weight q5_K +blk.2.attn_v.weight q5_K +blk.3.attn_v.weight q5_K +blk.4.attn_v.weight q5_K +blk.5.attn_v.weight q5_K +blk.6.attn_v.weight q5_K +blk.7.attn_v.weight q5_K +blk.8.attn_v.weight q5_K +blk.9.attn_v.weight q5_K +blk.10.attn_v.weight q5_K +blk.11.attn_v.weight q5_K +blk.12.attn_v.weight q5_K +blk.13.attn_v.weight q5_K +blk.14.attn_v.weight q5_K +blk.15.attn_v.weight q5_K +blk.16.attn_v.weight q5_K +blk.17.attn_v.weight q5_K +blk.18.attn_v.weight q5_K +blk.19.attn_v.weight q5_K +blk.20.attn_v.weight q5_K +blk.21.attn_v.weight q5_K +blk.22.attn_v.weight q5_K +blk.23.attn_v.weight q5_K +blk.24.attn_v.weight q5_K +blk.25.attn_v.weight q5_K +blk.26.attn_v.weight q5_K +blk.27.attn_v.weight q5_K +blk.28.attn_v.weight q5_K +blk.29.attn_v.weight q5_K +blk.30.attn_v.weight q5_K +blk.31.attn_v.weight q5_K +blk.32.attn_v.weight q5_K +blk.33.attn_v.weight q5_K +blk.34.attn_v.weight q5_K +blk.35.attn_v.weight q5_K +blk.36.attn_v.weight q5_K +blk.37.attn_v.weight q5_K +blk.38.attn_v.weight q5_K +blk.39.attn_v.weight q5_K +blk.40.attn_v.weight q5_K +blk.41.attn_v.weight q5_K +blk.42.attn_v.weight q5_K +blk.43.attn_v.weight q5_K +blk.44.attn_v.weight q5_K +blk.45.attn_v.weight q5_K +blk.46.attn_v.weight q5_K +blk.47.attn_v.weight q5_K +blk.48.attn_v.weight q5_K +blk.49.attn_v.weight q5_K +blk.50.attn_v.weight q5_K +blk.51.attn_v.weight q5_K +blk.52.attn_v.weight q5_K +blk.53.attn_v.weight q5_K +blk.54.attn_v.weight q5_K +blk.55.attn_v.weight q5_K +blk.56.attn_v.weight q5_K +blk.57.attn_v.weight q5_K +blk.58.attn_v.weight q5_K +blk.59.attn_v.weight q5_K +blk.60.attn_v.weight q5_K +blk.61.attn_v.weight q5_K +blk.62.attn_v.weight q5_K +blk.63.attn_v.weight q5_K +blk.64.attn_v.weight q5_K +blk.65.attn_v.weight q5_K +blk.66.attn_v.weight q5_K +blk.67.attn_v.weight q5_K +blk.68.attn_v.weight q5_K +blk.69.attn_v.weight q5_K +blk.70.attn_v.weight q5_K +blk.71.attn_v.weight q5_K +blk.72.attn_v.weight q5_K +blk.73.attn_v.weight q5_K +blk.74.attn_v.weight q5_K +blk.75.attn_v.weight q5_K +blk.76.attn_v.weight q5_K +blk.77.attn_v.weight q5_K +blk.78.attn_v.weight q5_K +blk.79.attn_v.weight q5_K +output.weight q6_K + +[Q3_K_M] q3_K +blk.0.ffn_down.weight q5_K +blk.0.attn_output.weight q4_K +blk.0.attn_v.weight q5_K +blk.1.attn_output.weight q4_K +blk.1.attn_v.weight q5_K +blk.1.ffn_down.weight q5_K +blk.2.ffn_down.weight q5_K +blk.2.attn_output.weight q4_K +blk.2.attn_v.weight q5_K +blk.3.ffn_down.weight q5_K +blk.3.attn_output.weight q4_K +blk.3.attn_v.weight q5_K +blk.4.attn_output.weight q4_K +blk.4.attn_v.weight q5_K +blk.4.ffn_down.weight q5_K +blk.5.ffn_down.weight q4_K +blk.5.attn_output.weight q4_K +blk.5.attn_v.weight q5_K +blk.6.ffn_down.weight q4_K +blk.6.attn_output.weight q4_K +blk.6.attn_v.weight q5_K +blk.7.attn_v.weight q5_K +blk.7.ffn_down.weight q4_K +blk.7.attn_output.weight q4_K +blk.8.ffn_down.weight q4_K +blk.8.attn_output.weight q4_K +blk.8.attn_v.weight q5_K +blk.9.ffn_down.weight q4_K +blk.9.attn_output.weight q4_K +blk.9.attn_v.weight q5_K +blk.10.ffn_down.weight q4_K +blk.10.attn_output.weight q4_K +blk.10.attn_v.weight q5_K +blk.11.ffn_down.weight q4_K +blk.11.attn_output.weight q4_K +blk.11.attn_v.weight q5_K +blk.12.attn_output.weight q4_K +blk.12.attn_v.weight q5_K +blk.12.ffn_down.weight q4_K +blk.13.ffn_down.weight q4_K +blk.13.attn_output.weight q4_K +blk.13.attn_v.weight q5_K +blk.14.ffn_down.weight q4_K +blk.14.attn_output.weight q4_K +blk.14.attn_v.weight q5_K +blk.15.attn_output.weight q4_K +blk.15.attn_v.weight q5_K +blk.15.ffn_down.weight q4_K +blk.16.ffn_down.weight q4_K +blk.16.attn_output.weight q4_K +blk.16.attn_v.weight q5_K +blk.17.ffn_down.weight q4_K +blk.17.attn_output.weight q4_K +blk.17.attn_v.weight q5_K +blk.18.attn_output.weight q4_K +blk.18.attn_v.weight q5_K +blk.18.ffn_down.weight q4_K +blk.19.ffn_down.weight q4_K +blk.19.attn_output.weight q4_K +blk.19.attn_v.weight q5_K +blk.20.ffn_down.weight q4_K +blk.20.attn_output.weight q4_K +blk.20.attn_v.weight q5_K +blk.21.attn_v.weight q5_K +blk.21.ffn_down.weight q4_K +blk.21.attn_output.weight q4_K +blk.22.ffn_down.weight q4_K +blk.22.attn_output.weight q4_K +blk.22.attn_v.weight q5_K +blk.23.ffn_down.weight q4_K +blk.23.attn_output.weight q4_K +blk.23.attn_v.weight q5_K +blk.24.ffn_down.weight q4_K +blk.24.attn_output.weight q4_K +blk.24.attn_v.weight q5_K +blk.25.ffn_down.weight q4_K +blk.25.attn_output.weight q4_K +blk.25.attn_v.weight q5_K +blk.26.attn_output.weight q4_K +blk.26.attn_v.weight q5_K +blk.26.ffn_down.weight q4_K +blk.27.ffn_down.weight q4_K +blk.27.attn_output.weight q4_K +blk.27.attn_v.weight q5_K +blk.28.ffn_down.weight q4_K +blk.28.attn_output.weight q4_K +blk.28.attn_v.weight q5_K +blk.29.attn_output.weight q4_K +blk.29.attn_v.weight q5_K +blk.29.ffn_down.weight q4_K +blk.30.ffn_down.weight q4_K +blk.30.attn_output.weight q4_K +blk.30.attn_v.weight q5_K +blk.31.ffn_down.weight q4_K +blk.31.attn_output.weight q4_K +blk.31.attn_v.weight q5_K +blk.32.attn_output.weight q4_K +blk.32.attn_v.weight q5_K +blk.32.ffn_down.weight q4_K +blk.33.ffn_down.weight q4_K +blk.33.attn_output.weight q4_K +blk.33.attn_v.weight q5_K +blk.34.ffn_down.weight q4_K +blk.34.attn_output.weight q4_K +blk.34.attn_v.weight q5_K +blk.35.attn_v.weight q5_K +blk.35.ffn_down.weight q4_K +blk.35.attn_output.weight q4_K +blk.36.ffn_down.weight q4_K +blk.36.attn_output.weight q4_K +blk.36.attn_v.weight q5_K +blk.37.ffn_down.weight q4_K +blk.37.attn_output.weight q4_K +blk.37.attn_v.weight q5_K +blk.38.ffn_down.weight q4_K +blk.38.attn_output.weight q4_K +blk.38.attn_v.weight q5_K +blk.39.ffn_down.weight q4_K +blk.39.attn_output.weight q4_K +blk.39.attn_v.weight q5_K +blk.40.attn_output.weight q4_K +blk.40.attn_v.weight q5_K +blk.40.ffn_down.weight q4_K +blk.41.ffn_down.weight q4_K +blk.41.attn_output.weight q4_K +blk.41.attn_v.weight q5_K +blk.42.ffn_down.weight q4_K +blk.42.attn_output.weight q4_K +blk.42.attn_v.weight q5_K +blk.43.attn_output.weight q4_K +blk.43.attn_v.weight q5_K +blk.43.ffn_down.weight q4_K +blk.44.ffn_down.weight q4_K +blk.44.attn_output.weight q4_K +blk.44.attn_v.weight q5_K +blk.45.ffn_down.weight q4_K +blk.45.attn_output.weight q4_K +blk.45.attn_v.weight q5_K +blk.46.attn_output.weight q4_K +blk.46.attn_v.weight q5_K +blk.46.ffn_down.weight q4_K +blk.47.ffn_down.weight q4_K +blk.47.attn_output.weight q4_K +blk.47.attn_v.weight q5_K +blk.48.ffn_down.weight q4_K +blk.48.attn_output.weight q4_K +blk.48.attn_v.weight q5_K +blk.49.attn_v.weight q5_K +blk.49.ffn_down.weight q4_K +blk.49.attn_output.weight q4_K +blk.50.ffn_down.weight q4_K +blk.50.attn_output.weight q4_K +blk.50.attn_v.weight q5_K +blk.51.ffn_down.weight q4_K +blk.51.attn_output.weight q4_K +blk.51.attn_v.weight q5_K +blk.52.ffn_down.weight q4_K +blk.52.attn_output.weight q4_K +blk.52.attn_v.weight q5_K +blk.53.ffn_down.weight q4_K +blk.53.attn_output.weight q4_K +blk.53.attn_v.weight q5_K +blk.54.attn_output.weight q4_K +blk.54.attn_v.weight q5_K +blk.54.ffn_down.weight q4_K +blk.55.ffn_down.weight q4_K +blk.55.attn_output.weight q4_K +blk.55.attn_v.weight q5_K +blk.56.ffn_down.weight q4_K +blk.56.attn_output.weight q4_K +blk.56.attn_v.weight q5_K +blk.57.attn_output.weight q4_K +blk.57.attn_v.weight q5_K +blk.57.ffn_down.weight q4_K +blk.58.ffn_down.weight q4_K +blk.58.attn_output.weight q4_K +blk.58.attn_v.weight q5_K +blk.59.ffn_down.weight q4_K +blk.59.attn_output.weight q4_K +blk.59.attn_v.weight q5_K +blk.60.attn_output.weight q4_K +blk.60.attn_v.weight q5_K +blk.60.ffn_down.weight q4_K +blk.61.ffn_down.weight q4_K +blk.61.attn_output.weight q4_K +blk.61.attn_v.weight q5_K +blk.62.ffn_down.weight q4_K +blk.62.attn_output.weight q4_K +blk.62.attn_v.weight q5_K +blk.63.attn_v.weight q5_K +blk.63.ffn_down.weight q4_K +blk.63.attn_output.weight q4_K +blk.64.ffn_down.weight q4_K +blk.64.attn_output.weight q4_K +blk.64.attn_v.weight q5_K +blk.65.ffn_down.weight q4_K +blk.65.attn_output.weight q4_K +blk.65.attn_v.weight q5_K +blk.66.ffn_down.weight q4_K +blk.66.attn_output.weight q4_K +blk.66.attn_v.weight q5_K +blk.67.ffn_down.weight q4_K +blk.67.attn_output.weight q4_K +blk.67.attn_v.weight q5_K +blk.68.attn_output.weight q4_K +blk.68.attn_v.weight q5_K +blk.68.ffn_down.weight q4_K +blk.69.ffn_down.weight q4_K +blk.69.attn_output.weight q4_K +blk.69.attn_v.weight q5_K +blk.70.ffn_down.weight q4_K +blk.70.attn_output.weight q4_K +blk.70.attn_v.weight q5_K +blk.71.attn_output.weight q4_K +blk.71.attn_v.weight q5_K +blk.71.ffn_down.weight q4_K +blk.72.ffn_down.weight q4_K +blk.72.attn_output.weight q4_K +blk.72.attn_v.weight q5_K +blk.73.ffn_down.weight q4_K +blk.73.attn_output.weight q4_K +blk.73.attn_v.weight q5_K +blk.74.attn_output.weight q4_K +blk.74.attn_v.weight q5_K +blk.74.ffn_down.weight q4_K +blk.75.ffn_down.weight q4_K +blk.75.attn_output.weight q4_K +blk.75.attn_v.weight q5_K +blk.76.ffn_down.weight q4_K +blk.76.attn_output.weight q4_K +blk.76.attn_v.weight q5_K +blk.77.attn_v.weight q5_K +blk.77.ffn_down.weight q4_K +blk.77.attn_output.weight q4_K +blk.78.ffn_down.weight q4_K +blk.78.attn_output.weight q4_K +blk.78.attn_v.weight q5_K +blk.79.ffn_down.weight q4_K +blk.79.attn_output.weight q4_K +blk.79.attn_v.weight q5_K +output.weight q6_K + +[Q3_K_L] q3_K +blk.0.ffn_down.weight q5_K +blk.0.attn_output.weight q5_K +blk.0.attn_v.weight q5_K +blk.1.attn_output.weight q5_K +blk.1.attn_v.weight q5_K +blk.1.ffn_down.weight q5_K +blk.2.ffn_down.weight q5_K +blk.2.attn_output.weight q5_K +blk.2.attn_v.weight q5_K +blk.3.ffn_down.weight q5_K +blk.3.attn_output.weight q5_K +blk.3.attn_v.weight q5_K +blk.4.attn_output.weight q5_K +blk.4.attn_v.weight q5_K +blk.4.ffn_down.weight q5_K +blk.5.ffn_down.weight q5_K +blk.5.attn_output.weight q5_K +blk.5.attn_v.weight q5_K +blk.6.ffn_down.weight q5_K +blk.6.attn_output.weight q5_K +blk.6.attn_v.weight q5_K +blk.7.attn_v.weight q5_K +blk.7.ffn_down.weight q5_K +blk.7.attn_output.weight q5_K +blk.8.ffn_down.weight q5_K +blk.8.attn_output.weight q5_K +blk.8.attn_v.weight q5_K +blk.9.ffn_down.weight q5_K +blk.9.attn_output.weight q5_K +blk.9.attn_v.weight q5_K +blk.10.ffn_down.weight q5_K +blk.10.attn_output.weight q5_K +blk.10.attn_v.weight q5_K +blk.11.ffn_down.weight q5_K +blk.11.attn_output.weight q5_K +blk.11.attn_v.weight q5_K +blk.12.attn_output.weight q5_K +blk.12.attn_v.weight q5_K +blk.12.ffn_down.weight q5_K +blk.13.ffn_down.weight q5_K +blk.13.attn_output.weight q5_K +blk.13.attn_v.weight q5_K +blk.14.ffn_down.weight q5_K +blk.14.attn_output.weight q5_K +blk.14.attn_v.weight q5_K +blk.15.attn_output.weight q5_K +blk.15.attn_v.weight q5_K +blk.15.ffn_down.weight q5_K +blk.16.ffn_down.weight q5_K +blk.16.attn_output.weight q5_K +blk.16.attn_v.weight q5_K +blk.17.ffn_down.weight q5_K +blk.17.attn_output.weight q5_K +blk.17.attn_v.weight q5_K +blk.18.attn_output.weight q5_K +blk.18.attn_v.weight q5_K +blk.18.ffn_down.weight q5_K +blk.19.ffn_down.weight q5_K +blk.19.attn_output.weight q5_K +blk.19.attn_v.weight q5_K +blk.20.ffn_down.weight q5_K +blk.20.attn_output.weight q5_K +blk.20.attn_v.weight q5_K +blk.21.attn_v.weight q5_K +blk.21.ffn_down.weight q5_K +blk.21.attn_output.weight q5_K +blk.22.ffn_down.weight q5_K +blk.22.attn_output.weight q5_K +blk.22.attn_v.weight q5_K +blk.23.ffn_down.weight q5_K +blk.23.attn_output.weight q5_K +blk.23.attn_v.weight q5_K +blk.24.ffn_down.weight q5_K +blk.24.attn_output.weight q5_K +blk.24.attn_v.weight q5_K +blk.25.ffn_down.weight q5_K +blk.25.attn_output.weight q5_K +blk.25.attn_v.weight q5_K +blk.26.attn_output.weight q5_K +blk.26.attn_v.weight q5_K +blk.26.ffn_down.weight q5_K +blk.27.ffn_down.weight q5_K +blk.27.attn_output.weight q5_K +blk.27.attn_v.weight q5_K +blk.28.ffn_down.weight q5_K +blk.28.attn_output.weight q5_K +blk.28.attn_v.weight q5_K +blk.29.attn_output.weight q5_K +blk.29.attn_v.weight q5_K +blk.29.ffn_down.weight q5_K +blk.30.ffn_down.weight q5_K +blk.30.attn_output.weight q5_K +blk.30.attn_v.weight q5_K +blk.31.ffn_down.weight q5_K +blk.31.attn_output.weight q5_K +blk.31.attn_v.weight q5_K +blk.32.attn_output.weight q5_K +blk.32.attn_v.weight q5_K +blk.32.ffn_down.weight q5_K +blk.33.ffn_down.weight q5_K +blk.33.attn_output.weight q5_K +blk.33.attn_v.weight q5_K +blk.34.ffn_down.weight q5_K +blk.34.attn_output.weight q5_K +blk.34.attn_v.weight q5_K +blk.35.attn_v.weight q5_K +blk.35.ffn_down.weight q5_K +blk.35.attn_output.weight q5_K +blk.36.ffn_down.weight q5_K +blk.36.attn_output.weight q5_K +blk.36.attn_v.weight q5_K +blk.37.ffn_down.weight q5_K +blk.37.attn_output.weight q5_K +blk.37.attn_v.weight q5_K +blk.38.ffn_down.weight q5_K +blk.38.attn_output.weight q5_K +blk.38.attn_v.weight q5_K +blk.39.ffn_down.weight q5_K +blk.39.attn_output.weight q5_K +blk.39.attn_v.weight q5_K +blk.40.attn_output.weight q5_K +blk.40.attn_v.weight q5_K +blk.40.ffn_down.weight q5_K +blk.41.ffn_down.weight q5_K +blk.41.attn_output.weight q5_K +blk.41.attn_v.weight q5_K +blk.42.ffn_down.weight q5_K +blk.42.attn_output.weight q5_K +blk.42.attn_v.weight q5_K +blk.43.attn_output.weight q5_K +blk.43.attn_v.weight q5_K +blk.43.ffn_down.weight q5_K +blk.44.ffn_down.weight q5_K +blk.44.attn_output.weight q5_K +blk.44.attn_v.weight q5_K +blk.45.ffn_down.weight q5_K +blk.45.attn_output.weight q5_K +blk.45.attn_v.weight q5_K +blk.46.attn_output.weight q5_K +blk.46.attn_v.weight q5_K +blk.46.ffn_down.weight q5_K +blk.47.ffn_down.weight q5_K +blk.47.attn_output.weight q5_K +blk.47.attn_v.weight q5_K +blk.48.ffn_down.weight q5_K +blk.48.attn_output.weight q5_K +blk.48.attn_v.weight q5_K +blk.49.attn_v.weight q5_K +blk.49.ffn_down.weight q5_K +blk.49.attn_output.weight q5_K +blk.50.ffn_down.weight q5_K +blk.50.attn_output.weight q5_K +blk.50.attn_v.weight q5_K +blk.51.ffn_down.weight q5_K +blk.51.attn_output.weight q5_K +blk.51.attn_v.weight q5_K +blk.52.ffn_down.weight q5_K +blk.52.attn_output.weight q5_K +blk.52.attn_v.weight q5_K +blk.53.ffn_down.weight q5_K +blk.53.attn_output.weight q5_K +blk.53.attn_v.weight q5_K +blk.54.attn_output.weight q5_K +blk.54.attn_v.weight q5_K +blk.54.ffn_down.weight q5_K +blk.55.ffn_down.weight q5_K +blk.55.attn_output.weight q5_K +blk.55.attn_v.weight q5_K +blk.56.ffn_down.weight q5_K +blk.56.attn_output.weight q5_K +blk.56.attn_v.weight q5_K +blk.57.attn_output.weight q5_K +blk.57.attn_v.weight q5_K +blk.57.ffn_down.weight q5_K +blk.58.ffn_down.weight q5_K +blk.58.attn_output.weight q5_K +blk.58.attn_v.weight q5_K +blk.59.ffn_down.weight q5_K +blk.59.attn_output.weight q5_K +blk.59.attn_v.weight q5_K +blk.60.attn_output.weight q5_K +blk.60.attn_v.weight q5_K +blk.60.ffn_down.weight q5_K +blk.61.ffn_down.weight q5_K +blk.61.attn_output.weight q5_K +blk.61.attn_v.weight q5_K +blk.62.ffn_down.weight q5_K +blk.62.attn_output.weight q5_K +blk.62.attn_v.weight q5_K +blk.63.attn_v.weight q5_K +blk.63.ffn_down.weight q5_K +blk.63.attn_output.weight q5_K +blk.64.ffn_down.weight q5_K +blk.64.attn_output.weight q5_K +blk.64.attn_v.weight q5_K +blk.65.ffn_down.weight q5_K +blk.65.attn_output.weight q5_K +blk.65.attn_v.weight q5_K +blk.66.ffn_down.weight q5_K +blk.66.attn_output.weight q5_K +blk.66.attn_v.weight q5_K +blk.67.ffn_down.weight q5_K +blk.67.attn_output.weight q5_K +blk.67.attn_v.weight q5_K +blk.68.attn_output.weight q5_K +blk.68.attn_v.weight q5_K +blk.68.ffn_down.weight q5_K +blk.69.ffn_down.weight q5_K +blk.69.attn_output.weight q5_K +blk.69.attn_v.weight q5_K +blk.70.ffn_down.weight q5_K +blk.70.attn_output.weight q5_K +blk.70.attn_v.weight q5_K +blk.71.attn_output.weight q5_K +blk.71.attn_v.weight q5_K +blk.71.ffn_down.weight q5_K +blk.72.ffn_down.weight q5_K +blk.72.attn_output.weight q5_K +blk.72.attn_v.weight q5_K +blk.73.ffn_down.weight q5_K +blk.73.attn_output.weight q5_K +blk.73.attn_v.weight q5_K +blk.74.attn_output.weight q5_K +blk.74.attn_v.weight q5_K +blk.74.ffn_down.weight q5_K +blk.75.ffn_down.weight q5_K +blk.75.attn_output.weight q5_K +blk.75.attn_v.weight q5_K +blk.76.ffn_down.weight q5_K +blk.76.attn_output.weight q5_K +blk.76.attn_v.weight q5_K +blk.77.attn_v.weight q5_K +blk.77.ffn_down.weight q5_K +blk.77.attn_output.weight q5_K +blk.78.ffn_down.weight q5_K +blk.78.attn_output.weight q5_K +blk.78.attn_v.weight q5_K +blk.79.ffn_down.weight q5_K +blk.79.attn_output.weight q5_K +blk.79.attn_v.weight q5_K +output.weight q6_K + +[Q4_K_S] q4_K +blk.0.ffn_down.weight q5_K +blk.0.attn_v.weight q5_K +blk.1.attn_v.weight q5_K +blk.1.ffn_down.weight q5_K +blk.2.ffn_down.weight q5_K +blk.2.attn_v.weight q5_K +blk.3.ffn_down.weight q5_K +blk.3.attn_v.weight q5_K +blk.4.attn_v.weight q5_K +blk.4.ffn_down.weight q5_K +blk.5.ffn_down.weight q5_K +blk.5.attn_v.weight q5_K +blk.6.ffn_down.weight q5_K +blk.6.attn_v.weight q5_K +blk.7.attn_v.weight q5_K +blk.7.ffn_down.weight q5_K +blk.8.ffn_down.weight q5_K +blk.8.attn_v.weight q5_K +blk.9.ffn_down.weight q5_K +blk.9.attn_v.weight q5_K +blk.10.attn_v.weight q5_K +blk.11.attn_v.weight q5_K +blk.12.attn_v.weight q5_K +blk.13.attn_v.weight q5_K +blk.14.attn_v.weight q5_K +blk.15.attn_v.weight q5_K +blk.16.attn_v.weight q5_K +blk.17.attn_v.weight q5_K +blk.18.attn_v.weight q5_K +blk.19.attn_v.weight q5_K +blk.20.attn_v.weight q5_K +blk.21.attn_v.weight q5_K +blk.22.attn_v.weight q5_K +blk.23.attn_v.weight q5_K +blk.24.attn_v.weight q5_K +blk.25.attn_v.weight q5_K +blk.26.attn_v.weight q5_K +blk.27.attn_v.weight q5_K +blk.28.attn_v.weight q5_K +blk.29.attn_v.weight q5_K +blk.30.attn_v.weight q5_K +blk.31.attn_v.weight q5_K +blk.32.attn_v.weight q5_K +blk.33.attn_v.weight q5_K +blk.34.attn_v.weight q5_K +blk.35.attn_v.weight q5_K +blk.36.attn_v.weight q5_K +blk.37.attn_v.weight q5_K +blk.38.attn_v.weight q5_K +blk.39.attn_v.weight q5_K +blk.40.attn_v.weight q5_K +blk.41.attn_v.weight q5_K +blk.42.attn_v.weight q5_K +blk.43.attn_v.weight q5_K +blk.44.attn_v.weight q5_K +blk.45.attn_v.weight q5_K +blk.46.attn_v.weight q5_K +blk.47.attn_v.weight q5_K +blk.48.attn_v.weight q5_K +blk.49.attn_v.weight q5_K +blk.50.attn_v.weight q5_K +blk.51.attn_v.weight q5_K +blk.52.attn_v.weight q5_K +blk.53.attn_v.weight q5_K +blk.54.attn_v.weight q5_K +blk.55.attn_v.weight q5_K +blk.56.attn_v.weight q5_K +blk.57.attn_v.weight q5_K +blk.58.attn_v.weight q5_K +blk.59.attn_v.weight q5_K +blk.60.attn_v.weight q5_K +blk.61.attn_v.weight q5_K +blk.62.attn_v.weight q5_K +blk.63.attn_v.weight q5_K +blk.64.attn_v.weight q5_K +blk.65.attn_v.weight q5_K +blk.66.attn_v.weight q5_K +blk.67.attn_v.weight q5_K +blk.68.attn_v.weight q5_K +blk.69.attn_v.weight q5_K +blk.70.attn_v.weight q5_K +blk.71.attn_v.weight q5_K +blk.72.attn_v.weight q5_K +blk.73.attn_v.weight q5_K +blk.74.attn_v.weight q5_K +blk.75.attn_v.weight q5_K +blk.76.attn_v.weight q5_K +blk.77.attn_v.weight q5_K +blk.78.attn_v.weight q5_K +blk.79.attn_v.weight q5_K +output.weight q6_K + +[Q4_K_M] q4_K +blk.0.ffn_down.weight q6_K +blk.0.attn_v.weight q6_K +blk.1.attn_v.weight q6_K +blk.1.ffn_down.weight q6_K +blk.2.ffn_down.weight q6_K +blk.2.attn_v.weight q6_K +blk.3.ffn_down.weight q6_K +blk.3.attn_v.weight q6_K +blk.4.attn_v.weight q6_K +blk.4.ffn_down.weight q6_K +blk.5.ffn_down.weight q6_K +blk.5.attn_v.weight q6_K +blk.6.ffn_down.weight q6_K +blk.6.attn_v.weight q6_K +blk.7.attn_v.weight q6_K +blk.7.ffn_down.weight q6_K +blk.8.ffn_down.weight q6_K +blk.8.attn_v.weight q6_K +blk.9.ffn_down.weight q6_K +blk.9.attn_v.weight q6_K +blk.10.attn_v.weight q5_K +blk.11.attn_v.weight q5_K +blk.12.attn_v.weight q6_K +blk.12.ffn_down.weight q6_K +blk.13.attn_v.weight q5_K +blk.14.attn_v.weight q5_K +blk.15.attn_v.weight q6_K +blk.15.ffn_down.weight q6_K +blk.16.attn_v.weight q5_K +blk.17.attn_v.weight q5_K +blk.18.attn_v.weight q6_K +blk.18.ffn_down.weight q6_K +blk.19.attn_v.weight q5_K +blk.20.attn_v.weight q5_K +blk.21.attn_v.weight q6_K +blk.21.ffn_down.weight q6_K +blk.22.attn_v.weight q5_K +blk.23.attn_v.weight q5_K +blk.24.ffn_down.weight q6_K +blk.24.attn_v.weight q6_K +blk.25.attn_v.weight q5_K +blk.26.attn_v.weight q5_K +blk.27.ffn_down.weight q6_K +blk.27.attn_v.weight q6_K +blk.28.attn_v.weight q5_K +blk.29.attn_v.weight q5_K +blk.30.ffn_down.weight q6_K +blk.30.attn_v.weight q6_K +blk.31.attn_v.weight q5_K +blk.32.attn_v.weight q5_K +blk.33.ffn_down.weight q6_K +blk.33.attn_v.weight q6_K +blk.34.attn_v.weight q5_K +blk.35.attn_v.weight q5_K +blk.36.ffn_down.weight q6_K +blk.36.attn_v.weight q6_K +blk.37.attn_v.weight q5_K +blk.38.attn_v.weight q5_K +blk.39.ffn_down.weight q6_K +blk.39.attn_v.weight q6_K +blk.40.attn_v.weight q5_K +blk.41.attn_v.weight q5_K +blk.42.ffn_down.weight q6_K +blk.42.attn_v.weight q6_K +blk.43.attn_v.weight q5_K +blk.44.attn_v.weight q5_K +blk.45.ffn_down.weight q6_K +blk.45.attn_v.weight q6_K +blk.46.attn_v.weight q5_K +blk.47.attn_v.weight q5_K +blk.48.ffn_down.weight q6_K +blk.48.attn_v.weight q6_K +blk.49.attn_v.weight q5_K +blk.50.attn_v.weight q5_K +blk.51.ffn_down.weight q6_K +blk.51.attn_v.weight q6_K +blk.52.attn_v.weight q5_K +blk.53.attn_v.weight q5_K +blk.54.attn_v.weight q6_K +blk.54.ffn_down.weight q6_K +blk.55.attn_v.weight q5_K +blk.56.attn_v.weight q5_K +blk.57.attn_v.weight q6_K +blk.57.ffn_down.weight q6_K +blk.58.attn_v.weight q5_K +blk.59.attn_v.weight q5_K +blk.60.attn_v.weight q6_K +blk.60.ffn_down.weight q6_K +blk.61.attn_v.weight q5_K +blk.62.attn_v.weight q5_K +blk.63.attn_v.weight q6_K +blk.63.ffn_down.weight q6_K +blk.64.attn_v.weight q5_K +blk.65.attn_v.weight q5_K +blk.66.ffn_down.weight q6_K +blk.66.attn_v.weight q6_K +blk.67.attn_v.weight q5_K +blk.68.attn_v.weight q5_K +blk.69.ffn_down.weight q6_K +blk.69.attn_v.weight q6_K +blk.70.ffn_down.weight q6_K +blk.70.attn_v.weight q6_K +blk.71.attn_v.weight q6_K +blk.71.ffn_down.weight q6_K +blk.72.ffn_down.weight q6_K +blk.72.attn_v.weight q6_K +blk.73.ffn_down.weight q6_K +blk.73.attn_v.weight q6_K +blk.74.attn_v.weight q6_K +blk.74.ffn_down.weight q6_K +blk.75.ffn_down.weight q6_K +blk.75.attn_v.weight q6_K +blk.76.ffn_down.weight q6_K +blk.76.attn_v.weight q6_K +blk.77.attn_v.weight q6_K +blk.77.ffn_down.weight q6_K +blk.78.ffn_down.weight q6_K +blk.78.attn_v.weight q6_K +blk.79.ffn_down.weight q6_K +blk.79.attn_v.weight q6_K +output.weight q6_K + +[Q5_K_S] q5_K +output.weight q6_K + +[Q5_K_M] q5_K +blk.0.ffn_down.weight q6_K +blk.0.attn_v.weight q6_K +blk.1.attn_v.weight q6_K +blk.1.ffn_down.weight q6_K +blk.2.ffn_down.weight q6_K +blk.2.attn_v.weight q6_K +blk.3.ffn_down.weight q6_K +blk.3.attn_v.weight q6_K +blk.4.attn_v.weight q6_K +blk.4.ffn_down.weight q6_K +blk.5.ffn_down.weight q6_K +blk.5.attn_v.weight q6_K +blk.6.ffn_down.weight q6_K +blk.6.attn_v.weight q6_K +blk.7.attn_v.weight q6_K +blk.7.ffn_down.weight q6_K +blk.8.ffn_down.weight q6_K +blk.8.attn_v.weight q6_K +blk.9.ffn_down.weight q6_K +blk.9.attn_v.weight q6_K +blk.12.attn_v.weight q6_K +blk.12.ffn_down.weight q6_K +blk.15.attn_v.weight q6_K +blk.15.ffn_down.weight q6_K +blk.18.attn_v.weight q6_K +blk.18.ffn_down.weight q6_K +blk.21.attn_v.weight q6_K +blk.21.ffn_down.weight q6_K +blk.24.ffn_down.weight q6_K +blk.24.attn_v.weight q6_K +blk.27.ffn_down.weight q6_K +blk.27.attn_v.weight q6_K +blk.30.ffn_down.weight q6_K +blk.30.attn_v.weight q6_K +blk.33.ffn_down.weight q6_K +blk.33.attn_v.weight q6_K +blk.36.ffn_down.weight q6_K +blk.36.attn_v.weight q6_K +blk.39.ffn_down.weight q6_K +blk.39.attn_v.weight q6_K +blk.42.ffn_down.weight q6_K +blk.42.attn_v.weight q6_K +blk.45.ffn_down.weight q6_K +blk.45.attn_v.weight q6_K +blk.48.ffn_down.weight q6_K +blk.48.attn_v.weight q6_K +blk.51.ffn_down.weight q6_K +blk.51.attn_v.weight q6_K +blk.54.attn_v.weight q6_K +blk.54.ffn_down.weight q6_K +blk.57.attn_v.weight q6_K +blk.57.ffn_down.weight q6_K +blk.60.attn_v.weight q6_K +blk.60.ffn_down.weight q6_K +blk.63.attn_v.weight q6_K +blk.63.ffn_down.weight q6_K +blk.66.ffn_down.weight q6_K +blk.66.attn_v.weight q6_K +blk.69.ffn_down.weight q6_K +blk.69.attn_v.weight q6_K +blk.70.ffn_down.weight q6_K +blk.70.attn_v.weight q6_K +blk.71.attn_v.weight q6_K +blk.71.ffn_down.weight q6_K +blk.72.ffn_down.weight q6_K +blk.72.attn_v.weight q6_K +blk.73.ffn_down.weight q6_K +blk.73.attn_v.weight q6_K +blk.74.attn_v.weight q6_K +blk.74.ffn_down.weight q6_K +blk.75.ffn_down.weight q6_K +blk.75.attn_v.weight q6_K +blk.76.ffn_down.weight q6_K +blk.76.attn_v.weight q6_K +blk.77.attn_v.weight q6_K +blk.77.ffn_down.weight q6_K +blk.78.ffn_down.weight q6_K +blk.78.attn_v.weight q6_K +blk.79.ffn_down.weight q6_K +blk.79.attn_v.weight q6_K +output.weight q6_K + +[Q6_K] q6_K + +[IQ2_XXS] iq2_xxs +token_embd.weight q2_K +blk.0.ffn_down.weight q2_K +blk.0.attn_v.weight q4_K +blk.1.attn_v.weight q4_K +blk.1.ffn_down.weight q2_K +blk.2.ffn_down.weight q2_K +blk.2.attn_v.weight q4_K +blk.3.ffn_down.weight q2_K +blk.3.attn_v.weight q4_K +blk.4.attn_v.weight q4_K +blk.4.ffn_down.weight q2_K +blk.5.ffn_down.weight q2_K +blk.5.attn_v.weight q4_K +blk.6.ffn_down.weight q2_K +blk.6.attn_v.weight q4_K +blk.7.attn_v.weight q4_K +blk.7.ffn_down.weight q2_K +blk.8.ffn_down.weight q2_K +blk.8.attn_v.weight q4_K +blk.9.ffn_down.weight q2_K +blk.9.attn_v.weight q4_K +blk.10.attn_v.weight q4_K +blk.11.attn_v.weight q4_K +blk.12.attn_v.weight q4_K +blk.13.attn_v.weight q4_K +blk.14.attn_v.weight q4_K +blk.15.attn_v.weight q4_K +blk.16.attn_v.weight q4_K +blk.17.attn_v.weight q4_K +blk.18.attn_v.weight q4_K +blk.19.attn_v.weight q4_K +blk.20.attn_v.weight q4_K +blk.21.attn_v.weight q4_K +blk.22.attn_v.weight q4_K +blk.23.attn_v.weight q4_K +blk.24.attn_v.weight q4_K +blk.25.attn_v.weight q4_K +blk.26.attn_v.weight q4_K +blk.27.attn_v.weight q4_K +blk.28.attn_v.weight q4_K +blk.29.attn_v.weight q4_K +blk.30.attn_v.weight q4_K +blk.31.attn_v.weight q4_K +blk.32.attn_v.weight q4_K +blk.33.attn_v.weight q4_K +blk.34.attn_v.weight q4_K +blk.35.attn_v.weight q4_K +blk.36.attn_v.weight q4_K +blk.37.attn_v.weight q4_K +blk.38.attn_v.weight q4_K +blk.39.attn_v.weight q4_K +blk.40.attn_v.weight q4_K +blk.41.attn_v.weight q4_K +blk.42.attn_v.weight q4_K +blk.43.attn_v.weight q4_K +blk.44.attn_v.weight q4_K +blk.45.attn_v.weight q4_K +blk.46.attn_v.weight q4_K +blk.47.attn_v.weight q4_K +blk.48.attn_v.weight q4_K +blk.49.attn_v.weight q4_K +blk.50.attn_v.weight q4_K +blk.51.attn_v.weight q4_K +blk.52.attn_v.weight q4_K +blk.53.attn_v.weight q4_K +blk.54.attn_v.weight q4_K +blk.55.attn_v.weight q4_K +blk.56.attn_v.weight q4_K +blk.57.attn_v.weight q4_K +blk.58.attn_v.weight q4_K +blk.59.attn_v.weight q4_K +blk.60.attn_v.weight q4_K +blk.61.attn_v.weight q4_K +blk.62.attn_v.weight q4_K +blk.63.attn_v.weight q4_K +blk.64.attn_v.weight q4_K +blk.65.attn_v.weight q4_K +blk.66.attn_v.weight q4_K +blk.67.attn_v.weight q4_K +blk.68.attn_v.weight q4_K +blk.69.attn_v.weight q4_K +blk.70.attn_v.weight q4_K +blk.71.attn_v.weight q4_K +blk.72.attn_v.weight q4_K +blk.73.attn_v.weight q4_K +blk.74.attn_v.weight q4_K +blk.75.attn_v.weight q4_K +blk.76.attn_v.weight q4_K +blk.77.attn_v.weight q4_K +blk.78.attn_v.weight q4_K +blk.79.attn_v.weight q4_K +output.weight q5_K + +[IQ2_XS] iq2_xs +token_embd.weight q2_K +blk.0.ffn_down.weight q2_K +blk.0.attn_v.weight q4_K +blk.1.attn_v.weight q4_K +blk.1.ffn_down.weight q2_K +blk.2.ffn_down.weight q2_K +blk.2.attn_v.weight q4_K +blk.3.ffn_down.weight q2_K +blk.3.attn_v.weight q4_K +blk.4.attn_v.weight q4_K +blk.4.ffn_down.weight q2_K +blk.5.ffn_down.weight q2_K +blk.5.attn_v.weight q4_K +blk.6.ffn_down.weight q2_K +blk.6.attn_v.weight q4_K +blk.7.attn_v.weight q4_K +blk.7.ffn_down.weight q2_K +blk.8.ffn_down.weight q2_K +blk.8.attn_v.weight q4_K +blk.9.ffn_down.weight q2_K +blk.9.attn_v.weight q4_K +blk.10.attn_v.weight q4_K +blk.11.attn_v.weight q4_K +blk.12.attn_v.weight q4_K +blk.13.attn_v.weight q4_K +blk.14.attn_v.weight q4_K +blk.15.attn_v.weight q4_K +blk.16.attn_v.weight q4_K +blk.17.attn_v.weight q4_K +blk.18.attn_v.weight q4_K +blk.19.attn_v.weight q4_K +blk.20.attn_v.weight q4_K +blk.21.attn_v.weight q4_K +blk.22.attn_v.weight q4_K +blk.23.attn_v.weight q4_K +blk.24.attn_v.weight q4_K +blk.25.attn_v.weight q4_K +blk.26.attn_v.weight q4_K +blk.27.attn_v.weight q4_K +blk.28.attn_v.weight q4_K +blk.29.attn_v.weight q4_K +blk.30.attn_v.weight q4_K +blk.31.attn_v.weight q4_K +blk.32.attn_v.weight q4_K +blk.33.attn_v.weight q4_K +blk.34.attn_v.weight q4_K +blk.35.attn_v.weight q4_K +blk.36.attn_v.weight q4_K +blk.37.attn_v.weight q4_K +blk.38.attn_v.weight q4_K +blk.39.attn_v.weight q4_K +blk.40.attn_v.weight q4_K +blk.41.attn_v.weight q4_K +blk.42.attn_v.weight q4_K +blk.43.attn_v.weight q4_K +blk.44.attn_v.weight q4_K +blk.45.attn_v.weight q4_K +blk.46.attn_v.weight q4_K +blk.47.attn_v.weight q4_K +blk.48.attn_v.weight q4_K +blk.49.attn_v.weight q4_K +blk.50.attn_v.weight q4_K +blk.51.attn_v.weight q4_K +blk.52.attn_v.weight q4_K +blk.53.attn_v.weight q4_K +blk.54.attn_v.weight q4_K +blk.55.attn_v.weight q4_K +blk.56.attn_v.weight q4_K +blk.57.attn_v.weight q4_K +blk.58.attn_v.weight q4_K +blk.59.attn_v.weight q4_K +blk.60.attn_v.weight q4_K +blk.61.attn_v.weight q4_K +blk.62.attn_v.weight q4_K +blk.63.attn_v.weight q4_K +blk.64.attn_v.weight q4_K +blk.65.attn_v.weight q4_K +blk.66.attn_v.weight q4_K +blk.67.attn_v.weight q4_K +blk.68.attn_v.weight q4_K +blk.69.attn_v.weight q4_K +blk.70.attn_v.weight q4_K +blk.71.attn_v.weight q4_K +blk.72.attn_v.weight q4_K +blk.73.attn_v.weight q4_K +blk.74.attn_v.weight q4_K +blk.75.attn_v.weight q4_K +blk.76.attn_v.weight q4_K +blk.77.attn_v.weight q4_K +blk.78.attn_v.weight q4_K +blk.79.attn_v.weight q4_K +output.weight q5_K + +[Q2_K_S] q2_K +blk.0.ffn_down.weight q4_K +blk.0.attn_v.weight q5_K +blk.1.attn_v.weight q5_K +blk.1.ffn_down.weight q4_K +blk.2.ffn_down.weight q4_K +blk.2.attn_v.weight q5_K +blk.3.ffn_down.weight q4_K +blk.3.attn_v.weight q5_K +blk.4.attn_v.weight q5_K +blk.4.ffn_down.weight q4_K +blk.5.ffn_down.weight q4_K +blk.5.attn_v.weight q5_K +blk.6.ffn_down.weight q4_K +blk.6.attn_v.weight q5_K +blk.7.attn_v.weight q5_K +blk.7.ffn_down.weight q4_K +blk.8.ffn_down.weight q4_K +blk.8.attn_v.weight q5_K +blk.9.ffn_down.weight q4_K +blk.9.attn_v.weight q5_K +blk.10.attn_v.weight q5_K +blk.11.attn_v.weight q5_K +blk.12.attn_v.weight q5_K +blk.13.attn_v.weight q5_K +blk.14.attn_v.weight q5_K +blk.15.attn_v.weight q5_K +blk.16.attn_v.weight q5_K +blk.17.attn_v.weight q5_K +blk.18.attn_v.weight q5_K +blk.19.attn_v.weight q5_K +blk.20.attn_v.weight q5_K +blk.21.attn_v.weight q5_K +blk.22.attn_v.weight q5_K +blk.23.attn_v.weight q5_K +blk.24.attn_v.weight q5_K +blk.25.attn_v.weight q5_K +blk.26.attn_v.weight q5_K +blk.27.attn_v.weight q5_K +blk.28.attn_v.weight q5_K +blk.29.attn_v.weight q5_K +blk.30.attn_v.weight q5_K +blk.31.attn_v.weight q5_K +blk.32.attn_v.weight q5_K +blk.33.attn_v.weight q5_K +blk.34.attn_v.weight q5_K +blk.35.attn_v.weight q5_K +blk.36.attn_v.weight q5_K +blk.37.attn_v.weight q5_K +blk.38.attn_v.weight q5_K +blk.39.attn_v.weight q5_K +blk.40.attn_v.weight q5_K +blk.41.attn_v.weight q5_K +blk.42.attn_v.weight q5_K +blk.43.attn_v.weight q5_K +blk.44.attn_v.weight q5_K +blk.45.attn_v.weight q5_K +blk.46.attn_v.weight q5_K +blk.47.attn_v.weight q5_K +blk.48.attn_v.weight q5_K +blk.49.attn_v.weight q5_K +blk.50.attn_v.weight q5_K +blk.51.attn_v.weight q5_K +blk.52.attn_v.weight q5_K +blk.53.attn_v.weight q5_K +blk.54.attn_v.weight q5_K +blk.55.attn_v.weight q5_K +blk.56.attn_v.weight q5_K +blk.57.attn_v.weight q5_K +blk.58.attn_v.weight q5_K +blk.59.attn_v.weight q5_K +blk.60.attn_v.weight q5_K +blk.61.attn_v.weight q5_K +blk.62.attn_v.weight q5_K +blk.63.attn_v.weight q5_K +blk.64.attn_v.weight q5_K +blk.65.attn_v.weight q5_K +blk.66.attn_v.weight q5_K +blk.67.attn_v.weight q5_K +blk.68.attn_v.weight q5_K +blk.69.attn_v.weight q5_K +blk.70.attn_v.weight q5_K +blk.71.attn_v.weight q5_K +blk.72.attn_v.weight q5_K +blk.73.attn_v.weight q5_K +blk.74.attn_v.weight q5_K +blk.75.attn_v.weight q5_K +blk.76.attn_v.weight q5_K +blk.77.attn_v.weight q5_K +blk.78.attn_v.weight q5_K +blk.79.attn_v.weight q5_K +output.weight q6_K + +[IQ3_XS] iq3_s +blk.0.attn_k.weight iq3_xxs +blk.0.attn_q.weight iq3_xxs +blk.0.attn_v.weight q5_K +blk.1.attn_k.weight iq3_xxs +blk.1.attn_q.weight iq3_xxs +blk.1.attn_v.weight q5_K +blk.2.attn_k.weight iq3_xxs +blk.2.attn_q.weight iq3_xxs +blk.2.attn_v.weight q5_K +blk.3.attn_k.weight iq3_xxs +blk.3.attn_q.weight iq3_xxs +blk.3.attn_v.weight q5_K +blk.4.attn_k.weight iq3_xxs +blk.4.attn_q.weight iq3_xxs +blk.4.attn_v.weight q5_K +blk.5.attn_k.weight iq3_xxs +blk.5.attn_q.weight iq3_xxs +blk.5.attn_v.weight q5_K +blk.6.attn_k.weight iq3_xxs +blk.6.attn_q.weight iq3_xxs +blk.6.attn_v.weight q5_K +blk.7.attn_k.weight iq3_xxs +blk.7.attn_q.weight iq3_xxs +blk.7.attn_v.weight q5_K +blk.8.attn_k.weight iq3_xxs +blk.8.attn_q.weight iq3_xxs +blk.8.attn_v.weight q5_K +blk.9.attn_k.weight iq3_xxs +blk.9.attn_q.weight iq3_xxs +blk.9.attn_v.weight q5_K +blk.10.ffn_gate.weight iq3_xxs +blk.10.ffn_up.weight iq3_xxs +blk.10.attn_k.weight iq3_xxs +blk.10.attn_q.weight iq3_xxs +blk.10.attn_v.weight q5_K +blk.11.ffn_gate.weight iq3_xxs +blk.11.ffn_up.weight iq3_xxs +blk.11.attn_k.weight iq3_xxs +blk.11.attn_q.weight iq3_xxs +blk.11.attn_v.weight q5_K +blk.12.ffn_gate.weight iq3_xxs +blk.12.ffn_up.weight iq3_xxs +blk.12.attn_k.weight iq3_xxs +blk.12.attn_q.weight iq3_xxs +blk.12.attn_v.weight q5_K +blk.13.ffn_gate.weight iq3_xxs +blk.13.ffn_up.weight iq3_xxs +blk.13.attn_k.weight iq3_xxs +blk.13.attn_q.weight iq3_xxs +blk.13.attn_v.weight q5_K +blk.14.ffn_gate.weight iq3_xxs +blk.14.ffn_up.weight iq3_xxs +blk.14.attn_k.weight iq3_xxs +blk.14.attn_q.weight iq3_xxs +blk.14.attn_v.weight q5_K +blk.15.ffn_gate.weight iq3_xxs +blk.15.attn_k.weight iq3_xxs +blk.15.attn_q.weight iq3_xxs +blk.15.attn_v.weight q5_K +blk.15.ffn_up.weight iq3_xxs +blk.16.ffn_gate.weight iq3_xxs +blk.16.ffn_up.weight iq3_xxs +blk.16.attn_k.weight iq3_xxs +blk.16.attn_q.weight iq3_xxs +blk.16.attn_v.weight q5_K +blk.17.ffn_gate.weight iq3_xxs +blk.17.ffn_up.weight iq3_xxs +blk.17.attn_k.weight iq3_xxs +blk.17.attn_q.weight iq3_xxs +blk.17.attn_v.weight q5_K +blk.18.attn_k.weight iq3_xxs +blk.18.attn_q.weight iq3_xxs +blk.18.attn_v.weight q5_K +blk.18.ffn_gate.weight iq3_xxs +blk.18.ffn_up.weight iq3_xxs +blk.19.ffn_gate.weight iq3_xxs +blk.19.ffn_up.weight iq3_xxs +blk.19.attn_k.weight iq3_xxs +blk.19.attn_q.weight iq3_xxs +blk.19.attn_v.weight q5_K +blk.20.ffn_gate.weight iq3_xxs +blk.20.ffn_up.weight iq3_xxs +blk.20.attn_k.weight iq3_xxs +blk.20.attn_q.weight iq3_xxs +blk.20.attn_v.weight q5_K +blk.21.attn_k.weight iq3_xxs +blk.21.attn_q.weight iq3_xxs +blk.21.attn_v.weight q5_K +blk.21.ffn_gate.weight iq3_xxs +blk.21.ffn_up.weight iq3_xxs +blk.22.ffn_gate.weight iq3_xxs +blk.22.ffn_up.weight iq3_xxs +blk.22.attn_k.weight iq3_xxs +blk.22.attn_q.weight iq3_xxs +blk.22.attn_v.weight q5_K +blk.23.ffn_gate.weight iq3_xxs +blk.23.ffn_up.weight iq3_xxs +blk.23.attn_k.weight iq3_xxs +blk.23.attn_q.weight iq3_xxs +blk.23.attn_v.weight q5_K +blk.24.ffn_gate.weight iq3_xxs +blk.24.ffn_up.weight iq3_xxs +blk.24.attn_k.weight iq3_xxs +blk.24.attn_q.weight iq3_xxs +blk.24.attn_v.weight q5_K +blk.25.ffn_gate.weight iq3_xxs +blk.25.ffn_up.weight iq3_xxs +blk.25.attn_k.weight iq3_xxs +blk.25.attn_q.weight iq3_xxs +blk.25.attn_v.weight q5_K +blk.26.ffn_gate.weight iq3_xxs +blk.26.ffn_up.weight iq3_xxs +blk.26.attn_k.weight iq3_xxs +blk.26.attn_q.weight iq3_xxs +blk.26.attn_v.weight q5_K +blk.27.ffn_gate.weight iq3_xxs +blk.27.ffn_up.weight iq3_xxs +blk.27.attn_k.weight iq3_xxs +blk.27.attn_q.weight iq3_xxs +blk.27.attn_v.weight q5_K +blk.28.ffn_gate.weight iq3_xxs +blk.28.ffn_up.weight iq3_xxs +blk.28.attn_k.weight iq3_xxs +blk.28.attn_q.weight iq3_xxs +blk.28.attn_v.weight q5_K +blk.29.ffn_gate.weight iq3_xxs +blk.29.attn_k.weight iq3_xxs +blk.29.attn_q.weight iq3_xxs +blk.29.attn_v.weight q5_K +blk.29.ffn_up.weight iq3_xxs +blk.30.ffn_gate.weight iq3_xxs +blk.30.ffn_up.weight iq3_xxs +blk.30.attn_k.weight iq3_xxs +blk.30.attn_q.weight iq3_xxs +blk.30.attn_v.weight q5_K +blk.31.ffn_gate.weight iq3_xxs +blk.31.ffn_up.weight iq3_xxs +blk.31.attn_k.weight iq3_xxs +blk.31.attn_q.weight iq3_xxs +blk.31.attn_v.weight q5_K +blk.32.attn_k.weight iq3_xxs +blk.32.attn_q.weight iq3_xxs +blk.32.attn_v.weight q5_K +blk.32.ffn_gate.weight iq3_xxs +blk.32.ffn_up.weight iq3_xxs +blk.33.ffn_gate.weight iq3_xxs +blk.33.ffn_up.weight iq3_xxs +blk.33.attn_k.weight iq3_xxs +blk.33.attn_q.weight iq3_xxs +blk.33.attn_v.weight q5_K +blk.34.ffn_gate.weight iq3_xxs +blk.34.ffn_up.weight iq3_xxs +blk.34.attn_k.weight iq3_xxs +blk.34.attn_q.weight iq3_xxs +blk.34.attn_v.weight q5_K +blk.35.attn_k.weight iq3_xxs +blk.35.attn_q.weight iq3_xxs +blk.35.attn_v.weight q5_K +blk.35.ffn_gate.weight iq3_xxs +blk.35.ffn_up.weight iq3_xxs +blk.36.ffn_gate.weight iq3_xxs +blk.36.ffn_up.weight iq3_xxs +blk.36.attn_k.weight iq3_xxs +blk.36.attn_q.weight iq3_xxs +blk.36.attn_v.weight q5_K +blk.37.ffn_gate.weight iq3_xxs +blk.37.ffn_up.weight iq3_xxs +blk.37.attn_k.weight iq3_xxs +blk.37.attn_q.weight iq3_xxs +blk.37.attn_v.weight q5_K +blk.38.ffn_gate.weight iq3_xxs +blk.38.ffn_up.weight iq3_xxs +blk.38.attn_k.weight iq3_xxs +blk.38.attn_q.weight iq3_xxs +blk.38.attn_v.weight q5_K +blk.39.ffn_gate.weight iq3_xxs +blk.39.ffn_up.weight iq3_xxs +blk.39.attn_k.weight iq3_xxs +blk.39.attn_q.weight iq3_xxs +blk.39.attn_v.weight q5_K +blk.40.ffn_gate.weight iq3_xxs +blk.40.ffn_up.weight iq3_xxs +blk.40.attn_k.weight iq3_xxs +blk.40.attn_q.weight iq3_xxs +blk.40.attn_v.weight q5_K +blk.41.ffn_gate.weight iq3_xxs +blk.41.ffn_up.weight iq3_xxs +blk.41.attn_k.weight iq3_xxs +blk.41.attn_q.weight iq3_xxs +blk.41.attn_v.weight q5_K +blk.42.ffn_gate.weight iq3_xxs +blk.42.ffn_up.weight iq3_xxs +blk.42.attn_k.weight iq3_xxs +blk.42.attn_q.weight iq3_xxs +blk.42.attn_v.weight q5_K +blk.43.ffn_gate.weight iq3_xxs +blk.43.attn_k.weight iq3_xxs +blk.43.attn_q.weight iq3_xxs +blk.43.attn_v.weight q5_K +blk.43.ffn_up.weight iq3_xxs +blk.44.ffn_gate.weight iq3_xxs +blk.44.ffn_up.weight iq3_xxs +blk.44.attn_k.weight iq3_xxs +blk.44.attn_q.weight iq3_xxs +blk.44.attn_v.weight q5_K +blk.45.ffn_gate.weight iq3_xxs +blk.45.ffn_up.weight iq3_xxs +blk.45.attn_k.weight iq3_xxs +blk.45.attn_q.weight iq3_xxs +blk.45.attn_v.weight q5_K +blk.46.attn_k.weight iq3_xxs +blk.46.attn_q.weight iq3_xxs +blk.46.attn_v.weight q5_K +blk.46.ffn_gate.weight iq3_xxs +blk.46.ffn_up.weight iq3_xxs +blk.47.ffn_gate.weight iq3_xxs +blk.47.ffn_up.weight iq3_xxs +blk.47.attn_k.weight iq3_xxs +blk.47.attn_q.weight iq3_xxs +blk.47.attn_v.weight q5_K +blk.48.ffn_gate.weight iq3_xxs +blk.48.ffn_up.weight iq3_xxs +blk.48.attn_k.weight iq3_xxs +blk.48.attn_q.weight iq3_xxs +blk.48.attn_v.weight q5_K +blk.49.attn_k.weight iq3_xxs +blk.49.attn_q.weight iq3_xxs +blk.49.attn_v.weight q5_K +blk.49.ffn_gate.weight iq3_xxs +blk.49.ffn_up.weight iq3_xxs +blk.50.ffn_gate.weight iq3_xxs +blk.50.ffn_up.weight iq3_xxs +blk.50.attn_k.weight iq3_xxs +blk.50.attn_q.weight iq3_xxs +blk.50.attn_v.weight q5_K +blk.51.ffn_gate.weight iq3_xxs +blk.51.ffn_up.weight iq3_xxs +blk.51.attn_k.weight iq3_xxs +blk.51.attn_q.weight iq3_xxs +blk.51.attn_v.weight q5_K +blk.52.ffn_gate.weight iq3_xxs +blk.52.ffn_up.weight iq3_xxs +blk.52.attn_k.weight iq3_xxs +blk.52.attn_q.weight iq3_xxs +blk.52.attn_v.weight q5_K +blk.53.ffn_gate.weight iq3_xxs +blk.53.ffn_up.weight iq3_xxs +blk.53.attn_k.weight iq3_xxs +blk.53.attn_q.weight iq3_xxs +blk.53.attn_v.weight q5_K +blk.54.ffn_gate.weight iq3_xxs +blk.54.ffn_up.weight iq3_xxs +blk.54.attn_k.weight iq3_xxs +blk.54.attn_q.weight iq3_xxs +blk.54.attn_v.weight q5_K +blk.55.ffn_gate.weight iq3_xxs +blk.55.ffn_up.weight iq3_xxs +blk.55.attn_k.weight iq3_xxs +blk.55.attn_q.weight iq3_xxs +blk.55.attn_v.weight q5_K +blk.56.ffn_gate.weight iq3_xxs +blk.56.ffn_up.weight iq3_xxs +blk.56.attn_k.weight iq3_xxs +blk.56.attn_q.weight iq3_xxs +blk.56.attn_v.weight q5_K +blk.57.ffn_gate.weight iq3_xxs +blk.57.attn_k.weight iq3_xxs +blk.57.attn_q.weight iq3_xxs +blk.57.attn_v.weight q5_K +blk.57.ffn_up.weight iq3_xxs +blk.58.ffn_gate.weight iq3_xxs +blk.58.ffn_up.weight iq3_xxs +blk.58.attn_k.weight iq3_xxs +blk.58.attn_q.weight iq3_xxs +blk.58.attn_v.weight q5_K +blk.59.ffn_gate.weight iq3_xxs +blk.59.ffn_up.weight iq3_xxs +blk.59.attn_k.weight iq3_xxs +blk.59.attn_q.weight iq3_xxs +blk.59.attn_v.weight q5_K +blk.60.attn_k.weight iq3_xxs +blk.60.attn_q.weight iq3_xxs +blk.60.attn_v.weight q5_K +blk.60.ffn_gate.weight iq3_xxs +blk.60.ffn_up.weight iq3_xxs +blk.61.ffn_gate.weight iq3_xxs +blk.61.ffn_up.weight iq3_xxs +blk.61.attn_k.weight iq3_xxs +blk.61.attn_q.weight iq3_xxs +blk.61.attn_v.weight q5_K +blk.62.ffn_gate.weight iq3_xxs +blk.62.ffn_up.weight iq3_xxs +blk.62.attn_k.weight iq3_xxs +blk.62.attn_q.weight iq3_xxs +blk.62.attn_v.weight q5_K +blk.63.attn_k.weight iq3_xxs +blk.63.attn_q.weight iq3_xxs +blk.63.attn_v.weight q5_K +blk.63.ffn_gate.weight iq3_xxs +blk.63.ffn_up.weight iq3_xxs +blk.64.ffn_gate.weight iq3_xxs +blk.64.ffn_up.weight iq3_xxs +blk.64.attn_k.weight iq3_xxs +blk.64.attn_q.weight iq3_xxs +blk.64.attn_v.weight q5_K +blk.65.ffn_gate.weight iq3_xxs +blk.65.ffn_up.weight iq3_xxs +blk.65.attn_k.weight iq3_xxs +blk.65.attn_q.weight iq3_xxs +blk.65.attn_v.weight q5_K +blk.66.ffn_gate.weight iq3_xxs +blk.66.ffn_up.weight iq3_xxs +blk.66.attn_k.weight iq3_xxs +blk.66.attn_q.weight iq3_xxs +blk.66.attn_v.weight q5_K +blk.67.ffn_gate.weight iq3_xxs +blk.67.ffn_up.weight iq3_xxs +blk.67.attn_k.weight iq3_xxs +blk.67.attn_q.weight iq3_xxs +blk.67.attn_v.weight q5_K +blk.68.ffn_gate.weight iq3_xxs +blk.68.ffn_up.weight iq3_xxs +blk.68.attn_k.weight iq3_xxs +blk.68.attn_q.weight iq3_xxs +blk.68.attn_v.weight q5_K +blk.69.ffn_gate.weight iq3_xxs +blk.69.ffn_up.weight iq3_xxs +blk.69.attn_k.weight iq3_xxs +blk.69.attn_q.weight iq3_xxs +blk.69.attn_v.weight q5_K +blk.70.attn_k.weight iq3_xxs +blk.70.attn_q.weight iq3_xxs +blk.70.attn_v.weight q5_K +blk.71.attn_k.weight iq3_xxs +blk.71.attn_q.weight iq3_xxs +blk.71.attn_v.weight q5_K +blk.72.attn_k.weight iq3_xxs +blk.72.attn_q.weight iq3_xxs +blk.72.attn_v.weight q5_K +blk.73.attn_k.weight iq3_xxs +blk.73.attn_q.weight iq3_xxs +blk.73.attn_v.weight q5_K +blk.74.attn_k.weight iq3_xxs +blk.74.attn_q.weight iq3_xxs +blk.74.attn_v.weight q5_K +blk.75.attn_k.weight iq3_xxs +blk.75.attn_q.weight iq3_xxs +blk.75.attn_v.weight q5_K +blk.76.attn_k.weight iq3_xxs +blk.76.attn_q.weight iq3_xxs +blk.76.attn_v.weight q5_K +blk.77.attn_k.weight iq3_xxs +blk.77.attn_q.weight iq3_xxs +blk.77.attn_v.weight q5_K +blk.78.attn_k.weight iq3_xxs +blk.78.attn_q.weight iq3_xxs +blk.78.attn_v.weight q5_K +blk.79.attn_k.weight iq3_xxs +blk.79.attn_q.weight iq3_xxs +blk.79.attn_v.weight q5_K +output.weight q6_K + +[IQ3_XXS] iq3_xxs +token_embd.weight iq3_s +blk.0.ffn_down.weight q4_K +blk.0.attn_k.weight iq2_s +blk.0.attn_output.weight iq3_s +blk.0.attn_q.weight iq2_s +blk.0.attn_v.weight q5_K +blk.1.attn_k.weight iq2_s +blk.1.attn_output.weight iq3_s +blk.1.attn_q.weight iq2_s +blk.1.attn_v.weight q5_K +blk.1.ffn_down.weight q4_K +blk.2.ffn_down.weight q4_K +blk.2.attn_k.weight iq2_s +blk.2.attn_output.weight iq3_s +blk.2.attn_q.weight iq2_s +blk.2.attn_v.weight q5_K +blk.3.ffn_down.weight q4_K +blk.3.attn_k.weight iq2_s +blk.3.attn_output.weight iq3_s +blk.3.attn_q.weight iq2_s +blk.3.attn_v.weight q5_K +blk.4.attn_k.weight iq2_s +blk.4.attn_output.weight iq3_s +blk.4.attn_q.weight iq2_s +blk.4.attn_v.weight q5_K +blk.4.ffn_down.weight q4_K +blk.5.ffn_down.weight q4_K +blk.5.attn_k.weight iq2_s +blk.5.attn_output.weight iq3_s +blk.5.attn_q.weight iq2_s +blk.5.attn_v.weight q5_K +blk.6.ffn_down.weight q4_K +blk.6.attn_k.weight iq2_s +blk.6.attn_output.weight iq3_s +blk.6.attn_q.weight iq2_s +blk.6.attn_v.weight q5_K +blk.7.attn_k.weight iq2_s +blk.7.attn_q.weight iq2_s +blk.7.attn_v.weight q5_K +blk.7.ffn_down.weight q4_K +blk.7.attn_output.weight iq3_s +blk.8.ffn_down.weight q4_K +blk.8.attn_k.weight iq2_s +blk.8.attn_output.weight iq3_s +blk.8.attn_q.weight iq2_s +blk.8.attn_v.weight q5_K +blk.9.ffn_down.weight q4_K +blk.9.attn_k.weight iq2_s +blk.9.attn_output.weight iq3_s +blk.9.attn_q.weight iq2_s +blk.9.attn_v.weight q5_K +blk.10.ffn_down.weight q3_K +blk.10.attn_k.weight iq2_s +blk.10.attn_output.weight iq3_s +blk.10.attn_q.weight iq2_s +blk.10.attn_v.weight q5_K +blk.11.ffn_down.weight q3_K +blk.11.attn_k.weight iq2_s +blk.11.attn_output.weight iq3_s +blk.11.attn_q.weight iq2_s +blk.11.attn_v.weight q5_K +blk.12.attn_k.weight iq2_s +blk.12.attn_output.weight iq3_s +blk.12.attn_q.weight iq2_s +blk.12.attn_v.weight q5_K +blk.12.ffn_down.weight q3_K +blk.13.ffn_down.weight q3_K +blk.13.attn_k.weight iq2_s +blk.13.attn_output.weight iq3_s +blk.13.attn_q.weight iq2_s +blk.13.attn_v.weight q5_K +blk.14.ffn_down.weight q3_K +blk.14.attn_k.weight iq2_s +blk.14.attn_output.weight iq3_s +blk.14.attn_q.weight iq2_s +blk.14.attn_v.weight q5_K +blk.15.attn_k.weight iq2_s +blk.15.attn_output.weight iq3_s +blk.15.attn_q.weight iq2_s +blk.15.attn_v.weight q5_K +blk.15.ffn_down.weight q3_K +blk.16.ffn_down.weight q3_K +blk.16.attn_k.weight iq2_s +blk.16.attn_output.weight iq3_s +blk.16.attn_q.weight iq2_s +blk.16.attn_v.weight q5_K +blk.17.ffn_down.weight q3_K +blk.17.attn_k.weight iq2_s +blk.17.attn_output.weight iq3_s +blk.17.attn_q.weight iq2_s +blk.17.attn_v.weight q5_K +blk.18.attn_k.weight iq2_s +blk.18.attn_output.weight iq3_s +blk.18.attn_q.weight iq2_s +blk.18.attn_v.weight q5_K +blk.18.ffn_down.weight q3_K +blk.19.ffn_down.weight q3_K +blk.19.attn_k.weight iq2_s +blk.19.attn_output.weight iq3_s +blk.19.attn_q.weight iq2_s +blk.19.attn_v.weight q5_K +blk.20.ffn_down.weight q3_K +blk.20.attn_k.weight iq2_s +blk.20.attn_output.weight iq3_s +blk.20.attn_q.weight iq2_s +blk.20.attn_v.weight q5_K +blk.21.attn_k.weight iq2_s +blk.21.attn_q.weight iq2_s +blk.21.attn_v.weight q5_K +blk.21.ffn_down.weight q3_K +blk.21.attn_output.weight iq3_s +blk.22.ffn_down.weight q3_K +blk.22.attn_k.weight iq2_s +blk.22.attn_output.weight iq3_s +blk.22.attn_q.weight iq2_s +blk.22.attn_v.weight q5_K +blk.23.ffn_down.weight q3_K +blk.23.attn_k.weight iq2_s +blk.23.attn_output.weight iq3_s +blk.23.attn_q.weight iq2_s +blk.23.attn_v.weight q5_K +blk.24.ffn_down.weight q3_K +blk.24.attn_k.weight iq2_s +blk.24.attn_output.weight iq3_s +blk.24.attn_q.weight iq2_s +blk.24.attn_v.weight q5_K +blk.25.ffn_down.weight q3_K +blk.25.attn_k.weight iq2_s +blk.25.attn_output.weight iq3_s +blk.25.attn_q.weight iq2_s +blk.25.attn_v.weight q5_K +blk.26.attn_k.weight iq2_s +blk.26.attn_output.weight iq3_s +blk.26.attn_q.weight iq2_s +blk.26.attn_v.weight q5_K +blk.26.ffn_down.weight q3_K +blk.27.ffn_down.weight q3_K +blk.27.attn_k.weight iq2_s +blk.27.attn_output.weight iq3_s +blk.27.attn_q.weight iq2_s +blk.27.attn_v.weight q5_K +blk.28.ffn_down.weight q3_K +blk.28.attn_k.weight iq2_s +blk.28.attn_output.weight iq3_s +blk.28.attn_q.weight iq2_s +blk.28.attn_v.weight q5_K +blk.29.attn_k.weight iq2_s +blk.29.attn_output.weight iq3_s +blk.29.attn_q.weight iq2_s +blk.29.attn_v.weight q5_K +blk.29.ffn_down.weight q3_K +blk.30.ffn_down.weight q3_K +blk.30.attn_k.weight iq2_s +blk.30.attn_output.weight iq3_s +blk.30.attn_q.weight iq2_s +blk.30.attn_v.weight q5_K +blk.31.ffn_down.weight q3_K +blk.31.attn_k.weight iq2_s +blk.31.attn_output.weight iq3_s +blk.31.attn_q.weight iq2_s +blk.31.attn_v.weight q5_K +blk.32.attn_k.weight iq2_s +blk.32.attn_output.weight iq3_s +blk.32.attn_q.weight iq2_s +blk.32.attn_v.weight q5_K +blk.32.ffn_down.weight q3_K +blk.33.ffn_down.weight q3_K +blk.33.attn_k.weight iq2_s +blk.33.attn_output.weight iq3_s +blk.33.attn_q.weight iq2_s +blk.33.attn_v.weight q5_K +blk.34.ffn_down.weight q3_K +blk.34.attn_k.weight iq2_s +blk.34.attn_output.weight iq3_s +blk.34.attn_q.weight iq2_s +blk.34.attn_v.weight q5_K +blk.35.attn_k.weight iq2_s +blk.35.attn_q.weight iq2_s +blk.35.attn_v.weight q5_K +blk.35.ffn_down.weight q3_K +blk.35.attn_output.weight iq3_s +blk.36.ffn_down.weight q3_K +blk.36.attn_k.weight iq2_s +blk.36.attn_output.weight iq3_s +blk.36.attn_q.weight iq2_s +blk.36.attn_v.weight q5_K +blk.37.ffn_down.weight q3_K +blk.37.attn_k.weight iq2_s +blk.37.attn_output.weight iq3_s +blk.37.attn_q.weight iq2_s +blk.37.attn_v.weight q5_K +blk.38.ffn_down.weight q3_K +blk.38.attn_k.weight iq2_s +blk.38.attn_output.weight iq3_s +blk.38.attn_q.weight iq2_s +blk.38.attn_v.weight q5_K +blk.39.ffn_down.weight q3_K +blk.39.attn_k.weight iq2_s +blk.39.attn_output.weight iq3_s +blk.39.attn_q.weight iq2_s +blk.39.attn_v.weight q5_K +blk.40.attn_k.weight iq2_s +blk.40.attn_output.weight iq3_s +blk.40.attn_q.weight iq2_s +blk.40.attn_v.weight q5_K +blk.40.ffn_down.weight q3_K +blk.41.ffn_down.weight q3_K +blk.41.attn_k.weight iq2_s +blk.41.attn_output.weight iq3_s +blk.41.attn_q.weight iq2_s +blk.41.attn_v.weight q5_K +blk.42.ffn_down.weight q3_K +blk.42.attn_k.weight iq2_s +blk.42.attn_output.weight iq3_s +blk.42.attn_q.weight iq2_s +blk.42.attn_v.weight q5_K +blk.43.attn_k.weight iq2_s +blk.43.attn_output.weight iq3_s +blk.43.attn_q.weight iq2_s +blk.43.attn_v.weight q5_K +blk.43.ffn_down.weight q3_K +blk.44.ffn_down.weight q3_K +blk.44.attn_k.weight iq2_s +blk.44.attn_output.weight iq3_s +blk.44.attn_q.weight iq2_s +blk.44.attn_v.weight q5_K +blk.45.ffn_down.weight q3_K +blk.45.attn_k.weight iq2_s +blk.45.attn_output.weight iq3_s +blk.45.attn_q.weight iq2_s +blk.45.attn_v.weight q5_K +blk.46.attn_k.weight iq2_s +blk.46.attn_output.weight iq3_s +blk.46.attn_q.weight iq2_s +blk.46.attn_v.weight q5_K +blk.46.ffn_down.weight q3_K +blk.47.ffn_down.weight q3_K +blk.47.attn_k.weight iq2_s +blk.47.attn_output.weight iq3_s +blk.47.attn_q.weight iq2_s +blk.47.attn_v.weight q5_K +blk.48.ffn_down.weight q3_K +blk.48.attn_k.weight iq2_s +blk.48.attn_output.weight iq3_s +blk.48.attn_q.weight iq2_s +blk.48.attn_v.weight q5_K +blk.49.attn_k.weight iq2_s +blk.49.attn_q.weight iq2_s +blk.49.attn_v.weight q5_K +blk.49.ffn_down.weight q3_K +blk.49.attn_output.weight iq3_s +blk.50.ffn_down.weight q3_K +blk.50.attn_k.weight iq2_s +blk.50.attn_output.weight iq3_s +blk.50.attn_q.weight iq2_s +blk.50.attn_v.weight q5_K +blk.51.ffn_down.weight q3_K +blk.51.attn_k.weight iq2_s +blk.51.attn_output.weight iq3_s +blk.51.attn_q.weight iq2_s +blk.51.attn_v.weight q5_K +blk.52.ffn_down.weight q3_K +blk.52.attn_k.weight iq2_s +blk.52.attn_output.weight iq3_s +blk.52.attn_q.weight iq2_s +blk.52.attn_v.weight q5_K +blk.53.ffn_down.weight q3_K +blk.53.attn_k.weight iq2_s +blk.53.attn_output.weight iq3_s +blk.53.attn_q.weight iq2_s +blk.53.attn_v.weight q5_K +blk.54.attn_k.weight iq2_s +blk.54.attn_output.weight iq3_s +blk.54.attn_q.weight iq2_s +blk.54.attn_v.weight q5_K +blk.54.ffn_down.weight q3_K +blk.55.ffn_down.weight q3_K +blk.55.attn_k.weight iq2_s +blk.55.attn_output.weight iq3_s +blk.55.attn_q.weight iq2_s +blk.55.attn_v.weight q5_K +blk.56.ffn_down.weight q3_K +blk.56.attn_k.weight iq2_s +blk.56.attn_output.weight iq3_s +blk.56.attn_q.weight iq2_s +blk.56.attn_v.weight q5_K +blk.57.attn_k.weight iq2_s +blk.57.attn_output.weight iq3_s +blk.57.attn_q.weight iq2_s +blk.57.attn_v.weight q5_K +blk.57.ffn_down.weight q3_K +blk.58.ffn_down.weight q3_K +blk.58.attn_k.weight iq2_s +blk.58.attn_output.weight iq3_s +blk.58.attn_q.weight iq2_s +blk.58.attn_v.weight q5_K +blk.59.ffn_down.weight q3_K +blk.59.attn_k.weight iq2_s +blk.59.attn_output.weight iq3_s +blk.59.attn_q.weight iq2_s +blk.59.attn_v.weight q5_K +blk.60.attn_k.weight iq2_s +blk.60.attn_output.weight iq3_s +blk.60.attn_q.weight iq2_s +blk.60.attn_v.weight q5_K +blk.60.ffn_down.weight q3_K +blk.61.ffn_down.weight q3_K +blk.61.attn_k.weight iq2_s +blk.61.attn_output.weight iq3_s +blk.61.attn_q.weight iq2_s +blk.61.attn_v.weight q5_K +blk.62.ffn_down.weight q3_K +blk.62.attn_k.weight iq2_s +blk.62.attn_output.weight iq3_s +blk.62.attn_q.weight iq2_s +blk.62.attn_v.weight q5_K +blk.63.attn_k.weight iq2_s +blk.63.attn_q.weight iq2_s +blk.63.attn_v.weight q5_K +blk.63.ffn_down.weight q3_K +blk.63.attn_output.weight iq3_s +blk.64.ffn_down.weight q3_K +blk.64.attn_k.weight iq2_s +blk.64.attn_output.weight iq3_s +blk.64.attn_q.weight iq2_s +blk.64.attn_v.weight q5_K +blk.65.ffn_down.weight q3_K +blk.65.attn_k.weight iq2_s +blk.65.attn_output.weight iq3_s +blk.65.attn_q.weight iq2_s +blk.65.attn_v.weight q5_K +blk.66.ffn_down.weight q3_K +blk.66.attn_k.weight iq2_s +blk.66.attn_output.weight iq3_s +blk.66.attn_q.weight iq2_s +blk.66.attn_v.weight q5_K +blk.67.ffn_down.weight q3_K +blk.67.attn_k.weight iq2_s +blk.67.attn_output.weight iq3_s +blk.67.attn_q.weight iq2_s +blk.67.attn_v.weight q5_K +blk.68.attn_k.weight iq2_s +blk.68.attn_output.weight iq3_s +blk.68.attn_q.weight iq2_s +blk.68.attn_v.weight q5_K +blk.68.ffn_down.weight q3_K +blk.69.ffn_down.weight q3_K +blk.69.attn_k.weight iq2_s +blk.69.attn_output.weight iq3_s +blk.69.attn_q.weight iq2_s +blk.69.attn_v.weight q5_K +blk.70.ffn_down.weight q3_K +blk.70.attn_k.weight iq2_s +blk.70.attn_output.weight iq3_s +blk.70.attn_q.weight iq2_s +blk.70.attn_v.weight q5_K +blk.71.attn_k.weight iq2_s +blk.71.attn_output.weight iq3_s +blk.71.attn_q.weight iq2_s +blk.71.attn_v.weight q5_K +blk.71.ffn_down.weight q3_K +blk.72.ffn_down.weight q3_K +blk.72.attn_k.weight iq2_s +blk.72.attn_output.weight iq3_s +blk.72.attn_q.weight iq2_s +blk.72.attn_v.weight q5_K +blk.73.ffn_down.weight q3_K +blk.73.attn_k.weight iq2_s +blk.73.attn_output.weight iq3_s +blk.73.attn_q.weight iq2_s +blk.73.attn_v.weight q5_K +blk.74.attn_k.weight iq2_s +blk.74.attn_output.weight iq3_s +blk.74.attn_q.weight iq2_s +blk.74.attn_v.weight q5_K +blk.74.ffn_down.weight q3_K +blk.75.ffn_down.weight q3_K +blk.75.attn_k.weight iq2_s +blk.75.attn_output.weight iq3_s +blk.75.attn_q.weight iq2_s +blk.75.attn_v.weight q5_K +blk.76.ffn_down.weight q3_K +blk.76.attn_k.weight iq2_s +blk.76.attn_output.weight iq3_s +blk.76.attn_q.weight iq2_s +blk.76.attn_v.weight q5_K +blk.77.attn_k.weight iq2_s +blk.77.attn_q.weight iq2_s +blk.77.attn_v.weight q5_K +blk.77.ffn_down.weight q3_K +blk.77.attn_output.weight iq3_s +blk.78.ffn_down.weight q3_K +blk.78.attn_k.weight iq2_s +blk.78.attn_output.weight iq3_s +blk.78.attn_q.weight iq2_s +blk.78.attn_v.weight q5_K +blk.79.ffn_down.weight q3_K +blk.79.attn_k.weight iq2_s +blk.79.attn_output.weight iq3_s +blk.79.attn_q.weight iq2_s +blk.79.attn_v.weight q5_K +output.weight q5_K + +[IQ1_S] iq1_s +token_embd.weight q2_K +blk.0.ffn_down.weight q2_K +blk.0.attn_output.weight iq2_xxs +blk.0.attn_v.weight q4_K +blk.1.attn_output.weight iq2_xxs +blk.1.attn_v.weight q4_K +blk.1.ffn_down.weight q2_K +blk.2.ffn_down.weight q2_K +blk.2.attn_output.weight iq2_xxs +blk.2.attn_v.weight q4_K +blk.3.ffn_down.weight q2_K +blk.3.attn_output.weight iq2_xxs +blk.3.attn_v.weight q4_K +blk.4.attn_output.weight iq2_xxs +blk.4.attn_v.weight q4_K +blk.4.ffn_down.weight q2_K +blk.5.ffn_down.weight q2_K +blk.5.attn_output.weight iq2_xxs +blk.5.attn_v.weight q4_K +blk.6.ffn_down.weight q2_K +blk.6.attn_output.weight iq2_xxs +blk.6.attn_v.weight q4_K +blk.7.attn_v.weight q4_K +blk.7.ffn_down.weight q2_K +blk.7.attn_output.weight iq2_xxs +blk.8.ffn_down.weight q2_K +blk.8.attn_output.weight iq2_xxs +blk.8.attn_v.weight q4_K +blk.9.ffn_down.weight q2_K +blk.9.attn_output.weight iq2_xxs +blk.9.attn_v.weight q4_K +blk.10.attn_output.weight iq2_xxs +blk.10.attn_v.weight q4_K +blk.11.attn_output.weight iq2_xxs +blk.11.attn_v.weight q4_K +blk.12.attn_output.weight iq2_xxs +blk.12.attn_v.weight q4_K +blk.13.attn_output.weight iq2_xxs +blk.13.attn_v.weight q4_K +blk.14.attn_output.weight iq2_xxs +blk.14.attn_v.weight q4_K +blk.15.attn_output.weight iq2_xxs +blk.15.attn_v.weight q4_K +blk.16.attn_output.weight iq2_xxs +blk.16.attn_v.weight q4_K +blk.17.attn_output.weight iq2_xxs +blk.17.attn_v.weight q4_K +blk.18.attn_output.weight iq2_xxs +blk.18.attn_v.weight q4_K +blk.19.attn_output.weight iq2_xxs +blk.19.attn_v.weight q4_K +blk.20.attn_output.weight iq2_xxs +blk.20.attn_v.weight q4_K +blk.21.attn_v.weight q4_K +blk.21.attn_output.weight iq2_xxs +blk.22.attn_output.weight iq2_xxs +blk.22.attn_v.weight q4_K +blk.23.attn_output.weight iq2_xxs +blk.23.attn_v.weight q4_K +blk.24.attn_output.weight iq2_xxs +blk.24.attn_v.weight q4_K +blk.25.attn_output.weight iq2_xxs +blk.25.attn_v.weight q4_K +blk.26.attn_output.weight iq2_xxs +blk.26.attn_v.weight q4_K +blk.27.attn_output.weight iq2_xxs +blk.27.attn_v.weight q4_K +blk.28.attn_output.weight iq2_xxs +blk.28.attn_v.weight q4_K +blk.29.attn_output.weight iq2_xxs +blk.29.attn_v.weight q4_K +blk.30.attn_output.weight iq2_xxs +blk.30.attn_v.weight q4_K +blk.31.attn_output.weight iq2_xxs +blk.31.attn_v.weight q4_K +blk.32.attn_output.weight iq2_xxs +blk.32.attn_v.weight q4_K +blk.33.attn_output.weight iq2_xxs +blk.33.attn_v.weight q4_K +blk.34.attn_output.weight iq2_xxs +blk.34.attn_v.weight q4_K +blk.35.attn_v.weight q4_K +blk.35.attn_output.weight iq2_xxs +blk.36.attn_output.weight iq2_xxs +blk.36.attn_v.weight q4_K +blk.37.attn_output.weight iq2_xxs +blk.37.attn_v.weight q4_K +blk.38.attn_output.weight iq2_xxs +blk.38.attn_v.weight q4_K +blk.39.attn_output.weight iq2_xxs +blk.39.attn_v.weight q4_K +blk.40.attn_output.weight iq2_xxs +blk.40.attn_v.weight q4_K +blk.41.attn_output.weight iq2_xxs +blk.41.attn_v.weight q4_K +blk.42.attn_output.weight iq2_xxs +blk.42.attn_v.weight q4_K +blk.43.attn_output.weight iq2_xxs +blk.43.attn_v.weight q4_K +blk.44.attn_output.weight iq2_xxs +blk.44.attn_v.weight q4_K +blk.45.attn_output.weight iq2_xxs +blk.45.attn_v.weight q4_K +blk.46.attn_output.weight iq2_xxs +blk.46.attn_v.weight q4_K +blk.47.attn_output.weight iq2_xxs +blk.47.attn_v.weight q4_K +blk.48.attn_output.weight iq2_xxs +blk.48.attn_v.weight q4_K +blk.49.attn_v.weight q4_K +blk.49.attn_output.weight iq2_xxs +blk.50.attn_output.weight iq2_xxs +blk.50.attn_v.weight q4_K +blk.51.attn_output.weight iq2_xxs +blk.51.attn_v.weight q4_K +blk.52.attn_output.weight iq2_xxs +blk.52.attn_v.weight q4_K +blk.53.attn_output.weight iq2_xxs +blk.53.attn_v.weight q4_K +blk.54.attn_output.weight iq2_xxs +blk.54.attn_v.weight q4_K +blk.55.attn_output.weight iq2_xxs +blk.55.attn_v.weight q4_K +blk.56.attn_output.weight iq2_xxs +blk.56.attn_v.weight q4_K +blk.57.attn_output.weight iq2_xxs +blk.57.attn_v.weight q4_K +blk.58.attn_output.weight iq2_xxs +blk.58.attn_v.weight q4_K +blk.59.attn_output.weight iq2_xxs +blk.59.attn_v.weight q4_K +blk.60.attn_output.weight iq2_xxs +blk.60.attn_v.weight q4_K +blk.61.attn_output.weight iq2_xxs +blk.61.attn_v.weight q4_K +blk.62.attn_output.weight iq2_xxs +blk.62.attn_v.weight q4_K +blk.63.attn_v.weight q4_K +blk.63.attn_output.weight iq2_xxs +blk.64.attn_output.weight iq2_xxs +blk.64.attn_v.weight q4_K +blk.65.attn_output.weight iq2_xxs +blk.65.attn_v.weight q4_K +blk.66.attn_output.weight iq2_xxs +blk.66.attn_v.weight q4_K +blk.67.attn_output.weight iq2_xxs +blk.67.attn_v.weight q4_K +blk.68.attn_output.weight iq2_xxs +blk.68.attn_v.weight q4_K +blk.69.attn_output.weight iq2_xxs +blk.69.attn_v.weight q4_K +blk.70.attn_output.weight iq2_xxs +blk.70.attn_v.weight q4_K +blk.71.attn_output.weight iq2_xxs +blk.71.attn_v.weight q4_K +blk.72.attn_output.weight iq2_xxs +blk.72.attn_v.weight q4_K +blk.73.attn_output.weight iq2_xxs +blk.73.attn_v.weight q4_K +blk.74.attn_output.weight iq2_xxs +blk.74.attn_v.weight q4_K +blk.75.attn_output.weight iq2_xxs +blk.75.attn_v.weight q4_K +blk.76.attn_output.weight iq2_xxs +blk.76.attn_v.weight q4_K +blk.77.attn_v.weight q4_K +blk.77.attn_output.weight iq2_xxs +blk.78.attn_output.weight iq2_xxs +blk.78.attn_v.weight q4_K +blk.79.attn_output.weight iq2_xxs +blk.79.attn_v.weight q4_K +output.weight q5_K + +[IQ4_NL] iq4_nl +blk.0.ffn_down.weight q5_K +blk.0.attn_v.weight q5_K +blk.1.attn_v.weight q5_K +blk.1.ffn_down.weight q5_K +blk.2.ffn_down.weight q5_K +blk.2.attn_v.weight q5_K +blk.3.ffn_down.weight q5_K +blk.3.attn_v.weight q5_K +blk.4.attn_v.weight q5_K +blk.4.ffn_down.weight q5_K +blk.5.ffn_down.weight q5_K +blk.5.attn_v.weight q5_K +blk.6.ffn_down.weight q5_K +blk.6.attn_v.weight q5_K +blk.7.attn_v.weight q5_K +blk.7.ffn_down.weight q5_K +blk.8.ffn_down.weight q5_K +blk.8.attn_v.weight q5_K +blk.9.ffn_down.weight q5_K +blk.9.attn_v.weight q5_K +blk.10.attn_v.weight q5_K +blk.11.attn_v.weight q5_K +blk.12.attn_v.weight q5_K +blk.13.attn_v.weight q5_K +blk.14.attn_v.weight q5_K +blk.15.attn_v.weight q5_K +blk.16.attn_v.weight q5_K +blk.17.attn_v.weight q5_K +blk.18.attn_v.weight q5_K +blk.19.attn_v.weight q5_K +blk.20.attn_v.weight q5_K +blk.21.attn_v.weight q5_K +blk.22.attn_v.weight q5_K +blk.23.attn_v.weight q5_K +blk.24.attn_v.weight q5_K +blk.25.attn_v.weight q5_K +blk.26.attn_v.weight q5_K +blk.27.attn_v.weight q5_K +blk.28.attn_v.weight q5_K +blk.29.attn_v.weight q5_K +blk.30.attn_v.weight q5_K +blk.31.attn_v.weight q5_K +blk.32.attn_v.weight q5_K +blk.33.attn_v.weight q5_K +blk.34.attn_v.weight q5_K +blk.35.attn_v.weight q5_K +blk.36.attn_v.weight q5_K +blk.37.attn_v.weight q5_K +blk.38.attn_v.weight q5_K +blk.39.attn_v.weight q5_K +blk.40.attn_v.weight q5_K +blk.41.attn_v.weight q5_K +blk.42.attn_v.weight q5_K +blk.43.attn_v.weight q5_K +blk.44.attn_v.weight q5_K +blk.45.attn_v.weight q5_K +blk.46.attn_v.weight q5_K +blk.47.attn_v.weight q5_K +blk.48.attn_v.weight q5_K +blk.49.attn_v.weight q5_K +blk.50.attn_v.weight q5_K +blk.51.attn_v.weight q5_K +blk.52.attn_v.weight q5_K +blk.53.attn_v.weight q5_K +blk.54.attn_v.weight q5_K +blk.55.attn_v.weight q5_K +blk.56.attn_v.weight q5_K +blk.57.attn_v.weight q5_K +blk.58.attn_v.weight q5_K +blk.59.attn_v.weight q5_K +blk.60.attn_v.weight q5_K +blk.61.attn_v.weight q5_K +blk.62.attn_v.weight q5_K +blk.63.attn_v.weight q5_K +blk.64.attn_v.weight q5_K +blk.65.attn_v.weight q5_K +blk.66.attn_v.weight q5_K +blk.67.attn_v.weight q5_K +blk.68.attn_v.weight q5_K +blk.69.attn_v.weight q5_K +blk.70.attn_v.weight q5_K +blk.71.attn_v.weight q5_K +blk.72.attn_v.weight q5_K +blk.73.attn_v.weight q5_K +blk.74.attn_v.weight q5_K +blk.75.attn_v.weight q5_K +blk.76.attn_v.weight q5_K +blk.77.attn_v.weight q5_K +blk.78.attn_v.weight q5_K +blk.79.attn_v.weight q5_K +output.weight q6_K + +[IQ3_S] iq3_s +blk.0.attn_v.weight q5_K +blk.1.attn_v.weight q5_K +blk.2.attn_v.weight q5_K +blk.3.attn_v.weight q5_K +blk.4.attn_v.weight q5_K +blk.5.attn_v.weight q5_K +blk.6.attn_v.weight q5_K +blk.7.attn_v.weight q5_K +blk.8.attn_v.weight q5_K +blk.9.attn_v.weight q5_K +blk.10.attn_v.weight q5_K +blk.11.attn_v.weight q5_K +blk.12.attn_v.weight q5_K +blk.13.attn_v.weight q5_K +blk.14.attn_v.weight q5_K +blk.15.attn_v.weight q5_K +blk.16.attn_v.weight q5_K +blk.17.attn_v.weight q5_K +blk.18.attn_v.weight q5_K +blk.19.attn_v.weight q5_K +blk.20.attn_v.weight q5_K +blk.21.attn_v.weight q5_K +blk.22.attn_v.weight q5_K +blk.23.attn_v.weight q5_K +blk.24.attn_v.weight q5_K +blk.25.attn_v.weight q5_K +blk.26.attn_v.weight q5_K +blk.27.attn_v.weight q5_K +blk.28.attn_v.weight q5_K +blk.29.attn_v.weight q5_K +blk.30.attn_v.weight q5_K +blk.31.attn_v.weight q5_K +blk.32.attn_v.weight q5_K +blk.33.attn_v.weight q5_K +blk.34.attn_v.weight q5_K +blk.35.attn_v.weight q5_K +blk.36.attn_v.weight q5_K +blk.37.attn_v.weight q5_K +blk.38.attn_v.weight q5_K +blk.39.attn_v.weight q5_K +blk.40.attn_v.weight q5_K +blk.41.attn_v.weight q5_K +blk.42.attn_v.weight q5_K +blk.43.attn_v.weight q5_K +blk.44.attn_v.weight q5_K +blk.45.attn_v.weight q5_K +blk.46.attn_v.weight q5_K +blk.47.attn_v.weight q5_K +blk.48.attn_v.weight q5_K +blk.49.attn_v.weight q5_K +blk.50.attn_v.weight q5_K +blk.51.attn_v.weight q5_K +blk.52.attn_v.weight q5_K +blk.53.attn_v.weight q5_K +blk.54.attn_v.weight q5_K +blk.55.attn_v.weight q5_K +blk.56.attn_v.weight q5_K +blk.57.attn_v.weight q5_K +blk.58.attn_v.weight q5_K +blk.59.attn_v.weight q5_K +blk.60.attn_v.weight q5_K +blk.61.attn_v.weight q5_K +blk.62.attn_v.weight q5_K +blk.63.attn_v.weight q5_K +blk.64.attn_v.weight q5_K +blk.65.attn_v.weight q5_K +blk.66.attn_v.weight q5_K +blk.67.attn_v.weight q5_K +blk.68.attn_v.weight q5_K +blk.69.attn_v.weight q5_K +blk.70.attn_v.weight q5_K +blk.71.attn_v.weight q5_K +blk.72.attn_v.weight q5_K +blk.73.attn_v.weight q5_K +blk.74.attn_v.weight q5_K +blk.75.attn_v.weight q5_K +blk.76.attn_v.weight q5_K +blk.77.attn_v.weight q5_K +blk.78.attn_v.weight q5_K +blk.79.attn_v.weight q5_K +output.weight q6_K + +[IQ3_M] iq3_s +blk.0.ffn_down.weight q4_K +blk.0.attn_output.weight q4_K +blk.0.attn_v.weight q5_K +blk.1.attn_output.weight q4_K +blk.1.attn_v.weight q5_K +blk.1.ffn_down.weight q4_K +blk.2.ffn_down.weight q4_K +blk.2.attn_output.weight q4_K +blk.2.attn_v.weight q5_K +blk.3.ffn_down.weight q4_K +blk.3.attn_output.weight q4_K +blk.3.attn_v.weight q5_K +blk.4.attn_output.weight q4_K +blk.4.attn_v.weight q5_K +blk.4.ffn_down.weight q4_K +blk.5.ffn_down.weight q4_K +blk.5.attn_output.weight q4_K +blk.5.attn_v.weight q5_K +blk.6.ffn_down.weight q4_K +blk.6.attn_output.weight q4_K +blk.6.attn_v.weight q5_K +blk.7.attn_v.weight q5_K +blk.7.ffn_down.weight q4_K +blk.7.attn_output.weight q4_K +blk.8.ffn_down.weight q4_K +blk.8.attn_output.weight q4_K +blk.8.attn_v.weight q5_K +blk.9.ffn_down.weight q4_K +blk.9.attn_output.weight q4_K +blk.9.attn_v.weight q5_K +blk.10.attn_output.weight q4_K +blk.10.attn_v.weight q5_K +blk.11.attn_output.weight q4_K +blk.11.attn_v.weight q5_K +blk.12.attn_output.weight q4_K +blk.12.attn_v.weight q5_K +blk.13.attn_output.weight q4_K +blk.13.attn_v.weight q5_K +blk.14.attn_output.weight q4_K +blk.14.attn_v.weight q5_K +blk.15.attn_output.weight q4_K +blk.15.attn_v.weight q5_K +blk.16.attn_output.weight q4_K +blk.16.attn_v.weight q5_K +blk.17.attn_output.weight q4_K +blk.17.attn_v.weight q5_K +blk.18.attn_output.weight q4_K +blk.18.attn_v.weight q5_K +blk.19.attn_output.weight q4_K +blk.19.attn_v.weight q5_K +blk.20.attn_output.weight q4_K +blk.20.attn_v.weight q5_K +blk.21.attn_v.weight q5_K +blk.21.attn_output.weight q4_K +blk.22.attn_output.weight q4_K +blk.22.attn_v.weight q5_K +blk.23.attn_output.weight q4_K +blk.23.attn_v.weight q5_K +blk.24.attn_output.weight q4_K +blk.24.attn_v.weight q5_K +blk.25.attn_output.weight q4_K +blk.25.attn_v.weight q5_K +blk.26.attn_output.weight q4_K +blk.26.attn_v.weight q5_K +blk.27.attn_output.weight q4_K +blk.27.attn_v.weight q5_K +blk.28.attn_output.weight q4_K +blk.28.attn_v.weight q5_K +blk.29.attn_output.weight q4_K +blk.29.attn_v.weight q5_K +blk.30.attn_output.weight q4_K +blk.30.attn_v.weight q5_K +blk.31.attn_output.weight q4_K +blk.31.attn_v.weight q5_K +blk.32.attn_output.weight q4_K +blk.32.attn_v.weight q5_K +blk.33.attn_output.weight q4_K +blk.33.attn_v.weight q5_K +blk.34.attn_output.weight q4_K +blk.34.attn_v.weight q5_K +blk.35.attn_v.weight q5_K +blk.35.attn_output.weight q4_K +blk.36.attn_output.weight q4_K +blk.36.attn_v.weight q5_K +blk.37.attn_output.weight q4_K +blk.37.attn_v.weight q5_K +blk.38.attn_output.weight q4_K +blk.38.attn_v.weight q5_K +blk.39.attn_output.weight q4_K +blk.39.attn_v.weight q5_K +blk.40.attn_output.weight q4_K +blk.40.attn_v.weight q5_K +blk.41.attn_output.weight q4_K +blk.41.attn_v.weight q5_K +blk.42.attn_output.weight q4_K +blk.42.attn_v.weight q5_K +blk.43.attn_output.weight q4_K +blk.43.attn_v.weight q5_K +blk.44.attn_output.weight q4_K +blk.44.attn_v.weight q5_K +blk.45.attn_output.weight q4_K +blk.45.attn_v.weight q5_K +blk.46.attn_output.weight q4_K +blk.46.attn_v.weight q5_K +blk.47.attn_output.weight q4_K +blk.47.attn_v.weight q5_K +blk.48.attn_output.weight q4_K +blk.48.attn_v.weight q5_K +blk.49.attn_v.weight q5_K +blk.49.attn_output.weight q4_K +blk.50.attn_output.weight q4_K +blk.50.attn_v.weight q5_K +blk.51.attn_output.weight q4_K +blk.51.attn_v.weight q5_K +blk.52.attn_output.weight q4_K +blk.52.attn_v.weight q5_K +blk.53.attn_output.weight q4_K +blk.53.attn_v.weight q5_K +blk.54.attn_output.weight q4_K +blk.54.attn_v.weight q5_K +blk.55.attn_output.weight q4_K +blk.55.attn_v.weight q5_K +blk.56.attn_output.weight q4_K +blk.56.attn_v.weight q5_K +blk.57.attn_output.weight q4_K +blk.57.attn_v.weight q5_K +blk.58.attn_output.weight q4_K +blk.58.attn_v.weight q5_K +blk.59.attn_output.weight q4_K +blk.59.attn_v.weight q5_K +blk.60.attn_output.weight q4_K +blk.60.attn_v.weight q5_K +blk.61.attn_output.weight q4_K +blk.61.attn_v.weight q5_K +blk.62.attn_output.weight q4_K +blk.62.attn_v.weight q5_K +blk.63.attn_v.weight q5_K +blk.63.attn_output.weight q4_K +blk.64.attn_output.weight q4_K +blk.64.attn_v.weight q5_K +blk.65.attn_output.weight q4_K +blk.65.attn_v.weight q5_K +blk.66.attn_output.weight q4_K +blk.66.attn_v.weight q5_K +blk.67.attn_output.weight q4_K +blk.67.attn_v.weight q5_K +blk.68.attn_output.weight q4_K +blk.68.attn_v.weight q5_K +blk.69.attn_output.weight q4_K +blk.69.attn_v.weight q5_K +blk.70.attn_output.weight q4_K +blk.70.attn_v.weight q5_K +blk.71.attn_output.weight q4_K +blk.71.attn_v.weight q5_K +blk.72.attn_output.weight q4_K +blk.72.attn_v.weight q5_K +blk.73.attn_output.weight q4_K +blk.73.attn_v.weight q5_K +blk.74.attn_output.weight q4_K +blk.74.attn_v.weight q5_K +blk.75.attn_output.weight q4_K +blk.75.attn_v.weight q5_K +blk.76.attn_output.weight q4_K +blk.76.attn_v.weight q5_K +blk.77.attn_v.weight q5_K +blk.77.attn_output.weight q4_K +blk.78.attn_output.weight q4_K +blk.78.attn_v.weight q5_K +blk.79.attn_output.weight q4_K +blk.79.attn_v.weight q5_K +output.weight q6_K + +[IQ2_S] iq2_xs +token_embd.weight iq3_s +blk.0.ffn_down.weight iq3_s +blk.0.attn_output.weight iq3_s +blk.0.attn_v.weight q4_K +blk.1.attn_output.weight iq3_s +blk.1.attn_v.weight q4_K +blk.1.ffn_down.weight iq3_s +blk.2.ffn_down.weight iq3_s +blk.2.attn_output.weight iq3_s +blk.2.attn_v.weight q4_K +blk.3.ffn_down.weight iq3_s +blk.3.attn_output.weight iq3_s +blk.3.attn_v.weight q4_K +blk.4.attn_output.weight iq3_s +blk.4.attn_v.weight q4_K +blk.4.ffn_down.weight iq3_s +blk.5.ffn_down.weight iq3_s +blk.5.attn_output.weight iq3_s +blk.5.attn_v.weight q4_K +blk.6.ffn_down.weight iq3_s +blk.6.attn_output.weight iq3_s +blk.6.attn_v.weight q4_K +blk.7.attn_v.weight q4_K +blk.7.ffn_down.weight iq3_s +blk.7.attn_output.weight iq3_s +blk.8.ffn_down.weight iq3_s +blk.8.attn_output.weight iq3_s +blk.8.attn_v.weight q4_K +blk.9.ffn_down.weight iq3_s +blk.9.attn_output.weight iq3_s +blk.9.attn_v.weight q4_K +blk.10.attn_output.weight iq3_s +blk.10.attn_v.weight q4_K +blk.11.attn_output.weight iq3_s +blk.11.attn_v.weight q4_K +blk.12.attn_output.weight iq3_s +blk.12.attn_v.weight q4_K +blk.13.attn_output.weight iq3_s +blk.13.attn_v.weight q4_K +blk.14.attn_output.weight iq3_s +blk.14.attn_v.weight q4_K +blk.15.attn_output.weight iq3_s +blk.15.attn_v.weight q4_K +blk.16.attn_output.weight iq3_s +blk.16.attn_v.weight q4_K +blk.17.attn_output.weight iq3_s +blk.17.attn_v.weight q4_K +blk.18.attn_output.weight iq3_s +blk.18.attn_v.weight q4_K +blk.19.attn_output.weight iq3_s +blk.19.attn_v.weight q4_K +blk.20.attn_output.weight iq3_s +blk.20.attn_v.weight q4_K +blk.21.attn_v.weight q4_K +blk.21.attn_output.weight iq3_s +blk.22.attn_output.weight iq3_s +blk.22.attn_v.weight q4_K +blk.23.attn_output.weight iq3_s +blk.23.attn_v.weight q4_K +blk.24.attn_output.weight iq3_s +blk.24.attn_v.weight q4_K +blk.25.attn_output.weight iq3_s +blk.25.attn_v.weight q4_K +blk.26.attn_output.weight iq3_s +blk.26.attn_v.weight q4_K +blk.27.attn_output.weight iq3_s +blk.27.attn_v.weight q4_K +blk.28.attn_output.weight iq3_s +blk.28.attn_v.weight q4_K +blk.29.attn_output.weight iq3_s +blk.29.attn_v.weight q4_K +blk.30.attn_output.weight iq3_s +blk.30.attn_v.weight q4_K +blk.31.attn_output.weight iq3_s +blk.31.attn_v.weight q4_K +blk.32.attn_output.weight iq3_s +blk.32.attn_v.weight q4_K +blk.33.attn_output.weight iq3_s +blk.33.attn_v.weight q4_K +blk.34.attn_output.weight iq3_s +blk.34.attn_v.weight q4_K +blk.35.attn_v.weight q4_K +blk.35.attn_output.weight iq3_s +blk.36.attn_output.weight iq3_s +blk.36.attn_v.weight q4_K +blk.37.attn_output.weight iq3_s +blk.37.attn_v.weight q4_K +blk.38.attn_output.weight iq3_s +blk.38.attn_v.weight q4_K +blk.39.attn_output.weight iq3_s +blk.39.attn_v.weight q4_K +blk.40.attn_output.weight iq3_s +blk.40.attn_v.weight q4_K +blk.41.attn_output.weight iq3_s +blk.41.attn_v.weight q4_K +blk.42.attn_output.weight iq3_s +blk.42.attn_v.weight q4_K +blk.43.attn_output.weight iq3_s +blk.43.attn_v.weight q4_K +blk.44.attn_output.weight iq3_s +blk.44.attn_v.weight q4_K +blk.45.attn_output.weight iq3_s +blk.45.attn_v.weight q4_K +blk.46.attn_output.weight iq3_s +blk.46.attn_v.weight q4_K +blk.47.attn_output.weight iq3_s +blk.47.attn_v.weight q4_K +blk.48.attn_output.weight iq3_s +blk.48.attn_v.weight q4_K +blk.49.attn_v.weight q4_K +blk.49.attn_output.weight iq3_s +blk.50.attn_output.weight iq3_s +blk.50.attn_v.weight q4_K +blk.51.attn_output.weight iq3_s +blk.51.attn_v.weight q4_K +blk.52.attn_output.weight iq3_s +blk.52.attn_v.weight q4_K +blk.53.attn_output.weight iq3_s +blk.53.attn_v.weight q4_K +blk.54.attn_output.weight iq3_s +blk.54.attn_v.weight q4_K +blk.55.attn_output.weight iq3_s +blk.55.attn_v.weight q4_K +blk.56.attn_output.weight iq3_s +blk.56.attn_v.weight q4_K +blk.57.attn_output.weight iq3_s +blk.57.attn_v.weight q4_K +blk.58.attn_output.weight iq3_s +blk.58.attn_v.weight q4_K +blk.59.attn_output.weight iq3_s +blk.59.attn_v.weight q4_K +blk.60.attn_output.weight iq3_s +blk.60.attn_v.weight q4_K +blk.61.attn_output.weight iq3_s +blk.61.attn_v.weight q4_K +blk.62.attn_output.weight iq3_s +blk.62.attn_v.weight q4_K +blk.63.attn_v.weight q4_K +blk.63.attn_output.weight iq3_s +blk.64.attn_output.weight iq3_s +blk.64.attn_v.weight q4_K +blk.65.attn_output.weight iq3_s +blk.65.attn_v.weight q4_K +blk.66.attn_output.weight iq3_s +blk.66.attn_v.weight q4_K +blk.67.attn_output.weight iq3_s +blk.67.attn_v.weight q4_K +blk.68.attn_output.weight iq3_s +blk.68.attn_v.weight q4_K +blk.69.attn_output.weight iq3_s +blk.69.attn_v.weight q4_K +blk.70.attn_output.weight iq3_s +blk.70.attn_v.weight q4_K +blk.71.attn_output.weight iq3_s +blk.71.attn_v.weight q4_K +blk.72.attn_output.weight iq3_s +blk.72.attn_v.weight q4_K +blk.73.attn_output.weight iq3_s +blk.73.attn_v.weight q4_K +blk.74.attn_output.weight iq3_s +blk.74.attn_v.weight q4_K +blk.75.attn_output.weight iq3_s +blk.75.attn_v.weight q4_K +blk.76.attn_output.weight iq3_s +blk.76.attn_v.weight q4_K +blk.77.attn_v.weight q4_K +blk.77.attn_output.weight iq3_s +blk.78.attn_output.weight iq3_s +blk.78.attn_v.weight q4_K +blk.79.attn_output.weight iq3_s +blk.79.attn_v.weight q4_K +output.weight q5_K + +[IQ2_M] iq2_s +token_embd.weight iq3_s +blk.0.ffn_down.weight iq3_s +blk.0.attn_output.weight iq3_s +blk.0.attn_v.weight q4_K +blk.1.attn_output.weight iq3_s +blk.1.attn_v.weight q4_K +blk.1.ffn_down.weight iq3_s +blk.2.ffn_down.weight iq3_s +blk.2.attn_output.weight iq3_s +blk.2.attn_v.weight q4_K +blk.3.ffn_down.weight iq3_s +blk.3.attn_output.weight iq3_s +blk.3.attn_v.weight q4_K +blk.4.attn_output.weight iq3_s +blk.4.attn_v.weight q4_K +blk.4.ffn_down.weight iq3_s +blk.5.ffn_down.weight iq3_s +blk.5.attn_output.weight iq3_s +blk.5.attn_v.weight q4_K +blk.6.ffn_down.weight iq3_s +blk.6.attn_output.weight iq3_s +blk.6.attn_v.weight q4_K +blk.7.attn_v.weight q4_K +blk.7.ffn_down.weight iq3_s +blk.7.attn_output.weight iq3_s +blk.8.ffn_down.weight iq3_s +blk.8.attn_output.weight iq3_s +blk.8.attn_v.weight q4_K +blk.9.ffn_down.weight iq3_s +blk.9.attn_output.weight iq3_s +blk.9.attn_v.weight q4_K +blk.10.attn_output.weight iq3_s +blk.10.attn_v.weight q4_K +blk.11.attn_output.weight iq3_s +blk.11.attn_v.weight q4_K +blk.12.attn_output.weight iq3_s +blk.12.attn_v.weight q4_K +blk.13.attn_output.weight iq3_s +blk.13.attn_v.weight q4_K +blk.14.attn_output.weight iq3_s +blk.14.attn_v.weight q4_K +blk.15.attn_output.weight iq3_s +blk.15.attn_v.weight q4_K +blk.16.attn_output.weight iq3_s +blk.16.attn_v.weight q4_K +blk.17.attn_output.weight iq3_s +blk.17.attn_v.weight q4_K +blk.18.attn_output.weight iq3_s +blk.18.attn_v.weight q4_K +blk.19.attn_output.weight iq3_s +blk.19.attn_v.weight q4_K +blk.20.attn_output.weight iq3_s +blk.20.attn_v.weight q4_K +blk.21.attn_v.weight q4_K +blk.21.attn_output.weight iq3_s +blk.22.attn_output.weight iq3_s +blk.22.attn_v.weight q4_K +blk.23.attn_output.weight iq3_s +blk.23.attn_v.weight q4_K +blk.24.attn_output.weight iq3_s +blk.24.attn_v.weight q4_K +blk.25.attn_output.weight iq3_s +blk.25.attn_v.weight q4_K +blk.26.attn_output.weight iq3_s +blk.26.attn_v.weight q4_K +blk.27.attn_output.weight iq3_s +blk.27.attn_v.weight q4_K +blk.28.attn_output.weight iq3_s +blk.28.attn_v.weight q4_K +blk.29.attn_output.weight iq3_s +blk.29.attn_v.weight q4_K +blk.30.attn_output.weight iq3_s +blk.30.attn_v.weight q4_K +blk.31.attn_output.weight iq3_s +blk.31.attn_v.weight q4_K +blk.32.attn_output.weight iq3_s +blk.32.attn_v.weight q4_K +blk.33.attn_output.weight iq3_s +blk.33.attn_v.weight q4_K +blk.34.attn_output.weight iq3_s +blk.34.attn_v.weight q4_K +blk.35.attn_v.weight q4_K +blk.35.attn_output.weight iq3_s +blk.36.attn_output.weight iq3_s +blk.36.attn_v.weight q4_K +blk.37.attn_output.weight iq3_s +blk.37.attn_v.weight q4_K +blk.38.attn_output.weight iq3_s +blk.38.attn_v.weight q4_K +blk.39.attn_output.weight iq3_s +blk.39.attn_v.weight q4_K +blk.40.attn_output.weight iq3_s +blk.40.attn_v.weight q4_K +blk.41.attn_output.weight iq3_s +blk.41.attn_v.weight q4_K +blk.42.attn_output.weight iq3_s +blk.42.attn_v.weight q4_K +blk.43.attn_output.weight iq3_s +blk.43.attn_v.weight q4_K +blk.44.attn_output.weight iq3_s +blk.44.attn_v.weight q4_K +blk.45.attn_output.weight iq3_s +blk.45.attn_v.weight q4_K +blk.46.attn_output.weight iq3_s +blk.46.attn_v.weight q4_K +blk.47.attn_output.weight iq3_s +blk.47.attn_v.weight q4_K +blk.48.attn_output.weight iq3_s +blk.48.attn_v.weight q4_K +blk.49.attn_v.weight q4_K +blk.49.attn_output.weight iq3_s +blk.50.attn_output.weight iq3_s +blk.50.attn_v.weight q4_K +blk.51.attn_output.weight iq3_s +blk.51.attn_v.weight q4_K +blk.52.attn_output.weight iq3_s +blk.52.attn_v.weight q4_K +blk.53.attn_output.weight iq3_s +blk.53.attn_v.weight q4_K +blk.54.attn_output.weight iq3_s +blk.54.attn_v.weight q4_K +blk.55.attn_output.weight iq3_s +blk.55.attn_v.weight q4_K +blk.56.attn_output.weight iq3_s +blk.56.attn_v.weight q4_K +blk.57.attn_output.weight iq3_s +blk.57.attn_v.weight q4_K +blk.58.attn_output.weight iq3_s +blk.58.attn_v.weight q4_K +blk.59.attn_output.weight iq3_s +blk.59.attn_v.weight q4_K +blk.60.attn_output.weight iq3_s +blk.60.attn_v.weight q4_K +blk.61.attn_output.weight iq3_s +blk.61.attn_v.weight q4_K +blk.62.attn_output.weight iq3_s +blk.62.attn_v.weight q4_K +blk.63.attn_v.weight q4_K +blk.63.attn_output.weight iq3_s +blk.64.attn_output.weight iq3_s +blk.64.attn_v.weight q4_K +blk.65.attn_output.weight iq3_s +blk.65.attn_v.weight q4_K +blk.66.attn_output.weight iq3_s +blk.66.attn_v.weight q4_K +blk.67.attn_output.weight iq3_s +blk.67.attn_v.weight q4_K +blk.68.attn_output.weight iq3_s +blk.68.attn_v.weight q4_K +blk.69.attn_output.weight iq3_s +blk.69.attn_v.weight q4_K +blk.70.attn_output.weight iq3_s +blk.70.attn_v.weight q4_K +blk.71.attn_output.weight iq3_s +blk.71.attn_v.weight q4_K +blk.72.attn_output.weight iq3_s +blk.72.attn_v.weight q4_K +blk.73.attn_output.weight iq3_s +blk.73.attn_v.weight q4_K +blk.74.attn_output.weight iq3_s +blk.74.attn_v.weight q4_K +blk.75.attn_output.weight iq3_s +blk.75.attn_v.weight q4_K +blk.76.attn_output.weight iq3_s +blk.76.attn_v.weight q4_K +blk.77.attn_v.weight q4_K +blk.77.attn_output.weight iq3_s +blk.78.attn_output.weight iq3_s +blk.78.attn_v.weight q4_K +blk.79.attn_output.weight iq3_s +blk.79.attn_v.weight q4_K +output.weight q5_K + +[IQ4_XS] iq4_xs +blk.0.ffn_down.weight q5_K +blk.0.attn_v.weight q5_K +blk.1.attn_v.weight q5_K +blk.1.ffn_down.weight q5_K +blk.2.ffn_down.weight q5_K +blk.2.attn_v.weight q5_K +blk.3.ffn_down.weight q5_K +blk.3.attn_v.weight q5_K +blk.4.attn_v.weight q5_K +blk.4.ffn_down.weight q5_K +blk.5.ffn_down.weight q5_K +blk.5.attn_v.weight q5_K +blk.6.ffn_down.weight q5_K +blk.6.attn_v.weight q5_K +blk.7.attn_v.weight q5_K +blk.7.ffn_down.weight q5_K +blk.8.ffn_down.weight q5_K +blk.8.attn_v.weight q5_K +blk.9.ffn_down.weight q5_K +blk.9.attn_v.weight q5_K +blk.10.attn_v.weight q5_K +blk.11.attn_v.weight q5_K +blk.12.attn_v.weight q5_K +blk.13.attn_v.weight q5_K +blk.14.attn_v.weight q5_K +blk.15.attn_v.weight q5_K +blk.16.attn_v.weight q5_K +blk.17.attn_v.weight q5_K +blk.18.attn_v.weight q5_K +blk.19.attn_v.weight q5_K +blk.20.attn_v.weight q5_K +blk.21.attn_v.weight q5_K +blk.22.attn_v.weight q5_K +blk.23.attn_v.weight q5_K +blk.24.attn_v.weight q5_K +blk.25.attn_v.weight q5_K +blk.26.attn_v.weight q5_K +blk.27.attn_v.weight q5_K +blk.28.attn_v.weight q5_K +blk.29.attn_v.weight q5_K +blk.30.attn_v.weight q5_K +blk.31.attn_v.weight q5_K +blk.32.attn_v.weight q5_K +blk.33.attn_v.weight q5_K +blk.34.attn_v.weight q5_K +blk.35.attn_v.weight q5_K +blk.36.attn_v.weight q5_K +blk.37.attn_v.weight q5_K +blk.38.attn_v.weight q5_K +blk.39.attn_v.weight q5_K +blk.40.attn_v.weight q5_K +blk.41.attn_v.weight q5_K +blk.42.attn_v.weight q5_K +blk.43.attn_v.weight q5_K +blk.44.attn_v.weight q5_K +blk.45.attn_v.weight q5_K +blk.46.attn_v.weight q5_K +blk.47.attn_v.weight q5_K +blk.48.attn_v.weight q5_K +blk.49.attn_v.weight q5_K +blk.50.attn_v.weight q5_K +blk.51.attn_v.weight q5_K +blk.52.attn_v.weight q5_K +blk.53.attn_v.weight q5_K +blk.54.attn_v.weight q5_K +blk.55.attn_v.weight q5_K +blk.56.attn_v.weight q5_K +blk.57.attn_v.weight q5_K +blk.58.attn_v.weight q5_K +blk.59.attn_v.weight q5_K +blk.60.attn_v.weight q5_K +blk.61.attn_v.weight q5_K +blk.62.attn_v.weight q5_K +blk.63.attn_v.weight q5_K +blk.64.attn_v.weight q5_K +blk.65.attn_v.weight q5_K +blk.66.attn_v.weight q5_K +blk.67.attn_v.weight q5_K +blk.68.attn_v.weight q5_K +blk.69.attn_v.weight q5_K +blk.70.attn_v.weight q5_K +blk.71.attn_v.weight q5_K +blk.72.attn_v.weight q5_K +blk.73.attn_v.weight q5_K +blk.74.attn_v.weight q5_K +blk.75.attn_v.weight q5_K +blk.76.attn_v.weight q5_K +blk.77.attn_v.weight q5_K +blk.78.attn_v.weight q5_K +blk.79.attn_v.weight q5_K +output.weight q6_K + +[IQ1_M] iq1_m +token_embd.weight q2_K +blk.0.ffn_down.weight q2_K +blk.0.attn_output.weight iq2_xxs +blk.0.attn_v.weight q4_K +blk.1.attn_output.weight iq2_xxs +blk.1.attn_v.weight q4_K +blk.1.ffn_down.weight q2_K +blk.2.ffn_down.weight q2_K +blk.2.attn_output.weight iq2_xxs +blk.2.attn_v.weight q4_K +blk.3.ffn_down.weight q2_K +blk.3.attn_output.weight iq2_xxs +blk.3.attn_v.weight q4_K +blk.4.attn_output.weight iq2_xxs +blk.4.attn_v.weight q4_K +blk.4.ffn_down.weight q2_K +blk.5.ffn_down.weight q2_K +blk.5.attn_output.weight iq2_xxs +blk.5.attn_v.weight q4_K +blk.6.ffn_down.weight q2_K +blk.6.attn_output.weight iq2_xxs +blk.6.attn_v.weight q4_K +blk.7.attn_v.weight q4_K +blk.7.ffn_down.weight q2_K +blk.7.attn_output.weight iq2_xxs +blk.8.ffn_down.weight q2_K +blk.8.attn_output.weight iq2_xxs +blk.8.attn_v.weight q4_K +blk.9.ffn_down.weight q2_K +blk.9.attn_output.weight iq2_xxs +blk.9.attn_v.weight q4_K +blk.10.attn_output.weight iq2_xxs +blk.10.attn_v.weight q4_K +blk.11.attn_output.weight iq2_xxs +blk.11.attn_v.weight q4_K +blk.12.attn_output.weight iq2_xxs +blk.12.attn_v.weight q4_K +blk.13.attn_output.weight iq2_xxs +blk.13.attn_v.weight q4_K +blk.14.attn_output.weight iq2_xxs +blk.14.attn_v.weight q4_K +blk.15.attn_output.weight iq2_xxs +blk.15.attn_v.weight q4_K +blk.16.attn_output.weight iq2_xxs +blk.16.attn_v.weight q4_K +blk.17.attn_output.weight iq2_xxs +blk.17.attn_v.weight q4_K +blk.18.attn_output.weight iq2_xxs +blk.18.attn_v.weight q4_K +blk.19.attn_output.weight iq2_xxs +blk.19.attn_v.weight q4_K +blk.20.attn_output.weight iq2_xxs +blk.20.attn_v.weight q4_K +blk.21.attn_v.weight q4_K +blk.21.attn_output.weight iq2_xxs +blk.22.attn_output.weight iq2_xxs +blk.22.attn_v.weight q4_K +blk.23.attn_output.weight iq2_xxs +blk.23.attn_v.weight q4_K +blk.24.attn_output.weight iq2_xxs +blk.24.attn_v.weight q4_K +blk.25.attn_output.weight iq2_xxs +blk.25.attn_v.weight q4_K +blk.26.attn_output.weight iq2_xxs +blk.26.attn_v.weight q4_K +blk.27.attn_output.weight iq2_xxs +blk.27.attn_v.weight q4_K +blk.28.attn_output.weight iq2_xxs +blk.28.attn_v.weight q4_K +blk.29.attn_output.weight iq2_xxs +blk.29.attn_v.weight q4_K +blk.30.attn_output.weight iq2_xxs +blk.30.attn_v.weight q4_K +blk.31.attn_output.weight iq2_xxs +blk.31.attn_v.weight q4_K +blk.32.attn_output.weight iq2_xxs +blk.32.attn_v.weight q4_K +blk.33.attn_output.weight iq2_xxs +blk.33.attn_v.weight q4_K +blk.34.attn_output.weight iq2_xxs +blk.34.attn_v.weight q4_K +blk.35.attn_v.weight q4_K +blk.35.attn_output.weight iq2_xxs +blk.36.attn_output.weight iq2_xxs +blk.36.attn_v.weight q4_K +blk.37.attn_output.weight iq2_xxs +blk.37.attn_v.weight q4_K +blk.38.attn_output.weight iq2_xxs +blk.38.attn_v.weight q4_K +blk.39.attn_output.weight iq2_xxs +blk.39.attn_v.weight q4_K +blk.40.attn_output.weight iq2_xxs +blk.40.attn_v.weight q4_K +blk.41.attn_output.weight iq2_xxs +blk.41.attn_v.weight q4_K +blk.42.attn_output.weight iq2_xxs +blk.42.attn_v.weight q4_K +blk.43.attn_output.weight iq2_xxs +blk.43.attn_v.weight q4_K +blk.44.attn_output.weight iq2_xxs +blk.44.attn_v.weight q4_K +blk.45.attn_output.weight iq2_xxs +blk.45.attn_v.weight q4_K +blk.46.attn_output.weight iq2_xxs +blk.46.attn_v.weight q4_K +blk.47.attn_output.weight iq2_xxs +blk.47.attn_v.weight q4_K +blk.48.attn_output.weight iq2_xxs +blk.48.attn_v.weight q4_K +blk.49.attn_v.weight q4_K +blk.49.attn_output.weight iq2_xxs +blk.50.attn_output.weight iq2_xxs +blk.50.attn_v.weight q4_K +blk.51.attn_output.weight iq2_xxs +blk.51.attn_v.weight q4_K +blk.52.attn_output.weight iq2_xxs +blk.52.attn_v.weight q4_K +blk.53.attn_output.weight iq2_xxs +blk.53.attn_v.weight q4_K +blk.54.attn_output.weight iq2_xxs +blk.54.attn_v.weight q4_K +blk.55.attn_output.weight iq2_xxs +blk.55.attn_v.weight q4_K +blk.56.attn_output.weight iq2_xxs +blk.56.attn_v.weight q4_K +blk.57.attn_output.weight iq2_xxs +blk.57.attn_v.weight q4_K +blk.58.attn_output.weight iq2_xxs +blk.58.attn_v.weight q4_K +blk.59.attn_output.weight iq2_xxs +blk.59.attn_v.weight q4_K +blk.60.attn_output.weight iq2_xxs +blk.60.attn_v.weight q4_K +blk.61.attn_output.weight iq2_xxs +blk.61.attn_v.weight q4_K +blk.62.attn_output.weight iq2_xxs +blk.62.attn_v.weight q4_K +blk.63.attn_v.weight q4_K +blk.63.attn_output.weight iq2_xxs +blk.64.attn_output.weight iq2_xxs +blk.64.attn_v.weight q4_K +blk.65.attn_output.weight iq2_xxs +blk.65.attn_v.weight q4_K +blk.66.attn_output.weight iq2_xxs +blk.66.attn_v.weight q4_K +blk.67.attn_output.weight iq2_xxs +blk.67.attn_v.weight q4_K +blk.68.attn_output.weight iq2_xxs +blk.68.attn_v.weight q4_K +blk.69.attn_output.weight iq2_xxs +blk.69.attn_v.weight q4_K +blk.70.attn_output.weight iq2_xxs +blk.70.attn_v.weight q4_K +blk.71.attn_output.weight iq2_xxs +blk.71.attn_v.weight q4_K +blk.72.attn_output.weight iq2_xxs +blk.72.attn_v.weight q4_K +blk.73.attn_output.weight iq2_xxs +blk.73.attn_v.weight q4_K +blk.74.attn_output.weight iq2_xxs +blk.74.attn_v.weight q4_K +blk.75.attn_output.weight iq2_xxs +blk.75.attn_v.weight q4_K +blk.76.attn_output.weight iq2_xxs +blk.76.attn_v.weight q4_K +blk.77.attn_v.weight q4_K +blk.77.attn_output.weight iq2_xxs +blk.78.attn_output.weight iq2_xxs +blk.78.attn_v.weight q4_K +blk.79.attn_output.weight iq2_xxs +blk.79.attn_v.weight q4_K +output.weight q5_K + +[BF16] bf16 +output.weight q6_K + +[TQ1_0] tq1_0 +token_embd.weight q4_K +output.weight q6_K + +[TQ2_0] tq2_0 +token_embd.weight q4_K +output.weight q6_K + +[MXFP4_MOE] mxfp4 +token_embd.weight q8_0 +blk.0.ffn_down.weight q8_0 +blk.0.ffn_gate.weight q8_0 +blk.0.ffn_up.weight q8_0 +blk.0.attn_k.weight q8_0 +blk.0.attn_output.weight q8_0 +blk.0.attn_q.weight q8_0 +blk.0.attn_v.weight q8_0 +blk.1.ffn_gate.weight q8_0 +blk.1.attn_k.weight q8_0 +blk.1.attn_output.weight q8_0 +blk.1.attn_q.weight q8_0 +blk.1.attn_v.weight q8_0 +blk.1.ffn_down.weight q8_0 +blk.1.ffn_up.weight q8_0 +blk.2.ffn_down.weight q8_0 +blk.2.ffn_gate.weight q8_0 +blk.2.ffn_up.weight q8_0 +blk.2.attn_k.weight q8_0 +blk.2.attn_output.weight q8_0 +blk.2.attn_q.weight q8_0 +blk.2.attn_v.weight q8_0 +blk.3.ffn_down.weight q8_0 +blk.3.ffn_gate.weight q8_0 +blk.3.ffn_up.weight q8_0 +blk.3.attn_k.weight q8_0 +blk.3.attn_output.weight q8_0 +blk.3.attn_q.weight q8_0 +blk.3.attn_v.weight q8_0 +blk.4.attn_k.weight q8_0 +blk.4.attn_output.weight q8_0 +blk.4.attn_q.weight q8_0 +blk.4.attn_v.weight q8_0 +blk.4.ffn_down.weight q8_0 +blk.4.ffn_gate.weight q8_0 +blk.4.ffn_up.weight q8_0 +blk.5.ffn_down.weight q8_0 +blk.5.ffn_gate.weight q8_0 +blk.5.ffn_up.weight q8_0 +blk.5.attn_k.weight q8_0 +blk.5.attn_output.weight q8_0 +blk.5.attn_q.weight q8_0 +blk.5.attn_v.weight q8_0 +blk.6.ffn_down.weight q8_0 +blk.6.ffn_gate.weight q8_0 +blk.6.ffn_up.weight q8_0 +blk.6.attn_k.weight q8_0 +blk.6.attn_output.weight q8_0 +blk.6.attn_q.weight q8_0 +blk.6.attn_v.weight q8_0 +blk.7.attn_k.weight q8_0 +blk.7.attn_q.weight q8_0 +blk.7.attn_v.weight q8_0 +blk.7.ffn_down.weight q8_0 +blk.7.ffn_gate.weight q8_0 +blk.7.ffn_up.weight q8_0 +blk.7.attn_output.weight q8_0 +blk.8.ffn_down.weight q8_0 +blk.8.ffn_gate.weight q8_0 +blk.8.ffn_up.weight q8_0 +blk.8.attn_k.weight q8_0 +blk.8.attn_output.weight q8_0 +blk.8.attn_q.weight q8_0 +blk.8.attn_v.weight q8_0 +blk.9.ffn_down.weight q8_0 +blk.9.ffn_gate.weight q8_0 +blk.9.ffn_up.weight q8_0 +blk.9.attn_k.weight q8_0 +blk.9.attn_output.weight q8_0 +blk.9.attn_q.weight q8_0 +blk.9.attn_v.weight q8_0 +blk.10.ffn_down.weight q8_0 +blk.10.ffn_gate.weight q8_0 +blk.10.ffn_up.weight q8_0 +blk.10.attn_k.weight q8_0 +blk.10.attn_output.weight q8_0 +blk.10.attn_q.weight q8_0 +blk.10.attn_v.weight q8_0 +blk.11.ffn_down.weight q8_0 +blk.11.ffn_gate.weight q8_0 +blk.11.ffn_up.weight q8_0 +blk.11.attn_k.weight q8_0 +blk.11.attn_output.weight q8_0 +blk.11.attn_q.weight q8_0 +blk.11.attn_v.weight q8_0 +blk.12.ffn_gate.weight q8_0 +blk.12.ffn_up.weight q8_0 +blk.12.attn_k.weight q8_0 +blk.12.attn_output.weight q8_0 +blk.12.attn_q.weight q8_0 +blk.12.attn_v.weight q8_0 +blk.12.ffn_down.weight q8_0 +blk.13.ffn_down.weight q8_0 +blk.13.ffn_gate.weight q8_0 +blk.13.ffn_up.weight q8_0 +blk.13.attn_k.weight q8_0 +blk.13.attn_output.weight q8_0 +blk.13.attn_q.weight q8_0 +blk.13.attn_v.weight q8_0 +blk.14.ffn_down.weight q8_0 +blk.14.ffn_gate.weight q8_0 +blk.14.ffn_up.weight q8_0 +blk.14.attn_k.weight q8_0 +blk.14.attn_output.weight q8_0 +blk.14.attn_q.weight q8_0 +blk.14.attn_v.weight q8_0 +blk.15.ffn_gate.weight q8_0 +blk.15.attn_k.weight q8_0 +blk.15.attn_output.weight q8_0 +blk.15.attn_q.weight q8_0 +blk.15.attn_v.weight q8_0 +blk.15.ffn_down.weight q8_0 +blk.15.ffn_up.weight q8_0 +blk.16.ffn_down.weight q8_0 +blk.16.ffn_gate.weight q8_0 +blk.16.ffn_up.weight q8_0 +blk.16.attn_k.weight q8_0 +blk.16.attn_output.weight q8_0 +blk.16.attn_q.weight q8_0 +blk.16.attn_v.weight q8_0 +blk.17.ffn_down.weight q8_0 +blk.17.ffn_gate.weight q8_0 +blk.17.ffn_up.weight q8_0 +blk.17.attn_k.weight q8_0 +blk.17.attn_output.weight q8_0 +blk.17.attn_q.weight q8_0 +blk.17.attn_v.weight q8_0 +blk.18.attn_k.weight q8_0 +blk.18.attn_output.weight q8_0 +blk.18.attn_q.weight q8_0 +blk.18.attn_v.weight q8_0 +blk.18.ffn_down.weight q8_0 +blk.18.ffn_gate.weight q8_0 +blk.18.ffn_up.weight q8_0 +blk.19.ffn_down.weight q8_0 +blk.19.ffn_gate.weight q8_0 +blk.19.ffn_up.weight q8_0 +blk.19.attn_k.weight q8_0 +blk.19.attn_output.weight q8_0 +blk.19.attn_q.weight q8_0 +blk.19.attn_v.weight q8_0 +blk.20.ffn_down.weight q8_0 +blk.20.ffn_gate.weight q8_0 +blk.20.ffn_up.weight q8_0 +blk.20.attn_k.weight q8_0 +blk.20.attn_output.weight q8_0 +blk.20.attn_q.weight q8_0 +blk.20.attn_v.weight q8_0 +blk.21.attn_k.weight q8_0 +blk.21.attn_q.weight q8_0 +blk.21.attn_v.weight q8_0 +blk.21.ffn_down.weight q8_0 +blk.21.ffn_gate.weight q8_0 +blk.21.ffn_up.weight q8_0 +blk.21.attn_output.weight q8_0 +blk.22.ffn_down.weight q8_0 +blk.22.ffn_gate.weight q8_0 +blk.22.ffn_up.weight q8_0 +blk.22.attn_k.weight q8_0 +blk.22.attn_output.weight q8_0 +blk.22.attn_q.weight q8_0 +blk.22.attn_v.weight q8_0 +blk.23.ffn_down.weight q8_0 +blk.23.ffn_gate.weight q8_0 +blk.23.ffn_up.weight q8_0 +blk.23.attn_k.weight q8_0 +blk.23.attn_output.weight q8_0 +blk.23.attn_q.weight q8_0 +blk.23.attn_v.weight q8_0 +blk.24.ffn_down.weight q8_0 +blk.24.ffn_gate.weight q8_0 +blk.24.ffn_up.weight q8_0 +blk.24.attn_k.weight q8_0 +blk.24.attn_output.weight q8_0 +blk.24.attn_q.weight q8_0 +blk.24.attn_v.weight q8_0 +blk.25.ffn_down.weight q8_0 +blk.25.ffn_gate.weight q8_0 +blk.25.ffn_up.weight q8_0 +blk.25.attn_k.weight q8_0 +blk.25.attn_output.weight q8_0 +blk.25.attn_q.weight q8_0 +blk.25.attn_v.weight q8_0 +blk.26.ffn_gate.weight q8_0 +blk.26.ffn_up.weight q8_0 +blk.26.attn_k.weight q8_0 +blk.26.attn_output.weight q8_0 +blk.26.attn_q.weight q8_0 +blk.26.attn_v.weight q8_0 +blk.26.ffn_down.weight q8_0 +blk.27.ffn_down.weight q8_0 +blk.27.ffn_gate.weight q8_0 +blk.27.ffn_up.weight q8_0 +blk.27.attn_k.weight q8_0 +blk.27.attn_output.weight q8_0 +blk.27.attn_q.weight q8_0 +blk.27.attn_v.weight q8_0 +blk.28.ffn_down.weight q8_0 +blk.28.ffn_gate.weight q8_0 +blk.28.ffn_up.weight q8_0 +blk.28.attn_k.weight q8_0 +blk.28.attn_output.weight q8_0 +blk.28.attn_q.weight q8_0 +blk.28.attn_v.weight q8_0 +blk.29.ffn_gate.weight q8_0 +blk.29.attn_k.weight q8_0 +blk.29.attn_output.weight q8_0 +blk.29.attn_q.weight q8_0 +blk.29.attn_v.weight q8_0 +blk.29.ffn_down.weight q8_0 +blk.29.ffn_up.weight q8_0 +blk.30.ffn_down.weight q8_0 +blk.30.ffn_gate.weight q8_0 +blk.30.ffn_up.weight q8_0 +blk.30.attn_k.weight q8_0 +blk.30.attn_output.weight q8_0 +blk.30.attn_q.weight q8_0 +blk.30.attn_v.weight q8_0 +blk.31.ffn_down.weight q8_0 +blk.31.ffn_gate.weight q8_0 +blk.31.ffn_up.weight q8_0 +blk.31.attn_k.weight q8_0 +blk.31.attn_output.weight q8_0 +blk.31.attn_q.weight q8_0 +blk.31.attn_v.weight q8_0 +blk.32.attn_k.weight q8_0 +blk.32.attn_output.weight q8_0 +blk.32.attn_q.weight q8_0 +blk.32.attn_v.weight q8_0 +blk.32.ffn_down.weight q8_0 +blk.32.ffn_gate.weight q8_0 +blk.32.ffn_up.weight q8_0 +blk.33.ffn_down.weight q8_0 +blk.33.ffn_gate.weight q8_0 +blk.33.ffn_up.weight q8_0 +blk.33.attn_k.weight q8_0 +blk.33.attn_output.weight q8_0 +blk.33.attn_q.weight q8_0 +blk.33.attn_v.weight q8_0 +blk.34.ffn_down.weight q8_0 +blk.34.ffn_gate.weight q8_0 +blk.34.ffn_up.weight q8_0 +blk.34.attn_k.weight q8_0 +blk.34.attn_output.weight q8_0 +blk.34.attn_q.weight q8_0 +blk.34.attn_v.weight q8_0 +blk.35.attn_k.weight q8_0 +blk.35.attn_q.weight q8_0 +blk.35.attn_v.weight q8_0 +blk.35.ffn_down.weight q8_0 +blk.35.ffn_gate.weight q8_0 +blk.35.ffn_up.weight q8_0 +blk.35.attn_output.weight q8_0 +blk.36.ffn_down.weight q8_0 +blk.36.ffn_gate.weight q8_0 +blk.36.ffn_up.weight q8_0 +blk.36.attn_k.weight q8_0 +blk.36.attn_output.weight q8_0 +blk.36.attn_q.weight q8_0 +blk.36.attn_v.weight q8_0 +blk.37.ffn_down.weight q8_0 +blk.37.ffn_gate.weight q8_0 +blk.37.ffn_up.weight q8_0 +blk.37.attn_k.weight q8_0 +blk.37.attn_output.weight q8_0 +blk.37.attn_q.weight q8_0 +blk.37.attn_v.weight q8_0 +blk.38.ffn_down.weight q8_0 +blk.38.ffn_gate.weight q8_0 +blk.38.ffn_up.weight q8_0 +blk.38.attn_k.weight q8_0 +blk.38.attn_output.weight q8_0 +blk.38.attn_q.weight q8_0 +blk.38.attn_v.weight q8_0 +blk.39.ffn_down.weight q8_0 +blk.39.ffn_gate.weight q8_0 +blk.39.ffn_up.weight q8_0 +blk.39.attn_k.weight q8_0 +blk.39.attn_output.weight q8_0 +blk.39.attn_q.weight q8_0 +blk.39.attn_v.weight q8_0 +blk.40.ffn_gate.weight q8_0 +blk.40.ffn_up.weight q8_0 +blk.40.attn_k.weight q8_0 +blk.40.attn_output.weight q8_0 +blk.40.attn_q.weight q8_0 +blk.40.attn_v.weight q8_0 +blk.40.ffn_down.weight q8_0 +blk.41.ffn_down.weight q8_0 +blk.41.ffn_gate.weight q8_0 +blk.41.ffn_up.weight q8_0 +blk.41.attn_k.weight q8_0 +blk.41.attn_output.weight q8_0 +blk.41.attn_q.weight q8_0 +blk.41.attn_v.weight q8_0 +blk.42.ffn_down.weight q8_0 +blk.42.ffn_gate.weight q8_0 +blk.42.ffn_up.weight q8_0 +blk.42.attn_k.weight q8_0 +blk.42.attn_output.weight q8_0 +blk.42.attn_q.weight q8_0 +blk.42.attn_v.weight q8_0 +blk.43.ffn_gate.weight q8_0 +blk.43.attn_k.weight q8_0 +blk.43.attn_output.weight q8_0 +blk.43.attn_q.weight q8_0 +blk.43.attn_v.weight q8_0 +blk.43.ffn_down.weight q8_0 +blk.43.ffn_up.weight q8_0 +blk.44.ffn_down.weight q8_0 +blk.44.ffn_gate.weight q8_0 +blk.44.ffn_up.weight q8_0 +blk.44.attn_k.weight q8_0 +blk.44.attn_output.weight q8_0 +blk.44.attn_q.weight q8_0 +blk.44.attn_v.weight q8_0 +blk.45.ffn_down.weight q8_0 +blk.45.ffn_gate.weight q8_0 +blk.45.ffn_up.weight q8_0 +blk.45.attn_k.weight q8_0 +blk.45.attn_output.weight q8_0 +blk.45.attn_q.weight q8_0 +blk.45.attn_v.weight q8_0 +blk.46.attn_k.weight q8_0 +blk.46.attn_output.weight q8_0 +blk.46.attn_q.weight q8_0 +blk.46.attn_v.weight q8_0 +blk.46.ffn_down.weight q8_0 +blk.46.ffn_gate.weight q8_0 +blk.46.ffn_up.weight q8_0 +blk.47.ffn_down.weight q8_0 +blk.47.ffn_gate.weight q8_0 +blk.47.ffn_up.weight q8_0 +blk.47.attn_k.weight q8_0 +blk.47.attn_output.weight q8_0 +blk.47.attn_q.weight q8_0 +blk.47.attn_v.weight q8_0 +blk.48.ffn_down.weight q8_0 +blk.48.ffn_gate.weight q8_0 +blk.48.ffn_up.weight q8_0 +blk.48.attn_k.weight q8_0 +blk.48.attn_output.weight q8_0 +blk.48.attn_q.weight q8_0 +blk.48.attn_v.weight q8_0 +blk.49.attn_k.weight q8_0 +blk.49.attn_q.weight q8_0 +blk.49.attn_v.weight q8_0 +blk.49.ffn_down.weight q8_0 +blk.49.ffn_gate.weight q8_0 +blk.49.ffn_up.weight q8_0 +blk.49.attn_output.weight q8_0 +blk.50.ffn_down.weight q8_0 +blk.50.ffn_gate.weight q8_0 +blk.50.ffn_up.weight q8_0 +blk.50.attn_k.weight q8_0 +blk.50.attn_output.weight q8_0 +blk.50.attn_q.weight q8_0 +blk.50.attn_v.weight q8_0 +blk.51.ffn_down.weight q8_0 +blk.51.ffn_gate.weight q8_0 +blk.51.ffn_up.weight q8_0 +blk.51.attn_k.weight q8_0 +blk.51.attn_output.weight q8_0 +blk.51.attn_q.weight q8_0 +blk.51.attn_v.weight q8_0 +blk.52.ffn_down.weight q8_0 +blk.52.ffn_gate.weight q8_0 +blk.52.ffn_up.weight q8_0 +blk.52.attn_k.weight q8_0 +blk.52.attn_output.weight q8_0 +blk.52.attn_q.weight q8_0 +blk.52.attn_v.weight q8_0 +blk.53.ffn_down.weight q8_0 +blk.53.ffn_gate.weight q8_0 +blk.53.ffn_up.weight q8_0 +blk.53.attn_k.weight q8_0 +blk.53.attn_output.weight q8_0 +blk.53.attn_q.weight q8_0 +blk.53.attn_v.weight q8_0 +blk.54.ffn_gate.weight q8_0 +blk.54.ffn_up.weight q8_0 +blk.54.attn_k.weight q8_0 +blk.54.attn_output.weight q8_0 +blk.54.attn_q.weight q8_0 +blk.54.attn_v.weight q8_0 +blk.54.ffn_down.weight q8_0 +blk.55.ffn_down.weight q8_0 +blk.55.ffn_gate.weight q8_0 +blk.55.ffn_up.weight q8_0 +blk.55.attn_k.weight q8_0 +blk.55.attn_output.weight q8_0 +blk.55.attn_q.weight q8_0 +blk.55.attn_v.weight q8_0 +blk.56.ffn_down.weight q8_0 +blk.56.ffn_gate.weight q8_0 +blk.56.ffn_up.weight q8_0 +blk.56.attn_k.weight q8_0 +blk.56.attn_output.weight q8_0 +blk.56.attn_q.weight q8_0 +blk.56.attn_v.weight q8_0 +blk.57.ffn_gate.weight q8_0 +blk.57.attn_k.weight q8_0 +blk.57.attn_output.weight q8_0 +blk.57.attn_q.weight q8_0 +blk.57.attn_v.weight q8_0 +blk.57.ffn_down.weight q8_0 +blk.57.ffn_up.weight q8_0 +blk.58.ffn_down.weight q8_0 +blk.58.ffn_gate.weight q8_0 +blk.58.ffn_up.weight q8_0 +blk.58.attn_k.weight q8_0 +blk.58.attn_output.weight q8_0 +blk.58.attn_q.weight q8_0 +blk.58.attn_v.weight q8_0 +blk.59.ffn_down.weight q8_0 +blk.59.ffn_gate.weight q8_0 +blk.59.ffn_up.weight q8_0 +blk.59.attn_k.weight q8_0 +blk.59.attn_output.weight q8_0 +blk.59.attn_q.weight q8_0 +blk.59.attn_v.weight q8_0 +blk.60.attn_k.weight q8_0 +blk.60.attn_output.weight q8_0 +blk.60.attn_q.weight q8_0 +blk.60.attn_v.weight q8_0 +blk.60.ffn_down.weight q8_0 +blk.60.ffn_gate.weight q8_0 +blk.60.ffn_up.weight q8_0 +blk.61.ffn_down.weight q8_0 +blk.61.ffn_gate.weight q8_0 +blk.61.ffn_up.weight q8_0 +blk.61.attn_k.weight q8_0 +blk.61.attn_output.weight q8_0 +blk.61.attn_q.weight q8_0 +blk.61.attn_v.weight q8_0 +blk.62.ffn_down.weight q8_0 +blk.62.ffn_gate.weight q8_0 +blk.62.ffn_up.weight q8_0 +blk.62.attn_k.weight q8_0 +blk.62.attn_output.weight q8_0 +blk.62.attn_q.weight q8_0 +blk.62.attn_v.weight q8_0 +blk.63.attn_k.weight q8_0 +blk.63.attn_q.weight q8_0 +blk.63.attn_v.weight q8_0 +blk.63.ffn_down.weight q8_0 +blk.63.ffn_gate.weight q8_0 +blk.63.ffn_up.weight q8_0 +blk.63.attn_output.weight q8_0 +blk.64.ffn_down.weight q8_0 +blk.64.ffn_gate.weight q8_0 +blk.64.ffn_up.weight q8_0 +blk.64.attn_k.weight q8_0 +blk.64.attn_output.weight q8_0 +blk.64.attn_q.weight q8_0 +blk.64.attn_v.weight q8_0 +blk.65.ffn_down.weight q8_0 +blk.65.ffn_gate.weight q8_0 +blk.65.ffn_up.weight q8_0 +blk.65.attn_k.weight q8_0 +blk.65.attn_output.weight q8_0 +blk.65.attn_q.weight q8_0 +blk.65.attn_v.weight q8_0 +blk.66.ffn_down.weight q8_0 +blk.66.ffn_gate.weight q8_0 +blk.66.ffn_up.weight q8_0 +blk.66.attn_k.weight q8_0 +blk.66.attn_output.weight q8_0 +blk.66.attn_q.weight q8_0 +blk.66.attn_v.weight q8_0 +blk.67.ffn_down.weight q8_0 +blk.67.ffn_gate.weight q8_0 +blk.67.ffn_up.weight q8_0 +blk.67.attn_k.weight q8_0 +blk.67.attn_output.weight q8_0 +blk.67.attn_q.weight q8_0 +blk.67.attn_v.weight q8_0 +blk.68.ffn_gate.weight q8_0 +blk.68.ffn_up.weight q8_0 +blk.68.attn_k.weight q8_0 +blk.68.attn_output.weight q8_0 +blk.68.attn_q.weight q8_0 +blk.68.attn_v.weight q8_0 +blk.68.ffn_down.weight q8_0 +blk.69.ffn_down.weight q8_0 +blk.69.ffn_gate.weight q8_0 +blk.69.ffn_up.weight q8_0 +blk.69.attn_k.weight q8_0 +blk.69.attn_output.weight q8_0 +blk.69.attn_q.weight q8_0 +blk.69.attn_v.weight q8_0 +blk.70.ffn_down.weight q8_0 +blk.70.ffn_gate.weight q8_0 +blk.70.ffn_up.weight q8_0 +blk.70.attn_k.weight q8_0 +blk.70.attn_output.weight q8_0 +blk.70.attn_q.weight q8_0 +blk.70.attn_v.weight q8_0 +blk.71.ffn_gate.weight q8_0 +blk.71.attn_k.weight q8_0 +blk.71.attn_output.weight q8_0 +blk.71.attn_q.weight q8_0 +blk.71.attn_v.weight q8_0 +blk.71.ffn_down.weight q8_0 +blk.71.ffn_up.weight q8_0 +blk.72.ffn_down.weight q8_0 +blk.72.ffn_gate.weight q8_0 +blk.72.ffn_up.weight q8_0 +blk.72.attn_k.weight q8_0 +blk.72.attn_output.weight q8_0 +blk.72.attn_q.weight q8_0 +blk.72.attn_v.weight q8_0 +blk.73.ffn_down.weight q8_0 +blk.73.ffn_gate.weight q8_0 +blk.73.ffn_up.weight q8_0 +blk.73.attn_k.weight q8_0 +blk.73.attn_output.weight q8_0 +blk.73.attn_q.weight q8_0 +blk.73.attn_v.weight q8_0 +blk.74.attn_k.weight q8_0 +blk.74.attn_output.weight q8_0 +blk.74.attn_q.weight q8_0 +blk.74.attn_v.weight q8_0 +blk.74.ffn_down.weight q8_0 +blk.74.ffn_gate.weight q8_0 +blk.74.ffn_up.weight q8_0 +blk.75.ffn_down.weight q8_0 +blk.75.ffn_gate.weight q8_0 +blk.75.ffn_up.weight q8_0 +blk.75.attn_k.weight q8_0 +blk.75.attn_output.weight q8_0 +blk.75.attn_q.weight q8_0 +blk.75.attn_v.weight q8_0 +blk.76.ffn_down.weight q8_0 +blk.76.ffn_gate.weight q8_0 +blk.76.ffn_up.weight q8_0 +blk.76.attn_k.weight q8_0 +blk.76.attn_output.weight q8_0 +blk.76.attn_q.weight q8_0 +blk.76.attn_v.weight q8_0 +blk.77.attn_k.weight q8_0 +blk.77.attn_q.weight q8_0 +blk.77.attn_v.weight q8_0 +blk.77.ffn_down.weight q8_0 +blk.77.ffn_gate.weight q8_0 +blk.77.ffn_up.weight q8_0 +blk.77.attn_output.weight q8_0 +blk.78.ffn_down.weight q8_0 +blk.78.ffn_gate.weight q8_0 +blk.78.ffn_up.weight q8_0 +blk.78.attn_k.weight q8_0 +blk.78.attn_output.weight q8_0 +blk.78.attn_q.weight q8_0 +blk.78.attn_v.weight q8_0 +blk.79.ffn_down.weight q8_0 +blk.79.ffn_gate.weight q8_0 +blk.79.ffn_up.weight q8_0 +blk.79.attn_k.weight q8_0 +blk.79.attn_output.weight q8_0 +blk.79.attn_q.weight q8_0 +blk.79.attn_v.weight q8_0 +output.weight q8_0 diff --git a/tests/snapshots/nemotron-nano-3-30b-a3b.schema b/tests/snapshots/nemotron-nano-3-30b-a3b.schema new file mode 100644 index 0000000000..c68a6e0892 --- /dev/null +++ b/tests/snapshots/nemotron-nano-3-30b-a3b.schema @@ -0,0 +1,696 @@ +# Model: Nemotron-Nano-3-30B-A3B +# n_embd=2688, n_ff=0, n_vocab=131072, n_layer=52, n_head=32, n_head_kv=0, n_expert=128 + +[F32] f32 +output.weight q6_K + +[F16] f16 +output.weight q6_K + +[Q4_0] q4_0 +output.weight q6_K + +[Q4_1] q4_1 +output.weight q6_K + +[Q8_0] q8_0 + +[Q5_0] q5_0 +output.weight q6_K + +[Q5_1] q5_1 +output.weight q6_K + +[Q2_K] q2_K +output.weight q8_0 +blk.1.ffn_down_exps.weight q3_K +blk.1.ffn_down_shexp.weight q3_K +blk.3.ffn_down_exps.weight q3_K +blk.3.ffn_down_shexp.weight q3_K +blk.5.attn_output.weight q3_K +blk.5.attn_v.weight q3_K +blk.6.ffn_down_exps.weight q3_K +blk.6.ffn_down_shexp.weight q3_K +blk.8.ffn_down_exps.weight q3_K +blk.8.ffn_down_shexp.weight q3_K +blk.10.ffn_down_exps.weight q3_K +blk.10.ffn_down_shexp.weight q3_K +blk.12.attn_output.weight q3_K +blk.12.attn_v.weight q3_K +blk.13.ffn_down_exps.weight q3_K +blk.13.ffn_down_shexp.weight q3_K +blk.15.ffn_down_exps.weight q3_K +blk.15.ffn_down_shexp.weight q3_K +blk.17.ffn_down_exps.weight q3_K +blk.17.ffn_down_shexp.weight q3_K +blk.19.attn_output.weight q3_K +blk.19.attn_v.weight q3_K +blk.20.ffn_down_exps.weight q3_K +blk.20.ffn_down_shexp.weight q3_K +blk.22.ffn_down_exps.weight q3_K +blk.22.ffn_down_shexp.weight q3_K +blk.24.ffn_down_exps.weight q3_K +blk.24.ffn_down_shexp.weight q3_K +blk.26.attn_output.weight q3_K +blk.26.attn_v.weight q3_K +blk.27.ffn_down_exps.weight q3_K +blk.27.ffn_down_shexp.weight q3_K +blk.29.ffn_down_exps.weight q3_K +blk.29.ffn_down_shexp.weight q3_K +blk.31.ffn_down_exps.weight q3_K +blk.31.ffn_down_shexp.weight q3_K +blk.33.attn_output.weight q3_K +blk.33.attn_v.weight q3_K +blk.34.ffn_down_exps.weight q3_K +blk.34.ffn_down_shexp.weight q3_K +blk.36.ffn_down_exps.weight q3_K +blk.36.ffn_down_shexp.weight q3_K +blk.38.ffn_down_exps.weight q3_K +blk.38.ffn_down_shexp.weight q3_K +blk.40.ffn_down_exps.weight q3_K +blk.40.ffn_down_shexp.weight q3_K +blk.42.attn_output.weight q3_K +blk.42.attn_v.weight q3_K +blk.43.ffn_down_exps.weight q3_K +blk.43.ffn_down_shexp.weight q3_K +blk.45.ffn_down_exps.weight q3_K +blk.45.ffn_down_shexp.weight q3_K +blk.47.ffn_down_exps.weight q3_K +blk.47.ffn_down_shexp.weight q3_K +blk.49.ffn_down_exps.weight q3_K +blk.49.ffn_down_shexp.weight q3_K +blk.51.ffn_down_exps.weight q3_K +blk.51.ffn_down_shexp.weight q3_K + +[Q3_K_S] q3_K +output.weight q8_0 + +[Q3_K_M] q3_K +output.weight q8_0 +blk.1.ffn_down_exps.weight q5_K +blk.1.ffn_down_shexp.weight q5_K +blk.3.ffn_down_exps.weight q4_K +blk.3.ffn_down_shexp.weight q4_K +blk.5.attn_output.weight q4_K +blk.5.attn_v.weight q5_K +blk.6.ffn_down_exps.weight q4_K +blk.6.ffn_down_shexp.weight q4_K +blk.8.ffn_down_exps.weight q4_K +blk.8.ffn_down_shexp.weight q4_K +blk.10.ffn_down_exps.weight q4_K +blk.10.ffn_down_shexp.weight q4_K +blk.12.attn_output.weight q4_K +blk.12.attn_v.weight q5_K +blk.13.ffn_down_exps.weight q4_K +blk.13.ffn_down_shexp.weight q4_K +blk.15.ffn_down_exps.weight q4_K +blk.15.ffn_down_shexp.weight q4_K +blk.17.ffn_down_exps.weight q4_K +blk.17.ffn_down_shexp.weight q4_K +blk.19.attn_output.weight q4_K +blk.19.attn_v.weight q4_K +blk.20.ffn_down_exps.weight q4_K +blk.20.ffn_down_shexp.weight q4_K +blk.22.ffn_down_exps.weight q4_K +blk.22.ffn_down_shexp.weight q4_K +blk.24.ffn_down_exps.weight q4_K +blk.24.ffn_down_shexp.weight q4_K +blk.26.attn_output.weight q4_K +blk.26.attn_v.weight q4_K +blk.27.ffn_down_exps.weight q4_K +blk.27.ffn_down_shexp.weight q4_K +blk.29.ffn_down_exps.weight q4_K +blk.29.ffn_down_shexp.weight q4_K +blk.31.ffn_down_exps.weight q4_K +blk.31.ffn_down_shexp.weight q4_K +blk.33.attn_output.weight q4_K +blk.33.attn_v.weight q4_K +blk.34.ffn_down_exps.weight q4_K +blk.34.ffn_down_shexp.weight q4_K +blk.36.ffn_down_exps.weight q4_K +blk.36.ffn_down_shexp.weight q4_K +blk.38.ffn_down_exps.weight q4_K +blk.38.ffn_down_shexp.weight q4_K +blk.40.ffn_down_exps.weight q4_K +blk.40.ffn_down_shexp.weight q4_K +blk.42.attn_output.weight q4_K +blk.42.attn_v.weight q4_K +blk.43.ffn_down_exps.weight q4_K +blk.43.ffn_down_shexp.weight q4_K +blk.45.ffn_down_exps.weight q4_K +blk.45.ffn_down_shexp.weight q4_K +blk.47.ffn_down_exps.weight q4_K +blk.47.ffn_down_shexp.weight q4_K +blk.49.ffn_down_exps.weight q4_K +blk.49.ffn_down_shexp.weight q4_K +blk.51.ffn_down_exps.weight q4_K +blk.51.ffn_down_shexp.weight q4_K + +[Q3_K_L] q3_K +output.weight q8_0 +blk.1.ffn_down_exps.weight q5_K +blk.1.ffn_down_shexp.weight q5_K +blk.3.ffn_down_exps.weight q5_K +blk.3.ffn_down_shexp.weight q5_K +blk.5.attn_output.weight q5_K +blk.5.attn_v.weight q5_K +blk.6.ffn_down_exps.weight q5_K +blk.6.ffn_down_shexp.weight q5_K +blk.8.ffn_down_exps.weight q5_K +blk.8.ffn_down_shexp.weight q5_K +blk.10.ffn_down_exps.weight q5_K +blk.10.ffn_down_shexp.weight q5_K +blk.12.attn_output.weight q5_K +blk.12.attn_v.weight q5_K +blk.13.ffn_down_exps.weight q5_K +blk.13.ffn_down_shexp.weight q5_K +blk.15.ffn_down_exps.weight q5_K +blk.15.ffn_down_shexp.weight q5_K +blk.17.ffn_down_exps.weight q5_K +blk.17.ffn_down_shexp.weight q5_K +blk.19.attn_output.weight q5_K +blk.19.attn_v.weight q5_K +blk.20.ffn_down_exps.weight q5_K +blk.20.ffn_down_shexp.weight q5_K +blk.22.ffn_down_exps.weight q5_K +blk.22.ffn_down_shexp.weight q5_K +blk.24.ffn_down_exps.weight q5_K +blk.24.ffn_down_shexp.weight q5_K +blk.26.attn_output.weight q5_K +blk.26.attn_v.weight q5_K +blk.27.ffn_down_exps.weight q5_K +blk.27.ffn_down_shexp.weight q5_K +blk.29.ffn_down_exps.weight q5_K +blk.29.ffn_down_shexp.weight q5_K +blk.31.ffn_down_exps.weight q5_K +blk.31.ffn_down_shexp.weight q5_K +blk.33.attn_output.weight q5_K +blk.33.attn_v.weight q5_K +blk.34.ffn_down_exps.weight q5_K +blk.34.ffn_down_shexp.weight q5_K +blk.36.ffn_down_exps.weight q5_K +blk.36.ffn_down_shexp.weight q5_K +blk.38.ffn_down_exps.weight q5_K +blk.38.ffn_down_shexp.weight q5_K +blk.40.ffn_down_exps.weight q5_K +blk.40.ffn_down_shexp.weight q5_K +blk.42.attn_output.weight q5_K +blk.42.attn_v.weight q5_K +blk.43.ffn_down_exps.weight q5_K +blk.43.ffn_down_shexp.weight q5_K +blk.45.ffn_down_exps.weight q5_K +blk.45.ffn_down_shexp.weight q5_K +blk.47.ffn_down_exps.weight q5_K +blk.47.ffn_down_shexp.weight q5_K +blk.49.ffn_down_exps.weight q5_K +blk.49.ffn_down_shexp.weight q5_K +blk.51.ffn_down_exps.weight q5_K +blk.51.ffn_down_shexp.weight q5_K + +[Q4_K_S] q4_K +output.weight q8_0 +blk.1.ffn_down_exps.weight q5_K +blk.1.ffn_down_shexp.weight q5_K +blk.3.ffn_down_exps.weight q5_K +blk.3.ffn_down_shexp.weight q5_K +blk.5.attn_v.weight q5_K +blk.12.attn_v.weight q5_K +blk.19.attn_v.weight q5_K +blk.26.attn_v.weight q5_K + +[Q4_K_M] q4_K +output.weight q8_0 +blk.1.ffn_down_exps.weight q6_K +blk.1.ffn_down_shexp.weight q6_K +blk.3.ffn_down_exps.weight q6_K +blk.3.ffn_down_shexp.weight q6_K +blk.8.ffn_down_exps.weight q6_K +blk.8.ffn_down_shexp.weight q6_K +blk.17.ffn_down_exps.weight q6_K +blk.17.ffn_down_shexp.weight q6_K +blk.19.attn_v.weight q6_K +blk.20.ffn_down_exps.weight q6_K +blk.20.ffn_down_shexp.weight q6_K +blk.29.ffn_down_exps.weight q6_K +blk.29.ffn_down_shexp.weight q6_K +blk.38.ffn_down_exps.weight q6_K +blk.38.ffn_down_shexp.weight q6_K +blk.42.attn_v.weight q6_K +blk.45.ffn_down_exps.weight q6_K +blk.45.ffn_down_shexp.weight q6_K +blk.47.ffn_down_exps.weight q6_K +blk.47.ffn_down_shexp.weight q6_K +blk.49.ffn_down_exps.weight q6_K +blk.49.ffn_down_shexp.weight q6_K +blk.51.ffn_down_exps.weight q6_K +blk.51.ffn_down_shexp.weight q6_K + +[Q5_K_S] q5_K +output.weight q8_0 + +[Q5_K_M] q5_K +output.weight q8_0 +blk.1.ffn_down_exps.weight q6_K +blk.1.ffn_down_shexp.weight q6_K +blk.3.ffn_down_exps.weight q6_K +blk.3.ffn_down_shexp.weight q6_K +blk.8.ffn_down_exps.weight q6_K +blk.8.ffn_down_shexp.weight q6_K +blk.17.ffn_down_exps.weight q6_K +blk.17.ffn_down_shexp.weight q6_K +blk.19.attn_v.weight q6_K +blk.20.ffn_down_exps.weight q6_K +blk.20.ffn_down_shexp.weight q6_K +blk.29.ffn_down_exps.weight q6_K +blk.29.ffn_down_shexp.weight q6_K +blk.38.ffn_down_exps.weight q6_K +blk.38.ffn_down_shexp.weight q6_K +blk.42.attn_v.weight q6_K +blk.45.ffn_down_exps.weight q6_K +blk.45.ffn_down_shexp.weight q6_K +blk.47.ffn_down_exps.weight q6_K +blk.47.ffn_down_shexp.weight q6_K +blk.49.ffn_down_exps.weight q6_K +blk.49.ffn_down_shexp.weight q6_K +blk.51.ffn_down_exps.weight q6_K +blk.51.ffn_down_shexp.weight q6_K + +[Q6_K] q6_K +output.weight q8_0 + +[IQ2_XXS] iq2_xxs +output.weight q8_0 +token_embd.weight q2_K +blk.1.ffn_down_exps.weight q2_K +blk.1.ffn_down_shexp.weight q2_K +blk.3.ffn_down_exps.weight q2_K +blk.3.ffn_down_shexp.weight q2_K +blk.5.attn_v.weight q4_K +blk.6.ffn_down_exps.weight q2_K +blk.6.ffn_down_shexp.weight q2_K +blk.12.attn_v.weight q4_K +blk.19.attn_v.weight q4_K +blk.26.attn_v.weight q4_K +blk.33.attn_v.weight q4_K +blk.42.attn_v.weight q4_K + +[IQ2_XS] iq2_xs +output.weight q8_0 +token_embd.weight q2_K +blk.1.ffn_down_exps.weight q2_K +blk.1.ffn_down_shexp.weight q2_K +blk.3.ffn_down_exps.weight q2_K +blk.3.ffn_down_shexp.weight q2_K +blk.5.attn_v.weight q4_K +blk.6.ffn_down_exps.weight q2_K +blk.6.ffn_down_shexp.weight q2_K +blk.12.attn_v.weight q4_K +blk.19.attn_v.weight q4_K +blk.26.attn_v.weight q4_K +blk.33.attn_v.weight q4_K +blk.42.attn_v.weight q4_K + +[Q2_K_S] q2_K +output.weight q8_0 +blk.1.ffn_down_exps.weight q4_K +blk.1.ffn_down_shexp.weight q4_K +blk.3.ffn_down_exps.weight q4_K +blk.3.ffn_down_shexp.weight q4_K + +[IQ3_XS] iq3_s +output.weight q8_0 +blk.5.attn_k.weight iq3_xxs +blk.5.attn_q.weight iq3_xxs +blk.6.ffn_up_exps.weight iq3_xxs +blk.6.ffn_up_shexp.weight iq3_xxs +blk.8.ffn_up_exps.weight iq3_xxs +blk.8.ffn_up_shexp.weight iq3_xxs +blk.10.ffn_up_exps.weight iq3_xxs +blk.10.ffn_up_shexp.weight iq3_xxs +blk.12.attn_k.weight iq3_xxs +blk.12.attn_q.weight iq3_xxs +blk.13.ffn_up_exps.weight iq3_xxs +blk.13.ffn_up_shexp.weight iq3_xxs +blk.15.ffn_up_exps.weight iq3_xxs +blk.15.ffn_up_shexp.weight iq3_xxs +blk.17.ffn_up_exps.weight iq3_xxs +blk.17.ffn_up_shexp.weight iq3_xxs +blk.19.attn_k.weight iq3_xxs +blk.19.attn_q.weight iq3_xxs +blk.20.ffn_up_exps.weight iq3_xxs +blk.20.ffn_up_shexp.weight iq3_xxs +blk.22.ffn_up_exps.weight iq3_xxs +blk.22.ffn_up_shexp.weight iq3_xxs +blk.24.ffn_up_exps.weight iq3_xxs +blk.24.ffn_up_shexp.weight iq3_xxs +blk.26.attn_k.weight iq3_xxs +blk.26.attn_q.weight iq3_xxs +blk.27.ffn_up_exps.weight iq3_xxs +blk.27.ffn_up_shexp.weight iq3_xxs +blk.29.ffn_up_exps.weight iq3_xxs +blk.29.ffn_up_shexp.weight iq3_xxs +blk.31.ffn_up_exps.weight iq3_xxs +blk.31.ffn_up_shexp.weight iq3_xxs +blk.33.attn_k.weight iq3_xxs +blk.33.attn_q.weight iq3_xxs +blk.34.ffn_up_exps.weight iq3_xxs +blk.34.ffn_up_shexp.weight iq3_xxs +blk.36.ffn_up_exps.weight iq3_xxs +blk.36.ffn_up_shexp.weight iq3_xxs +blk.38.ffn_up_exps.weight iq3_xxs +blk.38.ffn_up_shexp.weight iq3_xxs +blk.40.ffn_up_exps.weight iq3_xxs +blk.40.ffn_up_shexp.weight iq3_xxs +blk.42.attn_k.weight iq3_xxs +blk.42.attn_q.weight iq3_xxs +blk.43.ffn_up_exps.weight iq3_xxs +blk.43.ffn_up_shexp.weight iq3_xxs + +[IQ3_XXS] iq3_xxs +output.weight q8_0 +token_embd.weight iq3_s +blk.1.ffn_down_exps.weight q4_K +blk.1.ffn_down_shexp.weight q4_K +blk.3.ffn_down_exps.weight q4_K +blk.3.ffn_down_shexp.weight q4_K +blk.5.attn_k.weight iq2_s +blk.5.attn_output.weight iq3_s +blk.5.attn_q.weight iq2_s +blk.5.attn_v.weight iq3_s +blk.6.ffn_down_exps.weight q3_K +blk.6.ffn_down_shexp.weight q3_K +blk.8.ffn_down_exps.weight q3_K +blk.8.ffn_down_shexp.weight q3_K +blk.10.ffn_down_exps.weight q3_K +blk.10.ffn_down_shexp.weight q3_K +blk.12.attn_k.weight iq2_s +blk.12.attn_output.weight iq3_s +blk.12.attn_q.weight iq2_s +blk.12.attn_v.weight iq3_s +blk.13.ffn_down_exps.weight q3_K +blk.13.ffn_down_shexp.weight q3_K +blk.15.ffn_down_exps.weight q3_K +blk.15.ffn_down_shexp.weight q3_K +blk.17.ffn_down_exps.weight q3_K +blk.17.ffn_down_shexp.weight q3_K +blk.19.attn_k.weight iq2_s +blk.19.attn_output.weight iq3_s +blk.19.attn_q.weight iq2_s +blk.19.attn_v.weight iq3_s +blk.20.ffn_down_exps.weight q3_K +blk.20.ffn_down_shexp.weight q3_K +blk.22.ffn_down_exps.weight q3_K +blk.22.ffn_down_shexp.weight q3_K +blk.24.ffn_down_exps.weight q3_K +blk.24.ffn_down_shexp.weight q3_K +blk.26.attn_k.weight iq2_s +blk.26.attn_output.weight iq3_s +blk.26.attn_q.weight iq2_s +blk.26.attn_v.weight iq3_s +blk.27.ffn_down_exps.weight q3_K +blk.27.ffn_down_shexp.weight q3_K +blk.29.ffn_down_exps.weight q3_K +blk.29.ffn_down_shexp.weight q3_K +blk.31.ffn_down_exps.weight q3_K +blk.31.ffn_down_shexp.weight q3_K +blk.33.attn_k.weight iq2_s +blk.33.attn_output.weight iq3_s +blk.33.attn_q.weight iq2_s +blk.33.attn_v.weight iq3_s +blk.34.ffn_down_exps.weight q3_K +blk.34.ffn_down_shexp.weight q3_K +blk.36.ffn_down_exps.weight q3_K +blk.36.ffn_down_shexp.weight q3_K +blk.38.ffn_down_exps.weight q3_K +blk.38.ffn_down_shexp.weight q3_K +blk.40.ffn_down_exps.weight q3_K +blk.40.ffn_down_shexp.weight q3_K +blk.42.attn_k.weight iq2_s +blk.42.attn_output.weight iq3_s +blk.42.attn_q.weight iq2_s +blk.42.attn_v.weight iq3_s +blk.43.ffn_down_exps.weight q3_K +blk.43.ffn_down_shexp.weight q3_K +blk.45.ffn_down_exps.weight q3_K +blk.45.ffn_down_shexp.weight q3_K +blk.47.ffn_down_exps.weight q3_K +blk.47.ffn_down_shexp.weight q3_K +blk.49.ffn_down_exps.weight q3_K +blk.49.ffn_down_shexp.weight q3_K +blk.51.ffn_down_exps.weight q3_K +blk.51.ffn_down_shexp.weight q3_K + +[IQ1_S] iq1_s +output.weight q8_0 +token_embd.weight q2_K +blk.1.ffn_down_exps.weight q2_K +blk.1.ffn_down_shexp.weight q2_K +blk.3.ffn_down_exps.weight q2_K +blk.3.ffn_down_shexp.weight q2_K +blk.5.attn_output.weight iq2_xxs +blk.5.attn_v.weight q4_K +blk.6.ffn_down_exps.weight q2_K +blk.6.ffn_down_shexp.weight q2_K +blk.12.attn_output.weight iq2_xxs +blk.12.attn_v.weight q4_K +blk.19.attn_output.weight iq2_xxs +blk.19.attn_v.weight q4_K +blk.26.attn_output.weight iq2_xxs +blk.26.attn_v.weight q4_K +blk.33.attn_output.weight iq2_xxs +blk.33.attn_v.weight q4_K +blk.42.attn_output.weight iq2_xxs +blk.42.attn_v.weight q4_K + +[IQ4_NL] iq4_nl +output.weight q6_K +blk.1.ffn_down_exps.weight q5_K +blk.1.ffn_down_shexp.weight q5_K +blk.3.ffn_down_exps.weight q5_K +blk.3.ffn_down_shexp.weight q5_K + +[IQ3_S] iq3_s +output.weight q8_0 + +[IQ3_M] iq3_s +output.weight q8_0 +blk.1.ffn_down_exps.weight q4_K +blk.1.ffn_down_shexp.weight q4_K +blk.3.ffn_down_exps.weight q4_K +blk.3.ffn_down_shexp.weight q4_K +blk.5.attn_output.weight q4_K +blk.5.attn_v.weight q4_K +blk.12.attn_output.weight q4_K +blk.12.attn_v.weight q4_K +blk.19.attn_output.weight q4_K +blk.19.attn_v.weight q4_K +blk.26.attn_output.weight q4_K +blk.26.attn_v.weight q4_K +blk.33.attn_output.weight q4_K +blk.33.attn_v.weight q4_K +blk.42.attn_output.weight q4_K +blk.42.attn_v.weight q4_K + +[IQ2_S] iq2_xs +output.weight q8_0 +token_embd.weight iq3_s +blk.1.ffn_down_exps.weight iq3_s +blk.1.ffn_down_shexp.weight iq3_s +blk.3.ffn_down_exps.weight iq3_s +blk.3.ffn_down_shexp.weight iq3_s +blk.5.attn_output.weight iq3_s +blk.5.attn_v.weight q4_K +blk.6.ffn_down_exps.weight iq3_s +blk.6.ffn_down_shexp.weight iq3_s +blk.12.attn_output.weight iq3_s +blk.12.attn_v.weight q4_K +blk.19.attn_output.weight iq3_s +blk.19.attn_v.weight q4_K +blk.26.attn_output.weight iq3_s +blk.26.attn_v.weight q4_K +blk.33.attn_output.weight iq3_s +blk.33.attn_v.weight q4_K +blk.42.attn_output.weight iq3_s +blk.42.attn_v.weight q4_K + +[IQ2_M] iq2_s +output.weight q8_0 +token_embd.weight iq3_s +blk.1.ffn_down_exps.weight iq3_s +blk.1.ffn_down_shexp.weight iq3_s +blk.3.ffn_down_exps.weight iq3_s +blk.3.ffn_down_shexp.weight iq3_s +blk.5.attn_output.weight iq3_s +blk.5.attn_v.weight q4_K +blk.6.ffn_down_exps.weight iq3_s +blk.6.ffn_down_shexp.weight iq3_s +blk.12.attn_output.weight iq3_s +blk.12.attn_v.weight q4_K +blk.19.attn_output.weight iq3_s +blk.19.attn_v.weight q4_K +blk.26.attn_output.weight iq3_s +blk.26.attn_v.weight q4_K +blk.33.attn_output.weight iq3_s +blk.33.attn_v.weight q4_K +blk.42.attn_output.weight iq3_s +blk.42.attn_v.weight q4_K + +[IQ4_XS] iq4_xs +output.weight q8_0 +blk.1.ffn_down_exps.weight q5_K +blk.1.ffn_down_shexp.weight q5_K +blk.3.ffn_down_exps.weight q5_K +blk.3.ffn_down_shexp.weight q5_K + +[IQ1_M] iq1_m +output.weight q8_0 +token_embd.weight q2_K +blk.1.ffn_down_exps.weight q2_K +blk.1.ffn_down_shexp.weight q2_K +blk.3.ffn_down_exps.weight q2_K +blk.3.ffn_down_shexp.weight q2_K +blk.5.attn_output.weight iq2_xxs +blk.5.attn_v.weight q4_K +blk.6.ffn_down_exps.weight q2_K +blk.6.ffn_down_shexp.weight q2_K +blk.12.attn_output.weight iq2_xxs +blk.12.attn_v.weight q4_K +blk.19.attn_output.weight iq2_xxs +blk.19.attn_v.weight q4_K +blk.26.attn_output.weight iq2_xxs +blk.26.attn_v.weight q4_K +blk.33.attn_output.weight iq2_xxs +blk.33.attn_v.weight q4_K +blk.42.attn_output.weight iq2_xxs +blk.42.attn_v.weight q4_K + +[BF16] bf16 +output.weight q6_K + +[TQ1_0] tq1_0 +output.weight q8_0 +token_embd.weight q4_K + +[TQ2_0] tq2_0 +output.weight q8_0 +token_embd.weight q4_K + +[MXFP4_MOE] mxfp4 +output.weight q8_0 +token_embd.weight q8_0 +blk.0.ssm_in.weight q8_0 +blk.0.ssm_out.weight q8_0 +blk.1.ffn_down_shexp.weight q8_0 +blk.1.ffn_up_shexp.weight q8_0 +blk.2.ssm_in.weight q8_0 +blk.2.ssm_out.weight q8_0 +blk.3.ffn_down_shexp.weight q8_0 +blk.3.ffn_up_shexp.weight q8_0 +blk.4.ssm_in.weight q8_0 +blk.4.ssm_out.weight q8_0 +blk.5.attn_k.weight q8_0 +blk.5.attn_output.weight q8_0 +blk.5.attn_q.weight q8_0 +blk.5.attn_v.weight q8_0 +blk.6.ffn_down_shexp.weight q8_0 +blk.6.ffn_up_shexp.weight q8_0 +blk.7.ssm_in.weight q8_0 +blk.7.ssm_out.weight q8_0 +blk.8.ffn_down_shexp.weight q8_0 +blk.8.ffn_up_shexp.weight q8_0 +blk.9.ssm_in.weight q8_0 +blk.9.ssm_out.weight q8_0 +blk.10.ffn_down_shexp.weight q8_0 +blk.10.ffn_up_shexp.weight q8_0 +blk.11.ssm_in.weight q8_0 +blk.11.ssm_out.weight q8_0 +blk.12.attn_k.weight q8_0 +blk.12.attn_output.weight q8_0 +blk.12.attn_q.weight q8_0 +blk.12.attn_v.weight q8_0 +blk.13.ffn_down_shexp.weight q8_0 +blk.13.ffn_up_shexp.weight q8_0 +blk.14.ssm_in.weight q8_0 +blk.14.ssm_out.weight q8_0 +blk.15.ffn_down_shexp.weight q8_0 +blk.15.ffn_up_shexp.weight q8_0 +blk.16.ssm_in.weight q8_0 +blk.16.ssm_out.weight q8_0 +blk.17.ffn_down_shexp.weight q8_0 +blk.17.ffn_up_shexp.weight q8_0 +blk.18.ssm_in.weight q8_0 +blk.18.ssm_out.weight q8_0 +blk.19.attn_k.weight q8_0 +blk.19.attn_output.weight q8_0 +blk.19.attn_q.weight q8_0 +blk.19.attn_v.weight q8_0 +blk.20.ffn_down_shexp.weight q8_0 +blk.20.ffn_up_shexp.weight q8_0 +blk.21.ssm_in.weight q8_0 +blk.21.ssm_out.weight q8_0 +blk.22.ffn_down_shexp.weight q8_0 +blk.22.ffn_up_shexp.weight q8_0 +blk.23.ssm_in.weight q8_0 +blk.23.ssm_out.weight q8_0 +blk.24.ffn_down_shexp.weight q8_0 +blk.24.ffn_up_shexp.weight q8_0 +blk.25.ssm_in.weight q8_0 +blk.25.ssm_out.weight q8_0 +blk.26.attn_k.weight q8_0 +blk.26.attn_output.weight q8_0 +blk.26.attn_q.weight q8_0 +blk.26.attn_v.weight q8_0 +blk.27.ffn_down_shexp.weight q8_0 +blk.27.ffn_up_shexp.weight q8_0 +blk.28.ssm_in.weight q8_0 +blk.28.ssm_out.weight q8_0 +blk.29.ffn_down_shexp.weight q8_0 +blk.29.ffn_up_shexp.weight q8_0 +blk.30.ssm_in.weight q8_0 +blk.30.ssm_out.weight q8_0 +blk.31.ffn_down_shexp.weight q8_0 +blk.31.ffn_up_shexp.weight q8_0 +blk.32.ssm_in.weight q8_0 +blk.32.ssm_out.weight q8_0 +blk.33.attn_k.weight q8_0 +blk.33.attn_output.weight q8_0 +blk.33.attn_q.weight q8_0 +blk.33.attn_v.weight q8_0 +blk.34.ffn_down_shexp.weight q8_0 +blk.34.ffn_up_shexp.weight q8_0 +blk.35.ssm_in.weight q8_0 +blk.35.ssm_out.weight q8_0 +blk.36.ffn_down_shexp.weight q8_0 +blk.36.ffn_up_shexp.weight q8_0 +blk.37.ssm_in.weight q8_0 +blk.37.ssm_out.weight q8_0 +blk.38.ffn_down_shexp.weight q8_0 +blk.38.ffn_up_shexp.weight q8_0 +blk.39.ssm_in.weight q8_0 +blk.39.ssm_out.weight q8_0 +blk.40.ffn_down_shexp.weight q8_0 +blk.40.ffn_up_shexp.weight q8_0 +blk.41.ssm_in.weight q8_0 +blk.41.ssm_out.weight q8_0 +blk.42.attn_k.weight q8_0 +blk.42.attn_output.weight q8_0 +blk.42.attn_q.weight q8_0 +blk.42.attn_v.weight q8_0 +blk.43.ffn_down_shexp.weight q8_0 +blk.43.ffn_up_shexp.weight q8_0 +blk.44.ssm_in.weight q8_0 +blk.44.ssm_out.weight q8_0 +blk.45.ffn_down_shexp.weight q8_0 +blk.45.ffn_up_shexp.weight q8_0 +blk.46.ssm_in.weight q8_0 +blk.46.ssm_out.weight q8_0 +blk.47.ffn_down_shexp.weight q8_0 +blk.47.ffn_up_shexp.weight q8_0 +blk.48.ssm_in.weight q8_0 +blk.48.ssm_out.weight q8_0 +blk.49.ffn_down_shexp.weight q8_0 +blk.49.ffn_up_shexp.weight q8_0 +blk.50.ssm_in.weight q8_0 +blk.50.ssm_out.weight q8_0 +blk.51.ffn_down_shexp.weight q8_0 +blk.51.ffn_up_shexp.weight q8_0 diff --git a/tests/snapshots/qwen3-14b.schema b/tests/snapshots/qwen3-14b.schema new file mode 100644 index 0000000000..010849a57b --- /dev/null +++ b/tests/snapshots/qwen3-14b.schema @@ -0,0 +1,1908 @@ +# Model: Qwen3-14B +# n_embd=5120, n_ff=17408, n_vocab=151936, n_layer=40, n_head=40, n_head_kv=8 + +[F32] f32 +output.weight q6_K + +[F16] f16 +output.weight q6_K + +[Q4_0] q4_0 +output.weight q6_K + +[Q4_1] q4_1 +output.weight q6_K + +[Q8_0] q8_0 + +[Q5_0] q5_0 +output.weight q6_K + +[Q5_1] q5_1 +output.weight q6_K + +[Q2_K] q2_K +output.weight q6_K +blk.0.attn_output.weight q3_K +blk.0.attn_v.weight q4_K +blk.0.ffn_down.weight q3_K +blk.1.attn_output.weight q3_K +blk.1.attn_v.weight q4_K +blk.1.ffn_down.weight q3_K +blk.2.attn_output.weight q3_K +blk.2.attn_v.weight q4_K +blk.2.ffn_down.weight q3_K +blk.3.attn_output.weight q3_K +blk.3.attn_v.weight q4_K +blk.3.ffn_down.weight q3_K +blk.4.attn_output.weight q3_K +blk.4.attn_v.weight q4_K +blk.4.ffn_down.weight q3_K +blk.5.attn_output.weight q3_K +blk.5.attn_v.weight q4_K +blk.5.ffn_down.weight q3_K +blk.6.attn_output.weight q3_K +blk.6.attn_v.weight q4_K +blk.6.ffn_down.weight q3_K +blk.7.attn_output.weight q3_K +blk.7.attn_v.weight q4_K +blk.7.ffn_down.weight q3_K +blk.8.attn_output.weight q3_K +blk.8.attn_v.weight q4_K +blk.8.ffn_down.weight q3_K +blk.9.attn_output.weight q3_K +blk.9.attn_v.weight q4_K +blk.9.ffn_down.weight q3_K +blk.10.attn_output.weight q3_K +blk.10.attn_v.weight q4_K +blk.10.ffn_down.weight q3_K +blk.11.attn_output.weight q3_K +blk.11.attn_v.weight q4_K +blk.11.ffn_down.weight q3_K +blk.12.attn_output.weight q3_K +blk.12.attn_v.weight q4_K +blk.12.ffn_down.weight q3_K +blk.13.attn_output.weight q3_K +blk.13.attn_v.weight q4_K +blk.13.ffn_down.weight q3_K +blk.14.attn_output.weight q3_K +blk.14.attn_v.weight q4_K +blk.14.ffn_down.weight q3_K +blk.15.attn_output.weight q3_K +blk.15.attn_v.weight q4_K +blk.15.ffn_down.weight q3_K +blk.16.attn_output.weight q3_K +blk.16.attn_v.weight q4_K +blk.16.ffn_down.weight q3_K +blk.17.attn_output.weight q3_K +blk.17.attn_v.weight q4_K +blk.17.ffn_down.weight q3_K +blk.18.attn_output.weight q3_K +blk.18.attn_v.weight q4_K +blk.18.ffn_down.weight q3_K +blk.19.attn_output.weight q3_K +blk.19.attn_v.weight q4_K +blk.19.ffn_down.weight q3_K +blk.20.attn_output.weight q3_K +blk.20.attn_v.weight q4_K +blk.20.ffn_down.weight q3_K +blk.21.attn_output.weight q3_K +blk.21.attn_v.weight q4_K +blk.21.ffn_down.weight q3_K +blk.22.attn_output.weight q3_K +blk.22.attn_v.weight q4_K +blk.22.ffn_down.weight q3_K +blk.23.attn_output.weight q3_K +blk.23.attn_v.weight q4_K +blk.23.ffn_down.weight q3_K +blk.24.attn_output.weight q3_K +blk.24.attn_v.weight q4_K +blk.24.ffn_down.weight q3_K +blk.25.attn_output.weight q3_K +blk.25.attn_v.weight q4_K +blk.25.ffn_down.weight q3_K +blk.26.attn_output.weight q3_K +blk.26.attn_v.weight q4_K +blk.26.ffn_down.weight q3_K +blk.27.attn_output.weight q3_K +blk.27.attn_v.weight q4_K +blk.27.ffn_down.weight q3_K +blk.28.attn_output.weight q3_K +blk.28.attn_v.weight q4_K +blk.28.ffn_down.weight q3_K +blk.29.attn_output.weight q3_K +blk.29.attn_v.weight q4_K +blk.29.ffn_down.weight q3_K +blk.30.attn_output.weight q3_K +blk.30.attn_v.weight q4_K +blk.30.ffn_down.weight q3_K +blk.31.attn_output.weight q3_K +blk.31.attn_v.weight q4_K +blk.31.ffn_down.weight q3_K +blk.32.attn_output.weight q3_K +blk.32.attn_v.weight q4_K +blk.32.ffn_down.weight q3_K +blk.33.attn_output.weight q3_K +blk.33.attn_v.weight q4_K +blk.33.ffn_down.weight q3_K +blk.34.attn_output.weight q3_K +blk.34.attn_v.weight q4_K +blk.34.ffn_down.weight q3_K +blk.35.attn_output.weight q3_K +blk.35.attn_v.weight q4_K +blk.35.ffn_down.weight q3_K +blk.36.attn_output.weight q3_K +blk.36.attn_v.weight q4_K +blk.36.ffn_down.weight q3_K +blk.37.attn_output.weight q3_K +blk.37.attn_v.weight q4_K +blk.37.ffn_down.weight q3_K +blk.38.attn_output.weight q3_K +blk.38.attn_v.weight q4_K +blk.38.ffn_down.weight q3_K +blk.39.attn_output.weight q3_K +blk.39.attn_v.weight q4_K +blk.39.ffn_down.weight q3_K + +[Q3_K_S] q3_K +output.weight q6_K + +[Q3_K_M] q3_K +output.weight q6_K +blk.0.attn_output.weight q4_K +blk.0.attn_v.weight q5_K +blk.0.ffn_down.weight q5_K +blk.1.attn_output.weight q4_K +blk.1.attn_v.weight q5_K +blk.1.ffn_down.weight q5_K +blk.2.attn_output.weight q4_K +blk.2.attn_v.weight q4_K +blk.2.ffn_down.weight q4_K +blk.3.attn_output.weight q4_K +blk.3.attn_v.weight q4_K +blk.3.ffn_down.weight q4_K +blk.4.attn_output.weight q4_K +blk.4.attn_v.weight q4_K +blk.4.ffn_down.weight q4_K +blk.5.attn_output.weight q4_K +blk.5.attn_v.weight q4_K +blk.5.ffn_down.weight q4_K +blk.6.attn_output.weight q4_K +blk.6.attn_v.weight q4_K +blk.6.ffn_down.weight q4_K +blk.7.attn_output.weight q4_K +blk.7.attn_v.weight q4_K +blk.7.ffn_down.weight q4_K +blk.8.attn_output.weight q4_K +blk.8.attn_v.weight q4_K +blk.8.ffn_down.weight q4_K +blk.9.attn_output.weight q4_K +blk.9.attn_v.weight q4_K +blk.9.ffn_down.weight q4_K +blk.10.attn_output.weight q4_K +blk.10.attn_v.weight q4_K +blk.10.ffn_down.weight q4_K +blk.11.attn_output.weight q4_K +blk.11.attn_v.weight q4_K +blk.11.ffn_down.weight q4_K +blk.12.attn_output.weight q4_K +blk.12.attn_v.weight q4_K +blk.12.ffn_down.weight q4_K +blk.13.attn_output.weight q4_K +blk.13.attn_v.weight q4_K +blk.13.ffn_down.weight q4_K +blk.14.attn_output.weight q4_K +blk.14.attn_v.weight q4_K +blk.14.ffn_down.weight q4_K +blk.15.attn_output.weight q4_K +blk.15.attn_v.weight q4_K +blk.15.ffn_down.weight q4_K +blk.16.attn_output.weight q4_K +blk.16.attn_v.weight q4_K +blk.16.ffn_down.weight q4_K +blk.17.attn_output.weight q4_K +blk.17.attn_v.weight q4_K +blk.17.ffn_down.weight q4_K +blk.18.attn_output.weight q4_K +blk.18.attn_v.weight q4_K +blk.18.ffn_down.weight q4_K +blk.19.attn_output.weight q4_K +blk.19.attn_v.weight q4_K +blk.19.ffn_down.weight q4_K +blk.20.attn_output.weight q4_K +blk.20.attn_v.weight q4_K +blk.20.ffn_down.weight q4_K +blk.21.attn_output.weight q4_K +blk.21.attn_v.weight q4_K +blk.21.ffn_down.weight q4_K +blk.22.attn_output.weight q4_K +blk.22.attn_v.weight q4_K +blk.22.ffn_down.weight q4_K +blk.23.attn_output.weight q4_K +blk.23.attn_v.weight q4_K +blk.23.ffn_down.weight q4_K +blk.24.attn_output.weight q4_K +blk.24.attn_v.weight q4_K +blk.24.ffn_down.weight q4_K +blk.25.attn_output.weight q4_K +blk.25.attn_v.weight q4_K +blk.25.ffn_down.weight q4_K +blk.26.attn_output.weight q4_K +blk.26.attn_v.weight q4_K +blk.26.ffn_down.weight q4_K +blk.27.attn_output.weight q4_K +blk.27.attn_v.weight q4_K +blk.27.ffn_down.weight q4_K +blk.28.attn_output.weight q4_K +blk.28.attn_v.weight q4_K +blk.28.ffn_down.weight q4_K +blk.29.attn_output.weight q4_K +blk.29.attn_v.weight q4_K +blk.29.ffn_down.weight q4_K +blk.30.attn_output.weight q4_K +blk.30.attn_v.weight q4_K +blk.30.ffn_down.weight q4_K +blk.31.attn_output.weight q4_K +blk.31.attn_v.weight q4_K +blk.31.ffn_down.weight q4_K +blk.32.attn_output.weight q4_K +blk.32.attn_v.weight q4_K +blk.32.ffn_down.weight q4_K +blk.33.attn_output.weight q4_K +blk.33.attn_v.weight q4_K +blk.33.ffn_down.weight q4_K +blk.34.attn_output.weight q4_K +blk.34.attn_v.weight q4_K +blk.34.ffn_down.weight q4_K +blk.35.attn_output.weight q4_K +blk.35.attn_v.weight q4_K +blk.35.ffn_down.weight q4_K +blk.36.attn_output.weight q4_K +blk.36.attn_v.weight q4_K +blk.36.ffn_down.weight q4_K +blk.37.attn_output.weight q4_K +blk.37.attn_v.weight q4_K +blk.37.ffn_down.weight q4_K +blk.38.attn_output.weight q4_K +blk.38.attn_v.weight q4_K +blk.38.ffn_down.weight q4_K +blk.39.attn_output.weight q4_K +blk.39.attn_v.weight q4_K +blk.39.ffn_down.weight q4_K + +[Q3_K_L] q3_K +output.weight q6_K +blk.0.attn_output.weight q5_K +blk.0.attn_v.weight q5_K +blk.0.ffn_down.weight q5_K +blk.1.attn_output.weight q5_K +blk.1.attn_v.weight q5_K +blk.1.ffn_down.weight q5_K +blk.2.attn_output.weight q5_K +blk.2.attn_v.weight q5_K +blk.2.ffn_down.weight q5_K +blk.3.attn_output.weight q5_K +blk.3.attn_v.weight q5_K +blk.3.ffn_down.weight q5_K +blk.4.attn_output.weight q5_K +blk.4.attn_v.weight q5_K +blk.4.ffn_down.weight q5_K +blk.5.attn_output.weight q5_K +blk.5.attn_v.weight q5_K +blk.5.ffn_down.weight q5_K +blk.6.attn_output.weight q5_K +blk.6.attn_v.weight q5_K +blk.6.ffn_down.weight q5_K +blk.7.attn_output.weight q5_K +blk.7.attn_v.weight q5_K +blk.7.ffn_down.weight q5_K +blk.8.attn_output.weight q5_K +blk.8.attn_v.weight q5_K +blk.8.ffn_down.weight q5_K +blk.9.attn_output.weight q5_K +blk.9.attn_v.weight q5_K +blk.9.ffn_down.weight q5_K +blk.10.attn_output.weight q5_K +blk.10.attn_v.weight q5_K +blk.10.ffn_down.weight q5_K +blk.11.attn_output.weight q5_K +blk.11.attn_v.weight q5_K +blk.11.ffn_down.weight q5_K +blk.12.attn_output.weight q5_K +blk.12.attn_v.weight q5_K +blk.12.ffn_down.weight q5_K +blk.13.attn_output.weight q5_K +blk.13.attn_v.weight q5_K +blk.13.ffn_down.weight q5_K +blk.14.attn_output.weight q5_K +blk.14.attn_v.weight q5_K +blk.14.ffn_down.weight q5_K +blk.15.attn_output.weight q5_K +blk.15.attn_v.weight q5_K +blk.15.ffn_down.weight q5_K +blk.16.attn_output.weight q5_K +blk.16.attn_v.weight q5_K +blk.16.ffn_down.weight q5_K +blk.17.attn_output.weight q5_K +blk.17.attn_v.weight q5_K +blk.17.ffn_down.weight q5_K +blk.18.attn_output.weight q5_K +blk.18.attn_v.weight q5_K +blk.18.ffn_down.weight q5_K +blk.19.attn_output.weight q5_K +blk.19.attn_v.weight q5_K +blk.19.ffn_down.weight q5_K +blk.20.attn_output.weight q5_K +blk.20.attn_v.weight q5_K +blk.20.ffn_down.weight q5_K +blk.21.attn_output.weight q5_K +blk.21.attn_v.weight q5_K +blk.21.ffn_down.weight q5_K +blk.22.attn_output.weight q5_K +blk.22.attn_v.weight q5_K +blk.22.ffn_down.weight q5_K +blk.23.attn_output.weight q5_K +blk.23.attn_v.weight q5_K +blk.23.ffn_down.weight q5_K +blk.24.attn_output.weight q5_K +blk.24.attn_v.weight q5_K +blk.24.ffn_down.weight q5_K +blk.25.attn_output.weight q5_K +blk.25.attn_v.weight q5_K +blk.25.ffn_down.weight q5_K +blk.26.attn_output.weight q5_K +blk.26.attn_v.weight q5_K +blk.26.ffn_down.weight q5_K +blk.27.attn_output.weight q5_K +blk.27.attn_v.weight q5_K +blk.27.ffn_down.weight q5_K +blk.28.attn_output.weight q5_K +blk.28.attn_v.weight q5_K +blk.28.ffn_down.weight q5_K +blk.29.attn_output.weight q5_K +blk.29.attn_v.weight q5_K +blk.29.ffn_down.weight q5_K +blk.30.attn_output.weight q5_K +blk.30.attn_v.weight q5_K +blk.30.ffn_down.weight q5_K +blk.31.attn_output.weight q5_K +blk.31.attn_v.weight q5_K +blk.31.ffn_down.weight q5_K +blk.32.attn_output.weight q5_K +blk.32.attn_v.weight q5_K +blk.32.ffn_down.weight q5_K +blk.33.attn_output.weight q5_K +blk.33.attn_v.weight q5_K +blk.33.ffn_down.weight q5_K +blk.34.attn_output.weight q5_K +blk.34.attn_v.weight q5_K +blk.34.ffn_down.weight q5_K +blk.35.attn_output.weight q5_K +blk.35.attn_v.weight q5_K +blk.35.ffn_down.weight q5_K +blk.36.attn_output.weight q5_K +blk.36.attn_v.weight q5_K +blk.36.ffn_down.weight q5_K +blk.37.attn_output.weight q5_K +blk.37.attn_v.weight q5_K +blk.37.ffn_down.weight q5_K +blk.38.attn_output.weight q5_K +blk.38.attn_v.weight q5_K +blk.38.ffn_down.weight q5_K +blk.39.attn_output.weight q5_K +blk.39.attn_v.weight q5_K +blk.39.ffn_down.weight q5_K + +[Q4_K_S] q4_K +output.weight q6_K +blk.0.attn_v.weight q5_K +blk.0.ffn_down.weight q5_K +blk.1.attn_v.weight q5_K +blk.1.ffn_down.weight q5_K +blk.2.attn_v.weight q5_K +blk.2.ffn_down.weight q5_K +blk.3.attn_v.weight q5_K +blk.3.ffn_down.weight q5_K +blk.4.ffn_down.weight q5_K + +[Q4_K_M] q4_K +output.weight q6_K +blk.0.attn_v.weight q6_K +blk.0.ffn_down.weight q6_K +blk.1.attn_v.weight q6_K +blk.1.ffn_down.weight q6_K +blk.2.attn_v.weight q6_K +blk.2.ffn_down.weight q6_K +blk.3.attn_v.weight q6_K +blk.3.ffn_down.weight q6_K +blk.4.attn_v.weight q6_K +blk.4.ffn_down.weight q6_K +blk.7.attn_v.weight q6_K +blk.7.ffn_down.weight q6_K +blk.10.attn_v.weight q6_K +blk.10.ffn_down.weight q6_K +blk.13.attn_v.weight q6_K +blk.13.ffn_down.weight q6_K +blk.16.attn_v.weight q6_K +blk.16.ffn_down.weight q6_K +blk.19.attn_v.weight q6_K +blk.19.ffn_down.weight q6_K +blk.22.attn_v.weight q6_K +blk.22.ffn_down.weight q6_K +blk.25.attn_v.weight q6_K +blk.25.ffn_down.weight q6_K +blk.28.attn_v.weight q6_K +blk.28.ffn_down.weight q6_K +blk.31.attn_v.weight q6_K +blk.31.ffn_down.weight q6_K +blk.34.attn_v.weight q6_K +blk.34.ffn_down.weight q6_K +blk.35.attn_v.weight q6_K +blk.35.ffn_down.weight q6_K +blk.36.attn_v.weight q6_K +blk.36.ffn_down.weight q6_K +blk.37.attn_v.weight q6_K +blk.37.ffn_down.weight q6_K +blk.38.attn_v.weight q6_K +blk.38.ffn_down.weight q6_K +blk.39.attn_v.weight q6_K +blk.39.ffn_down.weight q6_K + +[Q5_K_S] q5_K +output.weight q6_K + +[Q5_K_M] q5_K +output.weight q6_K +blk.0.attn_v.weight q6_K +blk.0.ffn_down.weight q6_K +blk.1.attn_v.weight q6_K +blk.1.ffn_down.weight q6_K +blk.2.attn_v.weight q6_K +blk.2.ffn_down.weight q6_K +blk.3.attn_v.weight q6_K +blk.3.ffn_down.weight q6_K +blk.4.attn_v.weight q6_K +blk.4.ffn_down.weight q6_K +blk.7.attn_v.weight q6_K +blk.7.ffn_down.weight q6_K +blk.10.attn_v.weight q6_K +blk.10.ffn_down.weight q6_K +blk.13.attn_v.weight q6_K +blk.13.ffn_down.weight q6_K +blk.16.attn_v.weight q6_K +blk.16.ffn_down.weight q6_K +blk.19.attn_v.weight q6_K +blk.19.ffn_down.weight q6_K +blk.22.attn_v.weight q6_K +blk.22.ffn_down.weight q6_K +blk.25.attn_v.weight q6_K +blk.25.ffn_down.weight q6_K +blk.28.attn_v.weight q6_K +blk.28.ffn_down.weight q6_K +blk.31.attn_v.weight q6_K +blk.31.ffn_down.weight q6_K +blk.34.attn_v.weight q6_K +blk.34.ffn_down.weight q6_K +blk.35.attn_v.weight q6_K +blk.35.ffn_down.weight q6_K +blk.36.attn_v.weight q6_K +blk.36.ffn_down.weight q6_K +blk.37.attn_v.weight q6_K +blk.37.ffn_down.weight q6_K +blk.38.attn_v.weight q6_K +blk.38.ffn_down.weight q6_K +blk.39.attn_v.weight q6_K +blk.39.ffn_down.weight q6_K + +[Q6_K] q6_K + +[IQ2_XXS] iq2_xxs +output.weight q5_K +token_embd.weight q2_K +blk.0.attn_v.weight q4_K +blk.0.ffn_down.weight q2_K +blk.1.attn_v.weight q4_K +blk.1.ffn_down.weight q2_K +blk.2.attn_v.weight q4_K +blk.2.ffn_down.weight q2_K +blk.3.attn_v.weight q4_K +blk.3.ffn_down.weight q2_K +blk.4.attn_v.weight q4_K +blk.4.ffn_down.weight q2_K +blk.5.attn_v.weight q4_K +blk.6.attn_v.weight q4_K +blk.7.attn_v.weight q4_K +blk.8.attn_v.weight q4_K +blk.9.attn_v.weight q4_K +blk.10.attn_v.weight q4_K +blk.11.attn_v.weight q4_K +blk.12.attn_v.weight q4_K +blk.13.attn_v.weight q4_K +blk.14.attn_v.weight q4_K +blk.15.attn_v.weight q4_K +blk.16.attn_v.weight q4_K +blk.17.attn_v.weight q4_K +blk.18.attn_v.weight q4_K +blk.19.attn_v.weight q4_K +blk.20.attn_v.weight q4_K +blk.21.attn_v.weight q4_K +blk.22.attn_v.weight q4_K +blk.23.attn_v.weight q4_K +blk.24.attn_v.weight q4_K +blk.25.attn_v.weight q4_K +blk.26.attn_v.weight q4_K +blk.27.attn_v.weight q4_K +blk.28.attn_v.weight q4_K +blk.29.attn_v.weight q4_K +blk.30.attn_v.weight q4_K +blk.31.attn_v.weight q4_K +blk.32.attn_v.weight q4_K +blk.33.attn_v.weight q4_K +blk.34.attn_v.weight q4_K +blk.35.attn_v.weight q4_K +blk.36.attn_v.weight q4_K +blk.37.attn_v.weight q4_K +blk.38.attn_v.weight q4_K +blk.39.attn_v.weight q4_K + +[IQ2_XS] iq2_xs +output.weight q5_K +token_embd.weight q2_K +blk.0.attn_v.weight q4_K +blk.0.ffn_down.weight q2_K +blk.1.attn_v.weight q4_K +blk.1.ffn_down.weight q2_K +blk.2.attn_v.weight q4_K +blk.2.ffn_down.weight q2_K +blk.3.attn_v.weight q4_K +blk.3.ffn_down.weight q2_K +blk.4.attn_v.weight q4_K +blk.4.ffn_down.weight q2_K +blk.5.attn_v.weight q4_K +blk.6.attn_v.weight q4_K +blk.7.attn_v.weight q4_K +blk.8.attn_v.weight q4_K +blk.9.attn_v.weight q4_K +blk.10.attn_v.weight q4_K +blk.11.attn_v.weight q4_K +blk.12.attn_v.weight q4_K +blk.13.attn_v.weight q4_K +blk.14.attn_v.weight q4_K +blk.15.attn_v.weight q4_K +blk.16.attn_v.weight q4_K +blk.17.attn_v.weight q4_K +blk.18.attn_v.weight q4_K +blk.19.attn_v.weight q4_K +blk.20.attn_v.weight q4_K +blk.21.attn_v.weight q4_K +blk.22.attn_v.weight q4_K +blk.23.attn_v.weight q4_K +blk.24.attn_v.weight q4_K +blk.25.attn_v.weight q4_K +blk.26.attn_v.weight q4_K +blk.27.attn_v.weight q4_K +blk.28.attn_v.weight q4_K +blk.29.attn_v.weight q4_K +blk.30.attn_v.weight q4_K +blk.31.attn_v.weight q4_K +blk.32.attn_v.weight q4_K +blk.33.attn_v.weight q4_K +blk.34.attn_v.weight q4_K +blk.35.attn_v.weight q4_K +blk.36.attn_v.weight q4_K +blk.37.attn_v.weight q4_K +blk.38.attn_v.weight q4_K +blk.39.attn_v.weight q4_K + +[Q2_K_S] q2_K +output.weight q6_K +blk.0.attn_v.weight q4_K +blk.0.ffn_down.weight q4_K +blk.1.attn_v.weight q4_K +blk.1.ffn_down.weight q4_K +blk.2.attn_v.weight q4_K +blk.2.ffn_down.weight q4_K +blk.3.attn_v.weight q4_K +blk.3.ffn_down.weight q4_K +blk.4.attn_v.weight q4_K +blk.4.ffn_down.weight q4_K +blk.5.attn_v.weight q4_K +blk.6.attn_v.weight q4_K +blk.7.attn_v.weight q4_K +blk.8.attn_v.weight q4_K +blk.9.attn_v.weight q4_K +blk.10.attn_v.weight q4_K +blk.11.attn_v.weight q4_K +blk.12.attn_v.weight q4_K +blk.13.attn_v.weight q4_K +blk.14.attn_v.weight q4_K +blk.15.attn_v.weight q4_K +blk.16.attn_v.weight q4_K +blk.17.attn_v.weight q4_K +blk.18.attn_v.weight q4_K +blk.19.attn_v.weight q4_K +blk.20.attn_v.weight q4_K +blk.21.attn_v.weight q4_K +blk.22.attn_v.weight q4_K +blk.23.attn_v.weight q4_K +blk.24.attn_v.weight q4_K +blk.25.attn_v.weight q4_K +blk.26.attn_v.weight q4_K +blk.27.attn_v.weight q4_K +blk.28.attn_v.weight q4_K +blk.29.attn_v.weight q4_K +blk.30.attn_v.weight q4_K +blk.31.attn_v.weight q4_K +blk.32.attn_v.weight q4_K +blk.33.attn_v.weight q4_K +blk.34.attn_v.weight q4_K +blk.35.attn_v.weight q4_K +blk.36.attn_v.weight q4_K +blk.37.attn_v.weight q4_K +blk.38.attn_v.weight q4_K +blk.39.attn_v.weight q4_K + +[IQ3_XS] iq3_s +output.weight q6_K +blk.0.attn_k.weight iq3_xxs +blk.0.attn_q.weight iq3_xxs +blk.0.attn_v.weight q4_K +blk.1.attn_k.weight iq3_xxs +blk.1.attn_q.weight iq3_xxs +blk.1.attn_v.weight q4_K +blk.2.attn_k.weight iq3_xxs +blk.2.attn_q.weight iq3_xxs +blk.2.attn_v.weight q4_K +blk.3.attn_k.weight iq3_xxs +blk.3.attn_q.weight iq3_xxs +blk.3.attn_v.weight q4_K +blk.4.attn_k.weight iq3_xxs +blk.4.attn_q.weight iq3_xxs +blk.4.attn_v.weight q4_K +blk.5.attn_k.weight iq3_xxs +blk.5.attn_q.weight iq3_xxs +blk.5.attn_v.weight q4_K +blk.5.ffn_gate.weight iq3_xxs +blk.5.ffn_up.weight iq3_xxs +blk.6.attn_k.weight iq3_xxs +blk.6.attn_q.weight iq3_xxs +blk.6.attn_v.weight q4_K +blk.6.ffn_gate.weight iq3_xxs +blk.6.ffn_up.weight iq3_xxs +blk.7.attn_k.weight iq3_xxs +blk.7.attn_q.weight iq3_xxs +blk.7.attn_v.weight q4_K +blk.7.ffn_gate.weight iq3_xxs +blk.7.ffn_up.weight iq3_xxs +blk.8.attn_k.weight iq3_xxs +blk.8.attn_q.weight iq3_xxs +blk.8.attn_v.weight q4_K +blk.8.ffn_gate.weight iq3_xxs +blk.8.ffn_up.weight iq3_xxs +blk.9.attn_k.weight iq3_xxs +blk.9.attn_q.weight iq3_xxs +blk.9.attn_v.weight q4_K +blk.9.ffn_gate.weight iq3_xxs +blk.9.ffn_up.weight iq3_xxs +blk.10.attn_k.weight iq3_xxs +blk.10.attn_q.weight iq3_xxs +blk.10.attn_v.weight q4_K +blk.10.ffn_gate.weight iq3_xxs +blk.10.ffn_up.weight iq3_xxs +blk.11.attn_k.weight iq3_xxs +blk.11.attn_q.weight iq3_xxs +blk.11.attn_v.weight q4_K +blk.11.ffn_gate.weight iq3_xxs +blk.11.ffn_up.weight iq3_xxs +blk.12.attn_k.weight iq3_xxs +blk.12.attn_q.weight iq3_xxs +blk.12.attn_v.weight q4_K +blk.12.ffn_gate.weight iq3_xxs +blk.12.ffn_up.weight iq3_xxs +blk.13.attn_k.weight iq3_xxs +blk.13.attn_q.weight iq3_xxs +blk.13.attn_v.weight q4_K +blk.13.ffn_gate.weight iq3_xxs +blk.13.ffn_up.weight iq3_xxs +blk.14.attn_k.weight iq3_xxs +blk.14.attn_q.weight iq3_xxs +blk.14.attn_v.weight q4_K +blk.14.ffn_gate.weight iq3_xxs +blk.14.ffn_up.weight iq3_xxs +blk.15.attn_k.weight iq3_xxs +blk.15.attn_q.weight iq3_xxs +blk.15.attn_v.weight q4_K +blk.15.ffn_gate.weight iq3_xxs +blk.15.ffn_up.weight iq3_xxs +blk.16.attn_k.weight iq3_xxs +blk.16.attn_q.weight iq3_xxs +blk.16.attn_v.weight q4_K +blk.16.ffn_gate.weight iq3_xxs +blk.16.ffn_up.weight iq3_xxs +blk.17.attn_k.weight iq3_xxs +blk.17.attn_q.weight iq3_xxs +blk.17.attn_v.weight q4_K +blk.17.ffn_gate.weight iq3_xxs +blk.17.ffn_up.weight iq3_xxs +blk.18.attn_k.weight iq3_xxs +blk.18.attn_q.weight iq3_xxs +blk.18.attn_v.weight q4_K +blk.18.ffn_gate.weight iq3_xxs +blk.18.ffn_up.weight iq3_xxs +blk.19.attn_k.weight iq3_xxs +blk.19.attn_q.weight iq3_xxs +blk.19.attn_v.weight q4_K +blk.19.ffn_gate.weight iq3_xxs +blk.19.ffn_up.weight iq3_xxs +blk.20.attn_k.weight iq3_xxs +blk.20.attn_q.weight iq3_xxs +blk.20.attn_v.weight q4_K +blk.20.ffn_gate.weight iq3_xxs +blk.20.ffn_up.weight iq3_xxs +blk.21.attn_k.weight iq3_xxs +blk.21.attn_q.weight iq3_xxs +blk.21.attn_v.weight q4_K +blk.21.ffn_gate.weight iq3_xxs +blk.21.ffn_up.weight iq3_xxs +blk.22.attn_k.weight iq3_xxs +blk.22.attn_q.weight iq3_xxs +blk.22.attn_v.weight q4_K +blk.22.ffn_gate.weight iq3_xxs +blk.22.ffn_up.weight iq3_xxs +blk.23.attn_k.weight iq3_xxs +blk.23.attn_q.weight iq3_xxs +blk.23.attn_v.weight q4_K +blk.23.ffn_gate.weight iq3_xxs +blk.23.ffn_up.weight iq3_xxs +blk.24.attn_k.weight iq3_xxs +blk.24.attn_q.weight iq3_xxs +blk.24.attn_v.weight q4_K +blk.24.ffn_gate.weight iq3_xxs +blk.24.ffn_up.weight iq3_xxs +blk.25.attn_k.weight iq3_xxs +blk.25.attn_q.weight iq3_xxs +blk.25.attn_v.weight q4_K +blk.25.ffn_gate.weight iq3_xxs +blk.25.ffn_up.weight iq3_xxs +blk.26.attn_k.weight iq3_xxs +blk.26.attn_q.weight iq3_xxs +blk.26.attn_v.weight q4_K +blk.26.ffn_gate.weight iq3_xxs +blk.26.ffn_up.weight iq3_xxs +blk.27.attn_k.weight iq3_xxs +blk.27.attn_q.weight iq3_xxs +blk.27.attn_v.weight q4_K +blk.27.ffn_gate.weight iq3_xxs +blk.27.ffn_up.weight iq3_xxs +blk.28.attn_k.weight iq3_xxs +blk.28.attn_q.weight iq3_xxs +blk.28.attn_v.weight q4_K +blk.28.ffn_gate.weight iq3_xxs +blk.28.ffn_up.weight iq3_xxs +blk.29.attn_k.weight iq3_xxs +blk.29.attn_q.weight iq3_xxs +blk.29.attn_v.weight q4_K +blk.29.ffn_gate.weight iq3_xxs +blk.29.ffn_up.weight iq3_xxs +blk.30.attn_k.weight iq3_xxs +blk.30.attn_q.weight iq3_xxs +blk.30.attn_v.weight q4_K +blk.30.ffn_gate.weight iq3_xxs +blk.30.ffn_up.weight iq3_xxs +blk.31.attn_k.weight iq3_xxs +blk.31.attn_q.weight iq3_xxs +blk.31.attn_v.weight q4_K +blk.31.ffn_gate.weight iq3_xxs +blk.31.ffn_up.weight iq3_xxs +blk.32.attn_k.weight iq3_xxs +blk.32.attn_q.weight iq3_xxs +blk.32.attn_v.weight q4_K +blk.32.ffn_gate.weight iq3_xxs +blk.32.ffn_up.weight iq3_xxs +blk.33.attn_k.weight iq3_xxs +blk.33.attn_q.weight iq3_xxs +blk.33.attn_v.weight q4_K +blk.33.ffn_gate.weight iq3_xxs +blk.33.ffn_up.weight iq3_xxs +blk.34.attn_k.weight iq3_xxs +blk.34.attn_q.weight iq3_xxs +blk.34.attn_v.weight q4_K +blk.34.ffn_gate.weight iq3_xxs +blk.34.ffn_up.weight iq3_xxs +blk.35.attn_k.weight iq3_xxs +blk.35.attn_q.weight iq3_xxs +blk.35.attn_v.weight q4_K +blk.36.attn_k.weight iq3_xxs +blk.36.attn_q.weight iq3_xxs +blk.36.attn_v.weight q4_K +blk.37.attn_k.weight iq3_xxs +blk.37.attn_q.weight iq3_xxs +blk.37.attn_v.weight q4_K +blk.38.attn_k.weight iq3_xxs +blk.38.attn_q.weight iq3_xxs +blk.38.attn_v.weight q4_K +blk.39.attn_k.weight iq3_xxs +blk.39.attn_q.weight iq3_xxs +blk.39.attn_v.weight q4_K + +[IQ3_XXS] iq3_xxs +output.weight q5_K +token_embd.weight iq3_s +blk.0.attn_k.weight iq2_s +blk.0.attn_output.weight iq3_s +blk.0.attn_q.weight iq2_s +blk.0.attn_v.weight q4_K +blk.0.ffn_down.weight q4_K +blk.1.attn_k.weight iq2_s +blk.1.attn_output.weight iq3_s +blk.1.attn_q.weight iq2_s +blk.1.attn_v.weight q4_K +blk.1.ffn_down.weight q4_K +blk.2.attn_k.weight iq2_s +blk.2.attn_output.weight iq3_s +blk.2.attn_q.weight iq2_s +blk.2.attn_v.weight q4_K +blk.2.ffn_down.weight q4_K +blk.3.attn_k.weight iq2_s +blk.3.attn_output.weight iq3_s +blk.3.attn_q.weight iq2_s +blk.3.attn_v.weight q4_K +blk.3.ffn_down.weight q4_K +blk.4.attn_k.weight iq2_s +blk.4.attn_output.weight iq3_s +blk.4.attn_q.weight iq2_s +blk.4.attn_v.weight q4_K +blk.4.ffn_down.weight q4_K +blk.5.attn_k.weight iq2_s +blk.5.attn_output.weight iq3_s +blk.5.attn_q.weight iq2_s +blk.5.attn_v.weight q4_K +blk.5.ffn_down.weight q3_K +blk.6.attn_k.weight iq2_s +blk.6.attn_output.weight iq3_s +blk.6.attn_q.weight iq2_s +blk.6.attn_v.weight q4_K +blk.6.ffn_down.weight q3_K +blk.7.attn_k.weight iq2_s +blk.7.attn_output.weight iq3_s +blk.7.attn_q.weight iq2_s +blk.7.attn_v.weight q4_K +blk.7.ffn_down.weight q3_K +blk.8.attn_k.weight iq2_s +blk.8.attn_output.weight iq3_s +blk.8.attn_q.weight iq2_s +blk.8.attn_v.weight q4_K +blk.8.ffn_down.weight q3_K +blk.9.attn_k.weight iq2_s +blk.9.attn_output.weight iq3_s +blk.9.attn_q.weight iq2_s +blk.9.attn_v.weight q4_K +blk.9.ffn_down.weight q3_K +blk.10.attn_k.weight iq2_s +blk.10.attn_output.weight iq3_s +blk.10.attn_q.weight iq2_s +blk.10.attn_v.weight q4_K +blk.10.ffn_down.weight q3_K +blk.11.attn_k.weight iq2_s +blk.11.attn_output.weight iq3_s +blk.11.attn_q.weight iq2_s +blk.11.attn_v.weight q4_K +blk.11.ffn_down.weight q3_K +blk.12.attn_k.weight iq2_s +blk.12.attn_output.weight iq3_s +blk.12.attn_q.weight iq2_s +blk.12.attn_v.weight q4_K +blk.12.ffn_down.weight q3_K +blk.13.attn_k.weight iq2_s +blk.13.attn_output.weight iq3_s +blk.13.attn_q.weight iq2_s +blk.13.attn_v.weight q4_K +blk.13.ffn_down.weight q3_K +blk.14.attn_k.weight iq2_s +blk.14.attn_output.weight iq3_s +blk.14.attn_q.weight iq2_s +blk.14.attn_v.weight q4_K +blk.14.ffn_down.weight q3_K +blk.15.attn_k.weight iq2_s +blk.15.attn_output.weight iq3_s +blk.15.attn_q.weight iq2_s +blk.15.attn_v.weight q4_K +blk.15.ffn_down.weight q3_K +blk.16.attn_k.weight iq2_s +blk.16.attn_output.weight iq3_s +blk.16.attn_q.weight iq2_s +blk.16.attn_v.weight q4_K +blk.16.ffn_down.weight q3_K +blk.17.attn_k.weight iq2_s +blk.17.attn_output.weight iq3_s +blk.17.attn_q.weight iq2_s +blk.17.attn_v.weight q4_K +blk.17.ffn_down.weight q3_K +blk.18.attn_k.weight iq2_s +blk.18.attn_output.weight iq3_s +blk.18.attn_q.weight iq2_s +blk.18.attn_v.weight q4_K +blk.18.ffn_down.weight q3_K +blk.19.attn_k.weight iq2_s +blk.19.attn_output.weight iq3_s +blk.19.attn_q.weight iq2_s +blk.19.attn_v.weight q4_K +blk.19.ffn_down.weight q3_K +blk.20.attn_k.weight iq2_s +blk.20.attn_output.weight iq3_s +blk.20.attn_q.weight iq2_s +blk.20.attn_v.weight q4_K +blk.20.ffn_down.weight q3_K +blk.21.attn_k.weight iq2_s +blk.21.attn_output.weight iq3_s +blk.21.attn_q.weight iq2_s +blk.21.attn_v.weight q4_K +blk.21.ffn_down.weight q3_K +blk.22.attn_k.weight iq2_s +blk.22.attn_output.weight iq3_s +blk.22.attn_q.weight iq2_s +blk.22.attn_v.weight q4_K +blk.22.ffn_down.weight q3_K +blk.23.attn_k.weight iq2_s +blk.23.attn_output.weight iq3_s +blk.23.attn_q.weight iq2_s +blk.23.attn_v.weight q4_K +blk.23.ffn_down.weight q3_K +blk.24.attn_k.weight iq2_s +blk.24.attn_output.weight iq3_s +blk.24.attn_q.weight iq2_s +blk.24.attn_v.weight q4_K +blk.24.ffn_down.weight q3_K +blk.25.attn_k.weight iq2_s +blk.25.attn_output.weight iq3_s +blk.25.attn_q.weight iq2_s +blk.25.attn_v.weight q4_K +blk.25.ffn_down.weight q3_K +blk.26.attn_k.weight iq2_s +blk.26.attn_output.weight iq3_s +blk.26.attn_q.weight iq2_s +blk.26.attn_v.weight q4_K +blk.26.ffn_down.weight q3_K +blk.27.attn_k.weight iq2_s +blk.27.attn_output.weight iq3_s +blk.27.attn_q.weight iq2_s +blk.27.attn_v.weight q4_K +blk.27.ffn_down.weight q3_K +blk.28.attn_k.weight iq2_s +blk.28.attn_output.weight iq3_s +blk.28.attn_q.weight iq2_s +blk.28.attn_v.weight q4_K +blk.28.ffn_down.weight q3_K +blk.29.attn_k.weight iq2_s +blk.29.attn_output.weight iq3_s +blk.29.attn_q.weight iq2_s +blk.29.attn_v.weight q4_K +blk.29.ffn_down.weight q3_K +blk.30.attn_k.weight iq2_s +blk.30.attn_output.weight iq3_s +blk.30.attn_q.weight iq2_s +blk.30.attn_v.weight q4_K +blk.30.ffn_down.weight q3_K +blk.31.attn_k.weight iq2_s +blk.31.attn_output.weight iq3_s +blk.31.attn_q.weight iq2_s +blk.31.attn_v.weight q4_K +blk.31.ffn_down.weight q3_K +blk.32.attn_k.weight iq2_s +blk.32.attn_output.weight iq3_s +blk.32.attn_q.weight iq2_s +blk.32.attn_v.weight q4_K +blk.32.ffn_down.weight q3_K +blk.33.attn_k.weight iq2_s +blk.33.attn_output.weight iq3_s +blk.33.attn_q.weight iq2_s +blk.33.attn_v.weight q4_K +blk.33.ffn_down.weight q3_K +blk.34.attn_k.weight iq2_s +blk.34.attn_output.weight iq3_s +blk.34.attn_q.weight iq2_s +blk.34.attn_v.weight q4_K +blk.34.ffn_down.weight q3_K +blk.35.attn_k.weight iq2_s +blk.35.attn_output.weight iq3_s +blk.35.attn_q.weight iq2_s +blk.35.attn_v.weight q4_K +blk.35.ffn_down.weight q3_K +blk.36.attn_k.weight iq2_s +blk.36.attn_output.weight iq3_s +blk.36.attn_q.weight iq2_s +blk.36.attn_v.weight q4_K +blk.36.ffn_down.weight q3_K +blk.37.attn_k.weight iq2_s +blk.37.attn_output.weight iq3_s +blk.37.attn_q.weight iq2_s +blk.37.attn_v.weight q4_K +blk.37.ffn_down.weight q3_K +blk.38.attn_k.weight iq2_s +blk.38.attn_output.weight iq3_s +blk.38.attn_q.weight iq2_s +blk.38.attn_v.weight q4_K +blk.38.ffn_down.weight q3_K +blk.39.attn_k.weight iq2_s +blk.39.attn_output.weight iq3_s +blk.39.attn_q.weight iq2_s +blk.39.attn_v.weight q4_K +blk.39.ffn_down.weight q3_K + +[IQ1_S] iq1_s +output.weight q5_K +token_embd.weight q2_K +blk.0.attn_output.weight iq2_xxs +blk.0.attn_v.weight q4_K +blk.0.ffn_down.weight q2_K +blk.1.attn_output.weight iq2_xxs +blk.1.attn_v.weight q4_K +blk.1.ffn_down.weight q2_K +blk.2.attn_output.weight iq2_xxs +blk.2.attn_v.weight q4_K +blk.2.ffn_down.weight q2_K +blk.3.attn_output.weight iq2_xxs +blk.3.attn_v.weight q4_K +blk.3.ffn_down.weight q2_K +blk.4.attn_output.weight iq2_xxs +blk.4.attn_v.weight q4_K +blk.4.ffn_down.weight q2_K +blk.5.attn_output.weight iq2_xxs +blk.5.attn_v.weight q4_K +blk.6.attn_output.weight iq2_xxs +blk.6.attn_v.weight q4_K +blk.7.attn_output.weight iq2_xxs +blk.7.attn_v.weight q4_K +blk.8.attn_output.weight iq2_xxs +blk.8.attn_v.weight q4_K +blk.9.attn_output.weight iq2_xxs +blk.9.attn_v.weight q4_K +blk.10.attn_output.weight iq2_xxs +blk.10.attn_v.weight q4_K +blk.11.attn_output.weight iq2_xxs +blk.11.attn_v.weight q4_K +blk.12.attn_output.weight iq2_xxs +blk.12.attn_v.weight q4_K +blk.13.attn_output.weight iq2_xxs +blk.13.attn_v.weight q4_K +blk.14.attn_output.weight iq2_xxs +blk.14.attn_v.weight q4_K +blk.15.attn_output.weight iq2_xxs +blk.15.attn_v.weight q4_K +blk.16.attn_output.weight iq2_xxs +blk.16.attn_v.weight q4_K +blk.17.attn_output.weight iq2_xxs +blk.17.attn_v.weight q4_K +blk.18.attn_output.weight iq2_xxs +blk.18.attn_v.weight q4_K +blk.19.attn_output.weight iq2_xxs +blk.19.attn_v.weight q4_K +blk.20.attn_output.weight iq2_xxs +blk.20.attn_v.weight q4_K +blk.21.attn_output.weight iq2_xxs +blk.21.attn_v.weight q4_K +blk.22.attn_output.weight iq2_xxs +blk.22.attn_v.weight q4_K +blk.23.attn_output.weight iq2_xxs +blk.23.attn_v.weight q4_K +blk.24.attn_output.weight iq2_xxs +blk.24.attn_v.weight q4_K +blk.25.attn_output.weight iq2_xxs +blk.25.attn_v.weight q4_K +blk.26.attn_output.weight iq2_xxs +blk.26.attn_v.weight q4_K +blk.27.attn_output.weight iq2_xxs +blk.27.attn_v.weight q4_K +blk.28.attn_output.weight iq2_xxs +blk.28.attn_v.weight q4_K +blk.29.attn_output.weight iq2_xxs +blk.29.attn_v.weight q4_K +blk.30.attn_output.weight iq2_xxs +blk.30.attn_v.weight q4_K +blk.31.attn_output.weight iq2_xxs +blk.31.attn_v.weight q4_K +blk.32.attn_output.weight iq2_xxs +blk.32.attn_v.weight q4_K +blk.33.attn_output.weight iq2_xxs +blk.33.attn_v.weight q4_K +blk.34.attn_output.weight iq2_xxs +blk.34.attn_v.weight q4_K +blk.35.attn_output.weight iq2_xxs +blk.35.attn_v.weight q4_K +blk.36.attn_output.weight iq2_xxs +blk.36.attn_v.weight q4_K +blk.37.attn_output.weight iq2_xxs +blk.37.attn_v.weight q4_K +blk.38.attn_output.weight iq2_xxs +blk.38.attn_v.weight q4_K +blk.39.attn_output.weight iq2_xxs +blk.39.attn_v.weight q4_K + +[IQ4_NL] iq4_nl +output.weight q6_K +blk.0.attn_v.weight q5_K +blk.0.ffn_down.weight q5_K +blk.1.attn_v.weight q5_K +blk.1.ffn_down.weight q5_K +blk.2.attn_v.weight q5_K +blk.2.ffn_down.weight q5_K +blk.3.attn_v.weight q5_K +blk.3.ffn_down.weight q5_K +blk.4.attn_v.weight q5_K +blk.4.ffn_down.weight q5_K +blk.5.attn_v.weight q5_K +blk.6.attn_v.weight q5_K +blk.7.attn_v.weight q5_K +blk.8.attn_v.weight q5_K +blk.9.attn_v.weight q5_K +blk.10.attn_v.weight q5_K +blk.11.attn_v.weight q5_K +blk.12.attn_v.weight q5_K +blk.13.attn_v.weight q5_K +blk.14.attn_v.weight q5_K +blk.15.attn_v.weight q5_K +blk.16.attn_v.weight q5_K +blk.17.attn_v.weight q5_K +blk.18.attn_v.weight q5_K +blk.19.attn_v.weight q5_K +blk.20.attn_v.weight q5_K +blk.21.attn_v.weight q5_K +blk.22.attn_v.weight q5_K +blk.23.attn_v.weight q5_K +blk.24.attn_v.weight q5_K +blk.25.attn_v.weight q5_K +blk.26.attn_v.weight q5_K +blk.27.attn_v.weight q5_K +blk.28.attn_v.weight q5_K +blk.29.attn_v.weight q5_K +blk.30.attn_v.weight q5_K +blk.31.attn_v.weight q5_K +blk.32.attn_v.weight q5_K +blk.33.attn_v.weight q5_K +blk.34.attn_v.weight q5_K +blk.35.attn_v.weight q5_K +blk.36.attn_v.weight q5_K +blk.37.attn_v.weight q5_K +blk.38.attn_v.weight q5_K +blk.39.attn_v.weight q5_K + +[IQ3_S] iq3_s +output.weight q6_K +blk.0.attn_v.weight q4_K +blk.1.attn_v.weight q4_K +blk.2.attn_v.weight q4_K +blk.3.attn_v.weight q4_K +blk.4.attn_v.weight q4_K +blk.5.attn_v.weight q4_K +blk.6.attn_v.weight q4_K +blk.7.attn_v.weight q4_K +blk.8.attn_v.weight q4_K +blk.9.attn_v.weight q4_K +blk.10.attn_v.weight q4_K +blk.11.attn_v.weight q4_K +blk.12.attn_v.weight q4_K +blk.13.attn_v.weight q4_K +blk.14.attn_v.weight q4_K +blk.15.attn_v.weight q4_K +blk.16.attn_v.weight q4_K +blk.17.attn_v.weight q4_K +blk.18.attn_v.weight q4_K +blk.19.attn_v.weight q4_K +blk.20.attn_v.weight q4_K +blk.21.attn_v.weight q4_K +blk.22.attn_v.weight q4_K +blk.23.attn_v.weight q4_K +blk.24.attn_v.weight q4_K +blk.25.attn_v.weight q4_K +blk.26.attn_v.weight q4_K +blk.27.attn_v.weight q4_K +blk.28.attn_v.weight q4_K +blk.29.attn_v.weight q4_K +blk.30.attn_v.weight q4_K +blk.31.attn_v.weight q4_K +blk.32.attn_v.weight q4_K +blk.33.attn_v.weight q4_K +blk.34.attn_v.weight q4_K +blk.35.attn_v.weight q4_K +blk.36.attn_v.weight q4_K +blk.37.attn_v.weight q4_K +blk.38.attn_v.weight q4_K +blk.39.attn_v.weight q4_K + +[IQ3_M] iq3_s +output.weight q6_K +blk.0.attn_output.weight q4_K +blk.0.attn_v.weight q4_K +blk.0.ffn_down.weight q4_K +blk.1.attn_output.weight q4_K +blk.1.attn_v.weight q4_K +blk.1.ffn_down.weight q4_K +blk.2.attn_output.weight q4_K +blk.2.attn_v.weight q4_K +blk.2.ffn_down.weight q4_K +blk.3.attn_output.weight q4_K +blk.3.attn_v.weight q4_K +blk.3.ffn_down.weight q4_K +blk.4.attn_output.weight q4_K +blk.4.attn_v.weight q4_K +blk.4.ffn_down.weight q4_K +blk.5.attn_output.weight q4_K +blk.5.attn_v.weight q4_K +blk.6.attn_output.weight q4_K +blk.6.attn_v.weight q4_K +blk.7.attn_output.weight q4_K +blk.7.attn_v.weight q4_K +blk.8.attn_output.weight q4_K +blk.8.attn_v.weight q4_K +blk.9.attn_output.weight q4_K +blk.9.attn_v.weight q4_K +blk.10.attn_output.weight q4_K +blk.10.attn_v.weight q4_K +blk.11.attn_output.weight q4_K +blk.11.attn_v.weight q4_K +blk.12.attn_output.weight q4_K +blk.12.attn_v.weight q4_K +blk.13.attn_output.weight q4_K +blk.13.attn_v.weight q4_K +blk.14.attn_output.weight q4_K +blk.14.attn_v.weight q4_K +blk.15.attn_output.weight q4_K +blk.15.attn_v.weight q4_K +blk.16.attn_output.weight q4_K +blk.16.attn_v.weight q4_K +blk.17.attn_output.weight q4_K +blk.17.attn_v.weight q4_K +blk.18.attn_output.weight q4_K +blk.18.attn_v.weight q4_K +blk.19.attn_output.weight q4_K +blk.19.attn_v.weight q4_K +blk.20.attn_output.weight q4_K +blk.20.attn_v.weight q4_K +blk.21.attn_output.weight q4_K +blk.21.attn_v.weight q4_K +blk.22.attn_output.weight q4_K +blk.22.attn_v.weight q4_K +blk.23.attn_output.weight q4_K +blk.23.attn_v.weight q4_K +blk.24.attn_output.weight q4_K +blk.24.attn_v.weight q4_K +blk.25.attn_output.weight q4_K +blk.25.attn_v.weight q4_K +blk.26.attn_output.weight q4_K +blk.26.attn_v.weight q4_K +blk.27.attn_output.weight q4_K +blk.27.attn_v.weight q4_K +blk.28.attn_output.weight q4_K +blk.28.attn_v.weight q4_K +blk.29.attn_output.weight q4_K +blk.29.attn_v.weight q4_K +blk.30.attn_output.weight q4_K +blk.30.attn_v.weight q4_K +blk.31.attn_output.weight q4_K +blk.31.attn_v.weight q4_K +blk.32.attn_output.weight q4_K +blk.32.attn_v.weight q4_K +blk.33.attn_output.weight q4_K +blk.33.attn_v.weight q4_K +blk.34.attn_output.weight q4_K +blk.34.attn_v.weight q4_K +blk.35.attn_output.weight q4_K +blk.35.attn_v.weight q4_K +blk.36.attn_output.weight q4_K +blk.36.attn_v.weight q4_K +blk.37.attn_output.weight q4_K +blk.37.attn_v.weight q4_K +blk.38.attn_output.weight q4_K +blk.38.attn_v.weight q4_K +blk.39.attn_output.weight q4_K +blk.39.attn_v.weight q4_K + +[IQ2_S] iq2_xs +output.weight q5_K +token_embd.weight iq3_s +blk.0.attn_output.weight iq3_s +blk.0.attn_v.weight q4_K +blk.0.ffn_down.weight iq3_s +blk.1.attn_output.weight iq3_s +blk.1.attn_v.weight q4_K +blk.1.ffn_down.weight iq3_s +blk.2.attn_output.weight iq3_s +blk.2.attn_v.weight q4_K +blk.2.ffn_down.weight iq3_s +blk.3.attn_output.weight iq3_s +blk.3.attn_v.weight q4_K +blk.3.ffn_down.weight iq3_s +blk.4.attn_output.weight iq3_s +blk.4.attn_v.weight q4_K +blk.4.ffn_down.weight iq3_s +blk.5.attn_output.weight iq3_s +blk.5.attn_v.weight q4_K +blk.6.attn_output.weight iq3_s +blk.6.attn_v.weight q4_K +blk.7.attn_output.weight iq3_s +blk.7.attn_v.weight q4_K +blk.8.attn_output.weight iq3_s +blk.8.attn_v.weight q4_K +blk.9.attn_output.weight iq3_s +blk.9.attn_v.weight q4_K +blk.10.attn_output.weight iq3_s +blk.10.attn_v.weight q4_K +blk.11.attn_output.weight iq3_s +blk.11.attn_v.weight q4_K +blk.12.attn_output.weight iq3_s +blk.12.attn_v.weight q4_K +blk.13.attn_output.weight iq3_s +blk.13.attn_v.weight q4_K +blk.14.attn_output.weight iq3_s +blk.14.attn_v.weight q4_K +blk.15.attn_output.weight iq3_s +blk.15.attn_v.weight q4_K +blk.16.attn_output.weight iq3_s +blk.16.attn_v.weight q4_K +blk.17.attn_output.weight iq3_s +blk.17.attn_v.weight q4_K +blk.18.attn_output.weight iq3_s +blk.18.attn_v.weight q4_K +blk.19.attn_output.weight iq3_s +blk.19.attn_v.weight q4_K +blk.20.attn_output.weight iq3_s +blk.20.attn_v.weight q4_K +blk.21.attn_output.weight iq3_s +blk.21.attn_v.weight q4_K +blk.22.attn_output.weight iq3_s +blk.22.attn_v.weight q4_K +blk.23.attn_output.weight iq3_s +blk.23.attn_v.weight q4_K +blk.24.attn_output.weight iq3_s +blk.24.attn_v.weight q4_K +blk.25.attn_output.weight iq3_s +blk.25.attn_v.weight q4_K +blk.26.attn_output.weight iq3_s +blk.26.attn_v.weight q4_K +blk.27.attn_output.weight iq3_s +blk.27.attn_v.weight q4_K +blk.28.attn_output.weight iq3_s +blk.28.attn_v.weight q4_K +blk.29.attn_output.weight iq3_s +blk.29.attn_v.weight q4_K +blk.30.attn_output.weight iq3_s +blk.30.attn_v.weight q4_K +blk.31.attn_output.weight iq3_s +blk.31.attn_v.weight q4_K +blk.32.attn_output.weight iq3_s +blk.32.attn_v.weight q4_K +blk.33.attn_output.weight iq3_s +blk.33.attn_v.weight q4_K +blk.34.attn_output.weight iq3_s +blk.34.attn_v.weight q4_K +blk.35.attn_output.weight iq3_s +blk.35.attn_v.weight q4_K +blk.36.attn_output.weight iq3_s +blk.36.attn_v.weight q4_K +blk.37.attn_output.weight iq3_s +blk.37.attn_v.weight q4_K +blk.38.attn_output.weight iq3_s +blk.38.attn_v.weight q4_K +blk.39.attn_output.weight iq3_s +blk.39.attn_v.weight q4_K + +[IQ2_M] iq2_s +output.weight q5_K +token_embd.weight iq3_s +blk.0.attn_output.weight iq3_s +blk.0.attn_v.weight q4_K +blk.0.ffn_down.weight iq3_s +blk.1.attn_output.weight iq3_s +blk.1.attn_v.weight q4_K +blk.1.ffn_down.weight iq3_s +blk.2.attn_output.weight iq3_s +blk.2.attn_v.weight q4_K +blk.2.ffn_down.weight iq3_s +blk.3.attn_output.weight iq3_s +blk.3.attn_v.weight q4_K +blk.3.ffn_down.weight iq3_s +blk.4.attn_output.weight iq3_s +blk.4.attn_v.weight q4_K +blk.4.ffn_down.weight iq3_s +blk.5.attn_output.weight iq3_s +blk.5.attn_v.weight q4_K +blk.6.attn_output.weight iq3_s +blk.6.attn_v.weight q4_K +blk.7.attn_output.weight iq3_s +blk.7.attn_v.weight q4_K +blk.8.attn_output.weight iq3_s +blk.8.attn_v.weight q4_K +blk.9.attn_output.weight iq3_s +blk.9.attn_v.weight q4_K +blk.10.attn_output.weight iq3_s +blk.10.attn_v.weight q4_K +blk.11.attn_output.weight iq3_s +blk.11.attn_v.weight q4_K +blk.12.attn_output.weight iq3_s +blk.12.attn_v.weight q4_K +blk.13.attn_output.weight iq3_s +blk.13.attn_v.weight q4_K +blk.14.attn_output.weight iq3_s +blk.14.attn_v.weight q4_K +blk.15.attn_output.weight iq3_s +blk.15.attn_v.weight q4_K +blk.16.attn_output.weight iq3_s +blk.16.attn_v.weight q4_K +blk.17.attn_output.weight iq3_s +blk.17.attn_v.weight q4_K +blk.18.attn_output.weight iq3_s +blk.18.attn_v.weight q4_K +blk.19.attn_output.weight iq3_s +blk.19.attn_v.weight q4_K +blk.20.attn_output.weight iq3_s +blk.20.attn_v.weight q4_K +blk.21.attn_output.weight iq3_s +blk.21.attn_v.weight q4_K +blk.22.attn_output.weight iq3_s +blk.22.attn_v.weight q4_K +blk.23.attn_output.weight iq3_s +blk.23.attn_v.weight q4_K +blk.24.attn_output.weight iq3_s +blk.24.attn_v.weight q4_K +blk.25.attn_output.weight iq3_s +blk.25.attn_v.weight q4_K +blk.26.attn_output.weight iq3_s +blk.26.attn_v.weight q4_K +blk.27.attn_output.weight iq3_s +blk.27.attn_v.weight q4_K +blk.28.attn_output.weight iq3_s +blk.28.attn_v.weight q4_K +blk.29.attn_output.weight iq3_s +blk.29.attn_v.weight q4_K +blk.30.attn_output.weight iq3_s +blk.30.attn_v.weight q4_K +blk.31.attn_output.weight iq3_s +blk.31.attn_v.weight q4_K +blk.32.attn_output.weight iq3_s +blk.32.attn_v.weight q4_K +blk.33.attn_output.weight iq3_s +blk.33.attn_v.weight q4_K +blk.34.attn_output.weight iq3_s +blk.34.attn_v.weight q4_K +blk.35.attn_output.weight iq3_s +blk.35.attn_v.weight q4_K +blk.36.attn_output.weight iq3_s +blk.36.attn_v.weight q4_K +blk.37.attn_output.weight iq3_s +blk.37.attn_v.weight q4_K +blk.38.attn_output.weight iq3_s +blk.38.attn_v.weight q4_K +blk.39.attn_output.weight iq3_s +blk.39.attn_v.weight q4_K + +[IQ4_XS] iq4_xs +output.weight q6_K +blk.0.attn_v.weight q5_K +blk.0.ffn_down.weight q5_K +blk.1.attn_v.weight q5_K +blk.1.ffn_down.weight q5_K +blk.2.attn_v.weight q5_K +blk.2.ffn_down.weight q5_K +blk.3.attn_v.weight q5_K +blk.3.ffn_down.weight q5_K +blk.4.attn_v.weight q5_K +blk.4.ffn_down.weight q5_K +blk.5.attn_v.weight q5_K +blk.6.attn_v.weight q5_K +blk.7.attn_v.weight q5_K +blk.8.attn_v.weight q5_K +blk.9.attn_v.weight q5_K +blk.10.attn_v.weight q5_K +blk.11.attn_v.weight q5_K +blk.12.attn_v.weight q5_K +blk.13.attn_v.weight q5_K +blk.14.attn_v.weight q5_K +blk.15.attn_v.weight q5_K +blk.16.attn_v.weight q5_K +blk.17.attn_v.weight q5_K +blk.18.attn_v.weight q5_K +blk.19.attn_v.weight q5_K +blk.20.attn_v.weight q5_K +blk.21.attn_v.weight q5_K +blk.22.attn_v.weight q5_K +blk.23.attn_v.weight q5_K +blk.24.attn_v.weight q5_K +blk.25.attn_v.weight q5_K +blk.26.attn_v.weight q5_K +blk.27.attn_v.weight q5_K +blk.28.attn_v.weight q5_K +blk.29.attn_v.weight q5_K +blk.30.attn_v.weight q5_K +blk.31.attn_v.weight q5_K +blk.32.attn_v.weight q5_K +blk.33.attn_v.weight q5_K +blk.34.attn_v.weight q5_K +blk.35.attn_v.weight q5_K +blk.36.attn_v.weight q5_K +blk.37.attn_v.weight q5_K +blk.38.attn_v.weight q5_K +blk.39.attn_v.weight q5_K + +[IQ1_M] iq1_m +output.weight q5_K +token_embd.weight q2_K +blk.0.attn_output.weight iq2_xxs +blk.0.attn_v.weight q4_K +blk.0.ffn_down.weight q2_K +blk.1.attn_output.weight iq2_xxs +blk.1.attn_v.weight q4_K +blk.1.ffn_down.weight q2_K +blk.2.attn_output.weight iq2_xxs +blk.2.attn_v.weight q4_K +blk.2.ffn_down.weight q2_K +blk.3.attn_output.weight iq2_xxs +blk.3.attn_v.weight q4_K +blk.3.ffn_down.weight q2_K +blk.4.attn_output.weight iq2_xxs +blk.4.attn_v.weight q4_K +blk.4.ffn_down.weight q2_K +blk.5.attn_output.weight iq2_xxs +blk.5.attn_v.weight q4_K +blk.6.attn_output.weight iq2_xxs +blk.6.attn_v.weight q4_K +blk.7.attn_output.weight iq2_xxs +blk.7.attn_v.weight q4_K +blk.8.attn_output.weight iq2_xxs +blk.8.attn_v.weight q4_K +blk.9.attn_output.weight iq2_xxs +blk.9.attn_v.weight q4_K +blk.10.attn_output.weight iq2_xxs +blk.10.attn_v.weight q4_K +blk.11.attn_output.weight iq2_xxs +blk.11.attn_v.weight q4_K +blk.12.attn_output.weight iq2_xxs +blk.12.attn_v.weight q4_K +blk.13.attn_output.weight iq2_xxs +blk.13.attn_v.weight q4_K +blk.14.attn_output.weight iq2_xxs +blk.14.attn_v.weight q4_K +blk.15.attn_output.weight iq2_xxs +blk.15.attn_v.weight q4_K +blk.16.attn_output.weight iq2_xxs +blk.16.attn_v.weight q4_K +blk.17.attn_output.weight iq2_xxs +blk.17.attn_v.weight q4_K +blk.18.attn_output.weight iq2_xxs +blk.18.attn_v.weight q4_K +blk.19.attn_output.weight iq2_xxs +blk.19.attn_v.weight q4_K +blk.20.attn_output.weight iq2_xxs +blk.20.attn_v.weight q4_K +blk.21.attn_output.weight iq2_xxs +blk.21.attn_v.weight q4_K +blk.22.attn_output.weight iq2_xxs +blk.22.attn_v.weight q4_K +blk.23.attn_output.weight iq2_xxs +blk.23.attn_v.weight q4_K +blk.24.attn_output.weight iq2_xxs +blk.24.attn_v.weight q4_K +blk.25.attn_output.weight iq2_xxs +blk.25.attn_v.weight q4_K +blk.26.attn_output.weight iq2_xxs +blk.26.attn_v.weight q4_K +blk.27.attn_output.weight iq2_xxs +blk.27.attn_v.weight q4_K +blk.28.attn_output.weight iq2_xxs +blk.28.attn_v.weight q4_K +blk.29.attn_output.weight iq2_xxs +blk.29.attn_v.weight q4_K +blk.30.attn_output.weight iq2_xxs +blk.30.attn_v.weight q4_K +blk.31.attn_output.weight iq2_xxs +blk.31.attn_v.weight q4_K +blk.32.attn_output.weight iq2_xxs +blk.32.attn_v.weight q4_K +blk.33.attn_output.weight iq2_xxs +blk.33.attn_v.weight q4_K +blk.34.attn_output.weight iq2_xxs +blk.34.attn_v.weight q4_K +blk.35.attn_output.weight iq2_xxs +blk.35.attn_v.weight q4_K +blk.36.attn_output.weight iq2_xxs +blk.36.attn_v.weight q4_K +blk.37.attn_output.weight iq2_xxs +blk.37.attn_v.weight q4_K +blk.38.attn_output.weight iq2_xxs +blk.38.attn_v.weight q4_K +blk.39.attn_output.weight iq2_xxs +blk.39.attn_v.weight q4_K + +[BF16] bf16 +output.weight q6_K + +[TQ1_0] tq1_0 +output.weight q6_K +token_embd.weight q4_K + +[TQ2_0] tq2_0 +output.weight q6_K +token_embd.weight q4_K + +[MXFP4_MOE] mxfp4 +output.weight q8_0 +token_embd.weight q8_0 +blk.0.attn_k.weight q8_0 +blk.0.attn_output.weight q8_0 +blk.0.attn_q.weight q8_0 +blk.0.attn_v.weight q8_0 +blk.0.ffn_down.weight q8_0 +blk.0.ffn_gate.weight q8_0 +blk.0.ffn_up.weight q8_0 +blk.1.attn_k.weight q8_0 +blk.1.attn_output.weight q8_0 +blk.1.attn_q.weight q8_0 +blk.1.attn_v.weight q8_0 +blk.1.ffn_down.weight q8_0 +blk.1.ffn_gate.weight q8_0 +blk.1.ffn_up.weight q8_0 +blk.2.attn_k.weight q8_0 +blk.2.attn_output.weight q8_0 +blk.2.attn_q.weight q8_0 +blk.2.attn_v.weight q8_0 +blk.2.ffn_down.weight q8_0 +blk.2.ffn_gate.weight q8_0 +blk.2.ffn_up.weight q8_0 +blk.3.attn_k.weight q8_0 +blk.3.attn_output.weight q8_0 +blk.3.attn_q.weight q8_0 +blk.3.attn_v.weight q8_0 +blk.3.ffn_down.weight q8_0 +blk.3.ffn_gate.weight q8_0 +blk.3.ffn_up.weight q8_0 +blk.4.attn_k.weight q8_0 +blk.4.attn_output.weight q8_0 +blk.4.attn_q.weight q8_0 +blk.4.attn_v.weight q8_0 +blk.4.ffn_down.weight q8_0 +blk.4.ffn_gate.weight q8_0 +blk.4.ffn_up.weight q8_0 +blk.5.attn_k.weight q8_0 +blk.5.attn_output.weight q8_0 +blk.5.attn_q.weight q8_0 +blk.5.attn_v.weight q8_0 +blk.5.ffn_down.weight q8_0 +blk.5.ffn_gate.weight q8_0 +blk.5.ffn_up.weight q8_0 +blk.6.attn_k.weight q8_0 +blk.6.attn_output.weight q8_0 +blk.6.attn_q.weight q8_0 +blk.6.attn_v.weight q8_0 +blk.6.ffn_down.weight q8_0 +blk.6.ffn_gate.weight q8_0 +blk.6.ffn_up.weight q8_0 +blk.7.attn_k.weight q8_0 +blk.7.attn_output.weight q8_0 +blk.7.attn_q.weight q8_0 +blk.7.attn_v.weight q8_0 +blk.7.ffn_down.weight q8_0 +blk.7.ffn_gate.weight q8_0 +blk.7.ffn_up.weight q8_0 +blk.8.attn_k.weight q8_0 +blk.8.attn_output.weight q8_0 +blk.8.attn_q.weight q8_0 +blk.8.attn_v.weight q8_0 +blk.8.ffn_down.weight q8_0 +blk.8.ffn_gate.weight q8_0 +blk.8.ffn_up.weight q8_0 +blk.9.attn_k.weight q8_0 +blk.9.attn_output.weight q8_0 +blk.9.attn_q.weight q8_0 +blk.9.attn_v.weight q8_0 +blk.9.ffn_down.weight q8_0 +blk.9.ffn_gate.weight q8_0 +blk.9.ffn_up.weight q8_0 +blk.10.attn_k.weight q8_0 +blk.10.attn_output.weight q8_0 +blk.10.attn_q.weight q8_0 +blk.10.attn_v.weight q8_0 +blk.10.ffn_down.weight q8_0 +blk.10.ffn_gate.weight q8_0 +blk.10.ffn_up.weight q8_0 +blk.11.attn_k.weight q8_0 +blk.11.attn_output.weight q8_0 +blk.11.attn_q.weight q8_0 +blk.11.attn_v.weight q8_0 +blk.11.ffn_down.weight q8_0 +blk.11.ffn_gate.weight q8_0 +blk.11.ffn_up.weight q8_0 +blk.12.attn_k.weight q8_0 +blk.12.attn_output.weight q8_0 +blk.12.attn_q.weight q8_0 +blk.12.attn_v.weight q8_0 +blk.12.ffn_down.weight q8_0 +blk.12.ffn_gate.weight q8_0 +blk.12.ffn_up.weight q8_0 +blk.13.attn_k.weight q8_0 +blk.13.attn_output.weight q8_0 +blk.13.attn_q.weight q8_0 +blk.13.attn_v.weight q8_0 +blk.13.ffn_down.weight q8_0 +blk.13.ffn_gate.weight q8_0 +blk.13.ffn_up.weight q8_0 +blk.14.attn_k.weight q8_0 +blk.14.attn_output.weight q8_0 +blk.14.attn_q.weight q8_0 +blk.14.attn_v.weight q8_0 +blk.14.ffn_down.weight q8_0 +blk.14.ffn_gate.weight q8_0 +blk.14.ffn_up.weight q8_0 +blk.15.attn_k.weight q8_0 +blk.15.attn_output.weight q8_0 +blk.15.attn_q.weight q8_0 +blk.15.attn_v.weight q8_0 +blk.15.ffn_down.weight q8_0 +blk.15.ffn_gate.weight q8_0 +blk.15.ffn_up.weight q8_0 +blk.16.attn_k.weight q8_0 +blk.16.attn_output.weight q8_0 +blk.16.attn_q.weight q8_0 +blk.16.attn_v.weight q8_0 +blk.16.ffn_down.weight q8_0 +blk.16.ffn_gate.weight q8_0 +blk.16.ffn_up.weight q8_0 +blk.17.attn_k.weight q8_0 +blk.17.attn_output.weight q8_0 +blk.17.attn_q.weight q8_0 +blk.17.attn_v.weight q8_0 +blk.17.ffn_down.weight q8_0 +blk.17.ffn_gate.weight q8_0 +blk.17.ffn_up.weight q8_0 +blk.18.attn_k.weight q8_0 +blk.18.attn_output.weight q8_0 +blk.18.attn_q.weight q8_0 +blk.18.attn_v.weight q8_0 +blk.18.ffn_down.weight q8_0 +blk.18.ffn_gate.weight q8_0 +blk.18.ffn_up.weight q8_0 +blk.19.attn_k.weight q8_0 +blk.19.attn_output.weight q8_0 +blk.19.attn_q.weight q8_0 +blk.19.attn_v.weight q8_0 +blk.19.ffn_down.weight q8_0 +blk.19.ffn_gate.weight q8_0 +blk.19.ffn_up.weight q8_0 +blk.20.attn_k.weight q8_0 +blk.20.attn_output.weight q8_0 +blk.20.attn_q.weight q8_0 +blk.20.attn_v.weight q8_0 +blk.20.ffn_down.weight q8_0 +blk.20.ffn_gate.weight q8_0 +blk.20.ffn_up.weight q8_0 +blk.21.attn_k.weight q8_0 +blk.21.attn_output.weight q8_0 +blk.21.attn_q.weight q8_0 +blk.21.attn_v.weight q8_0 +blk.21.ffn_down.weight q8_0 +blk.21.ffn_gate.weight q8_0 +blk.21.ffn_up.weight q8_0 +blk.22.attn_k.weight q8_0 +blk.22.attn_output.weight q8_0 +blk.22.attn_q.weight q8_0 +blk.22.attn_v.weight q8_0 +blk.22.ffn_down.weight q8_0 +blk.22.ffn_gate.weight q8_0 +blk.22.ffn_up.weight q8_0 +blk.23.attn_k.weight q8_0 +blk.23.attn_output.weight q8_0 +blk.23.attn_q.weight q8_0 +blk.23.attn_v.weight q8_0 +blk.23.ffn_down.weight q8_0 +blk.23.ffn_gate.weight q8_0 +blk.23.ffn_up.weight q8_0 +blk.24.attn_k.weight q8_0 +blk.24.attn_output.weight q8_0 +blk.24.attn_q.weight q8_0 +blk.24.attn_v.weight q8_0 +blk.24.ffn_down.weight q8_0 +blk.24.ffn_gate.weight q8_0 +blk.24.ffn_up.weight q8_0 +blk.25.attn_k.weight q8_0 +blk.25.attn_output.weight q8_0 +blk.25.attn_q.weight q8_0 +blk.25.attn_v.weight q8_0 +blk.25.ffn_down.weight q8_0 +blk.25.ffn_gate.weight q8_0 +blk.25.ffn_up.weight q8_0 +blk.26.attn_k.weight q8_0 +blk.26.attn_output.weight q8_0 +blk.26.attn_q.weight q8_0 +blk.26.attn_v.weight q8_0 +blk.26.ffn_down.weight q8_0 +blk.26.ffn_gate.weight q8_0 +blk.26.ffn_up.weight q8_0 +blk.27.attn_k.weight q8_0 +blk.27.attn_output.weight q8_0 +blk.27.attn_q.weight q8_0 +blk.27.attn_v.weight q8_0 +blk.27.ffn_down.weight q8_0 +blk.27.ffn_gate.weight q8_0 +blk.27.ffn_up.weight q8_0 +blk.28.attn_k.weight q8_0 +blk.28.attn_output.weight q8_0 +blk.28.attn_q.weight q8_0 +blk.28.attn_v.weight q8_0 +blk.28.ffn_down.weight q8_0 +blk.28.ffn_gate.weight q8_0 +blk.28.ffn_up.weight q8_0 +blk.29.attn_k.weight q8_0 +blk.29.attn_output.weight q8_0 +blk.29.attn_q.weight q8_0 +blk.29.attn_v.weight q8_0 +blk.29.ffn_down.weight q8_0 +blk.29.ffn_gate.weight q8_0 +blk.29.ffn_up.weight q8_0 +blk.30.attn_k.weight q8_0 +blk.30.attn_output.weight q8_0 +blk.30.attn_q.weight q8_0 +blk.30.attn_v.weight q8_0 +blk.30.ffn_down.weight q8_0 +blk.30.ffn_gate.weight q8_0 +blk.30.ffn_up.weight q8_0 +blk.31.attn_k.weight q8_0 +blk.31.attn_output.weight q8_0 +blk.31.attn_q.weight q8_0 +blk.31.attn_v.weight q8_0 +blk.31.ffn_down.weight q8_0 +blk.31.ffn_gate.weight q8_0 +blk.31.ffn_up.weight q8_0 +blk.32.attn_k.weight q8_0 +blk.32.attn_output.weight q8_0 +blk.32.attn_q.weight q8_0 +blk.32.attn_v.weight q8_0 +blk.32.ffn_down.weight q8_0 +blk.32.ffn_gate.weight q8_0 +blk.32.ffn_up.weight q8_0 +blk.33.attn_k.weight q8_0 +blk.33.attn_output.weight q8_0 +blk.33.attn_q.weight q8_0 +blk.33.attn_v.weight q8_0 +blk.33.ffn_down.weight q8_0 +blk.33.ffn_gate.weight q8_0 +blk.33.ffn_up.weight q8_0 +blk.34.attn_k.weight q8_0 +blk.34.attn_output.weight q8_0 +blk.34.attn_q.weight q8_0 +blk.34.attn_v.weight q8_0 +blk.34.ffn_down.weight q8_0 +blk.34.ffn_gate.weight q8_0 +blk.34.ffn_up.weight q8_0 +blk.35.attn_k.weight q8_0 +blk.35.attn_output.weight q8_0 +blk.35.attn_q.weight q8_0 +blk.35.attn_v.weight q8_0 +blk.35.ffn_down.weight q8_0 +blk.35.ffn_gate.weight q8_0 +blk.35.ffn_up.weight q8_0 +blk.36.attn_k.weight q8_0 +blk.36.attn_output.weight q8_0 +blk.36.attn_q.weight q8_0 +blk.36.attn_v.weight q8_0 +blk.36.ffn_down.weight q8_0 +blk.36.ffn_gate.weight q8_0 +blk.36.ffn_up.weight q8_0 +blk.37.attn_k.weight q8_0 +blk.37.attn_output.weight q8_0 +blk.37.attn_q.weight q8_0 +blk.37.attn_v.weight q8_0 +blk.37.ffn_down.weight q8_0 +blk.37.ffn_gate.weight q8_0 +blk.37.ffn_up.weight q8_0 +blk.38.attn_k.weight q8_0 +blk.38.attn_output.weight q8_0 +blk.38.attn_q.weight q8_0 +blk.38.attn_v.weight q8_0 +blk.38.ffn_down.weight q8_0 +blk.38.ffn_gate.weight q8_0 +blk.38.ffn_up.weight q8_0 +blk.39.attn_k.weight q8_0 +blk.39.attn_output.weight q8_0 +blk.39.attn_q.weight q8_0 +blk.39.attn_v.weight q8_0 +blk.39.ffn_down.weight q8_0 +blk.39.ffn_gate.weight q8_0 +blk.39.ffn_up.weight q8_0 diff --git a/tests/snapshots/qwen3-coder-next.schema b/tests/snapshots/qwen3-coder-next.schema new file mode 100644 index 0000000000..d71fe86a1f --- /dev/null +++ b/tests/snapshots/qwen3-coder-next.schema @@ -0,0 +1,1713 @@ +# Model: Qwen3-Coder-Next +# n_embd=2048, n_ff=5120, n_vocab=151936, n_layer=48, n_head=16, n_head_kv=2, n_expert=512 + +[F32] f32 +output.weight q6_K + +[F16] f16 +output.weight q6_K + +[Q4_0] q4_0 +output.weight q6_K + +[Q4_1] q4_1 +output.weight q6_K + +[Q8_0] q8_0 + +[Q5_0] q5_0 +output.weight q6_K + +[Q5_1] q5_1 +output.weight q6_K + +[Q2_K] q2_K +blk.0.ffn_down_exps.weight q3_K +blk.0.ffn_down_shexp.weight q3_K +blk.1.ffn_down_shexp.weight q3_K +blk.1.ffn_down_exps.weight q3_K +blk.2.ffn_down_shexp.weight q3_K +blk.2.ffn_down_exps.weight q3_K +blk.3.ffn_down_shexp.weight q3_K +blk.3.attn_output.weight q3_K +blk.3.attn_v.weight q4_K +blk.3.ffn_down_exps.weight q3_K +blk.4.ffn_down_shexp.weight q3_K +blk.4.ffn_down_exps.weight q3_K +blk.5.ffn_down_shexp.weight q3_K +blk.5.ffn_down_exps.weight q3_K +blk.6.ffn_down_exps.weight q3_K +blk.6.ffn_down_shexp.weight q3_K +blk.7.ffn_down_shexp.weight q3_K +blk.7.attn_output.weight q3_K +blk.7.attn_v.weight q4_K +blk.7.ffn_down_exps.weight q3_K +blk.8.ffn_down_shexp.weight q3_K +blk.8.ffn_down_exps.weight q3_K +blk.9.ffn_down_shexp.weight q3_K +blk.10.ffn_down_shexp.weight q3_K +blk.9.ffn_down_exps.weight q3_K +blk.10.ffn_down_exps.weight q3_K +blk.11.ffn_down_shexp.weight q3_K +blk.11.attn_output.weight q3_K +blk.11.attn_v.weight q4_K +blk.11.ffn_down_exps.weight q3_K +blk.12.ffn_down_exps.weight q3_K +blk.12.ffn_down_shexp.weight q3_K +blk.13.ffn_down_shexp.weight q3_K +blk.13.ffn_down_exps.weight q3_K +blk.14.ffn_down_shexp.weight q3_K +blk.14.ffn_down_exps.weight q3_K +blk.15.ffn_down_shexp.weight q3_K +blk.15.attn_output.weight q3_K +blk.15.attn_v.weight q4_K +blk.15.ffn_down_exps.weight q3_K +blk.16.ffn_down_shexp.weight q3_K +blk.16.ffn_down_exps.weight q3_K +blk.17.ffn_down_exps.weight q3_K +blk.17.ffn_down_shexp.weight q3_K +blk.18.ffn_down_shexp.weight q3_K +blk.18.ffn_down_exps.weight q3_K +blk.19.ffn_down_shexp.weight q3_K +blk.19.attn_output.weight q3_K +blk.19.attn_v.weight q4_K +blk.19.ffn_down_exps.weight q3_K +blk.20.ffn_down_shexp.weight q3_K +blk.20.ffn_down_exps.weight q3_K +blk.21.ffn_down_shexp.weight q3_K +blk.21.ffn_down_exps.weight q3_K +blk.22.ffn_down_shexp.weight q3_K +blk.22.ffn_down_exps.weight q3_K +blk.23.ffn_down_exps.weight q3_K +blk.23.ffn_down_shexp.weight q3_K +blk.23.attn_output.weight q3_K +blk.23.attn_v.weight q4_K +blk.24.ffn_down_shexp.weight q3_K +blk.24.ffn_down_exps.weight q3_K +blk.25.ffn_down_shexp.weight q3_K +blk.25.ffn_down_exps.weight q3_K +blk.26.ffn_down_shexp.weight q3_K +blk.26.ffn_down_exps.weight q3_K +blk.27.ffn_down_shexp.weight q3_K +blk.27.attn_output.weight q3_K +blk.27.attn_v.weight q4_K +blk.27.ffn_down_exps.weight q3_K +blk.28.ffn_down_shexp.weight q3_K +blk.28.ffn_down_exps.weight q3_K +blk.29.ffn_down_exps.weight q3_K +blk.29.ffn_down_shexp.weight q3_K +blk.30.ffn_down_shexp.weight q3_K +blk.30.ffn_down_exps.weight q3_K +blk.31.ffn_down_shexp.weight q3_K +blk.31.attn_output.weight q3_K +blk.31.attn_v.weight q4_K +blk.31.ffn_down_exps.weight q3_K +blk.32.ffn_down_shexp.weight q3_K +blk.32.ffn_down_exps.weight q3_K +blk.33.ffn_down_shexp.weight q3_K +blk.33.ffn_down_exps.weight q3_K +blk.34.ffn_down_exps.weight q3_K +blk.34.ffn_down_shexp.weight q3_K +blk.35.ffn_down_shexp.weight q3_K +blk.35.attn_output.weight q3_K +blk.35.attn_v.weight q4_K +blk.35.ffn_down_exps.weight q3_K +blk.36.ffn_down_shexp.weight q3_K +blk.36.ffn_down_exps.weight q3_K +blk.37.ffn_down_shexp.weight q3_K +blk.37.ffn_down_exps.weight q3_K +blk.38.ffn_down_shexp.weight q3_K +blk.38.ffn_down_exps.weight q3_K +blk.39.ffn_down_shexp.weight q3_K +blk.39.attn_output.weight q3_K +blk.39.attn_v.weight q4_K +blk.39.ffn_down_exps.weight q3_K +blk.40.ffn_down_exps.weight q3_K +blk.40.ffn_down_shexp.weight q3_K +blk.41.ffn_down_shexp.weight q3_K +blk.41.ffn_down_exps.weight q3_K +blk.42.ffn_down_shexp.weight q3_K +blk.42.ffn_down_exps.weight q3_K +blk.43.ffn_down_shexp.weight q3_K +blk.43.attn_output.weight q3_K +blk.43.attn_v.weight q4_K +blk.43.ffn_down_exps.weight q3_K +blk.44.ffn_down_shexp.weight q3_K +blk.44.ffn_down_exps.weight q3_K +blk.45.ffn_down_shexp.weight q3_K +blk.45.ffn_down_exps.weight q3_K +blk.46.ffn_down_exps.weight q3_K +blk.46.ffn_down_shexp.weight q3_K +blk.47.ffn_down_shexp.weight q3_K +blk.47.attn_output.weight q3_K +blk.47.attn_v.weight q4_K +output.weight q6_K +blk.47.ffn_down_exps.weight q3_K + +[Q3_K_S] q3_K +output.weight q6_K + +[Q3_K_M] q3_K +blk.0.attn_qkv.weight q4_K +blk.0.ffn_down_exps.weight q5_K +blk.0.ffn_down_shexp.weight q5_K +blk.1.attn_qkv.weight q4_K +blk.1.ffn_down_shexp.weight q5_K +blk.1.ffn_down_exps.weight q5_K +blk.2.attn_qkv.weight q4_K +blk.2.ffn_down_shexp.weight q5_K +blk.2.ffn_down_exps.weight q5_K +blk.3.ffn_down_shexp.weight q4_K +blk.3.attn_output.weight q4_K +blk.3.attn_v.weight q5_K +blk.3.ffn_down_exps.weight q4_K +blk.4.attn_qkv.weight q4_K +blk.4.ffn_down_shexp.weight q4_K +blk.4.ffn_down_exps.weight q4_K +blk.5.attn_qkv.weight q4_K +blk.5.ffn_down_shexp.weight q4_K +blk.5.ffn_down_exps.weight q4_K +blk.6.attn_qkv.weight q4_K +blk.6.ffn_down_exps.weight q4_K +blk.6.ffn_down_shexp.weight q4_K +blk.7.ffn_down_shexp.weight q4_K +blk.7.attn_output.weight q4_K +blk.7.attn_v.weight q5_K +blk.7.ffn_down_exps.weight q4_K +blk.8.attn_qkv.weight q4_K +blk.8.ffn_down_shexp.weight q4_K +blk.8.ffn_down_exps.weight q4_K +blk.9.attn_qkv.weight q4_K +blk.9.ffn_down_shexp.weight q4_K +blk.10.attn_qkv.weight q4_K +blk.10.ffn_down_shexp.weight q4_K +blk.9.ffn_down_exps.weight q4_K +blk.10.ffn_down_exps.weight q4_K +blk.11.ffn_down_shexp.weight q4_K +blk.11.attn_output.weight q4_K +blk.11.attn_v.weight q4_K +blk.11.ffn_down_exps.weight q4_K +blk.12.attn_qkv.weight q4_K +blk.12.ffn_down_exps.weight q4_K +blk.12.ffn_down_shexp.weight q4_K +blk.13.attn_qkv.weight q4_K +blk.13.ffn_down_shexp.weight q4_K +blk.13.ffn_down_exps.weight q4_K +blk.14.attn_qkv.weight q4_K +blk.14.ffn_down_shexp.weight q4_K +blk.14.ffn_down_exps.weight q4_K +blk.15.ffn_down_shexp.weight q4_K +blk.15.attn_output.weight q4_K +blk.15.attn_v.weight q4_K +blk.15.ffn_down_exps.weight q4_K +blk.16.attn_qkv.weight q4_K +blk.16.ffn_down_shexp.weight q4_K +blk.16.ffn_down_exps.weight q4_K +blk.17.attn_qkv.weight q4_K +blk.17.ffn_down_exps.weight q4_K +blk.17.ffn_down_shexp.weight q4_K +blk.18.attn_qkv.weight q4_K +blk.18.ffn_down_shexp.weight q4_K +blk.18.ffn_down_exps.weight q4_K +blk.19.ffn_down_shexp.weight q4_K +blk.19.attn_output.weight q4_K +blk.19.attn_v.weight q4_K +blk.19.ffn_down_exps.weight q4_K +blk.20.attn_qkv.weight q4_K +blk.20.ffn_down_shexp.weight q4_K +blk.20.ffn_down_exps.weight q4_K +blk.21.attn_qkv.weight q4_K +blk.21.ffn_down_shexp.weight q4_K +blk.21.ffn_down_exps.weight q4_K +blk.22.attn_qkv.weight q4_K +blk.22.ffn_down_shexp.weight q4_K +blk.22.ffn_down_exps.weight q4_K +blk.23.ffn_down_exps.weight q4_K +blk.23.ffn_down_shexp.weight q4_K +blk.23.attn_output.weight q4_K +blk.23.attn_v.weight q4_K +blk.24.attn_qkv.weight q4_K +blk.24.ffn_down_shexp.weight q4_K +blk.24.ffn_down_exps.weight q4_K +blk.25.attn_qkv.weight q4_K +blk.25.ffn_down_shexp.weight q4_K +blk.25.ffn_down_exps.weight q4_K +blk.26.attn_qkv.weight q4_K +blk.26.ffn_down_shexp.weight q4_K +blk.26.ffn_down_exps.weight q4_K +blk.27.ffn_down_shexp.weight q4_K +blk.27.attn_output.weight q4_K +blk.27.attn_v.weight q4_K +blk.27.ffn_down_exps.weight q4_K +blk.28.attn_qkv.weight q4_K +blk.28.ffn_down_shexp.weight q4_K +blk.28.ffn_down_exps.weight q4_K +blk.29.attn_qkv.weight q4_K +blk.29.ffn_down_exps.weight q4_K +blk.29.ffn_down_shexp.weight q4_K +blk.30.attn_qkv.weight q4_K +blk.30.ffn_down_shexp.weight q4_K +blk.30.ffn_down_exps.weight q4_K +blk.31.ffn_down_shexp.weight q4_K +blk.31.attn_output.weight q4_K +blk.31.attn_v.weight q4_K +blk.31.ffn_down_exps.weight q4_K +blk.32.attn_qkv.weight q4_K +blk.32.ffn_down_shexp.weight q4_K +blk.32.ffn_down_exps.weight q4_K +blk.33.attn_qkv.weight q4_K +blk.33.ffn_down_shexp.weight q4_K +blk.33.ffn_down_exps.weight q4_K +blk.34.attn_qkv.weight q4_K +blk.34.ffn_down_exps.weight q4_K +blk.34.ffn_down_shexp.weight q4_K +blk.35.ffn_down_shexp.weight q4_K +blk.35.attn_output.weight q4_K +blk.35.attn_v.weight q4_K +blk.35.ffn_down_exps.weight q4_K +blk.36.attn_qkv.weight q4_K +blk.36.ffn_down_shexp.weight q4_K +blk.36.ffn_down_exps.weight q4_K +blk.37.attn_qkv.weight q4_K +blk.37.ffn_down_shexp.weight q4_K +blk.37.ffn_down_exps.weight q4_K +blk.38.attn_qkv.weight q4_K +blk.38.ffn_down_shexp.weight q4_K +blk.38.ffn_down_exps.weight q4_K +blk.39.ffn_down_shexp.weight q4_K +blk.39.attn_output.weight q4_K +blk.39.attn_v.weight q4_K +blk.39.ffn_down_exps.weight q4_K +blk.40.attn_qkv.weight q4_K +blk.40.ffn_down_exps.weight q4_K +blk.40.ffn_down_shexp.weight q4_K +blk.41.attn_qkv.weight q4_K +blk.41.ffn_down_shexp.weight q4_K +blk.41.ffn_down_exps.weight q4_K +blk.42.attn_qkv.weight q4_K +blk.42.ffn_down_shexp.weight q4_K +blk.42.ffn_down_exps.weight q4_K +blk.43.ffn_down_shexp.weight q4_K +blk.43.attn_output.weight q4_K +blk.43.attn_v.weight q4_K +blk.43.ffn_down_exps.weight q4_K +blk.44.attn_qkv.weight q4_K +blk.44.ffn_down_shexp.weight q4_K +blk.44.ffn_down_exps.weight q4_K +blk.45.attn_qkv.weight q4_K +blk.45.ffn_down_shexp.weight q4_K +blk.45.ffn_down_exps.weight q4_K +blk.46.attn_qkv.weight q4_K +blk.46.ffn_down_exps.weight q4_K +blk.46.ffn_down_shexp.weight q4_K +blk.47.ffn_down_shexp.weight q4_K +blk.47.attn_output.weight q4_K +blk.47.attn_v.weight q4_K +output.weight q6_K +blk.47.ffn_down_exps.weight q4_K + +[Q3_K_L] q3_K +blk.0.attn_qkv.weight q4_K +blk.0.ffn_down_exps.weight q5_K +blk.0.ffn_down_shexp.weight q5_K +blk.1.attn_qkv.weight q4_K +blk.1.ffn_down_shexp.weight q5_K +blk.1.ffn_down_exps.weight q5_K +blk.2.attn_qkv.weight q4_K +blk.2.ffn_down_shexp.weight q5_K +blk.2.ffn_down_exps.weight q5_K +blk.3.ffn_down_shexp.weight q5_K +blk.3.attn_output.weight q5_K +blk.3.attn_v.weight q5_K +blk.3.ffn_down_exps.weight q5_K +blk.4.attn_qkv.weight q4_K +blk.4.ffn_down_shexp.weight q5_K +blk.4.ffn_down_exps.weight q5_K +blk.5.attn_qkv.weight q4_K +blk.5.ffn_down_shexp.weight q5_K +blk.5.ffn_down_exps.weight q5_K +blk.6.attn_qkv.weight q4_K +blk.6.ffn_down_exps.weight q5_K +blk.6.ffn_down_shexp.weight q5_K +blk.7.ffn_down_shexp.weight q5_K +blk.7.attn_output.weight q5_K +blk.7.attn_v.weight q5_K +blk.7.ffn_down_exps.weight q5_K +blk.8.attn_qkv.weight q4_K +blk.8.ffn_down_shexp.weight q5_K +blk.8.ffn_down_exps.weight q5_K +blk.9.attn_qkv.weight q4_K +blk.9.ffn_down_shexp.weight q5_K +blk.10.attn_qkv.weight q4_K +blk.10.ffn_down_shexp.weight q5_K +blk.9.ffn_down_exps.weight q5_K +blk.10.ffn_down_exps.weight q5_K +blk.11.ffn_down_shexp.weight q5_K +blk.11.attn_output.weight q5_K +blk.11.attn_v.weight q5_K +blk.11.ffn_down_exps.weight q5_K +blk.12.attn_qkv.weight q4_K +blk.12.ffn_down_exps.weight q5_K +blk.12.ffn_down_shexp.weight q5_K +blk.13.attn_qkv.weight q4_K +blk.13.ffn_down_shexp.weight q5_K +blk.13.ffn_down_exps.weight q5_K +blk.14.attn_qkv.weight q4_K +blk.14.ffn_down_shexp.weight q5_K +blk.14.ffn_down_exps.weight q5_K +blk.15.ffn_down_shexp.weight q5_K +blk.15.attn_output.weight q5_K +blk.15.attn_v.weight q5_K +blk.15.ffn_down_exps.weight q5_K +blk.16.attn_qkv.weight q4_K +blk.16.ffn_down_shexp.weight q5_K +blk.16.ffn_down_exps.weight q5_K +blk.17.attn_qkv.weight q4_K +blk.17.ffn_down_exps.weight q5_K +blk.17.ffn_down_shexp.weight q5_K +blk.18.attn_qkv.weight q4_K +blk.18.ffn_down_shexp.weight q5_K +blk.18.ffn_down_exps.weight q5_K +blk.19.ffn_down_shexp.weight q5_K +blk.19.attn_output.weight q5_K +blk.19.attn_v.weight q5_K +blk.19.ffn_down_exps.weight q5_K +blk.20.attn_qkv.weight q4_K +blk.20.ffn_down_shexp.weight q5_K +blk.20.ffn_down_exps.weight q5_K +blk.21.attn_qkv.weight q4_K +blk.21.ffn_down_shexp.weight q5_K +blk.21.ffn_down_exps.weight q5_K +blk.22.attn_qkv.weight q4_K +blk.22.ffn_down_shexp.weight q5_K +blk.22.ffn_down_exps.weight q5_K +blk.23.ffn_down_exps.weight q5_K +blk.23.ffn_down_shexp.weight q5_K +blk.23.attn_output.weight q5_K +blk.23.attn_v.weight q5_K +blk.24.attn_qkv.weight q4_K +blk.24.ffn_down_shexp.weight q5_K +blk.24.ffn_down_exps.weight q5_K +blk.25.attn_qkv.weight q4_K +blk.25.ffn_down_shexp.weight q5_K +blk.25.ffn_down_exps.weight q5_K +blk.26.attn_qkv.weight q4_K +blk.26.ffn_down_shexp.weight q5_K +blk.26.ffn_down_exps.weight q5_K +blk.27.ffn_down_shexp.weight q5_K +blk.27.attn_output.weight q5_K +blk.27.attn_v.weight q5_K +blk.27.ffn_down_exps.weight q5_K +blk.28.attn_qkv.weight q4_K +blk.28.ffn_down_shexp.weight q5_K +blk.28.ffn_down_exps.weight q5_K +blk.29.attn_qkv.weight q4_K +blk.29.ffn_down_exps.weight q5_K +blk.29.ffn_down_shexp.weight q5_K +blk.30.attn_qkv.weight q4_K +blk.30.ffn_down_shexp.weight q5_K +blk.30.ffn_down_exps.weight q5_K +blk.31.ffn_down_shexp.weight q5_K +blk.31.attn_output.weight q5_K +blk.31.attn_v.weight q5_K +blk.31.ffn_down_exps.weight q5_K +blk.32.attn_qkv.weight q4_K +blk.32.ffn_down_shexp.weight q5_K +blk.32.ffn_down_exps.weight q5_K +blk.33.attn_qkv.weight q4_K +blk.33.ffn_down_shexp.weight q5_K +blk.33.ffn_down_exps.weight q5_K +blk.34.attn_qkv.weight q4_K +blk.34.ffn_down_exps.weight q5_K +blk.34.ffn_down_shexp.weight q5_K +blk.35.ffn_down_shexp.weight q5_K +blk.35.attn_output.weight q5_K +blk.35.attn_v.weight q5_K +blk.35.ffn_down_exps.weight q5_K +blk.36.attn_qkv.weight q4_K +blk.36.ffn_down_shexp.weight q5_K +blk.36.ffn_down_exps.weight q5_K +blk.37.attn_qkv.weight q4_K +blk.37.ffn_down_shexp.weight q5_K +blk.37.ffn_down_exps.weight q5_K +blk.38.attn_qkv.weight q4_K +blk.38.ffn_down_shexp.weight q5_K +blk.38.ffn_down_exps.weight q5_K +blk.39.ffn_down_shexp.weight q5_K +blk.39.attn_output.weight q5_K +blk.39.attn_v.weight q5_K +blk.39.ffn_down_exps.weight q5_K +blk.40.attn_qkv.weight q4_K +blk.40.ffn_down_exps.weight q5_K +blk.40.ffn_down_shexp.weight q5_K +blk.41.attn_qkv.weight q4_K +blk.41.ffn_down_shexp.weight q5_K +blk.41.ffn_down_exps.weight q5_K +blk.42.attn_qkv.weight q4_K +blk.42.ffn_down_shexp.weight q5_K +blk.42.ffn_down_exps.weight q5_K +blk.43.ffn_down_shexp.weight q5_K +blk.43.attn_output.weight q5_K +blk.43.attn_v.weight q5_K +blk.43.ffn_down_exps.weight q5_K +blk.44.attn_qkv.weight q4_K +blk.44.ffn_down_shexp.weight q5_K +blk.44.ffn_down_exps.weight q5_K +blk.45.attn_qkv.weight q4_K +blk.45.ffn_down_shexp.weight q5_K +blk.45.ffn_down_exps.weight q5_K +blk.46.attn_qkv.weight q4_K +blk.46.ffn_down_exps.weight q5_K +blk.46.ffn_down_shexp.weight q5_K +blk.47.ffn_down_shexp.weight q5_K +blk.47.attn_output.weight q5_K +blk.47.attn_v.weight q5_K +output.weight q6_K +blk.47.ffn_down_exps.weight q5_K + +[Q4_K_S] q4_K +blk.0.ffn_down_exps.weight q5_K +blk.0.ffn_down_shexp.weight q5_K +blk.1.ffn_down_shexp.weight q5_K +blk.1.ffn_down_exps.weight q5_K +blk.2.ffn_down_shexp.weight q5_K +blk.2.ffn_down_exps.weight q5_K +blk.3.ffn_down_shexp.weight q5_K +blk.3.attn_v.weight q5_K +blk.3.ffn_down_exps.weight q5_K +blk.4.ffn_down_shexp.weight q5_K +blk.4.ffn_down_exps.weight q5_K +blk.5.ffn_down_shexp.weight q5_K +blk.5.ffn_down_exps.weight q5_K +blk.7.attn_v.weight q5_K +blk.11.attn_v.weight q5_K +blk.15.attn_v.weight q5_K +output.weight q6_K + +[Q4_K_M] q4_K +blk.0.attn_qkv.weight q5_K +blk.0.ffn_down_exps.weight q6_K +blk.0.ffn_down_shexp.weight q6_K +blk.1.attn_qkv.weight q5_K +blk.1.ffn_down_shexp.weight q6_K +blk.1.ffn_down_exps.weight q6_K +blk.2.attn_qkv.weight q5_K +blk.2.ffn_down_shexp.weight q6_K +blk.2.ffn_down_exps.weight q6_K +blk.3.ffn_down_shexp.weight q6_K +blk.3.attn_v.weight q6_K +blk.3.ffn_down_exps.weight q6_K +blk.4.attn_qkv.weight q5_K +blk.4.ffn_down_shexp.weight q6_K +blk.4.ffn_down_exps.weight q6_K +blk.5.attn_qkv.weight q5_K +blk.5.ffn_down_shexp.weight q6_K +blk.5.ffn_down_exps.weight q6_K +blk.6.attn_qkv.weight q5_K +blk.7.attn_v.weight q6_K +blk.8.attn_qkv.weight q5_K +blk.8.ffn_down_shexp.weight q6_K +blk.8.ffn_down_exps.weight q6_K +blk.9.attn_qkv.weight q5_K +blk.10.attn_qkv.weight q5_K +blk.11.ffn_down_shexp.weight q6_K +blk.11.attn_v.weight q6_K +blk.11.ffn_down_exps.weight q6_K +blk.12.attn_qkv.weight q5_K +blk.13.attn_qkv.weight q5_K +blk.14.attn_qkv.weight q5_K +blk.14.ffn_down_shexp.weight q6_K +blk.14.ffn_down_exps.weight q6_K +blk.15.attn_v.weight q6_K +blk.16.attn_qkv.weight q5_K +blk.17.attn_qkv.weight q5_K +blk.17.ffn_down_exps.weight q6_K +blk.17.ffn_down_shexp.weight q6_K +blk.18.attn_qkv.weight q5_K +blk.19.attn_v.weight q6_K +blk.20.attn_qkv.weight q5_K +blk.20.ffn_down_shexp.weight q6_K +blk.20.ffn_down_exps.weight q6_K +blk.21.attn_qkv.weight q5_K +blk.22.attn_qkv.weight q5_K +blk.23.ffn_down_exps.weight q6_K +blk.23.ffn_down_shexp.weight q6_K +blk.23.attn_v.weight q6_K +blk.24.attn_qkv.weight q5_K +blk.25.attn_qkv.weight q5_K +blk.26.attn_qkv.weight q5_K +blk.26.ffn_down_shexp.weight q6_K +blk.26.ffn_down_exps.weight q6_K +blk.28.attn_qkv.weight q5_K +blk.29.attn_qkv.weight q5_K +blk.29.ffn_down_exps.weight q6_K +blk.29.ffn_down_shexp.weight q6_K +blk.30.attn_qkv.weight q5_K +blk.32.attn_qkv.weight q5_K +blk.32.ffn_down_shexp.weight q6_K +blk.32.ffn_down_exps.weight q6_K +blk.33.attn_qkv.weight q5_K +blk.34.attn_qkv.weight q5_K +blk.35.ffn_down_shexp.weight q6_K +blk.35.attn_v.weight q6_K +blk.35.ffn_down_exps.weight q6_K +blk.36.attn_qkv.weight q5_K +blk.37.attn_qkv.weight q5_K +blk.38.attn_qkv.weight q5_K +blk.38.ffn_down_shexp.weight q6_K +blk.38.ffn_down_exps.weight q6_K +blk.40.attn_qkv.weight q5_K +blk.41.attn_qkv.weight q5_K +blk.41.ffn_down_shexp.weight q6_K +blk.41.ffn_down_exps.weight q6_K +blk.42.attn_qkv.weight q5_K +blk.42.ffn_down_shexp.weight q6_K +blk.42.ffn_down_exps.weight q6_K +blk.43.ffn_down_shexp.weight q6_K +blk.43.ffn_down_exps.weight q6_K +blk.44.attn_qkv.weight q5_K +blk.44.ffn_down_shexp.weight q6_K +blk.44.ffn_down_exps.weight q6_K +blk.45.attn_qkv.weight q5_K +blk.45.ffn_down_shexp.weight q6_K +blk.45.ffn_down_exps.weight q6_K +blk.46.attn_qkv.weight q5_K +blk.46.ffn_down_exps.weight q6_K +blk.46.ffn_down_shexp.weight q6_K +blk.47.ffn_down_shexp.weight q6_K +blk.47.attn_v.weight q6_K +output.weight q6_K +blk.47.ffn_down_exps.weight q6_K + +[Q5_K_S] q5_K +output.weight q6_K + +[Q5_K_M] q5_K +blk.0.attn_qkv.weight q6_K +blk.0.ffn_down_exps.weight q6_K +blk.0.ffn_down_shexp.weight q6_K +blk.1.attn_qkv.weight q6_K +blk.1.ffn_down_shexp.weight q6_K +blk.1.ffn_down_exps.weight q6_K +blk.2.attn_qkv.weight q6_K +blk.2.ffn_down_shexp.weight q6_K +blk.2.ffn_down_exps.weight q6_K +blk.3.ffn_down_shexp.weight q6_K +blk.3.attn_v.weight q6_K +blk.3.ffn_down_exps.weight q6_K +blk.4.attn_qkv.weight q6_K +blk.4.ffn_down_shexp.weight q6_K +blk.4.ffn_down_exps.weight q6_K +blk.5.attn_qkv.weight q6_K +blk.5.ffn_down_shexp.weight q6_K +blk.5.ffn_down_exps.weight q6_K +blk.6.attn_qkv.weight q6_K +blk.7.attn_v.weight q6_K +blk.8.attn_qkv.weight q6_K +blk.8.ffn_down_shexp.weight q6_K +blk.8.ffn_down_exps.weight q6_K +blk.9.attn_qkv.weight q6_K +blk.10.attn_qkv.weight q6_K +blk.11.ffn_down_shexp.weight q6_K +blk.11.attn_v.weight q6_K +blk.11.ffn_down_exps.weight q6_K +blk.12.attn_qkv.weight q6_K +blk.13.attn_qkv.weight q6_K +blk.14.attn_qkv.weight q6_K +blk.14.ffn_down_shexp.weight q6_K +blk.14.ffn_down_exps.weight q6_K +blk.15.attn_v.weight q6_K +blk.16.attn_qkv.weight q6_K +blk.17.attn_qkv.weight q6_K +blk.17.ffn_down_exps.weight q6_K +blk.17.ffn_down_shexp.weight q6_K +blk.18.attn_qkv.weight q6_K +blk.19.attn_v.weight q6_K +blk.20.attn_qkv.weight q6_K +blk.20.ffn_down_shexp.weight q6_K +blk.20.ffn_down_exps.weight q6_K +blk.21.attn_qkv.weight q6_K +blk.22.attn_qkv.weight q6_K +blk.23.ffn_down_exps.weight q6_K +blk.23.ffn_down_shexp.weight q6_K +blk.23.attn_v.weight q6_K +blk.24.attn_qkv.weight q6_K +blk.25.attn_qkv.weight q6_K +blk.26.attn_qkv.weight q6_K +blk.26.ffn_down_shexp.weight q6_K +blk.26.ffn_down_exps.weight q6_K +blk.28.attn_qkv.weight q6_K +blk.29.attn_qkv.weight q6_K +blk.29.ffn_down_exps.weight q6_K +blk.29.ffn_down_shexp.weight q6_K +blk.30.attn_qkv.weight q6_K +blk.32.attn_qkv.weight q6_K +blk.32.ffn_down_shexp.weight q6_K +blk.32.ffn_down_exps.weight q6_K +blk.33.attn_qkv.weight q6_K +blk.34.attn_qkv.weight q6_K +blk.35.ffn_down_shexp.weight q6_K +blk.35.attn_v.weight q6_K +blk.35.ffn_down_exps.weight q6_K +blk.36.attn_qkv.weight q6_K +blk.37.attn_qkv.weight q6_K +blk.38.attn_qkv.weight q6_K +blk.38.ffn_down_shexp.weight q6_K +blk.38.ffn_down_exps.weight q6_K +blk.40.attn_qkv.weight q6_K +blk.41.attn_qkv.weight q6_K +blk.41.ffn_down_shexp.weight q6_K +blk.41.ffn_down_exps.weight q6_K +blk.42.attn_qkv.weight q6_K +blk.42.ffn_down_shexp.weight q6_K +blk.42.ffn_down_exps.weight q6_K +blk.43.ffn_down_shexp.weight q6_K +blk.43.ffn_down_exps.weight q6_K +blk.44.attn_qkv.weight q6_K +blk.44.ffn_down_shexp.weight q6_K +blk.44.ffn_down_exps.weight q6_K +blk.45.attn_qkv.weight q6_K +blk.45.ffn_down_shexp.weight q6_K +blk.45.ffn_down_exps.weight q6_K +blk.46.attn_qkv.weight q6_K +blk.46.ffn_down_exps.weight q6_K +blk.46.ffn_down_shexp.weight q6_K +blk.47.ffn_down_shexp.weight q6_K +blk.47.attn_v.weight q6_K +output.weight q6_K +blk.47.ffn_down_exps.weight q6_K + +[Q6_K] q6_K + +[IQ2_XXS] iq2_xxs +token_embd.weight q2_K +blk.0.ffn_down_exps.weight q2_K +blk.0.ffn_down_shexp.weight q2_K +blk.1.ffn_down_shexp.weight q2_K +blk.1.ffn_down_exps.weight q2_K +blk.2.ffn_down_shexp.weight q2_K +blk.2.ffn_down_exps.weight q2_K +blk.3.attn_v.weight q4_K +blk.7.attn_v.weight q4_K +blk.11.attn_v.weight q4_K +blk.15.attn_v.weight q4_K +blk.19.attn_v.weight q4_K +blk.23.attn_v.weight q4_K +blk.27.attn_v.weight q4_K +blk.31.attn_v.weight q4_K +blk.35.attn_v.weight q4_K +blk.39.attn_v.weight q4_K +blk.43.attn_v.weight q4_K +blk.47.attn_v.weight q4_K +output.weight q5_K + +[IQ2_XS] iq2_xs +token_embd.weight q2_K +blk.0.ffn_down_exps.weight q2_K +blk.0.ffn_down_shexp.weight q2_K +blk.1.ffn_down_shexp.weight q2_K +blk.1.ffn_down_exps.weight q2_K +blk.2.ffn_down_shexp.weight q2_K +blk.2.ffn_down_exps.weight q2_K +blk.3.attn_v.weight q4_K +blk.7.attn_v.weight q4_K +blk.11.attn_v.weight q4_K +blk.15.attn_v.weight q4_K +blk.19.attn_v.weight q4_K +blk.23.attn_v.weight q4_K +blk.27.attn_v.weight q4_K +blk.31.attn_v.weight q4_K +blk.35.attn_v.weight q4_K +blk.39.attn_v.weight q4_K +blk.43.attn_v.weight q4_K +blk.47.attn_v.weight q4_K +output.weight q5_K + +[Q2_K_S] q2_K +blk.0.ffn_down_exps.weight q4_K +blk.0.ffn_down_shexp.weight q4_K +blk.1.ffn_down_shexp.weight q4_K +blk.1.ffn_down_exps.weight q4_K +blk.2.ffn_down_shexp.weight q4_K +blk.2.ffn_down_exps.weight q4_K +blk.3.ffn_down_shexp.weight q4_K +blk.3.attn_v.weight q4_K +blk.3.ffn_down_exps.weight q4_K +blk.4.ffn_down_shexp.weight q4_K +blk.4.ffn_down_exps.weight q4_K +blk.5.ffn_down_shexp.weight q4_K +blk.5.ffn_down_exps.weight q4_K +blk.7.attn_v.weight q4_K +blk.11.attn_v.weight q4_K +blk.15.attn_v.weight q4_K +blk.19.attn_v.weight q4_K +blk.23.attn_v.weight q4_K +blk.27.attn_v.weight q4_K +blk.31.attn_v.weight q4_K +blk.35.attn_v.weight q4_K +blk.39.attn_v.weight q4_K +blk.43.attn_v.weight q4_K +blk.47.attn_v.weight q4_K +output.weight q6_K + +[IQ3_XS] iq3_s +blk.3.attn_k.weight iq3_xxs +blk.3.attn_q.weight iq3_xxs +blk.3.attn_v.weight q4_K +blk.6.ffn_gate_exps.weight iq3_xxs +blk.6.ffn_up_exps.weight iq3_xxs +blk.6.ffn_gate_shexp.weight iq3_xxs +blk.6.ffn_up_shexp.weight iq3_xxs +blk.7.ffn_gate_shexp.weight iq3_xxs +blk.7.ffn_up_shexp.weight iq3_xxs +blk.7.attn_k.weight iq3_xxs +blk.7.attn_q.weight iq3_xxs +blk.7.attn_v.weight q4_K +blk.7.ffn_gate_exps.weight iq3_xxs +blk.7.ffn_up_exps.weight iq3_xxs +blk.8.ffn_gate_shexp.weight iq3_xxs +blk.8.ffn_up_shexp.weight iq3_xxs +blk.8.ffn_gate_exps.weight iq3_xxs +blk.8.ffn_up_exps.weight iq3_xxs +blk.9.ffn_gate_shexp.weight iq3_xxs +blk.9.ffn_up_shexp.weight iq3_xxs +blk.10.ffn_gate_shexp.weight iq3_xxs +blk.10.ffn_up_shexp.weight iq3_xxs +blk.9.ffn_gate_exps.weight iq3_xxs +blk.9.ffn_up_exps.weight iq3_xxs +blk.10.ffn_gate_exps.weight iq3_xxs +blk.10.ffn_up_exps.weight iq3_xxs +blk.11.ffn_gate_shexp.weight iq3_xxs +blk.11.ffn_up_shexp.weight iq3_xxs +blk.11.attn_k.weight iq3_xxs +blk.11.attn_q.weight iq3_xxs +blk.11.attn_v.weight q4_K +blk.11.ffn_gate_exps.weight iq3_xxs +blk.11.ffn_up_exps.weight iq3_xxs +blk.12.ffn_gate_exps.weight iq3_xxs +blk.12.ffn_up_exps.weight iq3_xxs +blk.12.ffn_gate_shexp.weight iq3_xxs +blk.12.ffn_up_shexp.weight iq3_xxs +blk.13.ffn_gate_shexp.weight iq3_xxs +blk.13.ffn_up_shexp.weight iq3_xxs +blk.13.ffn_gate_exps.weight iq3_xxs +blk.13.ffn_up_exps.weight iq3_xxs +blk.14.ffn_gate_shexp.weight iq3_xxs +blk.14.ffn_up_shexp.weight iq3_xxs +blk.14.ffn_gate_exps.weight iq3_xxs +blk.14.ffn_up_exps.weight iq3_xxs +blk.15.ffn_gate_shexp.weight iq3_xxs +blk.15.ffn_up_shexp.weight iq3_xxs +blk.15.attn_k.weight iq3_xxs +blk.15.attn_q.weight iq3_xxs +blk.15.attn_v.weight q4_K +blk.15.ffn_gate_exps.weight iq3_xxs +blk.15.ffn_up_exps.weight iq3_xxs +blk.16.ffn_gate_shexp.weight iq3_xxs +blk.16.ffn_up_shexp.weight iq3_xxs +blk.16.ffn_gate_exps.weight iq3_xxs +blk.16.ffn_up_exps.weight iq3_xxs +blk.17.ffn_gate_exps.weight iq3_xxs +blk.17.ffn_up_exps.weight iq3_xxs +blk.17.ffn_gate_shexp.weight iq3_xxs +blk.17.ffn_up_shexp.weight iq3_xxs +blk.18.ffn_gate_shexp.weight iq3_xxs +blk.18.ffn_up_shexp.weight iq3_xxs +blk.18.ffn_gate_exps.weight iq3_xxs +blk.18.ffn_up_exps.weight iq3_xxs +blk.19.ffn_gate_shexp.weight iq3_xxs +blk.19.ffn_up_shexp.weight iq3_xxs +blk.19.attn_k.weight iq3_xxs +blk.19.attn_q.weight iq3_xxs +blk.19.attn_v.weight q4_K +blk.19.ffn_gate_exps.weight iq3_xxs +blk.19.ffn_up_exps.weight iq3_xxs +blk.20.ffn_gate_shexp.weight iq3_xxs +blk.20.ffn_up_shexp.weight iq3_xxs +blk.20.ffn_gate_exps.weight iq3_xxs +blk.20.ffn_up_exps.weight iq3_xxs +blk.21.ffn_gate_shexp.weight iq3_xxs +blk.21.ffn_up_shexp.weight iq3_xxs +blk.21.ffn_gate_exps.weight iq3_xxs +blk.21.ffn_up_exps.weight iq3_xxs +blk.22.ffn_gate_shexp.weight iq3_xxs +blk.22.ffn_up_shexp.weight iq3_xxs +blk.22.ffn_gate_exps.weight iq3_xxs +blk.22.ffn_up_exps.weight iq3_xxs +blk.23.ffn_gate_exps.weight iq3_xxs +blk.23.ffn_up_exps.weight iq3_xxs +blk.23.ffn_gate_shexp.weight iq3_xxs +blk.23.ffn_up_shexp.weight iq3_xxs +blk.23.attn_k.weight iq3_xxs +blk.23.attn_q.weight iq3_xxs +blk.23.attn_v.weight q4_K +blk.24.ffn_gate_shexp.weight iq3_xxs +blk.24.ffn_up_shexp.weight iq3_xxs +blk.24.ffn_gate_exps.weight iq3_xxs +blk.24.ffn_up_exps.weight iq3_xxs +blk.25.ffn_gate_shexp.weight iq3_xxs +blk.25.ffn_up_shexp.weight iq3_xxs +blk.25.ffn_gate_exps.weight iq3_xxs +blk.25.ffn_up_exps.weight iq3_xxs +blk.26.ffn_gate_shexp.weight iq3_xxs +blk.26.ffn_up_shexp.weight iq3_xxs +blk.26.ffn_gate_exps.weight iq3_xxs +blk.26.ffn_up_exps.weight iq3_xxs +blk.27.ffn_gate_shexp.weight iq3_xxs +blk.27.ffn_up_shexp.weight iq3_xxs +blk.27.attn_k.weight iq3_xxs +blk.27.attn_q.weight iq3_xxs +blk.27.attn_v.weight q4_K +blk.27.ffn_gate_exps.weight iq3_xxs +blk.27.ffn_up_exps.weight iq3_xxs +blk.28.ffn_gate_shexp.weight iq3_xxs +blk.28.ffn_up_shexp.weight iq3_xxs +blk.28.ffn_gate_exps.weight iq3_xxs +blk.28.ffn_up_exps.weight iq3_xxs +blk.29.ffn_gate_exps.weight iq3_xxs +blk.29.ffn_up_exps.weight iq3_xxs +blk.29.ffn_gate_shexp.weight iq3_xxs +blk.29.ffn_up_shexp.weight iq3_xxs +blk.30.ffn_gate_shexp.weight iq3_xxs +blk.30.ffn_up_shexp.weight iq3_xxs +blk.30.ffn_gate_exps.weight iq3_xxs +blk.30.ffn_up_exps.weight iq3_xxs +blk.31.ffn_gate_shexp.weight iq3_xxs +blk.31.ffn_up_shexp.weight iq3_xxs +blk.31.attn_k.weight iq3_xxs +blk.31.attn_q.weight iq3_xxs +blk.31.attn_v.weight q4_K +blk.31.ffn_gate_exps.weight iq3_xxs +blk.31.ffn_up_exps.weight iq3_xxs +blk.32.ffn_gate_shexp.weight iq3_xxs +blk.32.ffn_up_shexp.weight iq3_xxs +blk.32.ffn_gate_exps.weight iq3_xxs +blk.32.ffn_up_exps.weight iq3_xxs +blk.33.ffn_gate_shexp.weight iq3_xxs +blk.33.ffn_up_shexp.weight iq3_xxs +blk.33.ffn_gate_exps.weight iq3_xxs +blk.33.ffn_up_exps.weight iq3_xxs +blk.34.ffn_gate_exps.weight iq3_xxs +blk.34.ffn_up_exps.weight iq3_xxs +blk.34.ffn_gate_shexp.weight iq3_xxs +blk.34.ffn_up_shexp.weight iq3_xxs +blk.35.ffn_gate_shexp.weight iq3_xxs +blk.35.ffn_up_shexp.weight iq3_xxs +blk.35.attn_k.weight iq3_xxs +blk.35.attn_q.weight iq3_xxs +blk.35.attn_v.weight q4_K +blk.35.ffn_gate_exps.weight iq3_xxs +blk.35.ffn_up_exps.weight iq3_xxs +blk.36.ffn_gate_shexp.weight iq3_xxs +blk.36.ffn_up_shexp.weight iq3_xxs +blk.36.ffn_gate_exps.weight iq3_xxs +blk.36.ffn_up_exps.weight iq3_xxs +blk.37.ffn_gate_shexp.weight iq3_xxs +blk.37.ffn_up_shexp.weight iq3_xxs +blk.37.ffn_gate_exps.weight iq3_xxs +blk.37.ffn_up_exps.weight iq3_xxs +blk.38.ffn_gate_shexp.weight iq3_xxs +blk.38.ffn_up_shexp.weight iq3_xxs +blk.38.ffn_gate_exps.weight iq3_xxs +blk.38.ffn_up_exps.weight iq3_xxs +blk.39.ffn_gate_shexp.weight iq3_xxs +blk.39.ffn_up_shexp.weight iq3_xxs +blk.39.attn_k.weight iq3_xxs +blk.39.attn_q.weight iq3_xxs +blk.39.attn_v.weight q4_K +blk.39.ffn_gate_exps.weight iq3_xxs +blk.39.ffn_up_exps.weight iq3_xxs +blk.40.ffn_gate_exps.weight iq3_xxs +blk.40.ffn_up_exps.weight iq3_xxs +blk.40.ffn_gate_shexp.weight iq3_xxs +blk.40.ffn_up_shexp.weight iq3_xxs +blk.41.ffn_gate_shexp.weight iq3_xxs +blk.41.ffn_up_shexp.weight iq3_xxs +blk.41.ffn_gate_exps.weight iq3_xxs +blk.41.ffn_up_exps.weight iq3_xxs +blk.43.attn_k.weight iq3_xxs +blk.43.attn_q.weight iq3_xxs +blk.43.attn_v.weight q4_K +blk.47.attn_k.weight iq3_xxs +blk.47.attn_q.weight iq3_xxs +blk.47.attn_v.weight q4_K +output.weight q6_K + +[IQ3_XXS] iq3_xxs +token_embd.weight iq3_s +blk.0.ffn_down_exps.weight q4_K +blk.0.ffn_down_shexp.weight q4_K +blk.1.ffn_down_shexp.weight q4_K +blk.1.ffn_down_exps.weight q4_K +blk.2.ffn_down_shexp.weight q4_K +blk.2.ffn_down_exps.weight q4_K +blk.3.ffn_down_shexp.weight q4_K +blk.3.attn_k.weight iq2_s +blk.3.attn_output.weight iq3_s +blk.3.attn_q.weight iq2_s +blk.3.attn_v.weight q4_K +blk.3.ffn_down_exps.weight q4_K +blk.4.ffn_down_shexp.weight q4_K +blk.4.ffn_down_exps.weight q4_K +blk.5.ffn_down_shexp.weight q4_K +blk.5.ffn_down_exps.weight q4_K +blk.6.ffn_down_exps.weight q3_K +blk.6.ffn_down_shexp.weight q3_K +blk.7.ffn_down_shexp.weight q3_K +blk.7.attn_k.weight iq2_s +blk.7.attn_output.weight iq3_s +blk.7.attn_q.weight iq2_s +blk.7.attn_v.weight q4_K +blk.7.ffn_down_exps.weight q3_K +blk.8.ffn_down_shexp.weight q3_K +blk.8.ffn_down_exps.weight q3_K +blk.9.ffn_down_shexp.weight q3_K +blk.10.ffn_down_shexp.weight q3_K +blk.9.ffn_down_exps.weight q3_K +blk.10.ffn_down_exps.weight q3_K +blk.11.ffn_down_shexp.weight q3_K +blk.11.attn_k.weight iq2_s +blk.11.attn_output.weight iq3_s +blk.11.attn_q.weight iq2_s +blk.11.attn_v.weight q4_K +blk.11.ffn_down_exps.weight q3_K +blk.12.ffn_down_exps.weight q3_K +blk.12.ffn_down_shexp.weight q3_K +blk.13.ffn_down_shexp.weight q3_K +blk.13.ffn_down_exps.weight q3_K +blk.14.ffn_down_shexp.weight q3_K +blk.14.ffn_down_exps.weight q3_K +blk.15.ffn_down_shexp.weight q3_K +blk.15.attn_k.weight iq2_s +blk.15.attn_output.weight iq3_s +blk.15.attn_q.weight iq2_s +blk.15.attn_v.weight q4_K +blk.15.ffn_down_exps.weight q3_K +blk.16.ffn_down_shexp.weight q3_K +blk.16.ffn_down_exps.weight q3_K +blk.17.ffn_down_exps.weight q3_K +blk.17.ffn_down_shexp.weight q3_K +blk.18.ffn_down_shexp.weight q3_K +blk.18.ffn_down_exps.weight q3_K +blk.19.ffn_down_shexp.weight q3_K +blk.19.attn_k.weight iq2_s +blk.19.attn_output.weight iq3_s +blk.19.attn_q.weight iq2_s +blk.19.attn_v.weight q4_K +blk.19.ffn_down_exps.weight q3_K +blk.20.ffn_down_shexp.weight q3_K +blk.20.ffn_down_exps.weight q3_K +blk.21.ffn_down_shexp.weight q3_K +blk.21.ffn_down_exps.weight q3_K +blk.22.ffn_down_shexp.weight q3_K +blk.22.ffn_down_exps.weight q3_K +blk.23.ffn_down_exps.weight q3_K +blk.23.ffn_down_shexp.weight q3_K +blk.23.attn_k.weight iq2_s +blk.23.attn_output.weight iq3_s +blk.23.attn_q.weight iq2_s +blk.23.attn_v.weight q4_K +blk.24.ffn_down_shexp.weight q3_K +blk.24.ffn_down_exps.weight q3_K +blk.25.ffn_down_shexp.weight q3_K +blk.25.ffn_down_exps.weight q3_K +blk.26.ffn_down_shexp.weight q3_K +blk.26.ffn_down_exps.weight q3_K +blk.27.ffn_down_shexp.weight q3_K +blk.27.attn_k.weight iq2_s +blk.27.attn_output.weight iq3_s +blk.27.attn_q.weight iq2_s +blk.27.attn_v.weight q4_K +blk.27.ffn_down_exps.weight q3_K +blk.28.ffn_down_shexp.weight q3_K +blk.28.ffn_down_exps.weight q3_K +blk.29.ffn_down_exps.weight q3_K +blk.29.ffn_down_shexp.weight q3_K +blk.30.ffn_down_shexp.weight q3_K +blk.30.ffn_down_exps.weight q3_K +blk.31.ffn_down_shexp.weight q3_K +blk.31.attn_k.weight iq2_s +blk.31.attn_output.weight iq3_s +blk.31.attn_q.weight iq2_s +blk.31.attn_v.weight q4_K +blk.31.ffn_down_exps.weight q3_K +blk.32.ffn_down_shexp.weight q3_K +blk.32.ffn_down_exps.weight q3_K +blk.33.ffn_down_shexp.weight q3_K +blk.33.ffn_down_exps.weight q3_K +blk.34.ffn_down_exps.weight q3_K +blk.34.ffn_down_shexp.weight q3_K +blk.35.ffn_down_shexp.weight q3_K +blk.35.attn_k.weight iq2_s +blk.35.attn_output.weight iq3_s +blk.35.attn_q.weight iq2_s +blk.35.attn_v.weight q4_K +blk.35.ffn_down_exps.weight q3_K +blk.36.ffn_down_shexp.weight q3_K +blk.36.ffn_down_exps.weight q3_K +blk.37.ffn_down_shexp.weight q3_K +blk.37.ffn_down_exps.weight q3_K +blk.38.ffn_down_shexp.weight q3_K +blk.38.ffn_down_exps.weight q3_K +blk.39.ffn_down_shexp.weight q3_K +blk.39.attn_k.weight iq2_s +blk.39.attn_output.weight iq3_s +blk.39.attn_q.weight iq2_s +blk.39.attn_v.weight q4_K +blk.39.ffn_down_exps.weight q3_K +blk.40.ffn_down_exps.weight q3_K +blk.40.ffn_down_shexp.weight q3_K +blk.41.ffn_down_shexp.weight q3_K +blk.41.ffn_down_exps.weight q3_K +blk.42.ffn_down_shexp.weight q3_K +blk.42.ffn_down_exps.weight q3_K +blk.43.ffn_down_shexp.weight q3_K +blk.43.attn_k.weight iq2_s +blk.43.attn_output.weight iq3_s +blk.43.attn_q.weight iq2_s +blk.43.attn_v.weight q4_K +blk.43.ffn_down_exps.weight q3_K +blk.44.ffn_down_shexp.weight q3_K +blk.44.ffn_down_exps.weight q3_K +blk.45.ffn_down_shexp.weight q3_K +blk.45.ffn_down_exps.weight q3_K +blk.46.ffn_down_exps.weight q3_K +blk.46.ffn_down_shexp.weight q3_K +blk.47.ffn_down_shexp.weight q3_K +blk.47.attn_k.weight iq2_s +blk.47.attn_output.weight iq3_s +blk.47.attn_q.weight iq2_s +blk.47.attn_v.weight q4_K +output.weight q5_K +blk.47.ffn_down_exps.weight q3_K + +[IQ1_S] iq1_s +token_embd.weight q2_K +blk.0.ffn_down_exps.weight q2_K +blk.0.ffn_down_shexp.weight q2_K +blk.1.ffn_down_shexp.weight q2_K +blk.1.ffn_down_exps.weight q2_K +blk.2.ffn_down_shexp.weight q2_K +blk.2.ffn_down_exps.weight q2_K +blk.3.attn_output.weight iq2_xxs +blk.3.attn_v.weight q4_K +blk.7.attn_output.weight iq2_xxs +blk.7.attn_v.weight q4_K +blk.11.attn_output.weight iq2_xxs +blk.11.attn_v.weight q4_K +blk.15.attn_output.weight iq2_xxs +blk.15.attn_v.weight q4_K +blk.19.attn_output.weight iq2_xxs +blk.19.attn_v.weight q4_K +blk.23.attn_output.weight iq2_xxs +blk.23.attn_v.weight q4_K +blk.27.attn_output.weight iq2_xxs +blk.27.attn_v.weight q4_K +blk.31.attn_output.weight iq2_xxs +blk.31.attn_v.weight q4_K +blk.35.attn_output.weight iq2_xxs +blk.35.attn_v.weight q4_K +blk.39.attn_output.weight iq2_xxs +blk.39.attn_v.weight q4_K +blk.43.attn_output.weight iq2_xxs +blk.43.attn_v.weight q4_K +blk.47.attn_output.weight iq2_xxs +blk.47.attn_v.weight q4_K +output.weight q5_K + +[IQ4_NL] iq4_nl +blk.0.ffn_down_exps.weight q5_K +blk.0.ffn_down_shexp.weight q5_K +blk.1.ffn_down_shexp.weight q5_K +blk.1.ffn_down_exps.weight q5_K +blk.2.ffn_down_shexp.weight q5_K +blk.2.ffn_down_exps.weight q5_K +blk.3.ffn_down_shexp.weight q5_K +blk.3.attn_v.weight q5_K +blk.3.ffn_down_exps.weight q5_K +blk.4.ffn_down_shexp.weight q5_K +blk.4.ffn_down_exps.weight q5_K +blk.5.ffn_down_shexp.weight q5_K +blk.5.ffn_down_exps.weight q5_K +blk.7.attn_v.weight q5_K +blk.11.attn_v.weight q5_K +blk.15.attn_v.weight q5_K +blk.19.attn_v.weight q5_K +blk.23.attn_v.weight q5_K +blk.27.attn_v.weight q5_K +blk.31.attn_v.weight q5_K +blk.35.attn_v.weight q5_K +blk.39.attn_v.weight q5_K +blk.43.attn_v.weight q5_K +blk.47.attn_v.weight q5_K +output.weight q6_K + +[IQ3_S] iq3_s +blk.3.attn_v.weight q4_K +blk.7.attn_v.weight q4_K +blk.11.attn_v.weight q4_K +blk.15.attn_v.weight q4_K +blk.19.attn_v.weight q4_K +blk.23.attn_v.weight q4_K +blk.27.attn_v.weight q4_K +blk.31.attn_v.weight q4_K +blk.35.attn_v.weight q4_K +blk.39.attn_v.weight q4_K +blk.43.attn_v.weight q4_K +blk.47.attn_v.weight q4_K +output.weight q6_K + +[IQ3_M] iq3_s +blk.0.attn_qkv.weight q4_K +blk.0.ffn_down_exps.weight q4_K +blk.0.ffn_down_shexp.weight q4_K +blk.1.attn_qkv.weight q4_K +blk.1.ffn_down_shexp.weight q4_K +blk.1.ffn_down_exps.weight q4_K +blk.2.attn_qkv.weight q4_K +blk.2.ffn_down_shexp.weight q4_K +blk.2.ffn_down_exps.weight q4_K +blk.3.ffn_down_shexp.weight q4_K +blk.3.attn_output.weight q4_K +blk.3.attn_v.weight q4_K +blk.3.ffn_down_exps.weight q4_K +blk.4.attn_qkv.weight q4_K +blk.4.ffn_down_shexp.weight q4_K +blk.4.ffn_down_exps.weight q4_K +blk.5.attn_qkv.weight q4_K +blk.5.ffn_down_shexp.weight q4_K +blk.5.ffn_down_exps.weight q4_K +blk.6.attn_qkv.weight q4_K +blk.7.attn_output.weight q4_K +blk.7.attn_v.weight q4_K +blk.8.attn_qkv.weight q4_K +blk.9.attn_qkv.weight q4_K +blk.10.attn_qkv.weight q4_K +blk.11.attn_output.weight q4_K +blk.11.attn_v.weight q4_K +blk.12.attn_qkv.weight q4_K +blk.13.attn_qkv.weight q4_K +blk.14.attn_qkv.weight q4_K +blk.15.attn_output.weight q4_K +blk.15.attn_v.weight q4_K +blk.16.attn_qkv.weight q4_K +blk.17.attn_qkv.weight q4_K +blk.18.attn_qkv.weight q4_K +blk.19.attn_output.weight q4_K +blk.19.attn_v.weight q4_K +blk.20.attn_qkv.weight q4_K +blk.21.attn_qkv.weight q4_K +blk.22.attn_qkv.weight q4_K +blk.23.attn_output.weight q4_K +blk.23.attn_v.weight q4_K +blk.24.attn_qkv.weight q4_K +blk.25.attn_qkv.weight q4_K +blk.26.attn_qkv.weight q4_K +blk.27.attn_output.weight q4_K +blk.27.attn_v.weight q4_K +blk.28.attn_qkv.weight q4_K +blk.29.attn_qkv.weight q4_K +blk.30.attn_qkv.weight q4_K +blk.31.attn_output.weight q4_K +blk.31.attn_v.weight q4_K +blk.32.attn_qkv.weight q4_K +blk.33.attn_qkv.weight q4_K +blk.34.attn_qkv.weight q4_K +blk.35.attn_output.weight q4_K +blk.35.attn_v.weight q4_K +blk.36.attn_qkv.weight q4_K +blk.37.attn_qkv.weight q4_K +blk.38.attn_qkv.weight q4_K +blk.39.attn_output.weight q4_K +blk.39.attn_v.weight q4_K +blk.40.attn_qkv.weight q4_K +blk.41.attn_qkv.weight q4_K +blk.42.attn_qkv.weight q4_K +blk.43.attn_output.weight q4_K +blk.43.attn_v.weight q4_K +blk.44.attn_qkv.weight q4_K +blk.45.attn_qkv.weight q4_K +blk.46.attn_qkv.weight q4_K +blk.47.attn_output.weight q4_K +blk.47.attn_v.weight q4_K +output.weight q6_K + +[IQ2_S] iq2_xs +token_embd.weight iq3_s +blk.0.ffn_down_exps.weight iq3_s +blk.0.ffn_down_shexp.weight iq3_s +blk.1.ffn_down_shexp.weight iq3_s +blk.1.ffn_down_exps.weight iq3_s +blk.2.ffn_down_shexp.weight iq3_s +blk.2.ffn_down_exps.weight iq3_s +blk.3.attn_output.weight iq3_s +blk.3.attn_v.weight q4_K +blk.7.attn_output.weight iq3_s +blk.7.attn_v.weight q4_K +blk.11.attn_output.weight iq3_s +blk.11.attn_v.weight q4_K +blk.15.attn_output.weight iq3_s +blk.15.attn_v.weight q4_K +blk.19.attn_output.weight iq3_s +blk.19.attn_v.weight q4_K +blk.23.attn_output.weight iq3_s +blk.23.attn_v.weight q4_K +blk.27.attn_output.weight iq3_s +blk.27.attn_v.weight q4_K +blk.31.attn_output.weight iq3_s +blk.31.attn_v.weight q4_K +blk.35.attn_output.weight iq3_s +blk.35.attn_v.weight q4_K +blk.39.attn_output.weight iq3_s +blk.39.attn_v.weight q4_K +blk.43.attn_output.weight iq3_s +blk.43.attn_v.weight q4_K +blk.47.attn_output.weight iq3_s +blk.47.attn_v.weight q4_K +output.weight q5_K + +[IQ2_M] iq2_s +token_embd.weight iq3_s +blk.0.ffn_down_exps.weight iq3_s +blk.0.ffn_down_shexp.weight iq3_s +blk.1.ffn_down_shexp.weight iq3_s +blk.1.ffn_down_exps.weight iq3_s +blk.2.ffn_down_shexp.weight iq3_s +blk.2.ffn_down_exps.weight iq3_s +blk.3.attn_output.weight iq3_s +blk.3.attn_v.weight q4_K +blk.7.attn_output.weight iq3_s +blk.7.attn_v.weight q4_K +blk.11.attn_output.weight iq3_s +blk.11.attn_v.weight q4_K +blk.15.attn_output.weight iq3_s +blk.15.attn_v.weight q4_K +blk.19.attn_output.weight iq3_s +blk.19.attn_v.weight q4_K +blk.23.attn_output.weight iq3_s +blk.23.attn_v.weight q4_K +blk.27.attn_output.weight iq3_s +blk.27.attn_v.weight q4_K +blk.31.attn_output.weight iq3_s +blk.31.attn_v.weight q4_K +blk.35.attn_output.weight iq3_s +blk.35.attn_v.weight q4_K +blk.39.attn_output.weight iq3_s +blk.39.attn_v.weight q4_K +blk.43.attn_output.weight iq3_s +blk.43.attn_v.weight q4_K +blk.47.attn_output.weight iq3_s +blk.47.attn_v.weight q4_K +output.weight q5_K + +[IQ4_XS] iq4_xs +blk.0.ffn_down_exps.weight q5_K +blk.0.ffn_down_shexp.weight q5_K +blk.1.ffn_down_shexp.weight q5_K +blk.1.ffn_down_exps.weight q5_K +blk.2.ffn_down_shexp.weight q5_K +blk.2.ffn_down_exps.weight q5_K +blk.3.ffn_down_shexp.weight q5_K +blk.3.attn_v.weight q5_K +blk.3.ffn_down_exps.weight q5_K +blk.4.ffn_down_shexp.weight q5_K +blk.4.ffn_down_exps.weight q5_K +blk.5.ffn_down_shexp.weight q5_K +blk.5.ffn_down_exps.weight q5_K +blk.7.attn_v.weight q5_K +blk.11.attn_v.weight q5_K +blk.15.attn_v.weight q5_K +blk.19.attn_v.weight q5_K +blk.23.attn_v.weight q5_K +blk.27.attn_v.weight q5_K +blk.31.attn_v.weight q5_K +blk.35.attn_v.weight q5_K +blk.39.attn_v.weight q5_K +blk.43.attn_v.weight q5_K +blk.47.attn_v.weight q5_K +output.weight q6_K + +[IQ1_M] iq1_m +token_embd.weight q2_K +blk.0.ffn_down_exps.weight q2_K +blk.0.ffn_down_shexp.weight q2_K +blk.1.ffn_down_shexp.weight q2_K +blk.1.ffn_down_exps.weight q2_K +blk.2.ffn_down_shexp.weight q2_K +blk.2.ffn_down_exps.weight q2_K +blk.3.attn_output.weight iq2_xxs +blk.3.attn_v.weight q4_K +blk.7.attn_output.weight iq2_xxs +blk.7.attn_v.weight q4_K +blk.11.attn_output.weight iq2_xxs +blk.11.attn_v.weight q4_K +blk.15.attn_output.weight iq2_xxs +blk.15.attn_v.weight q4_K +blk.19.attn_output.weight iq2_xxs +blk.19.attn_v.weight q4_K +blk.23.attn_output.weight iq2_xxs +blk.23.attn_v.weight q4_K +blk.27.attn_output.weight iq2_xxs +blk.27.attn_v.weight q4_K +blk.31.attn_output.weight iq2_xxs +blk.31.attn_v.weight q4_K +blk.35.attn_output.weight iq2_xxs +blk.35.attn_v.weight q4_K +blk.39.attn_output.weight iq2_xxs +blk.39.attn_v.weight q4_K +blk.43.attn_output.weight iq2_xxs +blk.43.attn_v.weight q4_K +blk.47.attn_output.weight iq2_xxs +blk.47.attn_v.weight q4_K +output.weight q5_K + +[BF16] bf16 +output.weight q6_K + +[TQ1_0] tq1_0 +token_embd.weight q4_K +output.weight q6_K + +[TQ2_0] tq2_0 +token_embd.weight q4_K +output.weight q6_K + +[MXFP4_MOE] mxfp4 +token_embd.weight q8_0 +blk.0.ssm_ba.weight q8_0 +blk.0.attn_qkv.weight q8_0 +blk.0.attn_gate.weight q8_0 +blk.0.ssm_out.weight q8_0 +blk.0.ffn_down_shexp.weight q8_0 +blk.0.ffn_gate_shexp.weight q8_0 +blk.0.ffn_up_shexp.weight q8_0 +blk.1.ssm_ba.weight q8_0 +blk.1.attn_qkv.weight q8_0 +blk.1.attn_gate.weight q8_0 +blk.1.ssm_out.weight q8_0 +blk.1.ffn_down_shexp.weight q8_0 +blk.1.ffn_gate_shexp.weight q8_0 +blk.1.ffn_up_shexp.weight q8_0 +blk.2.ssm_ba.weight q8_0 +blk.2.attn_qkv.weight q8_0 +blk.2.attn_gate.weight q8_0 +blk.2.ssm_out.weight q8_0 +blk.2.ffn_down_shexp.weight q8_0 +blk.2.ffn_gate_shexp.weight q8_0 +blk.2.ffn_up_shexp.weight q8_0 +blk.3.ffn_down_shexp.weight q8_0 +blk.3.ffn_gate_shexp.weight q8_0 +blk.3.ffn_up_shexp.weight q8_0 +blk.3.attn_k.weight q8_0 +blk.3.attn_output.weight q8_0 +blk.3.attn_q.weight q8_0 +blk.3.attn_v.weight q8_0 +blk.4.ssm_ba.weight q8_0 +blk.4.attn_qkv.weight q8_0 +blk.4.attn_gate.weight q8_0 +blk.4.ssm_out.weight q8_0 +blk.4.ffn_down_shexp.weight q8_0 +blk.4.ffn_gate_shexp.weight q8_0 +blk.4.ffn_up_shexp.weight q8_0 +blk.5.ssm_ba.weight q8_0 +blk.5.attn_qkv.weight q8_0 +blk.5.attn_gate.weight q8_0 +blk.5.ssm_out.weight q8_0 +blk.5.ffn_down_shexp.weight q8_0 +blk.5.ffn_gate_shexp.weight q8_0 +blk.5.ffn_up_shexp.weight q8_0 +blk.6.ssm_ba.weight q8_0 +blk.6.attn_qkv.weight q8_0 +blk.6.attn_gate.weight q8_0 +blk.6.ssm_out.weight q8_0 +blk.6.ffn_down_shexp.weight q8_0 +blk.6.ffn_gate_shexp.weight q8_0 +blk.6.ffn_up_shexp.weight q8_0 +blk.7.ffn_down_shexp.weight q8_0 +blk.7.ffn_gate_shexp.weight q8_0 +blk.7.ffn_up_shexp.weight q8_0 +blk.7.attn_k.weight q8_0 +blk.7.attn_output.weight q8_0 +blk.7.attn_q.weight q8_0 +blk.7.attn_v.weight q8_0 +blk.8.ssm_ba.weight q8_0 +blk.8.attn_qkv.weight q8_0 +blk.8.attn_gate.weight q8_0 +blk.8.ssm_out.weight q8_0 +blk.8.ffn_down_shexp.weight q8_0 +blk.8.ffn_gate_shexp.weight q8_0 +blk.8.ffn_up_shexp.weight q8_0 +blk.9.ssm_ba.weight q8_0 +blk.9.attn_qkv.weight q8_0 +blk.9.attn_gate.weight q8_0 +blk.9.ssm_out.weight q8_0 +blk.9.ffn_down_shexp.weight q8_0 +blk.9.ffn_gate_shexp.weight q8_0 +blk.9.ffn_up_shexp.weight q8_0 +blk.10.ssm_ba.weight q8_0 +blk.10.attn_qkv.weight q8_0 +blk.10.attn_gate.weight q8_0 +blk.10.ssm_out.weight q8_0 +blk.10.ffn_down_shexp.weight q8_0 +blk.10.ffn_gate_shexp.weight q8_0 +blk.10.ffn_up_shexp.weight q8_0 +blk.11.ffn_down_shexp.weight q8_0 +blk.11.ffn_gate_shexp.weight q8_0 +blk.11.ffn_up_shexp.weight q8_0 +blk.11.attn_k.weight q8_0 +blk.11.attn_output.weight q8_0 +blk.11.attn_q.weight q8_0 +blk.11.attn_v.weight q8_0 +blk.12.ssm_ba.weight q8_0 +blk.12.attn_qkv.weight q8_0 +blk.12.attn_gate.weight q8_0 +blk.12.ssm_out.weight q8_0 +blk.12.ffn_down_shexp.weight q8_0 +blk.12.ffn_gate_shexp.weight q8_0 +blk.12.ffn_up_shexp.weight q8_0 +blk.13.ssm_ba.weight q8_0 +blk.13.attn_qkv.weight q8_0 +blk.13.attn_gate.weight q8_0 +blk.13.ssm_out.weight q8_0 +blk.13.ffn_down_shexp.weight q8_0 +blk.13.ffn_gate_shexp.weight q8_0 +blk.13.ffn_up_shexp.weight q8_0 +blk.14.ssm_ba.weight q8_0 +blk.14.attn_qkv.weight q8_0 +blk.14.attn_gate.weight q8_0 +blk.14.ssm_out.weight q8_0 +blk.14.ffn_down_shexp.weight q8_0 +blk.14.ffn_gate_shexp.weight q8_0 +blk.14.ffn_up_shexp.weight q8_0 +blk.15.ffn_down_shexp.weight q8_0 +blk.15.ffn_gate_shexp.weight q8_0 +blk.15.ffn_up_shexp.weight q8_0 +blk.15.attn_k.weight q8_0 +blk.15.attn_output.weight q8_0 +blk.15.attn_q.weight q8_0 +blk.15.attn_v.weight q8_0 +blk.16.ssm_ba.weight q8_0 +blk.16.attn_qkv.weight q8_0 +blk.16.attn_gate.weight q8_0 +blk.16.ssm_out.weight q8_0 +blk.16.ffn_down_shexp.weight q8_0 +blk.16.ffn_gate_shexp.weight q8_0 +blk.16.ffn_up_shexp.weight q8_0 +blk.17.ssm_ba.weight q8_0 +blk.17.attn_qkv.weight q8_0 +blk.17.attn_gate.weight q8_0 +blk.17.ssm_out.weight q8_0 +blk.17.ffn_down_shexp.weight q8_0 +blk.17.ffn_gate_shexp.weight q8_0 +blk.17.ffn_up_shexp.weight q8_0 +blk.18.ssm_ba.weight q8_0 +blk.18.attn_qkv.weight q8_0 +blk.18.attn_gate.weight q8_0 +blk.18.ssm_out.weight q8_0 +blk.18.ffn_down_shexp.weight q8_0 +blk.18.ffn_gate_shexp.weight q8_0 +blk.18.ffn_up_shexp.weight q8_0 +blk.19.ffn_down_shexp.weight q8_0 +blk.19.ffn_gate_shexp.weight q8_0 +blk.19.ffn_up_shexp.weight q8_0 +blk.19.attn_k.weight q8_0 +blk.19.attn_output.weight q8_0 +blk.19.attn_q.weight q8_0 +blk.19.attn_v.weight q8_0 +blk.20.ssm_ba.weight q8_0 +blk.20.attn_qkv.weight q8_0 +blk.20.attn_gate.weight q8_0 +blk.20.ssm_out.weight q8_0 +blk.20.ffn_down_shexp.weight q8_0 +blk.20.ffn_gate_shexp.weight q8_0 +blk.20.ffn_up_shexp.weight q8_0 +blk.21.ssm_ba.weight q8_0 +blk.21.attn_qkv.weight q8_0 +blk.21.attn_gate.weight q8_0 +blk.21.ssm_out.weight q8_0 +blk.21.ffn_down_shexp.weight q8_0 +blk.21.ffn_gate_shexp.weight q8_0 +blk.21.ffn_up_shexp.weight q8_0 +blk.22.ssm_ba.weight q8_0 +blk.22.attn_qkv.weight q8_0 +blk.22.attn_gate.weight q8_0 +blk.22.ssm_out.weight q8_0 +blk.22.ffn_down_shexp.weight q8_0 +blk.22.ffn_gate_shexp.weight q8_0 +blk.22.ffn_up_shexp.weight q8_0 +blk.23.ffn_down_shexp.weight q8_0 +blk.23.ffn_gate_shexp.weight q8_0 +blk.23.ffn_up_shexp.weight q8_0 +blk.23.attn_k.weight q8_0 +blk.23.attn_output.weight q8_0 +blk.23.attn_q.weight q8_0 +blk.23.attn_v.weight q8_0 +blk.24.ssm_ba.weight q8_0 +blk.24.attn_qkv.weight q8_0 +blk.24.attn_gate.weight q8_0 +blk.24.ssm_out.weight q8_0 +blk.24.ffn_down_shexp.weight q8_0 +blk.24.ffn_gate_shexp.weight q8_0 +blk.24.ffn_up_shexp.weight q8_0 +blk.25.ssm_ba.weight q8_0 +blk.25.attn_qkv.weight q8_0 +blk.25.attn_gate.weight q8_0 +blk.25.ssm_out.weight q8_0 +blk.25.ffn_down_shexp.weight q8_0 +blk.25.ffn_gate_shexp.weight q8_0 +blk.25.ffn_up_shexp.weight q8_0 +blk.26.ssm_ba.weight q8_0 +blk.26.attn_qkv.weight q8_0 +blk.26.attn_gate.weight q8_0 +blk.26.ssm_out.weight q8_0 +blk.26.ffn_down_shexp.weight q8_0 +blk.26.ffn_gate_shexp.weight q8_0 +blk.26.ffn_up_shexp.weight q8_0 +blk.27.ffn_down_shexp.weight q8_0 +blk.27.ffn_gate_shexp.weight q8_0 +blk.27.ffn_up_shexp.weight q8_0 +blk.27.attn_k.weight q8_0 +blk.27.attn_output.weight q8_0 +blk.27.attn_q.weight q8_0 +blk.27.attn_v.weight q8_0 +blk.28.ssm_ba.weight q8_0 +blk.28.attn_qkv.weight q8_0 +blk.28.attn_gate.weight q8_0 +blk.28.ssm_out.weight q8_0 +blk.28.ffn_down_shexp.weight q8_0 +blk.28.ffn_gate_shexp.weight q8_0 +blk.28.ffn_up_shexp.weight q8_0 +blk.29.ssm_ba.weight q8_0 +blk.29.attn_qkv.weight q8_0 +blk.29.attn_gate.weight q8_0 +blk.29.ssm_out.weight q8_0 +blk.29.ffn_down_shexp.weight q8_0 +blk.29.ffn_gate_shexp.weight q8_0 +blk.29.ffn_up_shexp.weight q8_0 +blk.30.ssm_ba.weight q8_0 +blk.30.attn_qkv.weight q8_0 +blk.30.attn_gate.weight q8_0 +blk.30.ssm_out.weight q8_0 +blk.30.ffn_down_shexp.weight q8_0 +blk.30.ffn_gate_shexp.weight q8_0 +blk.30.ffn_up_shexp.weight q8_0 +blk.31.ffn_down_shexp.weight q8_0 +blk.31.ffn_gate_shexp.weight q8_0 +blk.31.ffn_up_shexp.weight q8_0 +blk.31.attn_k.weight q8_0 +blk.31.attn_output.weight q8_0 +blk.31.attn_q.weight q8_0 +blk.31.attn_v.weight q8_0 +blk.32.ssm_ba.weight q8_0 +blk.32.attn_qkv.weight q8_0 +blk.32.attn_gate.weight q8_0 +blk.32.ssm_out.weight q8_0 +blk.32.ffn_down_shexp.weight q8_0 +blk.32.ffn_gate_shexp.weight q8_0 +blk.32.ffn_up_shexp.weight q8_0 +blk.33.ssm_ba.weight q8_0 +blk.33.attn_qkv.weight q8_0 +blk.33.attn_gate.weight q8_0 +blk.33.ssm_out.weight q8_0 +blk.33.ffn_down_shexp.weight q8_0 +blk.33.ffn_gate_shexp.weight q8_0 +blk.33.ffn_up_shexp.weight q8_0 +blk.34.ssm_ba.weight q8_0 +blk.34.attn_qkv.weight q8_0 +blk.34.attn_gate.weight q8_0 +blk.34.ssm_out.weight q8_0 +blk.34.ffn_down_shexp.weight q8_0 +blk.34.ffn_gate_shexp.weight q8_0 +blk.34.ffn_up_shexp.weight q8_0 +blk.35.ffn_down_shexp.weight q8_0 +blk.35.ffn_gate_shexp.weight q8_0 +blk.35.ffn_up_shexp.weight q8_0 +blk.35.attn_k.weight q8_0 +blk.35.attn_output.weight q8_0 +blk.35.attn_q.weight q8_0 +blk.35.attn_v.weight q8_0 +blk.36.ssm_ba.weight q8_0 +blk.36.attn_qkv.weight q8_0 +blk.36.attn_gate.weight q8_0 +blk.36.ssm_out.weight q8_0 +blk.36.ffn_down_shexp.weight q8_0 +blk.36.ffn_gate_shexp.weight q8_0 +blk.36.ffn_up_shexp.weight q8_0 +blk.37.ssm_ba.weight q8_0 +blk.37.attn_qkv.weight q8_0 +blk.37.attn_gate.weight q8_0 +blk.37.ssm_out.weight q8_0 +blk.37.ffn_down_shexp.weight q8_0 +blk.37.ffn_gate_shexp.weight q8_0 +blk.37.ffn_up_shexp.weight q8_0 +blk.38.ssm_ba.weight q8_0 +blk.38.attn_qkv.weight q8_0 +blk.38.attn_gate.weight q8_0 +blk.38.ssm_out.weight q8_0 +blk.38.ffn_down_shexp.weight q8_0 +blk.38.ffn_gate_shexp.weight q8_0 +blk.38.ffn_up_shexp.weight q8_0 +blk.39.ffn_down_shexp.weight q8_0 +blk.39.ffn_gate_shexp.weight q8_0 +blk.39.ffn_up_shexp.weight q8_0 +blk.39.attn_k.weight q8_0 +blk.39.attn_output.weight q8_0 +blk.39.attn_q.weight q8_0 +blk.39.attn_v.weight q8_0 +blk.40.ssm_ba.weight q8_0 +blk.40.attn_qkv.weight q8_0 +blk.40.attn_gate.weight q8_0 +blk.40.ssm_out.weight q8_0 +blk.40.ffn_down_shexp.weight q8_0 +blk.40.ffn_gate_shexp.weight q8_0 +blk.40.ffn_up_shexp.weight q8_0 +blk.41.ssm_ba.weight q8_0 +blk.41.attn_qkv.weight q8_0 +blk.41.attn_gate.weight q8_0 +blk.41.ssm_out.weight q8_0 +blk.41.ffn_down_shexp.weight q8_0 +blk.41.ffn_gate_shexp.weight q8_0 +blk.41.ffn_up_shexp.weight q8_0 +blk.42.ssm_ba.weight q8_0 +blk.42.attn_qkv.weight q8_0 +blk.42.attn_gate.weight q8_0 +blk.42.ssm_out.weight q8_0 +blk.42.ffn_down_shexp.weight q8_0 +blk.42.ffn_gate_shexp.weight q8_0 +blk.42.ffn_up_shexp.weight q8_0 +blk.43.ffn_down_shexp.weight q8_0 +blk.43.ffn_gate_shexp.weight q8_0 +blk.43.ffn_up_shexp.weight q8_0 +blk.43.attn_k.weight q8_0 +blk.43.attn_output.weight q8_0 +blk.43.attn_q.weight q8_0 +blk.43.attn_v.weight q8_0 +blk.44.ssm_ba.weight q8_0 +blk.44.attn_qkv.weight q8_0 +blk.44.attn_gate.weight q8_0 +blk.44.ssm_out.weight q8_0 +blk.44.ffn_down_shexp.weight q8_0 +blk.44.ffn_gate_shexp.weight q8_0 +blk.44.ffn_up_shexp.weight q8_0 +blk.45.ssm_ba.weight q8_0 +blk.45.attn_qkv.weight q8_0 +blk.45.attn_gate.weight q8_0 +blk.45.ssm_out.weight q8_0 +blk.45.ffn_down_shexp.weight q8_0 +blk.45.ffn_gate_shexp.weight q8_0 +blk.45.ffn_up_shexp.weight q8_0 +blk.46.ssm_ba.weight q8_0 +blk.46.attn_qkv.weight q8_0 +blk.46.attn_gate.weight q8_0 +blk.46.ssm_out.weight q8_0 +blk.46.ffn_down_shexp.weight q8_0 +blk.46.ffn_gate_shexp.weight q8_0 +blk.46.ffn_up_shexp.weight q8_0 +blk.47.ffn_down_shexp.weight q8_0 +blk.47.ffn_gate_shexp.weight q8_0 +blk.47.ffn_up_shexp.weight q8_0 +blk.47.attn_k.weight q8_0 +blk.47.attn_output.weight q8_0 +blk.47.attn_q.weight q8_0 +blk.47.attn_v.weight q8_0 +output.weight q8_0 diff --git a/tests/snapshots/qwen3.5-27b.schema b/tests/snapshots/qwen3.5-27b.schema new file mode 100644 index 0000000000..f1f655a847 --- /dev/null +++ b/tests/snapshots/qwen3.5-27b.schema @@ -0,0 +1,1837 @@ +# Model: Qwen3.5-27B +# n_embd=5120, n_ff=17408, n_vocab=248320, n_layer=64, n_head=24, n_head_kv=4 + +[F32] f32 +output.weight q6_K + +[F16] f16 +output.weight q6_K + +[Q4_0] q4_0 +output.weight q6_K + +[Q4_1] q4_1 +output.weight q6_K + +[Q8_0] q8_0 + +[Q5_0] q5_0 +output.weight q6_K + +[Q5_1] q5_1 +output.weight q6_K + +[Q2_K] q2_K +output.weight q6_K +blk.0.ffn_down.weight q3_K +blk.1.ffn_down.weight q3_K +blk.2.ffn_down.weight q3_K +blk.3.attn_output.weight q3_K +blk.3.attn_v.weight q4_K +blk.3.ffn_down.weight q3_K +blk.4.ffn_down.weight q3_K +blk.5.ffn_down.weight q3_K +blk.6.ffn_down.weight q3_K +blk.7.attn_output.weight q3_K +blk.7.attn_v.weight q4_K +blk.7.ffn_down.weight q3_K +blk.8.ffn_down.weight q3_K +blk.9.ffn_down.weight q3_K +blk.10.ffn_down.weight q3_K +blk.11.attn_output.weight q3_K +blk.11.attn_v.weight q4_K +blk.11.ffn_down.weight q3_K +blk.12.ffn_down.weight q3_K +blk.13.ffn_down.weight q3_K +blk.14.ffn_down.weight q3_K +blk.15.attn_output.weight q3_K +blk.15.attn_v.weight q4_K +blk.15.ffn_down.weight q3_K +blk.16.ffn_down.weight q3_K +blk.17.ffn_down.weight q3_K +blk.18.ffn_down.weight q3_K +blk.19.attn_output.weight q3_K +blk.19.attn_v.weight q4_K +blk.19.ffn_down.weight q3_K +blk.20.ffn_down.weight q3_K +blk.21.ffn_down.weight q3_K +blk.22.ffn_down.weight q3_K +blk.23.attn_output.weight q3_K +blk.23.attn_v.weight q4_K +blk.23.ffn_down.weight q3_K +blk.24.ffn_down.weight q3_K +blk.25.ffn_down.weight q3_K +blk.26.ffn_down.weight q3_K +blk.27.attn_output.weight q3_K +blk.27.attn_v.weight q4_K +blk.27.ffn_down.weight q3_K +blk.28.ffn_down.weight q3_K +blk.29.ffn_down.weight q3_K +blk.30.ffn_down.weight q3_K +blk.31.attn_output.weight q3_K +blk.31.attn_v.weight q4_K +blk.31.ffn_down.weight q3_K +blk.32.ffn_down.weight q3_K +blk.33.ffn_down.weight q3_K +blk.34.ffn_down.weight q3_K +blk.35.attn_output.weight q3_K +blk.35.attn_v.weight q4_K +blk.35.ffn_down.weight q3_K +blk.36.ffn_down.weight q3_K +blk.37.ffn_down.weight q3_K +blk.38.ffn_down.weight q3_K +blk.39.attn_output.weight q3_K +blk.39.attn_v.weight q4_K +blk.39.ffn_down.weight q3_K +blk.40.ffn_down.weight q3_K +blk.41.ffn_down.weight q3_K +blk.42.ffn_down.weight q3_K +blk.43.attn_output.weight q3_K +blk.43.attn_v.weight q4_K +blk.43.ffn_down.weight q3_K +blk.44.ffn_down.weight q3_K +blk.45.ffn_down.weight q3_K +blk.46.ffn_down.weight q3_K +blk.47.attn_output.weight q3_K +blk.47.attn_v.weight q4_K +blk.47.ffn_down.weight q3_K +blk.48.ffn_down.weight q3_K +blk.49.ffn_down.weight q3_K +blk.50.ffn_down.weight q3_K +blk.51.attn_output.weight q3_K +blk.51.attn_v.weight q4_K +blk.51.ffn_down.weight q3_K +blk.52.ffn_down.weight q3_K +blk.53.ffn_down.weight q3_K +blk.54.ffn_down.weight q3_K +blk.55.attn_output.weight q3_K +blk.55.attn_v.weight q4_K +blk.55.ffn_down.weight q3_K +blk.56.ffn_down.weight q3_K +blk.57.ffn_down.weight q3_K +blk.58.ffn_down.weight q3_K +blk.59.attn_output.weight q3_K +blk.59.attn_v.weight q4_K +blk.59.ffn_down.weight q3_K +blk.60.ffn_down.weight q3_K +blk.61.ffn_down.weight q3_K +blk.62.ffn_down.weight q3_K +blk.63.attn_output.weight q3_K +blk.63.attn_v.weight q4_K +blk.63.ffn_down.weight q3_K + +[Q3_K_S] q3_K +output.weight q6_K + +[Q3_K_M] q3_K +output.weight q6_K +blk.0.attn_qkv.weight q4_K +blk.0.ffn_down.weight q5_K +blk.1.attn_qkv.weight q4_K +blk.1.ffn_down.weight q5_K +blk.2.attn_qkv.weight q4_K +blk.2.ffn_down.weight q5_K +blk.3.attn_output.weight q4_K +blk.3.attn_v.weight q5_K +blk.3.ffn_down.weight q5_K +blk.4.attn_qkv.weight q4_K +blk.4.ffn_down.weight q4_K +blk.5.attn_qkv.weight q4_K +blk.5.ffn_down.weight q4_K +blk.6.attn_qkv.weight q4_K +blk.6.ffn_down.weight q4_K +blk.7.attn_output.weight q4_K +blk.7.attn_v.weight q5_K +blk.7.ffn_down.weight q4_K +blk.8.attn_qkv.weight q4_K +blk.8.ffn_down.weight q4_K +blk.9.attn_qkv.weight q4_K +blk.9.ffn_down.weight q4_K +blk.10.attn_qkv.weight q4_K +blk.10.ffn_down.weight q4_K +blk.11.attn_output.weight q4_K +blk.11.attn_v.weight q4_K +blk.11.ffn_down.weight q4_K +blk.12.attn_qkv.weight q4_K +blk.12.ffn_down.weight q4_K +blk.13.attn_qkv.weight q4_K +blk.13.ffn_down.weight q4_K +blk.14.attn_qkv.weight q4_K +blk.14.ffn_down.weight q4_K +blk.15.attn_output.weight q4_K +blk.15.attn_v.weight q4_K +blk.15.ffn_down.weight q4_K +blk.16.attn_qkv.weight q4_K +blk.16.ffn_down.weight q4_K +blk.17.attn_qkv.weight q4_K +blk.17.ffn_down.weight q4_K +blk.18.attn_qkv.weight q4_K +blk.18.ffn_down.weight q4_K +blk.19.attn_output.weight q4_K +blk.19.attn_v.weight q4_K +blk.19.ffn_down.weight q4_K +blk.20.attn_qkv.weight q4_K +blk.20.ffn_down.weight q4_K +blk.21.attn_qkv.weight q4_K +blk.21.ffn_down.weight q4_K +blk.22.attn_qkv.weight q4_K +blk.22.ffn_down.weight q4_K +blk.23.attn_output.weight q4_K +blk.23.attn_v.weight q4_K +blk.23.ffn_down.weight q4_K +blk.24.attn_qkv.weight q4_K +blk.24.ffn_down.weight q4_K +blk.25.attn_qkv.weight q4_K +blk.25.ffn_down.weight q4_K +blk.26.attn_qkv.weight q4_K +blk.26.ffn_down.weight q4_K +blk.27.attn_output.weight q4_K +blk.27.attn_v.weight q4_K +blk.27.ffn_down.weight q4_K +blk.28.attn_qkv.weight q4_K +blk.28.ffn_down.weight q4_K +blk.29.attn_qkv.weight q4_K +blk.29.ffn_down.weight q4_K +blk.30.attn_qkv.weight q4_K +blk.30.ffn_down.weight q4_K +blk.31.attn_output.weight q4_K +blk.31.attn_v.weight q4_K +blk.31.ffn_down.weight q4_K +blk.32.attn_qkv.weight q4_K +blk.32.ffn_down.weight q4_K +blk.33.attn_qkv.weight q4_K +blk.33.ffn_down.weight q4_K +blk.34.attn_qkv.weight q4_K +blk.34.ffn_down.weight q4_K +blk.35.attn_output.weight q4_K +blk.35.attn_v.weight q4_K +blk.35.ffn_down.weight q4_K +blk.36.attn_qkv.weight q4_K +blk.36.ffn_down.weight q4_K +blk.37.attn_qkv.weight q4_K +blk.37.ffn_down.weight q4_K +blk.38.attn_qkv.weight q4_K +blk.38.ffn_down.weight q4_K +blk.39.attn_output.weight q4_K +blk.39.attn_v.weight q4_K +blk.39.ffn_down.weight q4_K +blk.40.attn_qkv.weight q4_K +blk.40.ffn_down.weight q4_K +blk.41.attn_qkv.weight q4_K +blk.41.ffn_down.weight q4_K +blk.42.attn_qkv.weight q4_K +blk.42.ffn_down.weight q4_K +blk.43.attn_output.weight q4_K +blk.43.attn_v.weight q4_K +blk.43.ffn_down.weight q4_K +blk.44.attn_qkv.weight q4_K +blk.44.ffn_down.weight q4_K +blk.45.attn_qkv.weight q4_K +blk.45.ffn_down.weight q4_K +blk.46.attn_qkv.weight q4_K +blk.46.ffn_down.weight q4_K +blk.47.attn_output.weight q4_K +blk.47.attn_v.weight q4_K +blk.47.ffn_down.weight q4_K +blk.48.attn_qkv.weight q4_K +blk.48.ffn_down.weight q4_K +blk.49.attn_qkv.weight q4_K +blk.49.ffn_down.weight q4_K +blk.50.attn_qkv.weight q4_K +blk.50.ffn_down.weight q4_K +blk.51.attn_output.weight q4_K +blk.51.attn_v.weight q4_K +blk.51.ffn_down.weight q4_K +blk.52.attn_qkv.weight q4_K +blk.52.ffn_down.weight q4_K +blk.53.attn_qkv.weight q4_K +blk.53.ffn_down.weight q4_K +blk.54.attn_qkv.weight q4_K +blk.54.ffn_down.weight q4_K +blk.55.attn_output.weight q4_K +blk.55.attn_v.weight q4_K +blk.55.ffn_down.weight q4_K +blk.56.attn_qkv.weight q4_K +blk.56.ffn_down.weight q4_K +blk.57.attn_qkv.weight q4_K +blk.57.ffn_down.weight q4_K +blk.58.attn_qkv.weight q4_K +blk.58.ffn_down.weight q4_K +blk.59.attn_output.weight q4_K +blk.59.attn_v.weight q4_K +blk.59.ffn_down.weight q4_K +blk.60.attn_qkv.weight q4_K +blk.60.ffn_down.weight q4_K +blk.61.attn_qkv.weight q4_K +blk.61.ffn_down.weight q4_K +blk.62.attn_qkv.weight q4_K +blk.62.ffn_down.weight q4_K +blk.63.attn_output.weight q4_K +blk.63.attn_v.weight q4_K +blk.63.ffn_down.weight q4_K + +[Q3_K_L] q3_K +output.weight q6_K +blk.0.attn_qkv.weight q4_K +blk.0.ffn_down.weight q5_K +blk.1.attn_qkv.weight q4_K +blk.1.ffn_down.weight q5_K +blk.2.attn_qkv.weight q4_K +blk.2.ffn_down.weight q5_K +blk.3.attn_output.weight q5_K +blk.3.attn_v.weight q5_K +blk.3.ffn_down.weight q5_K +blk.4.attn_qkv.weight q4_K +blk.4.ffn_down.weight q5_K +blk.5.attn_qkv.weight q4_K +blk.5.ffn_down.weight q5_K +blk.6.attn_qkv.weight q4_K +blk.6.ffn_down.weight q5_K +blk.7.attn_output.weight q5_K +blk.7.attn_v.weight q5_K +blk.7.ffn_down.weight q5_K +blk.8.attn_qkv.weight q4_K +blk.8.ffn_down.weight q5_K +blk.9.attn_qkv.weight q4_K +blk.9.ffn_down.weight q5_K +blk.10.attn_qkv.weight q4_K +blk.10.ffn_down.weight q5_K +blk.11.attn_output.weight q5_K +blk.11.attn_v.weight q5_K +blk.11.ffn_down.weight q5_K +blk.12.attn_qkv.weight q4_K +blk.12.ffn_down.weight q5_K +blk.13.attn_qkv.weight q4_K +blk.13.ffn_down.weight q5_K +blk.14.attn_qkv.weight q4_K +blk.14.ffn_down.weight q5_K +blk.15.attn_output.weight q5_K +blk.15.attn_v.weight q5_K +blk.15.ffn_down.weight q5_K +blk.16.attn_qkv.weight q4_K +blk.16.ffn_down.weight q5_K +blk.17.attn_qkv.weight q4_K +blk.17.ffn_down.weight q5_K +blk.18.attn_qkv.weight q4_K +blk.18.ffn_down.weight q5_K +blk.19.attn_output.weight q5_K +blk.19.attn_v.weight q5_K +blk.19.ffn_down.weight q5_K +blk.20.attn_qkv.weight q4_K +blk.20.ffn_down.weight q5_K +blk.21.attn_qkv.weight q4_K +blk.21.ffn_down.weight q5_K +blk.22.attn_qkv.weight q4_K +blk.22.ffn_down.weight q5_K +blk.23.attn_output.weight q5_K +blk.23.attn_v.weight q5_K +blk.23.ffn_down.weight q5_K +blk.24.attn_qkv.weight q4_K +blk.24.ffn_down.weight q5_K +blk.25.attn_qkv.weight q4_K +blk.25.ffn_down.weight q5_K +blk.26.attn_qkv.weight q4_K +blk.26.ffn_down.weight q5_K +blk.27.attn_output.weight q5_K +blk.27.attn_v.weight q5_K +blk.27.ffn_down.weight q5_K +blk.28.attn_qkv.weight q4_K +blk.28.ffn_down.weight q5_K +blk.29.attn_qkv.weight q4_K +blk.29.ffn_down.weight q5_K +blk.30.attn_qkv.weight q4_K +blk.30.ffn_down.weight q5_K +blk.31.attn_output.weight q5_K +blk.31.attn_v.weight q5_K +blk.31.ffn_down.weight q5_K +blk.32.attn_qkv.weight q4_K +blk.32.ffn_down.weight q5_K +blk.33.attn_qkv.weight q4_K +blk.33.ffn_down.weight q5_K +blk.34.attn_qkv.weight q4_K +blk.34.ffn_down.weight q5_K +blk.35.attn_output.weight q5_K +blk.35.attn_v.weight q5_K +blk.35.ffn_down.weight q5_K +blk.36.attn_qkv.weight q4_K +blk.36.ffn_down.weight q5_K +blk.37.attn_qkv.weight q4_K +blk.37.ffn_down.weight q5_K +blk.38.attn_qkv.weight q4_K +blk.38.ffn_down.weight q5_K +blk.39.attn_output.weight q5_K +blk.39.attn_v.weight q5_K +blk.39.ffn_down.weight q5_K +blk.40.attn_qkv.weight q4_K +blk.40.ffn_down.weight q5_K +blk.41.attn_qkv.weight q4_K +blk.41.ffn_down.weight q5_K +blk.42.attn_qkv.weight q4_K +blk.42.ffn_down.weight q5_K +blk.43.attn_output.weight q5_K +blk.43.attn_v.weight q5_K +blk.43.ffn_down.weight q5_K +blk.44.attn_qkv.weight q4_K +blk.44.ffn_down.weight q5_K +blk.45.attn_qkv.weight q4_K +blk.45.ffn_down.weight q5_K +blk.46.attn_qkv.weight q4_K +blk.46.ffn_down.weight q5_K +blk.47.attn_output.weight q5_K +blk.47.attn_v.weight q5_K +blk.47.ffn_down.weight q5_K +blk.48.attn_qkv.weight q4_K +blk.48.ffn_down.weight q5_K +blk.49.attn_qkv.weight q4_K +blk.49.ffn_down.weight q5_K +blk.50.attn_qkv.weight q4_K +blk.50.ffn_down.weight q5_K +blk.51.attn_output.weight q5_K +blk.51.attn_v.weight q5_K +blk.51.ffn_down.weight q5_K +blk.52.attn_qkv.weight q4_K +blk.52.ffn_down.weight q5_K +blk.53.attn_qkv.weight q4_K +blk.53.ffn_down.weight q5_K +blk.54.attn_qkv.weight q4_K +blk.54.ffn_down.weight q5_K +blk.55.attn_output.weight q5_K +blk.55.attn_v.weight q5_K +blk.55.ffn_down.weight q5_K +blk.56.attn_qkv.weight q4_K +blk.56.ffn_down.weight q5_K +blk.57.attn_qkv.weight q4_K +blk.57.ffn_down.weight q5_K +blk.58.attn_qkv.weight q4_K +blk.58.ffn_down.weight q5_K +blk.59.attn_output.weight q5_K +blk.59.attn_v.weight q5_K +blk.59.ffn_down.weight q5_K +blk.60.attn_qkv.weight q4_K +blk.60.ffn_down.weight q5_K +blk.61.attn_qkv.weight q4_K +blk.61.ffn_down.weight q5_K +blk.62.attn_qkv.weight q4_K +blk.62.ffn_down.weight q5_K +blk.63.attn_output.weight q5_K +blk.63.attn_v.weight q5_K +blk.63.ffn_down.weight q5_K + +[Q4_K_S] q4_K +output.weight q6_K +blk.0.ffn_down.weight q5_K +blk.1.ffn_down.weight q5_K +blk.2.ffn_down.weight q5_K +blk.3.attn_v.weight q5_K +blk.3.ffn_down.weight q5_K +blk.4.ffn_down.weight q5_K +blk.5.ffn_down.weight q5_K +blk.6.ffn_down.weight q5_K +blk.7.attn_v.weight q5_K +blk.7.ffn_down.weight q5_K +blk.11.attn_v.weight q5_K +blk.15.attn_v.weight q5_K + +[Q4_K_M] q4_K +output.weight q6_K +blk.0.attn_qkv.weight q5_K +blk.0.ffn_down.weight q6_K +blk.1.attn_qkv.weight q5_K +blk.1.ffn_down.weight q6_K +blk.2.attn_qkv.weight q5_K +blk.2.ffn_down.weight q6_K +blk.3.attn_v.weight q6_K +blk.3.ffn_down.weight q6_K +blk.4.attn_qkv.weight q5_K +blk.4.ffn_down.weight q6_K +blk.5.attn_qkv.weight q5_K +blk.5.ffn_down.weight q6_K +blk.6.attn_qkv.weight q5_K +blk.6.ffn_down.weight q6_K +blk.7.attn_v.weight q6_K +blk.7.ffn_down.weight q6_K +blk.8.attn_qkv.weight q5_K +blk.9.attn_qkv.weight q5_K +blk.10.attn_qkv.weight q5_K +blk.10.ffn_down.weight q6_K +blk.11.attn_v.weight q6_K +blk.12.attn_qkv.weight q5_K +blk.13.attn_qkv.weight q5_K +blk.13.ffn_down.weight q6_K +blk.14.attn_qkv.weight q5_K +blk.15.attn_v.weight q6_K +blk.16.attn_qkv.weight q5_K +blk.16.ffn_down.weight q6_K +blk.17.attn_qkv.weight q5_K +blk.18.attn_qkv.weight q5_K +blk.19.attn_v.weight q6_K +blk.19.ffn_down.weight q6_K +blk.20.attn_qkv.weight q5_K +blk.21.attn_qkv.weight q5_K +blk.22.attn_qkv.weight q5_K +blk.22.ffn_down.weight q6_K +blk.23.attn_v.weight q6_K +blk.24.attn_qkv.weight q5_K +blk.25.attn_qkv.weight q5_K +blk.25.ffn_down.weight q6_K +blk.26.attn_qkv.weight q5_K +blk.27.attn_v.weight q6_K +blk.28.attn_qkv.weight q5_K +blk.28.ffn_down.weight q6_K +blk.29.attn_qkv.weight q5_K +blk.30.attn_qkv.weight q5_K +blk.31.attn_v.weight q6_K +blk.31.ffn_down.weight q6_K +blk.32.attn_qkv.weight q5_K +blk.33.attn_qkv.weight q5_K +blk.34.attn_qkv.weight q5_K +blk.34.ffn_down.weight q6_K +blk.36.attn_qkv.weight q5_K +blk.37.attn_qkv.weight q5_K +blk.37.ffn_down.weight q6_K +blk.38.attn_qkv.weight q5_K +blk.40.attn_qkv.weight q5_K +blk.40.ffn_down.weight q6_K +blk.41.attn_qkv.weight q5_K +blk.42.attn_qkv.weight q5_K +blk.43.attn_v.weight q6_K +blk.43.ffn_down.weight q6_K +blk.44.attn_qkv.weight q5_K +blk.45.attn_qkv.weight q5_K +blk.46.attn_qkv.weight q5_K +blk.46.ffn_down.weight q6_K +blk.48.attn_qkv.weight q5_K +blk.49.attn_qkv.weight q5_K +blk.49.ffn_down.weight q6_K +blk.50.attn_qkv.weight q5_K +blk.52.attn_qkv.weight q5_K +blk.52.ffn_down.weight q6_K +blk.53.attn_qkv.weight q5_K +blk.54.attn_qkv.weight q5_K +blk.55.attn_v.weight q6_K +blk.55.ffn_down.weight q6_K +blk.56.attn_qkv.weight q5_K +blk.56.ffn_down.weight q6_K +blk.57.attn_qkv.weight q5_K +blk.57.ffn_down.weight q6_K +blk.58.attn_qkv.weight q5_K +blk.58.ffn_down.weight q6_K +blk.59.ffn_down.weight q6_K +blk.60.attn_qkv.weight q5_K +blk.60.ffn_down.weight q6_K +blk.61.attn_qkv.weight q5_K +blk.61.ffn_down.weight q6_K +blk.62.attn_qkv.weight q5_K +blk.62.ffn_down.weight q6_K +blk.63.ffn_down.weight q6_K + +[Q5_K_S] q5_K +output.weight q6_K + +[Q5_K_M] q5_K +output.weight q6_K +blk.0.attn_qkv.weight q6_K +blk.0.ffn_down.weight q6_K +blk.1.attn_qkv.weight q6_K +blk.1.ffn_down.weight q6_K +blk.2.attn_qkv.weight q6_K +blk.2.ffn_down.weight q6_K +blk.3.attn_v.weight q6_K +blk.3.ffn_down.weight q6_K +blk.4.attn_qkv.weight q6_K +blk.4.ffn_down.weight q6_K +blk.5.attn_qkv.weight q6_K +blk.5.ffn_down.weight q6_K +blk.6.attn_qkv.weight q6_K +blk.6.ffn_down.weight q6_K +blk.7.attn_v.weight q6_K +blk.7.ffn_down.weight q6_K +blk.8.attn_qkv.weight q6_K +blk.9.attn_qkv.weight q6_K +blk.10.attn_qkv.weight q6_K +blk.10.ffn_down.weight q6_K +blk.11.attn_v.weight q6_K +blk.12.attn_qkv.weight q6_K +blk.13.attn_qkv.weight q6_K +blk.13.ffn_down.weight q6_K +blk.14.attn_qkv.weight q6_K +blk.15.attn_v.weight q6_K +blk.16.attn_qkv.weight q6_K +blk.16.ffn_down.weight q6_K +blk.17.attn_qkv.weight q6_K +blk.18.attn_qkv.weight q6_K +blk.19.attn_v.weight q6_K +blk.19.ffn_down.weight q6_K +blk.20.attn_qkv.weight q6_K +blk.21.attn_qkv.weight q6_K +blk.22.attn_qkv.weight q6_K +blk.22.ffn_down.weight q6_K +blk.23.attn_v.weight q6_K +blk.24.attn_qkv.weight q6_K +blk.25.attn_qkv.weight q6_K +blk.25.ffn_down.weight q6_K +blk.26.attn_qkv.weight q6_K +blk.27.attn_v.weight q6_K +blk.28.attn_qkv.weight q6_K +blk.28.ffn_down.weight q6_K +blk.29.attn_qkv.weight q6_K +blk.30.attn_qkv.weight q6_K +blk.31.attn_v.weight q6_K +blk.31.ffn_down.weight q6_K +blk.32.attn_qkv.weight q6_K +blk.33.attn_qkv.weight q6_K +blk.34.attn_qkv.weight q6_K +blk.34.ffn_down.weight q6_K +blk.36.attn_qkv.weight q6_K +blk.37.attn_qkv.weight q6_K +blk.37.ffn_down.weight q6_K +blk.38.attn_qkv.weight q6_K +blk.40.attn_qkv.weight q6_K +blk.40.ffn_down.weight q6_K +blk.41.attn_qkv.weight q6_K +blk.42.attn_qkv.weight q6_K +blk.43.attn_v.weight q6_K +blk.43.ffn_down.weight q6_K +blk.44.attn_qkv.weight q6_K +blk.45.attn_qkv.weight q6_K +blk.46.attn_qkv.weight q6_K +blk.46.ffn_down.weight q6_K +blk.48.attn_qkv.weight q6_K +blk.49.attn_qkv.weight q6_K +blk.49.ffn_down.weight q6_K +blk.50.attn_qkv.weight q6_K +blk.52.attn_qkv.weight q6_K +blk.52.ffn_down.weight q6_K +blk.53.attn_qkv.weight q6_K +blk.54.attn_qkv.weight q6_K +blk.55.attn_v.weight q6_K +blk.55.ffn_down.weight q6_K +blk.56.attn_qkv.weight q6_K +blk.56.ffn_down.weight q6_K +blk.57.attn_qkv.weight q6_K +blk.57.ffn_down.weight q6_K +blk.58.attn_qkv.weight q6_K +blk.58.ffn_down.weight q6_K +blk.59.ffn_down.weight q6_K +blk.60.attn_qkv.weight q6_K +blk.60.ffn_down.weight q6_K +blk.61.attn_qkv.weight q6_K +blk.61.ffn_down.weight q6_K +blk.62.attn_qkv.weight q6_K +blk.62.ffn_down.weight q6_K +blk.63.ffn_down.weight q6_K + +[Q6_K] q6_K + +[IQ2_XXS] iq2_xxs +output.weight q5_K +token_embd.weight q2_K +blk.0.ffn_down.weight q2_K +blk.1.ffn_down.weight q2_K +blk.2.ffn_down.weight q2_K +blk.3.attn_v.weight q4_K +blk.3.ffn_down.weight q2_K +blk.4.ffn_down.weight q2_K +blk.5.ffn_down.weight q2_K +blk.6.ffn_down.weight q2_K +blk.7.attn_v.weight q4_K +blk.7.ffn_down.weight q2_K +blk.11.attn_v.weight q4_K +blk.15.attn_v.weight q4_K +blk.19.attn_v.weight q4_K +blk.23.attn_v.weight q4_K +blk.27.attn_v.weight q4_K +blk.31.attn_v.weight q4_K +blk.35.attn_v.weight q4_K +blk.39.attn_v.weight q4_K +blk.43.attn_v.weight q4_K +blk.47.attn_v.weight q4_K +blk.51.attn_v.weight q4_K +blk.55.attn_v.weight q4_K +blk.59.attn_v.weight q4_K +blk.63.attn_v.weight q4_K + +[IQ2_XS] iq2_xs +output.weight q5_K +token_embd.weight q2_K +blk.0.ffn_down.weight q2_K +blk.1.ffn_down.weight q2_K +blk.2.ffn_down.weight q2_K +blk.3.attn_v.weight q4_K +blk.3.ffn_down.weight q2_K +blk.4.ffn_down.weight q2_K +blk.5.ffn_down.weight q2_K +blk.6.ffn_down.weight q2_K +blk.7.attn_v.weight q4_K +blk.7.ffn_down.weight q2_K +blk.11.attn_v.weight q4_K +blk.15.attn_v.weight q4_K +blk.19.attn_v.weight q4_K +blk.23.attn_v.weight q4_K +blk.27.attn_v.weight q4_K +blk.31.attn_v.weight q4_K +blk.35.attn_v.weight q4_K +blk.39.attn_v.weight q4_K +blk.43.attn_v.weight q4_K +blk.47.attn_v.weight q4_K +blk.51.attn_v.weight q4_K +blk.55.attn_v.weight q4_K +blk.59.attn_v.weight q4_K +blk.63.attn_v.weight q4_K + +[Q2_K_S] q2_K +output.weight q6_K +blk.0.ffn_down.weight q4_K +blk.1.ffn_down.weight q4_K +blk.2.ffn_down.weight q4_K +blk.3.attn_v.weight q4_K +blk.3.ffn_down.weight q4_K +blk.4.ffn_down.weight q4_K +blk.5.ffn_down.weight q4_K +blk.6.ffn_down.weight q4_K +blk.7.attn_v.weight q4_K +blk.7.ffn_down.weight q4_K +blk.11.attn_v.weight q4_K +blk.15.attn_v.weight q4_K +blk.19.attn_v.weight q4_K +blk.23.attn_v.weight q4_K +blk.27.attn_v.weight q4_K +blk.31.attn_v.weight q4_K +blk.35.attn_v.weight q4_K +blk.39.attn_v.weight q4_K +blk.43.attn_v.weight q4_K +blk.47.attn_v.weight q4_K +blk.51.attn_v.weight q4_K +blk.55.attn_v.weight q4_K +blk.59.attn_v.weight q4_K +blk.63.attn_v.weight q4_K + +[IQ3_XS] iq3_s +output.weight q6_K +blk.3.attn_k.weight iq3_xxs +blk.3.attn_q.weight iq3_xxs +blk.3.attn_v.weight q4_K +blk.7.attn_k.weight iq3_xxs +blk.7.attn_q.weight iq3_xxs +blk.7.attn_v.weight q4_K +blk.8.ffn_gate.weight iq3_xxs +blk.8.ffn_up.weight iq3_xxs +blk.9.ffn_gate.weight iq3_xxs +blk.9.ffn_up.weight iq3_xxs +blk.10.ffn_gate.weight iq3_xxs +blk.10.ffn_up.weight iq3_xxs +blk.11.attn_k.weight iq3_xxs +blk.11.attn_q.weight iq3_xxs +blk.11.attn_v.weight q4_K +blk.11.ffn_gate.weight iq3_xxs +blk.11.ffn_up.weight iq3_xxs +blk.12.ffn_gate.weight iq3_xxs +blk.12.ffn_up.weight iq3_xxs +blk.13.ffn_gate.weight iq3_xxs +blk.13.ffn_up.weight iq3_xxs +blk.14.ffn_gate.weight iq3_xxs +blk.14.ffn_up.weight iq3_xxs +blk.15.attn_k.weight iq3_xxs +blk.15.attn_q.weight iq3_xxs +blk.15.attn_v.weight q4_K +blk.15.ffn_gate.weight iq3_xxs +blk.15.ffn_up.weight iq3_xxs +blk.16.ffn_gate.weight iq3_xxs +blk.16.ffn_up.weight iq3_xxs +blk.17.ffn_gate.weight iq3_xxs +blk.17.ffn_up.weight iq3_xxs +blk.18.ffn_gate.weight iq3_xxs +blk.18.ffn_up.weight iq3_xxs +blk.19.attn_k.weight iq3_xxs +blk.19.attn_q.weight iq3_xxs +blk.19.attn_v.weight q4_K +blk.19.ffn_gate.weight iq3_xxs +blk.19.ffn_up.weight iq3_xxs +blk.20.ffn_gate.weight iq3_xxs +blk.20.ffn_up.weight iq3_xxs +blk.21.ffn_gate.weight iq3_xxs +blk.21.ffn_up.weight iq3_xxs +blk.22.ffn_gate.weight iq3_xxs +blk.22.ffn_up.weight iq3_xxs +blk.23.attn_k.weight iq3_xxs +blk.23.attn_q.weight iq3_xxs +blk.23.attn_v.weight q4_K +blk.23.ffn_gate.weight iq3_xxs +blk.23.ffn_up.weight iq3_xxs +blk.24.ffn_gate.weight iq3_xxs +blk.24.ffn_up.weight iq3_xxs +blk.25.ffn_gate.weight iq3_xxs +blk.25.ffn_up.weight iq3_xxs +blk.26.ffn_gate.weight iq3_xxs +blk.26.ffn_up.weight iq3_xxs +blk.27.attn_k.weight iq3_xxs +blk.27.attn_q.weight iq3_xxs +blk.27.attn_v.weight q4_K +blk.27.ffn_gate.weight iq3_xxs +blk.27.ffn_up.weight iq3_xxs +blk.28.ffn_gate.weight iq3_xxs +blk.28.ffn_up.weight iq3_xxs +blk.29.ffn_gate.weight iq3_xxs +blk.29.ffn_up.weight iq3_xxs +blk.30.ffn_gate.weight iq3_xxs +blk.30.ffn_up.weight iq3_xxs +blk.31.attn_k.weight iq3_xxs +blk.31.attn_q.weight iq3_xxs +blk.31.attn_v.weight q4_K +blk.31.ffn_gate.weight iq3_xxs +blk.31.ffn_up.weight iq3_xxs +blk.32.ffn_gate.weight iq3_xxs +blk.32.ffn_up.weight iq3_xxs +blk.33.ffn_gate.weight iq3_xxs +blk.33.ffn_up.weight iq3_xxs +blk.34.ffn_gate.weight iq3_xxs +blk.34.ffn_up.weight iq3_xxs +blk.35.attn_k.weight iq3_xxs +blk.35.attn_q.weight iq3_xxs +blk.35.attn_v.weight q4_K +blk.35.ffn_gate.weight iq3_xxs +blk.35.ffn_up.weight iq3_xxs +blk.36.ffn_gate.weight iq3_xxs +blk.36.ffn_up.weight iq3_xxs +blk.37.ffn_gate.weight iq3_xxs +blk.37.ffn_up.weight iq3_xxs +blk.38.ffn_gate.weight iq3_xxs +blk.38.ffn_up.weight iq3_xxs +blk.39.attn_k.weight iq3_xxs +blk.39.attn_q.weight iq3_xxs +blk.39.attn_v.weight q4_K +blk.39.ffn_gate.weight iq3_xxs +blk.39.ffn_up.weight iq3_xxs +blk.40.ffn_gate.weight iq3_xxs +blk.40.ffn_up.weight iq3_xxs +blk.41.ffn_gate.weight iq3_xxs +blk.41.ffn_up.weight iq3_xxs +blk.42.ffn_gate.weight iq3_xxs +blk.42.ffn_up.weight iq3_xxs +blk.43.attn_k.weight iq3_xxs +blk.43.attn_q.weight iq3_xxs +blk.43.attn_v.weight q4_K +blk.43.ffn_gate.weight iq3_xxs +blk.43.ffn_up.weight iq3_xxs +blk.44.ffn_gate.weight iq3_xxs +blk.44.ffn_up.weight iq3_xxs +blk.45.ffn_gate.weight iq3_xxs +blk.45.ffn_up.weight iq3_xxs +blk.46.ffn_gate.weight iq3_xxs +blk.46.ffn_up.weight iq3_xxs +blk.47.attn_k.weight iq3_xxs +blk.47.attn_q.weight iq3_xxs +blk.47.attn_v.weight q4_K +blk.47.ffn_gate.weight iq3_xxs +blk.47.ffn_up.weight iq3_xxs +blk.48.ffn_gate.weight iq3_xxs +blk.48.ffn_up.weight iq3_xxs +blk.49.ffn_gate.weight iq3_xxs +blk.49.ffn_up.weight iq3_xxs +blk.50.ffn_gate.weight iq3_xxs +blk.50.ffn_up.weight iq3_xxs +blk.51.attn_k.weight iq3_xxs +blk.51.attn_q.weight iq3_xxs +blk.51.attn_v.weight q4_K +blk.51.ffn_gate.weight iq3_xxs +blk.51.ffn_up.weight iq3_xxs +blk.52.ffn_gate.weight iq3_xxs +blk.52.ffn_up.weight iq3_xxs +blk.53.ffn_gate.weight iq3_xxs +blk.53.ffn_up.weight iq3_xxs +blk.54.ffn_gate.weight iq3_xxs +blk.54.ffn_up.weight iq3_xxs +blk.55.attn_k.weight iq3_xxs +blk.55.attn_q.weight iq3_xxs +blk.55.attn_v.weight q4_K +blk.55.ffn_gate.weight iq3_xxs +blk.55.ffn_up.weight iq3_xxs +blk.59.attn_k.weight iq3_xxs +blk.59.attn_q.weight iq3_xxs +blk.59.attn_v.weight q4_K +blk.63.attn_k.weight iq3_xxs +blk.63.attn_q.weight iq3_xxs +blk.63.attn_v.weight q4_K + +[IQ3_XXS] iq3_xxs +output.weight q5_K +token_embd.weight iq3_s +blk.0.ffn_down.weight q4_K +blk.1.ffn_down.weight q4_K +blk.2.ffn_down.weight q4_K +blk.3.attn_k.weight iq2_s +blk.3.attn_output.weight iq3_s +blk.3.attn_q.weight iq2_s +blk.3.attn_v.weight q4_K +blk.3.ffn_down.weight q4_K +blk.4.ffn_down.weight q4_K +blk.5.ffn_down.weight q4_K +blk.6.ffn_down.weight q4_K +blk.7.attn_k.weight iq2_s +blk.7.attn_output.weight iq3_s +blk.7.attn_q.weight iq2_s +blk.7.attn_v.weight q4_K +blk.7.ffn_down.weight q4_K +blk.8.ffn_down.weight q3_K +blk.9.ffn_down.weight q3_K +blk.10.ffn_down.weight q3_K +blk.11.attn_k.weight iq2_s +blk.11.attn_output.weight iq3_s +blk.11.attn_q.weight iq2_s +blk.11.attn_v.weight q4_K +blk.11.ffn_down.weight q3_K +blk.12.ffn_down.weight q3_K +blk.13.ffn_down.weight q3_K +blk.14.ffn_down.weight q3_K +blk.15.attn_k.weight iq2_s +blk.15.attn_output.weight iq3_s +blk.15.attn_q.weight iq2_s +blk.15.attn_v.weight q4_K +blk.15.ffn_down.weight q3_K +blk.16.ffn_down.weight q3_K +blk.17.ffn_down.weight q3_K +blk.18.ffn_down.weight q3_K +blk.19.attn_k.weight iq2_s +blk.19.attn_output.weight iq3_s +blk.19.attn_q.weight iq2_s +blk.19.attn_v.weight q4_K +blk.19.ffn_down.weight q3_K +blk.20.ffn_down.weight q3_K +blk.21.ffn_down.weight q3_K +blk.22.ffn_down.weight q3_K +blk.23.attn_k.weight iq2_s +blk.23.attn_output.weight iq3_s +blk.23.attn_q.weight iq2_s +blk.23.attn_v.weight q4_K +blk.23.ffn_down.weight q3_K +blk.24.ffn_down.weight q3_K +blk.25.ffn_down.weight q3_K +blk.26.ffn_down.weight q3_K +blk.27.attn_k.weight iq2_s +blk.27.attn_output.weight iq3_s +blk.27.attn_q.weight iq2_s +blk.27.attn_v.weight q4_K +blk.27.ffn_down.weight q3_K +blk.28.ffn_down.weight q3_K +blk.29.ffn_down.weight q3_K +blk.30.ffn_down.weight q3_K +blk.31.attn_k.weight iq2_s +blk.31.attn_output.weight iq3_s +blk.31.attn_q.weight iq2_s +blk.31.attn_v.weight q4_K +blk.31.ffn_down.weight q3_K +blk.32.ffn_down.weight q3_K +blk.33.ffn_down.weight q3_K +blk.34.ffn_down.weight q3_K +blk.35.attn_k.weight iq2_s +blk.35.attn_output.weight iq3_s +blk.35.attn_q.weight iq2_s +blk.35.attn_v.weight q4_K +blk.35.ffn_down.weight q3_K +blk.36.ffn_down.weight q3_K +blk.37.ffn_down.weight q3_K +blk.38.ffn_down.weight q3_K +blk.39.attn_k.weight iq2_s +blk.39.attn_output.weight iq3_s +blk.39.attn_q.weight iq2_s +blk.39.attn_v.weight q4_K +blk.39.ffn_down.weight q3_K +blk.40.ffn_down.weight q3_K +blk.41.ffn_down.weight q3_K +blk.42.ffn_down.weight q3_K +blk.43.attn_k.weight iq2_s +blk.43.attn_output.weight iq3_s +blk.43.attn_q.weight iq2_s +blk.43.attn_v.weight q4_K +blk.43.ffn_down.weight q3_K +blk.44.ffn_down.weight q3_K +blk.45.ffn_down.weight q3_K +blk.46.ffn_down.weight q3_K +blk.47.attn_k.weight iq2_s +blk.47.attn_output.weight iq3_s +blk.47.attn_q.weight iq2_s +blk.47.attn_v.weight q4_K +blk.47.ffn_down.weight q3_K +blk.48.ffn_down.weight q3_K +blk.49.ffn_down.weight q3_K +blk.50.ffn_down.weight q3_K +blk.51.attn_k.weight iq2_s +blk.51.attn_output.weight iq3_s +blk.51.attn_q.weight iq2_s +blk.51.attn_v.weight q4_K +blk.51.ffn_down.weight q3_K +blk.52.ffn_down.weight q3_K +blk.53.ffn_down.weight q3_K +blk.54.ffn_down.weight q3_K +blk.55.attn_k.weight iq2_s +blk.55.attn_output.weight iq3_s +blk.55.attn_q.weight iq2_s +blk.55.attn_v.weight q4_K +blk.55.ffn_down.weight q3_K +blk.56.ffn_down.weight q3_K +blk.57.ffn_down.weight q3_K +blk.58.ffn_down.weight q3_K +blk.59.attn_k.weight iq2_s +blk.59.attn_output.weight iq3_s +blk.59.attn_q.weight iq2_s +blk.59.attn_v.weight q4_K +blk.59.ffn_down.weight q3_K +blk.60.ffn_down.weight q3_K +blk.61.ffn_down.weight q3_K +blk.62.ffn_down.weight q3_K +blk.63.attn_k.weight iq2_s +blk.63.attn_output.weight iq3_s +blk.63.attn_q.weight iq2_s +blk.63.attn_v.weight q4_K +blk.63.ffn_down.weight q3_K + +[IQ1_S] iq1_s +output.weight q5_K +token_embd.weight q2_K +blk.0.ffn_down.weight q2_K +blk.1.ffn_down.weight q2_K +blk.2.ffn_down.weight q2_K +blk.3.attn_output.weight iq2_xxs +blk.3.attn_v.weight q4_K +blk.3.ffn_down.weight q2_K +blk.4.ffn_down.weight q2_K +blk.5.ffn_down.weight q2_K +blk.6.ffn_down.weight q2_K +blk.7.attn_output.weight iq2_xxs +blk.7.attn_v.weight q4_K +blk.7.ffn_down.weight q2_K +blk.11.attn_output.weight iq2_xxs +blk.11.attn_v.weight q4_K +blk.15.attn_output.weight iq2_xxs +blk.15.attn_v.weight q4_K +blk.19.attn_output.weight iq2_xxs +blk.19.attn_v.weight q4_K +blk.23.attn_output.weight iq2_xxs +blk.23.attn_v.weight q4_K +blk.27.attn_output.weight iq2_xxs +blk.27.attn_v.weight q4_K +blk.31.attn_output.weight iq2_xxs +blk.31.attn_v.weight q4_K +blk.35.attn_output.weight iq2_xxs +blk.35.attn_v.weight q4_K +blk.39.attn_output.weight iq2_xxs +blk.39.attn_v.weight q4_K +blk.43.attn_output.weight iq2_xxs +blk.43.attn_v.weight q4_K +blk.47.attn_output.weight iq2_xxs +blk.47.attn_v.weight q4_K +blk.51.attn_output.weight iq2_xxs +blk.51.attn_v.weight q4_K +blk.55.attn_output.weight iq2_xxs +blk.55.attn_v.weight q4_K +blk.59.attn_output.weight iq2_xxs +blk.59.attn_v.weight q4_K +blk.63.attn_output.weight iq2_xxs +blk.63.attn_v.weight q4_K + +[IQ4_NL] iq4_nl +output.weight q6_K +blk.0.ffn_down.weight q5_K +blk.1.ffn_down.weight q5_K +blk.2.ffn_down.weight q5_K +blk.3.attn_v.weight q5_K +blk.3.ffn_down.weight q5_K +blk.4.ffn_down.weight q5_K +blk.5.ffn_down.weight q5_K +blk.6.ffn_down.weight q5_K +blk.7.attn_v.weight q5_K +blk.7.ffn_down.weight q5_K +blk.11.attn_v.weight q5_K +blk.15.attn_v.weight q5_K +blk.19.attn_v.weight q5_K +blk.23.attn_v.weight q5_K +blk.27.attn_v.weight q5_K +blk.31.attn_v.weight q5_K +blk.35.attn_v.weight q5_K +blk.39.attn_v.weight q5_K +blk.43.attn_v.weight q5_K +blk.47.attn_v.weight q5_K +blk.51.attn_v.weight q5_K +blk.55.attn_v.weight q5_K +blk.59.attn_v.weight q5_K +blk.63.attn_v.weight q5_K + +[IQ3_S] iq3_s +output.weight q6_K +blk.3.attn_v.weight q4_K +blk.7.attn_v.weight q4_K +blk.11.attn_v.weight q4_K +blk.15.attn_v.weight q4_K +blk.19.attn_v.weight q4_K +blk.23.attn_v.weight q4_K +blk.27.attn_v.weight q4_K +blk.31.attn_v.weight q4_K +blk.35.attn_v.weight q4_K +blk.39.attn_v.weight q4_K +blk.43.attn_v.weight q4_K +blk.47.attn_v.weight q4_K +blk.51.attn_v.weight q4_K +blk.55.attn_v.weight q4_K +blk.59.attn_v.weight q4_K +blk.63.attn_v.weight q4_K + +[IQ3_M] iq3_s +output.weight q6_K +blk.0.attn_qkv.weight q4_K +blk.0.ffn_down.weight q4_K +blk.1.attn_qkv.weight q4_K +blk.1.ffn_down.weight q4_K +blk.2.attn_qkv.weight q4_K +blk.2.ffn_down.weight q4_K +blk.3.attn_output.weight q4_K +blk.3.attn_v.weight q4_K +blk.3.ffn_down.weight q4_K +blk.4.attn_qkv.weight q4_K +blk.4.ffn_down.weight q4_K +blk.5.attn_qkv.weight q4_K +blk.5.ffn_down.weight q4_K +blk.6.attn_qkv.weight q4_K +blk.6.ffn_down.weight q4_K +blk.7.attn_output.weight q4_K +blk.7.attn_v.weight q4_K +blk.7.ffn_down.weight q4_K +blk.8.attn_qkv.weight q4_K +blk.9.attn_qkv.weight q4_K +blk.10.attn_qkv.weight q4_K +blk.11.attn_output.weight q4_K +blk.11.attn_v.weight q4_K +blk.12.attn_qkv.weight q4_K +blk.13.attn_qkv.weight q4_K +blk.14.attn_qkv.weight q4_K +blk.15.attn_output.weight q4_K +blk.15.attn_v.weight q4_K +blk.16.attn_qkv.weight q4_K +blk.17.attn_qkv.weight q4_K +blk.18.attn_qkv.weight q4_K +blk.19.attn_output.weight q4_K +blk.19.attn_v.weight q4_K +blk.20.attn_qkv.weight q4_K +blk.21.attn_qkv.weight q4_K +blk.22.attn_qkv.weight q4_K +blk.23.attn_output.weight q4_K +blk.23.attn_v.weight q4_K +blk.24.attn_qkv.weight q4_K +blk.25.attn_qkv.weight q4_K +blk.26.attn_qkv.weight q4_K +blk.27.attn_output.weight q4_K +blk.27.attn_v.weight q4_K +blk.28.attn_qkv.weight q4_K +blk.29.attn_qkv.weight q4_K +blk.30.attn_qkv.weight q4_K +blk.31.attn_output.weight q4_K +blk.31.attn_v.weight q4_K +blk.32.attn_qkv.weight q4_K +blk.33.attn_qkv.weight q4_K +blk.34.attn_qkv.weight q4_K +blk.35.attn_output.weight q4_K +blk.35.attn_v.weight q4_K +blk.36.attn_qkv.weight q4_K +blk.37.attn_qkv.weight q4_K +blk.38.attn_qkv.weight q4_K +blk.39.attn_output.weight q4_K +blk.39.attn_v.weight q4_K +blk.40.attn_qkv.weight q4_K +blk.41.attn_qkv.weight q4_K +blk.42.attn_qkv.weight q4_K +blk.43.attn_output.weight q4_K +blk.43.attn_v.weight q4_K +blk.44.attn_qkv.weight q4_K +blk.45.attn_qkv.weight q4_K +blk.46.attn_qkv.weight q4_K +blk.47.attn_output.weight q4_K +blk.47.attn_v.weight q4_K +blk.48.attn_qkv.weight q4_K +blk.49.attn_qkv.weight q4_K +blk.50.attn_qkv.weight q4_K +blk.51.attn_output.weight q4_K +blk.51.attn_v.weight q4_K +blk.52.attn_qkv.weight q4_K +blk.53.attn_qkv.weight q4_K +blk.54.attn_qkv.weight q4_K +blk.55.attn_output.weight q4_K +blk.55.attn_v.weight q4_K +blk.56.attn_qkv.weight q4_K +blk.57.attn_qkv.weight q4_K +blk.58.attn_qkv.weight q4_K +blk.59.attn_output.weight q4_K +blk.59.attn_v.weight q4_K +blk.60.attn_qkv.weight q4_K +blk.61.attn_qkv.weight q4_K +blk.62.attn_qkv.weight q4_K +blk.63.attn_output.weight q4_K +blk.63.attn_v.weight q4_K + +[IQ2_S] iq2_xs +output.weight q5_K +token_embd.weight iq3_s +blk.0.ffn_down.weight iq3_s +blk.1.ffn_down.weight iq3_s +blk.2.ffn_down.weight iq3_s +blk.3.attn_output.weight iq3_s +blk.3.attn_v.weight q4_K +blk.3.ffn_down.weight iq3_s +blk.4.ffn_down.weight iq3_s +blk.5.ffn_down.weight iq3_s +blk.6.ffn_down.weight iq3_s +blk.7.attn_output.weight iq3_s +blk.7.attn_v.weight q4_K +blk.7.ffn_down.weight iq3_s +blk.11.attn_output.weight iq3_s +blk.11.attn_v.weight q4_K +blk.15.attn_output.weight iq3_s +blk.15.attn_v.weight q4_K +blk.19.attn_output.weight iq3_s +blk.19.attn_v.weight q4_K +blk.23.attn_output.weight iq3_s +blk.23.attn_v.weight q4_K +blk.27.attn_output.weight iq3_s +blk.27.attn_v.weight q4_K +blk.31.attn_output.weight iq3_s +blk.31.attn_v.weight q4_K +blk.35.attn_output.weight iq3_s +blk.35.attn_v.weight q4_K +blk.39.attn_output.weight iq3_s +blk.39.attn_v.weight q4_K +blk.43.attn_output.weight iq3_s +blk.43.attn_v.weight q4_K +blk.47.attn_output.weight iq3_s +blk.47.attn_v.weight q4_K +blk.51.attn_output.weight iq3_s +blk.51.attn_v.weight q4_K +blk.55.attn_output.weight iq3_s +blk.55.attn_v.weight q4_K +blk.59.attn_output.weight iq3_s +blk.59.attn_v.weight q4_K +blk.63.attn_output.weight iq3_s +blk.63.attn_v.weight q4_K + +[IQ2_M] iq2_s +output.weight q5_K +token_embd.weight iq3_s +blk.0.ffn_down.weight iq3_s +blk.1.ffn_down.weight iq3_s +blk.2.ffn_down.weight iq3_s +blk.3.attn_output.weight iq3_s +blk.3.attn_v.weight q4_K +blk.3.ffn_down.weight iq3_s +blk.4.ffn_down.weight iq3_s +blk.5.ffn_down.weight iq3_s +blk.6.ffn_down.weight iq3_s +blk.7.attn_output.weight iq3_s +blk.7.attn_v.weight q4_K +blk.7.ffn_down.weight iq3_s +blk.11.attn_output.weight iq3_s +blk.11.attn_v.weight q4_K +blk.15.attn_output.weight iq3_s +blk.15.attn_v.weight q4_K +blk.19.attn_output.weight iq3_s +blk.19.attn_v.weight q4_K +blk.23.attn_output.weight iq3_s +blk.23.attn_v.weight q4_K +blk.27.attn_output.weight iq3_s +blk.27.attn_v.weight q4_K +blk.31.attn_output.weight iq3_s +blk.31.attn_v.weight q4_K +blk.35.attn_output.weight iq3_s +blk.35.attn_v.weight q4_K +blk.39.attn_output.weight iq3_s +blk.39.attn_v.weight q4_K +blk.43.attn_output.weight iq3_s +blk.43.attn_v.weight q4_K +blk.47.attn_output.weight iq3_s +blk.47.attn_v.weight q4_K +blk.51.attn_output.weight iq3_s +blk.51.attn_v.weight q4_K +blk.55.attn_output.weight iq3_s +blk.55.attn_v.weight q4_K +blk.59.attn_output.weight iq3_s +blk.59.attn_v.weight q4_K +blk.63.attn_output.weight iq3_s +blk.63.attn_v.weight q4_K + +[IQ4_XS] iq4_xs +output.weight q6_K +blk.0.ffn_down.weight q5_K +blk.1.ffn_down.weight q5_K +blk.2.ffn_down.weight q5_K +blk.3.attn_v.weight q5_K +blk.3.ffn_down.weight q5_K +blk.4.ffn_down.weight q5_K +blk.5.ffn_down.weight q5_K +blk.6.ffn_down.weight q5_K +blk.7.attn_v.weight q5_K +blk.7.ffn_down.weight q5_K +blk.11.attn_v.weight q5_K +blk.15.attn_v.weight q5_K +blk.19.attn_v.weight q5_K +blk.23.attn_v.weight q5_K +blk.27.attn_v.weight q5_K +blk.31.attn_v.weight q5_K +blk.35.attn_v.weight q5_K +blk.39.attn_v.weight q5_K +blk.43.attn_v.weight q5_K +blk.47.attn_v.weight q5_K +blk.51.attn_v.weight q5_K +blk.55.attn_v.weight q5_K +blk.59.attn_v.weight q5_K +blk.63.attn_v.weight q5_K + +[IQ1_M] iq1_m +output.weight q5_K +token_embd.weight q2_K +blk.0.ffn_down.weight q2_K +blk.1.ffn_down.weight q2_K +blk.2.ffn_down.weight q2_K +blk.3.attn_output.weight iq2_xxs +blk.3.attn_v.weight q4_K +blk.3.ffn_down.weight q2_K +blk.4.ffn_down.weight q2_K +blk.5.ffn_down.weight q2_K +blk.6.ffn_down.weight q2_K +blk.7.attn_output.weight iq2_xxs +blk.7.attn_v.weight q4_K +blk.7.ffn_down.weight q2_K +blk.11.attn_output.weight iq2_xxs +blk.11.attn_v.weight q4_K +blk.15.attn_output.weight iq2_xxs +blk.15.attn_v.weight q4_K +blk.19.attn_output.weight iq2_xxs +blk.19.attn_v.weight q4_K +blk.23.attn_output.weight iq2_xxs +blk.23.attn_v.weight q4_K +blk.27.attn_output.weight iq2_xxs +blk.27.attn_v.weight q4_K +blk.31.attn_output.weight iq2_xxs +blk.31.attn_v.weight q4_K +blk.35.attn_output.weight iq2_xxs +blk.35.attn_v.weight q4_K +blk.39.attn_output.weight iq2_xxs +blk.39.attn_v.weight q4_K +blk.43.attn_output.weight iq2_xxs +blk.43.attn_v.weight q4_K +blk.47.attn_output.weight iq2_xxs +blk.47.attn_v.weight q4_K +blk.51.attn_output.weight iq2_xxs +blk.51.attn_v.weight q4_K +blk.55.attn_output.weight iq2_xxs +blk.55.attn_v.weight q4_K +blk.59.attn_output.weight iq2_xxs +blk.59.attn_v.weight q4_K +blk.63.attn_output.weight iq2_xxs +blk.63.attn_v.weight q4_K + +[BF16] bf16 +output.weight q6_K + +[TQ1_0] tq1_0 +output.weight q6_K +token_embd.weight q4_K + +[TQ2_0] tq2_0 +output.weight q6_K +token_embd.weight q4_K + +[MXFP4_MOE] mxfp4 +output.weight q8_0 +token_embd.weight q8_0 +blk.0.attn_gate.weight q8_0 +blk.0.attn_qkv.weight q8_0 +blk.0.ffn_down.weight q8_0 +blk.0.ffn_gate.weight q8_0 +blk.0.ffn_up.weight q8_0 +blk.0.ssm_alpha.weight q8_0 +blk.0.ssm_beta.weight q8_0 +blk.0.ssm_out.weight q8_0 +blk.1.attn_gate.weight q8_0 +blk.1.attn_qkv.weight q8_0 +blk.1.ffn_down.weight q8_0 +blk.1.ffn_gate.weight q8_0 +blk.1.ffn_up.weight q8_0 +blk.1.ssm_alpha.weight q8_0 +blk.1.ssm_beta.weight q8_0 +blk.1.ssm_out.weight q8_0 +blk.2.attn_gate.weight q8_0 +blk.2.attn_qkv.weight q8_0 +blk.2.ffn_down.weight q8_0 +blk.2.ffn_gate.weight q8_0 +blk.2.ffn_up.weight q8_0 +blk.2.ssm_alpha.weight q8_0 +blk.2.ssm_beta.weight q8_0 +blk.2.ssm_out.weight q8_0 +blk.3.attn_k.weight q8_0 +blk.3.attn_output.weight q8_0 +blk.3.attn_q.weight q8_0 +blk.3.attn_v.weight q8_0 +blk.3.ffn_down.weight q8_0 +blk.3.ffn_gate.weight q8_0 +blk.3.ffn_up.weight q8_0 +blk.4.attn_gate.weight q8_0 +blk.4.attn_qkv.weight q8_0 +blk.4.ffn_down.weight q8_0 +blk.4.ffn_gate.weight q8_0 +blk.4.ffn_up.weight q8_0 +blk.4.ssm_alpha.weight q8_0 +blk.4.ssm_beta.weight q8_0 +blk.4.ssm_out.weight q8_0 +blk.5.attn_gate.weight q8_0 +blk.5.attn_qkv.weight q8_0 +blk.5.ffn_down.weight q8_0 +blk.5.ffn_gate.weight q8_0 +blk.5.ffn_up.weight q8_0 +blk.5.ssm_alpha.weight q8_0 +blk.5.ssm_beta.weight q8_0 +blk.5.ssm_out.weight q8_0 +blk.6.attn_gate.weight q8_0 +blk.6.attn_qkv.weight q8_0 +blk.6.ffn_down.weight q8_0 +blk.6.ffn_gate.weight q8_0 +blk.6.ffn_up.weight q8_0 +blk.6.ssm_alpha.weight q8_0 +blk.6.ssm_beta.weight q8_0 +blk.6.ssm_out.weight q8_0 +blk.7.attn_k.weight q8_0 +blk.7.attn_output.weight q8_0 +blk.7.attn_q.weight q8_0 +blk.7.attn_v.weight q8_0 +blk.7.ffn_down.weight q8_0 +blk.7.ffn_gate.weight q8_0 +blk.7.ffn_up.weight q8_0 +blk.8.attn_gate.weight q8_0 +blk.8.attn_qkv.weight q8_0 +blk.8.ffn_down.weight q8_0 +blk.8.ffn_gate.weight q8_0 +blk.8.ffn_up.weight q8_0 +blk.8.ssm_alpha.weight q8_0 +blk.8.ssm_beta.weight q8_0 +blk.8.ssm_out.weight q8_0 +blk.9.attn_gate.weight q8_0 +blk.9.attn_qkv.weight q8_0 +blk.9.ffn_down.weight q8_0 +blk.9.ffn_gate.weight q8_0 +blk.9.ffn_up.weight q8_0 +blk.9.ssm_alpha.weight q8_0 +blk.9.ssm_beta.weight q8_0 +blk.9.ssm_out.weight q8_0 +blk.10.attn_gate.weight q8_0 +blk.10.attn_qkv.weight q8_0 +blk.10.ffn_down.weight q8_0 +blk.10.ffn_gate.weight q8_0 +blk.10.ffn_up.weight q8_0 +blk.10.ssm_alpha.weight q8_0 +blk.10.ssm_beta.weight q8_0 +blk.10.ssm_out.weight q8_0 +blk.11.attn_k.weight q8_0 +blk.11.attn_output.weight q8_0 +blk.11.attn_q.weight q8_0 +blk.11.attn_v.weight q8_0 +blk.11.ffn_down.weight q8_0 +blk.11.ffn_gate.weight q8_0 +blk.11.ffn_up.weight q8_0 +blk.12.attn_gate.weight q8_0 +blk.12.attn_qkv.weight q8_0 +blk.12.ffn_down.weight q8_0 +blk.12.ffn_gate.weight q8_0 +blk.12.ffn_up.weight q8_0 +blk.12.ssm_alpha.weight q8_0 +blk.12.ssm_beta.weight q8_0 +blk.12.ssm_out.weight q8_0 +blk.13.attn_gate.weight q8_0 +blk.13.attn_qkv.weight q8_0 +blk.13.ffn_down.weight q8_0 +blk.13.ffn_gate.weight q8_0 +blk.13.ffn_up.weight q8_0 +blk.13.ssm_alpha.weight q8_0 +blk.13.ssm_beta.weight q8_0 +blk.13.ssm_out.weight q8_0 +blk.14.attn_gate.weight q8_0 +blk.14.attn_qkv.weight q8_0 +blk.14.ffn_down.weight q8_0 +blk.14.ffn_gate.weight q8_0 +blk.14.ffn_up.weight q8_0 +blk.14.ssm_alpha.weight q8_0 +blk.14.ssm_beta.weight q8_0 +blk.14.ssm_out.weight q8_0 +blk.15.attn_k.weight q8_0 +blk.15.attn_output.weight q8_0 +blk.15.attn_q.weight q8_0 +blk.15.attn_v.weight q8_0 +blk.15.ffn_down.weight q8_0 +blk.15.ffn_gate.weight q8_0 +blk.15.ffn_up.weight q8_0 +blk.16.attn_gate.weight q8_0 +blk.16.attn_qkv.weight q8_0 +blk.16.ffn_down.weight q8_0 +blk.16.ffn_gate.weight q8_0 +blk.16.ffn_up.weight q8_0 +blk.16.ssm_alpha.weight q8_0 +blk.16.ssm_beta.weight q8_0 +blk.16.ssm_out.weight q8_0 +blk.17.attn_gate.weight q8_0 +blk.17.attn_qkv.weight q8_0 +blk.17.ffn_down.weight q8_0 +blk.17.ffn_gate.weight q8_0 +blk.17.ffn_up.weight q8_0 +blk.17.ssm_alpha.weight q8_0 +blk.17.ssm_beta.weight q8_0 +blk.17.ssm_out.weight q8_0 +blk.18.attn_gate.weight q8_0 +blk.18.attn_qkv.weight q8_0 +blk.18.ffn_down.weight q8_0 +blk.18.ffn_gate.weight q8_0 +blk.18.ffn_up.weight q8_0 +blk.18.ssm_alpha.weight q8_0 +blk.18.ssm_beta.weight q8_0 +blk.18.ssm_out.weight q8_0 +blk.19.attn_k.weight q8_0 +blk.19.attn_output.weight q8_0 +blk.19.attn_q.weight q8_0 +blk.19.attn_v.weight q8_0 +blk.19.ffn_down.weight q8_0 +blk.19.ffn_gate.weight q8_0 +blk.19.ffn_up.weight q8_0 +blk.20.attn_gate.weight q8_0 +blk.20.attn_qkv.weight q8_0 +blk.20.ffn_down.weight q8_0 +blk.20.ffn_gate.weight q8_0 +blk.20.ffn_up.weight q8_0 +blk.20.ssm_alpha.weight q8_0 +blk.20.ssm_beta.weight q8_0 +blk.20.ssm_out.weight q8_0 +blk.21.attn_gate.weight q8_0 +blk.21.attn_qkv.weight q8_0 +blk.21.ffn_down.weight q8_0 +blk.21.ffn_gate.weight q8_0 +blk.21.ffn_up.weight q8_0 +blk.21.ssm_alpha.weight q8_0 +blk.21.ssm_beta.weight q8_0 +blk.21.ssm_out.weight q8_0 +blk.22.attn_gate.weight q8_0 +blk.22.attn_qkv.weight q8_0 +blk.22.ffn_down.weight q8_0 +blk.22.ffn_gate.weight q8_0 +blk.22.ffn_up.weight q8_0 +blk.22.ssm_alpha.weight q8_0 +blk.22.ssm_beta.weight q8_0 +blk.22.ssm_out.weight q8_0 +blk.23.attn_k.weight q8_0 +blk.23.attn_output.weight q8_0 +blk.23.attn_q.weight q8_0 +blk.23.attn_v.weight q8_0 +blk.23.ffn_down.weight q8_0 +blk.23.ffn_gate.weight q8_0 +blk.23.ffn_up.weight q8_0 +blk.24.attn_gate.weight q8_0 +blk.24.attn_qkv.weight q8_0 +blk.24.ffn_down.weight q8_0 +blk.24.ffn_gate.weight q8_0 +blk.24.ffn_up.weight q8_0 +blk.24.ssm_alpha.weight q8_0 +blk.24.ssm_beta.weight q8_0 +blk.24.ssm_out.weight q8_0 +blk.25.attn_gate.weight q8_0 +blk.25.attn_qkv.weight q8_0 +blk.25.ffn_down.weight q8_0 +blk.25.ffn_gate.weight q8_0 +blk.25.ffn_up.weight q8_0 +blk.25.ssm_alpha.weight q8_0 +blk.25.ssm_beta.weight q8_0 +blk.25.ssm_out.weight q8_0 +blk.26.attn_gate.weight q8_0 +blk.26.attn_qkv.weight q8_0 +blk.26.ffn_down.weight q8_0 +blk.26.ffn_gate.weight q8_0 +blk.26.ffn_up.weight q8_0 +blk.26.ssm_alpha.weight q8_0 +blk.26.ssm_beta.weight q8_0 +blk.26.ssm_out.weight q8_0 +blk.27.attn_k.weight q8_0 +blk.27.attn_output.weight q8_0 +blk.27.attn_q.weight q8_0 +blk.27.attn_v.weight q8_0 +blk.27.ffn_down.weight q8_0 +blk.27.ffn_gate.weight q8_0 +blk.27.ffn_up.weight q8_0 +blk.28.attn_gate.weight q8_0 +blk.28.attn_qkv.weight q8_0 +blk.28.ffn_down.weight q8_0 +blk.28.ffn_gate.weight q8_0 +blk.28.ffn_up.weight q8_0 +blk.28.ssm_alpha.weight q8_0 +blk.28.ssm_beta.weight q8_0 +blk.28.ssm_out.weight q8_0 +blk.29.attn_gate.weight q8_0 +blk.29.attn_qkv.weight q8_0 +blk.29.ffn_down.weight q8_0 +blk.29.ffn_gate.weight q8_0 +blk.29.ffn_up.weight q8_0 +blk.29.ssm_alpha.weight q8_0 +blk.29.ssm_beta.weight q8_0 +blk.29.ssm_out.weight q8_0 +blk.30.attn_gate.weight q8_0 +blk.30.attn_qkv.weight q8_0 +blk.30.ffn_down.weight q8_0 +blk.30.ffn_gate.weight q8_0 +blk.30.ffn_up.weight q8_0 +blk.30.ssm_alpha.weight q8_0 +blk.30.ssm_beta.weight q8_0 +blk.30.ssm_out.weight q8_0 +blk.31.attn_k.weight q8_0 +blk.31.attn_output.weight q8_0 +blk.31.attn_q.weight q8_0 +blk.31.attn_v.weight q8_0 +blk.31.ffn_down.weight q8_0 +blk.31.ffn_gate.weight q8_0 +blk.31.ffn_up.weight q8_0 +blk.32.attn_gate.weight q8_0 +blk.32.attn_qkv.weight q8_0 +blk.32.ffn_down.weight q8_0 +blk.32.ffn_gate.weight q8_0 +blk.32.ffn_up.weight q8_0 +blk.32.ssm_alpha.weight q8_0 +blk.32.ssm_beta.weight q8_0 +blk.32.ssm_out.weight q8_0 +blk.33.attn_gate.weight q8_0 +blk.33.attn_qkv.weight q8_0 +blk.33.ffn_down.weight q8_0 +blk.33.ffn_gate.weight q8_0 +blk.33.ffn_up.weight q8_0 +blk.33.ssm_alpha.weight q8_0 +blk.33.ssm_beta.weight q8_0 +blk.33.ssm_out.weight q8_0 +blk.34.attn_gate.weight q8_0 +blk.34.attn_qkv.weight q8_0 +blk.34.ffn_down.weight q8_0 +blk.34.ffn_gate.weight q8_0 +blk.34.ffn_up.weight q8_0 +blk.34.ssm_alpha.weight q8_0 +blk.34.ssm_beta.weight q8_0 +blk.34.ssm_out.weight q8_0 +blk.35.attn_k.weight q8_0 +blk.35.attn_output.weight q8_0 +blk.35.attn_q.weight q8_0 +blk.35.attn_v.weight q8_0 +blk.35.ffn_down.weight q8_0 +blk.35.ffn_gate.weight q8_0 +blk.35.ffn_up.weight q8_0 +blk.36.attn_gate.weight q8_0 +blk.36.attn_qkv.weight q8_0 +blk.36.ffn_down.weight q8_0 +blk.36.ffn_gate.weight q8_0 +blk.36.ffn_up.weight q8_0 +blk.36.ssm_alpha.weight q8_0 +blk.36.ssm_beta.weight q8_0 +blk.36.ssm_out.weight q8_0 +blk.37.attn_gate.weight q8_0 +blk.37.attn_qkv.weight q8_0 +blk.37.ffn_down.weight q8_0 +blk.37.ffn_gate.weight q8_0 +blk.37.ffn_up.weight q8_0 +blk.37.ssm_alpha.weight q8_0 +blk.37.ssm_beta.weight q8_0 +blk.37.ssm_out.weight q8_0 +blk.38.attn_gate.weight q8_0 +blk.38.attn_qkv.weight q8_0 +blk.38.ffn_down.weight q8_0 +blk.38.ffn_gate.weight q8_0 +blk.38.ffn_up.weight q8_0 +blk.38.ssm_alpha.weight q8_0 +blk.38.ssm_beta.weight q8_0 +blk.38.ssm_out.weight q8_0 +blk.39.attn_k.weight q8_0 +blk.39.attn_output.weight q8_0 +blk.39.attn_q.weight q8_0 +blk.39.attn_v.weight q8_0 +blk.39.ffn_down.weight q8_0 +blk.39.ffn_gate.weight q8_0 +blk.39.ffn_up.weight q8_0 +blk.40.attn_gate.weight q8_0 +blk.40.attn_qkv.weight q8_0 +blk.40.ffn_down.weight q8_0 +blk.40.ffn_gate.weight q8_0 +blk.40.ffn_up.weight q8_0 +blk.40.ssm_alpha.weight q8_0 +blk.40.ssm_beta.weight q8_0 +blk.40.ssm_out.weight q8_0 +blk.41.attn_gate.weight q8_0 +blk.41.attn_qkv.weight q8_0 +blk.41.ffn_down.weight q8_0 +blk.41.ffn_gate.weight q8_0 +blk.41.ffn_up.weight q8_0 +blk.41.ssm_alpha.weight q8_0 +blk.41.ssm_beta.weight q8_0 +blk.41.ssm_out.weight q8_0 +blk.42.attn_gate.weight q8_0 +blk.42.attn_qkv.weight q8_0 +blk.42.ffn_down.weight q8_0 +blk.42.ffn_gate.weight q8_0 +blk.42.ffn_up.weight q8_0 +blk.42.ssm_alpha.weight q8_0 +blk.42.ssm_beta.weight q8_0 +blk.42.ssm_out.weight q8_0 +blk.43.attn_k.weight q8_0 +blk.43.attn_output.weight q8_0 +blk.43.attn_q.weight q8_0 +blk.43.attn_v.weight q8_0 +blk.43.ffn_down.weight q8_0 +blk.43.ffn_gate.weight q8_0 +blk.43.ffn_up.weight q8_0 +blk.44.attn_gate.weight q8_0 +blk.44.attn_qkv.weight q8_0 +blk.44.ffn_down.weight q8_0 +blk.44.ffn_gate.weight q8_0 +blk.44.ffn_up.weight q8_0 +blk.44.ssm_alpha.weight q8_0 +blk.44.ssm_beta.weight q8_0 +blk.44.ssm_out.weight q8_0 +blk.45.attn_gate.weight q8_0 +blk.45.attn_qkv.weight q8_0 +blk.45.ffn_down.weight q8_0 +blk.45.ffn_gate.weight q8_0 +blk.45.ffn_up.weight q8_0 +blk.45.ssm_alpha.weight q8_0 +blk.45.ssm_beta.weight q8_0 +blk.45.ssm_out.weight q8_0 +blk.46.attn_gate.weight q8_0 +blk.46.attn_qkv.weight q8_0 +blk.46.ffn_down.weight q8_0 +blk.46.ffn_gate.weight q8_0 +blk.46.ffn_up.weight q8_0 +blk.46.ssm_alpha.weight q8_0 +blk.46.ssm_beta.weight q8_0 +blk.46.ssm_out.weight q8_0 +blk.47.attn_k.weight q8_0 +blk.47.attn_output.weight q8_0 +blk.47.attn_q.weight q8_0 +blk.47.attn_v.weight q8_0 +blk.47.ffn_down.weight q8_0 +blk.47.ffn_gate.weight q8_0 +blk.47.ffn_up.weight q8_0 +blk.48.attn_gate.weight q8_0 +blk.48.attn_qkv.weight q8_0 +blk.48.ffn_down.weight q8_0 +blk.48.ffn_gate.weight q8_0 +blk.48.ffn_up.weight q8_0 +blk.48.ssm_alpha.weight q8_0 +blk.48.ssm_beta.weight q8_0 +blk.48.ssm_out.weight q8_0 +blk.49.attn_gate.weight q8_0 +blk.49.attn_qkv.weight q8_0 +blk.49.ffn_down.weight q8_0 +blk.49.ffn_gate.weight q8_0 +blk.49.ffn_up.weight q8_0 +blk.49.ssm_alpha.weight q8_0 +blk.49.ssm_beta.weight q8_0 +blk.49.ssm_out.weight q8_0 +blk.50.attn_gate.weight q8_0 +blk.50.attn_qkv.weight q8_0 +blk.50.ffn_down.weight q8_0 +blk.50.ffn_gate.weight q8_0 +blk.50.ffn_up.weight q8_0 +blk.50.ssm_alpha.weight q8_0 +blk.50.ssm_beta.weight q8_0 +blk.50.ssm_out.weight q8_0 +blk.51.attn_k.weight q8_0 +blk.51.attn_output.weight q8_0 +blk.51.attn_q.weight q8_0 +blk.51.attn_v.weight q8_0 +blk.51.ffn_down.weight q8_0 +blk.51.ffn_gate.weight q8_0 +blk.51.ffn_up.weight q8_0 +blk.52.attn_gate.weight q8_0 +blk.52.attn_qkv.weight q8_0 +blk.52.ffn_down.weight q8_0 +blk.52.ffn_gate.weight q8_0 +blk.52.ffn_up.weight q8_0 +blk.52.ssm_alpha.weight q8_0 +blk.52.ssm_beta.weight q8_0 +blk.52.ssm_out.weight q8_0 +blk.53.attn_gate.weight q8_0 +blk.53.attn_qkv.weight q8_0 +blk.53.ffn_down.weight q8_0 +blk.53.ffn_gate.weight q8_0 +blk.53.ffn_up.weight q8_0 +blk.53.ssm_alpha.weight q8_0 +blk.53.ssm_beta.weight q8_0 +blk.53.ssm_out.weight q8_0 +blk.54.attn_gate.weight q8_0 +blk.54.attn_qkv.weight q8_0 +blk.54.ffn_down.weight q8_0 +blk.54.ffn_gate.weight q8_0 +blk.54.ffn_up.weight q8_0 +blk.54.ssm_alpha.weight q8_0 +blk.54.ssm_beta.weight q8_0 +blk.54.ssm_out.weight q8_0 +blk.55.attn_k.weight q8_0 +blk.55.attn_output.weight q8_0 +blk.55.attn_q.weight q8_0 +blk.55.attn_v.weight q8_0 +blk.55.ffn_down.weight q8_0 +blk.55.ffn_gate.weight q8_0 +blk.55.ffn_up.weight q8_0 +blk.56.attn_gate.weight q8_0 +blk.56.attn_qkv.weight q8_0 +blk.56.ffn_down.weight q8_0 +blk.56.ffn_gate.weight q8_0 +blk.56.ffn_up.weight q8_0 +blk.56.ssm_alpha.weight q8_0 +blk.56.ssm_beta.weight q8_0 +blk.56.ssm_out.weight q8_0 +blk.57.attn_gate.weight q8_0 +blk.57.attn_qkv.weight q8_0 +blk.57.ffn_down.weight q8_0 +blk.57.ffn_gate.weight q8_0 +blk.57.ffn_up.weight q8_0 +blk.57.ssm_alpha.weight q8_0 +blk.57.ssm_beta.weight q8_0 +blk.57.ssm_out.weight q8_0 +blk.58.attn_gate.weight q8_0 +blk.58.attn_qkv.weight q8_0 +blk.58.ffn_down.weight q8_0 +blk.58.ffn_gate.weight q8_0 +blk.58.ffn_up.weight q8_0 +blk.58.ssm_alpha.weight q8_0 +blk.58.ssm_beta.weight q8_0 +blk.58.ssm_out.weight q8_0 +blk.59.attn_k.weight q8_0 +blk.59.attn_output.weight q8_0 +blk.59.attn_q.weight q8_0 +blk.59.attn_v.weight q8_0 +blk.59.ffn_down.weight q8_0 +blk.59.ffn_gate.weight q8_0 +blk.59.ffn_up.weight q8_0 +blk.60.attn_gate.weight q8_0 +blk.60.attn_qkv.weight q8_0 +blk.60.ffn_down.weight q8_0 +blk.60.ffn_gate.weight q8_0 +blk.60.ffn_up.weight q8_0 +blk.60.ssm_alpha.weight q8_0 +blk.60.ssm_beta.weight q8_0 +blk.60.ssm_out.weight q8_0 +blk.61.attn_gate.weight q8_0 +blk.61.attn_qkv.weight q8_0 +blk.61.ffn_down.weight q8_0 +blk.61.ffn_gate.weight q8_0 +blk.61.ffn_up.weight q8_0 +blk.61.ssm_alpha.weight q8_0 +blk.61.ssm_beta.weight q8_0 +blk.61.ssm_out.weight q8_0 +blk.62.attn_gate.weight q8_0 +blk.62.attn_qkv.weight q8_0 +blk.62.ffn_down.weight q8_0 +blk.62.ffn_gate.weight q8_0 +blk.62.ffn_up.weight q8_0 +blk.62.ssm_alpha.weight q8_0 +blk.62.ssm_beta.weight q8_0 +blk.62.ssm_out.weight q8_0 +blk.63.attn_k.weight q8_0 +blk.63.attn_output.weight q8_0 +blk.63.attn_q.weight q8_0 +blk.63.attn_v.weight q8_0 +blk.63.ffn_down.weight q8_0 +blk.63.ffn_gate.weight q8_0 +blk.63.ffn_up.weight q8_0 diff --git a/tests/snapshots/qwen3.5-397b-a17b.schema b/tests/snapshots/qwen3.5-397b-a17b.schema new file mode 100644 index 0000000000..801c9b4f16 --- /dev/null +++ b/tests/snapshots/qwen3.5-397b-a17b.schema @@ -0,0 +1,2148 @@ +# Model: Qwen3.5-397B-A17B +# n_embd=4096, n_ff=0, n_vocab=248320, n_layer=60, n_head=32, n_head_kv=2, n_expert=512 + +[F32] f32 +output.weight q6_K + +[F16] f16 +output.weight q6_K + +[Q4_0] q4_0 +output.weight q6_K + +[Q4_1] q4_1 +output.weight q6_K + +[Q8_0] q8_0 + +[Q5_0] q5_0 +output.weight q6_K + +[Q5_1] q5_1 +output.weight q6_K + +[Q2_K] q2_K +output.weight q6_K +blk.0.ffn_down_exps.weight q3_K +blk.0.ffn_down_shexp.weight q3_K +blk.1.ffn_down_exps.weight q3_K +blk.1.ffn_down_shexp.weight q3_K +blk.2.ffn_down_exps.weight q3_K +blk.2.ffn_down_shexp.weight q3_K +blk.3.attn_output.weight q3_K +blk.3.attn_v.weight q4_K +blk.3.ffn_down_exps.weight q3_K +blk.3.ffn_down_shexp.weight q3_K +blk.4.ffn_down_exps.weight q3_K +blk.4.ffn_down_shexp.weight q3_K +blk.5.ffn_down_exps.weight q3_K +blk.5.ffn_down_shexp.weight q3_K +blk.6.ffn_down_exps.weight q3_K +blk.6.ffn_down_shexp.weight q3_K +blk.7.attn_output.weight q3_K +blk.7.attn_v.weight q4_K +blk.7.ffn_down_exps.weight q3_K +blk.7.ffn_down_shexp.weight q3_K +blk.8.ffn_down_exps.weight q3_K +blk.8.ffn_down_shexp.weight q3_K +blk.9.ffn_down_exps.weight q3_K +blk.9.ffn_down_shexp.weight q3_K +blk.10.ffn_down_exps.weight q3_K +blk.10.ffn_down_shexp.weight q3_K +blk.11.attn_output.weight q3_K +blk.11.attn_v.weight q4_K +blk.11.ffn_down_exps.weight q3_K +blk.11.ffn_down_shexp.weight q3_K +blk.12.ffn_down_exps.weight q3_K +blk.12.ffn_down_shexp.weight q3_K +blk.13.ffn_down_exps.weight q3_K +blk.13.ffn_down_shexp.weight q3_K +blk.14.ffn_down_exps.weight q3_K +blk.14.ffn_down_shexp.weight q3_K +blk.15.attn_output.weight q3_K +blk.15.attn_v.weight q4_K +blk.15.ffn_down_exps.weight q3_K +blk.15.ffn_down_shexp.weight q3_K +blk.16.ffn_down_exps.weight q3_K +blk.16.ffn_down_shexp.weight q3_K +blk.17.ffn_down_exps.weight q3_K +blk.17.ffn_down_shexp.weight q3_K +blk.18.ffn_down_exps.weight q3_K +blk.18.ffn_down_shexp.weight q3_K +blk.19.attn_output.weight q3_K +blk.19.attn_v.weight q4_K +blk.19.ffn_down_exps.weight q3_K +blk.19.ffn_down_shexp.weight q3_K +blk.20.ffn_down_exps.weight q3_K +blk.20.ffn_down_shexp.weight q3_K +blk.21.ffn_down_exps.weight q3_K +blk.21.ffn_down_shexp.weight q3_K +blk.22.ffn_down_exps.weight q3_K +blk.22.ffn_down_shexp.weight q3_K +blk.23.attn_output.weight q3_K +blk.23.attn_v.weight q4_K +blk.23.ffn_down_exps.weight q3_K +blk.23.ffn_down_shexp.weight q3_K +blk.24.ffn_down_exps.weight q3_K +blk.24.ffn_down_shexp.weight q3_K +blk.25.ffn_down_exps.weight q3_K +blk.25.ffn_down_shexp.weight q3_K +blk.26.ffn_down_exps.weight q3_K +blk.26.ffn_down_shexp.weight q3_K +blk.27.attn_output.weight q3_K +blk.27.attn_v.weight q4_K +blk.27.ffn_down_exps.weight q3_K +blk.27.ffn_down_shexp.weight q3_K +blk.28.ffn_down_exps.weight q3_K +blk.28.ffn_down_shexp.weight q3_K +blk.29.ffn_down_exps.weight q3_K +blk.29.ffn_down_shexp.weight q3_K +blk.30.ffn_down_exps.weight q3_K +blk.30.ffn_down_shexp.weight q3_K +blk.31.attn_output.weight q3_K +blk.31.attn_v.weight q4_K +blk.31.ffn_down_exps.weight q3_K +blk.31.ffn_down_shexp.weight q3_K +blk.32.ffn_down_exps.weight q3_K +blk.32.ffn_down_shexp.weight q3_K +blk.33.ffn_down_exps.weight q3_K +blk.33.ffn_down_shexp.weight q3_K +blk.34.ffn_down_exps.weight q3_K +blk.34.ffn_down_shexp.weight q3_K +blk.35.attn_output.weight q3_K +blk.35.attn_v.weight q4_K +blk.35.ffn_down_exps.weight q3_K +blk.35.ffn_down_shexp.weight q3_K +blk.36.ffn_down_exps.weight q3_K +blk.36.ffn_down_shexp.weight q3_K +blk.37.ffn_down_exps.weight q3_K +blk.37.ffn_down_shexp.weight q3_K +blk.38.ffn_down_exps.weight q3_K +blk.38.ffn_down_shexp.weight q3_K +blk.39.attn_output.weight q3_K +blk.39.attn_v.weight q4_K +blk.39.ffn_down_exps.weight q3_K +blk.39.ffn_down_shexp.weight q3_K +blk.40.ffn_down_exps.weight q3_K +blk.40.ffn_down_shexp.weight q3_K +blk.41.ffn_down_exps.weight q3_K +blk.41.ffn_down_shexp.weight q3_K +blk.42.ffn_down_exps.weight q3_K +blk.42.ffn_down_shexp.weight q3_K +blk.43.attn_output.weight q3_K +blk.43.attn_v.weight q4_K +blk.43.ffn_down_exps.weight q3_K +blk.43.ffn_down_shexp.weight q3_K +blk.44.ffn_down_exps.weight q3_K +blk.44.ffn_down_shexp.weight q3_K +blk.45.ffn_down_exps.weight q3_K +blk.45.ffn_down_shexp.weight q3_K +blk.46.ffn_down_exps.weight q3_K +blk.46.ffn_down_shexp.weight q3_K +blk.47.attn_output.weight q3_K +blk.47.attn_v.weight q4_K +blk.47.ffn_down_exps.weight q3_K +blk.47.ffn_down_shexp.weight q3_K +blk.48.ffn_down_exps.weight q3_K +blk.48.ffn_down_shexp.weight q3_K +blk.49.ffn_down_exps.weight q3_K +blk.49.ffn_down_shexp.weight q3_K +blk.50.ffn_down_exps.weight q3_K +blk.50.ffn_down_shexp.weight q3_K +blk.51.attn_output.weight q3_K +blk.51.attn_v.weight q4_K +blk.51.ffn_down_exps.weight q3_K +blk.51.ffn_down_shexp.weight q3_K +blk.52.ffn_down_exps.weight q3_K +blk.52.ffn_down_shexp.weight q3_K +blk.53.ffn_down_exps.weight q3_K +blk.53.ffn_down_shexp.weight q3_K +blk.54.ffn_down_exps.weight q3_K +blk.54.ffn_down_shexp.weight q3_K +blk.55.attn_output.weight q3_K +blk.55.attn_v.weight q4_K +blk.55.ffn_down_exps.weight q3_K +blk.55.ffn_down_shexp.weight q3_K +blk.56.ffn_down_exps.weight q3_K +blk.56.ffn_down_shexp.weight q3_K +blk.57.ffn_down_exps.weight q3_K +blk.57.ffn_down_shexp.weight q3_K +blk.58.ffn_down_exps.weight q3_K +blk.58.ffn_down_shexp.weight q3_K +blk.59.attn_output.weight q3_K +blk.59.attn_v.weight q4_K +blk.59.ffn_down_exps.weight q3_K +blk.59.ffn_down_shexp.weight q3_K + +[Q3_K_S] q3_K +output.weight q6_K + +[Q3_K_M] q3_K +output.weight q6_K +blk.0.attn_qkv.weight q4_K +blk.0.ffn_down_exps.weight q5_K +blk.0.ffn_down_shexp.weight q5_K +blk.1.attn_qkv.weight q4_K +blk.1.ffn_down_exps.weight q5_K +blk.1.ffn_down_shexp.weight q5_K +blk.2.attn_qkv.weight q4_K +blk.2.ffn_down_exps.weight q5_K +blk.2.ffn_down_shexp.weight q5_K +blk.3.attn_output.weight q4_K +blk.3.attn_v.weight q5_K +blk.3.ffn_down_exps.weight q4_K +blk.3.ffn_down_shexp.weight q4_K +blk.4.attn_qkv.weight q4_K +blk.4.ffn_down_exps.weight q4_K +blk.4.ffn_down_shexp.weight q4_K +blk.5.attn_qkv.weight q4_K +blk.5.ffn_down_exps.weight q4_K +blk.5.ffn_down_shexp.weight q4_K +blk.6.attn_qkv.weight q4_K +blk.6.ffn_down_exps.weight q4_K +blk.6.ffn_down_shexp.weight q4_K +blk.7.attn_output.weight q4_K +blk.7.attn_v.weight q5_K +blk.7.ffn_down_exps.weight q4_K +blk.7.ffn_down_shexp.weight q4_K +blk.8.attn_qkv.weight q4_K +blk.8.ffn_down_exps.weight q4_K +blk.8.ffn_down_shexp.weight q4_K +blk.9.attn_qkv.weight q4_K +blk.9.ffn_down_exps.weight q4_K +blk.9.ffn_down_shexp.weight q4_K +blk.10.attn_qkv.weight q4_K +blk.10.ffn_down_exps.weight q4_K +blk.10.ffn_down_shexp.weight q4_K +blk.11.attn_output.weight q4_K +blk.11.attn_v.weight q4_K +blk.11.ffn_down_exps.weight q4_K +blk.11.ffn_down_shexp.weight q4_K +blk.12.attn_qkv.weight q4_K +blk.12.ffn_down_exps.weight q4_K +blk.12.ffn_down_shexp.weight q4_K +blk.13.attn_qkv.weight q4_K +blk.13.ffn_down_exps.weight q4_K +blk.13.ffn_down_shexp.weight q4_K +blk.14.attn_qkv.weight q4_K +blk.14.ffn_down_exps.weight q4_K +blk.14.ffn_down_shexp.weight q4_K +blk.15.attn_output.weight q4_K +blk.15.attn_v.weight q4_K +blk.15.ffn_down_exps.weight q4_K +blk.15.ffn_down_shexp.weight q4_K +blk.16.attn_qkv.weight q4_K +blk.16.ffn_down_exps.weight q4_K +blk.16.ffn_down_shexp.weight q4_K +blk.17.attn_qkv.weight q4_K +blk.17.ffn_down_exps.weight q4_K +blk.17.ffn_down_shexp.weight q4_K +blk.18.attn_qkv.weight q4_K +blk.18.ffn_down_exps.weight q4_K +blk.18.ffn_down_shexp.weight q4_K +blk.19.attn_output.weight q4_K +blk.19.attn_v.weight q4_K +blk.19.ffn_down_exps.weight q4_K +blk.19.ffn_down_shexp.weight q4_K +blk.20.attn_qkv.weight q4_K +blk.20.ffn_down_exps.weight q4_K +blk.20.ffn_down_shexp.weight q4_K +blk.21.attn_qkv.weight q4_K +blk.21.ffn_down_exps.weight q4_K +blk.21.ffn_down_shexp.weight q4_K +blk.22.attn_qkv.weight q4_K +blk.22.ffn_down_exps.weight q4_K +blk.22.ffn_down_shexp.weight q4_K +blk.23.attn_output.weight q4_K +blk.23.attn_v.weight q4_K +blk.23.ffn_down_exps.weight q4_K +blk.23.ffn_down_shexp.weight q4_K +blk.24.attn_qkv.weight q4_K +blk.24.ffn_down_exps.weight q4_K +blk.24.ffn_down_shexp.weight q4_K +blk.25.attn_qkv.weight q4_K +blk.25.ffn_down_exps.weight q4_K +blk.25.ffn_down_shexp.weight q4_K +blk.26.attn_qkv.weight q4_K +blk.26.ffn_down_exps.weight q4_K +blk.26.ffn_down_shexp.weight q4_K +blk.27.attn_output.weight q4_K +blk.27.attn_v.weight q4_K +blk.27.ffn_down_exps.weight q4_K +blk.27.ffn_down_shexp.weight q4_K +blk.28.attn_qkv.weight q4_K +blk.28.ffn_down_exps.weight q4_K +blk.28.ffn_down_shexp.weight q4_K +blk.29.attn_qkv.weight q4_K +blk.29.ffn_down_exps.weight q4_K +blk.29.ffn_down_shexp.weight q4_K +blk.30.attn_qkv.weight q4_K +blk.30.ffn_down_exps.weight q4_K +blk.30.ffn_down_shexp.weight q4_K +blk.31.attn_output.weight q4_K +blk.31.attn_v.weight q4_K +blk.31.ffn_down_exps.weight q4_K +blk.31.ffn_down_shexp.weight q4_K +blk.32.attn_qkv.weight q4_K +blk.32.ffn_down_exps.weight q4_K +blk.32.ffn_down_shexp.weight q4_K +blk.33.attn_qkv.weight q4_K +blk.33.ffn_down_exps.weight q4_K +blk.33.ffn_down_shexp.weight q4_K +blk.34.attn_qkv.weight q4_K +blk.34.ffn_down_exps.weight q4_K +blk.34.ffn_down_shexp.weight q4_K +blk.35.attn_output.weight q4_K +blk.35.attn_v.weight q4_K +blk.35.ffn_down_exps.weight q4_K +blk.35.ffn_down_shexp.weight q4_K +blk.36.attn_qkv.weight q4_K +blk.36.ffn_down_exps.weight q4_K +blk.36.ffn_down_shexp.weight q4_K +blk.37.attn_qkv.weight q4_K +blk.37.ffn_down_exps.weight q4_K +blk.37.ffn_down_shexp.weight q4_K +blk.38.attn_qkv.weight q4_K +blk.38.ffn_down_exps.weight q4_K +blk.38.ffn_down_shexp.weight q4_K +blk.39.attn_output.weight q4_K +blk.39.attn_v.weight q4_K +blk.39.ffn_down_exps.weight q4_K +blk.39.ffn_down_shexp.weight q4_K +blk.40.attn_qkv.weight q4_K +blk.40.ffn_down_exps.weight q4_K +blk.40.ffn_down_shexp.weight q4_K +blk.41.attn_qkv.weight q4_K +blk.41.ffn_down_exps.weight q4_K +blk.41.ffn_down_shexp.weight q4_K +blk.42.attn_qkv.weight q4_K +blk.42.ffn_down_exps.weight q4_K +blk.42.ffn_down_shexp.weight q4_K +blk.43.attn_output.weight q4_K +blk.43.attn_v.weight q4_K +blk.43.ffn_down_exps.weight q4_K +blk.43.ffn_down_shexp.weight q4_K +blk.44.attn_qkv.weight q4_K +blk.44.ffn_down_exps.weight q4_K +blk.44.ffn_down_shexp.weight q4_K +blk.45.attn_qkv.weight q4_K +blk.45.ffn_down_exps.weight q4_K +blk.45.ffn_down_shexp.weight q4_K +blk.46.attn_qkv.weight q4_K +blk.46.ffn_down_exps.weight q4_K +blk.46.ffn_down_shexp.weight q4_K +blk.47.attn_output.weight q4_K +blk.47.attn_v.weight q4_K +blk.47.ffn_down_exps.weight q4_K +blk.47.ffn_down_shexp.weight q4_K +blk.48.attn_qkv.weight q4_K +blk.48.ffn_down_exps.weight q4_K +blk.48.ffn_down_shexp.weight q4_K +blk.49.attn_qkv.weight q4_K +blk.49.ffn_down_exps.weight q4_K +blk.49.ffn_down_shexp.weight q4_K +blk.50.attn_qkv.weight q4_K +blk.50.ffn_down_exps.weight q4_K +blk.50.ffn_down_shexp.weight q4_K +blk.51.attn_output.weight q4_K +blk.51.attn_v.weight q4_K +blk.51.ffn_down_exps.weight q4_K +blk.51.ffn_down_shexp.weight q4_K +blk.52.attn_qkv.weight q4_K +blk.52.ffn_down_exps.weight q4_K +blk.52.ffn_down_shexp.weight q4_K +blk.53.attn_qkv.weight q4_K +blk.53.ffn_down_exps.weight q4_K +blk.53.ffn_down_shexp.weight q4_K +blk.54.attn_qkv.weight q4_K +blk.54.ffn_down_exps.weight q4_K +blk.54.ffn_down_shexp.weight q4_K +blk.55.attn_output.weight q4_K +blk.55.attn_v.weight q4_K +blk.55.ffn_down_exps.weight q4_K +blk.55.ffn_down_shexp.weight q4_K +blk.56.attn_qkv.weight q4_K +blk.56.ffn_down_exps.weight q4_K +blk.56.ffn_down_shexp.weight q4_K +blk.57.attn_qkv.weight q4_K +blk.57.ffn_down_exps.weight q4_K +blk.57.ffn_down_shexp.weight q4_K +blk.58.attn_qkv.weight q4_K +blk.58.ffn_down_exps.weight q4_K +blk.58.ffn_down_shexp.weight q4_K +blk.59.attn_output.weight q4_K +blk.59.attn_v.weight q4_K +blk.59.ffn_down_exps.weight q4_K +blk.59.ffn_down_shexp.weight q4_K + +[Q3_K_L] q3_K +output.weight q6_K +blk.0.attn_qkv.weight q4_K +blk.0.ffn_down_exps.weight q5_K +blk.0.ffn_down_shexp.weight q5_K +blk.1.attn_qkv.weight q4_K +blk.1.ffn_down_exps.weight q5_K +blk.1.ffn_down_shexp.weight q5_K +blk.2.attn_qkv.weight q4_K +blk.2.ffn_down_exps.weight q5_K +blk.2.ffn_down_shexp.weight q5_K +blk.3.attn_output.weight q5_K +blk.3.attn_v.weight q5_K +blk.3.ffn_down_exps.weight q5_K +blk.3.ffn_down_shexp.weight q5_K +blk.4.attn_qkv.weight q4_K +blk.4.ffn_down_exps.weight q5_K +blk.4.ffn_down_shexp.weight q5_K +blk.5.attn_qkv.weight q4_K +blk.5.ffn_down_exps.weight q5_K +blk.5.ffn_down_shexp.weight q5_K +blk.6.attn_qkv.weight q4_K +blk.6.ffn_down_exps.weight q5_K +blk.6.ffn_down_shexp.weight q5_K +blk.7.attn_output.weight q5_K +blk.7.attn_v.weight q5_K +blk.7.ffn_down_exps.weight q5_K +blk.7.ffn_down_shexp.weight q5_K +blk.8.attn_qkv.weight q4_K +blk.8.ffn_down_exps.weight q5_K +blk.8.ffn_down_shexp.weight q5_K +blk.9.attn_qkv.weight q4_K +blk.9.ffn_down_exps.weight q5_K +blk.9.ffn_down_shexp.weight q5_K +blk.10.attn_qkv.weight q4_K +blk.10.ffn_down_exps.weight q5_K +blk.10.ffn_down_shexp.weight q5_K +blk.11.attn_output.weight q5_K +blk.11.attn_v.weight q5_K +blk.11.ffn_down_exps.weight q5_K +blk.11.ffn_down_shexp.weight q5_K +blk.12.attn_qkv.weight q4_K +blk.12.ffn_down_exps.weight q5_K +blk.12.ffn_down_shexp.weight q5_K +blk.13.attn_qkv.weight q4_K +blk.13.ffn_down_exps.weight q5_K +blk.13.ffn_down_shexp.weight q5_K +blk.14.attn_qkv.weight q4_K +blk.14.ffn_down_exps.weight q5_K +blk.14.ffn_down_shexp.weight q5_K +blk.15.attn_output.weight q5_K +blk.15.attn_v.weight q5_K +blk.15.ffn_down_exps.weight q5_K +blk.15.ffn_down_shexp.weight q5_K +blk.16.attn_qkv.weight q4_K +blk.16.ffn_down_exps.weight q5_K +blk.16.ffn_down_shexp.weight q5_K +blk.17.attn_qkv.weight q4_K +blk.17.ffn_down_exps.weight q5_K +blk.17.ffn_down_shexp.weight q5_K +blk.18.attn_qkv.weight q4_K +blk.18.ffn_down_exps.weight q5_K +blk.18.ffn_down_shexp.weight q5_K +blk.19.attn_output.weight q5_K +blk.19.attn_v.weight q5_K +blk.19.ffn_down_exps.weight q5_K +blk.19.ffn_down_shexp.weight q5_K +blk.20.attn_qkv.weight q4_K +blk.20.ffn_down_exps.weight q5_K +blk.20.ffn_down_shexp.weight q5_K +blk.21.attn_qkv.weight q4_K +blk.21.ffn_down_exps.weight q5_K +blk.21.ffn_down_shexp.weight q5_K +blk.22.attn_qkv.weight q4_K +blk.22.ffn_down_exps.weight q5_K +blk.22.ffn_down_shexp.weight q5_K +blk.23.attn_output.weight q5_K +blk.23.attn_v.weight q5_K +blk.23.ffn_down_exps.weight q5_K +blk.23.ffn_down_shexp.weight q5_K +blk.24.attn_qkv.weight q4_K +blk.24.ffn_down_exps.weight q5_K +blk.24.ffn_down_shexp.weight q5_K +blk.25.attn_qkv.weight q4_K +blk.25.ffn_down_exps.weight q5_K +blk.25.ffn_down_shexp.weight q5_K +blk.26.attn_qkv.weight q4_K +blk.26.ffn_down_exps.weight q5_K +blk.26.ffn_down_shexp.weight q5_K +blk.27.attn_output.weight q5_K +blk.27.attn_v.weight q5_K +blk.27.ffn_down_exps.weight q5_K +blk.27.ffn_down_shexp.weight q5_K +blk.28.attn_qkv.weight q4_K +blk.28.ffn_down_exps.weight q5_K +blk.28.ffn_down_shexp.weight q5_K +blk.29.attn_qkv.weight q4_K +blk.29.ffn_down_exps.weight q5_K +blk.29.ffn_down_shexp.weight q5_K +blk.30.attn_qkv.weight q4_K +blk.30.ffn_down_exps.weight q5_K +blk.30.ffn_down_shexp.weight q5_K +blk.31.attn_output.weight q5_K +blk.31.attn_v.weight q5_K +blk.31.ffn_down_exps.weight q5_K +blk.31.ffn_down_shexp.weight q5_K +blk.32.attn_qkv.weight q4_K +blk.32.ffn_down_exps.weight q5_K +blk.32.ffn_down_shexp.weight q5_K +blk.33.attn_qkv.weight q4_K +blk.33.ffn_down_exps.weight q5_K +blk.33.ffn_down_shexp.weight q5_K +blk.34.attn_qkv.weight q4_K +blk.34.ffn_down_exps.weight q5_K +blk.34.ffn_down_shexp.weight q5_K +blk.35.attn_output.weight q5_K +blk.35.attn_v.weight q5_K +blk.35.ffn_down_exps.weight q5_K +blk.35.ffn_down_shexp.weight q5_K +blk.36.attn_qkv.weight q4_K +blk.36.ffn_down_exps.weight q5_K +blk.36.ffn_down_shexp.weight q5_K +blk.37.attn_qkv.weight q4_K +blk.37.ffn_down_exps.weight q5_K +blk.37.ffn_down_shexp.weight q5_K +blk.38.attn_qkv.weight q4_K +blk.38.ffn_down_exps.weight q5_K +blk.38.ffn_down_shexp.weight q5_K +blk.39.attn_output.weight q5_K +blk.39.attn_v.weight q5_K +blk.39.ffn_down_exps.weight q5_K +blk.39.ffn_down_shexp.weight q5_K +blk.40.attn_qkv.weight q4_K +blk.40.ffn_down_exps.weight q5_K +blk.40.ffn_down_shexp.weight q5_K +blk.41.attn_qkv.weight q4_K +blk.41.ffn_down_exps.weight q5_K +blk.41.ffn_down_shexp.weight q5_K +blk.42.attn_qkv.weight q4_K +blk.42.ffn_down_exps.weight q5_K +blk.42.ffn_down_shexp.weight q5_K +blk.43.attn_output.weight q5_K +blk.43.attn_v.weight q5_K +blk.43.ffn_down_exps.weight q5_K +blk.43.ffn_down_shexp.weight q5_K +blk.44.attn_qkv.weight q4_K +blk.44.ffn_down_exps.weight q5_K +blk.44.ffn_down_shexp.weight q5_K +blk.45.attn_qkv.weight q4_K +blk.45.ffn_down_exps.weight q5_K +blk.45.ffn_down_shexp.weight q5_K +blk.46.attn_qkv.weight q4_K +blk.46.ffn_down_exps.weight q5_K +blk.46.ffn_down_shexp.weight q5_K +blk.47.attn_output.weight q5_K +blk.47.attn_v.weight q5_K +blk.47.ffn_down_exps.weight q5_K +blk.47.ffn_down_shexp.weight q5_K +blk.48.attn_qkv.weight q4_K +blk.48.ffn_down_exps.weight q5_K +blk.48.ffn_down_shexp.weight q5_K +blk.49.attn_qkv.weight q4_K +blk.49.ffn_down_exps.weight q5_K +blk.49.ffn_down_shexp.weight q5_K +blk.50.attn_qkv.weight q4_K +blk.50.ffn_down_exps.weight q5_K +blk.50.ffn_down_shexp.weight q5_K +blk.51.attn_output.weight q5_K +blk.51.attn_v.weight q5_K +blk.51.ffn_down_exps.weight q5_K +blk.51.ffn_down_shexp.weight q5_K +blk.52.attn_qkv.weight q4_K +blk.52.ffn_down_exps.weight q5_K +blk.52.ffn_down_shexp.weight q5_K +blk.53.attn_qkv.weight q4_K +blk.53.ffn_down_exps.weight q5_K +blk.53.ffn_down_shexp.weight q5_K +blk.54.attn_qkv.weight q4_K +blk.54.ffn_down_exps.weight q5_K +blk.54.ffn_down_shexp.weight q5_K +blk.55.attn_output.weight q5_K +blk.55.attn_v.weight q5_K +blk.55.ffn_down_exps.weight q5_K +blk.55.ffn_down_shexp.weight q5_K +blk.56.attn_qkv.weight q4_K +blk.56.ffn_down_exps.weight q5_K +blk.56.ffn_down_shexp.weight q5_K +blk.57.attn_qkv.weight q4_K +blk.57.ffn_down_exps.weight q5_K +blk.57.ffn_down_shexp.weight q5_K +blk.58.attn_qkv.weight q4_K +blk.58.ffn_down_exps.weight q5_K +blk.58.ffn_down_shexp.weight q5_K +blk.59.attn_output.weight q5_K +blk.59.attn_v.weight q5_K +blk.59.ffn_down_exps.weight q5_K +blk.59.ffn_down_shexp.weight q5_K + +[Q4_K_S] q4_K +output.weight q6_K +blk.0.ffn_down_exps.weight q5_K +blk.0.ffn_down_shexp.weight q5_K +blk.1.ffn_down_exps.weight q5_K +blk.1.ffn_down_shexp.weight q5_K +blk.2.ffn_down_exps.weight q5_K +blk.2.ffn_down_shexp.weight q5_K +blk.3.attn_v.weight q5_K +blk.3.ffn_down_exps.weight q5_K +blk.3.ffn_down_shexp.weight q5_K +blk.4.ffn_down_exps.weight q5_K +blk.4.ffn_down_shexp.weight q5_K +blk.5.ffn_down_exps.weight q5_K +blk.5.ffn_down_shexp.weight q5_K +blk.6.ffn_down_exps.weight q5_K +blk.6.ffn_down_shexp.weight q5_K +blk.7.attn_v.weight q5_K +blk.11.attn_v.weight q5_K +blk.15.attn_v.weight q5_K + +[Q4_K_M] q4_K +output.weight q6_K +blk.0.attn_qkv.weight q5_K +blk.0.ffn_down_exps.weight q6_K +blk.0.ffn_down_shexp.weight q6_K +blk.1.attn_qkv.weight q5_K +blk.1.ffn_down_exps.weight q6_K +blk.1.ffn_down_shexp.weight q6_K +blk.2.attn_qkv.weight q5_K +blk.2.ffn_down_exps.weight q6_K +blk.2.ffn_down_shexp.weight q6_K +blk.3.attn_v.weight q6_K +blk.3.ffn_down_exps.weight q6_K +blk.3.ffn_down_shexp.weight q6_K +blk.4.attn_qkv.weight q5_K +blk.4.ffn_down_exps.weight q6_K +blk.4.ffn_down_shexp.weight q6_K +blk.5.attn_qkv.weight q5_K +blk.5.ffn_down_exps.weight q6_K +blk.5.ffn_down_shexp.weight q6_K +blk.6.attn_qkv.weight q5_K +blk.6.ffn_down_exps.weight q6_K +blk.6.ffn_down_shexp.weight q6_K +blk.7.attn_v.weight q6_K +blk.8.attn_qkv.weight q5_K +blk.9.attn_qkv.weight q5_K +blk.9.ffn_down_exps.weight q6_K +blk.9.ffn_down_shexp.weight q6_K +blk.10.attn_qkv.weight q5_K +blk.11.attn_v.weight q6_K +blk.12.attn_qkv.weight q5_K +blk.12.ffn_down_exps.weight q6_K +blk.12.ffn_down_shexp.weight q6_K +blk.13.attn_qkv.weight q5_K +blk.14.attn_qkv.weight q5_K +blk.15.attn_v.weight q6_K +blk.15.ffn_down_exps.weight q6_K +blk.15.ffn_down_shexp.weight q6_K +blk.16.attn_qkv.weight q5_K +blk.17.attn_qkv.weight q5_K +blk.18.attn_qkv.weight q5_K +blk.18.ffn_down_exps.weight q6_K +blk.18.ffn_down_shexp.weight q6_K +blk.19.attn_v.weight q6_K +blk.20.attn_qkv.weight q5_K +blk.21.attn_qkv.weight q5_K +blk.21.ffn_down_exps.weight q6_K +blk.21.ffn_down_shexp.weight q6_K +blk.22.attn_qkv.weight q5_K +blk.23.attn_v.weight q6_K +blk.24.attn_qkv.weight q5_K +blk.24.ffn_down_exps.weight q6_K +blk.24.ffn_down_shexp.weight q6_K +blk.25.attn_qkv.weight q5_K +blk.26.attn_qkv.weight q5_K +blk.27.attn_v.weight q6_K +blk.27.ffn_down_exps.weight q6_K +blk.27.ffn_down_shexp.weight q6_K +blk.28.attn_qkv.weight q5_K +blk.29.attn_qkv.weight q5_K +blk.30.attn_qkv.weight q5_K +blk.30.ffn_down_exps.weight q6_K +blk.30.ffn_down_shexp.weight q6_K +blk.32.attn_qkv.weight q5_K +blk.33.attn_qkv.weight q5_K +blk.33.ffn_down_exps.weight q6_K +blk.33.ffn_down_shexp.weight q6_K +blk.34.attn_qkv.weight q5_K +blk.36.attn_qkv.weight q5_K +blk.36.ffn_down_exps.weight q6_K +blk.36.ffn_down_shexp.weight q6_K +blk.37.attn_qkv.weight q5_K +blk.38.attn_qkv.weight q5_K +blk.39.attn_v.weight q6_K +blk.39.ffn_down_exps.weight q6_K +blk.39.ffn_down_shexp.weight q6_K +blk.40.attn_qkv.weight q5_K +blk.41.attn_qkv.weight q5_K +blk.42.attn_qkv.weight q5_K +blk.42.ffn_down_exps.weight q6_K +blk.42.ffn_down_shexp.weight q6_K +blk.44.attn_qkv.weight q5_K +blk.45.attn_qkv.weight q5_K +blk.45.ffn_down_exps.weight q6_K +blk.45.ffn_down_shexp.weight q6_K +blk.46.attn_qkv.weight q5_K +blk.48.attn_qkv.weight q5_K +blk.48.ffn_down_exps.weight q6_K +blk.48.ffn_down_shexp.weight q6_K +blk.49.attn_qkv.weight q5_K +blk.50.attn_qkv.weight q5_K +blk.51.attn_v.weight q6_K +blk.51.ffn_down_exps.weight q6_K +blk.51.ffn_down_shexp.weight q6_K +blk.52.attn_qkv.weight q5_K +blk.52.ffn_down_exps.weight q6_K +blk.52.ffn_down_shexp.weight q6_K +blk.53.attn_qkv.weight q5_K +blk.53.ffn_down_exps.weight q6_K +blk.53.ffn_down_shexp.weight q6_K +blk.54.attn_qkv.weight q5_K +blk.54.ffn_down_exps.weight q6_K +blk.54.ffn_down_shexp.weight q6_K +blk.55.ffn_down_exps.weight q6_K +blk.55.ffn_down_shexp.weight q6_K +blk.56.attn_qkv.weight q5_K +blk.56.ffn_down_exps.weight q6_K +blk.56.ffn_down_shexp.weight q6_K +blk.57.attn_qkv.weight q5_K +blk.57.ffn_down_exps.weight q6_K +blk.57.ffn_down_shexp.weight q6_K +blk.58.attn_qkv.weight q5_K +blk.58.ffn_down_exps.weight q6_K +blk.58.ffn_down_shexp.weight q6_K +blk.59.ffn_down_exps.weight q6_K +blk.59.ffn_down_shexp.weight q6_K + +[Q5_K_S] q5_K +output.weight q6_K + +[Q5_K_M] q5_K +output.weight q6_K +blk.0.attn_qkv.weight q6_K +blk.0.ffn_down_exps.weight q6_K +blk.0.ffn_down_shexp.weight q6_K +blk.1.attn_qkv.weight q6_K +blk.1.ffn_down_exps.weight q6_K +blk.1.ffn_down_shexp.weight q6_K +blk.2.attn_qkv.weight q6_K +blk.2.ffn_down_exps.weight q6_K +blk.2.ffn_down_shexp.weight q6_K +blk.3.attn_v.weight q6_K +blk.3.ffn_down_exps.weight q6_K +blk.3.ffn_down_shexp.weight q6_K +blk.4.attn_qkv.weight q6_K +blk.4.ffn_down_exps.weight q6_K +blk.4.ffn_down_shexp.weight q6_K +blk.5.attn_qkv.weight q6_K +blk.5.ffn_down_exps.weight q6_K +blk.5.ffn_down_shexp.weight q6_K +blk.6.attn_qkv.weight q6_K +blk.6.ffn_down_exps.weight q6_K +blk.6.ffn_down_shexp.weight q6_K +blk.7.attn_v.weight q6_K +blk.8.attn_qkv.weight q6_K +blk.9.attn_qkv.weight q6_K +blk.9.ffn_down_exps.weight q6_K +blk.9.ffn_down_shexp.weight q6_K +blk.10.attn_qkv.weight q6_K +blk.11.attn_v.weight q6_K +blk.12.attn_qkv.weight q6_K +blk.12.ffn_down_exps.weight q6_K +blk.12.ffn_down_shexp.weight q6_K +blk.13.attn_qkv.weight q6_K +blk.14.attn_qkv.weight q6_K +blk.15.attn_v.weight q6_K +blk.15.ffn_down_exps.weight q6_K +blk.15.ffn_down_shexp.weight q6_K +blk.16.attn_qkv.weight q6_K +blk.17.attn_qkv.weight q6_K +blk.18.attn_qkv.weight q6_K +blk.18.ffn_down_exps.weight q6_K +blk.18.ffn_down_shexp.weight q6_K +blk.19.attn_v.weight q6_K +blk.20.attn_qkv.weight q6_K +blk.21.attn_qkv.weight q6_K +blk.21.ffn_down_exps.weight q6_K +blk.21.ffn_down_shexp.weight q6_K +blk.22.attn_qkv.weight q6_K +blk.23.attn_v.weight q6_K +blk.24.attn_qkv.weight q6_K +blk.24.ffn_down_exps.weight q6_K +blk.24.ffn_down_shexp.weight q6_K +blk.25.attn_qkv.weight q6_K +blk.26.attn_qkv.weight q6_K +blk.27.attn_v.weight q6_K +blk.27.ffn_down_exps.weight q6_K +blk.27.ffn_down_shexp.weight q6_K +blk.28.attn_qkv.weight q6_K +blk.29.attn_qkv.weight q6_K +blk.30.attn_qkv.weight q6_K +blk.30.ffn_down_exps.weight q6_K +blk.30.ffn_down_shexp.weight q6_K +blk.32.attn_qkv.weight q6_K +blk.33.attn_qkv.weight q6_K +blk.33.ffn_down_exps.weight q6_K +blk.33.ffn_down_shexp.weight q6_K +blk.34.attn_qkv.weight q6_K +blk.36.attn_qkv.weight q6_K +blk.36.ffn_down_exps.weight q6_K +blk.36.ffn_down_shexp.weight q6_K +blk.37.attn_qkv.weight q6_K +blk.38.attn_qkv.weight q6_K +blk.39.attn_v.weight q6_K +blk.39.ffn_down_exps.weight q6_K +blk.39.ffn_down_shexp.weight q6_K +blk.40.attn_qkv.weight q6_K +blk.41.attn_qkv.weight q6_K +blk.42.attn_qkv.weight q6_K +blk.42.ffn_down_exps.weight q6_K +blk.42.ffn_down_shexp.weight q6_K +blk.44.attn_qkv.weight q6_K +blk.45.attn_qkv.weight q6_K +blk.45.ffn_down_exps.weight q6_K +blk.45.ffn_down_shexp.weight q6_K +blk.46.attn_qkv.weight q6_K +blk.48.attn_qkv.weight q6_K +blk.48.ffn_down_exps.weight q6_K +blk.48.ffn_down_shexp.weight q6_K +blk.49.attn_qkv.weight q6_K +blk.50.attn_qkv.weight q6_K +blk.51.attn_v.weight q6_K +blk.51.ffn_down_exps.weight q6_K +blk.51.ffn_down_shexp.weight q6_K +blk.52.attn_qkv.weight q6_K +blk.52.ffn_down_exps.weight q6_K +blk.52.ffn_down_shexp.weight q6_K +blk.53.attn_qkv.weight q6_K +blk.53.ffn_down_exps.weight q6_K +blk.53.ffn_down_shexp.weight q6_K +blk.54.attn_qkv.weight q6_K +blk.54.ffn_down_exps.weight q6_K +blk.54.ffn_down_shexp.weight q6_K +blk.55.ffn_down_exps.weight q6_K +blk.55.ffn_down_shexp.weight q6_K +blk.56.attn_qkv.weight q6_K +blk.56.ffn_down_exps.weight q6_K +blk.56.ffn_down_shexp.weight q6_K +blk.57.attn_qkv.weight q6_K +blk.57.ffn_down_exps.weight q6_K +blk.57.ffn_down_shexp.weight q6_K +blk.58.attn_qkv.weight q6_K +blk.58.ffn_down_exps.weight q6_K +blk.58.ffn_down_shexp.weight q6_K +blk.59.ffn_down_exps.weight q6_K +blk.59.ffn_down_shexp.weight q6_K + +[Q6_K] q6_K + +[IQ2_XXS] iq2_xxs +output.weight q5_K +token_embd.weight q2_K +blk.0.ffn_down_exps.weight q2_K +blk.0.ffn_down_shexp.weight q2_K +blk.1.ffn_down_exps.weight q2_K +blk.1.ffn_down_shexp.weight q2_K +blk.2.ffn_down_exps.weight q2_K +blk.2.ffn_down_shexp.weight q2_K +blk.3.attn_v.weight q4_K +blk.3.ffn_down_exps.weight q2_K +blk.7.attn_v.weight q4_K +blk.11.attn_v.weight q4_K +blk.15.attn_v.weight q4_K +blk.19.attn_v.weight q4_K +blk.23.attn_v.weight q4_K +blk.27.attn_v.weight q4_K +blk.31.attn_v.weight q4_K +blk.35.attn_v.weight q4_K +blk.39.attn_v.weight q4_K +blk.43.attn_v.weight q4_K +blk.47.attn_v.weight q4_K +blk.51.attn_v.weight q4_K +blk.55.attn_v.weight q4_K +blk.59.attn_v.weight q4_K + +[IQ2_XS] iq2_xs +output.weight q5_K +token_embd.weight q2_K +blk.0.ffn_down_exps.weight q2_K +blk.0.ffn_down_shexp.weight q2_K +blk.1.ffn_down_exps.weight q2_K +blk.1.ffn_down_shexp.weight q2_K +blk.2.ffn_down_exps.weight q2_K +blk.2.ffn_down_shexp.weight q2_K +blk.3.attn_v.weight q4_K +blk.3.ffn_down_exps.weight q2_K +blk.7.attn_v.weight q4_K +blk.11.attn_v.weight q4_K +blk.15.attn_v.weight q4_K +blk.19.attn_v.weight q4_K +blk.23.attn_v.weight q4_K +blk.27.attn_v.weight q4_K +blk.31.attn_v.weight q4_K +blk.35.attn_v.weight q4_K +blk.39.attn_v.weight q4_K +blk.43.attn_v.weight q4_K +blk.47.attn_v.weight q4_K +blk.51.attn_v.weight q4_K +blk.55.attn_v.weight q4_K +blk.59.attn_v.weight q4_K + +[Q2_K_S] q2_K +output.weight q6_K +blk.0.ffn_down_exps.weight q4_K +blk.0.ffn_down_shexp.weight q4_K +blk.1.ffn_down_exps.weight q4_K +blk.1.ffn_down_shexp.weight q4_K +blk.2.ffn_down_exps.weight q4_K +blk.2.ffn_down_shexp.weight q4_K +blk.3.attn_v.weight q4_K +blk.3.ffn_down_exps.weight q4_K +blk.3.ffn_down_shexp.weight q4_K +blk.4.ffn_down_exps.weight q4_K +blk.4.ffn_down_shexp.weight q4_K +blk.5.ffn_down_exps.weight q4_K +blk.5.ffn_down_shexp.weight q4_K +blk.6.ffn_down_exps.weight q4_K +blk.6.ffn_down_shexp.weight q4_K +blk.7.attn_v.weight q4_K +blk.11.attn_v.weight q4_K +blk.15.attn_v.weight q4_K +blk.19.attn_v.weight q4_K +blk.23.attn_v.weight q4_K +blk.27.attn_v.weight q4_K +blk.31.attn_v.weight q4_K +blk.35.attn_v.weight q4_K +blk.39.attn_v.weight q4_K +blk.43.attn_v.weight q4_K +blk.47.attn_v.weight q4_K +blk.51.attn_v.weight q4_K +blk.55.attn_v.weight q4_K +blk.59.attn_v.weight q4_K + +[IQ3_XS] iq3_s +output.weight q6_K +blk.3.attn_k.weight iq3_xxs +blk.3.attn_q.weight iq3_xxs +blk.3.attn_v.weight q4_K +blk.7.attn_k.weight iq3_xxs +blk.7.attn_q.weight iq3_xxs +blk.7.attn_v.weight q4_K +blk.7.ffn_gate_exps.weight iq3_xxs +blk.7.ffn_gate_shexp.weight iq3_xxs +blk.7.ffn_up_exps.weight iq3_xxs +blk.7.ffn_up_shexp.weight iq3_xxs +blk.8.ffn_gate_exps.weight iq3_xxs +blk.8.ffn_gate_shexp.weight iq3_xxs +blk.8.ffn_up_exps.weight iq3_xxs +blk.8.ffn_up_shexp.weight iq3_xxs +blk.9.ffn_gate_exps.weight iq3_xxs +blk.9.ffn_gate_shexp.weight iq3_xxs +blk.9.ffn_up_exps.weight iq3_xxs +blk.9.ffn_up_shexp.weight iq3_xxs +blk.10.ffn_gate_exps.weight iq3_xxs +blk.10.ffn_gate_shexp.weight iq3_xxs +blk.10.ffn_up_exps.weight iq3_xxs +blk.10.ffn_up_shexp.weight iq3_xxs +blk.11.attn_k.weight iq3_xxs +blk.11.attn_q.weight iq3_xxs +blk.11.attn_v.weight q4_K +blk.11.ffn_gate_exps.weight iq3_xxs +blk.11.ffn_gate_shexp.weight iq3_xxs +blk.11.ffn_up_exps.weight iq3_xxs +blk.11.ffn_up_shexp.weight iq3_xxs +blk.12.ffn_gate_exps.weight iq3_xxs +blk.12.ffn_gate_shexp.weight iq3_xxs +blk.12.ffn_up_exps.weight iq3_xxs +blk.12.ffn_up_shexp.weight iq3_xxs +blk.13.ffn_gate_exps.weight iq3_xxs +blk.13.ffn_gate_shexp.weight iq3_xxs +blk.13.ffn_up_exps.weight iq3_xxs +blk.13.ffn_up_shexp.weight iq3_xxs +blk.14.ffn_gate_exps.weight iq3_xxs +blk.14.ffn_gate_shexp.weight iq3_xxs +blk.14.ffn_up_exps.weight iq3_xxs +blk.14.ffn_up_shexp.weight iq3_xxs +blk.15.attn_k.weight iq3_xxs +blk.15.attn_q.weight iq3_xxs +blk.15.attn_v.weight q4_K +blk.15.ffn_gate_exps.weight iq3_xxs +blk.15.ffn_gate_shexp.weight iq3_xxs +blk.15.ffn_up_exps.weight iq3_xxs +blk.15.ffn_up_shexp.weight iq3_xxs +blk.16.ffn_gate_exps.weight iq3_xxs +blk.16.ffn_gate_shexp.weight iq3_xxs +blk.16.ffn_up_exps.weight iq3_xxs +blk.16.ffn_up_shexp.weight iq3_xxs +blk.17.ffn_gate_exps.weight iq3_xxs +blk.17.ffn_gate_shexp.weight iq3_xxs +blk.17.ffn_up_exps.weight iq3_xxs +blk.17.ffn_up_shexp.weight iq3_xxs +blk.18.ffn_gate_exps.weight iq3_xxs +blk.18.ffn_gate_shexp.weight iq3_xxs +blk.18.ffn_up_exps.weight iq3_xxs +blk.18.ffn_up_shexp.weight iq3_xxs +blk.19.attn_k.weight iq3_xxs +blk.19.attn_q.weight iq3_xxs +blk.19.attn_v.weight q4_K +blk.19.ffn_gate_exps.weight iq3_xxs +blk.19.ffn_gate_shexp.weight iq3_xxs +blk.19.ffn_up_exps.weight iq3_xxs +blk.19.ffn_up_shexp.weight iq3_xxs +blk.20.ffn_gate_exps.weight iq3_xxs +blk.20.ffn_gate_shexp.weight iq3_xxs +blk.20.ffn_up_exps.weight iq3_xxs +blk.20.ffn_up_shexp.weight iq3_xxs +blk.21.ffn_gate_exps.weight iq3_xxs +blk.21.ffn_gate_shexp.weight iq3_xxs +blk.21.ffn_up_exps.weight iq3_xxs +blk.21.ffn_up_shexp.weight iq3_xxs +blk.22.ffn_gate_exps.weight iq3_xxs +blk.22.ffn_gate_shexp.weight iq3_xxs +blk.22.ffn_up_exps.weight iq3_xxs +blk.22.ffn_up_shexp.weight iq3_xxs +blk.23.attn_k.weight iq3_xxs +blk.23.attn_q.weight iq3_xxs +blk.23.attn_v.weight q4_K +blk.23.ffn_gate_exps.weight iq3_xxs +blk.23.ffn_gate_shexp.weight iq3_xxs +blk.23.ffn_up_exps.weight iq3_xxs +blk.23.ffn_up_shexp.weight iq3_xxs +blk.24.ffn_gate_exps.weight iq3_xxs +blk.24.ffn_gate_shexp.weight iq3_xxs +blk.24.ffn_up_exps.weight iq3_xxs +blk.24.ffn_up_shexp.weight iq3_xxs +blk.25.ffn_gate_exps.weight iq3_xxs +blk.25.ffn_gate_shexp.weight iq3_xxs +blk.25.ffn_up_exps.weight iq3_xxs +blk.25.ffn_up_shexp.weight iq3_xxs +blk.26.ffn_gate_exps.weight iq3_xxs +blk.26.ffn_gate_shexp.weight iq3_xxs +blk.26.ffn_up_exps.weight iq3_xxs +blk.26.ffn_up_shexp.weight iq3_xxs +blk.27.attn_k.weight iq3_xxs +blk.27.attn_q.weight iq3_xxs +blk.27.attn_v.weight q4_K +blk.27.ffn_gate_exps.weight iq3_xxs +blk.27.ffn_gate_shexp.weight iq3_xxs +blk.27.ffn_up_exps.weight iq3_xxs +blk.27.ffn_up_shexp.weight iq3_xxs +blk.28.ffn_gate_exps.weight iq3_xxs +blk.28.ffn_gate_shexp.weight iq3_xxs +blk.28.ffn_up_exps.weight iq3_xxs +blk.28.ffn_up_shexp.weight iq3_xxs +blk.29.ffn_gate_exps.weight iq3_xxs +blk.29.ffn_gate_shexp.weight iq3_xxs +blk.29.ffn_up_exps.weight iq3_xxs +blk.29.ffn_up_shexp.weight iq3_xxs +blk.30.ffn_gate_exps.weight iq3_xxs +blk.30.ffn_gate_shexp.weight iq3_xxs +blk.30.ffn_up_exps.weight iq3_xxs +blk.30.ffn_up_shexp.weight iq3_xxs +blk.31.attn_k.weight iq3_xxs +blk.31.attn_q.weight iq3_xxs +blk.31.attn_v.weight q4_K +blk.31.ffn_gate_exps.weight iq3_xxs +blk.31.ffn_gate_shexp.weight iq3_xxs +blk.31.ffn_up_exps.weight iq3_xxs +blk.31.ffn_up_shexp.weight iq3_xxs +blk.32.ffn_gate_exps.weight iq3_xxs +blk.32.ffn_gate_shexp.weight iq3_xxs +blk.32.ffn_up_exps.weight iq3_xxs +blk.32.ffn_up_shexp.weight iq3_xxs +blk.33.ffn_gate_exps.weight iq3_xxs +blk.33.ffn_gate_shexp.weight iq3_xxs +blk.33.ffn_up_exps.weight iq3_xxs +blk.33.ffn_up_shexp.weight iq3_xxs +blk.34.ffn_gate_exps.weight iq3_xxs +blk.34.ffn_gate_shexp.weight iq3_xxs +blk.34.ffn_up_exps.weight iq3_xxs +blk.34.ffn_up_shexp.weight iq3_xxs +blk.35.attn_k.weight iq3_xxs +blk.35.attn_q.weight iq3_xxs +blk.35.attn_v.weight q4_K +blk.35.ffn_gate_exps.weight iq3_xxs +blk.35.ffn_gate_shexp.weight iq3_xxs +blk.35.ffn_up_exps.weight iq3_xxs +blk.35.ffn_up_shexp.weight iq3_xxs +blk.36.ffn_gate_exps.weight iq3_xxs +blk.36.ffn_gate_shexp.weight iq3_xxs +blk.36.ffn_up_exps.weight iq3_xxs +blk.36.ffn_up_shexp.weight iq3_xxs +blk.37.ffn_gate_exps.weight iq3_xxs +blk.37.ffn_gate_shexp.weight iq3_xxs +blk.37.ffn_up_exps.weight iq3_xxs +blk.37.ffn_up_shexp.weight iq3_xxs +blk.38.ffn_gate_exps.weight iq3_xxs +blk.38.ffn_gate_shexp.weight iq3_xxs +blk.38.ffn_up_exps.weight iq3_xxs +blk.38.ffn_up_shexp.weight iq3_xxs +blk.39.attn_k.weight iq3_xxs +blk.39.attn_q.weight iq3_xxs +blk.39.attn_v.weight q4_K +blk.39.ffn_gate_exps.weight iq3_xxs +blk.39.ffn_gate_shexp.weight iq3_xxs +blk.39.ffn_up_exps.weight iq3_xxs +blk.39.ffn_up_shexp.weight iq3_xxs +blk.40.ffn_gate_exps.weight iq3_xxs +blk.40.ffn_gate_shexp.weight iq3_xxs +blk.40.ffn_up_exps.weight iq3_xxs +blk.40.ffn_up_shexp.weight iq3_xxs +blk.41.ffn_gate_exps.weight iq3_xxs +blk.41.ffn_gate_shexp.weight iq3_xxs +blk.41.ffn_up_exps.weight iq3_xxs +blk.41.ffn_up_shexp.weight iq3_xxs +blk.42.ffn_gate_exps.weight iq3_xxs +blk.42.ffn_gate_shexp.weight iq3_xxs +blk.42.ffn_up_exps.weight iq3_xxs +blk.42.ffn_up_shexp.weight iq3_xxs +blk.43.attn_k.weight iq3_xxs +blk.43.attn_q.weight iq3_xxs +blk.43.attn_v.weight q4_K +blk.43.ffn_gate_exps.weight iq3_xxs +blk.43.ffn_gate_shexp.weight iq3_xxs +blk.43.ffn_up_exps.weight iq3_xxs +blk.43.ffn_up_shexp.weight iq3_xxs +blk.44.ffn_gate_exps.weight iq3_xxs +blk.44.ffn_gate_shexp.weight iq3_xxs +blk.44.ffn_up_exps.weight iq3_xxs +blk.44.ffn_up_shexp.weight iq3_xxs +blk.45.ffn_gate_exps.weight iq3_xxs +blk.45.ffn_gate_shexp.weight iq3_xxs +blk.45.ffn_up_exps.weight iq3_xxs +blk.45.ffn_up_shexp.weight iq3_xxs +blk.46.ffn_gate_exps.weight iq3_xxs +blk.46.ffn_gate_shexp.weight iq3_xxs +blk.46.ffn_up_exps.weight iq3_xxs +blk.46.ffn_up_shexp.weight iq3_xxs +blk.47.attn_k.weight iq3_xxs +blk.47.attn_q.weight iq3_xxs +blk.47.attn_v.weight q4_K +blk.47.ffn_gate_exps.weight iq3_xxs +blk.47.ffn_gate_shexp.weight iq3_xxs +blk.47.ffn_up_exps.weight iq3_xxs +blk.47.ffn_up_shexp.weight iq3_xxs +blk.48.ffn_gate_exps.weight iq3_xxs +blk.48.ffn_gate_shexp.weight iq3_xxs +blk.48.ffn_up_exps.weight iq3_xxs +blk.48.ffn_up_shexp.weight iq3_xxs +blk.49.ffn_gate_exps.weight iq3_xxs +blk.49.ffn_gate_shexp.weight iq3_xxs +blk.49.ffn_up_exps.weight iq3_xxs +blk.49.ffn_up_shexp.weight iq3_xxs +blk.50.ffn_gate_exps.weight iq3_xxs +blk.50.ffn_gate_shexp.weight iq3_xxs +blk.50.ffn_up_exps.weight iq3_xxs +blk.50.ffn_up_shexp.weight iq3_xxs +blk.51.attn_k.weight iq3_xxs +blk.51.attn_q.weight iq3_xxs +blk.51.attn_v.weight q4_K +blk.51.ffn_gate_exps.weight iq3_xxs +blk.51.ffn_gate_shexp.weight iq3_xxs +blk.51.ffn_up_exps.weight iq3_xxs +blk.51.ffn_up_shexp.weight iq3_xxs +blk.55.attn_k.weight iq3_xxs +blk.55.attn_q.weight iq3_xxs +blk.55.attn_v.weight q4_K +blk.59.attn_k.weight iq3_xxs +blk.59.attn_q.weight iq3_xxs +blk.59.attn_v.weight q4_K + +[IQ3_XXS] iq3_xxs +output.weight q5_K +token_embd.weight iq3_s +blk.0.ffn_down_exps.weight q4_K +blk.0.ffn_down_shexp.weight q4_K +blk.1.ffn_down_exps.weight q4_K +blk.1.ffn_down_shexp.weight q4_K +blk.2.ffn_down_exps.weight q4_K +blk.2.ffn_down_shexp.weight q4_K +blk.3.attn_k.weight iq2_s +blk.3.attn_output.weight iq3_s +blk.3.attn_q.weight iq2_s +blk.3.attn_v.weight q4_K +blk.3.ffn_down_exps.weight q4_K +blk.3.ffn_down_shexp.weight q4_K +blk.4.ffn_down_exps.weight q4_K +blk.4.ffn_down_shexp.weight q4_K +blk.5.ffn_down_exps.weight q4_K +blk.5.ffn_down_shexp.weight q4_K +blk.6.ffn_down_exps.weight q4_K +blk.6.ffn_down_shexp.weight q4_K +blk.7.attn_k.weight iq2_s +blk.7.attn_output.weight iq3_s +blk.7.attn_q.weight iq2_s +blk.7.attn_v.weight q4_K +blk.7.ffn_down_exps.weight q3_K +blk.7.ffn_down_shexp.weight q3_K +blk.8.ffn_down_exps.weight q3_K +blk.8.ffn_down_shexp.weight q3_K +blk.9.ffn_down_exps.weight q3_K +blk.9.ffn_down_shexp.weight q3_K +blk.10.ffn_down_exps.weight q3_K +blk.10.ffn_down_shexp.weight q3_K +blk.11.attn_k.weight iq2_s +blk.11.attn_output.weight iq3_s +blk.11.attn_q.weight iq2_s +blk.11.attn_v.weight q4_K +blk.11.ffn_down_exps.weight q3_K +blk.11.ffn_down_shexp.weight q3_K +blk.12.ffn_down_exps.weight q3_K +blk.12.ffn_down_shexp.weight q3_K +blk.13.ffn_down_exps.weight q3_K +blk.13.ffn_down_shexp.weight q3_K +blk.14.ffn_down_exps.weight q3_K +blk.14.ffn_down_shexp.weight q3_K +blk.15.attn_k.weight iq2_s +blk.15.attn_output.weight iq3_s +blk.15.attn_q.weight iq2_s +blk.15.attn_v.weight q4_K +blk.15.ffn_down_exps.weight q3_K +blk.15.ffn_down_shexp.weight q3_K +blk.16.ffn_down_exps.weight q3_K +blk.16.ffn_down_shexp.weight q3_K +blk.17.ffn_down_exps.weight q3_K +blk.17.ffn_down_shexp.weight q3_K +blk.18.ffn_down_exps.weight q3_K +blk.18.ffn_down_shexp.weight q3_K +blk.19.attn_k.weight iq2_s +blk.19.attn_output.weight iq3_s +blk.19.attn_q.weight iq2_s +blk.19.attn_v.weight q4_K +blk.19.ffn_down_exps.weight q3_K +blk.19.ffn_down_shexp.weight q3_K +blk.20.ffn_down_exps.weight q3_K +blk.20.ffn_down_shexp.weight q3_K +blk.21.ffn_down_exps.weight q3_K +blk.21.ffn_down_shexp.weight q3_K +blk.22.ffn_down_exps.weight q3_K +blk.22.ffn_down_shexp.weight q3_K +blk.23.attn_k.weight iq2_s +blk.23.attn_output.weight iq3_s +blk.23.attn_q.weight iq2_s +blk.23.attn_v.weight q4_K +blk.23.ffn_down_exps.weight q3_K +blk.23.ffn_down_shexp.weight q3_K +blk.24.ffn_down_exps.weight q3_K +blk.24.ffn_down_shexp.weight q3_K +blk.25.ffn_down_exps.weight q3_K +blk.25.ffn_down_shexp.weight q3_K +blk.26.ffn_down_exps.weight q3_K +blk.26.ffn_down_shexp.weight q3_K +blk.27.attn_k.weight iq2_s +blk.27.attn_output.weight iq3_s +blk.27.attn_q.weight iq2_s +blk.27.attn_v.weight q4_K +blk.27.ffn_down_exps.weight q3_K +blk.27.ffn_down_shexp.weight q3_K +blk.28.ffn_down_exps.weight q3_K +blk.28.ffn_down_shexp.weight q3_K +blk.29.ffn_down_exps.weight q3_K +blk.29.ffn_down_shexp.weight q3_K +blk.30.ffn_down_exps.weight q3_K +blk.30.ffn_down_shexp.weight q3_K +blk.31.attn_k.weight iq2_s +blk.31.attn_output.weight iq3_s +blk.31.attn_q.weight iq2_s +blk.31.attn_v.weight q4_K +blk.31.ffn_down_exps.weight q3_K +blk.31.ffn_down_shexp.weight q3_K +blk.32.ffn_down_exps.weight q3_K +blk.32.ffn_down_shexp.weight q3_K +blk.33.ffn_down_exps.weight q3_K +blk.33.ffn_down_shexp.weight q3_K +blk.34.ffn_down_exps.weight q3_K +blk.34.ffn_down_shexp.weight q3_K +blk.35.attn_k.weight iq2_s +blk.35.attn_output.weight iq3_s +blk.35.attn_q.weight iq2_s +blk.35.attn_v.weight q4_K +blk.35.ffn_down_exps.weight q3_K +blk.35.ffn_down_shexp.weight q3_K +blk.36.ffn_down_exps.weight q3_K +blk.36.ffn_down_shexp.weight q3_K +blk.37.ffn_down_exps.weight q3_K +blk.37.ffn_down_shexp.weight q3_K +blk.38.ffn_down_exps.weight q3_K +blk.38.ffn_down_shexp.weight q3_K +blk.39.attn_k.weight iq2_s +blk.39.attn_output.weight iq3_s +blk.39.attn_q.weight iq2_s +blk.39.attn_v.weight q4_K +blk.39.ffn_down_exps.weight q3_K +blk.39.ffn_down_shexp.weight q3_K +blk.40.ffn_down_exps.weight q3_K +blk.40.ffn_down_shexp.weight q3_K +blk.41.ffn_down_exps.weight q3_K +blk.41.ffn_down_shexp.weight q3_K +blk.42.ffn_down_exps.weight q3_K +blk.42.ffn_down_shexp.weight q3_K +blk.43.attn_k.weight iq2_s +blk.43.attn_output.weight iq3_s +blk.43.attn_q.weight iq2_s +blk.43.attn_v.weight q4_K +blk.43.ffn_down_exps.weight q3_K +blk.43.ffn_down_shexp.weight q3_K +blk.44.ffn_down_exps.weight q3_K +blk.44.ffn_down_shexp.weight q3_K +blk.45.ffn_down_exps.weight q3_K +blk.45.ffn_down_shexp.weight q3_K +blk.46.ffn_down_exps.weight q3_K +blk.46.ffn_down_shexp.weight q3_K +blk.47.attn_k.weight iq2_s +blk.47.attn_output.weight iq3_s +blk.47.attn_q.weight iq2_s +blk.47.attn_v.weight q4_K +blk.47.ffn_down_exps.weight q3_K +blk.47.ffn_down_shexp.weight q3_K +blk.48.ffn_down_exps.weight q3_K +blk.48.ffn_down_shexp.weight q3_K +blk.49.ffn_down_exps.weight q3_K +blk.49.ffn_down_shexp.weight q3_K +blk.50.ffn_down_exps.weight q3_K +blk.50.ffn_down_shexp.weight q3_K +blk.51.attn_k.weight iq2_s +blk.51.attn_output.weight iq3_s +blk.51.attn_q.weight iq2_s +blk.51.attn_v.weight q4_K +blk.51.ffn_down_exps.weight q3_K +blk.51.ffn_down_shexp.weight q3_K +blk.52.ffn_down_exps.weight q3_K +blk.52.ffn_down_shexp.weight q3_K +blk.53.ffn_down_exps.weight q3_K +blk.53.ffn_down_shexp.weight q3_K +blk.54.ffn_down_exps.weight q3_K +blk.54.ffn_down_shexp.weight q3_K +blk.55.attn_k.weight iq2_s +blk.55.attn_output.weight iq3_s +blk.55.attn_q.weight iq2_s +blk.55.attn_v.weight q4_K +blk.55.ffn_down_exps.weight q3_K +blk.55.ffn_down_shexp.weight q3_K +blk.56.ffn_down_exps.weight q3_K +blk.56.ffn_down_shexp.weight q3_K +blk.57.ffn_down_exps.weight q3_K +blk.57.ffn_down_shexp.weight q3_K +blk.58.ffn_down_exps.weight q3_K +blk.58.ffn_down_shexp.weight q3_K +blk.59.attn_k.weight iq2_s +blk.59.attn_output.weight iq3_s +blk.59.attn_q.weight iq2_s +blk.59.attn_v.weight q4_K +blk.59.ffn_down_exps.weight q3_K +blk.59.ffn_down_shexp.weight q3_K + +[IQ1_S] iq1_s +output.weight q5_K +token_embd.weight q2_K +blk.0.ffn_down_exps.weight q2_K +blk.0.ffn_down_shexp.weight q2_K +blk.1.ffn_down_exps.weight q2_K +blk.1.ffn_down_shexp.weight q2_K +blk.2.ffn_down_exps.weight q2_K +blk.2.ffn_down_shexp.weight q2_K +blk.3.attn_output.weight iq2_xxs +blk.3.attn_v.weight q4_K +blk.3.ffn_down_exps.weight q2_K +blk.7.attn_output.weight iq2_xxs +blk.7.attn_v.weight q4_K +blk.11.attn_output.weight iq2_xxs +blk.11.attn_v.weight q4_K +blk.15.attn_output.weight iq2_xxs +blk.15.attn_v.weight q4_K +blk.19.attn_output.weight iq2_xxs +blk.19.attn_v.weight q4_K +blk.23.attn_output.weight iq2_xxs +blk.23.attn_v.weight q4_K +blk.27.attn_output.weight iq2_xxs +blk.27.attn_v.weight q4_K +blk.31.attn_output.weight iq2_xxs +blk.31.attn_v.weight q4_K +blk.35.attn_output.weight iq2_xxs +blk.35.attn_v.weight q4_K +blk.39.attn_output.weight iq2_xxs +blk.39.attn_v.weight q4_K +blk.43.attn_output.weight iq2_xxs +blk.43.attn_v.weight q4_K +blk.47.attn_output.weight iq2_xxs +blk.47.attn_v.weight q4_K +blk.51.attn_output.weight iq2_xxs +blk.51.attn_v.weight q4_K +blk.55.attn_output.weight iq2_xxs +blk.55.attn_v.weight q4_K +blk.59.attn_output.weight iq2_xxs +blk.59.attn_v.weight q4_K + +[IQ4_NL] iq4_nl +output.weight q6_K +blk.0.ffn_down_exps.weight q5_K +blk.0.ffn_down_shexp.weight q5_K +blk.1.ffn_down_exps.weight q5_K +blk.1.ffn_down_shexp.weight q5_K +blk.2.ffn_down_exps.weight q5_K +blk.2.ffn_down_shexp.weight q5_K +blk.3.attn_v.weight q5_K +blk.3.ffn_down_exps.weight q5_K +blk.3.ffn_down_shexp.weight q5_K +blk.4.ffn_down_exps.weight q5_K +blk.4.ffn_down_shexp.weight q5_K +blk.5.ffn_down_exps.weight q5_K +blk.5.ffn_down_shexp.weight q5_K +blk.6.ffn_down_exps.weight q5_K +blk.6.ffn_down_shexp.weight q5_K +blk.7.attn_v.weight q5_K +blk.11.attn_v.weight q5_K +blk.15.attn_v.weight q5_K +blk.19.attn_v.weight q5_K +blk.23.attn_v.weight q5_K +blk.27.attn_v.weight q5_K +blk.31.attn_v.weight q5_K +blk.35.attn_v.weight q5_K +blk.39.attn_v.weight q5_K +blk.43.attn_v.weight q5_K +blk.47.attn_v.weight q5_K +blk.51.attn_v.weight q5_K +blk.55.attn_v.weight q5_K +blk.59.attn_v.weight q5_K + +[IQ3_S] iq3_s +output.weight q6_K +blk.3.attn_v.weight q4_K +blk.7.attn_v.weight q4_K +blk.11.attn_v.weight q4_K +blk.15.attn_v.weight q4_K +blk.19.attn_v.weight q4_K +blk.23.attn_v.weight q4_K +blk.27.attn_v.weight q4_K +blk.31.attn_v.weight q4_K +blk.35.attn_v.weight q4_K +blk.39.attn_v.weight q4_K +blk.43.attn_v.weight q4_K +blk.47.attn_v.weight q4_K +blk.51.attn_v.weight q4_K +blk.55.attn_v.weight q4_K +blk.59.attn_v.weight q4_K + +[IQ3_M] iq3_s +output.weight q6_K +blk.0.attn_qkv.weight q4_K +blk.0.ffn_down_exps.weight q4_K +blk.0.ffn_down_shexp.weight q4_K +blk.1.attn_qkv.weight q4_K +blk.1.ffn_down_exps.weight q4_K +blk.1.ffn_down_shexp.weight q4_K +blk.2.attn_qkv.weight q4_K +blk.2.ffn_down_exps.weight q4_K +blk.2.ffn_down_shexp.weight q4_K +blk.3.attn_output.weight q4_K +blk.3.attn_v.weight q4_K +blk.3.ffn_down_exps.weight q4_K +blk.3.ffn_down_shexp.weight q4_K +blk.4.attn_qkv.weight q4_K +blk.4.ffn_down_exps.weight q4_K +blk.4.ffn_down_shexp.weight q4_K +blk.5.attn_qkv.weight q4_K +blk.5.ffn_down_exps.weight q4_K +blk.5.ffn_down_shexp.weight q4_K +blk.6.attn_qkv.weight q4_K +blk.6.ffn_down_exps.weight q4_K +blk.6.ffn_down_shexp.weight q4_K +blk.7.attn_output.weight q4_K +blk.7.attn_v.weight q4_K +blk.8.attn_qkv.weight q4_K +blk.9.attn_qkv.weight q4_K +blk.10.attn_qkv.weight q4_K +blk.11.attn_output.weight q4_K +blk.11.attn_v.weight q4_K +blk.12.attn_qkv.weight q4_K +blk.13.attn_qkv.weight q4_K +blk.14.attn_qkv.weight q4_K +blk.15.attn_output.weight q4_K +blk.15.attn_v.weight q4_K +blk.16.attn_qkv.weight q4_K +blk.17.attn_qkv.weight q4_K +blk.18.attn_qkv.weight q4_K +blk.19.attn_output.weight q4_K +blk.19.attn_v.weight q4_K +blk.20.attn_qkv.weight q4_K +blk.21.attn_qkv.weight q4_K +blk.22.attn_qkv.weight q4_K +blk.23.attn_output.weight q4_K +blk.23.attn_v.weight q4_K +blk.24.attn_qkv.weight q4_K +blk.25.attn_qkv.weight q4_K +blk.26.attn_qkv.weight q4_K +blk.27.attn_output.weight q4_K +blk.27.attn_v.weight q4_K +blk.28.attn_qkv.weight q4_K +blk.29.attn_qkv.weight q4_K +blk.30.attn_qkv.weight q4_K +blk.31.attn_output.weight q4_K +blk.31.attn_v.weight q4_K +blk.32.attn_qkv.weight q4_K +blk.33.attn_qkv.weight q4_K +blk.34.attn_qkv.weight q4_K +blk.35.attn_output.weight q4_K +blk.35.attn_v.weight q4_K +blk.36.attn_qkv.weight q4_K +blk.37.attn_qkv.weight q4_K +blk.38.attn_qkv.weight q4_K +blk.39.attn_output.weight q4_K +blk.39.attn_v.weight q4_K +blk.40.attn_qkv.weight q4_K +blk.41.attn_qkv.weight q4_K +blk.42.attn_qkv.weight q4_K +blk.43.attn_output.weight q4_K +blk.43.attn_v.weight q4_K +blk.44.attn_qkv.weight q4_K +blk.45.attn_qkv.weight q4_K +blk.46.attn_qkv.weight q4_K +blk.47.attn_output.weight q4_K +blk.47.attn_v.weight q4_K +blk.48.attn_qkv.weight q4_K +blk.49.attn_qkv.weight q4_K +blk.50.attn_qkv.weight q4_K +blk.51.attn_output.weight q4_K +blk.51.attn_v.weight q4_K +blk.52.attn_qkv.weight q4_K +blk.53.attn_qkv.weight q4_K +blk.54.attn_qkv.weight q4_K +blk.55.attn_output.weight q4_K +blk.55.attn_v.weight q4_K +blk.56.attn_qkv.weight q4_K +blk.57.attn_qkv.weight q4_K +blk.58.attn_qkv.weight q4_K +blk.59.attn_output.weight q4_K +blk.59.attn_v.weight q4_K + +[IQ2_S] iq2_xs +output.weight q5_K +token_embd.weight iq3_s +blk.0.ffn_down_exps.weight iq3_s +blk.0.ffn_down_shexp.weight iq3_s +blk.1.ffn_down_exps.weight iq3_s +blk.1.ffn_down_shexp.weight iq3_s +blk.2.ffn_down_exps.weight iq3_s +blk.2.ffn_down_shexp.weight iq3_s +blk.3.attn_output.weight iq3_s +blk.3.attn_v.weight q4_K +blk.3.ffn_down_exps.weight iq3_s +blk.7.attn_output.weight iq3_s +blk.7.attn_v.weight q4_K +blk.11.attn_output.weight iq3_s +blk.11.attn_v.weight q4_K +blk.15.attn_output.weight iq3_s +blk.15.attn_v.weight q4_K +blk.19.attn_output.weight iq3_s +blk.19.attn_v.weight q4_K +blk.23.attn_output.weight iq3_s +blk.23.attn_v.weight q4_K +blk.27.attn_output.weight iq3_s +blk.27.attn_v.weight q4_K +blk.31.attn_output.weight iq3_s +blk.31.attn_v.weight q4_K +blk.35.attn_output.weight iq3_s +blk.35.attn_v.weight q4_K +blk.39.attn_output.weight iq3_s +blk.39.attn_v.weight q4_K +blk.43.attn_output.weight iq3_s +blk.43.attn_v.weight q4_K +blk.47.attn_output.weight iq3_s +blk.47.attn_v.weight q4_K +blk.51.attn_output.weight iq3_s +blk.51.attn_v.weight q4_K +blk.55.attn_output.weight iq3_s +blk.55.attn_v.weight q4_K +blk.59.attn_output.weight iq3_s +blk.59.attn_v.weight q4_K + +[IQ2_M] iq2_s +output.weight q5_K +token_embd.weight iq3_s +blk.0.ffn_down_exps.weight iq3_s +blk.0.ffn_down_shexp.weight iq3_s +blk.1.ffn_down_exps.weight iq3_s +blk.1.ffn_down_shexp.weight iq3_s +blk.2.ffn_down_exps.weight iq3_s +blk.2.ffn_down_shexp.weight iq3_s +blk.3.attn_output.weight iq3_s +blk.3.attn_v.weight q4_K +blk.3.ffn_down_exps.weight iq3_s +blk.7.attn_output.weight iq3_s +blk.7.attn_v.weight q4_K +blk.11.attn_output.weight iq3_s +blk.11.attn_v.weight q4_K +blk.15.attn_output.weight iq3_s +blk.15.attn_v.weight q4_K +blk.19.attn_output.weight iq3_s +blk.19.attn_v.weight q4_K +blk.23.attn_output.weight iq3_s +blk.23.attn_v.weight q4_K +blk.27.attn_output.weight iq3_s +blk.27.attn_v.weight q4_K +blk.31.attn_output.weight iq3_s +blk.31.attn_v.weight q4_K +blk.35.attn_output.weight iq3_s +blk.35.attn_v.weight q4_K +blk.39.attn_output.weight iq3_s +blk.39.attn_v.weight q4_K +blk.43.attn_output.weight iq3_s +blk.43.attn_v.weight q4_K +blk.47.attn_output.weight iq3_s +blk.47.attn_v.weight q4_K +blk.51.attn_output.weight iq3_s +blk.51.attn_v.weight q4_K +blk.55.attn_output.weight iq3_s +blk.55.attn_v.weight q4_K +blk.59.attn_output.weight iq3_s +blk.59.attn_v.weight q4_K + +[IQ4_XS] iq4_xs +output.weight q6_K +blk.0.ffn_down_exps.weight q5_K +blk.0.ffn_down_shexp.weight q5_K +blk.1.ffn_down_exps.weight q5_K +blk.1.ffn_down_shexp.weight q5_K +blk.2.ffn_down_exps.weight q5_K +blk.2.ffn_down_shexp.weight q5_K +blk.3.attn_v.weight q5_K +blk.3.ffn_down_exps.weight q5_K +blk.3.ffn_down_shexp.weight q5_K +blk.4.ffn_down_exps.weight q5_K +blk.4.ffn_down_shexp.weight q5_K +blk.5.ffn_down_exps.weight q5_K +blk.5.ffn_down_shexp.weight q5_K +blk.6.ffn_down_exps.weight q5_K +blk.6.ffn_down_shexp.weight q5_K +blk.7.attn_v.weight q5_K +blk.11.attn_v.weight q5_K +blk.15.attn_v.weight q5_K +blk.19.attn_v.weight q5_K +blk.23.attn_v.weight q5_K +blk.27.attn_v.weight q5_K +blk.31.attn_v.weight q5_K +blk.35.attn_v.weight q5_K +blk.39.attn_v.weight q5_K +blk.43.attn_v.weight q5_K +blk.47.attn_v.weight q5_K +blk.51.attn_v.weight q5_K +blk.55.attn_v.weight q5_K +blk.59.attn_v.weight q5_K + +[IQ1_M] iq1_m +output.weight q5_K +token_embd.weight q2_K +blk.0.ffn_down_exps.weight q2_K +blk.0.ffn_down_shexp.weight q2_K +blk.1.ffn_down_exps.weight q2_K +blk.1.ffn_down_shexp.weight q2_K +blk.2.ffn_down_exps.weight q2_K +blk.2.ffn_down_shexp.weight q2_K +blk.3.attn_output.weight iq2_xxs +blk.3.attn_v.weight q4_K +blk.3.ffn_down_exps.weight q2_K +blk.7.attn_output.weight iq2_xxs +blk.7.attn_v.weight q4_K +blk.11.attn_output.weight iq2_xxs +blk.11.attn_v.weight q4_K +blk.15.attn_output.weight iq2_xxs +blk.15.attn_v.weight q4_K +blk.19.attn_output.weight iq2_xxs +blk.19.attn_v.weight q4_K +blk.23.attn_output.weight iq2_xxs +blk.23.attn_v.weight q4_K +blk.27.attn_output.weight iq2_xxs +blk.27.attn_v.weight q4_K +blk.31.attn_output.weight iq2_xxs +blk.31.attn_v.weight q4_K +blk.35.attn_output.weight iq2_xxs +blk.35.attn_v.weight q4_K +blk.39.attn_output.weight iq2_xxs +blk.39.attn_v.weight q4_K +blk.43.attn_output.weight iq2_xxs +blk.43.attn_v.weight q4_K +blk.47.attn_output.weight iq2_xxs +blk.47.attn_v.weight q4_K +blk.51.attn_output.weight iq2_xxs +blk.51.attn_v.weight q4_K +blk.55.attn_output.weight iq2_xxs +blk.55.attn_v.weight q4_K +blk.59.attn_output.weight iq2_xxs +blk.59.attn_v.weight q4_K + +[BF16] bf16 +output.weight q6_K + +[TQ1_0] tq1_0 +output.weight q6_K +token_embd.weight q4_K + +[TQ2_0] tq2_0 +output.weight q6_K +token_embd.weight q4_K + +[MXFP4_MOE] mxfp4 +output.weight q8_0 +token_embd.weight q8_0 +blk.0.attn_gate.weight q8_0 +blk.0.attn_qkv.weight q8_0 +blk.0.ffn_down_shexp.weight q8_0 +blk.0.ffn_gate_shexp.weight q8_0 +blk.0.ffn_up_shexp.weight q8_0 +blk.0.ssm_alpha.weight q8_0 +blk.0.ssm_beta.weight q8_0 +blk.0.ssm_out.weight q8_0 +blk.1.attn_gate.weight q8_0 +blk.1.attn_qkv.weight q8_0 +blk.1.ffn_down_shexp.weight q8_0 +blk.1.ffn_gate_shexp.weight q8_0 +blk.1.ffn_up_shexp.weight q8_0 +blk.1.ssm_alpha.weight q8_0 +blk.1.ssm_beta.weight q8_0 +blk.1.ssm_out.weight q8_0 +blk.2.attn_gate.weight q8_0 +blk.2.attn_qkv.weight q8_0 +blk.2.ffn_down_shexp.weight q8_0 +blk.2.ffn_gate_shexp.weight q8_0 +blk.2.ffn_up_shexp.weight q8_0 +blk.2.ssm_alpha.weight q8_0 +blk.2.ssm_beta.weight q8_0 +blk.2.ssm_out.weight q8_0 +blk.3.attn_k.weight q8_0 +blk.3.attn_output.weight q8_0 +blk.3.attn_q.weight q8_0 +blk.3.attn_v.weight q8_0 +blk.3.ffn_down_shexp.weight q8_0 +blk.3.ffn_gate_shexp.weight q8_0 +blk.3.ffn_up_shexp.weight q8_0 +blk.4.attn_gate.weight q8_0 +blk.4.attn_qkv.weight q8_0 +blk.4.ffn_down_shexp.weight q8_0 +blk.4.ffn_gate_shexp.weight q8_0 +blk.4.ffn_up_shexp.weight q8_0 +blk.4.ssm_alpha.weight q8_0 +blk.4.ssm_beta.weight q8_0 +blk.4.ssm_out.weight q8_0 +blk.5.attn_gate.weight q8_0 +blk.5.attn_qkv.weight q8_0 +blk.5.ffn_down_shexp.weight q8_0 +blk.5.ffn_gate_shexp.weight q8_0 +blk.5.ffn_up_shexp.weight q8_0 +blk.5.ssm_alpha.weight q8_0 +blk.5.ssm_beta.weight q8_0 +blk.5.ssm_out.weight q8_0 +blk.6.attn_gate.weight q8_0 +blk.6.attn_qkv.weight q8_0 +blk.6.ffn_down_shexp.weight q8_0 +blk.6.ffn_gate_shexp.weight q8_0 +blk.6.ffn_up_shexp.weight q8_0 +blk.6.ssm_alpha.weight q8_0 +blk.6.ssm_beta.weight q8_0 +blk.6.ssm_out.weight q8_0 +blk.7.attn_k.weight q8_0 +blk.7.attn_output.weight q8_0 +blk.7.attn_q.weight q8_0 +blk.7.attn_v.weight q8_0 +blk.7.ffn_down_shexp.weight q8_0 +blk.7.ffn_gate_shexp.weight q8_0 +blk.7.ffn_up_shexp.weight q8_0 +blk.8.attn_gate.weight q8_0 +blk.8.attn_qkv.weight q8_0 +blk.8.ffn_down_shexp.weight q8_0 +blk.8.ffn_gate_shexp.weight q8_0 +blk.8.ffn_up_shexp.weight q8_0 +blk.8.ssm_alpha.weight q8_0 +blk.8.ssm_beta.weight q8_0 +blk.8.ssm_out.weight q8_0 +blk.9.attn_gate.weight q8_0 +blk.9.attn_qkv.weight q8_0 +blk.9.ffn_down_shexp.weight q8_0 +blk.9.ffn_gate_shexp.weight q8_0 +blk.9.ffn_up_shexp.weight q8_0 +blk.9.ssm_alpha.weight q8_0 +blk.9.ssm_beta.weight q8_0 +blk.9.ssm_out.weight q8_0 +blk.10.attn_gate.weight q8_0 +blk.10.attn_qkv.weight q8_0 +blk.10.ffn_down_shexp.weight q8_0 +blk.10.ffn_gate_shexp.weight q8_0 +blk.10.ffn_up_shexp.weight q8_0 +blk.10.ssm_alpha.weight q8_0 +blk.10.ssm_beta.weight q8_0 +blk.10.ssm_out.weight q8_0 +blk.11.attn_k.weight q8_0 +blk.11.attn_output.weight q8_0 +blk.11.attn_q.weight q8_0 +blk.11.attn_v.weight q8_0 +blk.11.ffn_down_shexp.weight q8_0 +blk.11.ffn_gate_shexp.weight q8_0 +blk.11.ffn_up_shexp.weight q8_0 +blk.12.attn_gate.weight q8_0 +blk.12.attn_qkv.weight q8_0 +blk.12.ffn_down_shexp.weight q8_0 +blk.12.ffn_gate_shexp.weight q8_0 +blk.12.ffn_up_shexp.weight q8_0 +blk.12.ssm_alpha.weight q8_0 +blk.12.ssm_beta.weight q8_0 +blk.12.ssm_out.weight q8_0 +blk.13.attn_gate.weight q8_0 +blk.13.attn_qkv.weight q8_0 +blk.13.ffn_down_shexp.weight q8_0 +blk.13.ffn_gate_shexp.weight q8_0 +blk.13.ffn_up_shexp.weight q8_0 +blk.13.ssm_alpha.weight q8_0 +blk.13.ssm_beta.weight q8_0 +blk.13.ssm_out.weight q8_0 +blk.14.attn_gate.weight q8_0 +blk.14.attn_qkv.weight q8_0 +blk.14.ffn_down_shexp.weight q8_0 +blk.14.ffn_gate_shexp.weight q8_0 +blk.14.ffn_up_shexp.weight q8_0 +blk.14.ssm_alpha.weight q8_0 +blk.14.ssm_beta.weight q8_0 +blk.14.ssm_out.weight q8_0 +blk.15.attn_k.weight q8_0 +blk.15.attn_output.weight q8_0 +blk.15.attn_q.weight q8_0 +blk.15.attn_v.weight q8_0 +blk.15.ffn_down_shexp.weight q8_0 +blk.15.ffn_gate_shexp.weight q8_0 +blk.15.ffn_up_shexp.weight q8_0 +blk.16.attn_gate.weight q8_0 +blk.16.attn_qkv.weight q8_0 +blk.16.ffn_down_shexp.weight q8_0 +blk.16.ffn_gate_shexp.weight q8_0 +blk.16.ffn_up_shexp.weight q8_0 +blk.16.ssm_alpha.weight q8_0 +blk.16.ssm_beta.weight q8_0 +blk.16.ssm_out.weight q8_0 +blk.17.attn_gate.weight q8_0 +blk.17.attn_qkv.weight q8_0 +blk.17.ffn_down_shexp.weight q8_0 +blk.17.ffn_gate_shexp.weight q8_0 +blk.17.ffn_up_shexp.weight q8_0 +blk.17.ssm_alpha.weight q8_0 +blk.17.ssm_beta.weight q8_0 +blk.17.ssm_out.weight q8_0 +blk.18.attn_gate.weight q8_0 +blk.18.attn_qkv.weight q8_0 +blk.18.ffn_down_shexp.weight q8_0 +blk.18.ffn_gate_shexp.weight q8_0 +blk.18.ffn_up_shexp.weight q8_0 +blk.18.ssm_alpha.weight q8_0 +blk.18.ssm_beta.weight q8_0 +blk.18.ssm_out.weight q8_0 +blk.19.attn_k.weight q8_0 +blk.19.attn_output.weight q8_0 +blk.19.attn_q.weight q8_0 +blk.19.attn_v.weight q8_0 +blk.19.ffn_down_shexp.weight q8_0 +blk.19.ffn_gate_shexp.weight q8_0 +blk.19.ffn_up_shexp.weight q8_0 +blk.20.attn_gate.weight q8_0 +blk.20.attn_qkv.weight q8_0 +blk.20.ffn_down_shexp.weight q8_0 +blk.20.ffn_gate_shexp.weight q8_0 +blk.20.ffn_up_shexp.weight q8_0 +blk.20.ssm_alpha.weight q8_0 +blk.20.ssm_beta.weight q8_0 +blk.20.ssm_out.weight q8_0 +blk.21.attn_gate.weight q8_0 +blk.21.attn_qkv.weight q8_0 +blk.21.ffn_down_shexp.weight q8_0 +blk.21.ffn_gate_shexp.weight q8_0 +blk.21.ffn_up_shexp.weight q8_0 +blk.21.ssm_alpha.weight q8_0 +blk.21.ssm_beta.weight q8_0 +blk.21.ssm_out.weight q8_0 +blk.22.attn_gate.weight q8_0 +blk.22.attn_qkv.weight q8_0 +blk.22.ffn_down_shexp.weight q8_0 +blk.22.ffn_gate_shexp.weight q8_0 +blk.22.ffn_up_shexp.weight q8_0 +blk.22.ssm_alpha.weight q8_0 +blk.22.ssm_beta.weight q8_0 +blk.22.ssm_out.weight q8_0 +blk.23.attn_k.weight q8_0 +blk.23.attn_output.weight q8_0 +blk.23.attn_q.weight q8_0 +blk.23.attn_v.weight q8_0 +blk.23.ffn_down_shexp.weight q8_0 +blk.23.ffn_gate_shexp.weight q8_0 +blk.23.ffn_up_shexp.weight q8_0 +blk.24.attn_gate.weight q8_0 +blk.24.attn_qkv.weight q8_0 +blk.24.ffn_down_shexp.weight q8_0 +blk.24.ffn_gate_shexp.weight q8_0 +blk.24.ffn_up_shexp.weight q8_0 +blk.24.ssm_alpha.weight q8_0 +blk.24.ssm_beta.weight q8_0 +blk.24.ssm_out.weight q8_0 +blk.25.attn_gate.weight q8_0 +blk.25.attn_qkv.weight q8_0 +blk.25.ffn_down_shexp.weight q8_0 +blk.25.ffn_gate_shexp.weight q8_0 +blk.25.ffn_up_shexp.weight q8_0 +blk.25.ssm_alpha.weight q8_0 +blk.25.ssm_beta.weight q8_0 +blk.25.ssm_out.weight q8_0 +blk.26.attn_gate.weight q8_0 +blk.26.attn_qkv.weight q8_0 +blk.26.ffn_down_shexp.weight q8_0 +blk.26.ffn_gate_shexp.weight q8_0 +blk.26.ffn_up_shexp.weight q8_0 +blk.26.ssm_alpha.weight q8_0 +blk.26.ssm_beta.weight q8_0 +blk.26.ssm_out.weight q8_0 +blk.27.attn_k.weight q8_0 +blk.27.attn_output.weight q8_0 +blk.27.attn_q.weight q8_0 +blk.27.attn_v.weight q8_0 +blk.27.ffn_down_shexp.weight q8_0 +blk.27.ffn_gate_shexp.weight q8_0 +blk.27.ffn_up_shexp.weight q8_0 +blk.28.attn_gate.weight q8_0 +blk.28.attn_qkv.weight q8_0 +blk.28.ffn_down_shexp.weight q8_0 +blk.28.ffn_gate_shexp.weight q8_0 +blk.28.ffn_up_shexp.weight q8_0 +blk.28.ssm_alpha.weight q8_0 +blk.28.ssm_beta.weight q8_0 +blk.28.ssm_out.weight q8_0 +blk.29.attn_gate.weight q8_0 +blk.29.attn_qkv.weight q8_0 +blk.29.ffn_down_shexp.weight q8_0 +blk.29.ffn_gate_shexp.weight q8_0 +blk.29.ffn_up_shexp.weight q8_0 +blk.29.ssm_alpha.weight q8_0 +blk.29.ssm_beta.weight q8_0 +blk.29.ssm_out.weight q8_0 +blk.30.attn_gate.weight q8_0 +blk.30.attn_qkv.weight q8_0 +blk.30.ffn_down_shexp.weight q8_0 +blk.30.ffn_gate_shexp.weight q8_0 +blk.30.ffn_up_shexp.weight q8_0 +blk.30.ssm_alpha.weight q8_0 +blk.30.ssm_beta.weight q8_0 +blk.30.ssm_out.weight q8_0 +blk.31.attn_k.weight q8_0 +blk.31.attn_output.weight q8_0 +blk.31.attn_q.weight q8_0 +blk.31.attn_v.weight q8_0 +blk.31.ffn_down_shexp.weight q8_0 +blk.31.ffn_gate_shexp.weight q8_0 +blk.31.ffn_up_shexp.weight q8_0 +blk.32.attn_gate.weight q8_0 +blk.32.attn_qkv.weight q8_0 +blk.32.ffn_down_shexp.weight q8_0 +blk.32.ffn_gate_shexp.weight q8_0 +blk.32.ffn_up_shexp.weight q8_0 +blk.32.ssm_alpha.weight q8_0 +blk.32.ssm_beta.weight q8_0 +blk.32.ssm_out.weight q8_0 +blk.33.attn_gate.weight q8_0 +blk.33.attn_qkv.weight q8_0 +blk.33.ffn_down_shexp.weight q8_0 +blk.33.ffn_gate_shexp.weight q8_0 +blk.33.ffn_up_shexp.weight q8_0 +blk.33.ssm_alpha.weight q8_0 +blk.33.ssm_beta.weight q8_0 +blk.33.ssm_out.weight q8_0 +blk.34.attn_gate.weight q8_0 +blk.34.attn_qkv.weight q8_0 +blk.34.ffn_down_shexp.weight q8_0 +blk.34.ffn_gate_shexp.weight q8_0 +blk.34.ffn_up_shexp.weight q8_0 +blk.34.ssm_alpha.weight q8_0 +blk.34.ssm_beta.weight q8_0 +blk.34.ssm_out.weight q8_0 +blk.35.attn_k.weight q8_0 +blk.35.attn_output.weight q8_0 +blk.35.attn_q.weight q8_0 +blk.35.attn_v.weight q8_0 +blk.35.ffn_down_shexp.weight q8_0 +blk.35.ffn_gate_shexp.weight q8_0 +blk.35.ffn_up_shexp.weight q8_0 +blk.36.attn_gate.weight q8_0 +blk.36.attn_qkv.weight q8_0 +blk.36.ffn_down_shexp.weight q8_0 +blk.36.ffn_gate_shexp.weight q8_0 +blk.36.ffn_up_shexp.weight q8_0 +blk.36.ssm_alpha.weight q8_0 +blk.36.ssm_beta.weight q8_0 +blk.36.ssm_out.weight q8_0 +blk.37.attn_gate.weight q8_0 +blk.37.attn_qkv.weight q8_0 +blk.37.ffn_down_shexp.weight q8_0 +blk.37.ffn_gate_shexp.weight q8_0 +blk.37.ffn_up_shexp.weight q8_0 +blk.37.ssm_alpha.weight q8_0 +blk.37.ssm_beta.weight q8_0 +blk.37.ssm_out.weight q8_0 +blk.38.attn_gate.weight q8_0 +blk.38.attn_qkv.weight q8_0 +blk.38.ffn_down_shexp.weight q8_0 +blk.38.ffn_gate_shexp.weight q8_0 +blk.38.ffn_up_shexp.weight q8_0 +blk.38.ssm_alpha.weight q8_0 +blk.38.ssm_beta.weight q8_0 +blk.38.ssm_out.weight q8_0 +blk.39.attn_k.weight q8_0 +blk.39.attn_output.weight q8_0 +blk.39.attn_q.weight q8_0 +blk.39.attn_v.weight q8_0 +blk.39.ffn_down_shexp.weight q8_0 +blk.39.ffn_gate_shexp.weight q8_0 +blk.39.ffn_up_shexp.weight q8_0 +blk.40.attn_gate.weight q8_0 +blk.40.attn_qkv.weight q8_0 +blk.40.ffn_down_shexp.weight q8_0 +blk.40.ffn_gate_shexp.weight q8_0 +blk.40.ffn_up_shexp.weight q8_0 +blk.40.ssm_alpha.weight q8_0 +blk.40.ssm_beta.weight q8_0 +blk.40.ssm_out.weight q8_0 +blk.41.attn_gate.weight q8_0 +blk.41.attn_qkv.weight q8_0 +blk.41.ffn_down_shexp.weight q8_0 +blk.41.ffn_gate_shexp.weight q8_0 +blk.41.ffn_up_shexp.weight q8_0 +blk.41.ssm_alpha.weight q8_0 +blk.41.ssm_beta.weight q8_0 +blk.41.ssm_out.weight q8_0 +blk.42.attn_gate.weight q8_0 +blk.42.attn_qkv.weight q8_0 +blk.42.ffn_down_shexp.weight q8_0 +blk.42.ffn_gate_shexp.weight q8_0 +blk.42.ffn_up_shexp.weight q8_0 +blk.42.ssm_alpha.weight q8_0 +blk.42.ssm_beta.weight q8_0 +blk.42.ssm_out.weight q8_0 +blk.43.attn_k.weight q8_0 +blk.43.attn_output.weight q8_0 +blk.43.attn_q.weight q8_0 +blk.43.attn_v.weight q8_0 +blk.43.ffn_down_shexp.weight q8_0 +blk.43.ffn_gate_shexp.weight q8_0 +blk.43.ffn_up_shexp.weight q8_0 +blk.44.attn_gate.weight q8_0 +blk.44.attn_qkv.weight q8_0 +blk.44.ffn_down_shexp.weight q8_0 +blk.44.ffn_gate_shexp.weight q8_0 +blk.44.ffn_up_shexp.weight q8_0 +blk.44.ssm_alpha.weight q8_0 +blk.44.ssm_beta.weight q8_0 +blk.44.ssm_out.weight q8_0 +blk.45.attn_gate.weight q8_0 +blk.45.attn_qkv.weight q8_0 +blk.45.ffn_down_shexp.weight q8_0 +blk.45.ffn_gate_shexp.weight q8_0 +blk.45.ffn_up_shexp.weight q8_0 +blk.45.ssm_alpha.weight q8_0 +blk.45.ssm_beta.weight q8_0 +blk.45.ssm_out.weight q8_0 +blk.46.attn_gate.weight q8_0 +blk.46.attn_qkv.weight q8_0 +blk.46.ffn_down_shexp.weight q8_0 +blk.46.ffn_gate_shexp.weight q8_0 +blk.46.ffn_up_shexp.weight q8_0 +blk.46.ssm_alpha.weight q8_0 +blk.46.ssm_beta.weight q8_0 +blk.46.ssm_out.weight q8_0 +blk.47.attn_k.weight q8_0 +blk.47.attn_output.weight q8_0 +blk.47.attn_q.weight q8_0 +blk.47.attn_v.weight q8_0 +blk.47.ffn_down_shexp.weight q8_0 +blk.47.ffn_gate_shexp.weight q8_0 +blk.47.ffn_up_shexp.weight q8_0 +blk.48.attn_gate.weight q8_0 +blk.48.attn_qkv.weight q8_0 +blk.48.ffn_down_shexp.weight q8_0 +blk.48.ffn_gate_shexp.weight q8_0 +blk.48.ffn_up_shexp.weight q8_0 +blk.48.ssm_alpha.weight q8_0 +blk.48.ssm_beta.weight q8_0 +blk.48.ssm_out.weight q8_0 +blk.49.attn_gate.weight q8_0 +blk.49.attn_qkv.weight q8_0 +blk.49.ffn_down_shexp.weight q8_0 +blk.49.ffn_gate_shexp.weight q8_0 +blk.49.ffn_up_shexp.weight q8_0 +blk.49.ssm_alpha.weight q8_0 +blk.49.ssm_beta.weight q8_0 +blk.49.ssm_out.weight q8_0 +blk.50.attn_gate.weight q8_0 +blk.50.attn_qkv.weight q8_0 +blk.50.ffn_down_shexp.weight q8_0 +blk.50.ffn_gate_shexp.weight q8_0 +blk.50.ffn_up_shexp.weight q8_0 +blk.50.ssm_alpha.weight q8_0 +blk.50.ssm_beta.weight q8_0 +blk.50.ssm_out.weight q8_0 +blk.51.attn_k.weight q8_0 +blk.51.attn_output.weight q8_0 +blk.51.attn_q.weight q8_0 +blk.51.attn_v.weight q8_0 +blk.51.ffn_down_shexp.weight q8_0 +blk.51.ffn_gate_shexp.weight q8_0 +blk.51.ffn_up_shexp.weight q8_0 +blk.52.attn_gate.weight q8_0 +blk.52.attn_qkv.weight q8_0 +blk.52.ffn_down_shexp.weight q8_0 +blk.52.ffn_gate_shexp.weight q8_0 +blk.52.ffn_up_shexp.weight q8_0 +blk.52.ssm_alpha.weight q8_0 +blk.52.ssm_beta.weight q8_0 +blk.52.ssm_out.weight q8_0 +blk.53.attn_gate.weight q8_0 +blk.53.attn_qkv.weight q8_0 +blk.53.ffn_down_shexp.weight q8_0 +blk.53.ffn_gate_shexp.weight q8_0 +blk.53.ffn_up_shexp.weight q8_0 +blk.53.ssm_alpha.weight q8_0 +blk.53.ssm_beta.weight q8_0 +blk.53.ssm_out.weight q8_0 +blk.54.attn_gate.weight q8_0 +blk.54.attn_qkv.weight q8_0 +blk.54.ffn_down_shexp.weight q8_0 +blk.54.ffn_gate_shexp.weight q8_0 +blk.54.ffn_up_shexp.weight q8_0 +blk.54.ssm_alpha.weight q8_0 +blk.54.ssm_beta.weight q8_0 +blk.54.ssm_out.weight q8_0 +blk.55.attn_k.weight q8_0 +blk.55.attn_output.weight q8_0 +blk.55.attn_q.weight q8_0 +blk.55.attn_v.weight q8_0 +blk.55.ffn_down_shexp.weight q8_0 +blk.55.ffn_gate_shexp.weight q8_0 +blk.55.ffn_up_shexp.weight q8_0 +blk.56.attn_gate.weight q8_0 +blk.56.attn_qkv.weight q8_0 +blk.56.ffn_down_shexp.weight q8_0 +blk.56.ffn_gate_shexp.weight q8_0 +blk.56.ffn_up_shexp.weight q8_0 +blk.56.ssm_alpha.weight q8_0 +blk.56.ssm_beta.weight q8_0 +blk.56.ssm_out.weight q8_0 +blk.57.attn_gate.weight q8_0 +blk.57.attn_qkv.weight q8_0 +blk.57.ffn_down_shexp.weight q8_0 +blk.57.ffn_gate_shexp.weight q8_0 +blk.57.ffn_up_shexp.weight q8_0 +blk.57.ssm_alpha.weight q8_0 +blk.57.ssm_beta.weight q8_0 +blk.57.ssm_out.weight q8_0 +blk.58.attn_gate.weight q8_0 +blk.58.attn_qkv.weight q8_0 +blk.58.ffn_down_shexp.weight q8_0 +blk.58.ffn_gate_shexp.weight q8_0 +blk.58.ffn_up_shexp.weight q8_0 +blk.58.ssm_alpha.weight q8_0 +blk.58.ssm_beta.weight q8_0 +blk.58.ssm_out.weight q8_0 +blk.59.attn_k.weight q8_0 +blk.59.attn_output.weight q8_0 +blk.59.attn_q.weight q8_0 +blk.59.attn_v.weight q8_0 +blk.59.ffn_down_shexp.weight q8_0 +blk.59.ffn_gate_shexp.weight q8_0 +blk.59.ffn_up_shexp.weight q8_0 diff --git a/tests/snapshots/step-3.5-flash.schema b/tests/snapshots/step-3.5-flash.schema new file mode 100644 index 0000000000..c9d1be0082 --- /dev/null +++ b/tests/snapshots/step-3.5-flash.schema @@ -0,0 +1,2453 @@ +# Model: Step-3.5-Flash +# n_embd=4096, n_ff=11264, n_vocab=128896, n_layer=45, n_head=64, n_head_kv=8, n_expert=288 + +[F32] f32 +output.weight q6_K + +[F16] f16 +output.weight q6_K + +[Q4_0] q4_0 +output.weight q6_K + +[Q4_1] q4_1 +output.weight q6_K + +[Q8_0] q8_0 + +[Q5_0] q5_0 +output.weight q6_K + +[Q5_1] q5_1 +output.weight q6_K + +[Q2_K] q2_K +output.weight q6_K +blk.0.attn_output.weight q3_K +blk.0.attn_v.weight q4_K +blk.0.ffn_down.weight q3_K +blk.1.attn_output.weight q3_K +blk.1.attn_v.weight q4_K +blk.1.ffn_down.weight q3_K +blk.2.attn_output.weight q3_K +blk.2.attn_v.weight q4_K +blk.2.ffn_down.weight q3_K +blk.3.attn_output.weight q3_K +blk.3.attn_v.weight q4_K +blk.3.ffn_down_exps.weight q3_K +blk.3.ffn_down_shexp.weight q3_K +blk.4.attn_output.weight q3_K +blk.4.attn_v.weight q4_K +blk.4.ffn_down_exps.weight q3_K +blk.4.ffn_down_shexp.weight q3_K +blk.5.attn_output.weight q3_K +blk.5.attn_v.weight q4_K +blk.5.ffn_down_exps.weight q3_K +blk.5.ffn_down_shexp.weight q3_K +blk.6.attn_output.weight q3_K +blk.6.attn_v.weight q4_K +blk.6.ffn_down_exps.weight q3_K +blk.6.ffn_down_shexp.weight q3_K +blk.7.attn_output.weight q3_K +blk.7.attn_v.weight q4_K +blk.7.ffn_down_exps.weight q3_K +blk.7.ffn_down_shexp.weight q3_K +blk.8.attn_output.weight q3_K +blk.8.attn_v.weight q4_K +blk.8.ffn_down_exps.weight q3_K +blk.8.ffn_down_shexp.weight q3_K +blk.9.attn_output.weight q3_K +blk.9.attn_v.weight q4_K +blk.9.ffn_down_exps.weight q3_K +blk.9.ffn_down_shexp.weight q3_K +blk.10.attn_output.weight q3_K +blk.10.attn_v.weight q4_K +blk.10.ffn_down_exps.weight q3_K +blk.10.ffn_down_shexp.weight q3_K +blk.11.attn_output.weight q3_K +blk.11.attn_v.weight q4_K +blk.11.ffn_down_exps.weight q3_K +blk.11.ffn_down_shexp.weight q3_K +blk.12.attn_output.weight q3_K +blk.12.attn_v.weight q4_K +blk.12.ffn_down_exps.weight q3_K +blk.12.ffn_down_shexp.weight q3_K +blk.13.attn_output.weight q3_K +blk.13.attn_v.weight q4_K +blk.13.ffn_down_exps.weight q3_K +blk.13.ffn_down_shexp.weight q3_K +blk.14.attn_output.weight q3_K +blk.14.attn_v.weight q4_K +blk.14.ffn_down_exps.weight q3_K +blk.14.ffn_down_shexp.weight q3_K +blk.15.attn_output.weight q3_K +blk.15.attn_v.weight q4_K +blk.15.ffn_down_exps.weight q3_K +blk.15.ffn_down_shexp.weight q3_K +blk.16.attn_output.weight q3_K +blk.16.attn_v.weight q4_K +blk.16.ffn_down_exps.weight q3_K +blk.16.ffn_down_shexp.weight q3_K +blk.17.attn_output.weight q3_K +blk.17.attn_v.weight q4_K +blk.17.ffn_down_exps.weight q3_K +blk.17.ffn_down_shexp.weight q3_K +blk.18.attn_output.weight q3_K +blk.18.attn_v.weight q4_K +blk.18.ffn_down_exps.weight q3_K +blk.18.ffn_down_shexp.weight q3_K +blk.19.attn_output.weight q3_K +blk.19.attn_v.weight q4_K +blk.19.ffn_down_exps.weight q3_K +blk.19.ffn_down_shexp.weight q3_K +blk.20.attn_output.weight q3_K +blk.20.attn_v.weight q4_K +blk.20.ffn_down_exps.weight q3_K +blk.20.ffn_down_shexp.weight q3_K +blk.21.attn_output.weight q3_K +blk.21.attn_v.weight q4_K +blk.21.ffn_down_exps.weight q3_K +blk.21.ffn_down_shexp.weight q3_K +blk.22.attn_output.weight q3_K +blk.22.attn_v.weight q4_K +blk.22.ffn_down_exps.weight q3_K +blk.22.ffn_down_shexp.weight q3_K +blk.23.attn_output.weight q3_K +blk.23.attn_v.weight q4_K +blk.23.ffn_down_exps.weight q3_K +blk.23.ffn_down_shexp.weight q3_K +blk.24.attn_output.weight q3_K +blk.24.attn_v.weight q4_K +blk.24.ffn_down_exps.weight q3_K +blk.24.ffn_down_shexp.weight q3_K +blk.25.attn_output.weight q3_K +blk.25.attn_v.weight q4_K +blk.25.ffn_down_exps.weight q3_K +blk.25.ffn_down_shexp.weight q3_K +blk.26.attn_output.weight q3_K +blk.26.attn_v.weight q4_K +blk.26.ffn_down_exps.weight q3_K +blk.26.ffn_down_shexp.weight q3_K +blk.27.attn_output.weight q3_K +blk.27.attn_v.weight q4_K +blk.27.ffn_down_exps.weight q3_K +blk.27.ffn_down_shexp.weight q3_K +blk.28.attn_output.weight q3_K +blk.28.attn_v.weight q4_K +blk.28.ffn_down_exps.weight q3_K +blk.28.ffn_down_shexp.weight q3_K +blk.29.attn_output.weight q3_K +blk.29.attn_v.weight q4_K +blk.29.ffn_down_exps.weight q3_K +blk.29.ffn_down_shexp.weight q3_K +blk.30.attn_output.weight q3_K +blk.30.attn_v.weight q4_K +blk.30.ffn_down_exps.weight q3_K +blk.30.ffn_down_shexp.weight q3_K +blk.31.attn_output.weight q3_K +blk.31.attn_v.weight q4_K +blk.31.ffn_down_exps.weight q3_K +blk.31.ffn_down_shexp.weight q3_K +blk.32.attn_output.weight q3_K +blk.32.attn_v.weight q4_K +blk.32.ffn_down_exps.weight q3_K +blk.32.ffn_down_shexp.weight q3_K +blk.33.attn_output.weight q3_K +blk.33.attn_v.weight q4_K +blk.33.ffn_down_exps.weight q3_K +blk.33.ffn_down_shexp.weight q3_K +blk.34.attn_output.weight q3_K +blk.34.attn_v.weight q4_K +blk.34.ffn_down_exps.weight q3_K +blk.34.ffn_down_shexp.weight q3_K +blk.35.attn_output.weight q3_K +blk.35.attn_v.weight q4_K +blk.35.ffn_down_exps.weight q3_K +blk.35.ffn_down_shexp.weight q3_K +blk.36.attn_output.weight q3_K +blk.36.attn_v.weight q4_K +blk.36.ffn_down_exps.weight q3_K +blk.36.ffn_down_shexp.weight q3_K +blk.37.attn_output.weight q3_K +blk.37.attn_v.weight q4_K +blk.37.ffn_down_exps.weight q3_K +blk.37.ffn_down_shexp.weight q3_K +blk.38.attn_output.weight q3_K +blk.38.attn_v.weight q4_K +blk.38.ffn_down_exps.weight q3_K +blk.38.ffn_down_shexp.weight q3_K +blk.39.attn_output.weight q3_K +blk.39.attn_v.weight q4_K +blk.39.ffn_down_exps.weight q3_K +blk.39.ffn_down_shexp.weight q3_K +blk.40.attn_output.weight q3_K +blk.40.attn_v.weight q4_K +blk.40.ffn_down_exps.weight q3_K +blk.40.ffn_down_shexp.weight q3_K +blk.41.attn_output.weight q3_K +blk.41.attn_v.weight q4_K +blk.41.ffn_down_exps.weight q3_K +blk.41.ffn_down_shexp.weight q3_K +blk.42.attn_output.weight q3_K +blk.42.attn_v.weight q4_K +blk.42.ffn_down_exps.weight q3_K +blk.42.ffn_down_shexp.weight q3_K +blk.43.attn_output.weight q3_K +blk.43.attn_v.weight q4_K +blk.43.ffn_down_exps.weight q3_K +blk.43.ffn_down_shexp.weight q3_K +blk.44.attn_output.weight q3_K +blk.44.attn_v.weight q4_K +blk.44.ffn_down_exps.weight q3_K +blk.44.ffn_down_shexp.weight q3_K + +[Q3_K_S] q3_K +output.weight q6_K + +[Q3_K_M] q3_K +output.weight q6_K +blk.0.attn_output.weight q4_K +blk.0.attn_v.weight q5_K +blk.0.ffn_down.weight q5_K +blk.1.attn_output.weight q4_K +blk.1.attn_v.weight q5_K +blk.1.ffn_down.weight q5_K +blk.2.attn_output.weight q4_K +blk.2.attn_v.weight q4_K +blk.2.ffn_down.weight q4_K +blk.3.attn_output.weight q4_K +blk.3.attn_v.weight q4_K +blk.3.ffn_down_exps.weight q4_K +blk.3.ffn_down_shexp.weight q4_K +blk.4.attn_output.weight q4_K +blk.4.attn_v.weight q4_K +blk.4.ffn_down_exps.weight q4_K +blk.4.ffn_down_shexp.weight q4_K +blk.5.attn_output.weight q4_K +blk.5.attn_v.weight q4_K +blk.5.ffn_down_exps.weight q4_K +blk.5.ffn_down_shexp.weight q4_K +blk.6.attn_output.weight q4_K +blk.6.attn_v.weight q4_K +blk.6.ffn_down_exps.weight q4_K +blk.6.ffn_down_shexp.weight q4_K +blk.7.attn_output.weight q4_K +blk.7.attn_v.weight q4_K +blk.7.ffn_down_exps.weight q4_K +blk.7.ffn_down_shexp.weight q4_K +blk.8.attn_output.weight q4_K +blk.8.attn_v.weight q4_K +blk.8.ffn_down_exps.weight q4_K +blk.8.ffn_down_shexp.weight q4_K +blk.9.attn_output.weight q4_K +blk.9.attn_v.weight q4_K +blk.9.ffn_down_exps.weight q4_K +blk.9.ffn_down_shexp.weight q4_K +blk.10.attn_output.weight q4_K +blk.10.attn_v.weight q4_K +blk.10.ffn_down_exps.weight q4_K +blk.10.ffn_down_shexp.weight q4_K +blk.11.attn_output.weight q4_K +blk.11.attn_v.weight q4_K +blk.11.ffn_down_exps.weight q4_K +blk.11.ffn_down_shexp.weight q4_K +blk.12.attn_output.weight q4_K +blk.12.attn_v.weight q4_K +blk.12.ffn_down_exps.weight q4_K +blk.12.ffn_down_shexp.weight q4_K +blk.13.attn_output.weight q4_K +blk.13.attn_v.weight q4_K +blk.13.ffn_down_exps.weight q4_K +blk.13.ffn_down_shexp.weight q4_K +blk.14.attn_output.weight q4_K +blk.14.attn_v.weight q4_K +blk.14.ffn_down_exps.weight q4_K +blk.14.ffn_down_shexp.weight q4_K +blk.15.attn_output.weight q4_K +blk.15.attn_v.weight q4_K +blk.15.ffn_down_exps.weight q4_K +blk.15.ffn_down_shexp.weight q4_K +blk.16.attn_output.weight q4_K +blk.16.attn_v.weight q4_K +blk.16.ffn_down_exps.weight q4_K +blk.16.ffn_down_shexp.weight q4_K +blk.17.attn_output.weight q4_K +blk.17.attn_v.weight q4_K +blk.17.ffn_down_exps.weight q4_K +blk.17.ffn_down_shexp.weight q4_K +blk.18.attn_output.weight q4_K +blk.18.attn_v.weight q4_K +blk.18.ffn_down_exps.weight q4_K +blk.18.ffn_down_shexp.weight q4_K +blk.19.attn_output.weight q4_K +blk.19.attn_v.weight q4_K +blk.19.ffn_down_exps.weight q4_K +blk.19.ffn_down_shexp.weight q4_K +blk.20.attn_output.weight q4_K +blk.20.attn_v.weight q4_K +blk.20.ffn_down_exps.weight q4_K +blk.20.ffn_down_shexp.weight q4_K +blk.21.attn_output.weight q4_K +blk.21.attn_v.weight q4_K +blk.21.ffn_down_exps.weight q4_K +blk.21.ffn_down_shexp.weight q4_K +blk.22.attn_output.weight q4_K +blk.22.attn_v.weight q4_K +blk.22.ffn_down_exps.weight q4_K +blk.22.ffn_down_shexp.weight q4_K +blk.23.attn_output.weight q4_K +blk.23.attn_v.weight q4_K +blk.23.ffn_down_exps.weight q4_K +blk.23.ffn_down_shexp.weight q4_K +blk.24.attn_output.weight q4_K +blk.24.attn_v.weight q4_K +blk.24.ffn_down_exps.weight q4_K +blk.24.ffn_down_shexp.weight q4_K +blk.25.attn_output.weight q4_K +blk.25.attn_v.weight q4_K +blk.25.ffn_down_exps.weight q4_K +blk.25.ffn_down_shexp.weight q4_K +blk.26.attn_output.weight q4_K +blk.26.attn_v.weight q4_K +blk.26.ffn_down_exps.weight q4_K +blk.26.ffn_down_shexp.weight q4_K +blk.27.attn_output.weight q4_K +blk.27.attn_v.weight q4_K +blk.27.ffn_down_exps.weight q4_K +blk.27.ffn_down_shexp.weight q4_K +blk.28.attn_output.weight q4_K +blk.28.attn_v.weight q4_K +blk.28.ffn_down_exps.weight q4_K +blk.28.ffn_down_shexp.weight q4_K +blk.29.attn_output.weight q4_K +blk.29.attn_v.weight q4_K +blk.29.ffn_down_exps.weight q4_K +blk.29.ffn_down_shexp.weight q4_K +blk.30.attn_output.weight q4_K +blk.30.attn_v.weight q4_K +blk.30.ffn_down_exps.weight q4_K +blk.30.ffn_down_shexp.weight q4_K +blk.31.attn_output.weight q4_K +blk.31.attn_v.weight q4_K +blk.31.ffn_down_exps.weight q4_K +blk.31.ffn_down_shexp.weight q4_K +blk.32.attn_output.weight q4_K +blk.32.attn_v.weight q4_K +blk.32.ffn_down_exps.weight q4_K +blk.32.ffn_down_shexp.weight q4_K +blk.33.attn_output.weight q4_K +blk.33.attn_v.weight q4_K +blk.33.ffn_down_exps.weight q4_K +blk.33.ffn_down_shexp.weight q4_K +blk.34.attn_output.weight q4_K +blk.34.attn_v.weight q4_K +blk.34.ffn_down_exps.weight q4_K +blk.34.ffn_down_shexp.weight q4_K +blk.35.attn_output.weight q4_K +blk.35.attn_v.weight q4_K +blk.35.ffn_down_exps.weight q4_K +blk.35.ffn_down_shexp.weight q4_K +blk.36.attn_output.weight q4_K +blk.36.attn_v.weight q4_K +blk.36.ffn_down_exps.weight q4_K +blk.36.ffn_down_shexp.weight q4_K +blk.37.attn_output.weight q4_K +blk.37.attn_v.weight q4_K +blk.37.ffn_down_exps.weight q4_K +blk.37.ffn_down_shexp.weight q4_K +blk.38.attn_output.weight q4_K +blk.38.attn_v.weight q4_K +blk.38.ffn_down_exps.weight q4_K +blk.38.ffn_down_shexp.weight q4_K +blk.39.attn_output.weight q4_K +blk.39.attn_v.weight q4_K +blk.39.ffn_down_exps.weight q4_K +blk.39.ffn_down_shexp.weight q4_K +blk.40.attn_output.weight q4_K +blk.40.attn_v.weight q4_K +blk.40.ffn_down_exps.weight q4_K +blk.40.ffn_down_shexp.weight q4_K +blk.41.attn_output.weight q4_K +blk.41.attn_v.weight q4_K +blk.41.ffn_down_exps.weight q4_K +blk.41.ffn_down_shexp.weight q4_K +blk.42.attn_output.weight q4_K +blk.42.attn_v.weight q4_K +blk.42.ffn_down_exps.weight q4_K +blk.42.ffn_down_shexp.weight q4_K +blk.43.attn_output.weight q4_K +blk.43.attn_v.weight q4_K +blk.43.ffn_down_exps.weight q4_K +blk.43.ffn_down_shexp.weight q4_K +blk.44.attn_output.weight q4_K +blk.44.attn_v.weight q4_K +blk.44.ffn_down_exps.weight q4_K +blk.44.ffn_down_shexp.weight q4_K + +[Q3_K_L] q3_K +output.weight q6_K +blk.0.attn_output.weight q5_K +blk.0.attn_v.weight q5_K +blk.0.ffn_down.weight q5_K +blk.1.attn_output.weight q5_K +blk.1.attn_v.weight q5_K +blk.1.ffn_down.weight q5_K +blk.2.attn_output.weight q5_K +blk.2.attn_v.weight q5_K +blk.2.ffn_down.weight q5_K +blk.3.attn_output.weight q5_K +blk.3.attn_v.weight q5_K +blk.3.ffn_down_exps.weight q5_K +blk.3.ffn_down_shexp.weight q5_K +blk.4.attn_output.weight q5_K +blk.4.attn_v.weight q5_K +blk.4.ffn_down_exps.weight q5_K +blk.4.ffn_down_shexp.weight q5_K +blk.5.attn_output.weight q5_K +blk.5.attn_v.weight q5_K +blk.5.ffn_down_exps.weight q5_K +blk.5.ffn_down_shexp.weight q5_K +blk.6.attn_output.weight q5_K +blk.6.attn_v.weight q5_K +blk.6.ffn_down_exps.weight q5_K +blk.6.ffn_down_shexp.weight q5_K +blk.7.attn_output.weight q5_K +blk.7.attn_v.weight q5_K +blk.7.ffn_down_exps.weight q5_K +blk.7.ffn_down_shexp.weight q5_K +blk.8.attn_output.weight q5_K +blk.8.attn_v.weight q5_K +blk.8.ffn_down_exps.weight q5_K +blk.8.ffn_down_shexp.weight q5_K +blk.9.attn_output.weight q5_K +blk.9.attn_v.weight q5_K +blk.9.ffn_down_exps.weight q5_K +blk.9.ffn_down_shexp.weight q5_K +blk.10.attn_output.weight q5_K +blk.10.attn_v.weight q5_K +blk.10.ffn_down_exps.weight q5_K +blk.10.ffn_down_shexp.weight q5_K +blk.11.attn_output.weight q5_K +blk.11.attn_v.weight q5_K +blk.11.ffn_down_exps.weight q5_K +blk.11.ffn_down_shexp.weight q5_K +blk.12.attn_output.weight q5_K +blk.12.attn_v.weight q5_K +blk.12.ffn_down_exps.weight q5_K +blk.12.ffn_down_shexp.weight q5_K +blk.13.attn_output.weight q5_K +blk.13.attn_v.weight q5_K +blk.13.ffn_down_exps.weight q5_K +blk.13.ffn_down_shexp.weight q5_K +blk.14.attn_output.weight q5_K +blk.14.attn_v.weight q5_K +blk.14.ffn_down_exps.weight q5_K +blk.14.ffn_down_shexp.weight q5_K +blk.15.attn_output.weight q5_K +blk.15.attn_v.weight q5_K +blk.15.ffn_down_exps.weight q5_K +blk.15.ffn_down_shexp.weight q5_K +blk.16.attn_output.weight q5_K +blk.16.attn_v.weight q5_K +blk.16.ffn_down_exps.weight q5_K +blk.16.ffn_down_shexp.weight q5_K +blk.17.attn_output.weight q5_K +blk.17.attn_v.weight q5_K +blk.17.ffn_down_exps.weight q5_K +blk.17.ffn_down_shexp.weight q5_K +blk.18.attn_output.weight q5_K +blk.18.attn_v.weight q5_K +blk.18.ffn_down_exps.weight q5_K +blk.18.ffn_down_shexp.weight q5_K +blk.19.attn_output.weight q5_K +blk.19.attn_v.weight q5_K +blk.19.ffn_down_exps.weight q5_K +blk.19.ffn_down_shexp.weight q5_K +blk.20.attn_output.weight q5_K +blk.20.attn_v.weight q5_K +blk.20.ffn_down_exps.weight q5_K +blk.20.ffn_down_shexp.weight q5_K +blk.21.attn_output.weight q5_K +blk.21.attn_v.weight q5_K +blk.21.ffn_down_exps.weight q5_K +blk.21.ffn_down_shexp.weight q5_K +blk.22.attn_output.weight q5_K +blk.22.attn_v.weight q5_K +blk.22.ffn_down_exps.weight q5_K +blk.22.ffn_down_shexp.weight q5_K +blk.23.attn_output.weight q5_K +blk.23.attn_v.weight q5_K +blk.23.ffn_down_exps.weight q5_K +blk.23.ffn_down_shexp.weight q5_K +blk.24.attn_output.weight q5_K +blk.24.attn_v.weight q5_K +blk.24.ffn_down_exps.weight q5_K +blk.24.ffn_down_shexp.weight q5_K +blk.25.attn_output.weight q5_K +blk.25.attn_v.weight q5_K +blk.25.ffn_down_exps.weight q5_K +blk.25.ffn_down_shexp.weight q5_K +blk.26.attn_output.weight q5_K +blk.26.attn_v.weight q5_K +blk.26.ffn_down_exps.weight q5_K +blk.26.ffn_down_shexp.weight q5_K +blk.27.attn_output.weight q5_K +blk.27.attn_v.weight q5_K +blk.27.ffn_down_exps.weight q5_K +blk.27.ffn_down_shexp.weight q5_K +blk.28.attn_output.weight q5_K +blk.28.attn_v.weight q5_K +blk.28.ffn_down_exps.weight q5_K +blk.28.ffn_down_shexp.weight q5_K +blk.29.attn_output.weight q5_K +blk.29.attn_v.weight q5_K +blk.29.ffn_down_exps.weight q5_K +blk.29.ffn_down_shexp.weight q5_K +blk.30.attn_output.weight q5_K +blk.30.attn_v.weight q5_K +blk.30.ffn_down_exps.weight q5_K +blk.30.ffn_down_shexp.weight q5_K +blk.31.attn_output.weight q5_K +blk.31.attn_v.weight q5_K +blk.31.ffn_down_exps.weight q5_K +blk.31.ffn_down_shexp.weight q5_K +blk.32.attn_output.weight q5_K +blk.32.attn_v.weight q5_K +blk.32.ffn_down_exps.weight q5_K +blk.32.ffn_down_shexp.weight q5_K +blk.33.attn_output.weight q5_K +blk.33.attn_v.weight q5_K +blk.33.ffn_down_exps.weight q5_K +blk.33.ffn_down_shexp.weight q5_K +blk.34.attn_output.weight q5_K +blk.34.attn_v.weight q5_K +blk.34.ffn_down_exps.weight q5_K +blk.34.ffn_down_shexp.weight q5_K +blk.35.attn_output.weight q5_K +blk.35.attn_v.weight q5_K +blk.35.ffn_down_exps.weight q5_K +blk.35.ffn_down_shexp.weight q5_K +blk.36.attn_output.weight q5_K +blk.36.attn_v.weight q5_K +blk.36.ffn_down_exps.weight q5_K +blk.36.ffn_down_shexp.weight q5_K +blk.37.attn_output.weight q5_K +blk.37.attn_v.weight q5_K +blk.37.ffn_down_exps.weight q5_K +blk.37.ffn_down_shexp.weight q5_K +blk.38.attn_output.weight q5_K +blk.38.attn_v.weight q5_K +blk.38.ffn_down_exps.weight q5_K +blk.38.ffn_down_shexp.weight q5_K +blk.39.attn_output.weight q5_K +blk.39.attn_v.weight q5_K +blk.39.ffn_down_exps.weight q5_K +blk.39.ffn_down_shexp.weight q5_K +blk.40.attn_output.weight q5_K +blk.40.attn_v.weight q5_K +blk.40.ffn_down_exps.weight q5_K +blk.40.ffn_down_shexp.weight q5_K +blk.41.attn_output.weight q5_K +blk.41.attn_v.weight q5_K +blk.41.ffn_down_exps.weight q5_K +blk.41.ffn_down_shexp.weight q5_K +blk.42.attn_output.weight q5_K +blk.42.attn_v.weight q5_K +blk.42.ffn_down_exps.weight q5_K +blk.42.ffn_down_shexp.weight q5_K +blk.43.attn_output.weight q5_K +blk.43.attn_v.weight q5_K +blk.43.ffn_down_exps.weight q5_K +blk.43.ffn_down_shexp.weight q5_K +blk.44.attn_output.weight q5_K +blk.44.attn_v.weight q5_K +blk.44.ffn_down_exps.weight q5_K +blk.44.ffn_down_shexp.weight q5_K + +[Q4_K_S] q4_K +output.weight q6_K +blk.0.attn_v.weight q5_K +blk.0.ffn_down.weight q5_K +blk.1.attn_v.weight q5_K +blk.1.ffn_down.weight q5_K +blk.2.attn_v.weight q5_K +blk.2.ffn_down.weight q5_K +blk.3.attn_v.weight q5_K +blk.3.ffn_down_exps.weight q5_K +blk.3.ffn_down_shexp.weight q5_K +blk.4.ffn_down_exps.weight q5_K +blk.4.ffn_down_shexp.weight q5_K + +[Q4_K_M] q4_K +output.weight q6_K +blk.0.attn_v.weight q6_K +blk.0.ffn_down.weight q6_K +blk.1.attn_v.weight q6_K +blk.1.ffn_down.weight q6_K +blk.2.attn_v.weight q6_K +blk.2.ffn_down.weight q6_K +blk.3.attn_v.weight q6_K +blk.3.ffn_down_exps.weight q6_K +blk.3.ffn_down_shexp.weight q6_K +blk.4.attn_v.weight q6_K +blk.4.ffn_down_exps.weight q6_K +blk.4.ffn_down_shexp.weight q6_K +blk.7.attn_v.weight q6_K +blk.7.ffn_down_exps.weight q6_K +blk.7.ffn_down_shexp.weight q6_K +blk.10.attn_v.weight q6_K +blk.10.ffn_down_exps.weight q6_K +blk.10.ffn_down_shexp.weight q6_K +blk.13.attn_v.weight q6_K +blk.13.ffn_down_exps.weight q6_K +blk.13.ffn_down_shexp.weight q6_K +blk.16.attn_v.weight q6_K +blk.16.ffn_down_exps.weight q6_K +blk.16.ffn_down_shexp.weight q6_K +blk.19.attn_v.weight q6_K +blk.19.ffn_down_exps.weight q6_K +blk.19.ffn_down_shexp.weight q6_K +blk.22.attn_v.weight q6_K +blk.22.ffn_down_exps.weight q6_K +blk.22.ffn_down_shexp.weight q6_K +blk.25.attn_v.weight q6_K +blk.25.ffn_down_exps.weight q6_K +blk.25.ffn_down_shexp.weight q6_K +blk.28.attn_v.weight q6_K +blk.28.ffn_down_exps.weight q6_K +blk.28.ffn_down_shexp.weight q6_K +blk.31.attn_v.weight q6_K +blk.31.ffn_down_exps.weight q6_K +blk.31.ffn_down_shexp.weight q6_K +blk.34.attn_v.weight q6_K +blk.34.ffn_down_exps.weight q6_K +blk.34.ffn_down_shexp.weight q6_K +blk.37.attn_v.weight q6_K +blk.37.ffn_down_exps.weight q6_K +blk.37.ffn_down_shexp.weight q6_K +blk.39.attn_v.weight q6_K +blk.39.ffn_down_exps.weight q6_K +blk.39.ffn_down_shexp.weight q6_K +blk.40.attn_v.weight q6_K +blk.40.ffn_down_exps.weight q6_K +blk.40.ffn_down_shexp.weight q6_K +blk.41.attn_v.weight q6_K +blk.41.ffn_down_exps.weight q6_K +blk.41.ffn_down_shexp.weight q6_K +blk.42.attn_v.weight q6_K +blk.42.ffn_down_exps.weight q6_K +blk.42.ffn_down_shexp.weight q6_K +blk.43.attn_v.weight q6_K +blk.43.ffn_down_exps.weight q6_K +blk.43.ffn_down_shexp.weight q6_K +blk.44.attn_v.weight q6_K +blk.44.ffn_down_exps.weight q6_K +blk.44.ffn_down_shexp.weight q6_K + +[Q5_K_S] q5_K +output.weight q6_K + +[Q5_K_M] q5_K +output.weight q6_K +blk.0.attn_v.weight q6_K +blk.0.ffn_down.weight q6_K +blk.1.attn_v.weight q6_K +blk.1.ffn_down.weight q6_K +blk.2.attn_v.weight q6_K +blk.2.ffn_down.weight q6_K +blk.3.attn_v.weight q6_K +blk.3.ffn_down_exps.weight q6_K +blk.3.ffn_down_shexp.weight q6_K +blk.4.attn_v.weight q6_K +blk.4.ffn_down_exps.weight q6_K +blk.4.ffn_down_shexp.weight q6_K +blk.7.attn_v.weight q6_K +blk.7.ffn_down_exps.weight q6_K +blk.7.ffn_down_shexp.weight q6_K +blk.10.attn_v.weight q6_K +blk.10.ffn_down_exps.weight q6_K +blk.10.ffn_down_shexp.weight q6_K +blk.13.attn_v.weight q6_K +blk.13.ffn_down_exps.weight q6_K +blk.13.ffn_down_shexp.weight q6_K +blk.16.attn_v.weight q6_K +blk.16.ffn_down_exps.weight q6_K +blk.16.ffn_down_shexp.weight q6_K +blk.19.attn_v.weight q6_K +blk.19.ffn_down_exps.weight q6_K +blk.19.ffn_down_shexp.weight q6_K +blk.22.attn_v.weight q6_K +blk.22.ffn_down_exps.weight q6_K +blk.22.ffn_down_shexp.weight q6_K +blk.25.attn_v.weight q6_K +blk.25.ffn_down_exps.weight q6_K +blk.25.ffn_down_shexp.weight q6_K +blk.28.attn_v.weight q6_K +blk.28.ffn_down_exps.weight q6_K +blk.28.ffn_down_shexp.weight q6_K +blk.31.attn_v.weight q6_K +blk.31.ffn_down_exps.weight q6_K +blk.31.ffn_down_shexp.weight q6_K +blk.34.attn_v.weight q6_K +blk.34.ffn_down_exps.weight q6_K +blk.34.ffn_down_shexp.weight q6_K +blk.37.attn_v.weight q6_K +blk.37.ffn_down_exps.weight q6_K +blk.37.ffn_down_shexp.weight q6_K +blk.39.attn_v.weight q6_K +blk.39.ffn_down_exps.weight q6_K +blk.39.ffn_down_shexp.weight q6_K +blk.40.attn_v.weight q6_K +blk.40.ffn_down_exps.weight q6_K +blk.40.ffn_down_shexp.weight q6_K +blk.41.attn_v.weight q6_K +blk.41.ffn_down_exps.weight q6_K +blk.41.ffn_down_shexp.weight q6_K +blk.42.attn_v.weight q6_K +blk.42.ffn_down_exps.weight q6_K +blk.42.ffn_down_shexp.weight q6_K +blk.43.attn_v.weight q6_K +blk.43.ffn_down_exps.weight q6_K +blk.43.ffn_down_shexp.weight q6_K +blk.44.attn_v.weight q6_K +blk.44.ffn_down_exps.weight q6_K +blk.44.ffn_down_shexp.weight q6_K + +[Q6_K] q6_K + +[IQ2_XXS] iq2_xxs +output.weight q5_K +token_embd.weight q2_K +blk.0.attn_v.weight q4_K +blk.0.ffn_down.weight q2_K +blk.1.attn_v.weight q4_K +blk.1.ffn_down.weight q2_K +blk.2.attn_v.weight q4_K +blk.2.ffn_down.weight q2_K +blk.3.attn_v.weight q4_K +blk.3.ffn_down_exps.weight q2_K +blk.3.ffn_down_shexp.weight q2_K +blk.4.attn_v.weight q4_K +blk.5.attn_v.weight q4_K +blk.6.attn_v.weight q4_K +blk.7.attn_v.weight q4_K +blk.8.attn_v.weight q4_K +blk.9.attn_v.weight q4_K +blk.10.attn_v.weight q4_K +blk.11.attn_v.weight q4_K +blk.12.attn_v.weight q4_K +blk.13.attn_v.weight q4_K +blk.14.attn_v.weight q4_K +blk.15.attn_v.weight q4_K +blk.16.attn_v.weight q4_K +blk.17.attn_v.weight q4_K +blk.18.attn_v.weight q4_K +blk.19.attn_v.weight q4_K +blk.20.attn_v.weight q4_K +blk.21.attn_v.weight q4_K +blk.22.attn_v.weight q4_K +blk.23.attn_v.weight q4_K +blk.24.attn_v.weight q4_K +blk.25.attn_v.weight q4_K +blk.26.attn_v.weight q4_K +blk.27.attn_v.weight q4_K +blk.28.attn_v.weight q4_K +blk.29.attn_v.weight q4_K +blk.30.attn_v.weight q4_K +blk.31.attn_v.weight q4_K +blk.32.attn_v.weight q4_K +blk.33.attn_v.weight q4_K +blk.34.attn_v.weight q4_K +blk.35.attn_v.weight q4_K +blk.36.attn_v.weight q4_K +blk.37.attn_v.weight q4_K +blk.38.attn_v.weight q4_K +blk.39.attn_v.weight q4_K +blk.40.attn_v.weight q4_K +blk.41.attn_v.weight q4_K +blk.42.attn_v.weight q4_K +blk.43.attn_v.weight q4_K +blk.44.attn_v.weight q4_K + +[IQ2_XS] iq2_xs +output.weight q5_K +token_embd.weight q2_K +blk.0.attn_v.weight q4_K +blk.0.ffn_down.weight q2_K +blk.1.attn_v.weight q4_K +blk.1.ffn_down.weight q2_K +blk.2.attn_v.weight q4_K +blk.2.ffn_down.weight q2_K +blk.3.attn_v.weight q4_K +blk.3.ffn_down_exps.weight q2_K +blk.3.ffn_down_shexp.weight q2_K +blk.4.attn_v.weight q4_K +blk.5.attn_v.weight q4_K +blk.6.attn_v.weight q4_K +blk.7.attn_v.weight q4_K +blk.8.attn_v.weight q4_K +blk.9.attn_v.weight q4_K +blk.10.attn_v.weight q4_K +blk.11.attn_v.weight q4_K +blk.12.attn_v.weight q4_K +blk.13.attn_v.weight q4_K +blk.14.attn_v.weight q4_K +blk.15.attn_v.weight q4_K +blk.16.attn_v.weight q4_K +blk.17.attn_v.weight q4_K +blk.18.attn_v.weight q4_K +blk.19.attn_v.weight q4_K +blk.20.attn_v.weight q4_K +blk.21.attn_v.weight q4_K +blk.22.attn_v.weight q4_K +blk.23.attn_v.weight q4_K +blk.24.attn_v.weight q4_K +blk.25.attn_v.weight q4_K +blk.26.attn_v.weight q4_K +blk.27.attn_v.weight q4_K +blk.28.attn_v.weight q4_K +blk.29.attn_v.weight q4_K +blk.30.attn_v.weight q4_K +blk.31.attn_v.weight q4_K +blk.32.attn_v.weight q4_K +blk.33.attn_v.weight q4_K +blk.34.attn_v.weight q4_K +blk.35.attn_v.weight q4_K +blk.36.attn_v.weight q4_K +blk.37.attn_v.weight q4_K +blk.38.attn_v.weight q4_K +blk.39.attn_v.weight q4_K +blk.40.attn_v.weight q4_K +blk.41.attn_v.weight q4_K +blk.42.attn_v.weight q4_K +blk.43.attn_v.weight q4_K +blk.44.attn_v.weight q4_K + +[Q2_K_S] q2_K +output.weight q6_K +blk.0.attn_v.weight q4_K +blk.0.ffn_down.weight q4_K +blk.1.attn_v.weight q4_K +blk.1.ffn_down.weight q4_K +blk.2.attn_v.weight q4_K +blk.2.ffn_down.weight q4_K +blk.3.attn_v.weight q4_K +blk.3.ffn_down_exps.weight q4_K +blk.3.ffn_down_shexp.weight q4_K +blk.4.attn_v.weight q4_K +blk.4.ffn_down_exps.weight q4_K +blk.4.ffn_down_shexp.weight q4_K +blk.5.attn_v.weight q4_K +blk.6.attn_v.weight q4_K +blk.7.attn_v.weight q4_K +blk.8.attn_v.weight q4_K +blk.9.attn_v.weight q4_K +blk.10.attn_v.weight q4_K +blk.11.attn_v.weight q4_K +blk.12.attn_v.weight q4_K +blk.13.attn_v.weight q4_K +blk.14.attn_v.weight q4_K +blk.15.attn_v.weight q4_K +blk.16.attn_v.weight q4_K +blk.17.attn_v.weight q4_K +blk.18.attn_v.weight q4_K +blk.19.attn_v.weight q4_K +blk.20.attn_v.weight q4_K +blk.21.attn_v.weight q4_K +blk.22.attn_v.weight q4_K +blk.23.attn_v.weight q4_K +blk.24.attn_v.weight q4_K +blk.25.attn_v.weight q4_K +blk.26.attn_v.weight q4_K +blk.27.attn_v.weight q4_K +blk.28.attn_v.weight q4_K +blk.29.attn_v.weight q4_K +blk.30.attn_v.weight q4_K +blk.31.attn_v.weight q4_K +blk.32.attn_v.weight q4_K +blk.33.attn_v.weight q4_K +blk.34.attn_v.weight q4_K +blk.35.attn_v.weight q4_K +blk.36.attn_v.weight q4_K +blk.37.attn_v.weight q4_K +blk.38.attn_v.weight q4_K +blk.39.attn_v.weight q4_K +blk.40.attn_v.weight q4_K +blk.41.attn_v.weight q4_K +blk.42.attn_v.weight q4_K +blk.43.attn_v.weight q4_K +blk.44.attn_v.weight q4_K + +[IQ3_XS] iq3_s +output.weight q6_K +blk.0.attn_k.weight iq3_xxs +blk.0.attn_q.weight iq3_xxs +blk.0.attn_v.weight q4_K +blk.1.attn_k.weight iq3_xxs +blk.1.attn_q.weight iq3_xxs +blk.1.attn_v.weight q4_K +blk.2.attn_k.weight iq3_xxs +blk.2.attn_q.weight iq3_xxs +blk.2.attn_v.weight q4_K +blk.3.attn_k.weight iq3_xxs +blk.3.attn_q.weight iq3_xxs +blk.3.attn_v.weight q4_K +blk.4.attn_k.weight iq3_xxs +blk.4.attn_q.weight iq3_xxs +blk.4.attn_v.weight q4_K +blk.5.attn_k.weight iq3_xxs +blk.5.attn_q.weight iq3_xxs +blk.5.attn_v.weight q4_K +blk.5.ffn_gate_exps.weight iq3_xxs +blk.5.ffn_gate_shexp.weight iq3_xxs +blk.5.ffn_up_exps.weight iq3_xxs +blk.5.ffn_up_shexp.weight iq3_xxs +blk.6.attn_k.weight iq3_xxs +blk.6.attn_q.weight iq3_xxs +blk.6.attn_v.weight q4_K +blk.6.ffn_gate_exps.weight iq3_xxs +blk.6.ffn_gate_shexp.weight iq3_xxs +blk.6.ffn_up_exps.weight iq3_xxs +blk.6.ffn_up_shexp.weight iq3_xxs +blk.7.attn_k.weight iq3_xxs +blk.7.attn_q.weight iq3_xxs +blk.7.attn_v.weight q4_K +blk.7.ffn_gate_exps.weight iq3_xxs +blk.7.ffn_gate_shexp.weight iq3_xxs +blk.7.ffn_up_exps.weight iq3_xxs +blk.7.ffn_up_shexp.weight iq3_xxs +blk.8.attn_k.weight iq3_xxs +blk.8.attn_q.weight iq3_xxs +blk.8.attn_v.weight q4_K +blk.8.ffn_gate_exps.weight iq3_xxs +blk.8.ffn_gate_shexp.weight iq3_xxs +blk.8.ffn_up_exps.weight iq3_xxs +blk.8.ffn_up_shexp.weight iq3_xxs +blk.9.attn_k.weight iq3_xxs +blk.9.attn_q.weight iq3_xxs +blk.9.attn_v.weight q4_K +blk.9.ffn_gate_exps.weight iq3_xxs +blk.9.ffn_gate_shexp.weight iq3_xxs +blk.9.ffn_up_exps.weight iq3_xxs +blk.9.ffn_up_shexp.weight iq3_xxs +blk.10.attn_k.weight iq3_xxs +blk.10.attn_q.weight iq3_xxs +blk.10.attn_v.weight q4_K +blk.10.ffn_gate_exps.weight iq3_xxs +blk.10.ffn_gate_shexp.weight iq3_xxs +blk.10.ffn_up_exps.weight iq3_xxs +blk.10.ffn_up_shexp.weight iq3_xxs +blk.11.attn_k.weight iq3_xxs +blk.11.attn_q.weight iq3_xxs +blk.11.attn_v.weight q4_K +blk.11.ffn_gate_exps.weight iq3_xxs +blk.11.ffn_gate_shexp.weight iq3_xxs +blk.11.ffn_up_exps.weight iq3_xxs +blk.11.ffn_up_shexp.weight iq3_xxs +blk.12.attn_k.weight iq3_xxs +blk.12.attn_q.weight iq3_xxs +blk.12.attn_v.weight q4_K +blk.12.ffn_gate_exps.weight iq3_xxs +blk.12.ffn_gate_shexp.weight iq3_xxs +blk.12.ffn_up_exps.weight iq3_xxs +blk.12.ffn_up_shexp.weight iq3_xxs +blk.13.attn_k.weight iq3_xxs +blk.13.attn_q.weight iq3_xxs +blk.13.attn_v.weight q4_K +blk.13.ffn_gate_exps.weight iq3_xxs +blk.13.ffn_gate_shexp.weight iq3_xxs +blk.13.ffn_up_exps.weight iq3_xxs +blk.13.ffn_up_shexp.weight iq3_xxs +blk.14.attn_k.weight iq3_xxs +blk.14.attn_q.weight iq3_xxs +blk.14.attn_v.weight q4_K +blk.14.ffn_gate_exps.weight iq3_xxs +blk.14.ffn_gate_shexp.weight iq3_xxs +blk.14.ffn_up_exps.weight iq3_xxs +blk.14.ffn_up_shexp.weight iq3_xxs +blk.15.attn_k.weight iq3_xxs +blk.15.attn_q.weight iq3_xxs +blk.15.attn_v.weight q4_K +blk.15.ffn_gate_exps.weight iq3_xxs +blk.15.ffn_gate_shexp.weight iq3_xxs +blk.15.ffn_up_exps.weight iq3_xxs +blk.15.ffn_up_shexp.weight iq3_xxs +blk.16.attn_k.weight iq3_xxs +blk.16.attn_q.weight iq3_xxs +blk.16.attn_v.weight q4_K +blk.16.ffn_gate_exps.weight iq3_xxs +blk.16.ffn_gate_shexp.weight iq3_xxs +blk.16.ffn_up_exps.weight iq3_xxs +blk.16.ffn_up_shexp.weight iq3_xxs +blk.17.attn_k.weight iq3_xxs +blk.17.attn_q.weight iq3_xxs +blk.17.attn_v.weight q4_K +blk.17.ffn_gate_exps.weight iq3_xxs +blk.17.ffn_gate_shexp.weight iq3_xxs +blk.17.ffn_up_exps.weight iq3_xxs +blk.17.ffn_up_shexp.weight iq3_xxs +blk.18.attn_k.weight iq3_xxs +blk.18.attn_q.weight iq3_xxs +blk.18.attn_v.weight q4_K +blk.18.ffn_gate_exps.weight iq3_xxs +blk.18.ffn_gate_shexp.weight iq3_xxs +blk.18.ffn_up_exps.weight iq3_xxs +blk.18.ffn_up_shexp.weight iq3_xxs +blk.19.attn_k.weight iq3_xxs +blk.19.attn_q.weight iq3_xxs +blk.19.attn_v.weight q4_K +blk.19.ffn_gate_exps.weight iq3_xxs +blk.19.ffn_gate_shexp.weight iq3_xxs +blk.19.ffn_up_exps.weight iq3_xxs +blk.19.ffn_up_shexp.weight iq3_xxs +blk.20.attn_k.weight iq3_xxs +blk.20.attn_q.weight iq3_xxs +blk.20.attn_v.weight q4_K +blk.20.ffn_gate_exps.weight iq3_xxs +blk.20.ffn_gate_shexp.weight iq3_xxs +blk.20.ffn_up_exps.weight iq3_xxs +blk.20.ffn_up_shexp.weight iq3_xxs +blk.21.attn_k.weight iq3_xxs +blk.21.attn_q.weight iq3_xxs +blk.21.attn_v.weight q4_K +blk.21.ffn_gate_exps.weight iq3_xxs +blk.21.ffn_gate_shexp.weight iq3_xxs +blk.21.ffn_up_exps.weight iq3_xxs +blk.21.ffn_up_shexp.weight iq3_xxs +blk.22.attn_k.weight iq3_xxs +blk.22.attn_q.weight iq3_xxs +blk.22.attn_v.weight q4_K +blk.22.ffn_gate_exps.weight iq3_xxs +blk.22.ffn_gate_shexp.weight iq3_xxs +blk.22.ffn_up_exps.weight iq3_xxs +blk.22.ffn_up_shexp.weight iq3_xxs +blk.23.attn_k.weight iq3_xxs +blk.23.attn_q.weight iq3_xxs +blk.23.attn_v.weight q4_K +blk.23.ffn_gate_exps.weight iq3_xxs +blk.23.ffn_gate_shexp.weight iq3_xxs +blk.23.ffn_up_exps.weight iq3_xxs +blk.23.ffn_up_shexp.weight iq3_xxs +blk.24.attn_k.weight iq3_xxs +blk.24.attn_q.weight iq3_xxs +blk.24.attn_v.weight q4_K +blk.24.ffn_gate_exps.weight iq3_xxs +blk.24.ffn_gate_shexp.weight iq3_xxs +blk.24.ffn_up_exps.weight iq3_xxs +blk.24.ffn_up_shexp.weight iq3_xxs +blk.25.attn_k.weight iq3_xxs +blk.25.attn_q.weight iq3_xxs +blk.25.attn_v.weight q4_K +blk.25.ffn_gate_exps.weight iq3_xxs +blk.25.ffn_gate_shexp.weight iq3_xxs +blk.25.ffn_up_exps.weight iq3_xxs +blk.25.ffn_up_shexp.weight iq3_xxs +blk.26.attn_k.weight iq3_xxs +blk.26.attn_q.weight iq3_xxs +blk.26.attn_v.weight q4_K +blk.26.ffn_gate_exps.weight iq3_xxs +blk.26.ffn_gate_shexp.weight iq3_xxs +blk.26.ffn_up_exps.weight iq3_xxs +blk.26.ffn_up_shexp.weight iq3_xxs +blk.27.attn_k.weight iq3_xxs +blk.27.attn_q.weight iq3_xxs +blk.27.attn_v.weight q4_K +blk.27.ffn_gate_exps.weight iq3_xxs +blk.27.ffn_gate_shexp.weight iq3_xxs +blk.27.ffn_up_exps.weight iq3_xxs +blk.27.ffn_up_shexp.weight iq3_xxs +blk.28.attn_k.weight iq3_xxs +blk.28.attn_q.weight iq3_xxs +blk.28.attn_v.weight q4_K +blk.28.ffn_gate_exps.weight iq3_xxs +blk.28.ffn_gate_shexp.weight iq3_xxs +blk.28.ffn_up_exps.weight iq3_xxs +blk.28.ffn_up_shexp.weight iq3_xxs +blk.29.attn_k.weight iq3_xxs +blk.29.attn_q.weight iq3_xxs +blk.29.attn_v.weight q4_K +blk.29.ffn_gate_exps.weight iq3_xxs +blk.29.ffn_gate_shexp.weight iq3_xxs +blk.29.ffn_up_exps.weight iq3_xxs +blk.29.ffn_up_shexp.weight iq3_xxs +blk.30.attn_k.weight iq3_xxs +blk.30.attn_q.weight iq3_xxs +blk.30.attn_v.weight q4_K +blk.30.ffn_gate_exps.weight iq3_xxs +blk.30.ffn_gate_shexp.weight iq3_xxs +blk.30.ffn_up_exps.weight iq3_xxs +blk.30.ffn_up_shexp.weight iq3_xxs +blk.31.attn_k.weight iq3_xxs +blk.31.attn_q.weight iq3_xxs +blk.31.attn_v.weight q4_K +blk.31.ffn_gate_exps.weight iq3_xxs +blk.31.ffn_gate_shexp.weight iq3_xxs +blk.31.ffn_up_exps.weight iq3_xxs +blk.31.ffn_up_shexp.weight iq3_xxs +blk.32.attn_k.weight iq3_xxs +blk.32.attn_q.weight iq3_xxs +blk.32.attn_v.weight q4_K +blk.32.ffn_gate_exps.weight iq3_xxs +blk.32.ffn_gate_shexp.weight iq3_xxs +blk.32.ffn_up_exps.weight iq3_xxs +blk.32.ffn_up_shexp.weight iq3_xxs +blk.33.attn_k.weight iq3_xxs +blk.33.attn_q.weight iq3_xxs +blk.33.attn_v.weight q4_K +blk.33.ffn_gate_exps.weight iq3_xxs +blk.33.ffn_gate_shexp.weight iq3_xxs +blk.33.ffn_up_exps.weight iq3_xxs +blk.33.ffn_up_shexp.weight iq3_xxs +blk.34.attn_k.weight iq3_xxs +blk.34.attn_q.weight iq3_xxs +blk.34.attn_v.weight q4_K +blk.34.ffn_gate_exps.weight iq3_xxs +blk.34.ffn_gate_shexp.weight iq3_xxs +blk.34.ffn_up_exps.weight iq3_xxs +blk.34.ffn_up_shexp.weight iq3_xxs +blk.35.attn_k.weight iq3_xxs +blk.35.attn_q.weight iq3_xxs +blk.35.attn_v.weight q4_K +blk.35.ffn_gate_exps.weight iq3_xxs +blk.35.ffn_gate_shexp.weight iq3_xxs +blk.35.ffn_up_exps.weight iq3_xxs +blk.35.ffn_up_shexp.weight iq3_xxs +blk.36.attn_k.weight iq3_xxs +blk.36.attn_q.weight iq3_xxs +blk.36.attn_v.weight q4_K +blk.36.ffn_gate_exps.weight iq3_xxs +blk.36.ffn_gate_shexp.weight iq3_xxs +blk.36.ffn_up_exps.weight iq3_xxs +blk.36.ffn_up_shexp.weight iq3_xxs +blk.37.attn_k.weight iq3_xxs +blk.37.attn_q.weight iq3_xxs +blk.37.attn_v.weight q4_K +blk.37.ffn_gate_exps.weight iq3_xxs +blk.37.ffn_gate_shexp.weight iq3_xxs +blk.37.ffn_up_exps.weight iq3_xxs +blk.37.ffn_up_shexp.weight iq3_xxs +blk.38.attn_k.weight iq3_xxs +blk.38.attn_q.weight iq3_xxs +blk.38.attn_v.weight q4_K +blk.38.ffn_gate_exps.weight iq3_xxs +blk.38.ffn_gate_shexp.weight iq3_xxs +blk.38.ffn_up_exps.weight iq3_xxs +blk.38.ffn_up_shexp.weight iq3_xxs +blk.39.attn_k.weight iq3_xxs +blk.39.attn_q.weight iq3_xxs +blk.39.attn_v.weight q4_K +blk.40.attn_k.weight iq3_xxs +blk.40.attn_q.weight iq3_xxs +blk.40.attn_v.weight q4_K +blk.41.attn_k.weight iq3_xxs +blk.41.attn_q.weight iq3_xxs +blk.41.attn_v.weight q4_K +blk.42.attn_k.weight iq3_xxs +blk.42.attn_q.weight iq3_xxs +blk.42.attn_v.weight q4_K +blk.43.attn_k.weight iq3_xxs +blk.43.attn_q.weight iq3_xxs +blk.43.attn_v.weight q4_K +blk.44.attn_k.weight iq3_xxs +blk.44.attn_q.weight iq3_xxs +blk.44.attn_v.weight q4_K + +[IQ3_XXS] iq3_xxs +output.weight q5_K +token_embd.weight iq3_s +blk.0.attn_k.weight iq2_s +blk.0.attn_output.weight iq3_s +blk.0.attn_q.weight iq2_s +blk.0.attn_v.weight q4_K +blk.0.ffn_down.weight q4_K +blk.1.attn_k.weight iq2_s +blk.1.attn_output.weight iq3_s +blk.1.attn_q.weight iq2_s +blk.1.attn_v.weight q4_K +blk.1.ffn_down.weight q4_K +blk.2.attn_k.weight iq2_s +blk.2.attn_output.weight iq3_s +blk.2.attn_q.weight iq2_s +blk.2.attn_v.weight q4_K +blk.2.ffn_down.weight q4_K +blk.3.attn_k.weight iq2_s +blk.3.attn_output.weight iq3_s +blk.3.attn_q.weight iq2_s +blk.3.attn_v.weight q4_K +blk.3.ffn_down_exps.weight q4_K +blk.3.ffn_down_shexp.weight q4_K +blk.4.attn_k.weight iq2_s +blk.4.attn_output.weight iq3_s +blk.4.attn_q.weight iq2_s +blk.4.attn_v.weight q4_K +blk.4.ffn_down_exps.weight q4_K +blk.4.ffn_down_shexp.weight q4_K +blk.5.attn_k.weight iq2_s +blk.5.attn_output.weight iq3_s +blk.5.attn_q.weight iq2_s +blk.5.attn_v.weight q4_K +blk.5.ffn_down_exps.weight q3_K +blk.5.ffn_down_shexp.weight q3_K +blk.6.attn_k.weight iq2_s +blk.6.attn_output.weight iq3_s +blk.6.attn_q.weight iq2_s +blk.6.attn_v.weight q4_K +blk.6.ffn_down_exps.weight q3_K +blk.6.ffn_down_shexp.weight q3_K +blk.7.attn_k.weight iq2_s +blk.7.attn_output.weight iq3_s +blk.7.attn_q.weight iq2_s +blk.7.attn_v.weight q4_K +blk.7.ffn_down_exps.weight q3_K +blk.7.ffn_down_shexp.weight q3_K +blk.8.attn_k.weight iq2_s +blk.8.attn_output.weight iq3_s +blk.8.attn_q.weight iq2_s +blk.8.attn_v.weight q4_K +blk.8.ffn_down_exps.weight q3_K +blk.8.ffn_down_shexp.weight q3_K +blk.9.attn_k.weight iq2_s +blk.9.attn_output.weight iq3_s +blk.9.attn_q.weight iq2_s +blk.9.attn_v.weight q4_K +blk.9.ffn_down_exps.weight q3_K +blk.9.ffn_down_shexp.weight q3_K +blk.10.attn_k.weight iq2_s +blk.10.attn_output.weight iq3_s +blk.10.attn_q.weight iq2_s +blk.10.attn_v.weight q4_K +blk.10.ffn_down_exps.weight q3_K +blk.10.ffn_down_shexp.weight q3_K +blk.11.attn_k.weight iq2_s +blk.11.attn_output.weight iq3_s +blk.11.attn_q.weight iq2_s +blk.11.attn_v.weight q4_K +blk.11.ffn_down_exps.weight q3_K +blk.11.ffn_down_shexp.weight q3_K +blk.12.attn_k.weight iq2_s +blk.12.attn_output.weight iq3_s +blk.12.attn_q.weight iq2_s +blk.12.attn_v.weight q4_K +blk.12.ffn_down_exps.weight q3_K +blk.12.ffn_down_shexp.weight q3_K +blk.13.attn_k.weight iq2_s +blk.13.attn_output.weight iq3_s +blk.13.attn_q.weight iq2_s +blk.13.attn_v.weight q4_K +blk.13.ffn_down_exps.weight q3_K +blk.13.ffn_down_shexp.weight q3_K +blk.14.attn_k.weight iq2_s +blk.14.attn_output.weight iq3_s +blk.14.attn_q.weight iq2_s +blk.14.attn_v.weight q4_K +blk.14.ffn_down_exps.weight q3_K +blk.14.ffn_down_shexp.weight q3_K +blk.15.attn_k.weight iq2_s +blk.15.attn_output.weight iq3_s +blk.15.attn_q.weight iq2_s +blk.15.attn_v.weight q4_K +blk.15.ffn_down_exps.weight q3_K +blk.15.ffn_down_shexp.weight q3_K +blk.16.attn_k.weight iq2_s +blk.16.attn_output.weight iq3_s +blk.16.attn_q.weight iq2_s +blk.16.attn_v.weight q4_K +blk.16.ffn_down_exps.weight q3_K +blk.16.ffn_down_shexp.weight q3_K +blk.17.attn_k.weight iq2_s +blk.17.attn_output.weight iq3_s +blk.17.attn_q.weight iq2_s +blk.17.attn_v.weight q4_K +blk.17.ffn_down_exps.weight q3_K +blk.17.ffn_down_shexp.weight q3_K +blk.18.attn_k.weight iq2_s +blk.18.attn_output.weight iq3_s +blk.18.attn_q.weight iq2_s +blk.18.attn_v.weight q4_K +blk.18.ffn_down_exps.weight q3_K +blk.18.ffn_down_shexp.weight q3_K +blk.19.attn_k.weight iq2_s +blk.19.attn_output.weight iq3_s +blk.19.attn_q.weight iq2_s +blk.19.attn_v.weight q4_K +blk.19.ffn_down_exps.weight q3_K +blk.19.ffn_down_shexp.weight q3_K +blk.20.attn_k.weight iq2_s +blk.20.attn_output.weight iq3_s +blk.20.attn_q.weight iq2_s +blk.20.attn_v.weight q4_K +blk.20.ffn_down_exps.weight q3_K +blk.20.ffn_down_shexp.weight q3_K +blk.21.attn_k.weight iq2_s +blk.21.attn_output.weight iq3_s +blk.21.attn_q.weight iq2_s +blk.21.attn_v.weight q4_K +blk.21.ffn_down_exps.weight q3_K +blk.21.ffn_down_shexp.weight q3_K +blk.22.attn_k.weight iq2_s +blk.22.attn_output.weight iq3_s +blk.22.attn_q.weight iq2_s +blk.22.attn_v.weight q4_K +blk.22.ffn_down_exps.weight q3_K +blk.22.ffn_down_shexp.weight q3_K +blk.23.attn_k.weight iq2_s +blk.23.attn_output.weight iq3_s +blk.23.attn_q.weight iq2_s +blk.23.attn_v.weight q4_K +blk.23.ffn_down_exps.weight q3_K +blk.23.ffn_down_shexp.weight q3_K +blk.24.attn_k.weight iq2_s +blk.24.attn_output.weight iq3_s +blk.24.attn_q.weight iq2_s +blk.24.attn_v.weight q4_K +blk.24.ffn_down_exps.weight q3_K +blk.24.ffn_down_shexp.weight q3_K +blk.25.attn_k.weight iq2_s +blk.25.attn_output.weight iq3_s +blk.25.attn_q.weight iq2_s +blk.25.attn_v.weight q4_K +blk.25.ffn_down_exps.weight q3_K +blk.25.ffn_down_shexp.weight q3_K +blk.26.attn_k.weight iq2_s +blk.26.attn_output.weight iq3_s +blk.26.attn_q.weight iq2_s +blk.26.attn_v.weight q4_K +blk.26.ffn_down_exps.weight q3_K +blk.26.ffn_down_shexp.weight q3_K +blk.27.attn_k.weight iq2_s +blk.27.attn_output.weight iq3_s +blk.27.attn_q.weight iq2_s +blk.27.attn_v.weight q4_K +blk.27.ffn_down_exps.weight q3_K +blk.27.ffn_down_shexp.weight q3_K +blk.28.attn_k.weight iq2_s +blk.28.attn_output.weight iq3_s +blk.28.attn_q.weight iq2_s +blk.28.attn_v.weight q4_K +blk.28.ffn_down_exps.weight q3_K +blk.28.ffn_down_shexp.weight q3_K +blk.29.attn_k.weight iq2_s +blk.29.attn_output.weight iq3_s +blk.29.attn_q.weight iq2_s +blk.29.attn_v.weight q4_K +blk.29.ffn_down_exps.weight q3_K +blk.29.ffn_down_shexp.weight q3_K +blk.30.attn_k.weight iq2_s +blk.30.attn_output.weight iq3_s +blk.30.attn_q.weight iq2_s +blk.30.attn_v.weight q4_K +blk.30.ffn_down_exps.weight q3_K +blk.30.ffn_down_shexp.weight q3_K +blk.31.attn_k.weight iq2_s +blk.31.attn_output.weight iq3_s +blk.31.attn_q.weight iq2_s +blk.31.attn_v.weight q4_K +blk.31.ffn_down_exps.weight q3_K +blk.31.ffn_down_shexp.weight q3_K +blk.32.attn_k.weight iq2_s +blk.32.attn_output.weight iq3_s +blk.32.attn_q.weight iq2_s +blk.32.attn_v.weight q4_K +blk.32.ffn_down_exps.weight q3_K +blk.32.ffn_down_shexp.weight q3_K +blk.33.attn_k.weight iq2_s +blk.33.attn_output.weight iq3_s +blk.33.attn_q.weight iq2_s +blk.33.attn_v.weight q4_K +blk.33.ffn_down_exps.weight q3_K +blk.33.ffn_down_shexp.weight q3_K +blk.34.attn_k.weight iq2_s +blk.34.attn_output.weight iq3_s +blk.34.attn_q.weight iq2_s +blk.34.attn_v.weight q4_K +blk.34.ffn_down_exps.weight q3_K +blk.34.ffn_down_shexp.weight q3_K +blk.35.attn_k.weight iq2_s +blk.35.attn_output.weight iq3_s +blk.35.attn_q.weight iq2_s +blk.35.attn_v.weight q4_K +blk.35.ffn_down_exps.weight q3_K +blk.35.ffn_down_shexp.weight q3_K +blk.36.attn_k.weight iq2_s +blk.36.attn_output.weight iq3_s +blk.36.attn_q.weight iq2_s +blk.36.attn_v.weight q4_K +blk.36.ffn_down_exps.weight q3_K +blk.36.ffn_down_shexp.weight q3_K +blk.37.attn_k.weight iq2_s +blk.37.attn_output.weight iq3_s +blk.37.attn_q.weight iq2_s +blk.37.attn_v.weight q4_K +blk.37.ffn_down_exps.weight q3_K +blk.37.ffn_down_shexp.weight q3_K +blk.38.attn_k.weight iq2_s +blk.38.attn_output.weight iq3_s +blk.38.attn_q.weight iq2_s +blk.38.attn_v.weight q4_K +blk.38.ffn_down_exps.weight q3_K +blk.38.ffn_down_shexp.weight q3_K +blk.39.attn_k.weight iq2_s +blk.39.attn_output.weight iq3_s +blk.39.attn_q.weight iq2_s +blk.39.attn_v.weight q4_K +blk.39.ffn_down_exps.weight q3_K +blk.39.ffn_down_shexp.weight q3_K +blk.40.attn_k.weight iq2_s +blk.40.attn_output.weight iq3_s +blk.40.attn_q.weight iq2_s +blk.40.attn_v.weight q4_K +blk.40.ffn_down_exps.weight q3_K +blk.40.ffn_down_shexp.weight q3_K +blk.41.attn_k.weight iq2_s +blk.41.attn_output.weight iq3_s +blk.41.attn_q.weight iq2_s +blk.41.attn_v.weight q4_K +blk.41.ffn_down_exps.weight q3_K +blk.41.ffn_down_shexp.weight q3_K +blk.42.attn_k.weight iq2_s +blk.42.attn_output.weight iq3_s +blk.42.attn_q.weight iq2_s +blk.42.attn_v.weight q4_K +blk.42.ffn_down_exps.weight q3_K +blk.42.ffn_down_shexp.weight q3_K +blk.43.attn_k.weight iq2_s +blk.43.attn_output.weight iq3_s +blk.43.attn_q.weight iq2_s +blk.43.attn_v.weight q4_K +blk.43.ffn_down_exps.weight q3_K +blk.43.ffn_down_shexp.weight q3_K +blk.44.attn_k.weight iq2_s +blk.44.attn_output.weight iq3_s +blk.44.attn_q.weight iq2_s +blk.44.attn_v.weight q4_K +blk.44.ffn_down_exps.weight q3_K +blk.44.ffn_down_shexp.weight q3_K + +[IQ1_S] iq1_s +output.weight q5_K +token_embd.weight q2_K +blk.0.attn_output.weight iq2_xxs +blk.0.attn_v.weight q4_K +blk.0.ffn_down.weight q2_K +blk.1.attn_output.weight iq2_xxs +blk.1.attn_v.weight q4_K +blk.1.ffn_down.weight q2_K +blk.2.attn_output.weight iq2_xxs +blk.2.attn_v.weight q4_K +blk.2.ffn_down.weight q2_K +blk.3.attn_output.weight iq2_xxs +blk.3.attn_v.weight q4_K +blk.3.ffn_down_exps.weight q2_K +blk.3.ffn_down_shexp.weight q2_K +blk.4.attn_output.weight iq2_xxs +blk.4.attn_v.weight q4_K +blk.5.attn_output.weight iq2_xxs +blk.5.attn_v.weight q4_K +blk.6.attn_output.weight iq2_xxs +blk.6.attn_v.weight q4_K +blk.7.attn_output.weight iq2_xxs +blk.7.attn_v.weight q4_K +blk.8.attn_output.weight iq2_xxs +blk.8.attn_v.weight q4_K +blk.9.attn_output.weight iq2_xxs +blk.9.attn_v.weight q4_K +blk.10.attn_output.weight iq2_xxs +blk.10.attn_v.weight q4_K +blk.11.attn_output.weight iq2_xxs +blk.11.attn_v.weight q4_K +blk.12.attn_output.weight iq2_xxs +blk.12.attn_v.weight q4_K +blk.13.attn_output.weight iq2_xxs +blk.13.attn_v.weight q4_K +blk.14.attn_output.weight iq2_xxs +blk.14.attn_v.weight q4_K +blk.15.attn_output.weight iq2_xxs +blk.15.attn_v.weight q4_K +blk.16.attn_output.weight iq2_xxs +blk.16.attn_v.weight q4_K +blk.17.attn_output.weight iq2_xxs +blk.17.attn_v.weight q4_K +blk.18.attn_output.weight iq2_xxs +blk.18.attn_v.weight q4_K +blk.19.attn_output.weight iq2_xxs +blk.19.attn_v.weight q4_K +blk.20.attn_output.weight iq2_xxs +blk.20.attn_v.weight q4_K +blk.21.attn_output.weight iq2_xxs +blk.21.attn_v.weight q4_K +blk.22.attn_output.weight iq2_xxs +blk.22.attn_v.weight q4_K +blk.23.attn_output.weight iq2_xxs +blk.23.attn_v.weight q4_K +blk.24.attn_output.weight iq2_xxs +blk.24.attn_v.weight q4_K +blk.25.attn_output.weight iq2_xxs +blk.25.attn_v.weight q4_K +blk.26.attn_output.weight iq2_xxs +blk.26.attn_v.weight q4_K +blk.27.attn_output.weight iq2_xxs +blk.27.attn_v.weight q4_K +blk.28.attn_output.weight iq2_xxs +blk.28.attn_v.weight q4_K +blk.29.attn_output.weight iq2_xxs +blk.29.attn_v.weight q4_K +blk.30.attn_output.weight iq2_xxs +blk.30.attn_v.weight q4_K +blk.31.attn_output.weight iq2_xxs +blk.31.attn_v.weight q4_K +blk.32.attn_output.weight iq2_xxs +blk.32.attn_v.weight q4_K +blk.33.attn_output.weight iq2_xxs +blk.33.attn_v.weight q4_K +blk.34.attn_output.weight iq2_xxs +blk.34.attn_v.weight q4_K +blk.35.attn_output.weight iq2_xxs +blk.35.attn_v.weight q4_K +blk.36.attn_output.weight iq2_xxs +blk.36.attn_v.weight q4_K +blk.37.attn_output.weight iq2_xxs +blk.37.attn_v.weight q4_K +blk.38.attn_output.weight iq2_xxs +blk.38.attn_v.weight q4_K +blk.39.attn_output.weight iq2_xxs +blk.39.attn_v.weight q4_K +blk.40.attn_output.weight iq2_xxs +blk.40.attn_v.weight q4_K +blk.41.attn_output.weight iq2_xxs +blk.41.attn_v.weight q4_K +blk.42.attn_output.weight iq2_xxs +blk.42.attn_v.weight q4_K +blk.43.attn_output.weight iq2_xxs +blk.43.attn_v.weight q4_K +blk.44.attn_output.weight iq2_xxs +blk.44.attn_v.weight q4_K + +[IQ4_NL] iq4_nl +output.weight q6_K +blk.0.attn_v.weight q5_K +blk.0.ffn_down.weight q5_K +blk.1.attn_v.weight q5_K +blk.1.ffn_down.weight q5_K +blk.2.attn_v.weight q5_K +blk.2.ffn_down.weight q5_K +blk.3.attn_v.weight q5_K +blk.3.ffn_down_exps.weight q5_K +blk.3.ffn_down_shexp.weight q5_K +blk.4.attn_v.weight q5_K +blk.4.ffn_down_exps.weight q5_K +blk.4.ffn_down_shexp.weight q5_K +blk.5.attn_v.weight q5_K +blk.6.attn_v.weight q5_K +blk.7.attn_v.weight q5_K +blk.8.attn_v.weight q5_K +blk.9.attn_v.weight q5_K +blk.10.attn_v.weight q5_K +blk.11.attn_v.weight q5_K +blk.12.attn_v.weight q5_K +blk.13.attn_v.weight q5_K +blk.14.attn_v.weight q5_K +blk.15.attn_v.weight q5_K +blk.16.attn_v.weight q5_K +blk.17.attn_v.weight q5_K +blk.18.attn_v.weight q5_K +blk.19.attn_v.weight q5_K +blk.20.attn_v.weight q5_K +blk.21.attn_v.weight q5_K +blk.22.attn_v.weight q5_K +blk.23.attn_v.weight q5_K +blk.24.attn_v.weight q5_K +blk.25.attn_v.weight q5_K +blk.26.attn_v.weight q5_K +blk.27.attn_v.weight q5_K +blk.28.attn_v.weight q5_K +blk.29.attn_v.weight q5_K +blk.30.attn_v.weight q5_K +blk.31.attn_v.weight q5_K +blk.32.attn_v.weight q5_K +blk.33.attn_v.weight q5_K +blk.34.attn_v.weight q5_K +blk.35.attn_v.weight q5_K +blk.36.attn_v.weight q5_K +blk.37.attn_v.weight q5_K +blk.38.attn_v.weight q5_K +blk.39.attn_v.weight q5_K +blk.40.attn_v.weight q5_K +blk.41.attn_v.weight q5_K +blk.42.attn_v.weight q5_K +blk.43.attn_v.weight q5_K +blk.44.attn_v.weight q5_K + +[IQ3_S] iq3_s +output.weight q6_K +blk.0.attn_v.weight q4_K +blk.1.attn_v.weight q4_K +blk.2.attn_v.weight q4_K +blk.3.attn_v.weight q4_K +blk.4.attn_v.weight q4_K +blk.5.attn_v.weight q4_K +blk.6.attn_v.weight q4_K +blk.7.attn_v.weight q4_K +blk.8.attn_v.weight q4_K +blk.9.attn_v.weight q4_K +blk.10.attn_v.weight q4_K +blk.11.attn_v.weight q4_K +blk.12.attn_v.weight q4_K +blk.13.attn_v.weight q4_K +blk.14.attn_v.weight q4_K +blk.15.attn_v.weight q4_K +blk.16.attn_v.weight q4_K +blk.17.attn_v.weight q4_K +blk.18.attn_v.weight q4_K +blk.19.attn_v.weight q4_K +blk.20.attn_v.weight q4_K +blk.21.attn_v.weight q4_K +blk.22.attn_v.weight q4_K +blk.23.attn_v.weight q4_K +blk.24.attn_v.weight q4_K +blk.25.attn_v.weight q4_K +blk.26.attn_v.weight q4_K +blk.27.attn_v.weight q4_K +blk.28.attn_v.weight q4_K +blk.29.attn_v.weight q4_K +blk.30.attn_v.weight q4_K +blk.31.attn_v.weight q4_K +blk.32.attn_v.weight q4_K +blk.33.attn_v.weight q4_K +blk.34.attn_v.weight q4_K +blk.35.attn_v.weight q4_K +blk.36.attn_v.weight q4_K +blk.37.attn_v.weight q4_K +blk.38.attn_v.weight q4_K +blk.39.attn_v.weight q4_K +blk.40.attn_v.weight q4_K +blk.41.attn_v.weight q4_K +blk.42.attn_v.weight q4_K +blk.43.attn_v.weight q4_K +blk.44.attn_v.weight q4_K + +[IQ3_M] iq3_s +output.weight q6_K +blk.0.attn_output.weight q4_K +blk.0.attn_v.weight q4_K +blk.0.ffn_down.weight q4_K +blk.1.attn_output.weight q4_K +blk.1.attn_v.weight q4_K +blk.1.ffn_down.weight q4_K +blk.2.attn_output.weight q4_K +blk.2.attn_v.weight q4_K +blk.2.ffn_down.weight q4_K +blk.3.attn_output.weight q4_K +blk.3.attn_v.weight q4_K +blk.3.ffn_down_exps.weight q4_K +blk.3.ffn_down_shexp.weight q4_K +blk.4.attn_output.weight q4_K +blk.4.attn_v.weight q4_K +blk.4.ffn_down_exps.weight q4_K +blk.4.ffn_down_shexp.weight q4_K +blk.5.attn_output.weight q4_K +blk.5.attn_v.weight q4_K +blk.6.attn_output.weight q4_K +blk.6.attn_v.weight q4_K +blk.7.attn_output.weight q4_K +blk.7.attn_v.weight q4_K +blk.8.attn_output.weight q4_K +blk.8.attn_v.weight q4_K +blk.9.attn_output.weight q4_K +blk.9.attn_v.weight q4_K +blk.10.attn_output.weight q4_K +blk.10.attn_v.weight q4_K +blk.11.attn_output.weight q4_K +blk.11.attn_v.weight q4_K +blk.12.attn_output.weight q4_K +blk.12.attn_v.weight q4_K +blk.13.attn_output.weight q4_K +blk.13.attn_v.weight q4_K +blk.14.attn_output.weight q4_K +blk.14.attn_v.weight q4_K +blk.15.attn_output.weight q4_K +blk.15.attn_v.weight q4_K +blk.16.attn_output.weight q4_K +blk.16.attn_v.weight q4_K +blk.17.attn_output.weight q4_K +blk.17.attn_v.weight q4_K +blk.18.attn_output.weight q4_K +blk.18.attn_v.weight q4_K +blk.19.attn_output.weight q4_K +blk.19.attn_v.weight q4_K +blk.20.attn_output.weight q4_K +blk.20.attn_v.weight q4_K +blk.21.attn_output.weight q4_K +blk.21.attn_v.weight q4_K +blk.22.attn_output.weight q4_K +blk.22.attn_v.weight q4_K +blk.23.attn_output.weight q4_K +blk.23.attn_v.weight q4_K +blk.24.attn_output.weight q4_K +blk.24.attn_v.weight q4_K +blk.25.attn_output.weight q4_K +blk.25.attn_v.weight q4_K +blk.26.attn_output.weight q4_K +blk.26.attn_v.weight q4_K +blk.27.attn_output.weight q4_K +blk.27.attn_v.weight q4_K +blk.28.attn_output.weight q4_K +blk.28.attn_v.weight q4_K +blk.29.attn_output.weight q4_K +blk.29.attn_v.weight q4_K +blk.30.attn_output.weight q4_K +blk.30.attn_v.weight q4_K +blk.31.attn_output.weight q4_K +blk.31.attn_v.weight q4_K +blk.32.attn_output.weight q4_K +blk.32.attn_v.weight q4_K +blk.33.attn_output.weight q4_K +blk.33.attn_v.weight q4_K +blk.34.attn_output.weight q4_K +blk.34.attn_v.weight q4_K +blk.35.attn_output.weight q4_K +blk.35.attn_v.weight q4_K +blk.36.attn_output.weight q4_K +blk.36.attn_v.weight q4_K +blk.37.attn_output.weight q4_K +blk.37.attn_v.weight q4_K +blk.38.attn_output.weight q4_K +blk.38.attn_v.weight q4_K +blk.39.attn_output.weight q4_K +blk.39.attn_v.weight q4_K +blk.40.attn_output.weight q4_K +blk.40.attn_v.weight q4_K +blk.41.attn_output.weight q4_K +blk.41.attn_v.weight q4_K +blk.42.attn_output.weight q4_K +blk.42.attn_v.weight q4_K +blk.43.attn_output.weight q4_K +blk.43.attn_v.weight q4_K +blk.44.attn_output.weight q4_K +blk.44.attn_v.weight q4_K + +[IQ2_S] iq2_xs +output.weight q5_K +token_embd.weight iq3_s +blk.0.attn_output.weight iq3_s +blk.0.attn_v.weight q4_K +blk.0.ffn_down.weight iq3_s +blk.1.attn_output.weight iq3_s +blk.1.attn_v.weight q4_K +blk.1.ffn_down.weight iq3_s +blk.2.attn_output.weight iq3_s +blk.2.attn_v.weight q4_K +blk.2.ffn_down.weight iq3_s +blk.3.attn_output.weight iq3_s +blk.3.attn_v.weight q4_K +blk.3.ffn_down_exps.weight iq3_s +blk.3.ffn_down_shexp.weight iq3_s +blk.4.attn_output.weight iq3_s +blk.4.attn_v.weight q4_K +blk.5.attn_output.weight iq3_s +blk.5.attn_v.weight q4_K +blk.6.attn_output.weight iq3_s +blk.6.attn_v.weight q4_K +blk.7.attn_output.weight iq3_s +blk.7.attn_v.weight q4_K +blk.8.attn_output.weight iq3_s +blk.8.attn_v.weight q4_K +blk.9.attn_output.weight iq3_s +blk.9.attn_v.weight q4_K +blk.10.attn_output.weight iq3_s +blk.10.attn_v.weight q4_K +blk.11.attn_output.weight iq3_s +blk.11.attn_v.weight q4_K +blk.12.attn_output.weight iq3_s +blk.12.attn_v.weight q4_K +blk.13.attn_output.weight iq3_s +blk.13.attn_v.weight q4_K +blk.14.attn_output.weight iq3_s +blk.14.attn_v.weight q4_K +blk.15.attn_output.weight iq3_s +blk.15.attn_v.weight q4_K +blk.16.attn_output.weight iq3_s +blk.16.attn_v.weight q4_K +blk.17.attn_output.weight iq3_s +blk.17.attn_v.weight q4_K +blk.18.attn_output.weight iq3_s +blk.18.attn_v.weight q4_K +blk.19.attn_output.weight iq3_s +blk.19.attn_v.weight q4_K +blk.20.attn_output.weight iq3_s +blk.20.attn_v.weight q4_K +blk.21.attn_output.weight iq3_s +blk.21.attn_v.weight q4_K +blk.22.attn_output.weight iq3_s +blk.22.attn_v.weight q4_K +blk.23.attn_output.weight iq3_s +blk.23.attn_v.weight q4_K +blk.24.attn_output.weight iq3_s +blk.24.attn_v.weight q4_K +blk.25.attn_output.weight iq3_s +blk.25.attn_v.weight q4_K +blk.26.attn_output.weight iq3_s +blk.26.attn_v.weight q4_K +blk.27.attn_output.weight iq3_s +blk.27.attn_v.weight q4_K +blk.28.attn_output.weight iq3_s +blk.28.attn_v.weight q4_K +blk.29.attn_output.weight iq3_s +blk.29.attn_v.weight q4_K +blk.30.attn_output.weight iq3_s +blk.30.attn_v.weight q4_K +blk.31.attn_output.weight iq3_s +blk.31.attn_v.weight q4_K +blk.32.attn_output.weight iq3_s +blk.32.attn_v.weight q4_K +blk.33.attn_output.weight iq3_s +blk.33.attn_v.weight q4_K +blk.34.attn_output.weight iq3_s +blk.34.attn_v.weight q4_K +blk.35.attn_output.weight iq3_s +blk.35.attn_v.weight q4_K +blk.36.attn_output.weight iq3_s +blk.36.attn_v.weight q4_K +blk.37.attn_output.weight iq3_s +blk.37.attn_v.weight q4_K +blk.38.attn_output.weight iq3_s +blk.38.attn_v.weight q4_K +blk.39.attn_output.weight iq3_s +blk.39.attn_v.weight q4_K +blk.40.attn_output.weight iq3_s +blk.40.attn_v.weight q4_K +blk.41.attn_output.weight iq3_s +blk.41.attn_v.weight q4_K +blk.42.attn_output.weight iq3_s +blk.42.attn_v.weight q4_K +blk.43.attn_output.weight iq3_s +blk.43.attn_v.weight q4_K +blk.44.attn_output.weight iq3_s +blk.44.attn_v.weight q4_K + +[IQ2_M] iq2_s +output.weight q5_K +token_embd.weight iq3_s +blk.0.attn_output.weight iq3_s +blk.0.attn_v.weight q4_K +blk.0.ffn_down.weight iq3_s +blk.1.attn_output.weight iq3_s +blk.1.attn_v.weight q4_K +blk.1.ffn_down.weight iq3_s +blk.2.attn_output.weight iq3_s +blk.2.attn_v.weight q4_K +blk.2.ffn_down.weight iq3_s +blk.3.attn_output.weight iq3_s +blk.3.attn_v.weight q4_K +blk.3.ffn_down_exps.weight iq3_s +blk.3.ffn_down_shexp.weight iq3_s +blk.4.attn_output.weight iq3_s +blk.4.attn_v.weight q4_K +blk.5.attn_output.weight iq3_s +blk.5.attn_v.weight q4_K +blk.6.attn_output.weight iq3_s +blk.6.attn_v.weight q4_K +blk.7.attn_output.weight iq3_s +blk.7.attn_v.weight q4_K +blk.8.attn_output.weight iq3_s +blk.8.attn_v.weight q4_K +blk.9.attn_output.weight iq3_s +blk.9.attn_v.weight q4_K +blk.10.attn_output.weight iq3_s +blk.10.attn_v.weight q4_K +blk.11.attn_output.weight iq3_s +blk.11.attn_v.weight q4_K +blk.12.attn_output.weight iq3_s +blk.12.attn_v.weight q4_K +blk.13.attn_output.weight iq3_s +blk.13.attn_v.weight q4_K +blk.14.attn_output.weight iq3_s +blk.14.attn_v.weight q4_K +blk.15.attn_output.weight iq3_s +blk.15.attn_v.weight q4_K +blk.16.attn_output.weight iq3_s +blk.16.attn_v.weight q4_K +blk.17.attn_output.weight iq3_s +blk.17.attn_v.weight q4_K +blk.18.attn_output.weight iq3_s +blk.18.attn_v.weight q4_K +blk.19.attn_output.weight iq3_s +blk.19.attn_v.weight q4_K +blk.20.attn_output.weight iq3_s +blk.20.attn_v.weight q4_K +blk.21.attn_output.weight iq3_s +blk.21.attn_v.weight q4_K +blk.22.attn_output.weight iq3_s +blk.22.attn_v.weight q4_K +blk.23.attn_output.weight iq3_s +blk.23.attn_v.weight q4_K +blk.24.attn_output.weight iq3_s +blk.24.attn_v.weight q4_K +blk.25.attn_output.weight iq3_s +blk.25.attn_v.weight q4_K +blk.26.attn_output.weight iq3_s +blk.26.attn_v.weight q4_K +blk.27.attn_output.weight iq3_s +blk.27.attn_v.weight q4_K +blk.28.attn_output.weight iq3_s +blk.28.attn_v.weight q4_K +blk.29.attn_output.weight iq3_s +blk.29.attn_v.weight q4_K +blk.30.attn_output.weight iq3_s +blk.30.attn_v.weight q4_K +blk.31.attn_output.weight iq3_s +blk.31.attn_v.weight q4_K +blk.32.attn_output.weight iq3_s +blk.32.attn_v.weight q4_K +blk.33.attn_output.weight iq3_s +blk.33.attn_v.weight q4_K +blk.34.attn_output.weight iq3_s +blk.34.attn_v.weight q4_K +blk.35.attn_output.weight iq3_s +blk.35.attn_v.weight q4_K +blk.36.attn_output.weight iq3_s +blk.36.attn_v.weight q4_K +blk.37.attn_output.weight iq3_s +blk.37.attn_v.weight q4_K +blk.38.attn_output.weight iq3_s +blk.38.attn_v.weight q4_K +blk.39.attn_output.weight iq3_s +blk.39.attn_v.weight q4_K +blk.40.attn_output.weight iq3_s +blk.40.attn_v.weight q4_K +blk.41.attn_output.weight iq3_s +blk.41.attn_v.weight q4_K +blk.42.attn_output.weight iq3_s +blk.42.attn_v.weight q4_K +blk.43.attn_output.weight iq3_s +blk.43.attn_v.weight q4_K +blk.44.attn_output.weight iq3_s +blk.44.attn_v.weight q4_K + +[IQ4_XS] iq4_xs +output.weight q6_K +blk.0.attn_v.weight q5_K +blk.0.ffn_down.weight q5_K +blk.1.attn_v.weight q5_K +blk.1.ffn_down.weight q5_K +blk.2.attn_v.weight q5_K +blk.2.ffn_down.weight q5_K +blk.3.attn_v.weight q5_K +blk.3.ffn_down_exps.weight q5_K +blk.3.ffn_down_shexp.weight q5_K +blk.4.attn_v.weight q5_K +blk.4.ffn_down_exps.weight q5_K +blk.4.ffn_down_shexp.weight q5_K +blk.5.attn_v.weight q5_K +blk.6.attn_v.weight q5_K +blk.7.attn_v.weight q5_K +blk.8.attn_v.weight q5_K +blk.9.attn_v.weight q5_K +blk.10.attn_v.weight q5_K +blk.11.attn_v.weight q5_K +blk.12.attn_v.weight q5_K +blk.13.attn_v.weight q5_K +blk.14.attn_v.weight q5_K +blk.15.attn_v.weight q5_K +blk.16.attn_v.weight q5_K +blk.17.attn_v.weight q5_K +blk.18.attn_v.weight q5_K +blk.19.attn_v.weight q5_K +blk.20.attn_v.weight q5_K +blk.21.attn_v.weight q5_K +blk.22.attn_v.weight q5_K +blk.23.attn_v.weight q5_K +blk.24.attn_v.weight q5_K +blk.25.attn_v.weight q5_K +blk.26.attn_v.weight q5_K +blk.27.attn_v.weight q5_K +blk.28.attn_v.weight q5_K +blk.29.attn_v.weight q5_K +blk.30.attn_v.weight q5_K +blk.31.attn_v.weight q5_K +blk.32.attn_v.weight q5_K +blk.33.attn_v.weight q5_K +blk.34.attn_v.weight q5_K +blk.35.attn_v.weight q5_K +blk.36.attn_v.weight q5_K +blk.37.attn_v.weight q5_K +blk.38.attn_v.weight q5_K +blk.39.attn_v.weight q5_K +blk.40.attn_v.weight q5_K +blk.41.attn_v.weight q5_K +blk.42.attn_v.weight q5_K +blk.43.attn_v.weight q5_K +blk.44.attn_v.weight q5_K + +[IQ1_M] iq1_m +output.weight q5_K +token_embd.weight q2_K +blk.0.attn_output.weight iq2_xxs +blk.0.attn_v.weight q4_K +blk.0.ffn_down.weight q2_K +blk.1.attn_output.weight iq2_xxs +blk.1.attn_v.weight q4_K +blk.1.ffn_down.weight q2_K +blk.2.attn_output.weight iq2_xxs +blk.2.attn_v.weight q4_K +blk.2.ffn_down.weight q2_K +blk.3.attn_output.weight iq2_xxs +blk.3.attn_v.weight q4_K +blk.3.ffn_down_exps.weight q2_K +blk.3.ffn_down_shexp.weight q2_K +blk.4.attn_output.weight iq2_xxs +blk.4.attn_v.weight q4_K +blk.5.attn_output.weight iq2_xxs +blk.5.attn_v.weight q4_K +blk.6.attn_output.weight iq2_xxs +blk.6.attn_v.weight q4_K +blk.7.attn_output.weight iq2_xxs +blk.7.attn_v.weight q4_K +blk.8.attn_output.weight iq2_xxs +blk.8.attn_v.weight q4_K +blk.9.attn_output.weight iq2_xxs +blk.9.attn_v.weight q4_K +blk.10.attn_output.weight iq2_xxs +blk.10.attn_v.weight q4_K +blk.11.attn_output.weight iq2_xxs +blk.11.attn_v.weight q4_K +blk.12.attn_output.weight iq2_xxs +blk.12.attn_v.weight q4_K +blk.13.attn_output.weight iq2_xxs +blk.13.attn_v.weight q4_K +blk.14.attn_output.weight iq2_xxs +blk.14.attn_v.weight q4_K +blk.15.attn_output.weight iq2_xxs +blk.15.attn_v.weight q4_K +blk.16.attn_output.weight iq2_xxs +blk.16.attn_v.weight q4_K +blk.17.attn_output.weight iq2_xxs +blk.17.attn_v.weight q4_K +blk.18.attn_output.weight iq2_xxs +blk.18.attn_v.weight q4_K +blk.19.attn_output.weight iq2_xxs +blk.19.attn_v.weight q4_K +blk.20.attn_output.weight iq2_xxs +blk.20.attn_v.weight q4_K +blk.21.attn_output.weight iq2_xxs +blk.21.attn_v.weight q4_K +blk.22.attn_output.weight iq2_xxs +blk.22.attn_v.weight q4_K +blk.23.attn_output.weight iq2_xxs +blk.23.attn_v.weight q4_K +blk.24.attn_output.weight iq2_xxs +blk.24.attn_v.weight q4_K +blk.25.attn_output.weight iq2_xxs +blk.25.attn_v.weight q4_K +blk.26.attn_output.weight iq2_xxs +blk.26.attn_v.weight q4_K +blk.27.attn_output.weight iq2_xxs +blk.27.attn_v.weight q4_K +blk.28.attn_output.weight iq2_xxs +blk.28.attn_v.weight q4_K +blk.29.attn_output.weight iq2_xxs +blk.29.attn_v.weight q4_K +blk.30.attn_output.weight iq2_xxs +blk.30.attn_v.weight q4_K +blk.31.attn_output.weight iq2_xxs +blk.31.attn_v.weight q4_K +blk.32.attn_output.weight iq2_xxs +blk.32.attn_v.weight q4_K +blk.33.attn_output.weight iq2_xxs +blk.33.attn_v.weight q4_K +blk.34.attn_output.weight iq2_xxs +blk.34.attn_v.weight q4_K +blk.35.attn_output.weight iq2_xxs +blk.35.attn_v.weight q4_K +blk.36.attn_output.weight iq2_xxs +blk.36.attn_v.weight q4_K +blk.37.attn_output.weight iq2_xxs +blk.37.attn_v.weight q4_K +blk.38.attn_output.weight iq2_xxs +blk.38.attn_v.weight q4_K +blk.39.attn_output.weight iq2_xxs +blk.39.attn_v.weight q4_K +blk.40.attn_output.weight iq2_xxs +blk.40.attn_v.weight q4_K +blk.41.attn_output.weight iq2_xxs +blk.41.attn_v.weight q4_K +blk.42.attn_output.weight iq2_xxs +blk.42.attn_v.weight q4_K +blk.43.attn_output.weight iq2_xxs +blk.43.attn_v.weight q4_K +blk.44.attn_output.weight iq2_xxs +blk.44.attn_v.weight q4_K + +[BF16] bf16 +output.weight q6_K + +[TQ1_0] tq1_0 +output.weight q6_K +token_embd.weight q4_K + +[TQ2_0] tq2_0 +output.weight q6_K +token_embd.weight q4_K + +[MXFP4_MOE] mxfp4 +output.weight q8_0 +token_embd.weight q8_0 +blk.0.attn_gate.weight q8_0 +blk.0.attn_k.weight q8_0 +blk.0.attn_output.weight q8_0 +blk.0.attn_q.weight q8_0 +blk.0.attn_v.weight q8_0 +blk.0.ffn_down.weight q8_0 +blk.0.ffn_gate.weight q8_0 +blk.0.ffn_up.weight q8_0 +blk.1.attn_gate.weight q8_0 +blk.1.attn_k.weight q8_0 +blk.1.attn_output.weight q8_0 +blk.1.attn_q.weight q8_0 +blk.1.attn_v.weight q8_0 +blk.1.ffn_down.weight q8_0 +blk.1.ffn_gate.weight q8_0 +blk.1.ffn_up.weight q8_0 +blk.2.attn_gate.weight q8_0 +blk.2.attn_k.weight q8_0 +blk.2.attn_output.weight q8_0 +blk.2.attn_q.weight q8_0 +blk.2.attn_v.weight q8_0 +blk.2.ffn_down.weight q8_0 +blk.2.ffn_gate.weight q8_0 +blk.2.ffn_up.weight q8_0 +blk.3.attn_gate.weight q8_0 +blk.3.attn_k.weight q8_0 +blk.3.attn_output.weight q8_0 +blk.3.attn_q.weight q8_0 +blk.3.attn_v.weight q8_0 +blk.3.ffn_down_shexp.weight q8_0 +blk.3.ffn_gate_shexp.weight q8_0 +blk.3.ffn_up_shexp.weight q8_0 +blk.4.attn_gate.weight q8_0 +blk.4.attn_k.weight q8_0 +blk.4.attn_output.weight q8_0 +blk.4.attn_q.weight q8_0 +blk.4.attn_v.weight q8_0 +blk.4.ffn_down_shexp.weight q8_0 +blk.4.ffn_gate_shexp.weight q8_0 +blk.4.ffn_up_shexp.weight q8_0 +blk.5.attn_gate.weight q8_0 +blk.5.attn_k.weight q8_0 +blk.5.attn_output.weight q8_0 +blk.5.attn_q.weight q8_0 +blk.5.attn_v.weight q8_0 +blk.5.ffn_down_shexp.weight q8_0 +blk.5.ffn_gate_shexp.weight q8_0 +blk.5.ffn_up_shexp.weight q8_0 +blk.6.attn_gate.weight q8_0 +blk.6.attn_k.weight q8_0 +blk.6.attn_output.weight q8_0 +blk.6.attn_q.weight q8_0 +blk.6.attn_v.weight q8_0 +blk.6.ffn_down_shexp.weight q8_0 +blk.6.ffn_gate_shexp.weight q8_0 +blk.6.ffn_up_shexp.weight q8_0 +blk.7.attn_gate.weight q8_0 +blk.7.attn_k.weight q8_0 +blk.7.attn_output.weight q8_0 +blk.7.attn_q.weight q8_0 +blk.7.attn_v.weight q8_0 +blk.7.ffn_down_shexp.weight q8_0 +blk.7.ffn_gate_shexp.weight q8_0 +blk.7.ffn_up_shexp.weight q8_0 +blk.8.attn_gate.weight q8_0 +blk.8.attn_k.weight q8_0 +blk.8.attn_output.weight q8_0 +blk.8.attn_q.weight q8_0 +blk.8.attn_v.weight q8_0 +blk.8.ffn_down_shexp.weight q8_0 +blk.8.ffn_gate_shexp.weight q8_0 +blk.8.ffn_up_shexp.weight q8_0 +blk.9.attn_gate.weight q8_0 +blk.9.attn_k.weight q8_0 +blk.9.attn_output.weight q8_0 +blk.9.attn_q.weight q8_0 +blk.9.attn_v.weight q8_0 +blk.9.ffn_down_shexp.weight q8_0 +blk.9.ffn_gate_shexp.weight q8_0 +blk.9.ffn_up_shexp.weight q8_0 +blk.10.attn_gate.weight q8_0 +blk.10.attn_k.weight q8_0 +blk.10.attn_output.weight q8_0 +blk.10.attn_q.weight q8_0 +blk.10.attn_v.weight q8_0 +blk.10.ffn_down_shexp.weight q8_0 +blk.10.ffn_gate_shexp.weight q8_0 +blk.10.ffn_up_shexp.weight q8_0 +blk.11.attn_gate.weight q8_0 +blk.11.attn_k.weight q8_0 +blk.11.attn_output.weight q8_0 +blk.11.attn_q.weight q8_0 +blk.11.attn_v.weight q8_0 +blk.11.ffn_down_shexp.weight q8_0 +blk.11.ffn_gate_shexp.weight q8_0 +blk.11.ffn_up_shexp.weight q8_0 +blk.12.attn_gate.weight q8_0 +blk.12.attn_k.weight q8_0 +blk.12.attn_output.weight q8_0 +blk.12.attn_q.weight q8_0 +blk.12.attn_v.weight q8_0 +blk.12.ffn_down_shexp.weight q8_0 +blk.12.ffn_gate_shexp.weight q8_0 +blk.12.ffn_up_shexp.weight q8_0 +blk.13.attn_gate.weight q8_0 +blk.13.attn_k.weight q8_0 +blk.13.attn_output.weight q8_0 +blk.13.attn_q.weight q8_0 +blk.13.attn_v.weight q8_0 +blk.13.ffn_down_shexp.weight q8_0 +blk.13.ffn_gate_shexp.weight q8_0 +blk.13.ffn_up_shexp.weight q8_0 +blk.14.attn_gate.weight q8_0 +blk.14.attn_k.weight q8_0 +blk.14.attn_output.weight q8_0 +blk.14.attn_q.weight q8_0 +blk.14.attn_v.weight q8_0 +blk.14.ffn_down_shexp.weight q8_0 +blk.14.ffn_gate_shexp.weight q8_0 +blk.14.ffn_up_shexp.weight q8_0 +blk.15.attn_gate.weight q8_0 +blk.15.attn_k.weight q8_0 +blk.15.attn_output.weight q8_0 +blk.15.attn_q.weight q8_0 +blk.15.attn_v.weight q8_0 +blk.15.ffn_down_shexp.weight q8_0 +blk.15.ffn_gate_shexp.weight q8_0 +blk.15.ffn_up_shexp.weight q8_0 +blk.16.attn_gate.weight q8_0 +blk.16.attn_k.weight q8_0 +blk.16.attn_output.weight q8_0 +blk.16.attn_q.weight q8_0 +blk.16.attn_v.weight q8_0 +blk.16.ffn_down_shexp.weight q8_0 +blk.16.ffn_gate_shexp.weight q8_0 +blk.16.ffn_up_shexp.weight q8_0 +blk.17.attn_gate.weight q8_0 +blk.17.attn_k.weight q8_0 +blk.17.attn_output.weight q8_0 +blk.17.attn_q.weight q8_0 +blk.17.attn_v.weight q8_0 +blk.17.ffn_down_shexp.weight q8_0 +blk.17.ffn_gate_shexp.weight q8_0 +blk.17.ffn_up_shexp.weight q8_0 +blk.18.attn_gate.weight q8_0 +blk.18.attn_k.weight q8_0 +blk.18.attn_output.weight q8_0 +blk.18.attn_q.weight q8_0 +blk.18.attn_v.weight q8_0 +blk.18.ffn_down_shexp.weight q8_0 +blk.18.ffn_gate_shexp.weight q8_0 +blk.18.ffn_up_shexp.weight q8_0 +blk.19.attn_gate.weight q8_0 +blk.19.attn_k.weight q8_0 +blk.19.attn_output.weight q8_0 +blk.19.attn_q.weight q8_0 +blk.19.attn_v.weight q8_0 +blk.19.ffn_down_shexp.weight q8_0 +blk.19.ffn_gate_shexp.weight q8_0 +blk.19.ffn_up_shexp.weight q8_0 +blk.20.attn_gate.weight q8_0 +blk.20.attn_k.weight q8_0 +blk.20.attn_output.weight q8_0 +blk.20.attn_q.weight q8_0 +blk.20.attn_v.weight q8_0 +blk.20.ffn_down_shexp.weight q8_0 +blk.20.ffn_gate_shexp.weight q8_0 +blk.20.ffn_up_shexp.weight q8_0 +blk.21.attn_gate.weight q8_0 +blk.21.attn_k.weight q8_0 +blk.21.attn_output.weight q8_0 +blk.21.attn_q.weight q8_0 +blk.21.attn_v.weight q8_0 +blk.21.ffn_down_shexp.weight q8_0 +blk.21.ffn_gate_shexp.weight q8_0 +blk.21.ffn_up_shexp.weight q8_0 +blk.22.attn_gate.weight q8_0 +blk.22.attn_k.weight q8_0 +blk.22.attn_output.weight q8_0 +blk.22.attn_q.weight q8_0 +blk.22.attn_v.weight q8_0 +blk.22.ffn_down_shexp.weight q8_0 +blk.22.ffn_gate_shexp.weight q8_0 +blk.22.ffn_up_shexp.weight q8_0 +blk.23.attn_gate.weight q8_0 +blk.23.attn_k.weight q8_0 +blk.23.attn_output.weight q8_0 +blk.23.attn_q.weight q8_0 +blk.23.attn_v.weight q8_0 +blk.23.ffn_down_shexp.weight q8_0 +blk.23.ffn_gate_shexp.weight q8_0 +blk.23.ffn_up_shexp.weight q8_0 +blk.24.attn_gate.weight q8_0 +blk.24.attn_k.weight q8_0 +blk.24.attn_output.weight q8_0 +blk.24.attn_q.weight q8_0 +blk.24.attn_v.weight q8_0 +blk.24.ffn_down_shexp.weight q8_0 +blk.24.ffn_gate_shexp.weight q8_0 +blk.24.ffn_up_shexp.weight q8_0 +blk.25.attn_gate.weight q8_0 +blk.25.attn_k.weight q8_0 +blk.25.attn_output.weight q8_0 +blk.25.attn_q.weight q8_0 +blk.25.attn_v.weight q8_0 +blk.25.ffn_down_shexp.weight q8_0 +blk.25.ffn_gate_shexp.weight q8_0 +blk.25.ffn_up_shexp.weight q8_0 +blk.26.attn_gate.weight q8_0 +blk.26.attn_k.weight q8_0 +blk.26.attn_output.weight q8_0 +blk.26.attn_q.weight q8_0 +blk.26.attn_v.weight q8_0 +blk.26.ffn_down_shexp.weight q8_0 +blk.26.ffn_gate_shexp.weight q8_0 +blk.26.ffn_up_shexp.weight q8_0 +blk.27.attn_gate.weight q8_0 +blk.27.attn_k.weight q8_0 +blk.27.attn_output.weight q8_0 +blk.27.attn_q.weight q8_0 +blk.27.attn_v.weight q8_0 +blk.27.ffn_down_shexp.weight q8_0 +blk.27.ffn_gate_shexp.weight q8_0 +blk.27.ffn_up_shexp.weight q8_0 +blk.28.attn_gate.weight q8_0 +blk.28.attn_k.weight q8_0 +blk.28.attn_output.weight q8_0 +blk.28.attn_q.weight q8_0 +blk.28.attn_v.weight q8_0 +blk.28.ffn_down_shexp.weight q8_0 +blk.28.ffn_gate_shexp.weight q8_0 +blk.28.ffn_up_shexp.weight q8_0 +blk.29.attn_gate.weight q8_0 +blk.29.attn_k.weight q8_0 +blk.29.attn_output.weight q8_0 +blk.29.attn_q.weight q8_0 +blk.29.attn_v.weight q8_0 +blk.29.ffn_down_shexp.weight q8_0 +blk.29.ffn_gate_shexp.weight q8_0 +blk.29.ffn_up_shexp.weight q8_0 +blk.30.attn_gate.weight q8_0 +blk.30.attn_k.weight q8_0 +blk.30.attn_output.weight q8_0 +blk.30.attn_q.weight q8_0 +blk.30.attn_v.weight q8_0 +blk.30.ffn_down_shexp.weight q8_0 +blk.30.ffn_gate_shexp.weight q8_0 +blk.30.ffn_up_shexp.weight q8_0 +blk.31.attn_gate.weight q8_0 +blk.31.attn_k.weight q8_0 +blk.31.attn_output.weight q8_0 +blk.31.attn_q.weight q8_0 +blk.31.attn_v.weight q8_0 +blk.31.ffn_down_shexp.weight q8_0 +blk.31.ffn_gate_shexp.weight q8_0 +blk.31.ffn_up_shexp.weight q8_0 +blk.32.attn_gate.weight q8_0 +blk.32.attn_k.weight q8_0 +blk.32.attn_output.weight q8_0 +blk.32.attn_q.weight q8_0 +blk.32.attn_v.weight q8_0 +blk.32.ffn_down_shexp.weight q8_0 +blk.32.ffn_gate_shexp.weight q8_0 +blk.32.ffn_up_shexp.weight q8_0 +blk.33.attn_gate.weight q8_0 +blk.33.attn_k.weight q8_0 +blk.33.attn_output.weight q8_0 +blk.33.attn_q.weight q8_0 +blk.33.attn_v.weight q8_0 +blk.33.ffn_down_shexp.weight q8_0 +blk.33.ffn_gate_shexp.weight q8_0 +blk.33.ffn_up_shexp.weight q8_0 +blk.34.attn_gate.weight q8_0 +blk.34.attn_k.weight q8_0 +blk.34.attn_output.weight q8_0 +blk.34.attn_q.weight q8_0 +blk.34.attn_v.weight q8_0 +blk.34.ffn_down_shexp.weight q8_0 +blk.34.ffn_gate_shexp.weight q8_0 +blk.34.ffn_up_shexp.weight q8_0 +blk.35.attn_gate.weight q8_0 +blk.35.attn_k.weight q8_0 +blk.35.attn_output.weight q8_0 +blk.35.attn_q.weight q8_0 +blk.35.attn_v.weight q8_0 +blk.35.ffn_down_shexp.weight q8_0 +blk.35.ffn_gate_shexp.weight q8_0 +blk.35.ffn_up_shexp.weight q8_0 +blk.36.attn_gate.weight q8_0 +blk.36.attn_k.weight q8_0 +blk.36.attn_output.weight q8_0 +blk.36.attn_q.weight q8_0 +blk.36.attn_v.weight q8_0 +blk.36.ffn_down_shexp.weight q8_0 +blk.36.ffn_gate_shexp.weight q8_0 +blk.36.ffn_up_shexp.weight q8_0 +blk.37.attn_gate.weight q8_0 +blk.37.attn_k.weight q8_0 +blk.37.attn_output.weight q8_0 +blk.37.attn_q.weight q8_0 +blk.37.attn_v.weight q8_0 +blk.37.ffn_down_shexp.weight q8_0 +blk.37.ffn_gate_shexp.weight q8_0 +blk.37.ffn_up_shexp.weight q8_0 +blk.38.attn_gate.weight q8_0 +blk.38.attn_k.weight q8_0 +blk.38.attn_output.weight q8_0 +blk.38.attn_q.weight q8_0 +blk.38.attn_v.weight q8_0 +blk.38.ffn_down_shexp.weight q8_0 +blk.38.ffn_gate_shexp.weight q8_0 +blk.38.ffn_up_shexp.weight q8_0 +blk.39.attn_gate.weight q8_0 +blk.39.attn_k.weight q8_0 +blk.39.attn_output.weight q8_0 +blk.39.attn_q.weight q8_0 +blk.39.attn_v.weight q8_0 +blk.39.ffn_down_shexp.weight q8_0 +blk.39.ffn_gate_shexp.weight q8_0 +blk.39.ffn_up_shexp.weight q8_0 +blk.40.attn_gate.weight q8_0 +blk.40.attn_k.weight q8_0 +blk.40.attn_output.weight q8_0 +blk.40.attn_q.weight q8_0 +blk.40.attn_v.weight q8_0 +blk.40.ffn_down_shexp.weight q8_0 +blk.40.ffn_gate_shexp.weight q8_0 +blk.40.ffn_up_shexp.weight q8_0 +blk.41.attn_gate.weight q8_0 +blk.41.attn_k.weight q8_0 +blk.41.attn_output.weight q8_0 +blk.41.attn_q.weight q8_0 +blk.41.attn_v.weight q8_0 +blk.41.ffn_down_shexp.weight q8_0 +blk.41.ffn_gate_shexp.weight q8_0 +blk.41.ffn_up_shexp.weight q8_0 +blk.42.attn_gate.weight q8_0 +blk.42.attn_k.weight q8_0 +blk.42.attn_output.weight q8_0 +blk.42.attn_q.weight q8_0 +blk.42.attn_v.weight q8_0 +blk.42.ffn_down_shexp.weight q8_0 +blk.42.ffn_gate_shexp.weight q8_0 +blk.42.ffn_up_shexp.weight q8_0 +blk.43.attn_gate.weight q8_0 +blk.43.attn_k.weight q8_0 +blk.43.attn_output.weight q8_0 +blk.43.attn_q.weight q8_0 +blk.43.attn_v.weight q8_0 +blk.43.ffn_down_shexp.weight q8_0 +blk.43.ffn_gate_shexp.weight q8_0 +blk.43.ffn_up_shexp.weight q8_0 +blk.44.attn_gate.weight q8_0 +blk.44.attn_k.weight q8_0 +blk.44.attn_output.weight q8_0 +blk.44.attn_q.weight q8_0 +blk.44.attn_v.weight q8_0 +blk.44.ffn_down_shexp.weight q8_0 +blk.44.ffn_gate_shexp.weight q8_0 +blk.44.ffn_up_shexp.weight q8_0 diff --git a/tests/test-quant-type-selection.cpp b/tests/test-quant-type-selection.cpp index 4ecb4c09ac..26d04f7c38 100644 --- a/tests/test-quant-type-selection.cpp +++ b/tests/test-quant-type-selection.cpp @@ -190,7 +190,7 @@ static const remote_model_spec model_specs[] = { { "ggml-org/gpt-oss-120b-GGUF", "mxfp4" }, { "ggml-org/gemma-3-4b-it-GGUF", "Q8_0" }, { "bartowski/Meta-Llama-3.1-70B-Instruct-GGUF", "Q4_K_M" }, - { "bartowski/deepseek-ai_DeepSeek-V3.1-GGUF", "IQ1_S" }, + { "bartowski/deepseek-ai_DeepSeek-V3.1-GGUF", "IQ1_M" }, { "bartowski/Qwen_Qwen3.5-397B-A17B-GGUF", "IQ1_S" }, // TODO: swap with ggml-org if/when it's released { "bartowski/Qwen_Qwen3.5-27B-GGUF", "Q8_0" }, // TODO: swap with ggml-org if/when it's released }; From 363e6d3f0b4972c3b844c139501c5515208299aa Mon Sep 17 00:00:00 2001 From: Colin Kealty <3266127+bartowski1182@users.noreply.github.com> Date: Wed, 4 Mar 2026 14:05:29 -0500 Subject: [PATCH 03/21] clang formatter changes --- tests/test-quant-type-selection.cpp | 156 +++++++++++++--------------- 1 file changed, 73 insertions(+), 83 deletions(-) diff --git a/tests/test-quant-type-selection.cpp b/tests/test-quant-type-selection.cpp index 26d04f7c38..a50e982eca 100644 --- a/tests/test-quant-type-selection.cpp +++ b/tests/test-quant-type-selection.cpp @@ -1,12 +1,10 @@ -#include "ggml.h" -#include "ggml-cpp.h" -#include "llama.h" - #include "../src/llama-arch.h" #include "../src/llama-model.h" #include "../src/llama-quant.h" - +#include "ggml-cpp.h" +#include "ggml.h" #include "gguf-model-data.h" +#include "llama.h" #include #include @@ -27,8 +25,11 @@ struct mock_tensor { ggml_tensor * tensor; }; -static mock_tensor make_mock_tensor(const std::string & name, int64_t ne0, int64_t ne1, - int64_t ne2 = 1, int64_t ne3 = 1) { +static mock_tensor make_mock_tensor(const std::string & name, + int64_t ne0, + int64_t ne1, + int64_t ne2 = 1, + int64_t ne3 = 1) { struct ggml_init_params params = { /*.mem_size =*/ 2 * ggml_tensor_overhead(), /*.mem_buffer =*/ nullptr, @@ -49,9 +50,9 @@ static mock_tensor make_mock_tensor(const std::string & name, int64_t ne0, int64 static ggml_type ggml_type_from_name(const std::string & name) { for (int i = 0; i < GGML_TYPE_COUNT; i++) { - const char * tname = ggml_type_name((ggml_type)i); + const char * tname = ggml_type_name((ggml_type) i); if (tname && name == tname) { - return (ggml_type)i; + return (ggml_type) i; } } return GGML_TYPE_COUNT; @@ -62,8 +63,8 @@ static ggml_type ggml_type_from_name(const std::string & name) { // --------------------------------------------------------------------------- struct snapshot_section { - llama_ftype ftype; - ggml_type default_type; + llama_ftype ftype; + ggml_type default_type; std::vector> overrides; }; @@ -76,7 +77,7 @@ static bool parse_snapshot_file(const std::string & path, std::vectoroverrides.push_back({tname, gt}); + cur->overrides.push_back({ tname, gt }); } return true; @@ -181,21 +183,21 @@ static std::string snapshot_file_from_name(const std::string & name) { } static const remote_model_spec model_specs[] = { - { "ggml-org/Qwen3-0.6B-GGUF", "Q8_0" }, - { "ggml-org/GLM-4.6V-GGUF", "Q8_0" }, - { "ggml-org/Step-3.5-Flash-GGUF", "Q4_K" }, - { "ggml-org/Qwen3-Coder-Next-GGUF", "Q8_0" }, - { "ggml-org/Qwen3-14B-GGUF", "Q8_0" }, - { "ggml-org/Nemotron-Nano-3-30B-A3B-GGUF", "Q8_0" }, - { "ggml-org/gpt-oss-120b-GGUF", "mxfp4" }, - { "ggml-org/gemma-3-4b-it-GGUF", "Q8_0" }, - { "bartowski/Meta-Llama-3.1-70B-Instruct-GGUF", "Q4_K_M" }, - { "bartowski/deepseek-ai_DeepSeek-V3.1-GGUF", "IQ1_M" }, - { "bartowski/Qwen_Qwen3.5-397B-A17B-GGUF", "IQ1_S" }, // TODO: swap with ggml-org if/when it's released - { "bartowski/Qwen_Qwen3.5-27B-GGUF", "Q8_0" }, // TODO: swap with ggml-org if/when it's released + { "ggml-org/Qwen3-0.6B-GGUF", "Q8_0" }, + { "ggml-org/GLM-4.6V-GGUF", "Q8_0" }, + { "ggml-org/Step-3.5-Flash-GGUF", "Q4_K" }, + { "ggml-org/Qwen3-Coder-Next-GGUF", "Q8_0" }, + { "ggml-org/Qwen3-14B-GGUF", "Q8_0" }, + { "ggml-org/Nemotron-Nano-3-30B-A3B-GGUF", "Q8_0" }, + { "ggml-org/gpt-oss-120b-GGUF", "mxfp4" }, + { "ggml-org/gemma-3-4b-it-GGUF", "Q8_0" }, + { "bartowski/Meta-Llama-3.1-70B-Instruct-GGUF", "Q4_K_M" }, + { "bartowski/deepseek-ai_DeepSeek-V3.1-GGUF", "IQ1_M" }, + { "bartowski/Qwen_Qwen3.5-397B-A17B-GGUF", "IQ1_S" }, // TODO: swap with ggml-org if/when it's released + { "bartowski/Qwen_Qwen3.5-27B-GGUF", "Q8_0" }, // TODO: swap with ggml-org if/when it's released }; -static const int n_model_specs = (int)(sizeof(model_specs) / sizeof(model_specs[0])); +static const int n_model_specs = (int) (sizeof(model_specs) / sizeof(model_specs[0])); // Determine llm_type from metadata. // Only LLM_TYPE_70B matters -> probably can/should be dropped in the future @@ -208,7 +210,7 @@ static llm_type infer_llm_type(llm_arch arch, const gguf_remote_model & remote) static std::unique_ptr build_mock_model_from_remote(const gguf_remote_model & remote) { struct llama_model_params mparams = llama_model_default_params(); - auto model = std::make_unique(mparams); + auto model = std::make_unique(mparams); model->arch = llm_arch_from_string(remote.architecture); model->type = infer_llm_type(model->arch, remote); @@ -228,10 +230,9 @@ static std::unique_ptr build_mock_model_from_remote(const gguf_remo return model; } -static std::vector build_mock_tensors( - const gguf_remote_model & remote, - llm_arch arch, - const llama_model_quantize_params & qparams) { +static std::vector build_mock_tensors(const gguf_remote_model & remote, + llm_arch arch, + const llama_model_quantize_params & qparams) { std::vector result; for (const auto & t : remote.tensors) { @@ -259,12 +260,11 @@ static std::string read_file_contents(const std::string & path) { // --------------------------------------------------------------------------- // Returns {tensor_name, assigned_type} for each tensor, in order. -static std::vector> compute_quant_types( - llama_model & mdl, - const std::vector & tensors, - llama_ftype ftype) { +static std::vector> compute_quant_types(llama_model & mdl, + const std::vector & tensors, + llama_ftype ftype) { llama_model_quantize_params qparams = llama_model_quantize_default_params(); - qparams.ftype = ftype; + qparams.ftype = ftype; quantize_state_impl qs(mdl, &qparams); @@ -282,7 +282,7 @@ static std::vector> compute_quant_types( for (const auto & mt : tensors) { ggml_type got = llama_tensor_get_type(qs, default_type, mt.tensor, ftype); - result.push_back({mt.tensor->name, got}); + result.push_back({ mt.tensor->name, got }); } return result; @@ -293,27 +293,23 @@ static std::vector> compute_quant_types( // Use this when either adding new models or modifying quants // --------------------------------------------------------------------------- -static std::string generate_snapshot(const std::string & name, - const gguf_remote_model & remote, - llama_model & mdl, +static std::string generate_snapshot(const std::string & name, + const gguf_remote_model & remote, + llama_model & mdl, const std::vector & tensors) { std::ostringstream out; out << "# Model: " << name << "\n"; - out << "# n_embd=" << remote.n_embd - << ", n_ff=" << remote.n_ff - << ", n_vocab=" << remote.n_vocab - << ", n_layer=" << remote.n_layer - << ", n_head=" << remote.n_head - << ", n_head_kv=" << remote.n_head_kv; + out << "# n_embd=" << remote.n_embd << ", n_ff=" << remote.n_ff << ", n_vocab=" << remote.n_vocab + << ", n_layer=" << remote.n_layer << ", n_head=" << remote.n_head << ", n_head_kv=" << remote.n_head_kv; if (remote.n_expert > 0) { out << ", n_expert=" << remote.n_expert; } out << "\n"; for (int i = 0; i < LLAMA_FTYPE_GUESSED; i++) { - llama_ftype ft = (llama_ftype)i; - ggml_type default_type = llama_ftype_default_type(ft); + llama_ftype ft = (llama_ftype) i; + ggml_type default_type = llama_ftype_default_type(ft); if (default_type == GGML_TYPE_COUNT) { continue; } @@ -350,7 +346,7 @@ static int run_generate(const std::string & snapshot_dir) { for (int m = 0; m < n_model_specs; m++) { const auto & spec = model_specs[m]; - std::string name = model_name_from_repo(spec.repo); + std::string name = model_name_from_repo(spec.repo); fprintf(stderr, "Fetching model metadata for %s from %s...\n", name.c_str(), spec.repo); auto result = gguf_fetch_model_meta(spec.repo, spec.quant); @@ -359,10 +355,10 @@ static int run_generate(const std::string & snapshot_dir) { return 1; } - const auto & remote = result.value(); - auto model = build_mock_model_from_remote(remote); + const auto & remote = result.value(); + auto model = build_mock_model_from_remote(remote); llama_model_quantize_params qparams = llama_model_quantize_default_params(); - auto tensors = build_mock_tensors(remote, model->arch, qparams); + auto tensors = build_mock_tensors(remote, model->arch, qparams); std::string content = generate_snapshot(name, remote, *model, tensors); std::string path = snapshot_dir + "/" + snapshot_file_from_name(name) + ".schema"; @@ -385,16 +381,14 @@ static int run_generate(const std::string & snapshot_dir) { // Test mode: compare against snapshot files // --------------------------------------------------------------------------- -static bool run_test_section(llama_model & mdl, +static bool run_test_section(llama_model & mdl, const std::vector & tensors, - const snapshot_section & section) { + const snapshot_section & section) { // verify default_type matches what llama_ftype_default_type returns ggml_type computed_default = llama_ftype_default_type(section.ftype); if (computed_default != section.default_type) { - printf(" FAIL [%s] default type mismatch: file says %s, code says %s\n", - llama_ftype_to_name(section.ftype), - ggml_type_name(section.default_type), - ggml_type_name(computed_default)); + printf(" FAIL [%s] default type mismatch: file says %s, code says %s\n", llama_ftype_to_name(section.ftype), + ggml_type_name(section.default_type), ggml_type_name(computed_default)); return false; } @@ -402,28 +396,26 @@ static bool run_test_section(llama_model & mdl, std::map override_map(section.overrides.begin(), section.overrides.end()); - bool all_pass = true; - int n_override_found = 0; + bool all_pass = true; + int n_override_found = 0; for (const auto & [name, got] : types) { ggml_type expected = section.default_type; - auto it = override_map.find(name); + auto it = override_map.find(name); if (it != override_map.end()) { expected = it->second; n_override_found++; } if (got != expected) { - printf(" FAIL %-50s expected %s, got %s\n", - name.c_str(), ggml_type_name(expected), ggml_type_name(got)); + printf(" FAIL %-50s expected %s, got %s\n", name.c_str(), ggml_type_name(expected), ggml_type_name(got)); all_pass = false; } } - if (n_override_found != (int)section.overrides.size()) { - printf(" FAIL [%s] override count mismatch: listed %d, matched %d\n", - llama_ftype_to_name(section.ftype), - (int)section.overrides.size(), n_override_found); + if (n_override_found != (int) section.overrides.size()) { + printf(" FAIL [%s] override count mismatch: listed %d, matched %d\n", llama_ftype_to_name(section.ftype), + (int) section.overrides.size(), n_override_found); all_pass = false; } @@ -437,7 +429,7 @@ static int run_remote_tests(const std::string & snapshot_dir, const char * argv0 for (int m = 0; m < n_model_specs; m++) { const auto & spec = model_specs[m]; - std::string name = model_name_from_repo(spec.repo); + std::string name = model_name_from_repo(spec.repo); printf("=== %s ===\n", name.c_str()); fprintf(stderr, "Fetching model metadata for %s from %s...\n", name.c_str(), spec.repo); @@ -448,12 +440,12 @@ static int run_remote_tests(const std::string & snapshot_dir, const char * argv0 continue; } - const auto & remote = result.value(); - auto model = build_mock_model_from_remote(remote); + const auto & remote = result.value(); + auto model = build_mock_model_from_remote(remote); llama_model_quantize_params qparams = llama_model_quantize_default_params(); - auto tensors = build_mock_tensors(remote, model->arch, qparams); + auto tensors = build_mock_tensors(remote, model->arch, qparams); - std::string snapshot_path = snapshot_dir + "/" + snapshot_file_from_name(name) + ".schema"; + std::string snapshot_path = snapshot_dir + "/" + snapshot_file_from_name(name) + ".schema"; std::vector sections; if (!parse_snapshot_file(snapshot_path, sections)) { printf(" SKIP (could not read snapshot file: %s)\n\n", snapshot_path.c_str()); @@ -473,10 +465,8 @@ static int run_remote_tests(const std::string & snapshot_dir, const char * argv0 } } - printf(" %s %s: %d/%d ftype sections passed (%d tensors)\n", - model_fail == 0 ? "PASS" : "FAIL", - name.c_str(), model_pass, model_pass + model_fail, - (int)tensors.size()); + printf(" %s %s: %d/%d ftype sections passed (%d tensors)\n", model_fail == 0 ? "PASS" : "FAIL", name.c_str(), + model_pass, model_pass + model_fail, (int) tensors.size()); printf("\n"); if (model_fail == 0) { @@ -502,7 +492,7 @@ static int run_remote_tests(const std::string & snapshot_dir, const char * argv0 int main(int argc, char ** argv) { std::string snapshot_dir = SNAPSHOT_DIR; - bool generate = false; + bool generate = false; for (int i = 1; i < argc; i++) { if (strcmp(argv[i], "--snapshot-dir") == 0 && i + 1 < argc) { From 0a89cdae09105be17afda59ab4c7eb73a9ff9bad Mon Sep 17 00:00:00 2001 From: Colin Kealty <3266127+bartowski1182@users.noreply.github.com> Date: Wed, 4 Mar 2026 14:21:21 -0500 Subject: [PATCH 04/21] Trailing whitespace --- tests/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index ced1f3943c..ddd35c2041 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -274,7 +274,7 @@ if (TARGET cpp-httplib) add_executable(test-gguf-model-data test-gguf-model-data.cpp) target_link_libraries(test-gguf-model-data PRIVATE gguf-model-data common) llama_test(test-gguf-model-data LABEL "model") - + # test-quant-type-selection requires gguf-model-data for remote model metadata llama_build_and_test(test-quant-type-selection.cpp LABEL "model") target_link_libraries(test-quant-type-selection PRIVATE gguf-model-data) From 86103e7e065c4286b68ef710f8eb5cf938a60b43 Mon Sep 17 00:00:00 2001 From: Colin Kealty <3266127+bartowski1182@users.noreply.github.com> Date: Mon, 9 Mar 2026 15:46:31 -0400 Subject: [PATCH 05/21] Update name --- src/llama-quant.h | 2 +- tests/test-quant-type-selection.cpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/llama-quant.h b/src/llama-quant.h index 059f68fd09..fc7da927cd 100644 --- a/src/llama-quant.h +++ b/src/llama-quant.h @@ -39,7 +39,7 @@ struct quantize_state_impl { }; ggml_type llama_tensor_get_type(quantize_state_impl & qs, ggml_type new_type, const ggml_tensor * tensor, llama_ftype ftype); -ggml_type llama_ftype_default_type(llama_ftype ftype); +ggml_type llama_ftype_get_default_type(llama_ftype ftype); // Ftype name <-> enum conversions. // Returns (llama_ftype)-1 on failure. diff --git a/tests/test-quant-type-selection.cpp b/tests/test-quant-type-selection.cpp index a50e982eca..8dbfe34567 100644 --- a/tests/test-quant-type-selection.cpp +++ b/tests/test-quant-type-selection.cpp @@ -275,7 +275,7 @@ static std::vector> compute_quant_types(llama_ } init_quantize_state_counters(qs, names); - ggml_type default_type = llama_ftype_default_type(ftype); + ggml_type default_type = llama_ftype_get_default_type(ftype); std::vector> result; result.reserve(tensors.size()); From 86273028d12e8dfc18e5752cb461f19a36d40826 Mon Sep 17 00:00:00 2001 From: Colin Kealty <3266127+bartowski1182@users.noreply.github.com> Date: Mon, 9 Mar 2026 16:11:36 -0400 Subject: [PATCH 06/21] Start rebase --- src/llama-quant.h | 22 ++++++++++++++++------ 1 file changed, 16 insertions(+), 6 deletions(-) diff --git a/src/llama-quant.h b/src/llama-quant.h index fc7da927cd..7c89d81d70 100644 --- a/src/llama-quant.h +++ b/src/llama-quant.h @@ -29,13 +29,23 @@ struct quantize_state_impl { bool has_imatrix = false; - // used to figure out if a model shares tok_embd with the output weight - bool has_output = false; + // used to figure out if a model has tied embeddings (tok_embd shares weights with output) + bool has_tied_embeddings = false; // assume tied until we see output.weight - quantize_state_impl(const llama_model & model, const llama_model_quantize_params * params) - : model(model) - , params(params) - {} + // tensor type override patterns (compiled once, used twice) + std::vector> tensor_type_patterns; + + quantize_state_impl(const llama_model & model, const llama_model_quantize_params * params): + model(model), params(params) + { + // compile regex patterns once - they are expensive + if (params->tensor_types) { + const auto & tensor_types = *static_cast *>(params->tensor_types); + for (const auto & [tname, qtype] : tensor_types) { + tensor_type_patterns.emplace_back(std::regex(tname), qtype); + } + } + } }; ggml_type llama_tensor_get_type(quantize_state_impl & qs, ggml_type new_type, const ggml_tensor * tensor, llama_ftype ftype); From 6e414fc2d6bb8b443944e82358a693ac3064f296 Mon Sep 17 00:00:00 2001 From: Colin Kealty <3266127+bartowski1182@users.noreply.github.com> Date: Tue, 10 Mar 2026 10:41:22 -0400 Subject: [PATCH 07/21] Updating files with upstream changes prior to rebase --- src/llama-quant.h | 10 +++++++++- tests/test-quant-type-selection.cpp | 8 ++++---- 2 files changed, 13 insertions(+), 5 deletions(-) diff --git a/src/llama-quant.h b/src/llama-quant.h index 7c89d81d70..f3c1290c58 100644 --- a/src/llama-quant.h +++ b/src/llama-quant.h @@ -6,11 +6,19 @@ #include "llama-arch.h" +#include #include #include struct llama_model; +// result of parsing --tensor-type option +// (changes to this struct must be reflected in tools/quantize/quantize.cpp) +struct tensor_type_option { + std::string name; + ggml_type type = GGML_TYPE_COUNT; +}; + struct quantize_state_impl { const llama_model & model; const llama_model_quantize_params * params; @@ -30,7 +38,7 @@ struct quantize_state_impl { bool has_imatrix = false; // used to figure out if a model has tied embeddings (tok_embd shares weights with output) - bool has_tied_embeddings = false; // assume tied until we see output.weight + bool has_tied_embeddings = true; // assume tied until we see output.weight // tensor type override patterns (compiled once, used twice) std::vector> tensor_type_patterns; diff --git a/tests/test-quant-type-selection.cpp b/tests/test-quant-type-selection.cpp index 8dbfe34567..3384e4af11 100644 --- a/tests/test-quant-type-selection.cpp +++ b/tests/test-quant-type-selection.cpp @@ -309,7 +309,7 @@ static std::string generate_snapshot(const std::string & name, for (int i = 0; i < LLAMA_FTYPE_GUESSED; i++) { llama_ftype ft = (llama_ftype) i; - ggml_type default_type = llama_ftype_default_type(ft); + ggml_type default_type = llama_ftype_get_default_type(ft); if (default_type == GGML_TYPE_COUNT) { continue; } @@ -384,8 +384,8 @@ static int run_generate(const std::string & snapshot_dir) { static bool run_test_section(llama_model & mdl, const std::vector & tensors, const snapshot_section & section) { - // verify default_type matches what llama_ftype_default_type returns - ggml_type computed_default = llama_ftype_default_type(section.ftype); + // verify default_type matches what llama_ftype_get_default_type returns + ggml_type computed_default = llama_ftype_get_default_type(section.ftype); if (computed_default != section.default_type) { printf(" FAIL [%s] default type mismatch: file says %s, code says %s\n", llama_ftype_to_name(section.ftype), ggml_type_name(section.default_type), ggml_type_name(computed_default)); @@ -408,7 +408,7 @@ static bool run_test_section(llama_model & mdl, } if (got != expected) { - printf(" FAIL %-50s expected %s, got %s\n", name.c_str(), ggml_type_name(expected), ggml_type_name(got)); + printf(" FAIL [%s] %-50s expected %s, got %s\n", llama_ftype_to_name(section.ftype), name.c_str(), ggml_type_name(expected), ggml_type_name(got)); all_pass = false; } } From 2015dea82076254da7c2c8c77da9e1a2679854a4 Mon Sep 17 00:00:00 2001 From: Colin Kealty <3266127+bartowski1182@users.noreply.github.com> Date: Tue, 10 Mar 2026 11:14:54 -0400 Subject: [PATCH 08/21] Changes needed from rebase --- tests/test-quant-type-selection.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/test-quant-type-selection.cpp b/tests/test-quant-type-selection.cpp index 3384e4af11..d5e06173f6 100644 --- a/tests/test-quant-type-selection.cpp +++ b/tests/test-quant-type-selection.cpp @@ -216,8 +216,8 @@ static std::unique_ptr build_mock_model_from_remote(const gguf_remo model->type = infer_llm_type(model->arch, remote); model->hparams.n_embd = remote.n_embd; - model->hparams.n_embd_head_k = remote.n_embd_head_k; - model->hparams.n_embd_head_v = remote.n_embd_head_v; + model->hparams.n_embd_head_k_full = remote.n_embd_head_k; + model->hparams.n_embd_head_v_full = remote.n_embd_head_v; model->hparams.n_layer = remote.n_layer; model->hparams.n_expert = remote.n_expert; @@ -408,7 +408,7 @@ static bool run_test_section(llama_model & mdl, } if (got != expected) { - printf(" FAIL [%s] %-50s expected %s, got %s\n", llama_ftype_to_name(section.ftype), name.c_str(), ggml_type_name(expected), ggml_type_name(got)); + printf(" FAIL %-50s expected %s, got %s\n", name.c_str(), ggml_type_name(expected), ggml_type_name(got)); all_pass = false; } } From 544745c03419ad004271bb16d1d53c64535ffbed Mon Sep 17 00:00:00 2001 From: Colin Kealty <3266127+bartowski1182@users.noreply.github.com> Date: Tue, 10 Mar 2026 11:24:24 -0400 Subject: [PATCH 09/21] Update attn_qkv schema, change throw behaviour --- src/llama-quant.cpp | 7 +- tests/snapshots/qwen3-coder-next.schema | 634 ++++++++++++++--- tests/snapshots/qwen3.5-27b.schema | 842 +++++++++++++++++++---- tests/snapshots/qwen3.5-397b-a17b.schema | 791 +++++++++++++++++---- 4 files changed, 1907 insertions(+), 367 deletions(-) diff --git a/src/llama-quant.cpp b/src/llama-quant.cpp index 7415328c24..79c57832bf 100644 --- a/src/llama-quant.cpp +++ b/src/llama-quant.cpp @@ -355,7 +355,7 @@ static ggml_type tensor_type_fallback(quantize_state_impl & qs, const ggml_tenso return return_type; } -ggml_type llama_tensor_get_type_impl(quantize_state_impl & qs, ggml_type new_type, const ggml_tensor * tensor, llama_ftype ftype, tensor_category category) { +static ggml_type llama_tensor_get_type_impl(quantize_state_impl & qs, ggml_type new_type, const ggml_tensor * tensor, llama_ftype ftype, tensor_category category) { const std::string name = ggml_get_name(tensor); // TODO: avoid hardcoded tensor names - use the TN_* constants @@ -782,7 +782,7 @@ ggml_type llama_ftype_get_default_type(llama_ftype ftype) { case LLAMA_FTYPE_MOSTLY_IQ3_S: case LLAMA_FTYPE_MOSTLY_IQ3_M: return GGML_TYPE_IQ3_S; - default: throw std::runtime_error(format("invalid output file type %d\n", ftype)); + default: return GGML_TYPE_COUNT; } } @@ -874,6 +874,9 @@ static void llama_model_quantize_impl(const std::string & fname_inp, const std:: } ggml_type default_type = llama_ftype_get_default_type(ftype); + if (default_type == GGML_TYPE_COUNT) { + throw std::runtime_error(format("invalid output file type %d\n", ftype)); + } // mmap consistently increases speed on Linux, and also increases speed on Windows with // hot cache. It may cause a slowdown on macOS, possibly related to free memory. diff --git a/tests/snapshots/qwen3-coder-next.schema b/tests/snapshots/qwen3-coder-next.schema index d71fe86a1f..4f12721f16 100644 --- a/tests/snapshots/qwen3-coder-next.schema +++ b/tests/snapshots/qwen3-coder-next.schema @@ -22,29 +22,38 @@ output.weight q6_K output.weight q6_K [Q2_K] q2_K +blk.0.attn_qkv.weight q4_K blk.0.ffn_down_exps.weight q3_K blk.0.ffn_down_shexp.weight q3_K +blk.1.attn_qkv.weight q4_K blk.1.ffn_down_shexp.weight q3_K blk.1.ffn_down_exps.weight q3_K +blk.2.attn_qkv.weight q4_K blk.2.ffn_down_shexp.weight q3_K blk.2.ffn_down_exps.weight q3_K blk.3.ffn_down_shexp.weight q3_K blk.3.attn_output.weight q3_K blk.3.attn_v.weight q4_K blk.3.ffn_down_exps.weight q3_K +blk.4.attn_qkv.weight q4_K blk.4.ffn_down_shexp.weight q3_K blk.4.ffn_down_exps.weight q3_K +blk.5.attn_qkv.weight q4_K blk.5.ffn_down_shexp.weight q3_K blk.5.ffn_down_exps.weight q3_K +blk.6.attn_qkv.weight q4_K blk.6.ffn_down_exps.weight q3_K blk.6.ffn_down_shexp.weight q3_K blk.7.ffn_down_shexp.weight q3_K blk.7.attn_output.weight q3_K blk.7.attn_v.weight q4_K blk.7.ffn_down_exps.weight q3_K +blk.8.attn_qkv.weight q4_K blk.8.ffn_down_shexp.weight q3_K blk.8.ffn_down_exps.weight q3_K +blk.9.attn_qkv.weight q4_K blk.9.ffn_down_shexp.weight q3_K +blk.10.attn_qkv.weight q4_K blk.10.ffn_down_shexp.weight q3_K blk.9.ffn_down_exps.weight q3_K blk.10.ffn_down_exps.weight q3_K @@ -52,90 +61,117 @@ blk.11.ffn_down_shexp.weight q3_K blk.11.attn_output.weight q3_K blk.11.attn_v.weight q4_K blk.11.ffn_down_exps.weight q3_K +blk.12.attn_qkv.weight q4_K blk.12.ffn_down_exps.weight q3_K blk.12.ffn_down_shexp.weight q3_K +blk.13.attn_qkv.weight q4_K blk.13.ffn_down_shexp.weight q3_K blk.13.ffn_down_exps.weight q3_K +blk.14.attn_qkv.weight q4_K blk.14.ffn_down_shexp.weight q3_K blk.14.ffn_down_exps.weight q3_K blk.15.ffn_down_shexp.weight q3_K blk.15.attn_output.weight q3_K blk.15.attn_v.weight q4_K blk.15.ffn_down_exps.weight q3_K +blk.16.attn_qkv.weight q4_K blk.16.ffn_down_shexp.weight q3_K blk.16.ffn_down_exps.weight q3_K +blk.17.attn_qkv.weight q4_K blk.17.ffn_down_exps.weight q3_K blk.17.ffn_down_shexp.weight q3_K +blk.18.attn_qkv.weight q4_K blk.18.ffn_down_shexp.weight q3_K blk.18.ffn_down_exps.weight q3_K blk.19.ffn_down_shexp.weight q3_K blk.19.attn_output.weight q3_K blk.19.attn_v.weight q4_K blk.19.ffn_down_exps.weight q3_K +blk.20.attn_qkv.weight q4_K blk.20.ffn_down_shexp.weight q3_K blk.20.ffn_down_exps.weight q3_K +blk.21.attn_qkv.weight q4_K blk.21.ffn_down_shexp.weight q3_K blk.21.ffn_down_exps.weight q3_K +blk.22.attn_qkv.weight q4_K blk.22.ffn_down_shexp.weight q3_K blk.22.ffn_down_exps.weight q3_K blk.23.ffn_down_exps.weight q3_K blk.23.ffn_down_shexp.weight q3_K blk.23.attn_output.weight q3_K blk.23.attn_v.weight q4_K +blk.24.attn_qkv.weight q4_K blk.24.ffn_down_shexp.weight q3_K blk.24.ffn_down_exps.weight q3_K +blk.25.attn_qkv.weight q4_K blk.25.ffn_down_shexp.weight q3_K blk.25.ffn_down_exps.weight q3_K +blk.26.attn_qkv.weight q4_K blk.26.ffn_down_shexp.weight q3_K blk.26.ffn_down_exps.weight q3_K blk.27.ffn_down_shexp.weight q3_K blk.27.attn_output.weight q3_K blk.27.attn_v.weight q4_K blk.27.ffn_down_exps.weight q3_K +blk.28.attn_qkv.weight q4_K blk.28.ffn_down_shexp.weight q3_K blk.28.ffn_down_exps.weight q3_K +blk.29.attn_qkv.weight q4_K blk.29.ffn_down_exps.weight q3_K blk.29.ffn_down_shexp.weight q3_K +blk.30.attn_qkv.weight q4_K blk.30.ffn_down_shexp.weight q3_K blk.30.ffn_down_exps.weight q3_K blk.31.ffn_down_shexp.weight q3_K blk.31.attn_output.weight q3_K blk.31.attn_v.weight q4_K blk.31.ffn_down_exps.weight q3_K +blk.32.attn_qkv.weight q4_K blk.32.ffn_down_shexp.weight q3_K blk.32.ffn_down_exps.weight q3_K +blk.33.attn_qkv.weight q4_K blk.33.ffn_down_shexp.weight q3_K blk.33.ffn_down_exps.weight q3_K +blk.34.attn_qkv.weight q4_K blk.34.ffn_down_exps.weight q3_K blk.34.ffn_down_shexp.weight q3_K blk.35.ffn_down_shexp.weight q3_K blk.35.attn_output.weight q3_K blk.35.attn_v.weight q4_K blk.35.ffn_down_exps.weight q3_K +blk.36.attn_qkv.weight q4_K blk.36.ffn_down_shexp.weight q3_K blk.36.ffn_down_exps.weight q3_K +blk.37.attn_qkv.weight q4_K blk.37.ffn_down_shexp.weight q3_K blk.37.ffn_down_exps.weight q3_K +blk.38.attn_qkv.weight q4_K blk.38.ffn_down_shexp.weight q3_K blk.38.ffn_down_exps.weight q3_K blk.39.ffn_down_shexp.weight q3_K blk.39.attn_output.weight q3_K blk.39.attn_v.weight q4_K blk.39.ffn_down_exps.weight q3_K +blk.40.attn_qkv.weight q4_K blk.40.ffn_down_exps.weight q3_K blk.40.ffn_down_shexp.weight q3_K +blk.41.attn_qkv.weight q4_K blk.41.ffn_down_shexp.weight q3_K blk.41.ffn_down_exps.weight q3_K +blk.42.attn_qkv.weight q4_K blk.42.ffn_down_shexp.weight q3_K blk.42.ffn_down_exps.weight q3_K blk.43.ffn_down_shexp.weight q3_K blk.43.attn_output.weight q3_K blk.43.attn_v.weight q4_K blk.43.ffn_down_exps.weight q3_K +blk.44.attn_qkv.weight q4_K blk.44.ffn_down_shexp.weight q3_K blk.44.ffn_down_exps.weight q3_K +blk.45.attn_qkv.weight q4_K blk.45.ffn_down_shexp.weight q3_K blk.45.ffn_down_exps.weight q3_K +blk.46.attn_qkv.weight q4_K blk.46.ffn_down_exps.weight q3_K blk.46.ffn_down_shexp.weight q3_K blk.47.ffn_down_shexp.weight q3_K @@ -148,10 +184,10 @@ blk.47.ffn_down_exps.weight q3_K output.weight q6_K [Q3_K_M] q3_K -blk.0.attn_qkv.weight q4_K +blk.0.attn_qkv.weight q5_K blk.0.ffn_down_exps.weight q5_K blk.0.ffn_down_shexp.weight q5_K -blk.1.attn_qkv.weight q4_K +blk.1.attn_qkv.weight q5_K blk.1.ffn_down_shexp.weight q5_K blk.1.ffn_down_exps.weight q5_K blk.2.attn_qkv.weight q4_K @@ -159,7 +195,7 @@ blk.2.ffn_down_shexp.weight q5_K blk.2.ffn_down_exps.weight q5_K blk.3.ffn_down_shexp.weight q4_K blk.3.attn_output.weight q4_K -blk.3.attn_v.weight q5_K +blk.3.attn_v.weight q4_K blk.3.ffn_down_exps.weight q4_K blk.4.attn_qkv.weight q4_K blk.4.ffn_down_shexp.weight q4_K @@ -172,7 +208,7 @@ blk.6.ffn_down_exps.weight q4_K blk.6.ffn_down_shexp.weight q4_K blk.7.ffn_down_shexp.weight q4_K blk.7.attn_output.weight q4_K -blk.7.attn_v.weight q5_K +blk.7.attn_v.weight q4_K blk.7.ffn_down_exps.weight q4_K blk.8.attn_qkv.weight q4_K blk.8.ffn_down_shexp.weight q4_K @@ -307,38 +343,38 @@ output.weight q6_K blk.47.ffn_down_exps.weight q4_K [Q3_K_L] q3_K -blk.0.attn_qkv.weight q4_K +blk.0.attn_qkv.weight q5_K blk.0.ffn_down_exps.weight q5_K blk.0.ffn_down_shexp.weight q5_K -blk.1.attn_qkv.weight q4_K +blk.1.attn_qkv.weight q5_K blk.1.ffn_down_shexp.weight q5_K blk.1.ffn_down_exps.weight q5_K -blk.2.attn_qkv.weight q4_K +blk.2.attn_qkv.weight q5_K blk.2.ffn_down_shexp.weight q5_K blk.2.ffn_down_exps.weight q5_K blk.3.ffn_down_shexp.weight q5_K blk.3.attn_output.weight q5_K blk.3.attn_v.weight q5_K blk.3.ffn_down_exps.weight q5_K -blk.4.attn_qkv.weight q4_K +blk.4.attn_qkv.weight q5_K blk.4.ffn_down_shexp.weight q5_K blk.4.ffn_down_exps.weight q5_K -blk.5.attn_qkv.weight q4_K +blk.5.attn_qkv.weight q5_K blk.5.ffn_down_shexp.weight q5_K blk.5.ffn_down_exps.weight q5_K -blk.6.attn_qkv.weight q4_K +blk.6.attn_qkv.weight q5_K blk.6.ffn_down_exps.weight q5_K blk.6.ffn_down_shexp.weight q5_K blk.7.ffn_down_shexp.weight q5_K blk.7.attn_output.weight q5_K blk.7.attn_v.weight q5_K blk.7.ffn_down_exps.weight q5_K -blk.8.attn_qkv.weight q4_K +blk.8.attn_qkv.weight q5_K blk.8.ffn_down_shexp.weight q5_K blk.8.ffn_down_exps.weight q5_K -blk.9.attn_qkv.weight q4_K +blk.9.attn_qkv.weight q5_K blk.9.ffn_down_shexp.weight q5_K -blk.10.attn_qkv.weight q4_K +blk.10.attn_qkv.weight q5_K blk.10.ffn_down_shexp.weight q5_K blk.9.ffn_down_exps.weight q5_K blk.10.ffn_down_exps.weight q5_K @@ -346,117 +382,117 @@ blk.11.ffn_down_shexp.weight q5_K blk.11.attn_output.weight q5_K blk.11.attn_v.weight q5_K blk.11.ffn_down_exps.weight q5_K -blk.12.attn_qkv.weight q4_K +blk.12.attn_qkv.weight q5_K blk.12.ffn_down_exps.weight q5_K blk.12.ffn_down_shexp.weight q5_K -blk.13.attn_qkv.weight q4_K +blk.13.attn_qkv.weight q5_K blk.13.ffn_down_shexp.weight q5_K blk.13.ffn_down_exps.weight q5_K -blk.14.attn_qkv.weight q4_K +blk.14.attn_qkv.weight q5_K blk.14.ffn_down_shexp.weight q5_K blk.14.ffn_down_exps.weight q5_K blk.15.ffn_down_shexp.weight q5_K blk.15.attn_output.weight q5_K blk.15.attn_v.weight q5_K blk.15.ffn_down_exps.weight q5_K -blk.16.attn_qkv.weight q4_K +blk.16.attn_qkv.weight q5_K blk.16.ffn_down_shexp.weight q5_K blk.16.ffn_down_exps.weight q5_K -blk.17.attn_qkv.weight q4_K +blk.17.attn_qkv.weight q5_K blk.17.ffn_down_exps.weight q5_K blk.17.ffn_down_shexp.weight q5_K -blk.18.attn_qkv.weight q4_K +blk.18.attn_qkv.weight q5_K blk.18.ffn_down_shexp.weight q5_K blk.18.ffn_down_exps.weight q5_K blk.19.ffn_down_shexp.weight q5_K blk.19.attn_output.weight q5_K blk.19.attn_v.weight q5_K blk.19.ffn_down_exps.weight q5_K -blk.20.attn_qkv.weight q4_K +blk.20.attn_qkv.weight q5_K blk.20.ffn_down_shexp.weight q5_K blk.20.ffn_down_exps.weight q5_K -blk.21.attn_qkv.weight q4_K +blk.21.attn_qkv.weight q5_K blk.21.ffn_down_shexp.weight q5_K blk.21.ffn_down_exps.weight q5_K -blk.22.attn_qkv.weight q4_K +blk.22.attn_qkv.weight q5_K blk.22.ffn_down_shexp.weight q5_K blk.22.ffn_down_exps.weight q5_K blk.23.ffn_down_exps.weight q5_K blk.23.ffn_down_shexp.weight q5_K blk.23.attn_output.weight q5_K blk.23.attn_v.weight q5_K -blk.24.attn_qkv.weight q4_K +blk.24.attn_qkv.weight q5_K blk.24.ffn_down_shexp.weight q5_K blk.24.ffn_down_exps.weight q5_K -blk.25.attn_qkv.weight q4_K +blk.25.attn_qkv.weight q5_K blk.25.ffn_down_shexp.weight q5_K blk.25.ffn_down_exps.weight q5_K -blk.26.attn_qkv.weight q4_K +blk.26.attn_qkv.weight q5_K blk.26.ffn_down_shexp.weight q5_K blk.26.ffn_down_exps.weight q5_K blk.27.ffn_down_shexp.weight q5_K blk.27.attn_output.weight q5_K blk.27.attn_v.weight q5_K blk.27.ffn_down_exps.weight q5_K -blk.28.attn_qkv.weight q4_K +blk.28.attn_qkv.weight q5_K blk.28.ffn_down_shexp.weight q5_K blk.28.ffn_down_exps.weight q5_K -blk.29.attn_qkv.weight q4_K +blk.29.attn_qkv.weight q5_K blk.29.ffn_down_exps.weight q5_K blk.29.ffn_down_shexp.weight q5_K -blk.30.attn_qkv.weight q4_K +blk.30.attn_qkv.weight q5_K blk.30.ffn_down_shexp.weight q5_K blk.30.ffn_down_exps.weight q5_K blk.31.ffn_down_shexp.weight q5_K blk.31.attn_output.weight q5_K blk.31.attn_v.weight q5_K blk.31.ffn_down_exps.weight q5_K -blk.32.attn_qkv.weight q4_K +blk.32.attn_qkv.weight q5_K blk.32.ffn_down_shexp.weight q5_K blk.32.ffn_down_exps.weight q5_K -blk.33.attn_qkv.weight q4_K +blk.33.attn_qkv.weight q5_K blk.33.ffn_down_shexp.weight q5_K blk.33.ffn_down_exps.weight q5_K -blk.34.attn_qkv.weight q4_K +blk.34.attn_qkv.weight q5_K blk.34.ffn_down_exps.weight q5_K blk.34.ffn_down_shexp.weight q5_K blk.35.ffn_down_shexp.weight q5_K blk.35.attn_output.weight q5_K blk.35.attn_v.weight q5_K blk.35.ffn_down_exps.weight q5_K -blk.36.attn_qkv.weight q4_K +blk.36.attn_qkv.weight q5_K blk.36.ffn_down_shexp.weight q5_K blk.36.ffn_down_exps.weight q5_K -blk.37.attn_qkv.weight q4_K +blk.37.attn_qkv.weight q5_K blk.37.ffn_down_shexp.weight q5_K blk.37.ffn_down_exps.weight q5_K -blk.38.attn_qkv.weight q4_K +blk.38.attn_qkv.weight q5_K blk.38.ffn_down_shexp.weight q5_K blk.38.ffn_down_exps.weight q5_K blk.39.ffn_down_shexp.weight q5_K blk.39.attn_output.weight q5_K blk.39.attn_v.weight q5_K blk.39.ffn_down_exps.weight q5_K -blk.40.attn_qkv.weight q4_K +blk.40.attn_qkv.weight q5_K blk.40.ffn_down_exps.weight q5_K blk.40.ffn_down_shexp.weight q5_K -blk.41.attn_qkv.weight q4_K +blk.41.attn_qkv.weight q5_K blk.41.ffn_down_shexp.weight q5_K blk.41.ffn_down_exps.weight q5_K -blk.42.attn_qkv.weight q4_K +blk.42.attn_qkv.weight q5_K blk.42.ffn_down_shexp.weight q5_K blk.42.ffn_down_exps.weight q5_K blk.43.ffn_down_shexp.weight q5_K blk.43.attn_output.weight q5_K blk.43.attn_v.weight q5_K blk.43.ffn_down_exps.weight q5_K -blk.44.attn_qkv.weight q4_K +blk.44.attn_qkv.weight q5_K blk.44.ffn_down_shexp.weight q5_K blk.44.ffn_down_exps.weight q5_K -blk.45.attn_qkv.weight q4_K +blk.45.attn_qkv.weight q5_K blk.45.ffn_down_shexp.weight q5_K blk.45.ffn_down_exps.weight q5_K -blk.46.attn_qkv.weight q4_K +blk.46.attn_qkv.weight q5_K blk.46.ffn_down_exps.weight q5_K blk.46.ffn_down_shexp.weight q5_K blk.47.ffn_down_shexp.weight q5_K @@ -466,10 +502,13 @@ output.weight q6_K blk.47.ffn_down_exps.weight q5_K [Q4_K_S] q4_K +blk.0.attn_qkv.weight q5_K blk.0.ffn_down_exps.weight q5_K blk.0.ffn_down_shexp.weight q5_K +blk.1.attn_qkv.weight q5_K blk.1.ffn_down_shexp.weight q5_K blk.1.ffn_down_exps.weight q5_K +blk.2.attn_qkv.weight q5_K blk.2.ffn_down_shexp.weight q5_K blk.2.ffn_down_exps.weight q5_K blk.3.ffn_down_shexp.weight q5_K @@ -479,99 +518,76 @@ blk.4.ffn_down_shexp.weight q5_K blk.4.ffn_down_exps.weight q5_K blk.5.ffn_down_shexp.weight q5_K blk.5.ffn_down_exps.weight q5_K -blk.7.attn_v.weight q5_K -blk.11.attn_v.weight q5_K -blk.15.attn_v.weight q5_K output.weight q6_K [Q4_K_M] q4_K -blk.0.attn_qkv.weight q5_K +blk.0.attn_qkv.weight q6_K blk.0.ffn_down_exps.weight q6_K blk.0.ffn_down_shexp.weight q6_K -blk.1.attn_qkv.weight q5_K +blk.1.attn_qkv.weight q6_K blk.1.ffn_down_shexp.weight q6_K blk.1.ffn_down_exps.weight q6_K -blk.2.attn_qkv.weight q5_K +blk.2.attn_qkv.weight q6_K blk.2.ffn_down_shexp.weight q6_K blk.2.ffn_down_exps.weight q6_K blk.3.ffn_down_shexp.weight q6_K blk.3.attn_v.weight q6_K blk.3.ffn_down_exps.weight q6_K -blk.4.attn_qkv.weight q5_K +blk.4.attn_qkv.weight q6_K blk.4.ffn_down_shexp.weight q6_K blk.4.ffn_down_exps.weight q6_K -blk.5.attn_qkv.weight q5_K +blk.5.attn_qkv.weight q6_K blk.5.ffn_down_shexp.weight q6_K blk.5.ffn_down_exps.weight q6_K -blk.6.attn_qkv.weight q5_K -blk.7.attn_v.weight q6_K -blk.8.attn_qkv.weight q5_K +blk.8.attn_qkv.weight q6_K blk.8.ffn_down_shexp.weight q6_K blk.8.ffn_down_exps.weight q6_K -blk.9.attn_qkv.weight q5_K -blk.10.attn_qkv.weight q5_K blk.11.ffn_down_shexp.weight q6_K blk.11.attn_v.weight q6_K blk.11.ffn_down_exps.weight q6_K -blk.12.attn_qkv.weight q5_K -blk.13.attn_qkv.weight q5_K -blk.14.attn_qkv.weight q5_K +blk.14.attn_qkv.weight q6_K blk.14.ffn_down_shexp.weight q6_K blk.14.ffn_down_exps.weight q6_K -blk.15.attn_v.weight q6_K -blk.16.attn_qkv.weight q5_K -blk.17.attn_qkv.weight q5_K +blk.17.attn_qkv.weight q6_K blk.17.ffn_down_exps.weight q6_K blk.17.ffn_down_shexp.weight q6_K -blk.18.attn_qkv.weight q5_K -blk.19.attn_v.weight q6_K -blk.20.attn_qkv.weight q5_K +blk.20.attn_qkv.weight q6_K blk.20.ffn_down_shexp.weight q6_K blk.20.ffn_down_exps.weight q6_K -blk.21.attn_qkv.weight q5_K -blk.22.attn_qkv.weight q5_K blk.23.ffn_down_exps.weight q6_K blk.23.ffn_down_shexp.weight q6_K blk.23.attn_v.weight q6_K -blk.24.attn_qkv.weight q5_K -blk.25.attn_qkv.weight q5_K -blk.26.attn_qkv.weight q5_K +blk.26.attn_qkv.weight q6_K blk.26.ffn_down_shexp.weight q6_K blk.26.ffn_down_exps.weight q6_K -blk.28.attn_qkv.weight q5_K -blk.29.attn_qkv.weight q5_K +blk.29.attn_qkv.weight q6_K blk.29.ffn_down_exps.weight q6_K blk.29.ffn_down_shexp.weight q6_K -blk.30.attn_qkv.weight q5_K -blk.32.attn_qkv.weight q5_K +blk.32.attn_qkv.weight q6_K blk.32.ffn_down_shexp.weight q6_K blk.32.ffn_down_exps.weight q6_K -blk.33.attn_qkv.weight q5_K -blk.34.attn_qkv.weight q5_K blk.35.ffn_down_shexp.weight q6_K blk.35.attn_v.weight q6_K blk.35.ffn_down_exps.weight q6_K -blk.36.attn_qkv.weight q5_K -blk.37.attn_qkv.weight q5_K -blk.38.attn_qkv.weight q5_K +blk.38.attn_qkv.weight q6_K blk.38.ffn_down_shexp.weight q6_K blk.38.ffn_down_exps.weight q6_K -blk.40.attn_qkv.weight q5_K -blk.41.attn_qkv.weight q5_K +blk.41.attn_qkv.weight q6_K blk.41.ffn_down_shexp.weight q6_K blk.41.ffn_down_exps.weight q6_K -blk.42.attn_qkv.weight q5_K +blk.42.attn_qkv.weight q6_K blk.42.ffn_down_shexp.weight q6_K blk.42.ffn_down_exps.weight q6_K blk.43.ffn_down_shexp.weight q6_K +blk.43.attn_v.weight q6_K blk.43.ffn_down_exps.weight q6_K -blk.44.attn_qkv.weight q5_K +blk.44.attn_qkv.weight q6_K blk.44.ffn_down_shexp.weight q6_K blk.44.ffn_down_exps.weight q6_K -blk.45.attn_qkv.weight q5_K +blk.45.attn_qkv.weight q6_K blk.45.ffn_down_shexp.weight q6_K blk.45.ffn_down_exps.weight q6_K -blk.46.attn_qkv.weight q5_K +blk.46.attn_qkv.weight q6_K blk.46.ffn_down_exps.weight q6_K blk.46.ffn_down_shexp.weight q6_K blk.47.ffn_down_shexp.weight q6_K @@ -601,60 +617,39 @@ blk.4.ffn_down_exps.weight q6_K blk.5.attn_qkv.weight q6_K blk.5.ffn_down_shexp.weight q6_K blk.5.ffn_down_exps.weight q6_K -blk.6.attn_qkv.weight q6_K -blk.7.attn_v.weight q6_K blk.8.attn_qkv.weight q6_K blk.8.ffn_down_shexp.weight q6_K blk.8.ffn_down_exps.weight q6_K -blk.9.attn_qkv.weight q6_K -blk.10.attn_qkv.weight q6_K blk.11.ffn_down_shexp.weight q6_K blk.11.attn_v.weight q6_K blk.11.ffn_down_exps.weight q6_K -blk.12.attn_qkv.weight q6_K -blk.13.attn_qkv.weight q6_K blk.14.attn_qkv.weight q6_K blk.14.ffn_down_shexp.weight q6_K blk.14.ffn_down_exps.weight q6_K -blk.15.attn_v.weight q6_K -blk.16.attn_qkv.weight q6_K blk.17.attn_qkv.weight q6_K blk.17.ffn_down_exps.weight q6_K blk.17.ffn_down_shexp.weight q6_K -blk.18.attn_qkv.weight q6_K -blk.19.attn_v.weight q6_K blk.20.attn_qkv.weight q6_K blk.20.ffn_down_shexp.weight q6_K blk.20.ffn_down_exps.weight q6_K -blk.21.attn_qkv.weight q6_K -blk.22.attn_qkv.weight q6_K blk.23.ffn_down_exps.weight q6_K blk.23.ffn_down_shexp.weight q6_K blk.23.attn_v.weight q6_K -blk.24.attn_qkv.weight q6_K -blk.25.attn_qkv.weight q6_K blk.26.attn_qkv.weight q6_K blk.26.ffn_down_shexp.weight q6_K blk.26.ffn_down_exps.weight q6_K -blk.28.attn_qkv.weight q6_K blk.29.attn_qkv.weight q6_K blk.29.ffn_down_exps.weight q6_K blk.29.ffn_down_shexp.weight q6_K -blk.30.attn_qkv.weight q6_K blk.32.attn_qkv.weight q6_K blk.32.ffn_down_shexp.weight q6_K blk.32.ffn_down_exps.weight q6_K -blk.33.attn_qkv.weight q6_K -blk.34.attn_qkv.weight q6_K blk.35.ffn_down_shexp.weight q6_K blk.35.attn_v.weight q6_K blk.35.ffn_down_exps.weight q6_K -blk.36.attn_qkv.weight q6_K -blk.37.attn_qkv.weight q6_K blk.38.attn_qkv.weight q6_K blk.38.ffn_down_shexp.weight q6_K blk.38.ffn_down_exps.weight q6_K -blk.40.attn_qkv.weight q6_K blk.41.attn_qkv.weight q6_K blk.41.ffn_down_shexp.weight q6_K blk.41.ffn_down_exps.weight q6_K @@ -662,6 +657,7 @@ blk.42.attn_qkv.weight q6_K blk.42.ffn_down_shexp.weight q6_K blk.42.ffn_down_exps.weight q6_K blk.43.ffn_down_shexp.weight q6_K +blk.43.attn_v.weight q6_K blk.43.ffn_down_exps.weight q6_K blk.44.attn_qkv.weight q6_K blk.44.ffn_down_shexp.weight q6_K @@ -681,79 +677,193 @@ blk.47.ffn_down_exps.weight q6_K [IQ2_XXS] iq2_xxs token_embd.weight q2_K +blk.0.attn_qkv.weight q4_K blk.0.ffn_down_exps.weight q2_K blk.0.ffn_down_shexp.weight q2_K +blk.1.attn_qkv.weight q4_K blk.1.ffn_down_shexp.weight q2_K blk.1.ffn_down_exps.weight q2_K +blk.2.attn_qkv.weight q4_K blk.2.ffn_down_shexp.weight q2_K blk.2.ffn_down_exps.weight q2_K blk.3.attn_v.weight q4_K +blk.4.attn_qkv.weight q4_K +blk.5.attn_qkv.weight q4_K +blk.6.attn_qkv.weight q4_K blk.7.attn_v.weight q4_K +blk.8.attn_qkv.weight q4_K +blk.9.attn_qkv.weight q4_K +blk.10.attn_qkv.weight q4_K blk.11.attn_v.weight q4_K +blk.12.attn_qkv.weight q4_K +blk.13.attn_qkv.weight q4_K +blk.14.attn_qkv.weight q4_K blk.15.attn_v.weight q4_K +blk.16.attn_qkv.weight q4_K +blk.17.attn_qkv.weight q4_K +blk.18.attn_qkv.weight q4_K blk.19.attn_v.weight q4_K +blk.20.attn_qkv.weight q4_K +blk.21.attn_qkv.weight q4_K +blk.22.attn_qkv.weight q4_K blk.23.attn_v.weight q4_K +blk.24.attn_qkv.weight q4_K +blk.25.attn_qkv.weight q4_K +blk.26.attn_qkv.weight q4_K blk.27.attn_v.weight q4_K +blk.28.attn_qkv.weight q4_K +blk.29.attn_qkv.weight q4_K +blk.30.attn_qkv.weight q4_K blk.31.attn_v.weight q4_K +blk.32.attn_qkv.weight q4_K +blk.33.attn_qkv.weight q4_K +blk.34.attn_qkv.weight q4_K blk.35.attn_v.weight q4_K +blk.36.attn_qkv.weight q4_K +blk.37.attn_qkv.weight q4_K +blk.38.attn_qkv.weight q4_K blk.39.attn_v.weight q4_K +blk.40.attn_qkv.weight q4_K +blk.41.attn_qkv.weight q4_K +blk.42.attn_qkv.weight q4_K blk.43.attn_v.weight q4_K +blk.44.attn_qkv.weight q4_K +blk.45.attn_qkv.weight q4_K +blk.46.attn_qkv.weight q4_K blk.47.attn_v.weight q4_K output.weight q5_K [IQ2_XS] iq2_xs token_embd.weight q2_K +blk.0.attn_qkv.weight q4_K blk.0.ffn_down_exps.weight q2_K blk.0.ffn_down_shexp.weight q2_K +blk.1.attn_qkv.weight q4_K blk.1.ffn_down_shexp.weight q2_K blk.1.ffn_down_exps.weight q2_K +blk.2.attn_qkv.weight q4_K blk.2.ffn_down_shexp.weight q2_K blk.2.ffn_down_exps.weight q2_K blk.3.attn_v.weight q4_K +blk.4.attn_qkv.weight q4_K +blk.5.attn_qkv.weight q4_K +blk.6.attn_qkv.weight q4_K blk.7.attn_v.weight q4_K +blk.8.attn_qkv.weight q4_K +blk.9.attn_qkv.weight q4_K +blk.10.attn_qkv.weight q4_K blk.11.attn_v.weight q4_K +blk.12.attn_qkv.weight q4_K +blk.13.attn_qkv.weight q4_K +blk.14.attn_qkv.weight q4_K blk.15.attn_v.weight q4_K +blk.16.attn_qkv.weight q4_K +blk.17.attn_qkv.weight q4_K +blk.18.attn_qkv.weight q4_K blk.19.attn_v.weight q4_K +blk.20.attn_qkv.weight q4_K +blk.21.attn_qkv.weight q4_K +blk.22.attn_qkv.weight q4_K blk.23.attn_v.weight q4_K +blk.24.attn_qkv.weight q4_K +blk.25.attn_qkv.weight q4_K +blk.26.attn_qkv.weight q4_K blk.27.attn_v.weight q4_K +blk.28.attn_qkv.weight q4_K +blk.29.attn_qkv.weight q4_K +blk.30.attn_qkv.weight q4_K blk.31.attn_v.weight q4_K +blk.32.attn_qkv.weight q4_K +blk.33.attn_qkv.weight q4_K +blk.34.attn_qkv.weight q4_K blk.35.attn_v.weight q4_K +blk.36.attn_qkv.weight q4_K +blk.37.attn_qkv.weight q4_K +blk.38.attn_qkv.weight q4_K blk.39.attn_v.weight q4_K +blk.40.attn_qkv.weight q4_K +blk.41.attn_qkv.weight q4_K +blk.42.attn_qkv.weight q4_K blk.43.attn_v.weight q4_K +blk.44.attn_qkv.weight q4_K +blk.45.attn_qkv.weight q4_K +blk.46.attn_qkv.weight q4_K blk.47.attn_v.weight q4_K output.weight q5_K [Q2_K_S] q2_K +blk.0.attn_qkv.weight q4_K blk.0.ffn_down_exps.weight q4_K blk.0.ffn_down_shexp.weight q4_K +blk.1.attn_qkv.weight q4_K blk.1.ffn_down_shexp.weight q4_K blk.1.ffn_down_exps.weight q4_K +blk.2.attn_qkv.weight q4_K blk.2.ffn_down_shexp.weight q4_K blk.2.ffn_down_exps.weight q4_K blk.3.ffn_down_shexp.weight q4_K blk.3.attn_v.weight q4_K blk.3.ffn_down_exps.weight q4_K +blk.4.attn_qkv.weight q4_K blk.4.ffn_down_shexp.weight q4_K blk.4.ffn_down_exps.weight q4_K +blk.5.attn_qkv.weight q4_K blk.5.ffn_down_shexp.weight q4_K blk.5.ffn_down_exps.weight q4_K +blk.6.attn_qkv.weight q4_K blk.7.attn_v.weight q4_K +blk.8.attn_qkv.weight q4_K +blk.9.attn_qkv.weight q4_K +blk.10.attn_qkv.weight q4_K blk.11.attn_v.weight q4_K +blk.12.attn_qkv.weight q4_K +blk.13.attn_qkv.weight q4_K +blk.14.attn_qkv.weight q4_K blk.15.attn_v.weight q4_K +blk.16.attn_qkv.weight q4_K +blk.17.attn_qkv.weight q4_K +blk.18.attn_qkv.weight q4_K blk.19.attn_v.weight q4_K +blk.20.attn_qkv.weight q4_K +blk.21.attn_qkv.weight q4_K +blk.22.attn_qkv.weight q4_K blk.23.attn_v.weight q4_K +blk.24.attn_qkv.weight q4_K +blk.25.attn_qkv.weight q4_K +blk.26.attn_qkv.weight q4_K blk.27.attn_v.weight q4_K +blk.28.attn_qkv.weight q4_K +blk.29.attn_qkv.weight q4_K +blk.30.attn_qkv.weight q4_K blk.31.attn_v.weight q4_K +blk.32.attn_qkv.weight q4_K +blk.33.attn_qkv.weight q4_K +blk.34.attn_qkv.weight q4_K blk.35.attn_v.weight q4_K +blk.36.attn_qkv.weight q4_K +blk.37.attn_qkv.weight q4_K +blk.38.attn_qkv.weight q4_K blk.39.attn_v.weight q4_K +blk.40.attn_qkv.weight q4_K +blk.41.attn_qkv.weight q4_K +blk.42.attn_qkv.weight q4_K blk.43.attn_v.weight q4_K +blk.44.attn_qkv.weight q4_K +blk.45.attn_qkv.weight q4_K +blk.46.attn_qkv.weight q4_K blk.47.attn_v.weight q4_K output.weight q6_K [IQ3_XS] iq3_s +blk.0.attn_qkv.weight q4_K +blk.1.attn_qkv.weight q4_K +blk.2.attn_qkv.weight q4_K blk.3.attn_k.weight iq3_xxs blk.3.attn_q.weight iq3_xxs blk.3.attn_v.weight q4_K +blk.4.attn_qkv.weight q4_K +blk.5.attn_qkv.weight q4_K +blk.6.attn_qkv.weight q4_K blk.6.ffn_gate_exps.weight iq3_xxs blk.6.ffn_up_exps.weight iq3_xxs blk.6.ffn_gate_shexp.weight iq3_xxs @@ -765,12 +875,15 @@ blk.7.attn_q.weight iq3_xxs blk.7.attn_v.weight q4_K blk.7.ffn_gate_exps.weight iq3_xxs blk.7.ffn_up_exps.weight iq3_xxs +blk.8.attn_qkv.weight q4_K blk.8.ffn_gate_shexp.weight iq3_xxs blk.8.ffn_up_shexp.weight iq3_xxs blk.8.ffn_gate_exps.weight iq3_xxs blk.8.ffn_up_exps.weight iq3_xxs +blk.9.attn_qkv.weight q4_K blk.9.ffn_gate_shexp.weight iq3_xxs blk.9.ffn_up_shexp.weight iq3_xxs +blk.10.attn_qkv.weight q4_K blk.10.ffn_gate_shexp.weight iq3_xxs blk.10.ffn_up_shexp.weight iq3_xxs blk.9.ffn_gate_exps.weight iq3_xxs @@ -784,14 +897,17 @@ blk.11.attn_q.weight iq3_xxs blk.11.attn_v.weight q4_K blk.11.ffn_gate_exps.weight iq3_xxs blk.11.ffn_up_exps.weight iq3_xxs +blk.12.attn_qkv.weight q4_K blk.12.ffn_gate_exps.weight iq3_xxs blk.12.ffn_up_exps.weight iq3_xxs blk.12.ffn_gate_shexp.weight iq3_xxs blk.12.ffn_up_shexp.weight iq3_xxs +blk.13.attn_qkv.weight q4_K blk.13.ffn_gate_shexp.weight iq3_xxs blk.13.ffn_up_shexp.weight iq3_xxs blk.13.ffn_gate_exps.weight iq3_xxs blk.13.ffn_up_exps.weight iq3_xxs +blk.14.attn_qkv.weight q4_K blk.14.ffn_gate_shexp.weight iq3_xxs blk.14.ffn_up_shexp.weight iq3_xxs blk.14.ffn_gate_exps.weight iq3_xxs @@ -803,14 +919,17 @@ blk.15.attn_q.weight iq3_xxs blk.15.attn_v.weight q4_K blk.15.ffn_gate_exps.weight iq3_xxs blk.15.ffn_up_exps.weight iq3_xxs +blk.16.attn_qkv.weight q4_K blk.16.ffn_gate_shexp.weight iq3_xxs blk.16.ffn_up_shexp.weight iq3_xxs blk.16.ffn_gate_exps.weight iq3_xxs blk.16.ffn_up_exps.weight iq3_xxs +blk.17.attn_qkv.weight q4_K blk.17.ffn_gate_exps.weight iq3_xxs blk.17.ffn_up_exps.weight iq3_xxs blk.17.ffn_gate_shexp.weight iq3_xxs blk.17.ffn_up_shexp.weight iq3_xxs +blk.18.attn_qkv.weight q4_K blk.18.ffn_gate_shexp.weight iq3_xxs blk.18.ffn_up_shexp.weight iq3_xxs blk.18.ffn_gate_exps.weight iq3_xxs @@ -822,14 +941,17 @@ blk.19.attn_q.weight iq3_xxs blk.19.attn_v.weight q4_K blk.19.ffn_gate_exps.weight iq3_xxs blk.19.ffn_up_exps.weight iq3_xxs +blk.20.attn_qkv.weight q4_K blk.20.ffn_gate_shexp.weight iq3_xxs blk.20.ffn_up_shexp.weight iq3_xxs blk.20.ffn_gate_exps.weight iq3_xxs blk.20.ffn_up_exps.weight iq3_xxs +blk.21.attn_qkv.weight q4_K blk.21.ffn_gate_shexp.weight iq3_xxs blk.21.ffn_up_shexp.weight iq3_xxs blk.21.ffn_gate_exps.weight iq3_xxs blk.21.ffn_up_exps.weight iq3_xxs +blk.22.attn_qkv.weight q4_K blk.22.ffn_gate_shexp.weight iq3_xxs blk.22.ffn_up_shexp.weight iq3_xxs blk.22.ffn_gate_exps.weight iq3_xxs @@ -841,14 +963,17 @@ blk.23.ffn_up_shexp.weight iq3_xxs blk.23.attn_k.weight iq3_xxs blk.23.attn_q.weight iq3_xxs blk.23.attn_v.weight q4_K +blk.24.attn_qkv.weight q4_K blk.24.ffn_gate_shexp.weight iq3_xxs blk.24.ffn_up_shexp.weight iq3_xxs blk.24.ffn_gate_exps.weight iq3_xxs blk.24.ffn_up_exps.weight iq3_xxs +blk.25.attn_qkv.weight q4_K blk.25.ffn_gate_shexp.weight iq3_xxs blk.25.ffn_up_shexp.weight iq3_xxs blk.25.ffn_gate_exps.weight iq3_xxs blk.25.ffn_up_exps.weight iq3_xxs +blk.26.attn_qkv.weight q4_K blk.26.ffn_gate_shexp.weight iq3_xxs blk.26.ffn_up_shexp.weight iq3_xxs blk.26.ffn_gate_exps.weight iq3_xxs @@ -860,14 +985,17 @@ blk.27.attn_q.weight iq3_xxs blk.27.attn_v.weight q4_K blk.27.ffn_gate_exps.weight iq3_xxs blk.27.ffn_up_exps.weight iq3_xxs +blk.28.attn_qkv.weight q4_K blk.28.ffn_gate_shexp.weight iq3_xxs blk.28.ffn_up_shexp.weight iq3_xxs blk.28.ffn_gate_exps.weight iq3_xxs blk.28.ffn_up_exps.weight iq3_xxs +blk.29.attn_qkv.weight q4_K blk.29.ffn_gate_exps.weight iq3_xxs blk.29.ffn_up_exps.weight iq3_xxs blk.29.ffn_gate_shexp.weight iq3_xxs blk.29.ffn_up_shexp.weight iq3_xxs +blk.30.attn_qkv.weight q4_K blk.30.ffn_gate_shexp.weight iq3_xxs blk.30.ffn_up_shexp.weight iq3_xxs blk.30.ffn_gate_exps.weight iq3_xxs @@ -879,14 +1007,17 @@ blk.31.attn_q.weight iq3_xxs blk.31.attn_v.weight q4_K blk.31.ffn_gate_exps.weight iq3_xxs blk.31.ffn_up_exps.weight iq3_xxs +blk.32.attn_qkv.weight q4_K blk.32.ffn_gate_shexp.weight iq3_xxs blk.32.ffn_up_shexp.weight iq3_xxs blk.32.ffn_gate_exps.weight iq3_xxs blk.32.ffn_up_exps.weight iq3_xxs +blk.33.attn_qkv.weight q4_K blk.33.ffn_gate_shexp.weight iq3_xxs blk.33.ffn_up_shexp.weight iq3_xxs blk.33.ffn_gate_exps.weight iq3_xxs blk.33.ffn_up_exps.weight iq3_xxs +blk.34.attn_qkv.weight q4_K blk.34.ffn_gate_exps.weight iq3_xxs blk.34.ffn_up_exps.weight iq3_xxs blk.34.ffn_gate_shexp.weight iq3_xxs @@ -898,14 +1029,17 @@ blk.35.attn_q.weight iq3_xxs blk.35.attn_v.weight q4_K blk.35.ffn_gate_exps.weight iq3_xxs blk.35.ffn_up_exps.weight iq3_xxs +blk.36.attn_qkv.weight q4_K blk.36.ffn_gate_shexp.weight iq3_xxs blk.36.ffn_up_shexp.weight iq3_xxs blk.36.ffn_gate_exps.weight iq3_xxs blk.36.ffn_up_exps.weight iq3_xxs +blk.37.attn_qkv.weight q4_K blk.37.ffn_gate_shexp.weight iq3_xxs blk.37.ffn_up_shexp.weight iq3_xxs blk.37.ffn_gate_exps.weight iq3_xxs blk.37.ffn_up_exps.weight iq3_xxs +blk.38.attn_qkv.weight q4_K blk.38.ffn_gate_shexp.weight iq3_xxs blk.38.ffn_up_shexp.weight iq3_xxs blk.38.ffn_gate_exps.weight iq3_xxs @@ -917,17 +1051,23 @@ blk.39.attn_q.weight iq3_xxs blk.39.attn_v.weight q4_K blk.39.ffn_gate_exps.weight iq3_xxs blk.39.ffn_up_exps.weight iq3_xxs +blk.40.attn_qkv.weight q4_K blk.40.ffn_gate_exps.weight iq3_xxs blk.40.ffn_up_exps.weight iq3_xxs blk.40.ffn_gate_shexp.weight iq3_xxs blk.40.ffn_up_shexp.weight iq3_xxs +blk.41.attn_qkv.weight q4_K blk.41.ffn_gate_shexp.weight iq3_xxs blk.41.ffn_up_shexp.weight iq3_xxs blk.41.ffn_gate_exps.weight iq3_xxs blk.41.ffn_up_exps.weight iq3_xxs +blk.42.attn_qkv.weight q4_K blk.43.attn_k.weight iq3_xxs blk.43.attn_q.weight iq3_xxs blk.43.attn_v.weight q4_K +blk.44.attn_qkv.weight q4_K +blk.45.attn_qkv.weight q4_K +blk.46.attn_qkv.weight q4_K blk.47.attn_k.weight iq3_xxs blk.47.attn_q.weight iq3_xxs blk.47.attn_v.weight q4_K @@ -935,10 +1075,13 @@ output.weight q6_K [IQ3_XXS] iq3_xxs token_embd.weight iq3_s +blk.0.attn_qkv.weight q4_K blk.0.ffn_down_exps.weight q4_K blk.0.ffn_down_shexp.weight q4_K +blk.1.attn_qkv.weight q4_K blk.1.ffn_down_shexp.weight q4_K blk.1.ffn_down_exps.weight q4_K +blk.2.attn_qkv.weight q4_K blk.2.ffn_down_shexp.weight q4_K blk.2.ffn_down_exps.weight q4_K blk.3.ffn_down_shexp.weight q4_K @@ -947,10 +1090,13 @@ blk.3.attn_output.weight iq3_s blk.3.attn_q.weight iq2_s blk.3.attn_v.weight q4_K blk.3.ffn_down_exps.weight q4_K +blk.4.attn_qkv.weight q4_K blk.4.ffn_down_shexp.weight q4_K blk.4.ffn_down_exps.weight q4_K +blk.5.attn_qkv.weight q4_K blk.5.ffn_down_shexp.weight q4_K blk.5.ffn_down_exps.weight q4_K +blk.6.attn_qkv.weight q4_K blk.6.ffn_down_exps.weight q3_K blk.6.ffn_down_shexp.weight q3_K blk.7.ffn_down_shexp.weight q3_K @@ -959,9 +1105,12 @@ blk.7.attn_output.weight iq3_s blk.7.attn_q.weight iq2_s blk.7.attn_v.weight q4_K blk.7.ffn_down_exps.weight q3_K +blk.8.attn_qkv.weight q4_K blk.8.ffn_down_shexp.weight q3_K blk.8.ffn_down_exps.weight q3_K +blk.9.attn_qkv.weight q4_K blk.9.ffn_down_shexp.weight q3_K +blk.10.attn_qkv.weight q4_K blk.10.ffn_down_shexp.weight q3_K blk.9.ffn_down_exps.weight q3_K blk.10.ffn_down_exps.weight q3_K @@ -971,10 +1120,13 @@ blk.11.attn_output.weight iq3_s blk.11.attn_q.weight iq2_s blk.11.attn_v.weight q4_K blk.11.ffn_down_exps.weight q3_K +blk.12.attn_qkv.weight q4_K blk.12.ffn_down_exps.weight q3_K blk.12.ffn_down_shexp.weight q3_K +blk.13.attn_qkv.weight q4_K blk.13.ffn_down_shexp.weight q3_K blk.13.ffn_down_exps.weight q3_K +blk.14.attn_qkv.weight q4_K blk.14.ffn_down_shexp.weight q3_K blk.14.ffn_down_exps.weight q3_K blk.15.ffn_down_shexp.weight q3_K @@ -983,10 +1135,13 @@ blk.15.attn_output.weight iq3_s blk.15.attn_q.weight iq2_s blk.15.attn_v.weight q4_K blk.15.ffn_down_exps.weight q3_K +blk.16.attn_qkv.weight q4_K blk.16.ffn_down_shexp.weight q3_K blk.16.ffn_down_exps.weight q3_K +blk.17.attn_qkv.weight q4_K blk.17.ffn_down_exps.weight q3_K blk.17.ffn_down_shexp.weight q3_K +blk.18.attn_qkv.weight q4_K blk.18.ffn_down_shexp.weight q3_K blk.18.ffn_down_exps.weight q3_K blk.19.ffn_down_shexp.weight q3_K @@ -995,10 +1150,13 @@ blk.19.attn_output.weight iq3_s blk.19.attn_q.weight iq2_s blk.19.attn_v.weight q4_K blk.19.ffn_down_exps.weight q3_K +blk.20.attn_qkv.weight q4_K blk.20.ffn_down_shexp.weight q3_K blk.20.ffn_down_exps.weight q3_K +blk.21.attn_qkv.weight q4_K blk.21.ffn_down_shexp.weight q3_K blk.21.ffn_down_exps.weight q3_K +blk.22.attn_qkv.weight q4_K blk.22.ffn_down_shexp.weight q3_K blk.22.ffn_down_exps.weight q3_K blk.23.ffn_down_exps.weight q3_K @@ -1007,10 +1165,13 @@ blk.23.attn_k.weight iq2_s blk.23.attn_output.weight iq3_s blk.23.attn_q.weight iq2_s blk.23.attn_v.weight q4_K +blk.24.attn_qkv.weight q4_K blk.24.ffn_down_shexp.weight q3_K blk.24.ffn_down_exps.weight q3_K +blk.25.attn_qkv.weight q4_K blk.25.ffn_down_shexp.weight q3_K blk.25.ffn_down_exps.weight q3_K +blk.26.attn_qkv.weight q4_K blk.26.ffn_down_shexp.weight q3_K blk.26.ffn_down_exps.weight q3_K blk.27.ffn_down_shexp.weight q3_K @@ -1019,10 +1180,13 @@ blk.27.attn_output.weight iq3_s blk.27.attn_q.weight iq2_s blk.27.attn_v.weight q4_K blk.27.ffn_down_exps.weight q3_K +blk.28.attn_qkv.weight q4_K blk.28.ffn_down_shexp.weight q3_K blk.28.ffn_down_exps.weight q3_K +blk.29.attn_qkv.weight q4_K blk.29.ffn_down_exps.weight q3_K blk.29.ffn_down_shexp.weight q3_K +blk.30.attn_qkv.weight q4_K blk.30.ffn_down_shexp.weight q3_K blk.30.ffn_down_exps.weight q3_K blk.31.ffn_down_shexp.weight q3_K @@ -1031,10 +1195,13 @@ blk.31.attn_output.weight iq3_s blk.31.attn_q.weight iq2_s blk.31.attn_v.weight q4_K blk.31.ffn_down_exps.weight q3_K +blk.32.attn_qkv.weight q4_K blk.32.ffn_down_shexp.weight q3_K blk.32.ffn_down_exps.weight q3_K +blk.33.attn_qkv.weight q4_K blk.33.ffn_down_shexp.weight q3_K blk.33.ffn_down_exps.weight q3_K +blk.34.attn_qkv.weight q4_K blk.34.ffn_down_exps.weight q3_K blk.34.ffn_down_shexp.weight q3_K blk.35.ffn_down_shexp.weight q3_K @@ -1043,10 +1210,13 @@ blk.35.attn_output.weight iq3_s blk.35.attn_q.weight iq2_s blk.35.attn_v.weight q4_K blk.35.ffn_down_exps.weight q3_K +blk.36.attn_qkv.weight q4_K blk.36.ffn_down_shexp.weight q3_K blk.36.ffn_down_exps.weight q3_K +blk.37.attn_qkv.weight q4_K blk.37.ffn_down_shexp.weight q3_K blk.37.ffn_down_exps.weight q3_K +blk.38.attn_qkv.weight q4_K blk.38.ffn_down_shexp.weight q3_K blk.38.ffn_down_exps.weight q3_K blk.39.ffn_down_shexp.weight q3_K @@ -1055,10 +1225,13 @@ blk.39.attn_output.weight iq3_s blk.39.attn_q.weight iq2_s blk.39.attn_v.weight q4_K blk.39.ffn_down_exps.weight q3_K +blk.40.attn_qkv.weight q4_K blk.40.ffn_down_exps.weight q3_K blk.40.ffn_down_shexp.weight q3_K +blk.41.attn_qkv.weight q4_K blk.41.ffn_down_shexp.weight q3_K blk.41.ffn_down_exps.weight q3_K +blk.42.attn_qkv.weight q4_K blk.42.ffn_down_shexp.weight q3_K blk.42.ffn_down_exps.weight q3_K blk.43.ffn_down_shexp.weight q3_K @@ -1067,10 +1240,13 @@ blk.43.attn_output.weight iq3_s blk.43.attn_q.weight iq2_s blk.43.attn_v.weight q4_K blk.43.ffn_down_exps.weight q3_K +blk.44.attn_qkv.weight q4_K blk.44.ffn_down_shexp.weight q3_K blk.44.ffn_down_exps.weight q3_K +blk.45.attn_qkv.weight q4_K blk.45.ffn_down_shexp.weight q3_K blk.45.ffn_down_exps.weight q3_K +blk.46.attn_qkv.weight q4_K blk.46.ffn_down_exps.weight q3_K blk.46.ffn_down_shexp.weight q3_K blk.47.ffn_down_shexp.weight q3_K @@ -1083,77 +1259,185 @@ blk.47.ffn_down_exps.weight q3_K [IQ1_S] iq1_s token_embd.weight q2_K +blk.0.attn_qkv.weight q4_K blk.0.ffn_down_exps.weight q2_K blk.0.ffn_down_shexp.weight q2_K +blk.1.attn_qkv.weight q4_K blk.1.ffn_down_shexp.weight q2_K blk.1.ffn_down_exps.weight q2_K +blk.2.attn_qkv.weight q4_K blk.2.ffn_down_shexp.weight q2_K blk.2.ffn_down_exps.weight q2_K blk.3.attn_output.weight iq2_xxs blk.3.attn_v.weight q4_K +blk.4.attn_qkv.weight q4_K +blk.5.attn_qkv.weight q4_K +blk.6.attn_qkv.weight q4_K blk.7.attn_output.weight iq2_xxs blk.7.attn_v.weight q4_K +blk.8.attn_qkv.weight q4_K +blk.9.attn_qkv.weight q4_K +blk.10.attn_qkv.weight q4_K blk.11.attn_output.weight iq2_xxs blk.11.attn_v.weight q4_K +blk.12.attn_qkv.weight q4_K +blk.13.attn_qkv.weight q4_K +blk.14.attn_qkv.weight q4_K blk.15.attn_output.weight iq2_xxs blk.15.attn_v.weight q4_K +blk.16.attn_qkv.weight q4_K +blk.17.attn_qkv.weight q4_K +blk.18.attn_qkv.weight q4_K blk.19.attn_output.weight iq2_xxs blk.19.attn_v.weight q4_K +blk.20.attn_qkv.weight q4_K +blk.21.attn_qkv.weight q4_K +blk.22.attn_qkv.weight q4_K blk.23.attn_output.weight iq2_xxs blk.23.attn_v.weight q4_K +blk.24.attn_qkv.weight q4_K +blk.25.attn_qkv.weight q4_K +blk.26.attn_qkv.weight q4_K blk.27.attn_output.weight iq2_xxs blk.27.attn_v.weight q4_K +blk.28.attn_qkv.weight q4_K +blk.29.attn_qkv.weight q4_K +blk.30.attn_qkv.weight q4_K blk.31.attn_output.weight iq2_xxs blk.31.attn_v.weight q4_K +blk.32.attn_qkv.weight q4_K +blk.33.attn_qkv.weight q4_K +blk.34.attn_qkv.weight q4_K blk.35.attn_output.weight iq2_xxs blk.35.attn_v.weight q4_K +blk.36.attn_qkv.weight q4_K +blk.37.attn_qkv.weight q4_K +blk.38.attn_qkv.weight q4_K blk.39.attn_output.weight iq2_xxs blk.39.attn_v.weight q4_K +blk.40.attn_qkv.weight q4_K +blk.41.attn_qkv.weight q4_K +blk.42.attn_qkv.weight q4_K blk.43.attn_output.weight iq2_xxs blk.43.attn_v.weight q4_K +blk.44.attn_qkv.weight q4_K +blk.45.attn_qkv.weight q4_K +blk.46.attn_qkv.weight q4_K blk.47.attn_output.weight iq2_xxs blk.47.attn_v.weight q4_K output.weight q5_K [IQ4_NL] iq4_nl +blk.0.attn_qkv.weight q5_K blk.0.ffn_down_exps.weight q5_K blk.0.ffn_down_shexp.weight q5_K +blk.1.attn_qkv.weight q5_K blk.1.ffn_down_shexp.weight q5_K blk.1.ffn_down_exps.weight q5_K +blk.2.attn_qkv.weight q5_K blk.2.ffn_down_shexp.weight q5_K blk.2.ffn_down_exps.weight q5_K blk.3.ffn_down_shexp.weight q5_K blk.3.attn_v.weight q5_K blk.3.ffn_down_exps.weight q5_K +blk.4.attn_qkv.weight q5_K blk.4.ffn_down_shexp.weight q5_K blk.4.ffn_down_exps.weight q5_K +blk.5.attn_qkv.weight q5_K blk.5.ffn_down_shexp.weight q5_K blk.5.ffn_down_exps.weight q5_K +blk.6.attn_qkv.weight q5_K blk.7.attn_v.weight q5_K +blk.8.attn_qkv.weight q5_K +blk.9.attn_qkv.weight q5_K +blk.10.attn_qkv.weight q5_K blk.11.attn_v.weight q5_K +blk.12.attn_qkv.weight q5_K +blk.13.attn_qkv.weight q5_K +blk.14.attn_qkv.weight q5_K blk.15.attn_v.weight q5_K +blk.16.attn_qkv.weight q5_K +blk.17.attn_qkv.weight q5_K +blk.18.attn_qkv.weight q5_K blk.19.attn_v.weight q5_K +blk.20.attn_qkv.weight q5_K +blk.21.attn_qkv.weight q5_K +blk.22.attn_qkv.weight q5_K blk.23.attn_v.weight q5_K +blk.24.attn_qkv.weight q5_K +blk.25.attn_qkv.weight q5_K +blk.26.attn_qkv.weight q5_K blk.27.attn_v.weight q5_K +blk.28.attn_qkv.weight q5_K +blk.29.attn_qkv.weight q5_K +blk.30.attn_qkv.weight q5_K blk.31.attn_v.weight q5_K +blk.32.attn_qkv.weight q5_K +blk.33.attn_qkv.weight q5_K +blk.34.attn_qkv.weight q5_K blk.35.attn_v.weight q5_K +blk.36.attn_qkv.weight q5_K +blk.37.attn_qkv.weight q5_K +blk.38.attn_qkv.weight q5_K blk.39.attn_v.weight q5_K +blk.40.attn_qkv.weight q5_K +blk.41.attn_qkv.weight q5_K +blk.42.attn_qkv.weight q5_K blk.43.attn_v.weight q5_K +blk.44.attn_qkv.weight q5_K +blk.45.attn_qkv.weight q5_K +blk.46.attn_qkv.weight q5_K blk.47.attn_v.weight q5_K output.weight q6_K [IQ3_S] iq3_s +blk.0.attn_qkv.weight q4_K +blk.1.attn_qkv.weight q4_K +blk.2.attn_qkv.weight q4_K blk.3.attn_v.weight q4_K +blk.4.attn_qkv.weight q4_K +blk.5.attn_qkv.weight q4_K +blk.6.attn_qkv.weight q4_K blk.7.attn_v.weight q4_K +blk.8.attn_qkv.weight q4_K +blk.9.attn_qkv.weight q4_K +blk.10.attn_qkv.weight q4_K blk.11.attn_v.weight q4_K +blk.12.attn_qkv.weight q4_K +blk.13.attn_qkv.weight q4_K +blk.14.attn_qkv.weight q4_K blk.15.attn_v.weight q4_K +blk.16.attn_qkv.weight q4_K +blk.17.attn_qkv.weight q4_K +blk.18.attn_qkv.weight q4_K blk.19.attn_v.weight q4_K +blk.20.attn_qkv.weight q4_K +blk.21.attn_qkv.weight q4_K +blk.22.attn_qkv.weight q4_K blk.23.attn_v.weight q4_K +blk.24.attn_qkv.weight q4_K +blk.25.attn_qkv.weight q4_K +blk.26.attn_qkv.weight q4_K blk.27.attn_v.weight q4_K +blk.28.attn_qkv.weight q4_K +blk.29.attn_qkv.weight q4_K +blk.30.attn_qkv.weight q4_K blk.31.attn_v.weight q4_K +blk.32.attn_qkv.weight q4_K +blk.33.attn_qkv.weight q4_K +blk.34.attn_qkv.weight q4_K blk.35.attn_v.weight q4_K +blk.36.attn_qkv.weight q4_K +blk.37.attn_qkv.weight q4_K +blk.38.attn_qkv.weight q4_K blk.39.attn_v.weight q4_K +blk.40.attn_qkv.weight q4_K +blk.41.attn_qkv.weight q4_K +blk.42.attn_qkv.weight q4_K blk.43.attn_v.weight q4_K +blk.44.attn_qkv.weight q4_K +blk.45.attn_qkv.weight q4_K +blk.46.attn_qkv.weight q4_K blk.47.attn_v.weight q4_K output.weight q6_K @@ -1234,129 +1518,273 @@ output.weight q6_K [IQ2_S] iq2_xs token_embd.weight iq3_s +blk.0.attn_qkv.weight q4_K blk.0.ffn_down_exps.weight iq3_s blk.0.ffn_down_shexp.weight iq3_s +blk.1.attn_qkv.weight q4_K blk.1.ffn_down_shexp.weight iq3_s blk.1.ffn_down_exps.weight iq3_s +blk.2.attn_qkv.weight q4_K blk.2.ffn_down_shexp.weight iq3_s blk.2.ffn_down_exps.weight iq3_s blk.3.attn_output.weight iq3_s blk.3.attn_v.weight q4_K +blk.4.attn_qkv.weight q4_K +blk.5.attn_qkv.weight q4_K +blk.6.attn_qkv.weight q4_K blk.7.attn_output.weight iq3_s blk.7.attn_v.weight q4_K +blk.8.attn_qkv.weight q4_K +blk.9.attn_qkv.weight q4_K +blk.10.attn_qkv.weight q4_K blk.11.attn_output.weight iq3_s blk.11.attn_v.weight q4_K +blk.12.attn_qkv.weight q4_K +blk.13.attn_qkv.weight q4_K +blk.14.attn_qkv.weight q4_K blk.15.attn_output.weight iq3_s blk.15.attn_v.weight q4_K +blk.16.attn_qkv.weight q4_K +blk.17.attn_qkv.weight q4_K +blk.18.attn_qkv.weight q4_K blk.19.attn_output.weight iq3_s blk.19.attn_v.weight q4_K +blk.20.attn_qkv.weight q4_K +blk.21.attn_qkv.weight q4_K +blk.22.attn_qkv.weight q4_K blk.23.attn_output.weight iq3_s blk.23.attn_v.weight q4_K +blk.24.attn_qkv.weight q4_K +blk.25.attn_qkv.weight q4_K +blk.26.attn_qkv.weight q4_K blk.27.attn_output.weight iq3_s blk.27.attn_v.weight q4_K +blk.28.attn_qkv.weight q4_K +blk.29.attn_qkv.weight q4_K +blk.30.attn_qkv.weight q4_K blk.31.attn_output.weight iq3_s blk.31.attn_v.weight q4_K +blk.32.attn_qkv.weight q4_K +blk.33.attn_qkv.weight q4_K +blk.34.attn_qkv.weight q4_K blk.35.attn_output.weight iq3_s blk.35.attn_v.weight q4_K +blk.36.attn_qkv.weight q4_K +blk.37.attn_qkv.weight q4_K +blk.38.attn_qkv.weight q4_K blk.39.attn_output.weight iq3_s blk.39.attn_v.weight q4_K +blk.40.attn_qkv.weight q4_K +blk.41.attn_qkv.weight q4_K +blk.42.attn_qkv.weight q4_K blk.43.attn_output.weight iq3_s blk.43.attn_v.weight q4_K +blk.44.attn_qkv.weight q4_K +blk.45.attn_qkv.weight q4_K +blk.46.attn_qkv.weight q4_K blk.47.attn_output.weight iq3_s blk.47.attn_v.weight q4_K output.weight q5_K [IQ2_M] iq2_s token_embd.weight iq3_s +blk.0.attn_qkv.weight q4_K blk.0.ffn_down_exps.weight iq3_s blk.0.ffn_down_shexp.weight iq3_s +blk.1.attn_qkv.weight q4_K blk.1.ffn_down_shexp.weight iq3_s blk.1.ffn_down_exps.weight iq3_s +blk.2.attn_qkv.weight q4_K blk.2.ffn_down_shexp.weight iq3_s blk.2.ffn_down_exps.weight iq3_s blk.3.attn_output.weight iq3_s blk.3.attn_v.weight q4_K +blk.4.attn_qkv.weight q4_K +blk.5.attn_qkv.weight q4_K +blk.6.attn_qkv.weight q4_K blk.7.attn_output.weight iq3_s blk.7.attn_v.weight q4_K +blk.8.attn_qkv.weight q4_K +blk.9.attn_qkv.weight q4_K +blk.10.attn_qkv.weight q4_K blk.11.attn_output.weight iq3_s blk.11.attn_v.weight q4_K +blk.12.attn_qkv.weight q4_K +blk.13.attn_qkv.weight q4_K +blk.14.attn_qkv.weight q4_K blk.15.attn_output.weight iq3_s blk.15.attn_v.weight q4_K +blk.16.attn_qkv.weight q4_K +blk.17.attn_qkv.weight q4_K +blk.18.attn_qkv.weight q4_K blk.19.attn_output.weight iq3_s blk.19.attn_v.weight q4_K +blk.20.attn_qkv.weight q4_K +blk.21.attn_qkv.weight q4_K +blk.22.attn_qkv.weight q4_K blk.23.attn_output.weight iq3_s blk.23.attn_v.weight q4_K +blk.24.attn_qkv.weight q4_K +blk.25.attn_qkv.weight q4_K +blk.26.attn_qkv.weight q4_K blk.27.attn_output.weight iq3_s blk.27.attn_v.weight q4_K +blk.28.attn_qkv.weight q4_K +blk.29.attn_qkv.weight q4_K +blk.30.attn_qkv.weight q4_K blk.31.attn_output.weight iq3_s blk.31.attn_v.weight q4_K +blk.32.attn_qkv.weight q4_K +blk.33.attn_qkv.weight q4_K +blk.34.attn_qkv.weight q4_K blk.35.attn_output.weight iq3_s blk.35.attn_v.weight q4_K +blk.36.attn_qkv.weight q4_K +blk.37.attn_qkv.weight q4_K +blk.38.attn_qkv.weight q4_K blk.39.attn_output.weight iq3_s blk.39.attn_v.weight q4_K +blk.40.attn_qkv.weight q4_K +blk.41.attn_qkv.weight q4_K +blk.42.attn_qkv.weight q4_K blk.43.attn_output.weight iq3_s blk.43.attn_v.weight q4_K +blk.44.attn_qkv.weight q4_K +blk.45.attn_qkv.weight q4_K +blk.46.attn_qkv.weight q4_K blk.47.attn_output.weight iq3_s blk.47.attn_v.weight q4_K output.weight q5_K [IQ4_XS] iq4_xs +blk.0.attn_qkv.weight q5_K blk.0.ffn_down_exps.weight q5_K blk.0.ffn_down_shexp.weight q5_K +blk.1.attn_qkv.weight q5_K blk.1.ffn_down_shexp.weight q5_K blk.1.ffn_down_exps.weight q5_K +blk.2.attn_qkv.weight q5_K blk.2.ffn_down_shexp.weight q5_K blk.2.ffn_down_exps.weight q5_K blk.3.ffn_down_shexp.weight q5_K blk.3.attn_v.weight q5_K blk.3.ffn_down_exps.weight q5_K +blk.4.attn_qkv.weight q5_K blk.4.ffn_down_shexp.weight q5_K blk.4.ffn_down_exps.weight q5_K +blk.5.attn_qkv.weight q5_K blk.5.ffn_down_shexp.weight q5_K blk.5.ffn_down_exps.weight q5_K +blk.6.attn_qkv.weight q5_K blk.7.attn_v.weight q5_K +blk.8.attn_qkv.weight q5_K +blk.9.attn_qkv.weight q5_K +blk.10.attn_qkv.weight q5_K blk.11.attn_v.weight q5_K +blk.12.attn_qkv.weight q5_K +blk.13.attn_qkv.weight q5_K +blk.14.attn_qkv.weight q5_K blk.15.attn_v.weight q5_K +blk.16.attn_qkv.weight q5_K +blk.17.attn_qkv.weight q5_K +blk.18.attn_qkv.weight q5_K blk.19.attn_v.weight q5_K +blk.20.attn_qkv.weight q5_K +blk.21.attn_qkv.weight q5_K +blk.22.attn_qkv.weight q5_K blk.23.attn_v.weight q5_K +blk.24.attn_qkv.weight q5_K +blk.25.attn_qkv.weight q5_K +blk.26.attn_qkv.weight q5_K blk.27.attn_v.weight q5_K +blk.28.attn_qkv.weight q5_K +blk.29.attn_qkv.weight q5_K +blk.30.attn_qkv.weight q5_K blk.31.attn_v.weight q5_K +blk.32.attn_qkv.weight q5_K +blk.33.attn_qkv.weight q5_K +blk.34.attn_qkv.weight q5_K blk.35.attn_v.weight q5_K +blk.36.attn_qkv.weight q5_K +blk.37.attn_qkv.weight q5_K +blk.38.attn_qkv.weight q5_K blk.39.attn_v.weight q5_K +blk.40.attn_qkv.weight q5_K +blk.41.attn_qkv.weight q5_K +blk.42.attn_qkv.weight q5_K blk.43.attn_v.weight q5_K +blk.44.attn_qkv.weight q5_K +blk.45.attn_qkv.weight q5_K +blk.46.attn_qkv.weight q5_K blk.47.attn_v.weight q5_K output.weight q6_K [IQ1_M] iq1_m token_embd.weight q2_K +blk.0.attn_qkv.weight q4_K blk.0.ffn_down_exps.weight q2_K blk.0.ffn_down_shexp.weight q2_K +blk.1.attn_qkv.weight q4_K blk.1.ffn_down_shexp.weight q2_K blk.1.ffn_down_exps.weight q2_K +blk.2.attn_qkv.weight q4_K blk.2.ffn_down_shexp.weight q2_K blk.2.ffn_down_exps.weight q2_K blk.3.attn_output.weight iq2_xxs blk.3.attn_v.weight q4_K +blk.4.attn_qkv.weight q4_K +blk.5.attn_qkv.weight q4_K +blk.6.attn_qkv.weight q4_K blk.7.attn_output.weight iq2_xxs blk.7.attn_v.weight q4_K +blk.8.attn_qkv.weight q4_K +blk.9.attn_qkv.weight q4_K +blk.10.attn_qkv.weight q4_K blk.11.attn_output.weight iq2_xxs blk.11.attn_v.weight q4_K +blk.12.attn_qkv.weight q4_K +blk.13.attn_qkv.weight q4_K +blk.14.attn_qkv.weight q4_K blk.15.attn_output.weight iq2_xxs blk.15.attn_v.weight q4_K +blk.16.attn_qkv.weight q4_K +blk.17.attn_qkv.weight q4_K +blk.18.attn_qkv.weight q4_K blk.19.attn_output.weight iq2_xxs blk.19.attn_v.weight q4_K +blk.20.attn_qkv.weight q4_K +blk.21.attn_qkv.weight q4_K +blk.22.attn_qkv.weight q4_K blk.23.attn_output.weight iq2_xxs blk.23.attn_v.weight q4_K +blk.24.attn_qkv.weight q4_K +blk.25.attn_qkv.weight q4_K +blk.26.attn_qkv.weight q4_K blk.27.attn_output.weight iq2_xxs blk.27.attn_v.weight q4_K +blk.28.attn_qkv.weight q4_K +blk.29.attn_qkv.weight q4_K +blk.30.attn_qkv.weight q4_K blk.31.attn_output.weight iq2_xxs blk.31.attn_v.weight q4_K +blk.32.attn_qkv.weight q4_K +blk.33.attn_qkv.weight q4_K +blk.34.attn_qkv.weight q4_K blk.35.attn_output.weight iq2_xxs blk.35.attn_v.weight q4_K +blk.36.attn_qkv.weight q4_K +blk.37.attn_qkv.weight q4_K +blk.38.attn_qkv.weight q4_K blk.39.attn_output.weight iq2_xxs blk.39.attn_v.weight q4_K +blk.40.attn_qkv.weight q4_K +blk.41.attn_qkv.weight q4_K +blk.42.attn_qkv.weight q4_K blk.43.attn_output.weight iq2_xxs blk.43.attn_v.weight q4_K +blk.44.attn_qkv.weight q4_K +blk.45.attn_qkv.weight q4_K +blk.46.attn_qkv.weight q4_K blk.47.attn_output.weight iq2_xxs blk.47.attn_v.weight q4_K output.weight q5_K diff --git a/tests/snapshots/qwen3.5-27b.schema b/tests/snapshots/qwen3.5-27b.schema index f1f655a847..b5e07f01fc 100644 --- a/tests/snapshots/qwen3.5-27b.schema +++ b/tests/snapshots/qwen3.5-27b.schema @@ -23,98 +23,146 @@ output.weight q6_K [Q2_K] q2_K output.weight q6_K +blk.0.attn_qkv.weight q4_K blk.0.ffn_down.weight q3_K +blk.1.attn_qkv.weight q4_K blk.1.ffn_down.weight q3_K +blk.2.attn_qkv.weight q4_K blk.2.ffn_down.weight q3_K blk.3.attn_output.weight q3_K blk.3.attn_v.weight q4_K blk.3.ffn_down.weight q3_K +blk.4.attn_qkv.weight q4_K blk.4.ffn_down.weight q3_K +blk.5.attn_qkv.weight q4_K blk.5.ffn_down.weight q3_K +blk.6.attn_qkv.weight q4_K blk.6.ffn_down.weight q3_K blk.7.attn_output.weight q3_K blk.7.attn_v.weight q4_K blk.7.ffn_down.weight q3_K +blk.8.attn_qkv.weight q4_K blk.8.ffn_down.weight q3_K +blk.9.attn_qkv.weight q4_K blk.9.ffn_down.weight q3_K +blk.10.attn_qkv.weight q4_K blk.10.ffn_down.weight q3_K blk.11.attn_output.weight q3_K blk.11.attn_v.weight q4_K blk.11.ffn_down.weight q3_K +blk.12.attn_qkv.weight q4_K blk.12.ffn_down.weight q3_K +blk.13.attn_qkv.weight q4_K blk.13.ffn_down.weight q3_K +blk.14.attn_qkv.weight q4_K blk.14.ffn_down.weight q3_K blk.15.attn_output.weight q3_K blk.15.attn_v.weight q4_K blk.15.ffn_down.weight q3_K +blk.16.attn_qkv.weight q4_K blk.16.ffn_down.weight q3_K +blk.17.attn_qkv.weight q4_K blk.17.ffn_down.weight q3_K +blk.18.attn_qkv.weight q4_K blk.18.ffn_down.weight q3_K blk.19.attn_output.weight q3_K blk.19.attn_v.weight q4_K blk.19.ffn_down.weight q3_K +blk.20.attn_qkv.weight q4_K blk.20.ffn_down.weight q3_K +blk.21.attn_qkv.weight q4_K blk.21.ffn_down.weight q3_K +blk.22.attn_qkv.weight q4_K blk.22.ffn_down.weight q3_K blk.23.attn_output.weight q3_K blk.23.attn_v.weight q4_K blk.23.ffn_down.weight q3_K +blk.24.attn_qkv.weight q4_K blk.24.ffn_down.weight q3_K +blk.25.attn_qkv.weight q4_K blk.25.ffn_down.weight q3_K +blk.26.attn_qkv.weight q4_K blk.26.ffn_down.weight q3_K blk.27.attn_output.weight q3_K blk.27.attn_v.weight q4_K blk.27.ffn_down.weight q3_K +blk.28.attn_qkv.weight q4_K blk.28.ffn_down.weight q3_K +blk.29.attn_qkv.weight q4_K blk.29.ffn_down.weight q3_K +blk.30.attn_qkv.weight q4_K blk.30.ffn_down.weight q3_K blk.31.attn_output.weight q3_K blk.31.attn_v.weight q4_K blk.31.ffn_down.weight q3_K +blk.32.attn_qkv.weight q4_K blk.32.ffn_down.weight q3_K +blk.33.attn_qkv.weight q4_K blk.33.ffn_down.weight q3_K +blk.34.attn_qkv.weight q4_K blk.34.ffn_down.weight q3_K blk.35.attn_output.weight q3_K blk.35.attn_v.weight q4_K blk.35.ffn_down.weight q3_K +blk.36.attn_qkv.weight q4_K blk.36.ffn_down.weight q3_K +blk.37.attn_qkv.weight q4_K blk.37.ffn_down.weight q3_K +blk.38.attn_qkv.weight q4_K blk.38.ffn_down.weight q3_K blk.39.attn_output.weight q3_K blk.39.attn_v.weight q4_K blk.39.ffn_down.weight q3_K +blk.40.attn_qkv.weight q4_K blk.40.ffn_down.weight q3_K +blk.41.attn_qkv.weight q4_K blk.41.ffn_down.weight q3_K +blk.42.attn_qkv.weight q4_K blk.42.ffn_down.weight q3_K blk.43.attn_output.weight q3_K blk.43.attn_v.weight q4_K blk.43.ffn_down.weight q3_K +blk.44.attn_qkv.weight q4_K blk.44.ffn_down.weight q3_K +blk.45.attn_qkv.weight q4_K blk.45.ffn_down.weight q3_K +blk.46.attn_qkv.weight q4_K blk.46.ffn_down.weight q3_K blk.47.attn_output.weight q3_K blk.47.attn_v.weight q4_K blk.47.ffn_down.weight q3_K +blk.48.attn_qkv.weight q4_K blk.48.ffn_down.weight q3_K +blk.49.attn_qkv.weight q4_K blk.49.ffn_down.weight q3_K +blk.50.attn_qkv.weight q4_K blk.50.ffn_down.weight q3_K blk.51.attn_output.weight q3_K blk.51.attn_v.weight q4_K blk.51.ffn_down.weight q3_K +blk.52.attn_qkv.weight q4_K blk.52.ffn_down.weight q3_K +blk.53.attn_qkv.weight q4_K blk.53.ffn_down.weight q3_K +blk.54.attn_qkv.weight q4_K blk.54.ffn_down.weight q3_K blk.55.attn_output.weight q3_K blk.55.attn_v.weight q4_K blk.55.ffn_down.weight q3_K +blk.56.attn_qkv.weight q4_K blk.56.ffn_down.weight q3_K +blk.57.attn_qkv.weight q4_K blk.57.ffn_down.weight q3_K +blk.58.attn_qkv.weight q4_K blk.58.ffn_down.weight q3_K blk.59.attn_output.weight q3_K blk.59.attn_v.weight q4_K blk.59.ffn_down.weight q3_K +blk.60.attn_qkv.weight q4_K blk.60.ffn_down.weight q3_K +blk.61.attn_qkv.weight q4_K blk.61.ffn_down.weight q3_K +blk.62.attn_qkv.weight q4_K blk.62.ffn_down.weight q3_K blk.63.attn_output.weight q3_K blk.63.attn_v.weight q4_K @@ -125,14 +173,14 @@ output.weight q6_K [Q3_K_M] q3_K output.weight q6_K -blk.0.attn_qkv.weight q4_K +blk.0.attn_qkv.weight q5_K blk.0.ffn_down.weight q5_K -blk.1.attn_qkv.weight q4_K +blk.1.attn_qkv.weight q5_K blk.1.ffn_down.weight q5_K blk.2.attn_qkv.weight q4_K blk.2.ffn_down.weight q5_K blk.3.attn_output.weight q4_K -blk.3.attn_v.weight q5_K +blk.3.attn_v.weight q4_K blk.3.ffn_down.weight q5_K blk.4.attn_qkv.weight q4_K blk.4.ffn_down.weight q4_K @@ -141,7 +189,7 @@ blk.5.ffn_down.weight q4_K blk.6.attn_qkv.weight q4_K blk.6.ffn_down.weight q4_K blk.7.attn_output.weight q4_K -blk.7.attn_v.weight q5_K +blk.7.attn_v.weight q4_K blk.7.ffn_down.weight q4_K blk.8.attn_qkv.weight q4_K blk.8.ffn_down.weight q4_K @@ -272,146 +320,146 @@ blk.63.ffn_down.weight q4_K [Q3_K_L] q3_K output.weight q6_K -blk.0.attn_qkv.weight q4_K +blk.0.attn_qkv.weight q5_K blk.0.ffn_down.weight q5_K -blk.1.attn_qkv.weight q4_K +blk.1.attn_qkv.weight q5_K blk.1.ffn_down.weight q5_K -blk.2.attn_qkv.weight q4_K +blk.2.attn_qkv.weight q5_K blk.2.ffn_down.weight q5_K blk.3.attn_output.weight q5_K blk.3.attn_v.weight q5_K blk.3.ffn_down.weight q5_K -blk.4.attn_qkv.weight q4_K +blk.4.attn_qkv.weight q5_K blk.4.ffn_down.weight q5_K -blk.5.attn_qkv.weight q4_K +blk.5.attn_qkv.weight q5_K blk.5.ffn_down.weight q5_K -blk.6.attn_qkv.weight q4_K +blk.6.attn_qkv.weight q5_K blk.6.ffn_down.weight q5_K blk.7.attn_output.weight q5_K blk.7.attn_v.weight q5_K blk.7.ffn_down.weight q5_K -blk.8.attn_qkv.weight q4_K +blk.8.attn_qkv.weight q5_K blk.8.ffn_down.weight q5_K -blk.9.attn_qkv.weight q4_K +blk.9.attn_qkv.weight q5_K blk.9.ffn_down.weight q5_K -blk.10.attn_qkv.weight q4_K +blk.10.attn_qkv.weight q5_K blk.10.ffn_down.weight q5_K blk.11.attn_output.weight q5_K blk.11.attn_v.weight q5_K blk.11.ffn_down.weight q5_K -blk.12.attn_qkv.weight q4_K +blk.12.attn_qkv.weight q5_K blk.12.ffn_down.weight q5_K -blk.13.attn_qkv.weight q4_K +blk.13.attn_qkv.weight q5_K blk.13.ffn_down.weight q5_K -blk.14.attn_qkv.weight q4_K +blk.14.attn_qkv.weight q5_K blk.14.ffn_down.weight q5_K blk.15.attn_output.weight q5_K blk.15.attn_v.weight q5_K blk.15.ffn_down.weight q5_K -blk.16.attn_qkv.weight q4_K +blk.16.attn_qkv.weight q5_K blk.16.ffn_down.weight q5_K -blk.17.attn_qkv.weight q4_K +blk.17.attn_qkv.weight q5_K blk.17.ffn_down.weight q5_K -blk.18.attn_qkv.weight q4_K +blk.18.attn_qkv.weight q5_K blk.18.ffn_down.weight q5_K blk.19.attn_output.weight q5_K blk.19.attn_v.weight q5_K blk.19.ffn_down.weight q5_K -blk.20.attn_qkv.weight q4_K +blk.20.attn_qkv.weight q5_K blk.20.ffn_down.weight q5_K -blk.21.attn_qkv.weight q4_K +blk.21.attn_qkv.weight q5_K blk.21.ffn_down.weight q5_K -blk.22.attn_qkv.weight q4_K +blk.22.attn_qkv.weight q5_K blk.22.ffn_down.weight q5_K blk.23.attn_output.weight q5_K blk.23.attn_v.weight q5_K blk.23.ffn_down.weight q5_K -blk.24.attn_qkv.weight q4_K +blk.24.attn_qkv.weight q5_K blk.24.ffn_down.weight q5_K -blk.25.attn_qkv.weight q4_K +blk.25.attn_qkv.weight q5_K blk.25.ffn_down.weight q5_K -blk.26.attn_qkv.weight q4_K +blk.26.attn_qkv.weight q5_K blk.26.ffn_down.weight q5_K blk.27.attn_output.weight q5_K blk.27.attn_v.weight q5_K blk.27.ffn_down.weight q5_K -blk.28.attn_qkv.weight q4_K +blk.28.attn_qkv.weight q5_K blk.28.ffn_down.weight q5_K -blk.29.attn_qkv.weight q4_K +blk.29.attn_qkv.weight q5_K blk.29.ffn_down.weight q5_K -blk.30.attn_qkv.weight q4_K +blk.30.attn_qkv.weight q5_K blk.30.ffn_down.weight q5_K blk.31.attn_output.weight q5_K blk.31.attn_v.weight q5_K blk.31.ffn_down.weight q5_K -blk.32.attn_qkv.weight q4_K +blk.32.attn_qkv.weight q5_K blk.32.ffn_down.weight q5_K -blk.33.attn_qkv.weight q4_K +blk.33.attn_qkv.weight q5_K blk.33.ffn_down.weight q5_K -blk.34.attn_qkv.weight q4_K +blk.34.attn_qkv.weight q5_K blk.34.ffn_down.weight q5_K blk.35.attn_output.weight q5_K blk.35.attn_v.weight q5_K blk.35.ffn_down.weight q5_K -blk.36.attn_qkv.weight q4_K +blk.36.attn_qkv.weight q5_K blk.36.ffn_down.weight q5_K -blk.37.attn_qkv.weight q4_K +blk.37.attn_qkv.weight q5_K blk.37.ffn_down.weight q5_K -blk.38.attn_qkv.weight q4_K +blk.38.attn_qkv.weight q5_K blk.38.ffn_down.weight q5_K blk.39.attn_output.weight q5_K blk.39.attn_v.weight q5_K blk.39.ffn_down.weight q5_K -blk.40.attn_qkv.weight q4_K +blk.40.attn_qkv.weight q5_K blk.40.ffn_down.weight q5_K -blk.41.attn_qkv.weight q4_K +blk.41.attn_qkv.weight q5_K blk.41.ffn_down.weight q5_K -blk.42.attn_qkv.weight q4_K +blk.42.attn_qkv.weight q5_K blk.42.ffn_down.weight q5_K blk.43.attn_output.weight q5_K blk.43.attn_v.weight q5_K blk.43.ffn_down.weight q5_K -blk.44.attn_qkv.weight q4_K +blk.44.attn_qkv.weight q5_K blk.44.ffn_down.weight q5_K -blk.45.attn_qkv.weight q4_K +blk.45.attn_qkv.weight q5_K blk.45.ffn_down.weight q5_K -blk.46.attn_qkv.weight q4_K +blk.46.attn_qkv.weight q5_K blk.46.ffn_down.weight q5_K blk.47.attn_output.weight q5_K blk.47.attn_v.weight q5_K blk.47.ffn_down.weight q5_K -blk.48.attn_qkv.weight q4_K +blk.48.attn_qkv.weight q5_K blk.48.ffn_down.weight q5_K -blk.49.attn_qkv.weight q4_K +blk.49.attn_qkv.weight q5_K blk.49.ffn_down.weight q5_K -blk.50.attn_qkv.weight q4_K +blk.50.attn_qkv.weight q5_K blk.50.ffn_down.weight q5_K blk.51.attn_output.weight q5_K blk.51.attn_v.weight q5_K blk.51.ffn_down.weight q5_K -blk.52.attn_qkv.weight q4_K +blk.52.attn_qkv.weight q5_K blk.52.ffn_down.weight q5_K -blk.53.attn_qkv.weight q4_K +blk.53.attn_qkv.weight q5_K blk.53.ffn_down.weight q5_K -blk.54.attn_qkv.weight q4_K +blk.54.attn_qkv.weight q5_K blk.54.ffn_down.weight q5_K blk.55.attn_output.weight q5_K blk.55.attn_v.weight q5_K blk.55.ffn_down.weight q5_K -blk.56.attn_qkv.weight q4_K +blk.56.attn_qkv.weight q5_K blk.56.ffn_down.weight q5_K -blk.57.attn_qkv.weight q4_K +blk.57.attn_qkv.weight q5_K blk.57.ffn_down.weight q5_K -blk.58.attn_qkv.weight q4_K +blk.58.attn_qkv.weight q5_K blk.58.ffn_down.weight q5_K blk.59.attn_output.weight q5_K blk.59.attn_v.weight q5_K blk.59.ffn_down.weight q5_K -blk.60.attn_qkv.weight q4_K +blk.60.attn_qkv.weight q5_K blk.60.ffn_down.weight q5_K -blk.61.attn_qkv.weight q4_K +blk.61.attn_qkv.weight q5_K blk.61.ffn_down.weight q5_K -blk.62.attn_qkv.weight q4_K +blk.62.attn_qkv.weight q5_K blk.62.ffn_down.weight q5_K blk.63.attn_output.weight q5_K blk.63.attn_v.weight q5_K @@ -419,110 +467,84 @@ blk.63.ffn_down.weight q5_K [Q4_K_S] q4_K output.weight q6_K +blk.0.attn_qkv.weight q5_K blk.0.ffn_down.weight q5_K +blk.1.attn_qkv.weight q5_K blk.1.ffn_down.weight q5_K +blk.2.attn_qkv.weight q5_K blk.2.ffn_down.weight q5_K blk.3.attn_v.weight q5_K blk.3.ffn_down.weight q5_K blk.4.ffn_down.weight q5_K blk.5.ffn_down.weight q5_K blk.6.ffn_down.weight q5_K -blk.7.attn_v.weight q5_K blk.7.ffn_down.weight q5_K -blk.11.attn_v.weight q5_K -blk.15.attn_v.weight q5_K [Q4_K_M] q4_K output.weight q6_K -blk.0.attn_qkv.weight q5_K +blk.0.attn_qkv.weight q6_K blk.0.ffn_down.weight q6_K -blk.1.attn_qkv.weight q5_K +blk.1.attn_qkv.weight q6_K blk.1.ffn_down.weight q6_K -blk.2.attn_qkv.weight q5_K +blk.2.attn_qkv.weight q6_K blk.2.ffn_down.weight q6_K blk.3.attn_v.weight q6_K blk.3.ffn_down.weight q6_K -blk.4.attn_qkv.weight q5_K +blk.4.attn_qkv.weight q6_K blk.4.ffn_down.weight q6_K -blk.5.attn_qkv.weight q5_K +blk.5.attn_qkv.weight q6_K blk.5.ffn_down.weight q6_K -blk.6.attn_qkv.weight q5_K +blk.6.attn_qkv.weight q6_K blk.6.ffn_down.weight q6_K blk.7.attn_v.weight q6_K blk.7.ffn_down.weight q6_K -blk.8.attn_qkv.weight q5_K -blk.9.attn_qkv.weight q5_K -blk.10.attn_qkv.weight q5_K +blk.10.attn_qkv.weight q6_K blk.10.ffn_down.weight q6_K -blk.11.attn_v.weight q6_K -blk.12.attn_qkv.weight q5_K -blk.13.attn_qkv.weight q5_K +blk.13.attn_qkv.weight q6_K blk.13.ffn_down.weight q6_K -blk.14.attn_qkv.weight q5_K -blk.15.attn_v.weight q6_K -blk.16.attn_qkv.weight q5_K +blk.16.attn_qkv.weight q6_K blk.16.ffn_down.weight q6_K -blk.17.attn_qkv.weight q5_K -blk.18.attn_qkv.weight q5_K blk.19.attn_v.weight q6_K blk.19.ffn_down.weight q6_K -blk.20.attn_qkv.weight q5_K -blk.21.attn_qkv.weight q5_K -blk.22.attn_qkv.weight q5_K +blk.22.attn_qkv.weight q6_K blk.22.ffn_down.weight q6_K -blk.23.attn_v.weight q6_K -blk.24.attn_qkv.weight q5_K -blk.25.attn_qkv.weight q5_K +blk.25.attn_qkv.weight q6_K blk.25.ffn_down.weight q6_K -blk.26.attn_qkv.weight q5_K -blk.27.attn_v.weight q6_K -blk.28.attn_qkv.weight q5_K +blk.28.attn_qkv.weight q6_K blk.28.ffn_down.weight q6_K -blk.29.attn_qkv.weight q5_K -blk.30.attn_qkv.weight q5_K blk.31.attn_v.weight q6_K blk.31.ffn_down.weight q6_K -blk.32.attn_qkv.weight q5_K -blk.33.attn_qkv.weight q5_K -blk.34.attn_qkv.weight q5_K +blk.34.attn_qkv.weight q6_K blk.34.ffn_down.weight q6_K -blk.36.attn_qkv.weight q5_K -blk.37.attn_qkv.weight q5_K +blk.37.attn_qkv.weight q6_K blk.37.ffn_down.weight q6_K -blk.38.attn_qkv.weight q5_K -blk.40.attn_qkv.weight q5_K +blk.40.attn_qkv.weight q6_K blk.40.ffn_down.weight q6_K -blk.41.attn_qkv.weight q5_K -blk.42.attn_qkv.weight q5_K blk.43.attn_v.weight q6_K blk.43.ffn_down.weight q6_K -blk.44.attn_qkv.weight q5_K -blk.45.attn_qkv.weight q5_K -blk.46.attn_qkv.weight q5_K +blk.46.attn_qkv.weight q6_K blk.46.ffn_down.weight q6_K -blk.48.attn_qkv.weight q5_K -blk.49.attn_qkv.weight q5_K +blk.49.attn_qkv.weight q6_K blk.49.ffn_down.weight q6_K -blk.50.attn_qkv.weight q5_K -blk.52.attn_qkv.weight q5_K +blk.52.attn_qkv.weight q6_K blk.52.ffn_down.weight q6_K -blk.53.attn_qkv.weight q5_K -blk.54.attn_qkv.weight q5_K blk.55.attn_v.weight q6_K blk.55.ffn_down.weight q6_K -blk.56.attn_qkv.weight q5_K +blk.56.attn_qkv.weight q6_K blk.56.ffn_down.weight q6_K -blk.57.attn_qkv.weight q5_K +blk.57.attn_qkv.weight q6_K blk.57.ffn_down.weight q6_K -blk.58.attn_qkv.weight q5_K +blk.58.attn_qkv.weight q6_K blk.58.ffn_down.weight q6_K +blk.59.attn_v.weight q6_K blk.59.ffn_down.weight q6_K -blk.60.attn_qkv.weight q5_K +blk.60.attn_qkv.weight q6_K blk.60.ffn_down.weight q6_K -blk.61.attn_qkv.weight q5_K +blk.61.attn_qkv.weight q6_K blk.61.ffn_down.weight q6_K -blk.62.attn_qkv.weight q5_K +blk.62.attn_qkv.weight q6_K blk.62.ffn_down.weight q6_K +blk.63.attn_v.weight q6_K blk.63.ffn_down.weight q6_K [Q5_K_S] q5_K @@ -546,64 +568,36 @@ blk.6.attn_qkv.weight q6_K blk.6.ffn_down.weight q6_K blk.7.attn_v.weight q6_K blk.7.ffn_down.weight q6_K -blk.8.attn_qkv.weight q6_K -blk.9.attn_qkv.weight q6_K blk.10.attn_qkv.weight q6_K blk.10.ffn_down.weight q6_K -blk.11.attn_v.weight q6_K -blk.12.attn_qkv.weight q6_K blk.13.attn_qkv.weight q6_K blk.13.ffn_down.weight q6_K -blk.14.attn_qkv.weight q6_K -blk.15.attn_v.weight q6_K blk.16.attn_qkv.weight q6_K blk.16.ffn_down.weight q6_K -blk.17.attn_qkv.weight q6_K -blk.18.attn_qkv.weight q6_K blk.19.attn_v.weight q6_K blk.19.ffn_down.weight q6_K -blk.20.attn_qkv.weight q6_K -blk.21.attn_qkv.weight q6_K blk.22.attn_qkv.weight q6_K blk.22.ffn_down.weight q6_K -blk.23.attn_v.weight q6_K -blk.24.attn_qkv.weight q6_K blk.25.attn_qkv.weight q6_K blk.25.ffn_down.weight q6_K -blk.26.attn_qkv.weight q6_K -blk.27.attn_v.weight q6_K blk.28.attn_qkv.weight q6_K blk.28.ffn_down.weight q6_K -blk.29.attn_qkv.weight q6_K -blk.30.attn_qkv.weight q6_K blk.31.attn_v.weight q6_K blk.31.ffn_down.weight q6_K -blk.32.attn_qkv.weight q6_K -blk.33.attn_qkv.weight q6_K blk.34.attn_qkv.weight q6_K blk.34.ffn_down.weight q6_K -blk.36.attn_qkv.weight q6_K blk.37.attn_qkv.weight q6_K blk.37.ffn_down.weight q6_K -blk.38.attn_qkv.weight q6_K blk.40.attn_qkv.weight q6_K blk.40.ffn_down.weight q6_K -blk.41.attn_qkv.weight q6_K -blk.42.attn_qkv.weight q6_K blk.43.attn_v.weight q6_K blk.43.ffn_down.weight q6_K -blk.44.attn_qkv.weight q6_K -blk.45.attn_qkv.weight q6_K blk.46.attn_qkv.weight q6_K blk.46.ffn_down.weight q6_K -blk.48.attn_qkv.weight q6_K blk.49.attn_qkv.weight q6_K blk.49.ffn_down.weight q6_K -blk.50.attn_qkv.weight q6_K blk.52.attn_qkv.weight q6_K blk.52.ffn_down.weight q6_K -blk.53.attn_qkv.weight q6_K -blk.54.attn_qkv.weight q6_K blk.55.attn_v.weight q6_K blk.55.ffn_down.weight q6_K blk.56.attn_qkv.weight q6_K @@ -612,6 +606,7 @@ blk.57.attn_qkv.weight q6_K blk.57.ffn_down.weight q6_K blk.58.attn_qkv.weight q6_K blk.58.ffn_down.weight q6_K +blk.59.attn_v.weight q6_K blk.59.ffn_down.weight q6_K blk.60.attn_qkv.weight q6_K blk.60.ffn_down.weight q6_K @@ -619,6 +614,7 @@ blk.61.attn_qkv.weight q6_K blk.61.ffn_down.weight q6_K blk.62.attn_qkv.weight q6_K blk.62.ffn_down.weight q6_K +blk.63.attn_v.weight q6_K blk.63.ffn_down.weight q6_K [Q6_K] q6_K @@ -626,98 +622,251 @@ blk.63.ffn_down.weight q6_K [IQ2_XXS] iq2_xxs output.weight q5_K token_embd.weight q2_K +blk.0.attn_qkv.weight q4_K blk.0.ffn_down.weight q2_K +blk.1.attn_qkv.weight q4_K blk.1.ffn_down.weight q2_K +blk.2.attn_qkv.weight q4_K blk.2.ffn_down.weight q2_K blk.3.attn_v.weight q4_K blk.3.ffn_down.weight q2_K +blk.4.attn_qkv.weight q4_K blk.4.ffn_down.weight q2_K +blk.5.attn_qkv.weight q4_K blk.5.ffn_down.weight q2_K +blk.6.attn_qkv.weight q4_K blk.6.ffn_down.weight q2_K blk.7.attn_v.weight q4_K blk.7.ffn_down.weight q2_K +blk.8.attn_qkv.weight q4_K +blk.9.attn_qkv.weight q4_K +blk.10.attn_qkv.weight q4_K blk.11.attn_v.weight q4_K +blk.12.attn_qkv.weight q4_K +blk.13.attn_qkv.weight q4_K +blk.14.attn_qkv.weight q4_K blk.15.attn_v.weight q4_K +blk.16.attn_qkv.weight q4_K +blk.17.attn_qkv.weight q4_K +blk.18.attn_qkv.weight q4_K blk.19.attn_v.weight q4_K +blk.20.attn_qkv.weight q4_K +blk.21.attn_qkv.weight q4_K +blk.22.attn_qkv.weight q4_K blk.23.attn_v.weight q4_K +blk.24.attn_qkv.weight q4_K +blk.25.attn_qkv.weight q4_K +blk.26.attn_qkv.weight q4_K blk.27.attn_v.weight q4_K +blk.28.attn_qkv.weight q4_K +blk.29.attn_qkv.weight q4_K +blk.30.attn_qkv.weight q4_K blk.31.attn_v.weight q4_K +blk.32.attn_qkv.weight q4_K +blk.33.attn_qkv.weight q4_K +blk.34.attn_qkv.weight q4_K blk.35.attn_v.weight q4_K +blk.36.attn_qkv.weight q4_K +blk.37.attn_qkv.weight q4_K +blk.38.attn_qkv.weight q4_K blk.39.attn_v.weight q4_K +blk.40.attn_qkv.weight q4_K +blk.41.attn_qkv.weight q4_K +blk.42.attn_qkv.weight q4_K blk.43.attn_v.weight q4_K +blk.44.attn_qkv.weight q4_K +blk.45.attn_qkv.weight q4_K +blk.46.attn_qkv.weight q4_K blk.47.attn_v.weight q4_K +blk.48.attn_qkv.weight q4_K +blk.49.attn_qkv.weight q4_K +blk.50.attn_qkv.weight q4_K blk.51.attn_v.weight q4_K +blk.52.attn_qkv.weight q4_K +blk.53.attn_qkv.weight q4_K +blk.54.attn_qkv.weight q4_K blk.55.attn_v.weight q4_K +blk.56.attn_qkv.weight q4_K +blk.57.attn_qkv.weight q4_K +blk.58.attn_qkv.weight q4_K blk.59.attn_v.weight q4_K +blk.60.attn_qkv.weight q4_K +blk.61.attn_qkv.weight q4_K +blk.62.attn_qkv.weight q4_K blk.63.attn_v.weight q4_K [IQ2_XS] iq2_xs output.weight q5_K token_embd.weight q2_K +blk.0.attn_qkv.weight q4_K blk.0.ffn_down.weight q2_K +blk.1.attn_qkv.weight q4_K blk.1.ffn_down.weight q2_K +blk.2.attn_qkv.weight q4_K blk.2.ffn_down.weight q2_K blk.3.attn_v.weight q4_K blk.3.ffn_down.weight q2_K +blk.4.attn_qkv.weight q4_K blk.4.ffn_down.weight q2_K +blk.5.attn_qkv.weight q4_K blk.5.ffn_down.weight q2_K +blk.6.attn_qkv.weight q4_K blk.6.ffn_down.weight q2_K blk.7.attn_v.weight q4_K blk.7.ffn_down.weight q2_K +blk.8.attn_qkv.weight q4_K +blk.9.attn_qkv.weight q4_K +blk.10.attn_qkv.weight q4_K blk.11.attn_v.weight q4_K +blk.12.attn_qkv.weight q4_K +blk.13.attn_qkv.weight q4_K +blk.14.attn_qkv.weight q4_K blk.15.attn_v.weight q4_K +blk.16.attn_qkv.weight q4_K +blk.17.attn_qkv.weight q4_K +blk.18.attn_qkv.weight q4_K blk.19.attn_v.weight q4_K +blk.20.attn_qkv.weight q4_K +blk.21.attn_qkv.weight q4_K +blk.22.attn_qkv.weight q4_K blk.23.attn_v.weight q4_K +blk.24.attn_qkv.weight q4_K +blk.25.attn_qkv.weight q4_K +blk.26.attn_qkv.weight q4_K blk.27.attn_v.weight q4_K +blk.28.attn_qkv.weight q4_K +blk.29.attn_qkv.weight q4_K +blk.30.attn_qkv.weight q4_K blk.31.attn_v.weight q4_K +blk.32.attn_qkv.weight q4_K +blk.33.attn_qkv.weight q4_K +blk.34.attn_qkv.weight q4_K blk.35.attn_v.weight q4_K +blk.36.attn_qkv.weight q4_K +blk.37.attn_qkv.weight q4_K +blk.38.attn_qkv.weight q4_K blk.39.attn_v.weight q4_K +blk.40.attn_qkv.weight q4_K +blk.41.attn_qkv.weight q4_K +blk.42.attn_qkv.weight q4_K blk.43.attn_v.weight q4_K +blk.44.attn_qkv.weight q4_K +blk.45.attn_qkv.weight q4_K +blk.46.attn_qkv.weight q4_K blk.47.attn_v.weight q4_K +blk.48.attn_qkv.weight q4_K +blk.49.attn_qkv.weight q4_K +blk.50.attn_qkv.weight q4_K blk.51.attn_v.weight q4_K +blk.52.attn_qkv.weight q4_K +blk.53.attn_qkv.weight q4_K +blk.54.attn_qkv.weight q4_K blk.55.attn_v.weight q4_K +blk.56.attn_qkv.weight q4_K +blk.57.attn_qkv.weight q4_K +blk.58.attn_qkv.weight q4_K blk.59.attn_v.weight q4_K +blk.60.attn_qkv.weight q4_K +blk.61.attn_qkv.weight q4_K +blk.62.attn_qkv.weight q4_K blk.63.attn_v.weight q4_K [Q2_K_S] q2_K output.weight q6_K +blk.0.attn_qkv.weight q4_K blk.0.ffn_down.weight q4_K +blk.1.attn_qkv.weight q4_K blk.1.ffn_down.weight q4_K +blk.2.attn_qkv.weight q4_K blk.2.ffn_down.weight q4_K blk.3.attn_v.weight q4_K blk.3.ffn_down.weight q4_K +blk.4.attn_qkv.weight q4_K blk.4.ffn_down.weight q4_K +blk.5.attn_qkv.weight q4_K blk.5.ffn_down.weight q4_K +blk.6.attn_qkv.weight q4_K blk.6.ffn_down.weight q4_K blk.7.attn_v.weight q4_K blk.7.ffn_down.weight q4_K +blk.8.attn_qkv.weight q4_K +blk.9.attn_qkv.weight q4_K +blk.10.attn_qkv.weight q4_K blk.11.attn_v.weight q4_K +blk.12.attn_qkv.weight q4_K +blk.13.attn_qkv.weight q4_K +blk.14.attn_qkv.weight q4_K blk.15.attn_v.weight q4_K +blk.16.attn_qkv.weight q4_K +blk.17.attn_qkv.weight q4_K +blk.18.attn_qkv.weight q4_K blk.19.attn_v.weight q4_K +blk.20.attn_qkv.weight q4_K +blk.21.attn_qkv.weight q4_K +blk.22.attn_qkv.weight q4_K blk.23.attn_v.weight q4_K +blk.24.attn_qkv.weight q4_K +blk.25.attn_qkv.weight q4_K +blk.26.attn_qkv.weight q4_K blk.27.attn_v.weight q4_K +blk.28.attn_qkv.weight q4_K +blk.29.attn_qkv.weight q4_K +blk.30.attn_qkv.weight q4_K blk.31.attn_v.weight q4_K +blk.32.attn_qkv.weight q4_K +blk.33.attn_qkv.weight q4_K +blk.34.attn_qkv.weight q4_K blk.35.attn_v.weight q4_K +blk.36.attn_qkv.weight q4_K +blk.37.attn_qkv.weight q4_K +blk.38.attn_qkv.weight q4_K blk.39.attn_v.weight q4_K +blk.40.attn_qkv.weight q4_K +blk.41.attn_qkv.weight q4_K +blk.42.attn_qkv.weight q4_K blk.43.attn_v.weight q4_K +blk.44.attn_qkv.weight q4_K +blk.45.attn_qkv.weight q4_K +blk.46.attn_qkv.weight q4_K blk.47.attn_v.weight q4_K +blk.48.attn_qkv.weight q4_K +blk.49.attn_qkv.weight q4_K +blk.50.attn_qkv.weight q4_K blk.51.attn_v.weight q4_K +blk.52.attn_qkv.weight q4_K +blk.53.attn_qkv.weight q4_K +blk.54.attn_qkv.weight q4_K blk.55.attn_v.weight q4_K +blk.56.attn_qkv.weight q4_K +blk.57.attn_qkv.weight q4_K +blk.58.attn_qkv.weight q4_K blk.59.attn_v.weight q4_K +blk.60.attn_qkv.weight q4_K +blk.61.attn_qkv.weight q4_K +blk.62.attn_qkv.weight q4_K blk.63.attn_v.weight q4_K [IQ3_XS] iq3_s output.weight q6_K +blk.0.attn_qkv.weight q4_K +blk.1.attn_qkv.weight q4_K +blk.2.attn_qkv.weight q4_K blk.3.attn_k.weight iq3_xxs blk.3.attn_q.weight iq3_xxs blk.3.attn_v.weight q4_K +blk.4.attn_qkv.weight q4_K +blk.5.attn_qkv.weight q4_K +blk.6.attn_qkv.weight q4_K blk.7.attn_k.weight iq3_xxs blk.7.attn_q.weight iq3_xxs blk.7.attn_v.weight q4_K +blk.8.attn_qkv.weight q4_K blk.8.ffn_gate.weight iq3_xxs blk.8.ffn_up.weight iq3_xxs +blk.9.attn_qkv.weight q4_K blk.9.ffn_gate.weight iq3_xxs blk.9.ffn_up.weight iq3_xxs +blk.10.attn_qkv.weight q4_K blk.10.ffn_gate.weight iq3_xxs blk.10.ffn_up.weight iq3_xxs blk.11.attn_k.weight iq3_xxs @@ -725,10 +874,13 @@ blk.11.attn_q.weight iq3_xxs blk.11.attn_v.weight q4_K blk.11.ffn_gate.weight iq3_xxs blk.11.ffn_up.weight iq3_xxs +blk.12.attn_qkv.weight q4_K blk.12.ffn_gate.weight iq3_xxs blk.12.ffn_up.weight iq3_xxs +blk.13.attn_qkv.weight q4_K blk.13.ffn_gate.weight iq3_xxs blk.13.ffn_up.weight iq3_xxs +blk.14.attn_qkv.weight q4_K blk.14.ffn_gate.weight iq3_xxs blk.14.ffn_up.weight iq3_xxs blk.15.attn_k.weight iq3_xxs @@ -736,10 +888,13 @@ blk.15.attn_q.weight iq3_xxs blk.15.attn_v.weight q4_K blk.15.ffn_gate.weight iq3_xxs blk.15.ffn_up.weight iq3_xxs +blk.16.attn_qkv.weight q4_K blk.16.ffn_gate.weight iq3_xxs blk.16.ffn_up.weight iq3_xxs +blk.17.attn_qkv.weight q4_K blk.17.ffn_gate.weight iq3_xxs blk.17.ffn_up.weight iq3_xxs +blk.18.attn_qkv.weight q4_K blk.18.ffn_gate.weight iq3_xxs blk.18.ffn_up.weight iq3_xxs blk.19.attn_k.weight iq3_xxs @@ -747,10 +902,13 @@ blk.19.attn_q.weight iq3_xxs blk.19.attn_v.weight q4_K blk.19.ffn_gate.weight iq3_xxs blk.19.ffn_up.weight iq3_xxs +blk.20.attn_qkv.weight q4_K blk.20.ffn_gate.weight iq3_xxs blk.20.ffn_up.weight iq3_xxs +blk.21.attn_qkv.weight q4_K blk.21.ffn_gate.weight iq3_xxs blk.21.ffn_up.weight iq3_xxs +blk.22.attn_qkv.weight q4_K blk.22.ffn_gate.weight iq3_xxs blk.22.ffn_up.weight iq3_xxs blk.23.attn_k.weight iq3_xxs @@ -758,10 +916,13 @@ blk.23.attn_q.weight iq3_xxs blk.23.attn_v.weight q4_K blk.23.ffn_gate.weight iq3_xxs blk.23.ffn_up.weight iq3_xxs +blk.24.attn_qkv.weight q4_K blk.24.ffn_gate.weight iq3_xxs blk.24.ffn_up.weight iq3_xxs +blk.25.attn_qkv.weight q4_K blk.25.ffn_gate.weight iq3_xxs blk.25.ffn_up.weight iq3_xxs +blk.26.attn_qkv.weight q4_K blk.26.ffn_gate.weight iq3_xxs blk.26.ffn_up.weight iq3_xxs blk.27.attn_k.weight iq3_xxs @@ -769,10 +930,13 @@ blk.27.attn_q.weight iq3_xxs blk.27.attn_v.weight q4_K blk.27.ffn_gate.weight iq3_xxs blk.27.ffn_up.weight iq3_xxs +blk.28.attn_qkv.weight q4_K blk.28.ffn_gate.weight iq3_xxs blk.28.ffn_up.weight iq3_xxs +blk.29.attn_qkv.weight q4_K blk.29.ffn_gate.weight iq3_xxs blk.29.ffn_up.weight iq3_xxs +blk.30.attn_qkv.weight q4_K blk.30.ffn_gate.weight iq3_xxs blk.30.ffn_up.weight iq3_xxs blk.31.attn_k.weight iq3_xxs @@ -780,10 +944,13 @@ blk.31.attn_q.weight iq3_xxs blk.31.attn_v.weight q4_K blk.31.ffn_gate.weight iq3_xxs blk.31.ffn_up.weight iq3_xxs +blk.32.attn_qkv.weight q4_K blk.32.ffn_gate.weight iq3_xxs blk.32.ffn_up.weight iq3_xxs +blk.33.attn_qkv.weight q4_K blk.33.ffn_gate.weight iq3_xxs blk.33.ffn_up.weight iq3_xxs +blk.34.attn_qkv.weight q4_K blk.34.ffn_gate.weight iq3_xxs blk.34.ffn_up.weight iq3_xxs blk.35.attn_k.weight iq3_xxs @@ -791,10 +958,13 @@ blk.35.attn_q.weight iq3_xxs blk.35.attn_v.weight q4_K blk.35.ffn_gate.weight iq3_xxs blk.35.ffn_up.weight iq3_xxs +blk.36.attn_qkv.weight q4_K blk.36.ffn_gate.weight iq3_xxs blk.36.ffn_up.weight iq3_xxs +blk.37.attn_qkv.weight q4_K blk.37.ffn_gate.weight iq3_xxs blk.37.ffn_up.weight iq3_xxs +blk.38.attn_qkv.weight q4_K blk.38.ffn_gate.weight iq3_xxs blk.38.ffn_up.weight iq3_xxs blk.39.attn_k.weight iq3_xxs @@ -802,10 +972,13 @@ blk.39.attn_q.weight iq3_xxs blk.39.attn_v.weight q4_K blk.39.ffn_gate.weight iq3_xxs blk.39.ffn_up.weight iq3_xxs +blk.40.attn_qkv.weight q4_K blk.40.ffn_gate.weight iq3_xxs blk.40.ffn_up.weight iq3_xxs +blk.41.attn_qkv.weight q4_K blk.41.ffn_gate.weight iq3_xxs blk.41.ffn_up.weight iq3_xxs +blk.42.attn_qkv.weight q4_K blk.42.ffn_gate.weight iq3_xxs blk.42.ffn_up.weight iq3_xxs blk.43.attn_k.weight iq3_xxs @@ -813,10 +986,13 @@ blk.43.attn_q.weight iq3_xxs blk.43.attn_v.weight q4_K blk.43.ffn_gate.weight iq3_xxs blk.43.ffn_up.weight iq3_xxs +blk.44.attn_qkv.weight q4_K blk.44.ffn_gate.weight iq3_xxs blk.44.ffn_up.weight iq3_xxs +blk.45.attn_qkv.weight q4_K blk.45.ffn_gate.weight iq3_xxs blk.45.ffn_up.weight iq3_xxs +blk.46.attn_qkv.weight q4_K blk.46.ffn_gate.weight iq3_xxs blk.46.ffn_up.weight iq3_xxs blk.47.attn_k.weight iq3_xxs @@ -824,10 +1000,13 @@ blk.47.attn_q.weight iq3_xxs blk.47.attn_v.weight q4_K blk.47.ffn_gate.weight iq3_xxs blk.47.ffn_up.weight iq3_xxs +blk.48.attn_qkv.weight q4_K blk.48.ffn_gate.weight iq3_xxs blk.48.ffn_up.weight iq3_xxs +blk.49.attn_qkv.weight q4_K blk.49.ffn_gate.weight iq3_xxs blk.49.ffn_up.weight iq3_xxs +blk.50.attn_qkv.weight q4_K blk.50.ffn_gate.weight iq3_xxs blk.50.ffn_up.weight iq3_xxs blk.51.attn_k.weight iq3_xxs @@ -835,10 +1014,13 @@ blk.51.attn_q.weight iq3_xxs blk.51.attn_v.weight q4_K blk.51.ffn_gate.weight iq3_xxs blk.51.ffn_up.weight iq3_xxs +blk.52.attn_qkv.weight q4_K blk.52.ffn_gate.weight iq3_xxs blk.52.ffn_up.weight iq3_xxs +blk.53.attn_qkv.weight q4_K blk.53.ffn_gate.weight iq3_xxs blk.53.ffn_up.weight iq3_xxs +blk.54.attn_qkv.weight q4_K blk.54.ffn_gate.weight iq3_xxs blk.54.ffn_up.weight iq3_xxs blk.55.attn_k.weight iq3_xxs @@ -846,9 +1028,15 @@ blk.55.attn_q.weight iq3_xxs blk.55.attn_v.weight q4_K blk.55.ffn_gate.weight iq3_xxs blk.55.ffn_up.weight iq3_xxs +blk.56.attn_qkv.weight q4_K +blk.57.attn_qkv.weight q4_K +blk.58.attn_qkv.weight q4_K blk.59.attn_k.weight iq3_xxs blk.59.attn_q.weight iq3_xxs blk.59.attn_v.weight q4_K +blk.60.attn_qkv.weight q4_K +blk.61.attn_qkv.weight q4_K +blk.62.attn_qkv.weight q4_K blk.63.attn_k.weight iq3_xxs blk.63.attn_q.weight iq3_xxs blk.63.attn_v.weight q4_K @@ -856,128 +1044,176 @@ blk.63.attn_v.weight q4_K [IQ3_XXS] iq3_xxs output.weight q5_K token_embd.weight iq3_s +blk.0.attn_qkv.weight q4_K blk.0.ffn_down.weight q4_K +blk.1.attn_qkv.weight q4_K blk.1.ffn_down.weight q4_K +blk.2.attn_qkv.weight q4_K blk.2.ffn_down.weight q4_K blk.3.attn_k.weight iq2_s blk.3.attn_output.weight iq3_s blk.3.attn_q.weight iq2_s blk.3.attn_v.weight q4_K blk.3.ffn_down.weight q4_K +blk.4.attn_qkv.weight q4_K blk.4.ffn_down.weight q4_K +blk.5.attn_qkv.weight q4_K blk.5.ffn_down.weight q4_K +blk.6.attn_qkv.weight q4_K blk.6.ffn_down.weight q4_K blk.7.attn_k.weight iq2_s blk.7.attn_output.weight iq3_s blk.7.attn_q.weight iq2_s blk.7.attn_v.weight q4_K blk.7.ffn_down.weight q4_K +blk.8.attn_qkv.weight q4_K blk.8.ffn_down.weight q3_K +blk.9.attn_qkv.weight q4_K blk.9.ffn_down.weight q3_K +blk.10.attn_qkv.weight q4_K blk.10.ffn_down.weight q3_K blk.11.attn_k.weight iq2_s blk.11.attn_output.weight iq3_s blk.11.attn_q.weight iq2_s blk.11.attn_v.weight q4_K blk.11.ffn_down.weight q3_K +blk.12.attn_qkv.weight q4_K blk.12.ffn_down.weight q3_K +blk.13.attn_qkv.weight q4_K blk.13.ffn_down.weight q3_K +blk.14.attn_qkv.weight q4_K blk.14.ffn_down.weight q3_K blk.15.attn_k.weight iq2_s blk.15.attn_output.weight iq3_s blk.15.attn_q.weight iq2_s blk.15.attn_v.weight q4_K blk.15.ffn_down.weight q3_K +blk.16.attn_qkv.weight q4_K blk.16.ffn_down.weight q3_K +blk.17.attn_qkv.weight q4_K blk.17.ffn_down.weight q3_K +blk.18.attn_qkv.weight q4_K blk.18.ffn_down.weight q3_K blk.19.attn_k.weight iq2_s blk.19.attn_output.weight iq3_s blk.19.attn_q.weight iq2_s blk.19.attn_v.weight q4_K blk.19.ffn_down.weight q3_K +blk.20.attn_qkv.weight q4_K blk.20.ffn_down.weight q3_K +blk.21.attn_qkv.weight q4_K blk.21.ffn_down.weight q3_K +blk.22.attn_qkv.weight q4_K blk.22.ffn_down.weight q3_K blk.23.attn_k.weight iq2_s blk.23.attn_output.weight iq3_s blk.23.attn_q.weight iq2_s blk.23.attn_v.weight q4_K blk.23.ffn_down.weight q3_K +blk.24.attn_qkv.weight q4_K blk.24.ffn_down.weight q3_K +blk.25.attn_qkv.weight q4_K blk.25.ffn_down.weight q3_K +blk.26.attn_qkv.weight q4_K blk.26.ffn_down.weight q3_K blk.27.attn_k.weight iq2_s blk.27.attn_output.weight iq3_s blk.27.attn_q.weight iq2_s blk.27.attn_v.weight q4_K blk.27.ffn_down.weight q3_K +blk.28.attn_qkv.weight q4_K blk.28.ffn_down.weight q3_K +blk.29.attn_qkv.weight q4_K blk.29.ffn_down.weight q3_K +blk.30.attn_qkv.weight q4_K blk.30.ffn_down.weight q3_K blk.31.attn_k.weight iq2_s blk.31.attn_output.weight iq3_s blk.31.attn_q.weight iq2_s blk.31.attn_v.weight q4_K blk.31.ffn_down.weight q3_K +blk.32.attn_qkv.weight q4_K blk.32.ffn_down.weight q3_K +blk.33.attn_qkv.weight q4_K blk.33.ffn_down.weight q3_K +blk.34.attn_qkv.weight q4_K blk.34.ffn_down.weight q3_K blk.35.attn_k.weight iq2_s blk.35.attn_output.weight iq3_s blk.35.attn_q.weight iq2_s blk.35.attn_v.weight q4_K blk.35.ffn_down.weight q3_K +blk.36.attn_qkv.weight q4_K blk.36.ffn_down.weight q3_K +blk.37.attn_qkv.weight q4_K blk.37.ffn_down.weight q3_K +blk.38.attn_qkv.weight q4_K blk.38.ffn_down.weight q3_K blk.39.attn_k.weight iq2_s blk.39.attn_output.weight iq3_s blk.39.attn_q.weight iq2_s blk.39.attn_v.weight q4_K blk.39.ffn_down.weight q3_K +blk.40.attn_qkv.weight q4_K blk.40.ffn_down.weight q3_K +blk.41.attn_qkv.weight q4_K blk.41.ffn_down.weight q3_K +blk.42.attn_qkv.weight q4_K blk.42.ffn_down.weight q3_K blk.43.attn_k.weight iq2_s blk.43.attn_output.weight iq3_s blk.43.attn_q.weight iq2_s blk.43.attn_v.weight q4_K blk.43.ffn_down.weight q3_K +blk.44.attn_qkv.weight q4_K blk.44.ffn_down.weight q3_K +blk.45.attn_qkv.weight q4_K blk.45.ffn_down.weight q3_K +blk.46.attn_qkv.weight q4_K blk.46.ffn_down.weight q3_K blk.47.attn_k.weight iq2_s blk.47.attn_output.weight iq3_s blk.47.attn_q.weight iq2_s blk.47.attn_v.weight q4_K blk.47.ffn_down.weight q3_K +blk.48.attn_qkv.weight q4_K blk.48.ffn_down.weight q3_K +blk.49.attn_qkv.weight q4_K blk.49.ffn_down.weight q3_K +blk.50.attn_qkv.weight q4_K blk.50.ffn_down.weight q3_K blk.51.attn_k.weight iq2_s blk.51.attn_output.weight iq3_s blk.51.attn_q.weight iq2_s blk.51.attn_v.weight q4_K blk.51.ffn_down.weight q3_K +blk.52.attn_qkv.weight q4_K blk.52.ffn_down.weight q3_K +blk.53.attn_qkv.weight q4_K blk.53.ffn_down.weight q3_K +blk.54.attn_qkv.weight q4_K blk.54.ffn_down.weight q3_K blk.55.attn_k.weight iq2_s blk.55.attn_output.weight iq3_s blk.55.attn_q.weight iq2_s blk.55.attn_v.weight q4_K blk.55.ffn_down.weight q3_K +blk.56.attn_qkv.weight q4_K blk.56.ffn_down.weight q3_K +blk.57.attn_qkv.weight q4_K blk.57.ffn_down.weight q3_K +blk.58.attn_qkv.weight q4_K blk.58.ffn_down.weight q3_K blk.59.attn_k.weight iq2_s blk.59.attn_output.weight iq3_s blk.59.attn_q.weight iq2_s blk.59.attn_v.weight q4_K blk.59.ffn_down.weight q3_K +blk.60.attn_qkv.weight q4_K blk.60.ffn_down.weight q3_K +blk.61.attn_qkv.weight q4_K blk.61.ffn_down.weight q3_K +blk.62.attn_qkv.weight q4_K blk.62.ffn_down.weight q3_K blk.63.attn_k.weight iq2_s blk.63.attn_output.weight iq3_s @@ -988,91 +1224,235 @@ blk.63.ffn_down.weight q3_K [IQ1_S] iq1_s output.weight q5_K token_embd.weight q2_K +blk.0.attn_qkv.weight q4_K blk.0.ffn_down.weight q2_K +blk.1.attn_qkv.weight q4_K blk.1.ffn_down.weight q2_K +blk.2.attn_qkv.weight q4_K blk.2.ffn_down.weight q2_K blk.3.attn_output.weight iq2_xxs blk.3.attn_v.weight q4_K blk.3.ffn_down.weight q2_K +blk.4.attn_qkv.weight q4_K blk.4.ffn_down.weight q2_K +blk.5.attn_qkv.weight q4_K blk.5.ffn_down.weight q2_K +blk.6.attn_qkv.weight q4_K blk.6.ffn_down.weight q2_K blk.7.attn_output.weight iq2_xxs blk.7.attn_v.weight q4_K blk.7.ffn_down.weight q2_K +blk.8.attn_qkv.weight q4_K +blk.9.attn_qkv.weight q4_K +blk.10.attn_qkv.weight q4_K blk.11.attn_output.weight iq2_xxs blk.11.attn_v.weight q4_K +blk.12.attn_qkv.weight q4_K +blk.13.attn_qkv.weight q4_K +blk.14.attn_qkv.weight q4_K blk.15.attn_output.weight iq2_xxs blk.15.attn_v.weight q4_K +blk.16.attn_qkv.weight q4_K +blk.17.attn_qkv.weight q4_K +blk.18.attn_qkv.weight q4_K blk.19.attn_output.weight iq2_xxs blk.19.attn_v.weight q4_K +blk.20.attn_qkv.weight q4_K +blk.21.attn_qkv.weight q4_K +blk.22.attn_qkv.weight q4_K blk.23.attn_output.weight iq2_xxs blk.23.attn_v.weight q4_K +blk.24.attn_qkv.weight q4_K +blk.25.attn_qkv.weight q4_K +blk.26.attn_qkv.weight q4_K blk.27.attn_output.weight iq2_xxs blk.27.attn_v.weight q4_K +blk.28.attn_qkv.weight q4_K +blk.29.attn_qkv.weight q4_K +blk.30.attn_qkv.weight q4_K blk.31.attn_output.weight iq2_xxs blk.31.attn_v.weight q4_K +blk.32.attn_qkv.weight q4_K +blk.33.attn_qkv.weight q4_K +blk.34.attn_qkv.weight q4_K blk.35.attn_output.weight iq2_xxs blk.35.attn_v.weight q4_K +blk.36.attn_qkv.weight q4_K +blk.37.attn_qkv.weight q4_K +blk.38.attn_qkv.weight q4_K blk.39.attn_output.weight iq2_xxs blk.39.attn_v.weight q4_K +blk.40.attn_qkv.weight q4_K +blk.41.attn_qkv.weight q4_K +blk.42.attn_qkv.weight q4_K blk.43.attn_output.weight iq2_xxs blk.43.attn_v.weight q4_K +blk.44.attn_qkv.weight q4_K +blk.45.attn_qkv.weight q4_K +blk.46.attn_qkv.weight q4_K blk.47.attn_output.weight iq2_xxs blk.47.attn_v.weight q4_K +blk.48.attn_qkv.weight q4_K +blk.49.attn_qkv.weight q4_K +blk.50.attn_qkv.weight q4_K blk.51.attn_output.weight iq2_xxs blk.51.attn_v.weight q4_K +blk.52.attn_qkv.weight q4_K +blk.53.attn_qkv.weight q4_K +blk.54.attn_qkv.weight q4_K blk.55.attn_output.weight iq2_xxs blk.55.attn_v.weight q4_K +blk.56.attn_qkv.weight q4_K +blk.57.attn_qkv.weight q4_K +blk.58.attn_qkv.weight q4_K blk.59.attn_output.weight iq2_xxs blk.59.attn_v.weight q4_K +blk.60.attn_qkv.weight q4_K +blk.61.attn_qkv.weight q4_K +blk.62.attn_qkv.weight q4_K blk.63.attn_output.weight iq2_xxs blk.63.attn_v.weight q4_K [IQ4_NL] iq4_nl output.weight q6_K +blk.0.attn_qkv.weight q5_K blk.0.ffn_down.weight q5_K +blk.1.attn_qkv.weight q5_K blk.1.ffn_down.weight q5_K +blk.2.attn_qkv.weight q5_K blk.2.ffn_down.weight q5_K blk.3.attn_v.weight q5_K blk.3.ffn_down.weight q5_K +blk.4.attn_qkv.weight q5_K blk.4.ffn_down.weight q5_K +blk.5.attn_qkv.weight q5_K blk.5.ffn_down.weight q5_K +blk.6.attn_qkv.weight q5_K blk.6.ffn_down.weight q5_K blk.7.attn_v.weight q5_K blk.7.ffn_down.weight q5_K +blk.8.attn_qkv.weight q5_K +blk.9.attn_qkv.weight q5_K +blk.10.attn_qkv.weight q5_K blk.11.attn_v.weight q5_K +blk.12.attn_qkv.weight q5_K +blk.13.attn_qkv.weight q5_K +blk.14.attn_qkv.weight q5_K blk.15.attn_v.weight q5_K +blk.16.attn_qkv.weight q5_K +blk.17.attn_qkv.weight q5_K +blk.18.attn_qkv.weight q5_K blk.19.attn_v.weight q5_K +blk.20.attn_qkv.weight q5_K +blk.21.attn_qkv.weight q5_K +blk.22.attn_qkv.weight q5_K blk.23.attn_v.weight q5_K +blk.24.attn_qkv.weight q5_K +blk.25.attn_qkv.weight q5_K +blk.26.attn_qkv.weight q5_K blk.27.attn_v.weight q5_K +blk.28.attn_qkv.weight q5_K +blk.29.attn_qkv.weight q5_K +blk.30.attn_qkv.weight q5_K blk.31.attn_v.weight q5_K +blk.32.attn_qkv.weight q5_K +blk.33.attn_qkv.weight q5_K +blk.34.attn_qkv.weight q5_K blk.35.attn_v.weight q5_K +blk.36.attn_qkv.weight q5_K +blk.37.attn_qkv.weight q5_K +blk.38.attn_qkv.weight q5_K blk.39.attn_v.weight q5_K +blk.40.attn_qkv.weight q5_K +blk.41.attn_qkv.weight q5_K +blk.42.attn_qkv.weight q5_K blk.43.attn_v.weight q5_K +blk.44.attn_qkv.weight q5_K +blk.45.attn_qkv.weight q5_K +blk.46.attn_qkv.weight q5_K blk.47.attn_v.weight q5_K +blk.48.attn_qkv.weight q5_K +blk.49.attn_qkv.weight q5_K +blk.50.attn_qkv.weight q5_K blk.51.attn_v.weight q5_K +blk.52.attn_qkv.weight q5_K +blk.53.attn_qkv.weight q5_K +blk.54.attn_qkv.weight q5_K blk.55.attn_v.weight q5_K +blk.56.attn_qkv.weight q5_K +blk.57.attn_qkv.weight q5_K +blk.58.attn_qkv.weight q5_K blk.59.attn_v.weight q5_K +blk.60.attn_qkv.weight q5_K +blk.61.attn_qkv.weight q5_K +blk.62.attn_qkv.weight q5_K blk.63.attn_v.weight q5_K [IQ3_S] iq3_s output.weight q6_K +blk.0.attn_qkv.weight q4_K +blk.1.attn_qkv.weight q4_K +blk.2.attn_qkv.weight q4_K blk.3.attn_v.weight q4_K +blk.4.attn_qkv.weight q4_K +blk.5.attn_qkv.weight q4_K +blk.6.attn_qkv.weight q4_K blk.7.attn_v.weight q4_K +blk.8.attn_qkv.weight q4_K +blk.9.attn_qkv.weight q4_K +blk.10.attn_qkv.weight q4_K blk.11.attn_v.weight q4_K +blk.12.attn_qkv.weight q4_K +blk.13.attn_qkv.weight q4_K +blk.14.attn_qkv.weight q4_K blk.15.attn_v.weight q4_K +blk.16.attn_qkv.weight q4_K +blk.17.attn_qkv.weight q4_K +blk.18.attn_qkv.weight q4_K blk.19.attn_v.weight q4_K +blk.20.attn_qkv.weight q4_K +blk.21.attn_qkv.weight q4_K +blk.22.attn_qkv.weight q4_K blk.23.attn_v.weight q4_K +blk.24.attn_qkv.weight q4_K +blk.25.attn_qkv.weight q4_K +blk.26.attn_qkv.weight q4_K blk.27.attn_v.weight q4_K +blk.28.attn_qkv.weight q4_K +blk.29.attn_qkv.weight q4_K +blk.30.attn_qkv.weight q4_K blk.31.attn_v.weight q4_K +blk.32.attn_qkv.weight q4_K +blk.33.attn_qkv.weight q4_K +blk.34.attn_qkv.weight q4_K blk.35.attn_v.weight q4_K +blk.36.attn_qkv.weight q4_K +blk.37.attn_qkv.weight q4_K +blk.38.attn_qkv.weight q4_K blk.39.attn_v.weight q4_K +blk.40.attn_qkv.weight q4_K +blk.41.attn_qkv.weight q4_K +blk.42.attn_qkv.weight q4_K blk.43.attn_v.weight q4_K +blk.44.attn_qkv.weight q4_K +blk.45.attn_qkv.weight q4_K +blk.46.attn_qkv.weight q4_K blk.47.attn_v.weight q4_K +blk.48.attn_qkv.weight q4_K +blk.49.attn_qkv.weight q4_K +blk.50.attn_qkv.weight q4_K blk.51.attn_v.weight q4_K +blk.52.attn_qkv.weight q4_K +blk.53.attn_qkv.weight q4_K +blk.54.attn_qkv.weight q4_K blk.55.attn_v.weight q4_K +blk.56.attn_qkv.weight q4_K +blk.57.attn_qkv.weight q4_K +blk.58.attn_qkv.weight q4_K blk.59.attn_v.weight q4_K +blk.60.attn_qkv.weight q4_K +blk.61.attn_qkv.weight q4_K +blk.62.attn_qkv.weight q4_K blk.63.attn_v.weight q4_K [IQ3_M] iq3_s @@ -1169,159 +1549,351 @@ blk.63.attn_v.weight q4_K [IQ2_S] iq2_xs output.weight q5_K token_embd.weight iq3_s +blk.0.attn_qkv.weight q4_K blk.0.ffn_down.weight iq3_s +blk.1.attn_qkv.weight q4_K blk.1.ffn_down.weight iq3_s +blk.2.attn_qkv.weight q4_K blk.2.ffn_down.weight iq3_s blk.3.attn_output.weight iq3_s blk.3.attn_v.weight q4_K blk.3.ffn_down.weight iq3_s +blk.4.attn_qkv.weight q4_K blk.4.ffn_down.weight iq3_s +blk.5.attn_qkv.weight q4_K blk.5.ffn_down.weight iq3_s +blk.6.attn_qkv.weight q4_K blk.6.ffn_down.weight iq3_s blk.7.attn_output.weight iq3_s blk.7.attn_v.weight q4_K blk.7.ffn_down.weight iq3_s +blk.8.attn_qkv.weight q4_K +blk.9.attn_qkv.weight q4_K +blk.10.attn_qkv.weight q4_K blk.11.attn_output.weight iq3_s blk.11.attn_v.weight q4_K +blk.12.attn_qkv.weight q4_K +blk.13.attn_qkv.weight q4_K +blk.14.attn_qkv.weight q4_K blk.15.attn_output.weight iq3_s blk.15.attn_v.weight q4_K +blk.16.attn_qkv.weight q4_K +blk.17.attn_qkv.weight q4_K +blk.18.attn_qkv.weight q4_K blk.19.attn_output.weight iq3_s blk.19.attn_v.weight q4_K +blk.20.attn_qkv.weight q4_K +blk.21.attn_qkv.weight q4_K +blk.22.attn_qkv.weight q4_K blk.23.attn_output.weight iq3_s blk.23.attn_v.weight q4_K +blk.24.attn_qkv.weight q4_K +blk.25.attn_qkv.weight q4_K +blk.26.attn_qkv.weight q4_K blk.27.attn_output.weight iq3_s blk.27.attn_v.weight q4_K +blk.28.attn_qkv.weight q4_K +blk.29.attn_qkv.weight q4_K +blk.30.attn_qkv.weight q4_K blk.31.attn_output.weight iq3_s blk.31.attn_v.weight q4_K +blk.32.attn_qkv.weight q4_K +blk.33.attn_qkv.weight q4_K +blk.34.attn_qkv.weight q4_K blk.35.attn_output.weight iq3_s blk.35.attn_v.weight q4_K +blk.36.attn_qkv.weight q4_K +blk.37.attn_qkv.weight q4_K +blk.38.attn_qkv.weight q4_K blk.39.attn_output.weight iq3_s blk.39.attn_v.weight q4_K +blk.40.attn_qkv.weight q4_K +blk.41.attn_qkv.weight q4_K +blk.42.attn_qkv.weight q4_K blk.43.attn_output.weight iq3_s blk.43.attn_v.weight q4_K +blk.44.attn_qkv.weight q4_K +blk.45.attn_qkv.weight q4_K +blk.46.attn_qkv.weight q4_K blk.47.attn_output.weight iq3_s blk.47.attn_v.weight q4_K +blk.48.attn_qkv.weight q4_K +blk.49.attn_qkv.weight q4_K +blk.50.attn_qkv.weight q4_K blk.51.attn_output.weight iq3_s blk.51.attn_v.weight q4_K +blk.52.attn_qkv.weight q4_K +blk.53.attn_qkv.weight q4_K +blk.54.attn_qkv.weight q4_K blk.55.attn_output.weight iq3_s blk.55.attn_v.weight q4_K +blk.56.attn_qkv.weight q4_K +blk.57.attn_qkv.weight q4_K +blk.58.attn_qkv.weight q4_K blk.59.attn_output.weight iq3_s blk.59.attn_v.weight q4_K +blk.60.attn_qkv.weight q4_K +blk.61.attn_qkv.weight q4_K +blk.62.attn_qkv.weight q4_K blk.63.attn_output.weight iq3_s blk.63.attn_v.weight q4_K [IQ2_M] iq2_s output.weight q5_K token_embd.weight iq3_s +blk.0.attn_qkv.weight q4_K blk.0.ffn_down.weight iq3_s +blk.1.attn_qkv.weight q4_K blk.1.ffn_down.weight iq3_s +blk.2.attn_qkv.weight q4_K blk.2.ffn_down.weight iq3_s blk.3.attn_output.weight iq3_s blk.3.attn_v.weight q4_K blk.3.ffn_down.weight iq3_s +blk.4.attn_qkv.weight q4_K blk.4.ffn_down.weight iq3_s +blk.5.attn_qkv.weight q4_K blk.5.ffn_down.weight iq3_s +blk.6.attn_qkv.weight q4_K blk.6.ffn_down.weight iq3_s blk.7.attn_output.weight iq3_s blk.7.attn_v.weight q4_K blk.7.ffn_down.weight iq3_s +blk.8.attn_qkv.weight q4_K +blk.9.attn_qkv.weight q4_K +blk.10.attn_qkv.weight q4_K blk.11.attn_output.weight iq3_s blk.11.attn_v.weight q4_K +blk.12.attn_qkv.weight q4_K +blk.13.attn_qkv.weight q4_K +blk.14.attn_qkv.weight q4_K blk.15.attn_output.weight iq3_s blk.15.attn_v.weight q4_K +blk.16.attn_qkv.weight q4_K +blk.17.attn_qkv.weight q4_K +blk.18.attn_qkv.weight q4_K blk.19.attn_output.weight iq3_s blk.19.attn_v.weight q4_K +blk.20.attn_qkv.weight q4_K +blk.21.attn_qkv.weight q4_K +blk.22.attn_qkv.weight q4_K blk.23.attn_output.weight iq3_s blk.23.attn_v.weight q4_K +blk.24.attn_qkv.weight q4_K +blk.25.attn_qkv.weight q4_K +blk.26.attn_qkv.weight q4_K blk.27.attn_output.weight iq3_s blk.27.attn_v.weight q4_K +blk.28.attn_qkv.weight q4_K +blk.29.attn_qkv.weight q4_K +blk.30.attn_qkv.weight q4_K blk.31.attn_output.weight iq3_s blk.31.attn_v.weight q4_K +blk.32.attn_qkv.weight q4_K +blk.33.attn_qkv.weight q4_K +blk.34.attn_qkv.weight q4_K blk.35.attn_output.weight iq3_s blk.35.attn_v.weight q4_K +blk.36.attn_qkv.weight q4_K +blk.37.attn_qkv.weight q4_K +blk.38.attn_qkv.weight q4_K blk.39.attn_output.weight iq3_s blk.39.attn_v.weight q4_K +blk.40.attn_qkv.weight q4_K +blk.41.attn_qkv.weight q4_K +blk.42.attn_qkv.weight q4_K blk.43.attn_output.weight iq3_s blk.43.attn_v.weight q4_K +blk.44.attn_qkv.weight q4_K +blk.45.attn_qkv.weight q4_K +blk.46.attn_qkv.weight q4_K blk.47.attn_output.weight iq3_s blk.47.attn_v.weight q4_K +blk.48.attn_qkv.weight q4_K +blk.49.attn_qkv.weight q4_K +blk.50.attn_qkv.weight q4_K blk.51.attn_output.weight iq3_s blk.51.attn_v.weight q4_K +blk.52.attn_qkv.weight q4_K +blk.53.attn_qkv.weight q4_K +blk.54.attn_qkv.weight q4_K blk.55.attn_output.weight iq3_s blk.55.attn_v.weight q4_K +blk.56.attn_qkv.weight q4_K +blk.57.attn_qkv.weight q4_K +blk.58.attn_qkv.weight q4_K blk.59.attn_output.weight iq3_s blk.59.attn_v.weight q4_K +blk.60.attn_qkv.weight q4_K +blk.61.attn_qkv.weight q4_K +blk.62.attn_qkv.weight q4_K blk.63.attn_output.weight iq3_s blk.63.attn_v.weight q4_K [IQ4_XS] iq4_xs output.weight q6_K +blk.0.attn_qkv.weight q5_K blk.0.ffn_down.weight q5_K +blk.1.attn_qkv.weight q5_K blk.1.ffn_down.weight q5_K +blk.2.attn_qkv.weight q5_K blk.2.ffn_down.weight q5_K blk.3.attn_v.weight q5_K blk.3.ffn_down.weight q5_K +blk.4.attn_qkv.weight q5_K blk.4.ffn_down.weight q5_K +blk.5.attn_qkv.weight q5_K blk.5.ffn_down.weight q5_K +blk.6.attn_qkv.weight q5_K blk.6.ffn_down.weight q5_K blk.7.attn_v.weight q5_K blk.7.ffn_down.weight q5_K +blk.8.attn_qkv.weight q5_K +blk.9.attn_qkv.weight q5_K +blk.10.attn_qkv.weight q5_K blk.11.attn_v.weight q5_K +blk.12.attn_qkv.weight q5_K +blk.13.attn_qkv.weight q5_K +blk.14.attn_qkv.weight q5_K blk.15.attn_v.weight q5_K +blk.16.attn_qkv.weight q5_K +blk.17.attn_qkv.weight q5_K +blk.18.attn_qkv.weight q5_K blk.19.attn_v.weight q5_K +blk.20.attn_qkv.weight q5_K +blk.21.attn_qkv.weight q5_K +blk.22.attn_qkv.weight q5_K blk.23.attn_v.weight q5_K +blk.24.attn_qkv.weight q5_K +blk.25.attn_qkv.weight q5_K +blk.26.attn_qkv.weight q5_K blk.27.attn_v.weight q5_K +blk.28.attn_qkv.weight q5_K +blk.29.attn_qkv.weight q5_K +blk.30.attn_qkv.weight q5_K blk.31.attn_v.weight q5_K +blk.32.attn_qkv.weight q5_K +blk.33.attn_qkv.weight q5_K +blk.34.attn_qkv.weight q5_K blk.35.attn_v.weight q5_K +blk.36.attn_qkv.weight q5_K +blk.37.attn_qkv.weight q5_K +blk.38.attn_qkv.weight q5_K blk.39.attn_v.weight q5_K +blk.40.attn_qkv.weight q5_K +blk.41.attn_qkv.weight q5_K +blk.42.attn_qkv.weight q5_K blk.43.attn_v.weight q5_K +blk.44.attn_qkv.weight q5_K +blk.45.attn_qkv.weight q5_K +blk.46.attn_qkv.weight q5_K blk.47.attn_v.weight q5_K +blk.48.attn_qkv.weight q5_K +blk.49.attn_qkv.weight q5_K +blk.50.attn_qkv.weight q5_K blk.51.attn_v.weight q5_K +blk.52.attn_qkv.weight q5_K +blk.53.attn_qkv.weight q5_K +blk.54.attn_qkv.weight q5_K blk.55.attn_v.weight q5_K +blk.56.attn_qkv.weight q5_K +blk.57.attn_qkv.weight q5_K +blk.58.attn_qkv.weight q5_K blk.59.attn_v.weight q5_K +blk.60.attn_qkv.weight q5_K +blk.61.attn_qkv.weight q5_K +blk.62.attn_qkv.weight q5_K blk.63.attn_v.weight q5_K [IQ1_M] iq1_m output.weight q5_K token_embd.weight q2_K +blk.0.attn_qkv.weight q4_K blk.0.ffn_down.weight q2_K +blk.1.attn_qkv.weight q4_K blk.1.ffn_down.weight q2_K +blk.2.attn_qkv.weight q4_K blk.2.ffn_down.weight q2_K blk.3.attn_output.weight iq2_xxs blk.3.attn_v.weight q4_K blk.3.ffn_down.weight q2_K +blk.4.attn_qkv.weight q4_K blk.4.ffn_down.weight q2_K +blk.5.attn_qkv.weight q4_K blk.5.ffn_down.weight q2_K +blk.6.attn_qkv.weight q4_K blk.6.ffn_down.weight q2_K blk.7.attn_output.weight iq2_xxs blk.7.attn_v.weight q4_K blk.7.ffn_down.weight q2_K +blk.8.attn_qkv.weight q4_K +blk.9.attn_qkv.weight q4_K +blk.10.attn_qkv.weight q4_K blk.11.attn_output.weight iq2_xxs blk.11.attn_v.weight q4_K +blk.12.attn_qkv.weight q4_K +blk.13.attn_qkv.weight q4_K +blk.14.attn_qkv.weight q4_K blk.15.attn_output.weight iq2_xxs blk.15.attn_v.weight q4_K +blk.16.attn_qkv.weight q4_K +blk.17.attn_qkv.weight q4_K +blk.18.attn_qkv.weight q4_K blk.19.attn_output.weight iq2_xxs blk.19.attn_v.weight q4_K +blk.20.attn_qkv.weight q4_K +blk.21.attn_qkv.weight q4_K +blk.22.attn_qkv.weight q4_K blk.23.attn_output.weight iq2_xxs blk.23.attn_v.weight q4_K +blk.24.attn_qkv.weight q4_K +blk.25.attn_qkv.weight q4_K +blk.26.attn_qkv.weight q4_K blk.27.attn_output.weight iq2_xxs blk.27.attn_v.weight q4_K +blk.28.attn_qkv.weight q4_K +blk.29.attn_qkv.weight q4_K +blk.30.attn_qkv.weight q4_K blk.31.attn_output.weight iq2_xxs blk.31.attn_v.weight q4_K +blk.32.attn_qkv.weight q4_K +blk.33.attn_qkv.weight q4_K +blk.34.attn_qkv.weight q4_K blk.35.attn_output.weight iq2_xxs blk.35.attn_v.weight q4_K +blk.36.attn_qkv.weight q4_K +blk.37.attn_qkv.weight q4_K +blk.38.attn_qkv.weight q4_K blk.39.attn_output.weight iq2_xxs blk.39.attn_v.weight q4_K +blk.40.attn_qkv.weight q4_K +blk.41.attn_qkv.weight q4_K +blk.42.attn_qkv.weight q4_K blk.43.attn_output.weight iq2_xxs blk.43.attn_v.weight q4_K +blk.44.attn_qkv.weight q4_K +blk.45.attn_qkv.weight q4_K +blk.46.attn_qkv.weight q4_K blk.47.attn_output.weight iq2_xxs blk.47.attn_v.weight q4_K +blk.48.attn_qkv.weight q4_K +blk.49.attn_qkv.weight q4_K +blk.50.attn_qkv.weight q4_K blk.51.attn_output.weight iq2_xxs blk.51.attn_v.weight q4_K +blk.52.attn_qkv.weight q4_K +blk.53.attn_qkv.weight q4_K +blk.54.attn_qkv.weight q4_K blk.55.attn_output.weight iq2_xxs blk.55.attn_v.weight q4_K +blk.56.attn_qkv.weight q4_K +blk.57.attn_qkv.weight q4_K +blk.58.attn_qkv.weight q4_K blk.59.attn_output.weight iq2_xxs blk.59.attn_v.weight q4_K +blk.60.attn_qkv.weight q4_K +blk.61.attn_qkv.weight q4_K +blk.62.attn_qkv.weight q4_K blk.63.attn_output.weight iq2_xxs blk.63.attn_v.weight q4_K diff --git a/tests/snapshots/qwen3.5-397b-a17b.schema b/tests/snapshots/qwen3.5-397b-a17b.schema index 801c9b4f16..e62948dac8 100644 --- a/tests/snapshots/qwen3.5-397b-a17b.schema +++ b/tests/snapshots/qwen3.5-397b-a17b.schema @@ -23,150 +23,195 @@ output.weight q6_K [Q2_K] q2_K output.weight q6_K +blk.0.attn_qkv.weight q4_K blk.0.ffn_down_exps.weight q3_K blk.0.ffn_down_shexp.weight q3_K +blk.1.attn_qkv.weight q4_K blk.1.ffn_down_exps.weight q3_K blk.1.ffn_down_shexp.weight q3_K +blk.2.attn_qkv.weight q4_K blk.2.ffn_down_exps.weight q3_K blk.2.ffn_down_shexp.weight q3_K blk.3.attn_output.weight q3_K blk.3.attn_v.weight q4_K blk.3.ffn_down_exps.weight q3_K blk.3.ffn_down_shexp.weight q3_K +blk.4.attn_qkv.weight q4_K blk.4.ffn_down_exps.weight q3_K blk.4.ffn_down_shexp.weight q3_K +blk.5.attn_qkv.weight q4_K blk.5.ffn_down_exps.weight q3_K blk.5.ffn_down_shexp.weight q3_K +blk.6.attn_qkv.weight q4_K blk.6.ffn_down_exps.weight q3_K blk.6.ffn_down_shexp.weight q3_K blk.7.attn_output.weight q3_K blk.7.attn_v.weight q4_K blk.7.ffn_down_exps.weight q3_K blk.7.ffn_down_shexp.weight q3_K +blk.8.attn_qkv.weight q4_K blk.8.ffn_down_exps.weight q3_K blk.8.ffn_down_shexp.weight q3_K +blk.9.attn_qkv.weight q4_K blk.9.ffn_down_exps.weight q3_K blk.9.ffn_down_shexp.weight q3_K +blk.10.attn_qkv.weight q4_K blk.10.ffn_down_exps.weight q3_K blk.10.ffn_down_shexp.weight q3_K blk.11.attn_output.weight q3_K blk.11.attn_v.weight q4_K blk.11.ffn_down_exps.weight q3_K blk.11.ffn_down_shexp.weight q3_K +blk.12.attn_qkv.weight q4_K blk.12.ffn_down_exps.weight q3_K blk.12.ffn_down_shexp.weight q3_K +blk.13.attn_qkv.weight q4_K blk.13.ffn_down_exps.weight q3_K blk.13.ffn_down_shexp.weight q3_K +blk.14.attn_qkv.weight q4_K blk.14.ffn_down_exps.weight q3_K blk.14.ffn_down_shexp.weight q3_K blk.15.attn_output.weight q3_K blk.15.attn_v.weight q4_K blk.15.ffn_down_exps.weight q3_K blk.15.ffn_down_shexp.weight q3_K +blk.16.attn_qkv.weight q4_K blk.16.ffn_down_exps.weight q3_K blk.16.ffn_down_shexp.weight q3_K +blk.17.attn_qkv.weight q4_K blk.17.ffn_down_exps.weight q3_K blk.17.ffn_down_shexp.weight q3_K +blk.18.attn_qkv.weight q4_K blk.18.ffn_down_exps.weight q3_K blk.18.ffn_down_shexp.weight q3_K blk.19.attn_output.weight q3_K blk.19.attn_v.weight q4_K blk.19.ffn_down_exps.weight q3_K blk.19.ffn_down_shexp.weight q3_K +blk.20.attn_qkv.weight q4_K blk.20.ffn_down_exps.weight q3_K blk.20.ffn_down_shexp.weight q3_K +blk.21.attn_qkv.weight q4_K blk.21.ffn_down_exps.weight q3_K blk.21.ffn_down_shexp.weight q3_K +blk.22.attn_qkv.weight q4_K blk.22.ffn_down_exps.weight q3_K blk.22.ffn_down_shexp.weight q3_K blk.23.attn_output.weight q3_K blk.23.attn_v.weight q4_K blk.23.ffn_down_exps.weight q3_K blk.23.ffn_down_shexp.weight q3_K +blk.24.attn_qkv.weight q4_K blk.24.ffn_down_exps.weight q3_K blk.24.ffn_down_shexp.weight q3_K +blk.25.attn_qkv.weight q4_K blk.25.ffn_down_exps.weight q3_K blk.25.ffn_down_shexp.weight q3_K +blk.26.attn_qkv.weight q4_K blk.26.ffn_down_exps.weight q3_K blk.26.ffn_down_shexp.weight q3_K blk.27.attn_output.weight q3_K blk.27.attn_v.weight q4_K blk.27.ffn_down_exps.weight q3_K blk.27.ffn_down_shexp.weight q3_K +blk.28.attn_qkv.weight q4_K blk.28.ffn_down_exps.weight q3_K blk.28.ffn_down_shexp.weight q3_K +blk.29.attn_qkv.weight q4_K blk.29.ffn_down_exps.weight q3_K blk.29.ffn_down_shexp.weight q3_K +blk.30.attn_qkv.weight q4_K blk.30.ffn_down_exps.weight q3_K blk.30.ffn_down_shexp.weight q3_K blk.31.attn_output.weight q3_K blk.31.attn_v.weight q4_K blk.31.ffn_down_exps.weight q3_K blk.31.ffn_down_shexp.weight q3_K +blk.32.attn_qkv.weight q4_K blk.32.ffn_down_exps.weight q3_K blk.32.ffn_down_shexp.weight q3_K +blk.33.attn_qkv.weight q4_K blk.33.ffn_down_exps.weight q3_K blk.33.ffn_down_shexp.weight q3_K +blk.34.attn_qkv.weight q4_K blk.34.ffn_down_exps.weight q3_K blk.34.ffn_down_shexp.weight q3_K blk.35.attn_output.weight q3_K blk.35.attn_v.weight q4_K blk.35.ffn_down_exps.weight q3_K blk.35.ffn_down_shexp.weight q3_K +blk.36.attn_qkv.weight q4_K blk.36.ffn_down_exps.weight q3_K blk.36.ffn_down_shexp.weight q3_K +blk.37.attn_qkv.weight q4_K blk.37.ffn_down_exps.weight q3_K blk.37.ffn_down_shexp.weight q3_K +blk.38.attn_qkv.weight q4_K blk.38.ffn_down_exps.weight q3_K blk.38.ffn_down_shexp.weight q3_K blk.39.attn_output.weight q3_K blk.39.attn_v.weight q4_K blk.39.ffn_down_exps.weight q3_K blk.39.ffn_down_shexp.weight q3_K +blk.40.attn_qkv.weight q4_K blk.40.ffn_down_exps.weight q3_K blk.40.ffn_down_shexp.weight q3_K +blk.41.attn_qkv.weight q4_K blk.41.ffn_down_exps.weight q3_K blk.41.ffn_down_shexp.weight q3_K +blk.42.attn_qkv.weight q4_K blk.42.ffn_down_exps.weight q3_K blk.42.ffn_down_shexp.weight q3_K blk.43.attn_output.weight q3_K blk.43.attn_v.weight q4_K blk.43.ffn_down_exps.weight q3_K blk.43.ffn_down_shexp.weight q3_K +blk.44.attn_qkv.weight q4_K blk.44.ffn_down_exps.weight q3_K blk.44.ffn_down_shexp.weight q3_K +blk.45.attn_qkv.weight q4_K blk.45.ffn_down_exps.weight q3_K blk.45.ffn_down_shexp.weight q3_K +blk.46.attn_qkv.weight q4_K blk.46.ffn_down_exps.weight q3_K blk.46.ffn_down_shexp.weight q3_K blk.47.attn_output.weight q3_K blk.47.attn_v.weight q4_K blk.47.ffn_down_exps.weight q3_K blk.47.ffn_down_shexp.weight q3_K +blk.48.attn_qkv.weight q4_K blk.48.ffn_down_exps.weight q3_K blk.48.ffn_down_shexp.weight q3_K +blk.49.attn_qkv.weight q4_K blk.49.ffn_down_exps.weight q3_K blk.49.ffn_down_shexp.weight q3_K +blk.50.attn_qkv.weight q4_K blk.50.ffn_down_exps.weight q3_K blk.50.ffn_down_shexp.weight q3_K blk.51.attn_output.weight q3_K blk.51.attn_v.weight q4_K blk.51.ffn_down_exps.weight q3_K blk.51.ffn_down_shexp.weight q3_K +blk.52.attn_qkv.weight q4_K blk.52.ffn_down_exps.weight q3_K blk.52.ffn_down_shexp.weight q3_K +blk.53.attn_qkv.weight q4_K blk.53.ffn_down_exps.weight q3_K blk.53.ffn_down_shexp.weight q3_K +blk.54.attn_qkv.weight q4_K blk.54.ffn_down_exps.weight q3_K blk.54.ffn_down_shexp.weight q3_K blk.55.attn_output.weight q3_K blk.55.attn_v.weight q4_K blk.55.ffn_down_exps.weight q3_K blk.55.ffn_down_shexp.weight q3_K +blk.56.attn_qkv.weight q4_K blk.56.ffn_down_exps.weight q3_K blk.56.ffn_down_shexp.weight q3_K +blk.57.attn_qkv.weight q4_K blk.57.ffn_down_exps.weight q3_K blk.57.ffn_down_shexp.weight q3_K +blk.58.attn_qkv.weight q4_K blk.58.ffn_down_exps.weight q3_K blk.58.ffn_down_shexp.weight q3_K blk.59.attn_output.weight q3_K @@ -179,17 +224,17 @@ output.weight q6_K [Q3_K_M] q3_K output.weight q6_K -blk.0.attn_qkv.weight q4_K +blk.0.attn_qkv.weight q5_K blk.0.ffn_down_exps.weight q5_K blk.0.ffn_down_shexp.weight q5_K -blk.1.attn_qkv.weight q4_K +blk.1.attn_qkv.weight q5_K blk.1.ffn_down_exps.weight q5_K blk.1.ffn_down_shexp.weight q5_K blk.2.attn_qkv.weight q4_K blk.2.ffn_down_exps.weight q5_K blk.2.ffn_down_shexp.weight q5_K blk.3.attn_output.weight q4_K -blk.3.attn_v.weight q5_K +blk.3.attn_v.weight q4_K blk.3.ffn_down_exps.weight q4_K blk.3.ffn_down_shexp.weight q4_K blk.4.attn_qkv.weight q4_K @@ -202,7 +247,7 @@ blk.6.attn_qkv.weight q4_K blk.6.ffn_down_exps.weight q4_K blk.6.ffn_down_shexp.weight q4_K blk.7.attn_output.weight q4_K -blk.7.attn_v.weight q5_K +blk.7.attn_v.weight q4_K blk.7.ffn_down_exps.weight q4_K blk.7.ffn_down_shexp.weight q4_K blk.8.attn_qkv.weight q4_K @@ -377,195 +422,195 @@ blk.59.ffn_down_shexp.weight q4_K [Q3_K_L] q3_K output.weight q6_K -blk.0.attn_qkv.weight q4_K +blk.0.attn_qkv.weight q5_K blk.0.ffn_down_exps.weight q5_K blk.0.ffn_down_shexp.weight q5_K -blk.1.attn_qkv.weight q4_K +blk.1.attn_qkv.weight q5_K blk.1.ffn_down_exps.weight q5_K blk.1.ffn_down_shexp.weight q5_K -blk.2.attn_qkv.weight q4_K +blk.2.attn_qkv.weight q5_K blk.2.ffn_down_exps.weight q5_K blk.2.ffn_down_shexp.weight q5_K blk.3.attn_output.weight q5_K blk.3.attn_v.weight q5_K blk.3.ffn_down_exps.weight q5_K blk.3.ffn_down_shexp.weight q5_K -blk.4.attn_qkv.weight q4_K +blk.4.attn_qkv.weight q5_K blk.4.ffn_down_exps.weight q5_K blk.4.ffn_down_shexp.weight q5_K -blk.5.attn_qkv.weight q4_K +blk.5.attn_qkv.weight q5_K blk.5.ffn_down_exps.weight q5_K blk.5.ffn_down_shexp.weight q5_K -blk.6.attn_qkv.weight q4_K +blk.6.attn_qkv.weight q5_K blk.6.ffn_down_exps.weight q5_K blk.6.ffn_down_shexp.weight q5_K blk.7.attn_output.weight q5_K blk.7.attn_v.weight q5_K blk.7.ffn_down_exps.weight q5_K blk.7.ffn_down_shexp.weight q5_K -blk.8.attn_qkv.weight q4_K +blk.8.attn_qkv.weight q5_K blk.8.ffn_down_exps.weight q5_K blk.8.ffn_down_shexp.weight q5_K -blk.9.attn_qkv.weight q4_K +blk.9.attn_qkv.weight q5_K blk.9.ffn_down_exps.weight q5_K blk.9.ffn_down_shexp.weight q5_K -blk.10.attn_qkv.weight q4_K +blk.10.attn_qkv.weight q5_K blk.10.ffn_down_exps.weight q5_K blk.10.ffn_down_shexp.weight q5_K blk.11.attn_output.weight q5_K blk.11.attn_v.weight q5_K blk.11.ffn_down_exps.weight q5_K blk.11.ffn_down_shexp.weight q5_K -blk.12.attn_qkv.weight q4_K +blk.12.attn_qkv.weight q5_K blk.12.ffn_down_exps.weight q5_K blk.12.ffn_down_shexp.weight q5_K -blk.13.attn_qkv.weight q4_K +blk.13.attn_qkv.weight q5_K blk.13.ffn_down_exps.weight q5_K blk.13.ffn_down_shexp.weight q5_K -blk.14.attn_qkv.weight q4_K +blk.14.attn_qkv.weight q5_K blk.14.ffn_down_exps.weight q5_K blk.14.ffn_down_shexp.weight q5_K blk.15.attn_output.weight q5_K blk.15.attn_v.weight q5_K blk.15.ffn_down_exps.weight q5_K blk.15.ffn_down_shexp.weight q5_K -blk.16.attn_qkv.weight q4_K +blk.16.attn_qkv.weight q5_K blk.16.ffn_down_exps.weight q5_K blk.16.ffn_down_shexp.weight q5_K -blk.17.attn_qkv.weight q4_K +blk.17.attn_qkv.weight q5_K blk.17.ffn_down_exps.weight q5_K blk.17.ffn_down_shexp.weight q5_K -blk.18.attn_qkv.weight q4_K +blk.18.attn_qkv.weight q5_K blk.18.ffn_down_exps.weight q5_K blk.18.ffn_down_shexp.weight q5_K blk.19.attn_output.weight q5_K blk.19.attn_v.weight q5_K blk.19.ffn_down_exps.weight q5_K blk.19.ffn_down_shexp.weight q5_K -blk.20.attn_qkv.weight q4_K +blk.20.attn_qkv.weight q5_K blk.20.ffn_down_exps.weight q5_K blk.20.ffn_down_shexp.weight q5_K -blk.21.attn_qkv.weight q4_K +blk.21.attn_qkv.weight q5_K blk.21.ffn_down_exps.weight q5_K blk.21.ffn_down_shexp.weight q5_K -blk.22.attn_qkv.weight q4_K +blk.22.attn_qkv.weight q5_K blk.22.ffn_down_exps.weight q5_K blk.22.ffn_down_shexp.weight q5_K blk.23.attn_output.weight q5_K blk.23.attn_v.weight q5_K blk.23.ffn_down_exps.weight q5_K blk.23.ffn_down_shexp.weight q5_K -blk.24.attn_qkv.weight q4_K +blk.24.attn_qkv.weight q5_K blk.24.ffn_down_exps.weight q5_K blk.24.ffn_down_shexp.weight q5_K -blk.25.attn_qkv.weight q4_K +blk.25.attn_qkv.weight q5_K blk.25.ffn_down_exps.weight q5_K blk.25.ffn_down_shexp.weight q5_K -blk.26.attn_qkv.weight q4_K +blk.26.attn_qkv.weight q5_K blk.26.ffn_down_exps.weight q5_K blk.26.ffn_down_shexp.weight q5_K blk.27.attn_output.weight q5_K blk.27.attn_v.weight q5_K blk.27.ffn_down_exps.weight q5_K blk.27.ffn_down_shexp.weight q5_K -blk.28.attn_qkv.weight q4_K +blk.28.attn_qkv.weight q5_K blk.28.ffn_down_exps.weight q5_K blk.28.ffn_down_shexp.weight q5_K -blk.29.attn_qkv.weight q4_K +blk.29.attn_qkv.weight q5_K blk.29.ffn_down_exps.weight q5_K blk.29.ffn_down_shexp.weight q5_K -blk.30.attn_qkv.weight q4_K +blk.30.attn_qkv.weight q5_K blk.30.ffn_down_exps.weight q5_K blk.30.ffn_down_shexp.weight q5_K blk.31.attn_output.weight q5_K blk.31.attn_v.weight q5_K blk.31.ffn_down_exps.weight q5_K blk.31.ffn_down_shexp.weight q5_K -blk.32.attn_qkv.weight q4_K +blk.32.attn_qkv.weight q5_K blk.32.ffn_down_exps.weight q5_K blk.32.ffn_down_shexp.weight q5_K -blk.33.attn_qkv.weight q4_K +blk.33.attn_qkv.weight q5_K blk.33.ffn_down_exps.weight q5_K blk.33.ffn_down_shexp.weight q5_K -blk.34.attn_qkv.weight q4_K +blk.34.attn_qkv.weight q5_K blk.34.ffn_down_exps.weight q5_K blk.34.ffn_down_shexp.weight q5_K blk.35.attn_output.weight q5_K blk.35.attn_v.weight q5_K blk.35.ffn_down_exps.weight q5_K blk.35.ffn_down_shexp.weight q5_K -blk.36.attn_qkv.weight q4_K +blk.36.attn_qkv.weight q5_K blk.36.ffn_down_exps.weight q5_K blk.36.ffn_down_shexp.weight q5_K -blk.37.attn_qkv.weight q4_K +blk.37.attn_qkv.weight q5_K blk.37.ffn_down_exps.weight q5_K blk.37.ffn_down_shexp.weight q5_K -blk.38.attn_qkv.weight q4_K +blk.38.attn_qkv.weight q5_K blk.38.ffn_down_exps.weight q5_K blk.38.ffn_down_shexp.weight q5_K blk.39.attn_output.weight q5_K blk.39.attn_v.weight q5_K blk.39.ffn_down_exps.weight q5_K blk.39.ffn_down_shexp.weight q5_K -blk.40.attn_qkv.weight q4_K +blk.40.attn_qkv.weight q5_K blk.40.ffn_down_exps.weight q5_K blk.40.ffn_down_shexp.weight q5_K -blk.41.attn_qkv.weight q4_K +blk.41.attn_qkv.weight q5_K blk.41.ffn_down_exps.weight q5_K blk.41.ffn_down_shexp.weight q5_K -blk.42.attn_qkv.weight q4_K +blk.42.attn_qkv.weight q5_K blk.42.ffn_down_exps.weight q5_K blk.42.ffn_down_shexp.weight q5_K blk.43.attn_output.weight q5_K blk.43.attn_v.weight q5_K blk.43.ffn_down_exps.weight q5_K blk.43.ffn_down_shexp.weight q5_K -blk.44.attn_qkv.weight q4_K +blk.44.attn_qkv.weight q5_K blk.44.ffn_down_exps.weight q5_K blk.44.ffn_down_shexp.weight q5_K -blk.45.attn_qkv.weight q4_K +blk.45.attn_qkv.weight q5_K blk.45.ffn_down_exps.weight q5_K blk.45.ffn_down_shexp.weight q5_K -blk.46.attn_qkv.weight q4_K +blk.46.attn_qkv.weight q5_K blk.46.ffn_down_exps.weight q5_K blk.46.ffn_down_shexp.weight q5_K blk.47.attn_output.weight q5_K blk.47.attn_v.weight q5_K blk.47.ffn_down_exps.weight q5_K blk.47.ffn_down_shexp.weight q5_K -blk.48.attn_qkv.weight q4_K +blk.48.attn_qkv.weight q5_K blk.48.ffn_down_exps.weight q5_K blk.48.ffn_down_shexp.weight q5_K -blk.49.attn_qkv.weight q4_K +blk.49.attn_qkv.weight q5_K blk.49.ffn_down_exps.weight q5_K blk.49.ffn_down_shexp.weight q5_K -blk.50.attn_qkv.weight q4_K +blk.50.attn_qkv.weight q5_K blk.50.ffn_down_exps.weight q5_K blk.50.ffn_down_shexp.weight q5_K blk.51.attn_output.weight q5_K blk.51.attn_v.weight q5_K blk.51.ffn_down_exps.weight q5_K blk.51.ffn_down_shexp.weight q5_K -blk.52.attn_qkv.weight q4_K +blk.52.attn_qkv.weight q5_K blk.52.ffn_down_exps.weight q5_K blk.52.ffn_down_shexp.weight q5_K -blk.53.attn_qkv.weight q4_K +blk.53.attn_qkv.weight q5_K blk.53.ffn_down_exps.weight q5_K blk.53.ffn_down_shexp.weight q5_K -blk.54.attn_qkv.weight q4_K +blk.54.attn_qkv.weight q5_K blk.54.ffn_down_exps.weight q5_K blk.54.ffn_down_shexp.weight q5_K blk.55.attn_output.weight q5_K blk.55.attn_v.weight q5_K blk.55.ffn_down_exps.weight q5_K blk.55.ffn_down_shexp.weight q5_K -blk.56.attn_qkv.weight q4_K +blk.56.attn_qkv.weight q5_K blk.56.ffn_down_exps.weight q5_K blk.56.ffn_down_shexp.weight q5_K -blk.57.attn_qkv.weight q4_K +blk.57.attn_qkv.weight q5_K blk.57.ffn_down_exps.weight q5_K blk.57.ffn_down_shexp.weight q5_K -blk.58.attn_qkv.weight q4_K +blk.58.attn_qkv.weight q5_K blk.58.ffn_down_exps.weight q5_K blk.58.ffn_down_shexp.weight q5_K blk.59.attn_output.weight q5_K @@ -575,10 +620,13 @@ blk.59.ffn_down_shexp.weight q5_K [Q4_K_S] q4_K output.weight q6_K +blk.0.attn_qkv.weight q5_K blk.0.ffn_down_exps.weight q5_K blk.0.ffn_down_shexp.weight q5_K +blk.1.attn_qkv.weight q5_K blk.1.ffn_down_exps.weight q5_K blk.1.ffn_down_shexp.weight q5_K +blk.2.attn_qkv.weight q5_K blk.2.ffn_down_exps.weight q5_K blk.2.ffn_down_shexp.weight q5_K blk.3.attn_v.weight q5_K @@ -590,124 +638,97 @@ blk.5.ffn_down_exps.weight q5_K blk.5.ffn_down_shexp.weight q5_K blk.6.ffn_down_exps.weight q5_K blk.6.ffn_down_shexp.weight q5_K -blk.7.attn_v.weight q5_K -blk.11.attn_v.weight q5_K -blk.15.attn_v.weight q5_K [Q4_K_M] q4_K output.weight q6_K -blk.0.attn_qkv.weight q5_K +blk.0.attn_qkv.weight q6_K blk.0.ffn_down_exps.weight q6_K blk.0.ffn_down_shexp.weight q6_K -blk.1.attn_qkv.weight q5_K +blk.1.attn_qkv.weight q6_K blk.1.ffn_down_exps.weight q6_K blk.1.ffn_down_shexp.weight q6_K -blk.2.attn_qkv.weight q5_K +blk.2.attn_qkv.weight q6_K blk.2.ffn_down_exps.weight q6_K blk.2.ffn_down_shexp.weight q6_K blk.3.attn_v.weight q6_K blk.3.ffn_down_exps.weight q6_K blk.3.ffn_down_shexp.weight q6_K -blk.4.attn_qkv.weight q5_K +blk.4.attn_qkv.weight q6_K blk.4.ffn_down_exps.weight q6_K blk.4.ffn_down_shexp.weight q6_K -blk.5.attn_qkv.weight q5_K +blk.5.attn_qkv.weight q6_K blk.5.ffn_down_exps.weight q6_K blk.5.ffn_down_shexp.weight q6_K -blk.6.attn_qkv.weight q5_K +blk.6.attn_qkv.weight q6_K blk.6.ffn_down_exps.weight q6_K blk.6.ffn_down_shexp.weight q6_K -blk.7.attn_v.weight q6_K -blk.8.attn_qkv.weight q5_K -blk.9.attn_qkv.weight q5_K +blk.9.attn_qkv.weight q6_K blk.9.ffn_down_exps.weight q6_K blk.9.ffn_down_shexp.weight q6_K -blk.10.attn_qkv.weight q5_K -blk.11.attn_v.weight q6_K -blk.12.attn_qkv.weight q5_K +blk.12.attn_qkv.weight q6_K blk.12.ffn_down_exps.weight q6_K blk.12.ffn_down_shexp.weight q6_K -blk.13.attn_qkv.weight q5_K -blk.14.attn_qkv.weight q5_K blk.15.attn_v.weight q6_K blk.15.ffn_down_exps.weight q6_K blk.15.ffn_down_shexp.weight q6_K -blk.16.attn_qkv.weight q5_K -blk.17.attn_qkv.weight q5_K -blk.18.attn_qkv.weight q5_K +blk.18.attn_qkv.weight q6_K blk.18.ffn_down_exps.weight q6_K blk.18.ffn_down_shexp.weight q6_K -blk.19.attn_v.weight q6_K -blk.20.attn_qkv.weight q5_K -blk.21.attn_qkv.weight q5_K +blk.21.attn_qkv.weight q6_K blk.21.ffn_down_exps.weight q6_K blk.21.ffn_down_shexp.weight q6_K -blk.22.attn_qkv.weight q5_K -blk.23.attn_v.weight q6_K -blk.24.attn_qkv.weight q5_K +blk.24.attn_qkv.weight q6_K blk.24.ffn_down_exps.weight q6_K blk.24.ffn_down_shexp.weight q6_K -blk.25.attn_qkv.weight q5_K -blk.26.attn_qkv.weight q5_K blk.27.attn_v.weight q6_K blk.27.ffn_down_exps.weight q6_K blk.27.ffn_down_shexp.weight q6_K -blk.28.attn_qkv.weight q5_K -blk.29.attn_qkv.weight q5_K -blk.30.attn_qkv.weight q5_K +blk.30.attn_qkv.weight q6_K blk.30.ffn_down_exps.weight q6_K blk.30.ffn_down_shexp.weight q6_K -blk.32.attn_qkv.weight q5_K -blk.33.attn_qkv.weight q5_K +blk.33.attn_qkv.weight q6_K blk.33.ffn_down_exps.weight q6_K blk.33.ffn_down_shexp.weight q6_K -blk.34.attn_qkv.weight q5_K -blk.36.attn_qkv.weight q5_K +blk.36.attn_qkv.weight q6_K blk.36.ffn_down_exps.weight q6_K blk.36.ffn_down_shexp.weight q6_K -blk.37.attn_qkv.weight q5_K -blk.38.attn_qkv.weight q5_K blk.39.attn_v.weight q6_K blk.39.ffn_down_exps.weight q6_K blk.39.ffn_down_shexp.weight q6_K -blk.40.attn_qkv.weight q5_K -blk.41.attn_qkv.weight q5_K -blk.42.attn_qkv.weight q5_K +blk.42.attn_qkv.weight q6_K blk.42.ffn_down_exps.weight q6_K blk.42.ffn_down_shexp.weight q6_K -blk.44.attn_qkv.weight q5_K -blk.45.attn_qkv.weight q5_K +blk.45.attn_qkv.weight q6_K blk.45.ffn_down_exps.weight q6_K blk.45.ffn_down_shexp.weight q6_K -blk.46.attn_qkv.weight q5_K -blk.48.attn_qkv.weight q5_K +blk.48.attn_qkv.weight q6_K blk.48.ffn_down_exps.weight q6_K blk.48.ffn_down_shexp.weight q6_K -blk.49.attn_qkv.weight q5_K -blk.50.attn_qkv.weight q5_K blk.51.attn_v.weight q6_K blk.51.ffn_down_exps.weight q6_K blk.51.ffn_down_shexp.weight q6_K -blk.52.attn_qkv.weight q5_K +blk.52.attn_qkv.weight q6_K blk.52.ffn_down_exps.weight q6_K blk.52.ffn_down_shexp.weight q6_K -blk.53.attn_qkv.weight q5_K +blk.53.attn_qkv.weight q6_K blk.53.ffn_down_exps.weight q6_K blk.53.ffn_down_shexp.weight q6_K -blk.54.attn_qkv.weight q5_K +blk.54.attn_qkv.weight q6_K blk.54.ffn_down_exps.weight q6_K blk.54.ffn_down_shexp.weight q6_K +blk.55.attn_v.weight q6_K blk.55.ffn_down_exps.weight q6_K blk.55.ffn_down_shexp.weight q6_K -blk.56.attn_qkv.weight q5_K +blk.56.attn_qkv.weight q6_K blk.56.ffn_down_exps.weight q6_K blk.56.ffn_down_shexp.weight q6_K -blk.57.attn_qkv.weight q5_K +blk.57.attn_qkv.weight q6_K blk.57.ffn_down_exps.weight q6_K blk.57.ffn_down_shexp.weight q6_K -blk.58.attn_qkv.weight q5_K +blk.58.attn_qkv.weight q6_K blk.58.ffn_down_exps.weight q6_K blk.58.ffn_down_shexp.weight q6_K +blk.59.attn_v.weight q6_K blk.59.ffn_down_exps.weight q6_K blk.59.ffn_down_shexp.weight q6_K @@ -737,74 +758,48 @@ blk.5.ffn_down_shexp.weight q6_K blk.6.attn_qkv.weight q6_K blk.6.ffn_down_exps.weight q6_K blk.6.ffn_down_shexp.weight q6_K -blk.7.attn_v.weight q6_K -blk.8.attn_qkv.weight q6_K blk.9.attn_qkv.weight q6_K blk.9.ffn_down_exps.weight q6_K blk.9.ffn_down_shexp.weight q6_K -blk.10.attn_qkv.weight q6_K -blk.11.attn_v.weight q6_K blk.12.attn_qkv.weight q6_K blk.12.ffn_down_exps.weight q6_K blk.12.ffn_down_shexp.weight q6_K -blk.13.attn_qkv.weight q6_K -blk.14.attn_qkv.weight q6_K blk.15.attn_v.weight q6_K blk.15.ffn_down_exps.weight q6_K blk.15.ffn_down_shexp.weight q6_K -blk.16.attn_qkv.weight q6_K -blk.17.attn_qkv.weight q6_K blk.18.attn_qkv.weight q6_K blk.18.ffn_down_exps.weight q6_K blk.18.ffn_down_shexp.weight q6_K -blk.19.attn_v.weight q6_K -blk.20.attn_qkv.weight q6_K blk.21.attn_qkv.weight q6_K blk.21.ffn_down_exps.weight q6_K blk.21.ffn_down_shexp.weight q6_K -blk.22.attn_qkv.weight q6_K -blk.23.attn_v.weight q6_K blk.24.attn_qkv.weight q6_K blk.24.ffn_down_exps.weight q6_K blk.24.ffn_down_shexp.weight q6_K -blk.25.attn_qkv.weight q6_K -blk.26.attn_qkv.weight q6_K blk.27.attn_v.weight q6_K blk.27.ffn_down_exps.weight q6_K blk.27.ffn_down_shexp.weight q6_K -blk.28.attn_qkv.weight q6_K -blk.29.attn_qkv.weight q6_K blk.30.attn_qkv.weight q6_K blk.30.ffn_down_exps.weight q6_K blk.30.ffn_down_shexp.weight q6_K -blk.32.attn_qkv.weight q6_K blk.33.attn_qkv.weight q6_K blk.33.ffn_down_exps.weight q6_K blk.33.ffn_down_shexp.weight q6_K -blk.34.attn_qkv.weight q6_K blk.36.attn_qkv.weight q6_K blk.36.ffn_down_exps.weight q6_K blk.36.ffn_down_shexp.weight q6_K -blk.37.attn_qkv.weight q6_K -blk.38.attn_qkv.weight q6_K blk.39.attn_v.weight q6_K blk.39.ffn_down_exps.weight q6_K blk.39.ffn_down_shexp.weight q6_K -blk.40.attn_qkv.weight q6_K -blk.41.attn_qkv.weight q6_K blk.42.attn_qkv.weight q6_K blk.42.ffn_down_exps.weight q6_K blk.42.ffn_down_shexp.weight q6_K -blk.44.attn_qkv.weight q6_K blk.45.attn_qkv.weight q6_K blk.45.ffn_down_exps.weight q6_K blk.45.ffn_down_shexp.weight q6_K -blk.46.attn_qkv.weight q6_K blk.48.attn_qkv.weight q6_K blk.48.ffn_down_exps.weight q6_K blk.48.ffn_down_shexp.weight q6_K -blk.49.attn_qkv.weight q6_K -blk.50.attn_qkv.weight q6_K blk.51.attn_v.weight q6_K blk.51.ffn_down_exps.weight q6_K blk.51.ffn_down_shexp.weight q6_K @@ -817,6 +812,7 @@ blk.53.ffn_down_shexp.weight q6_K blk.54.attn_qkv.weight q6_K blk.54.ffn_down_exps.weight q6_K blk.54.ffn_down_shexp.weight q6_K +blk.55.attn_v.weight q6_K blk.55.ffn_down_exps.weight q6_K blk.55.ffn_down_shexp.weight q6_K blk.56.attn_qkv.weight q6_K @@ -828,6 +824,7 @@ blk.57.ffn_down_shexp.weight q6_K blk.58.attn_qkv.weight q6_K blk.58.ffn_down_exps.weight q6_K blk.58.ffn_down_shexp.weight q6_K +blk.59.attn_v.weight q6_K blk.59.ffn_down_exps.weight q6_K blk.59.ffn_down_shexp.weight q6_K @@ -836,92 +833,233 @@ blk.59.ffn_down_shexp.weight q6_K [IQ2_XXS] iq2_xxs output.weight q5_K token_embd.weight q2_K +blk.0.attn_qkv.weight q4_K blk.0.ffn_down_exps.weight q2_K blk.0.ffn_down_shexp.weight q2_K +blk.1.attn_qkv.weight q4_K blk.1.ffn_down_exps.weight q2_K blk.1.ffn_down_shexp.weight q2_K +blk.2.attn_qkv.weight q4_K blk.2.ffn_down_exps.weight q2_K blk.2.ffn_down_shexp.weight q2_K blk.3.attn_v.weight q4_K blk.3.ffn_down_exps.weight q2_K +blk.4.attn_qkv.weight q4_K +blk.5.attn_qkv.weight q4_K +blk.6.attn_qkv.weight q4_K blk.7.attn_v.weight q4_K +blk.8.attn_qkv.weight q4_K +blk.9.attn_qkv.weight q4_K +blk.10.attn_qkv.weight q4_K blk.11.attn_v.weight q4_K +blk.12.attn_qkv.weight q4_K +blk.13.attn_qkv.weight q4_K +blk.14.attn_qkv.weight q4_K blk.15.attn_v.weight q4_K +blk.16.attn_qkv.weight q4_K +blk.17.attn_qkv.weight q4_K +blk.18.attn_qkv.weight q4_K blk.19.attn_v.weight q4_K +blk.20.attn_qkv.weight q4_K +blk.21.attn_qkv.weight q4_K +blk.22.attn_qkv.weight q4_K blk.23.attn_v.weight q4_K +blk.24.attn_qkv.weight q4_K +blk.25.attn_qkv.weight q4_K +blk.26.attn_qkv.weight q4_K blk.27.attn_v.weight q4_K +blk.28.attn_qkv.weight q4_K +blk.29.attn_qkv.weight q4_K +blk.30.attn_qkv.weight q4_K blk.31.attn_v.weight q4_K +blk.32.attn_qkv.weight q4_K +blk.33.attn_qkv.weight q4_K +blk.34.attn_qkv.weight q4_K blk.35.attn_v.weight q4_K +blk.36.attn_qkv.weight q4_K +blk.37.attn_qkv.weight q4_K +blk.38.attn_qkv.weight q4_K blk.39.attn_v.weight q4_K +blk.40.attn_qkv.weight q4_K +blk.41.attn_qkv.weight q4_K +blk.42.attn_qkv.weight q4_K blk.43.attn_v.weight q4_K +blk.44.attn_qkv.weight q4_K +blk.45.attn_qkv.weight q4_K +blk.46.attn_qkv.weight q4_K blk.47.attn_v.weight q4_K +blk.48.attn_qkv.weight q4_K +blk.49.attn_qkv.weight q4_K +blk.50.attn_qkv.weight q4_K blk.51.attn_v.weight q4_K +blk.52.attn_qkv.weight q4_K +blk.53.attn_qkv.weight q4_K +blk.54.attn_qkv.weight q4_K blk.55.attn_v.weight q4_K +blk.56.attn_qkv.weight q4_K +blk.57.attn_qkv.weight q4_K +blk.58.attn_qkv.weight q4_K blk.59.attn_v.weight q4_K [IQ2_XS] iq2_xs output.weight q5_K token_embd.weight q2_K +blk.0.attn_qkv.weight q4_K blk.0.ffn_down_exps.weight q2_K blk.0.ffn_down_shexp.weight q2_K +blk.1.attn_qkv.weight q4_K blk.1.ffn_down_exps.weight q2_K blk.1.ffn_down_shexp.weight q2_K +blk.2.attn_qkv.weight q4_K blk.2.ffn_down_exps.weight q2_K blk.2.ffn_down_shexp.weight q2_K blk.3.attn_v.weight q4_K blk.3.ffn_down_exps.weight q2_K +blk.4.attn_qkv.weight q4_K +blk.5.attn_qkv.weight q4_K +blk.6.attn_qkv.weight q4_K blk.7.attn_v.weight q4_K +blk.8.attn_qkv.weight q4_K +blk.9.attn_qkv.weight q4_K +blk.10.attn_qkv.weight q4_K blk.11.attn_v.weight q4_K +blk.12.attn_qkv.weight q4_K +blk.13.attn_qkv.weight q4_K +blk.14.attn_qkv.weight q4_K blk.15.attn_v.weight q4_K +blk.16.attn_qkv.weight q4_K +blk.17.attn_qkv.weight q4_K +blk.18.attn_qkv.weight q4_K blk.19.attn_v.weight q4_K +blk.20.attn_qkv.weight q4_K +blk.21.attn_qkv.weight q4_K +blk.22.attn_qkv.weight q4_K blk.23.attn_v.weight q4_K +blk.24.attn_qkv.weight q4_K +blk.25.attn_qkv.weight q4_K +blk.26.attn_qkv.weight q4_K blk.27.attn_v.weight q4_K +blk.28.attn_qkv.weight q4_K +blk.29.attn_qkv.weight q4_K +blk.30.attn_qkv.weight q4_K blk.31.attn_v.weight q4_K +blk.32.attn_qkv.weight q4_K +blk.33.attn_qkv.weight q4_K +blk.34.attn_qkv.weight q4_K blk.35.attn_v.weight q4_K +blk.36.attn_qkv.weight q4_K +blk.37.attn_qkv.weight q4_K +blk.38.attn_qkv.weight q4_K blk.39.attn_v.weight q4_K +blk.40.attn_qkv.weight q4_K +blk.41.attn_qkv.weight q4_K +blk.42.attn_qkv.weight q4_K blk.43.attn_v.weight q4_K +blk.44.attn_qkv.weight q4_K +blk.45.attn_qkv.weight q4_K +blk.46.attn_qkv.weight q4_K blk.47.attn_v.weight q4_K +blk.48.attn_qkv.weight q4_K +blk.49.attn_qkv.weight q4_K +blk.50.attn_qkv.weight q4_K blk.51.attn_v.weight q4_K +blk.52.attn_qkv.weight q4_K +blk.53.attn_qkv.weight q4_K +blk.54.attn_qkv.weight q4_K blk.55.attn_v.weight q4_K +blk.56.attn_qkv.weight q4_K +blk.57.attn_qkv.weight q4_K +blk.58.attn_qkv.weight q4_K blk.59.attn_v.weight q4_K [Q2_K_S] q2_K output.weight q6_K +blk.0.attn_qkv.weight q4_K blk.0.ffn_down_exps.weight q4_K blk.0.ffn_down_shexp.weight q4_K +blk.1.attn_qkv.weight q4_K blk.1.ffn_down_exps.weight q4_K blk.1.ffn_down_shexp.weight q4_K +blk.2.attn_qkv.weight q4_K blk.2.ffn_down_exps.weight q4_K blk.2.ffn_down_shexp.weight q4_K blk.3.attn_v.weight q4_K blk.3.ffn_down_exps.weight q4_K blk.3.ffn_down_shexp.weight q4_K +blk.4.attn_qkv.weight q4_K blk.4.ffn_down_exps.weight q4_K blk.4.ffn_down_shexp.weight q4_K +blk.5.attn_qkv.weight q4_K blk.5.ffn_down_exps.weight q4_K blk.5.ffn_down_shexp.weight q4_K +blk.6.attn_qkv.weight q4_K blk.6.ffn_down_exps.weight q4_K blk.6.ffn_down_shexp.weight q4_K blk.7.attn_v.weight q4_K +blk.8.attn_qkv.weight q4_K +blk.9.attn_qkv.weight q4_K +blk.10.attn_qkv.weight q4_K blk.11.attn_v.weight q4_K +blk.12.attn_qkv.weight q4_K +blk.13.attn_qkv.weight q4_K +blk.14.attn_qkv.weight q4_K blk.15.attn_v.weight q4_K +blk.16.attn_qkv.weight q4_K +blk.17.attn_qkv.weight q4_K +blk.18.attn_qkv.weight q4_K blk.19.attn_v.weight q4_K +blk.20.attn_qkv.weight q4_K +blk.21.attn_qkv.weight q4_K +blk.22.attn_qkv.weight q4_K blk.23.attn_v.weight q4_K +blk.24.attn_qkv.weight q4_K +blk.25.attn_qkv.weight q4_K +blk.26.attn_qkv.weight q4_K blk.27.attn_v.weight q4_K +blk.28.attn_qkv.weight q4_K +blk.29.attn_qkv.weight q4_K +blk.30.attn_qkv.weight q4_K blk.31.attn_v.weight q4_K +blk.32.attn_qkv.weight q4_K +blk.33.attn_qkv.weight q4_K +blk.34.attn_qkv.weight q4_K blk.35.attn_v.weight q4_K +blk.36.attn_qkv.weight q4_K +blk.37.attn_qkv.weight q4_K +blk.38.attn_qkv.weight q4_K blk.39.attn_v.weight q4_K +blk.40.attn_qkv.weight q4_K +blk.41.attn_qkv.weight q4_K +blk.42.attn_qkv.weight q4_K blk.43.attn_v.weight q4_K +blk.44.attn_qkv.weight q4_K +blk.45.attn_qkv.weight q4_K +blk.46.attn_qkv.weight q4_K blk.47.attn_v.weight q4_K +blk.48.attn_qkv.weight q4_K +blk.49.attn_qkv.weight q4_K +blk.50.attn_qkv.weight q4_K blk.51.attn_v.weight q4_K +blk.52.attn_qkv.weight q4_K +blk.53.attn_qkv.weight q4_K +blk.54.attn_qkv.weight q4_K blk.55.attn_v.weight q4_K +blk.56.attn_qkv.weight q4_K +blk.57.attn_qkv.weight q4_K +blk.58.attn_qkv.weight q4_K blk.59.attn_v.weight q4_K [IQ3_XS] iq3_s output.weight q6_K +blk.0.attn_qkv.weight q4_K +blk.1.attn_qkv.weight q4_K +blk.2.attn_qkv.weight q4_K blk.3.attn_k.weight iq3_xxs blk.3.attn_q.weight iq3_xxs blk.3.attn_v.weight q4_K +blk.4.attn_qkv.weight q4_K +blk.5.attn_qkv.weight q4_K +blk.6.attn_qkv.weight q4_K blk.7.attn_k.weight iq3_xxs blk.7.attn_q.weight iq3_xxs blk.7.attn_v.weight q4_K @@ -929,14 +1067,17 @@ blk.7.ffn_gate_exps.weight iq3_xxs blk.7.ffn_gate_shexp.weight iq3_xxs blk.7.ffn_up_exps.weight iq3_xxs blk.7.ffn_up_shexp.weight iq3_xxs +blk.8.attn_qkv.weight q4_K blk.8.ffn_gate_exps.weight iq3_xxs blk.8.ffn_gate_shexp.weight iq3_xxs blk.8.ffn_up_exps.weight iq3_xxs blk.8.ffn_up_shexp.weight iq3_xxs +blk.9.attn_qkv.weight q4_K blk.9.ffn_gate_exps.weight iq3_xxs blk.9.ffn_gate_shexp.weight iq3_xxs blk.9.ffn_up_exps.weight iq3_xxs blk.9.ffn_up_shexp.weight iq3_xxs +blk.10.attn_qkv.weight q4_K blk.10.ffn_gate_exps.weight iq3_xxs blk.10.ffn_gate_shexp.weight iq3_xxs blk.10.ffn_up_exps.weight iq3_xxs @@ -948,14 +1089,17 @@ blk.11.ffn_gate_exps.weight iq3_xxs blk.11.ffn_gate_shexp.weight iq3_xxs blk.11.ffn_up_exps.weight iq3_xxs blk.11.ffn_up_shexp.weight iq3_xxs +blk.12.attn_qkv.weight q4_K blk.12.ffn_gate_exps.weight iq3_xxs blk.12.ffn_gate_shexp.weight iq3_xxs blk.12.ffn_up_exps.weight iq3_xxs blk.12.ffn_up_shexp.weight iq3_xxs +blk.13.attn_qkv.weight q4_K blk.13.ffn_gate_exps.weight iq3_xxs blk.13.ffn_gate_shexp.weight iq3_xxs blk.13.ffn_up_exps.weight iq3_xxs blk.13.ffn_up_shexp.weight iq3_xxs +blk.14.attn_qkv.weight q4_K blk.14.ffn_gate_exps.weight iq3_xxs blk.14.ffn_gate_shexp.weight iq3_xxs blk.14.ffn_up_exps.weight iq3_xxs @@ -967,14 +1111,17 @@ blk.15.ffn_gate_exps.weight iq3_xxs blk.15.ffn_gate_shexp.weight iq3_xxs blk.15.ffn_up_exps.weight iq3_xxs blk.15.ffn_up_shexp.weight iq3_xxs +blk.16.attn_qkv.weight q4_K blk.16.ffn_gate_exps.weight iq3_xxs blk.16.ffn_gate_shexp.weight iq3_xxs blk.16.ffn_up_exps.weight iq3_xxs blk.16.ffn_up_shexp.weight iq3_xxs +blk.17.attn_qkv.weight q4_K blk.17.ffn_gate_exps.weight iq3_xxs blk.17.ffn_gate_shexp.weight iq3_xxs blk.17.ffn_up_exps.weight iq3_xxs blk.17.ffn_up_shexp.weight iq3_xxs +blk.18.attn_qkv.weight q4_K blk.18.ffn_gate_exps.weight iq3_xxs blk.18.ffn_gate_shexp.weight iq3_xxs blk.18.ffn_up_exps.weight iq3_xxs @@ -986,14 +1133,17 @@ blk.19.ffn_gate_exps.weight iq3_xxs blk.19.ffn_gate_shexp.weight iq3_xxs blk.19.ffn_up_exps.weight iq3_xxs blk.19.ffn_up_shexp.weight iq3_xxs +blk.20.attn_qkv.weight q4_K blk.20.ffn_gate_exps.weight iq3_xxs blk.20.ffn_gate_shexp.weight iq3_xxs blk.20.ffn_up_exps.weight iq3_xxs blk.20.ffn_up_shexp.weight iq3_xxs +blk.21.attn_qkv.weight q4_K blk.21.ffn_gate_exps.weight iq3_xxs blk.21.ffn_gate_shexp.weight iq3_xxs blk.21.ffn_up_exps.weight iq3_xxs blk.21.ffn_up_shexp.weight iq3_xxs +blk.22.attn_qkv.weight q4_K blk.22.ffn_gate_exps.weight iq3_xxs blk.22.ffn_gate_shexp.weight iq3_xxs blk.22.ffn_up_exps.weight iq3_xxs @@ -1005,14 +1155,17 @@ blk.23.ffn_gate_exps.weight iq3_xxs blk.23.ffn_gate_shexp.weight iq3_xxs blk.23.ffn_up_exps.weight iq3_xxs blk.23.ffn_up_shexp.weight iq3_xxs +blk.24.attn_qkv.weight q4_K blk.24.ffn_gate_exps.weight iq3_xxs blk.24.ffn_gate_shexp.weight iq3_xxs blk.24.ffn_up_exps.weight iq3_xxs blk.24.ffn_up_shexp.weight iq3_xxs +blk.25.attn_qkv.weight q4_K blk.25.ffn_gate_exps.weight iq3_xxs blk.25.ffn_gate_shexp.weight iq3_xxs blk.25.ffn_up_exps.weight iq3_xxs blk.25.ffn_up_shexp.weight iq3_xxs +blk.26.attn_qkv.weight q4_K blk.26.ffn_gate_exps.weight iq3_xxs blk.26.ffn_gate_shexp.weight iq3_xxs blk.26.ffn_up_exps.weight iq3_xxs @@ -1024,14 +1177,17 @@ blk.27.ffn_gate_exps.weight iq3_xxs blk.27.ffn_gate_shexp.weight iq3_xxs blk.27.ffn_up_exps.weight iq3_xxs blk.27.ffn_up_shexp.weight iq3_xxs +blk.28.attn_qkv.weight q4_K blk.28.ffn_gate_exps.weight iq3_xxs blk.28.ffn_gate_shexp.weight iq3_xxs blk.28.ffn_up_exps.weight iq3_xxs blk.28.ffn_up_shexp.weight iq3_xxs +blk.29.attn_qkv.weight q4_K blk.29.ffn_gate_exps.weight iq3_xxs blk.29.ffn_gate_shexp.weight iq3_xxs blk.29.ffn_up_exps.weight iq3_xxs blk.29.ffn_up_shexp.weight iq3_xxs +blk.30.attn_qkv.weight q4_K blk.30.ffn_gate_exps.weight iq3_xxs blk.30.ffn_gate_shexp.weight iq3_xxs blk.30.ffn_up_exps.weight iq3_xxs @@ -1043,14 +1199,17 @@ blk.31.ffn_gate_exps.weight iq3_xxs blk.31.ffn_gate_shexp.weight iq3_xxs blk.31.ffn_up_exps.weight iq3_xxs blk.31.ffn_up_shexp.weight iq3_xxs +blk.32.attn_qkv.weight q4_K blk.32.ffn_gate_exps.weight iq3_xxs blk.32.ffn_gate_shexp.weight iq3_xxs blk.32.ffn_up_exps.weight iq3_xxs blk.32.ffn_up_shexp.weight iq3_xxs +blk.33.attn_qkv.weight q4_K blk.33.ffn_gate_exps.weight iq3_xxs blk.33.ffn_gate_shexp.weight iq3_xxs blk.33.ffn_up_exps.weight iq3_xxs blk.33.ffn_up_shexp.weight iq3_xxs +blk.34.attn_qkv.weight q4_K blk.34.ffn_gate_exps.weight iq3_xxs blk.34.ffn_gate_shexp.weight iq3_xxs blk.34.ffn_up_exps.weight iq3_xxs @@ -1062,14 +1221,17 @@ blk.35.ffn_gate_exps.weight iq3_xxs blk.35.ffn_gate_shexp.weight iq3_xxs blk.35.ffn_up_exps.weight iq3_xxs blk.35.ffn_up_shexp.weight iq3_xxs +blk.36.attn_qkv.weight q4_K blk.36.ffn_gate_exps.weight iq3_xxs blk.36.ffn_gate_shexp.weight iq3_xxs blk.36.ffn_up_exps.weight iq3_xxs blk.36.ffn_up_shexp.weight iq3_xxs +blk.37.attn_qkv.weight q4_K blk.37.ffn_gate_exps.weight iq3_xxs blk.37.ffn_gate_shexp.weight iq3_xxs blk.37.ffn_up_exps.weight iq3_xxs blk.37.ffn_up_shexp.weight iq3_xxs +blk.38.attn_qkv.weight q4_K blk.38.ffn_gate_exps.weight iq3_xxs blk.38.ffn_gate_shexp.weight iq3_xxs blk.38.ffn_up_exps.weight iq3_xxs @@ -1081,14 +1243,17 @@ blk.39.ffn_gate_exps.weight iq3_xxs blk.39.ffn_gate_shexp.weight iq3_xxs blk.39.ffn_up_exps.weight iq3_xxs blk.39.ffn_up_shexp.weight iq3_xxs +blk.40.attn_qkv.weight q4_K blk.40.ffn_gate_exps.weight iq3_xxs blk.40.ffn_gate_shexp.weight iq3_xxs blk.40.ffn_up_exps.weight iq3_xxs blk.40.ffn_up_shexp.weight iq3_xxs +blk.41.attn_qkv.weight q4_K blk.41.ffn_gate_exps.weight iq3_xxs blk.41.ffn_gate_shexp.weight iq3_xxs blk.41.ffn_up_exps.weight iq3_xxs blk.41.ffn_up_shexp.weight iq3_xxs +blk.42.attn_qkv.weight q4_K blk.42.ffn_gate_exps.weight iq3_xxs blk.42.ffn_gate_shexp.weight iq3_xxs blk.42.ffn_up_exps.weight iq3_xxs @@ -1100,14 +1265,17 @@ blk.43.ffn_gate_exps.weight iq3_xxs blk.43.ffn_gate_shexp.weight iq3_xxs blk.43.ffn_up_exps.weight iq3_xxs blk.43.ffn_up_shexp.weight iq3_xxs +blk.44.attn_qkv.weight q4_K blk.44.ffn_gate_exps.weight iq3_xxs blk.44.ffn_gate_shexp.weight iq3_xxs blk.44.ffn_up_exps.weight iq3_xxs blk.44.ffn_up_shexp.weight iq3_xxs +blk.45.attn_qkv.weight q4_K blk.45.ffn_gate_exps.weight iq3_xxs blk.45.ffn_gate_shexp.weight iq3_xxs blk.45.ffn_up_exps.weight iq3_xxs blk.45.ffn_up_shexp.weight iq3_xxs +blk.46.attn_qkv.weight q4_K blk.46.ffn_gate_exps.weight iq3_xxs blk.46.ffn_gate_shexp.weight iq3_xxs blk.46.ffn_up_exps.weight iq3_xxs @@ -1119,14 +1287,17 @@ blk.47.ffn_gate_exps.weight iq3_xxs blk.47.ffn_gate_shexp.weight iq3_xxs blk.47.ffn_up_exps.weight iq3_xxs blk.47.ffn_up_shexp.weight iq3_xxs +blk.48.attn_qkv.weight q4_K blk.48.ffn_gate_exps.weight iq3_xxs blk.48.ffn_gate_shexp.weight iq3_xxs blk.48.ffn_up_exps.weight iq3_xxs blk.48.ffn_up_shexp.weight iq3_xxs +blk.49.attn_qkv.weight q4_K blk.49.ffn_gate_exps.weight iq3_xxs blk.49.ffn_gate_shexp.weight iq3_xxs blk.49.ffn_up_exps.weight iq3_xxs blk.49.ffn_up_shexp.weight iq3_xxs +blk.50.attn_qkv.weight q4_K blk.50.ffn_gate_exps.weight iq3_xxs blk.50.ffn_gate_shexp.weight iq3_xxs blk.50.ffn_up_exps.weight iq3_xxs @@ -1138,9 +1309,15 @@ blk.51.ffn_gate_exps.weight iq3_xxs blk.51.ffn_gate_shexp.weight iq3_xxs blk.51.ffn_up_exps.weight iq3_xxs blk.51.ffn_up_shexp.weight iq3_xxs +blk.52.attn_qkv.weight q4_K +blk.53.attn_qkv.weight q4_K +blk.54.attn_qkv.weight q4_K blk.55.attn_k.weight iq3_xxs blk.55.attn_q.weight iq3_xxs blk.55.attn_v.weight q4_K +blk.56.attn_qkv.weight q4_K +blk.57.attn_qkv.weight q4_K +blk.58.attn_qkv.weight q4_K blk.59.attn_k.weight iq3_xxs blk.59.attn_q.weight iq3_xxs blk.59.attn_v.weight q4_K @@ -1148,10 +1325,13 @@ blk.59.attn_v.weight q4_K [IQ3_XXS] iq3_xxs output.weight q5_K token_embd.weight iq3_s +blk.0.attn_qkv.weight q4_K blk.0.ffn_down_exps.weight q4_K blk.0.ffn_down_shexp.weight q4_K +blk.1.attn_qkv.weight q4_K blk.1.ffn_down_exps.weight q4_K blk.1.ffn_down_shexp.weight q4_K +blk.2.attn_qkv.weight q4_K blk.2.ffn_down_exps.weight q4_K blk.2.ffn_down_shexp.weight q4_K blk.3.attn_k.weight iq2_s @@ -1160,10 +1340,13 @@ blk.3.attn_q.weight iq2_s blk.3.attn_v.weight q4_K blk.3.ffn_down_exps.weight q4_K blk.3.ffn_down_shexp.weight q4_K +blk.4.attn_qkv.weight q4_K blk.4.ffn_down_exps.weight q4_K blk.4.ffn_down_shexp.weight q4_K +blk.5.attn_qkv.weight q4_K blk.5.ffn_down_exps.weight q4_K blk.5.ffn_down_shexp.weight q4_K +blk.6.attn_qkv.weight q4_K blk.6.ffn_down_exps.weight q4_K blk.6.ffn_down_shexp.weight q4_K blk.7.attn_k.weight iq2_s @@ -1172,10 +1355,13 @@ blk.7.attn_q.weight iq2_s blk.7.attn_v.weight q4_K blk.7.ffn_down_exps.weight q3_K blk.7.ffn_down_shexp.weight q3_K +blk.8.attn_qkv.weight q4_K blk.8.ffn_down_exps.weight q3_K blk.8.ffn_down_shexp.weight q3_K +blk.9.attn_qkv.weight q4_K blk.9.ffn_down_exps.weight q3_K blk.9.ffn_down_shexp.weight q3_K +blk.10.attn_qkv.weight q4_K blk.10.ffn_down_exps.weight q3_K blk.10.ffn_down_shexp.weight q3_K blk.11.attn_k.weight iq2_s @@ -1184,10 +1370,13 @@ blk.11.attn_q.weight iq2_s blk.11.attn_v.weight q4_K blk.11.ffn_down_exps.weight q3_K blk.11.ffn_down_shexp.weight q3_K +blk.12.attn_qkv.weight q4_K blk.12.ffn_down_exps.weight q3_K blk.12.ffn_down_shexp.weight q3_K +blk.13.attn_qkv.weight q4_K blk.13.ffn_down_exps.weight q3_K blk.13.ffn_down_shexp.weight q3_K +blk.14.attn_qkv.weight q4_K blk.14.ffn_down_exps.weight q3_K blk.14.ffn_down_shexp.weight q3_K blk.15.attn_k.weight iq2_s @@ -1196,10 +1385,13 @@ blk.15.attn_q.weight iq2_s blk.15.attn_v.weight q4_K blk.15.ffn_down_exps.weight q3_K blk.15.ffn_down_shexp.weight q3_K +blk.16.attn_qkv.weight q4_K blk.16.ffn_down_exps.weight q3_K blk.16.ffn_down_shexp.weight q3_K +blk.17.attn_qkv.weight q4_K blk.17.ffn_down_exps.weight q3_K blk.17.ffn_down_shexp.weight q3_K +blk.18.attn_qkv.weight q4_K blk.18.ffn_down_exps.weight q3_K blk.18.ffn_down_shexp.weight q3_K blk.19.attn_k.weight iq2_s @@ -1208,10 +1400,13 @@ blk.19.attn_q.weight iq2_s blk.19.attn_v.weight q4_K blk.19.ffn_down_exps.weight q3_K blk.19.ffn_down_shexp.weight q3_K +blk.20.attn_qkv.weight q4_K blk.20.ffn_down_exps.weight q3_K blk.20.ffn_down_shexp.weight q3_K +blk.21.attn_qkv.weight q4_K blk.21.ffn_down_exps.weight q3_K blk.21.ffn_down_shexp.weight q3_K +blk.22.attn_qkv.weight q4_K blk.22.ffn_down_exps.weight q3_K blk.22.ffn_down_shexp.weight q3_K blk.23.attn_k.weight iq2_s @@ -1220,10 +1415,13 @@ blk.23.attn_q.weight iq2_s blk.23.attn_v.weight q4_K blk.23.ffn_down_exps.weight q3_K blk.23.ffn_down_shexp.weight q3_K +blk.24.attn_qkv.weight q4_K blk.24.ffn_down_exps.weight q3_K blk.24.ffn_down_shexp.weight q3_K +blk.25.attn_qkv.weight q4_K blk.25.ffn_down_exps.weight q3_K blk.25.ffn_down_shexp.weight q3_K +blk.26.attn_qkv.weight q4_K blk.26.ffn_down_exps.weight q3_K blk.26.ffn_down_shexp.weight q3_K blk.27.attn_k.weight iq2_s @@ -1232,10 +1430,13 @@ blk.27.attn_q.weight iq2_s blk.27.attn_v.weight q4_K blk.27.ffn_down_exps.weight q3_K blk.27.ffn_down_shexp.weight q3_K +blk.28.attn_qkv.weight q4_K blk.28.ffn_down_exps.weight q3_K blk.28.ffn_down_shexp.weight q3_K +blk.29.attn_qkv.weight q4_K blk.29.ffn_down_exps.weight q3_K blk.29.ffn_down_shexp.weight q3_K +blk.30.attn_qkv.weight q4_K blk.30.ffn_down_exps.weight q3_K blk.30.ffn_down_shexp.weight q3_K blk.31.attn_k.weight iq2_s @@ -1244,10 +1445,13 @@ blk.31.attn_q.weight iq2_s blk.31.attn_v.weight q4_K blk.31.ffn_down_exps.weight q3_K blk.31.ffn_down_shexp.weight q3_K +blk.32.attn_qkv.weight q4_K blk.32.ffn_down_exps.weight q3_K blk.32.ffn_down_shexp.weight q3_K +blk.33.attn_qkv.weight q4_K blk.33.ffn_down_exps.weight q3_K blk.33.ffn_down_shexp.weight q3_K +blk.34.attn_qkv.weight q4_K blk.34.ffn_down_exps.weight q3_K blk.34.ffn_down_shexp.weight q3_K blk.35.attn_k.weight iq2_s @@ -1256,10 +1460,13 @@ blk.35.attn_q.weight iq2_s blk.35.attn_v.weight q4_K blk.35.ffn_down_exps.weight q3_K blk.35.ffn_down_shexp.weight q3_K +blk.36.attn_qkv.weight q4_K blk.36.ffn_down_exps.weight q3_K blk.36.ffn_down_shexp.weight q3_K +blk.37.attn_qkv.weight q4_K blk.37.ffn_down_exps.weight q3_K blk.37.ffn_down_shexp.weight q3_K +blk.38.attn_qkv.weight q4_K blk.38.ffn_down_exps.weight q3_K blk.38.ffn_down_shexp.weight q3_K blk.39.attn_k.weight iq2_s @@ -1268,10 +1475,13 @@ blk.39.attn_q.weight iq2_s blk.39.attn_v.weight q4_K blk.39.ffn_down_exps.weight q3_K blk.39.ffn_down_shexp.weight q3_K +blk.40.attn_qkv.weight q4_K blk.40.ffn_down_exps.weight q3_K blk.40.ffn_down_shexp.weight q3_K +blk.41.attn_qkv.weight q4_K blk.41.ffn_down_exps.weight q3_K blk.41.ffn_down_shexp.weight q3_K +blk.42.attn_qkv.weight q4_K blk.42.ffn_down_exps.weight q3_K blk.42.ffn_down_shexp.weight q3_K blk.43.attn_k.weight iq2_s @@ -1280,10 +1490,13 @@ blk.43.attn_q.weight iq2_s blk.43.attn_v.weight q4_K blk.43.ffn_down_exps.weight q3_K blk.43.ffn_down_shexp.weight q3_K +blk.44.attn_qkv.weight q4_K blk.44.ffn_down_exps.weight q3_K blk.44.ffn_down_shexp.weight q3_K +blk.45.attn_qkv.weight q4_K blk.45.ffn_down_exps.weight q3_K blk.45.ffn_down_shexp.weight q3_K +blk.46.attn_qkv.weight q4_K blk.46.ffn_down_exps.weight q3_K blk.46.ffn_down_shexp.weight q3_K blk.47.attn_k.weight iq2_s @@ -1292,10 +1505,13 @@ blk.47.attn_q.weight iq2_s blk.47.attn_v.weight q4_K blk.47.ffn_down_exps.weight q3_K blk.47.ffn_down_shexp.weight q3_K +blk.48.attn_qkv.weight q4_K blk.48.ffn_down_exps.weight q3_K blk.48.ffn_down_shexp.weight q3_K +blk.49.attn_qkv.weight q4_K blk.49.ffn_down_exps.weight q3_K blk.49.ffn_down_shexp.weight q3_K +blk.50.attn_qkv.weight q4_K blk.50.ffn_down_exps.weight q3_K blk.50.ffn_down_shexp.weight q3_K blk.51.attn_k.weight iq2_s @@ -1304,10 +1520,13 @@ blk.51.attn_q.weight iq2_s blk.51.attn_v.weight q4_K blk.51.ffn_down_exps.weight q3_K blk.51.ffn_down_shexp.weight q3_K +blk.52.attn_qkv.weight q4_K blk.52.ffn_down_exps.weight q3_K blk.52.ffn_down_shexp.weight q3_K +blk.53.attn_qkv.weight q4_K blk.53.ffn_down_exps.weight q3_K blk.53.ffn_down_shexp.weight q3_K +blk.54.attn_qkv.weight q4_K blk.54.ffn_down_exps.weight q3_K blk.54.ffn_down_shexp.weight q3_K blk.55.attn_k.weight iq2_s @@ -1316,10 +1535,13 @@ blk.55.attn_q.weight iq2_s blk.55.attn_v.weight q4_K blk.55.ffn_down_exps.weight q3_K blk.55.ffn_down_shexp.weight q3_K +blk.56.attn_qkv.weight q4_K blk.56.ffn_down_exps.weight q3_K blk.56.ffn_down_shexp.weight q3_K +blk.57.attn_qkv.weight q4_K blk.57.ffn_down_exps.weight q3_K blk.57.ffn_down_shexp.weight q3_K +blk.58.attn_qkv.weight q4_K blk.58.ffn_down_exps.weight q3_K blk.58.ffn_down_shexp.weight q3_K blk.59.attn_k.weight iq2_s @@ -1332,92 +1554,227 @@ blk.59.ffn_down_shexp.weight q3_K [IQ1_S] iq1_s output.weight q5_K token_embd.weight q2_K +blk.0.attn_qkv.weight q4_K blk.0.ffn_down_exps.weight q2_K blk.0.ffn_down_shexp.weight q2_K +blk.1.attn_qkv.weight q4_K blk.1.ffn_down_exps.weight q2_K blk.1.ffn_down_shexp.weight q2_K +blk.2.attn_qkv.weight q4_K blk.2.ffn_down_exps.weight q2_K blk.2.ffn_down_shexp.weight q2_K blk.3.attn_output.weight iq2_xxs blk.3.attn_v.weight q4_K blk.3.ffn_down_exps.weight q2_K +blk.4.attn_qkv.weight q4_K +blk.5.attn_qkv.weight q4_K +blk.6.attn_qkv.weight q4_K blk.7.attn_output.weight iq2_xxs blk.7.attn_v.weight q4_K +blk.8.attn_qkv.weight q4_K +blk.9.attn_qkv.weight q4_K +blk.10.attn_qkv.weight q4_K blk.11.attn_output.weight iq2_xxs blk.11.attn_v.weight q4_K +blk.12.attn_qkv.weight q4_K +blk.13.attn_qkv.weight q4_K +blk.14.attn_qkv.weight q4_K blk.15.attn_output.weight iq2_xxs blk.15.attn_v.weight q4_K +blk.16.attn_qkv.weight q4_K +blk.17.attn_qkv.weight q4_K +blk.18.attn_qkv.weight q4_K blk.19.attn_output.weight iq2_xxs blk.19.attn_v.weight q4_K +blk.20.attn_qkv.weight q4_K +blk.21.attn_qkv.weight q4_K +blk.22.attn_qkv.weight q4_K blk.23.attn_output.weight iq2_xxs blk.23.attn_v.weight q4_K +blk.24.attn_qkv.weight q4_K +blk.25.attn_qkv.weight q4_K +blk.26.attn_qkv.weight q4_K blk.27.attn_output.weight iq2_xxs blk.27.attn_v.weight q4_K +blk.28.attn_qkv.weight q4_K +blk.29.attn_qkv.weight q4_K +blk.30.attn_qkv.weight q4_K blk.31.attn_output.weight iq2_xxs blk.31.attn_v.weight q4_K +blk.32.attn_qkv.weight q4_K +blk.33.attn_qkv.weight q4_K +blk.34.attn_qkv.weight q4_K blk.35.attn_output.weight iq2_xxs blk.35.attn_v.weight q4_K +blk.36.attn_qkv.weight q4_K +blk.37.attn_qkv.weight q4_K +blk.38.attn_qkv.weight q4_K blk.39.attn_output.weight iq2_xxs blk.39.attn_v.weight q4_K +blk.40.attn_qkv.weight q4_K +blk.41.attn_qkv.weight q4_K +blk.42.attn_qkv.weight q4_K blk.43.attn_output.weight iq2_xxs blk.43.attn_v.weight q4_K +blk.44.attn_qkv.weight q4_K +blk.45.attn_qkv.weight q4_K +blk.46.attn_qkv.weight q4_K blk.47.attn_output.weight iq2_xxs blk.47.attn_v.weight q4_K +blk.48.attn_qkv.weight q4_K +blk.49.attn_qkv.weight q4_K +blk.50.attn_qkv.weight q4_K blk.51.attn_output.weight iq2_xxs blk.51.attn_v.weight q4_K +blk.52.attn_qkv.weight q4_K +blk.53.attn_qkv.weight q4_K +blk.54.attn_qkv.weight q4_K blk.55.attn_output.weight iq2_xxs blk.55.attn_v.weight q4_K +blk.56.attn_qkv.weight q4_K +blk.57.attn_qkv.weight q4_K +blk.58.attn_qkv.weight q4_K blk.59.attn_output.weight iq2_xxs blk.59.attn_v.weight q4_K [IQ4_NL] iq4_nl output.weight q6_K +blk.0.attn_qkv.weight q5_K blk.0.ffn_down_exps.weight q5_K blk.0.ffn_down_shexp.weight q5_K +blk.1.attn_qkv.weight q5_K blk.1.ffn_down_exps.weight q5_K blk.1.ffn_down_shexp.weight q5_K +blk.2.attn_qkv.weight q5_K blk.2.ffn_down_exps.weight q5_K blk.2.ffn_down_shexp.weight q5_K blk.3.attn_v.weight q5_K blk.3.ffn_down_exps.weight q5_K blk.3.ffn_down_shexp.weight q5_K +blk.4.attn_qkv.weight q5_K blk.4.ffn_down_exps.weight q5_K blk.4.ffn_down_shexp.weight q5_K +blk.5.attn_qkv.weight q5_K blk.5.ffn_down_exps.weight q5_K blk.5.ffn_down_shexp.weight q5_K +blk.6.attn_qkv.weight q5_K blk.6.ffn_down_exps.weight q5_K blk.6.ffn_down_shexp.weight q5_K blk.7.attn_v.weight q5_K +blk.8.attn_qkv.weight q5_K +blk.9.attn_qkv.weight q5_K +blk.10.attn_qkv.weight q5_K blk.11.attn_v.weight q5_K +blk.12.attn_qkv.weight q5_K +blk.13.attn_qkv.weight q5_K +blk.14.attn_qkv.weight q5_K blk.15.attn_v.weight q5_K +blk.16.attn_qkv.weight q5_K +blk.17.attn_qkv.weight q5_K +blk.18.attn_qkv.weight q5_K blk.19.attn_v.weight q5_K +blk.20.attn_qkv.weight q5_K +blk.21.attn_qkv.weight q5_K +blk.22.attn_qkv.weight q5_K blk.23.attn_v.weight q5_K +blk.24.attn_qkv.weight q5_K +blk.25.attn_qkv.weight q5_K +blk.26.attn_qkv.weight q5_K blk.27.attn_v.weight q5_K +blk.28.attn_qkv.weight q5_K +blk.29.attn_qkv.weight q5_K +blk.30.attn_qkv.weight q5_K blk.31.attn_v.weight q5_K +blk.32.attn_qkv.weight q5_K +blk.33.attn_qkv.weight q5_K +blk.34.attn_qkv.weight q5_K blk.35.attn_v.weight q5_K +blk.36.attn_qkv.weight q5_K +blk.37.attn_qkv.weight q5_K +blk.38.attn_qkv.weight q5_K blk.39.attn_v.weight q5_K +blk.40.attn_qkv.weight q5_K +blk.41.attn_qkv.weight q5_K +blk.42.attn_qkv.weight q5_K blk.43.attn_v.weight q5_K +blk.44.attn_qkv.weight q5_K +blk.45.attn_qkv.weight q5_K +blk.46.attn_qkv.weight q5_K blk.47.attn_v.weight q5_K +blk.48.attn_qkv.weight q5_K +blk.49.attn_qkv.weight q5_K +blk.50.attn_qkv.weight q5_K blk.51.attn_v.weight q5_K +blk.52.attn_qkv.weight q5_K +blk.53.attn_qkv.weight q5_K +blk.54.attn_qkv.weight q5_K blk.55.attn_v.weight q5_K +blk.56.attn_qkv.weight q5_K +blk.57.attn_qkv.weight q5_K +blk.58.attn_qkv.weight q5_K blk.59.attn_v.weight q5_K [IQ3_S] iq3_s output.weight q6_K +blk.0.attn_qkv.weight q4_K +blk.1.attn_qkv.weight q4_K +blk.2.attn_qkv.weight q4_K blk.3.attn_v.weight q4_K +blk.4.attn_qkv.weight q4_K +blk.5.attn_qkv.weight q4_K +blk.6.attn_qkv.weight q4_K blk.7.attn_v.weight q4_K +blk.8.attn_qkv.weight q4_K +blk.9.attn_qkv.weight q4_K +blk.10.attn_qkv.weight q4_K blk.11.attn_v.weight q4_K +blk.12.attn_qkv.weight q4_K +blk.13.attn_qkv.weight q4_K +blk.14.attn_qkv.weight q4_K blk.15.attn_v.weight q4_K +blk.16.attn_qkv.weight q4_K +blk.17.attn_qkv.weight q4_K +blk.18.attn_qkv.weight q4_K blk.19.attn_v.weight q4_K +blk.20.attn_qkv.weight q4_K +blk.21.attn_qkv.weight q4_K +blk.22.attn_qkv.weight q4_K blk.23.attn_v.weight q4_K +blk.24.attn_qkv.weight q4_K +blk.25.attn_qkv.weight q4_K +blk.26.attn_qkv.weight q4_K blk.27.attn_v.weight q4_K +blk.28.attn_qkv.weight q4_K +blk.29.attn_qkv.weight q4_K +blk.30.attn_qkv.weight q4_K blk.31.attn_v.weight q4_K +blk.32.attn_qkv.weight q4_K +blk.33.attn_qkv.weight q4_K +blk.34.attn_qkv.weight q4_K blk.35.attn_v.weight q4_K +blk.36.attn_qkv.weight q4_K +blk.37.attn_qkv.weight q4_K +blk.38.attn_qkv.weight q4_K blk.39.attn_v.weight q4_K +blk.40.attn_qkv.weight q4_K +blk.41.attn_qkv.weight q4_K +blk.42.attn_qkv.weight q4_K blk.43.attn_v.weight q4_K +blk.44.attn_qkv.weight q4_K +blk.45.attn_qkv.weight q4_K +blk.46.attn_qkv.weight q4_K blk.47.attn_v.weight q4_K +blk.48.attn_qkv.weight q4_K +blk.49.attn_qkv.weight q4_K +blk.50.attn_qkv.weight q4_K blk.51.attn_v.weight q4_K +blk.52.attn_qkv.weight q4_K +blk.53.attn_qkv.weight q4_K +blk.54.attn_qkv.weight q4_K blk.55.attn_v.weight q4_K +blk.56.attn_qkv.weight q4_K +blk.57.attn_qkv.weight q4_K +blk.58.attn_qkv.weight q4_K blk.59.attn_v.weight q4_K [IQ3_M] iq3_s @@ -1515,155 +1872,335 @@ blk.59.attn_v.weight q4_K [IQ2_S] iq2_xs output.weight q5_K token_embd.weight iq3_s +blk.0.attn_qkv.weight q4_K blk.0.ffn_down_exps.weight iq3_s blk.0.ffn_down_shexp.weight iq3_s +blk.1.attn_qkv.weight q4_K blk.1.ffn_down_exps.weight iq3_s blk.1.ffn_down_shexp.weight iq3_s +blk.2.attn_qkv.weight q4_K blk.2.ffn_down_exps.weight iq3_s blk.2.ffn_down_shexp.weight iq3_s blk.3.attn_output.weight iq3_s blk.3.attn_v.weight q4_K blk.3.ffn_down_exps.weight iq3_s +blk.4.attn_qkv.weight q4_K +blk.5.attn_qkv.weight q4_K +blk.6.attn_qkv.weight q4_K blk.7.attn_output.weight iq3_s blk.7.attn_v.weight q4_K +blk.8.attn_qkv.weight q4_K +blk.9.attn_qkv.weight q4_K +blk.10.attn_qkv.weight q4_K blk.11.attn_output.weight iq3_s blk.11.attn_v.weight q4_K +blk.12.attn_qkv.weight q4_K +blk.13.attn_qkv.weight q4_K +blk.14.attn_qkv.weight q4_K blk.15.attn_output.weight iq3_s blk.15.attn_v.weight q4_K +blk.16.attn_qkv.weight q4_K +blk.17.attn_qkv.weight q4_K +blk.18.attn_qkv.weight q4_K blk.19.attn_output.weight iq3_s blk.19.attn_v.weight q4_K +blk.20.attn_qkv.weight q4_K +blk.21.attn_qkv.weight q4_K +blk.22.attn_qkv.weight q4_K blk.23.attn_output.weight iq3_s blk.23.attn_v.weight q4_K +blk.24.attn_qkv.weight q4_K +blk.25.attn_qkv.weight q4_K +blk.26.attn_qkv.weight q4_K blk.27.attn_output.weight iq3_s blk.27.attn_v.weight q4_K +blk.28.attn_qkv.weight q4_K +blk.29.attn_qkv.weight q4_K +blk.30.attn_qkv.weight q4_K blk.31.attn_output.weight iq3_s blk.31.attn_v.weight q4_K +blk.32.attn_qkv.weight q4_K +blk.33.attn_qkv.weight q4_K +blk.34.attn_qkv.weight q4_K blk.35.attn_output.weight iq3_s blk.35.attn_v.weight q4_K +blk.36.attn_qkv.weight q4_K +blk.37.attn_qkv.weight q4_K +blk.38.attn_qkv.weight q4_K blk.39.attn_output.weight iq3_s blk.39.attn_v.weight q4_K +blk.40.attn_qkv.weight q4_K +blk.41.attn_qkv.weight q4_K +blk.42.attn_qkv.weight q4_K blk.43.attn_output.weight iq3_s blk.43.attn_v.weight q4_K +blk.44.attn_qkv.weight q4_K +blk.45.attn_qkv.weight q4_K +blk.46.attn_qkv.weight q4_K blk.47.attn_output.weight iq3_s blk.47.attn_v.weight q4_K +blk.48.attn_qkv.weight q4_K +blk.49.attn_qkv.weight q4_K +blk.50.attn_qkv.weight q4_K blk.51.attn_output.weight iq3_s blk.51.attn_v.weight q4_K +blk.52.attn_qkv.weight q4_K +blk.53.attn_qkv.weight q4_K +blk.54.attn_qkv.weight q4_K blk.55.attn_output.weight iq3_s blk.55.attn_v.weight q4_K +blk.56.attn_qkv.weight q4_K +blk.57.attn_qkv.weight q4_K +blk.58.attn_qkv.weight q4_K blk.59.attn_output.weight iq3_s blk.59.attn_v.weight q4_K [IQ2_M] iq2_s output.weight q5_K token_embd.weight iq3_s +blk.0.attn_qkv.weight q4_K blk.0.ffn_down_exps.weight iq3_s blk.0.ffn_down_shexp.weight iq3_s +blk.1.attn_qkv.weight q4_K blk.1.ffn_down_exps.weight iq3_s blk.1.ffn_down_shexp.weight iq3_s +blk.2.attn_qkv.weight q4_K blk.2.ffn_down_exps.weight iq3_s blk.2.ffn_down_shexp.weight iq3_s blk.3.attn_output.weight iq3_s blk.3.attn_v.weight q4_K blk.3.ffn_down_exps.weight iq3_s +blk.4.attn_qkv.weight q4_K +blk.5.attn_qkv.weight q4_K +blk.6.attn_qkv.weight q4_K blk.7.attn_output.weight iq3_s blk.7.attn_v.weight q4_K +blk.8.attn_qkv.weight q4_K +blk.9.attn_qkv.weight q4_K +blk.10.attn_qkv.weight q4_K blk.11.attn_output.weight iq3_s blk.11.attn_v.weight q4_K +blk.12.attn_qkv.weight q4_K +blk.13.attn_qkv.weight q4_K +blk.14.attn_qkv.weight q4_K blk.15.attn_output.weight iq3_s blk.15.attn_v.weight q4_K +blk.16.attn_qkv.weight q4_K +blk.17.attn_qkv.weight q4_K +blk.18.attn_qkv.weight q4_K blk.19.attn_output.weight iq3_s blk.19.attn_v.weight q4_K +blk.20.attn_qkv.weight q4_K +blk.21.attn_qkv.weight q4_K +blk.22.attn_qkv.weight q4_K blk.23.attn_output.weight iq3_s blk.23.attn_v.weight q4_K +blk.24.attn_qkv.weight q4_K +blk.25.attn_qkv.weight q4_K +blk.26.attn_qkv.weight q4_K blk.27.attn_output.weight iq3_s blk.27.attn_v.weight q4_K +blk.28.attn_qkv.weight q4_K +blk.29.attn_qkv.weight q4_K +blk.30.attn_qkv.weight q4_K blk.31.attn_output.weight iq3_s blk.31.attn_v.weight q4_K +blk.32.attn_qkv.weight q4_K +blk.33.attn_qkv.weight q4_K +blk.34.attn_qkv.weight q4_K blk.35.attn_output.weight iq3_s blk.35.attn_v.weight q4_K +blk.36.attn_qkv.weight q4_K +blk.37.attn_qkv.weight q4_K +blk.38.attn_qkv.weight q4_K blk.39.attn_output.weight iq3_s blk.39.attn_v.weight q4_K +blk.40.attn_qkv.weight q4_K +blk.41.attn_qkv.weight q4_K +blk.42.attn_qkv.weight q4_K blk.43.attn_output.weight iq3_s blk.43.attn_v.weight q4_K +blk.44.attn_qkv.weight q4_K +blk.45.attn_qkv.weight q4_K +blk.46.attn_qkv.weight q4_K blk.47.attn_output.weight iq3_s blk.47.attn_v.weight q4_K +blk.48.attn_qkv.weight q4_K +blk.49.attn_qkv.weight q4_K +blk.50.attn_qkv.weight q4_K blk.51.attn_output.weight iq3_s blk.51.attn_v.weight q4_K +blk.52.attn_qkv.weight q4_K +blk.53.attn_qkv.weight q4_K +blk.54.attn_qkv.weight q4_K blk.55.attn_output.weight iq3_s blk.55.attn_v.weight q4_K +blk.56.attn_qkv.weight q4_K +blk.57.attn_qkv.weight q4_K +blk.58.attn_qkv.weight q4_K blk.59.attn_output.weight iq3_s blk.59.attn_v.weight q4_K [IQ4_XS] iq4_xs output.weight q6_K +blk.0.attn_qkv.weight q5_K blk.0.ffn_down_exps.weight q5_K blk.0.ffn_down_shexp.weight q5_K +blk.1.attn_qkv.weight q5_K blk.1.ffn_down_exps.weight q5_K blk.1.ffn_down_shexp.weight q5_K +blk.2.attn_qkv.weight q5_K blk.2.ffn_down_exps.weight q5_K blk.2.ffn_down_shexp.weight q5_K blk.3.attn_v.weight q5_K blk.3.ffn_down_exps.weight q5_K blk.3.ffn_down_shexp.weight q5_K +blk.4.attn_qkv.weight q5_K blk.4.ffn_down_exps.weight q5_K blk.4.ffn_down_shexp.weight q5_K +blk.5.attn_qkv.weight q5_K blk.5.ffn_down_exps.weight q5_K blk.5.ffn_down_shexp.weight q5_K +blk.6.attn_qkv.weight q5_K blk.6.ffn_down_exps.weight q5_K blk.6.ffn_down_shexp.weight q5_K blk.7.attn_v.weight q5_K +blk.8.attn_qkv.weight q5_K +blk.9.attn_qkv.weight q5_K +blk.10.attn_qkv.weight q5_K blk.11.attn_v.weight q5_K +blk.12.attn_qkv.weight q5_K +blk.13.attn_qkv.weight q5_K +blk.14.attn_qkv.weight q5_K blk.15.attn_v.weight q5_K +blk.16.attn_qkv.weight q5_K +blk.17.attn_qkv.weight q5_K +blk.18.attn_qkv.weight q5_K blk.19.attn_v.weight q5_K +blk.20.attn_qkv.weight q5_K +blk.21.attn_qkv.weight q5_K +blk.22.attn_qkv.weight q5_K blk.23.attn_v.weight q5_K +blk.24.attn_qkv.weight q5_K +blk.25.attn_qkv.weight q5_K +blk.26.attn_qkv.weight q5_K blk.27.attn_v.weight q5_K +blk.28.attn_qkv.weight q5_K +blk.29.attn_qkv.weight q5_K +blk.30.attn_qkv.weight q5_K blk.31.attn_v.weight q5_K +blk.32.attn_qkv.weight q5_K +blk.33.attn_qkv.weight q5_K +blk.34.attn_qkv.weight q5_K blk.35.attn_v.weight q5_K +blk.36.attn_qkv.weight q5_K +blk.37.attn_qkv.weight q5_K +blk.38.attn_qkv.weight q5_K blk.39.attn_v.weight q5_K +blk.40.attn_qkv.weight q5_K +blk.41.attn_qkv.weight q5_K +blk.42.attn_qkv.weight q5_K blk.43.attn_v.weight q5_K +blk.44.attn_qkv.weight q5_K +blk.45.attn_qkv.weight q5_K +blk.46.attn_qkv.weight q5_K blk.47.attn_v.weight q5_K +blk.48.attn_qkv.weight q5_K +blk.49.attn_qkv.weight q5_K +blk.50.attn_qkv.weight q5_K blk.51.attn_v.weight q5_K +blk.52.attn_qkv.weight q5_K +blk.53.attn_qkv.weight q5_K +blk.54.attn_qkv.weight q5_K blk.55.attn_v.weight q5_K +blk.56.attn_qkv.weight q5_K +blk.57.attn_qkv.weight q5_K +blk.58.attn_qkv.weight q5_K blk.59.attn_v.weight q5_K [IQ1_M] iq1_m output.weight q5_K token_embd.weight q2_K +blk.0.attn_qkv.weight q4_K blk.0.ffn_down_exps.weight q2_K blk.0.ffn_down_shexp.weight q2_K +blk.1.attn_qkv.weight q4_K blk.1.ffn_down_exps.weight q2_K blk.1.ffn_down_shexp.weight q2_K +blk.2.attn_qkv.weight q4_K blk.2.ffn_down_exps.weight q2_K blk.2.ffn_down_shexp.weight q2_K blk.3.attn_output.weight iq2_xxs blk.3.attn_v.weight q4_K blk.3.ffn_down_exps.weight q2_K +blk.4.attn_qkv.weight q4_K +blk.5.attn_qkv.weight q4_K +blk.6.attn_qkv.weight q4_K blk.7.attn_output.weight iq2_xxs blk.7.attn_v.weight q4_K +blk.8.attn_qkv.weight q4_K +blk.9.attn_qkv.weight q4_K +blk.10.attn_qkv.weight q4_K blk.11.attn_output.weight iq2_xxs blk.11.attn_v.weight q4_K +blk.12.attn_qkv.weight q4_K +blk.13.attn_qkv.weight q4_K +blk.14.attn_qkv.weight q4_K blk.15.attn_output.weight iq2_xxs blk.15.attn_v.weight q4_K +blk.16.attn_qkv.weight q4_K +blk.17.attn_qkv.weight q4_K +blk.18.attn_qkv.weight q4_K blk.19.attn_output.weight iq2_xxs blk.19.attn_v.weight q4_K +blk.20.attn_qkv.weight q4_K +blk.21.attn_qkv.weight q4_K +blk.22.attn_qkv.weight q4_K blk.23.attn_output.weight iq2_xxs blk.23.attn_v.weight q4_K +blk.24.attn_qkv.weight q4_K +blk.25.attn_qkv.weight q4_K +blk.26.attn_qkv.weight q4_K blk.27.attn_output.weight iq2_xxs blk.27.attn_v.weight q4_K +blk.28.attn_qkv.weight q4_K +blk.29.attn_qkv.weight q4_K +blk.30.attn_qkv.weight q4_K blk.31.attn_output.weight iq2_xxs blk.31.attn_v.weight q4_K +blk.32.attn_qkv.weight q4_K +blk.33.attn_qkv.weight q4_K +blk.34.attn_qkv.weight q4_K blk.35.attn_output.weight iq2_xxs blk.35.attn_v.weight q4_K +blk.36.attn_qkv.weight q4_K +blk.37.attn_qkv.weight q4_K +blk.38.attn_qkv.weight q4_K blk.39.attn_output.weight iq2_xxs blk.39.attn_v.weight q4_K +blk.40.attn_qkv.weight q4_K +blk.41.attn_qkv.weight q4_K +blk.42.attn_qkv.weight q4_K blk.43.attn_output.weight iq2_xxs blk.43.attn_v.weight q4_K +blk.44.attn_qkv.weight q4_K +blk.45.attn_qkv.weight q4_K +blk.46.attn_qkv.weight q4_K blk.47.attn_output.weight iq2_xxs blk.47.attn_v.weight q4_K +blk.48.attn_qkv.weight q4_K +blk.49.attn_qkv.weight q4_K +blk.50.attn_qkv.weight q4_K blk.51.attn_output.weight iq2_xxs blk.51.attn_v.weight q4_K +blk.52.attn_qkv.weight q4_K +blk.53.attn_qkv.weight q4_K +blk.54.attn_qkv.weight q4_K blk.55.attn_output.weight iq2_xxs blk.55.attn_v.weight q4_K +blk.56.attn_qkv.weight q4_K +blk.57.attn_qkv.weight q4_K +blk.58.attn_qkv.weight q4_K blk.59.attn_output.weight iq2_xxs blk.59.attn_v.weight q4_K From 182cbe5d646c9a86cdcdf5dc80193553cb98afc1 Mon Sep 17 00:00:00 2001 From: Colin Kealty <3266127+bartowski1182@users.noreply.github.com> Date: Tue, 10 Mar 2026 11:45:43 -0400 Subject: [PATCH 10/21] Fix merge conflicts --- src/llama-quant.cpp | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/src/llama-quant.cpp b/src/llama-quant.cpp index 79c57832bf..82d7234c7c 100644 --- a/src/llama-quant.cpp +++ b/src/llama-quant.cpp @@ -913,7 +913,8 @@ static void llama_model_quantize_impl(const std::string & fname_inp, const std:: imatrix_data = static_cast>*>(params->imatrix); if (imatrix_data) { LLAMA_LOG_INFO("\n%s: have importance matrix data with %d entries\n", - __func__, (int)imatrix_data->size()); qs.has_imatrix = true; + __func__, (int)imatrix_data->size()); + qs.has_imatrix = true; // check imatrix for nans or infs for (const auto & kv : *imatrix_data) { for (float f : kv.second) { @@ -995,7 +996,7 @@ static void llama_model_quantize_impl(const std::string & fname_inp, const std:: }); } - { // Based on old loop + { std::vector tensor_names; tensor_names.reserve(tensors.size()); for (const auto * it : tensors) { @@ -1005,7 +1006,6 @@ static void llama_model_quantize_impl(const std::string & fname_inp, const std:: } int idx = 0; - uint16_t n_split = 1; // Assume split index is continuous @@ -1048,6 +1048,8 @@ static void llama_model_quantize_impl(const std::string & fname_inp, const std:: const struct ggml_tensor * tensor = it->tensor; const std::string name = ggml_get_name(tensor); + metadata[i].category = tensor_get_category(name); + uint16_t i_split = params->keep_split ? it->idx : 0; if (!ctx_outs[i_split]) { ctx_outs[i_split].reset(gguf_init_empty()); From 506a49006c7184886640e151c42b028c36667b59 Mon Sep 17 00:00:00 2001 From: Colin Kealty <3266127+bartowski1182@users.noreply.github.com> Date: Tue, 10 Mar 2026 11:49:23 -0400 Subject: [PATCH 11/21] White space --- src/llama-quant.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/llama-quant.cpp b/src/llama-quant.cpp index 82d7234c7c..65e5452e2f 100644 --- a/src/llama-quant.cpp +++ b/src/llama-quant.cpp @@ -1091,7 +1091,7 @@ static void llama_model_quantize_impl(const std::string & fname_inp, const std:: gguf_set_val_i32(ctx_outs[i].get(), ml.llm_kv(LLM_KV_SPLIT_TENSORS_COUNT).c_str(), (int32_t)tensors.size()); } } - + size_t total_size_org = 0; size_t total_size_new = 0; From 3948227d2312b0e220334c4a92ecab04f4cc4bf7 Mon Sep 17 00:00:00 2001 From: Colin Kealty <3266127+bartowski1182@users.noreply.github.com> Date: Wed, 11 Mar 2026 13:24:51 -0400 Subject: [PATCH 12/21] Update with latest changes to state counters --- CLAUDE.md | 37 +- src/llama-quant.cpp | 81 +- src/llama-quant.h | 35 +- tests/gguf-model-data.cpp | 29 +- tests/gguf-model-data.h | 3 +- tests/snapshots/deepseek-v3.1.schema | 1406 +++- tests/snapshots/gemma-3-4b-it.schema | 3 - tests/snapshots/glm-4.6v.schema | 2630 +++++-- tests/snapshots/gpt-oss-120b.schema | 6091 ++++++++++++++--- .../meta-llama-3.1-70b-instruct.schema | 3 - .../snapshots/nemotron-nano-3-30b-a3b.schema | 3524 ++++++++-- tests/snapshots/qwen3-0.6b.schema | 3 - tests/snapshots/qwen3-14b.schema | 3 - tests/snapshots/qwen3-coder-next.schema | 3 - tests/snapshots/qwen3.5-27b.schema | 3 - tests/snapshots/qwen3.5-397b-a17b.schema | 3 - tests/snapshots/step-3.5-flash.schema | 3 - tests/test-quant-type-selection.cpp | 23 +- 18 files changed, 11748 insertions(+), 2135 deletions(-) diff --git a/CLAUDE.md b/CLAUDE.md index 302cdeab99..f2bd317322 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -1 +1,36 @@ -IMPORTANT: Ensure you’ve thoroughly reviewed the [AGENTS.md](AGENTS.md) file before beginning any work. +IMPORTANT: Ensure you've thoroughly reviewed the [AGENTS.md](AGENTS.md) file before beginning any work. + +# AI Policy +- AI is assistive only; AI-generated PRs are restricted per AGENTS.md +- Contributor reviews and writes code themselves + +# Code Style & Conventions +- snake_case naming; optimize for longest common prefix +- 4 spaces indentation, brackets on same line +- `void * ptr`, `int & a` (space around pointer/reference) +- Avoid templates and fancy STL +- Use sized integer types (`int32_t`) in public API +- See [CONTRIBUTING.md](CONTRIBUTING.md) for full guidelines, naming, and PR process + +# ggml Tensor Conventions +- Data stored in row-major order +- Dimension 0 = columns, dimension 1 = rows, dimension 2 = matrices +- **Matrix multiply is unconventional**: `C = ggml_mul_mat(ctx, A, B)` means `C^T = A * B^T` + +# Quantization +- See [docs/quantization/](docs/quantization/) for comprehensive documentation +- See [docs/quantization/09-adding-new-types.md](docs/quantization/09-adding-new-types.md) for adding new types + +## Key Files +- `ggml/include/ggml.h`: type enums (`ggml_type`) +- `ggml/src/ggml-common.h`: block structures +- `ggml/src/ggml-quants.c`: reference quantize/dequantize implementations +- `tools/quantize/quantize.cpp`: CLI tool +- `src/llama-quant.cpp`: core quantization engine + +## Quantization Families +- **Q**: simple uniform quantization +- **K**: super-block quantization (multiple sub-blocks per super-block) +- **IQ**: importance-weighted quantization +- **T**: ternary quantization +- **MXFP**: Microsoft floating-point quantization diff --git a/src/llama-quant.cpp b/src/llama-quant.cpp index 65e5452e2f..e5890936c4 100644 --- a/src/llama-quant.cpp +++ b/src/llama-quant.cpp @@ -13,22 +13,6 @@ #include #include -// tensor categorization - used to avoid repeated string matching in quantization logic. -// this is different from LLM_TN - we want broad categories, not specific tensor names per arch. -enum class tensor_category { - TOKEN_EMBD, - ATTENTION_Q, - ATTENTION_V, - ATTENTION_K, - ATTENTION_QKV, - ATTENTION_KV_B, - ATTENTION_OUTPUT, - FFN_UP, - FFN_GATE, - FFN_DOWN, - OUTPUT, - OTHER -}; static void zeros(std::ofstream & file, size_t n) { char zero = 0; @@ -150,15 +134,6 @@ static bool category_is_attn_v(tensor_category cat) { cat == tensor_category::ATTENTION_KV_B; } -// per-tensor metadata, computed in the preliminary loop and used in the main loop -struct tensor_metadata { - ggml_type target_type; - tensor_category category; - std::string remapped_imatrix_name; - bool allows_quantization; - bool requires_imatrix; -}; - // // dequantization // @@ -355,6 +330,7 @@ static ggml_type tensor_type_fallback(quantize_state_impl & qs, const ggml_tenso return return_type; } +// internal standard logic for selecting the target tensor type based on tensor category, ftype, and model arch static ggml_type llama_tensor_get_type_impl(quantize_state_impl & qs, ggml_type new_type, const ggml_tensor * tensor, llama_ftype ftype, tensor_category category) { const std::string name = ggml_get_name(tensor); @@ -604,15 +580,8 @@ static ggml_type llama_tensor_get_type_impl(quantize_state_impl & qs, ggml_type return new_type; } -// public API: compute category from tensor name and delegate to _impl -ggml_type llama_tensor_get_type(quantize_state_impl & qs, ggml_type new_type, const ggml_tensor * tensor, llama_ftype ftype) { - const std::string name = ggml_get_name(tensor); - tensor_category category = tensor_get_category(name); - return llama_tensor_get_type_impl(qs, new_type, tensor, ftype, category); -} - -// outer wrapper: determine the ggml_type that this tensor should be quantized to (used internally by llama_model_quantize_impl) -static ggml_type llama_tensor_get_type_internal(quantize_state_impl & qs, const llama_model_quantize_params * params, const ggml_tensor * tensor, ggml_type default_type, const tensor_metadata & tm) { +// outer wrapper: determine the ggml_type that this tensor should be quantized to +ggml_type llama_tensor_get_type(quantize_state_impl & qs, const llama_model_quantize_params * params, const ggml_tensor * tensor, ggml_type default_type, const tensor_metadata & tm) { if (!tensor_allows_quantization(params, qs.model.arch, tensor)) { return tensor->type; } @@ -845,15 +814,16 @@ const char * llama_ftype_to_name(llama_ftype ftype) { return nullptr; } -void init_quantize_state_counters(quantize_state_impl & qs, const std::vector & tensor_names) { - for (const auto & name : tensor_names) { - tensor_category cat = tensor_get_category(name); +void init_quantize_state_counters(quantize_state_impl & qs, std::vector & metadata) { + for (auto & tm : metadata) { + tensor_category cat = tensor_get_category(tm.name); + tm.category = cat; if (category_is_attn_v(cat)) { ++qs.n_attention_wv; } - if (tensor_name_match_output_weight(name.c_str())) { + if (cat == tensor_category::OUTPUT) { qs.has_tied_embeddings = false; } } @@ -996,15 +966,15 @@ static void llama_model_quantize_impl(const std::string & fname_inp, const std:: }); } - { - std::vector tensor_names; - tensor_names.reserve(tensors.size()); - for (const auto * it : tensors) { - tensor_names.emplace_back(ggml_get_name(it->tensor)); - } - init_quantize_state_counters(qs, tensor_names); + // compute tensor metadata once and cache it + std::vector metadata(tensors.size()); + for (size_t i = 0; i < tensors.size(); ++i) { + metadata[i].name = ggml_get_name(tensors[i]->tensor); } + // initialize quantization state counters and metadata categories + init_quantize_state_counters(qs, metadata); + int idx = 0; uint16_t n_split = 1; @@ -1017,25 +987,6 @@ static void llama_model_quantize_impl(const std::string & fname_inp, const std:: std::vector ctx_outs(n_split); ctx_outs[0] = std::move(ctx_out); - // compute tensor metadata once and cache it - std::vector metadata(tensors.size()); - - // initialize quantization state before preliminary loop (counters for use_more_bits) - { - for (size_t i = 0; i < tensors.size(); ++i) { - const auto cat = tensor_get_category(tensors[i]->tensor->name); - if (category_is_attn_v(cat)) { - ++qs.n_attention_wv; - } - if (cat == tensor_category::OUTPUT) { - qs.has_tied_embeddings = false; - } - metadata[i].category = cat; // save and re-use the category while we're at it - } - // these also need to be set to n_layer by default - qs.n_ffn_down = qs.n_ffn_gate = qs.n_ffn_up = (int)qs.model.hparams.n_layer; - } - // flag for --dry-run bool will_require_imatrix = false; @@ -1059,7 +1010,7 @@ static void llama_model_quantize_impl(const std::string & fname_inp, const std:: metadata[i].allows_quantization = tensor_allows_quantization(params, model.arch, tensor); if (metadata[i].allows_quantization) { - metadata[i].target_type = llama_tensor_get_type_internal(qs, params, tensor, default_type, metadata[i]); + metadata[i].target_type = llama_tensor_get_type(qs, params, tensor, default_type, metadata[i]); } else { metadata[i].target_type = tensor->type; } diff --git a/src/llama-quant.h b/src/llama-quant.h index f3c1290c58..d8fe643557 100644 --- a/src/llama-quant.h +++ b/src/llama-quant.h @@ -12,6 +12,33 @@ struct llama_model; +// tensor categorization - used to avoid repeated string matching in quantization logic. +// this is different from LLM_TN - we want broad categories, not specific tensor names per arch. +enum class tensor_category { + TOKEN_EMBD, + ATTENTION_Q, + ATTENTION_V, + ATTENTION_K, + ATTENTION_QKV, + ATTENTION_KV_B, + ATTENTION_OUTPUT, + FFN_UP, + FFN_GATE, + FFN_DOWN, + OUTPUT, + OTHER +}; + +// per-tensor metadata, computed in the preliminary loop and used in the main loop +struct tensor_metadata { + std::string name; + ggml_type target_type; + tensor_category category; + std::string remapped_imatrix_name; + bool allows_quantization; + bool requires_imatrix; +}; + // result of parsing --tensor-type option // (changes to this struct must be reflected in tools/quantize/quantize.cpp) struct tensor_type_option { @@ -56,7 +83,7 @@ struct quantize_state_impl { } }; -ggml_type llama_tensor_get_type(quantize_state_impl & qs, ggml_type new_type, const ggml_tensor * tensor, llama_ftype ftype); +ggml_type llama_tensor_get_type(quantize_state_impl & qs, const llama_model_quantize_params * params, const ggml_tensor * tensor, ggml_type default_type, const tensor_metadata & tm); ggml_type llama_ftype_get_default_type(llama_ftype ftype); // Ftype name <-> enum conversions. @@ -64,9 +91,9 @@ ggml_type llama_ftype_get_default_type(llama_ftype ftype); llama_ftype llama_ftype_from_name(const char * name); const char * llama_ftype_to_name(llama_ftype ftype); -// Initialize quantize_state_impl counters by scanning tensor names. -// tensor_names: all quantizable weight tensor names in the model. -void init_quantize_state_counters(quantize_state_impl & qs, const std::vector & tensor_names); +// Initialize quantize_state_impl counters and populate tensor_metadata categories. +// metadata: vector with name fields already set, will have category field populated. +void init_quantize_state_counters(quantize_state_impl & qs, std::vector & metadata); // Returns true if this tensor should be quantized (based on name, dims, params). bool tensor_allows_quantization(const llama_model_quantize_params * params, llm_arch arch, const ggml_tensor * tensor); diff --git a/tests/gguf-model-data.cpp b/tests/gguf-model-data.cpp index 97de083568..61801256c6 100644 --- a/tests/gguf-model-data.cpp +++ b/tests/gguf-model-data.cpp @@ -515,7 +515,8 @@ static std::string detect_gguf_filename(const std::string & repo, const std::str static std::optional fetch_and_parse( const std::string & repo, const std::string & filename, - const std::string & cache_path) { + const std::string & cache_path, + bool verbose) { std::string url = "https://huggingface.co/" + repo + "/resolve/main/" + filename; // Progressive download inspired by RangeView.fetchChunk() @@ -524,7 +525,9 @@ static std::optional fetch_and_parse( const size_t max_chunk = 64 * 1024 * 1024; while (chunk_size <= max_chunk) { - fprintf(stderr, "gguf_fetch: downloading %zu bytes from %s\n", chunk_size, filename.c_str()); + if (verbose) { + fprintf(stderr, "gguf_fetch: downloading %zu bytes from %s\n", chunk_size, filename.c_str()); + } char range_buf[64]; snprintf(range_buf, sizeof(range_buf), "bytes=0-%zu", chunk_size - 1); @@ -565,7 +568,8 @@ static std::optional fetch_or_cached( const std::string & repo, const std::string & filename, const std::string & cdir, - const std::string & repo_part) { + const std::string & repo_part, + bool verbose) { std::string fname_part = sanitize_for_path(filename); std::string cache_path = cdir + "/" + repo_part + "--" + fname_part + ".partial"; @@ -574,20 +578,23 @@ static std::optional fetch_or_cached( if (std::filesystem::exists(cache_path) && read_file(cache_path, cached)) { auto result = gguf_parse_meta(cached); if (result.has_value()) { - fprintf(stderr, "gguf_fetch: loaded from cache: %s\n", cache_path.c_str()); + if (verbose) { + fprintf(stderr, "gguf_fetch: loaded from cache: %s\n", cache_path.c_str()); + } return result; } } } fs_create_directory_with_parents(cdir); - return fetch_and_parse(repo, filename, cache_path); + return fetch_and_parse(repo, filename, cache_path, verbose); } std::optional gguf_fetch_model_meta( const std::string & repo, const std::string & quant, - const std::string & cache_dir) { + const std::string & cache_dir, + bool verbose) { std::string cdir = cache_dir.empty() ? get_default_cache_dir() : cache_dir; std::string repo_part = sanitize_for_path(repo); @@ -597,7 +604,7 @@ std::optional gguf_fetch_model_meta( return std::nullopt; } - auto model_opt = fetch_or_cached(repo, filename, cdir, repo_part); + auto model_opt = fetch_or_cached(repo, filename, cdir, repo_part, verbose); if (!model_opt.has_value()) { fprintf(stderr, "gguf_fetch: failed to fetch %s\n", filename.c_str()); return std::nullopt; @@ -612,8 +619,10 @@ std::optional gguf_fetch_model_meta( return std::nullopt; } - fprintf(stderr, "gguf_fetch: split model with %u shards, fetching remaining %u...\n", - model.n_split, model.n_split - 1); + if (verbose) { + fprintf(stderr, "gguf_fetch: split model with %u shards, fetching remaining %u...\n", + model.n_split, model.n_split - 1); + } for (int i = 2; i <= model.n_split; i++) { char num_buf[6], total_buf[6]; @@ -621,7 +630,7 @@ std::optional gguf_fetch_model_meta( snprintf(total_buf, sizeof(total_buf), "%05d", (int)model.n_split); std::string shard_name = split_prefix + "-" + num_buf + "-of-" + total_buf + ".gguf"; - auto shard = fetch_or_cached(repo, shard_name, cdir, repo_part); + auto shard = fetch_or_cached(repo, shard_name, cdir, repo_part, verbose); if (!shard.has_value()) { fprintf(stderr, "gguf_fetch: failed to fetch shard %d: %s\n", i, shard_name.c_str()); return std::nullopt; diff --git a/tests/gguf-model-data.h b/tests/gguf-model-data.h index ed433791ad..c49c7c6fbe 100644 --- a/tests/gguf-model-data.h +++ b/tests/gguf-model-data.h @@ -39,4 +39,5 @@ struct gguf_remote_model { std::optional gguf_fetch_model_meta( const std::string & repo, const std::string & quant = "Q8_0", - const std::string & cache_dir = ""); // empty = default + const std::string & cache_dir = "", // empty = default + bool verbose = true); diff --git a/tests/snapshots/deepseek-v3.1.schema b/tests/snapshots/deepseek-v3.1.schema index a5bce29b27..0e9dfd6ed4 100644 --- a/tests/snapshots/deepseek-v3.1.schema +++ b/tests/snapshots/deepseek-v3.1.schema @@ -2,10 +2,8 @@ # n_embd=7168, n_ff=18432, n_vocab=129280, n_layer=61, n_head=128, n_head_kv=1, n_expert=256 [F32] f32 -output.weight q6_K [F16] f16 -output.weight q6_K [Q4_0] q4_0 output.weight q6_K @@ -23,1098 +21,1952 @@ output.weight q6_K [Q2_K] q2_K output.weight q6_K +blk.0.attn_k_b.weight q4_0 blk.0.attn_output.weight q3_K blk.0.ffn_down.weight q3_K +blk.1.attn_k_b.weight q4_0 blk.1.attn_output.weight q3_K blk.1.ffn_down.weight q3_K +blk.2.attn_k_b.weight q4_0 blk.2.attn_output.weight q3_K blk.2.ffn_down.weight q3_K +blk.3.attn_k_b.weight q4_0 blk.3.attn_output.weight q3_K blk.3.ffn_down_exps.weight q3_K blk.3.ffn_down_shexp.weight q3_K +blk.4.attn_k_b.weight q4_0 blk.4.attn_output.weight q3_K blk.4.ffn_down_exps.weight q3_K blk.4.ffn_down_shexp.weight q3_K +blk.5.attn_k_b.weight q4_0 blk.5.attn_output.weight q3_K blk.5.ffn_down_exps.weight q3_K blk.5.ffn_down_shexp.weight q3_K +blk.6.attn_k_b.weight q4_0 blk.6.attn_output.weight q3_K blk.6.ffn_down_exps.weight q3_K blk.6.ffn_down_shexp.weight q3_K +blk.7.attn_k_b.weight q4_0 blk.7.attn_output.weight q3_K blk.7.ffn_down_exps.weight q3_K blk.7.ffn_down_shexp.weight q3_K +blk.8.attn_k_b.weight q4_0 blk.8.attn_output.weight q3_K blk.8.ffn_down_exps.weight q3_K blk.8.ffn_down_shexp.weight q3_K +blk.9.attn_k_b.weight q4_0 blk.9.attn_output.weight q3_K blk.9.ffn_down_exps.weight q3_K blk.9.ffn_down_shexp.weight q3_K +blk.10.attn_k_b.weight q4_0 blk.10.attn_output.weight q3_K blk.10.ffn_down_exps.weight q3_K blk.10.ffn_down_shexp.weight q3_K +blk.11.attn_k_b.weight q4_0 blk.11.attn_output.weight q3_K blk.11.ffn_down_exps.weight q3_K blk.11.ffn_down_shexp.weight q3_K +blk.12.attn_k_b.weight q4_0 blk.12.attn_output.weight q3_K blk.12.ffn_down_exps.weight q3_K blk.12.ffn_down_shexp.weight q3_K +blk.13.attn_k_b.weight q4_0 blk.13.attn_output.weight q3_K blk.13.ffn_down_exps.weight q3_K blk.13.ffn_down_shexp.weight q3_K +blk.14.attn_k_b.weight q4_0 blk.14.attn_output.weight q3_K blk.14.ffn_down_exps.weight q3_K blk.14.ffn_down_shexp.weight q3_K +blk.15.attn_k_b.weight q4_0 blk.15.attn_output.weight q3_K blk.15.ffn_down_exps.weight q3_K blk.15.ffn_down_shexp.weight q3_K +blk.16.attn_k_b.weight q4_0 blk.16.attn_output.weight q3_K blk.16.ffn_down_exps.weight q3_K blk.16.ffn_down_shexp.weight q3_K +blk.17.attn_k_b.weight q4_0 blk.17.attn_output.weight q3_K blk.17.ffn_down_exps.weight q3_K blk.17.ffn_down_shexp.weight q3_K +blk.18.attn_k_b.weight q4_0 blk.18.attn_output.weight q3_K blk.18.ffn_down_exps.weight q3_K blk.18.ffn_down_shexp.weight q3_K +blk.19.attn_k_b.weight q4_0 blk.19.attn_output.weight q3_K blk.19.ffn_down_exps.weight q3_K blk.19.ffn_down_shexp.weight q3_K +blk.20.attn_k_b.weight q4_0 blk.20.attn_output.weight q3_K blk.20.ffn_down_exps.weight q3_K blk.20.ffn_down_shexp.weight q3_K +blk.21.attn_k_b.weight q4_0 blk.21.attn_output.weight q3_K blk.21.ffn_down_exps.weight q3_K blk.21.ffn_down_shexp.weight q3_K +blk.22.attn_k_b.weight q4_0 blk.22.attn_output.weight q3_K blk.22.ffn_down_exps.weight q3_K blk.22.ffn_down_shexp.weight q3_K +blk.23.attn_k_b.weight q4_0 blk.23.attn_output.weight q3_K blk.23.ffn_down_exps.weight q3_K blk.23.ffn_down_shexp.weight q3_K +blk.24.attn_k_b.weight q4_0 blk.24.attn_output.weight q3_K blk.24.ffn_down_exps.weight q3_K blk.24.ffn_down_shexp.weight q3_K +blk.25.attn_k_b.weight q4_0 blk.25.attn_output.weight q3_K blk.25.ffn_down_exps.weight q3_K blk.25.ffn_down_shexp.weight q3_K +blk.26.attn_k_b.weight q4_0 blk.26.attn_output.weight q3_K blk.26.ffn_down_exps.weight q3_K blk.26.ffn_down_shexp.weight q3_K +blk.27.attn_k_b.weight q4_0 blk.27.attn_output.weight q3_K blk.27.ffn_down_exps.weight q3_K blk.27.ffn_down_shexp.weight q3_K +blk.28.attn_k_b.weight q4_0 blk.28.attn_output.weight q3_K blk.28.ffn_down_exps.weight q3_K blk.28.ffn_down_shexp.weight q3_K +blk.29.attn_k_b.weight q4_0 blk.29.attn_output.weight q3_K blk.29.ffn_down_exps.weight q3_K blk.29.ffn_down_shexp.weight q3_K +blk.30.attn_k_b.weight q4_0 blk.30.attn_output.weight q3_K blk.30.ffn_down_exps.weight q3_K blk.30.ffn_down_shexp.weight q3_K +blk.31.attn_k_b.weight q4_0 blk.31.attn_output.weight q3_K blk.31.ffn_down_exps.weight q3_K blk.31.ffn_down_shexp.weight q3_K +blk.32.attn_k_b.weight q4_0 blk.32.attn_output.weight q3_K blk.32.ffn_down_exps.weight q3_K blk.32.ffn_down_shexp.weight q3_K +blk.33.attn_k_b.weight q4_0 blk.33.attn_output.weight q3_K blk.33.ffn_down_exps.weight q3_K blk.33.ffn_down_shexp.weight q3_K +blk.34.attn_k_b.weight q4_0 blk.34.attn_output.weight q3_K blk.34.ffn_down_exps.weight q3_K blk.34.ffn_down_shexp.weight q3_K +blk.35.attn_k_b.weight q4_0 blk.35.attn_output.weight q3_K blk.35.ffn_down_exps.weight q3_K blk.35.ffn_down_shexp.weight q3_K +blk.36.attn_k_b.weight q4_0 blk.36.attn_output.weight q3_K blk.36.ffn_down_exps.weight q3_K blk.36.ffn_down_shexp.weight q3_K +blk.37.attn_k_b.weight q4_0 blk.37.attn_output.weight q3_K blk.37.ffn_down_exps.weight q3_K blk.37.ffn_down_shexp.weight q3_K +blk.38.attn_k_b.weight q4_0 blk.38.attn_output.weight q3_K blk.38.ffn_down_exps.weight q3_K blk.38.ffn_down_shexp.weight q3_K +blk.39.attn_k_b.weight q4_0 blk.39.attn_output.weight q3_K blk.39.ffn_down_exps.weight q3_K blk.39.ffn_down_shexp.weight q3_K +blk.40.attn_k_b.weight q4_0 blk.40.attn_output.weight q3_K blk.40.ffn_down_exps.weight q3_K blk.40.ffn_down_shexp.weight q3_K +blk.41.attn_k_b.weight q4_0 blk.41.attn_output.weight q3_K blk.41.ffn_down_exps.weight q3_K blk.41.ffn_down_shexp.weight q3_K +blk.42.attn_k_b.weight q4_0 blk.42.attn_output.weight q3_K blk.42.ffn_down_exps.weight q3_K blk.42.ffn_down_shexp.weight q3_K +blk.43.attn_k_b.weight q4_0 blk.43.attn_output.weight q3_K blk.43.ffn_down_exps.weight q3_K blk.43.ffn_down_shexp.weight q3_K +blk.44.attn_k_b.weight q4_0 blk.44.attn_output.weight q3_K blk.44.ffn_down_exps.weight q3_K blk.44.ffn_down_shexp.weight q3_K +blk.45.attn_k_b.weight q4_0 blk.45.attn_output.weight q3_K blk.45.ffn_down_exps.weight q3_K blk.45.ffn_down_shexp.weight q3_K +blk.46.attn_k_b.weight q4_0 blk.46.attn_output.weight q3_K blk.46.ffn_down_exps.weight q3_K blk.46.ffn_down_shexp.weight q3_K +blk.47.attn_k_b.weight q4_0 blk.47.attn_output.weight q3_K blk.47.ffn_down_exps.weight q3_K blk.47.ffn_down_shexp.weight q3_K +blk.48.attn_k_b.weight q4_0 blk.48.attn_output.weight q3_K blk.48.ffn_down_exps.weight q3_K blk.48.ffn_down_shexp.weight q3_K +blk.49.attn_k_b.weight q4_0 blk.49.attn_output.weight q3_K blk.49.ffn_down_exps.weight q3_K blk.49.ffn_down_shexp.weight q3_K +blk.50.attn_k_b.weight q4_0 blk.50.attn_output.weight q3_K blk.50.ffn_down_exps.weight q3_K blk.50.ffn_down_shexp.weight q3_K +blk.51.attn_k_b.weight q4_0 blk.51.attn_output.weight q3_K blk.51.ffn_down_exps.weight q3_K blk.51.ffn_down_shexp.weight q3_K +blk.52.attn_k_b.weight q4_0 blk.52.attn_output.weight q3_K blk.52.ffn_down_exps.weight q3_K blk.52.ffn_down_shexp.weight q3_K +blk.53.attn_k_b.weight q4_0 blk.53.attn_output.weight q3_K blk.53.ffn_down_exps.weight q3_K blk.53.ffn_down_shexp.weight q3_K +blk.54.attn_k_b.weight q4_0 blk.54.attn_output.weight q3_K blk.54.ffn_down_exps.weight q3_K blk.54.ffn_down_shexp.weight q3_K +blk.55.attn_k_b.weight q4_0 blk.55.attn_output.weight q3_K blk.55.ffn_down_exps.weight q3_K blk.55.ffn_down_shexp.weight q3_K +blk.56.attn_k_b.weight q4_0 blk.56.attn_output.weight q3_K blk.56.ffn_down_exps.weight q3_K blk.56.ffn_down_shexp.weight q3_K +blk.57.attn_k_b.weight q4_0 blk.57.attn_output.weight q3_K blk.57.ffn_down_exps.weight q3_K blk.57.ffn_down_shexp.weight q3_K +blk.58.attn_k_b.weight q4_0 blk.58.attn_output.weight q3_K blk.58.ffn_down_exps.weight q3_K blk.58.ffn_down_shexp.weight q3_K +blk.59.attn_k_b.weight q4_0 blk.59.attn_output.weight q3_K blk.59.ffn_down_exps.weight q3_K blk.59.ffn_down_shexp.weight q3_K +blk.60.attn_k_b.weight q4_0 blk.60.attn_output.weight q3_K blk.60.ffn_down_exps.weight q3_K blk.60.ffn_down_shexp.weight q3_K [Q3_K_S] q3_K output.weight q6_K +blk.0.attn_k_b.weight q4_0 +blk.1.attn_k_b.weight q4_0 +blk.2.attn_k_b.weight q4_0 +blk.3.attn_k_b.weight q4_0 +blk.4.attn_k_b.weight q4_0 +blk.5.attn_k_b.weight q4_0 +blk.6.attn_k_b.weight q4_0 +blk.7.attn_k_b.weight q4_0 +blk.8.attn_k_b.weight q4_0 +blk.9.attn_k_b.weight q4_0 +blk.10.attn_k_b.weight q4_0 +blk.11.attn_k_b.weight q4_0 +blk.12.attn_k_b.weight q4_0 +blk.13.attn_k_b.weight q4_0 +blk.14.attn_k_b.weight q4_0 +blk.15.attn_k_b.weight q4_0 +blk.16.attn_k_b.weight q4_0 +blk.17.attn_k_b.weight q4_0 +blk.18.attn_k_b.weight q4_0 +blk.19.attn_k_b.weight q4_0 +blk.20.attn_k_b.weight q4_0 +blk.21.attn_k_b.weight q4_0 +blk.22.attn_k_b.weight q4_0 +blk.23.attn_k_b.weight q4_0 +blk.24.attn_k_b.weight q4_0 +blk.25.attn_k_b.weight q4_0 +blk.26.attn_k_b.weight q4_0 +blk.27.attn_k_b.weight q4_0 +blk.28.attn_k_b.weight q4_0 +blk.29.attn_k_b.weight q4_0 +blk.30.attn_k_b.weight q4_0 +blk.31.attn_k_b.weight q4_0 +blk.32.attn_k_b.weight q4_0 +blk.33.attn_k_b.weight q4_0 +blk.34.attn_k_b.weight q4_0 +blk.35.attn_k_b.weight q4_0 +blk.36.attn_k_b.weight q4_0 +blk.37.attn_k_b.weight q4_0 +blk.38.attn_k_b.weight q4_0 +blk.39.attn_k_b.weight q4_0 +blk.40.attn_k_b.weight q4_0 +blk.41.attn_k_b.weight q4_0 +blk.42.attn_k_b.weight q4_0 +blk.43.attn_k_b.weight q4_0 +blk.44.attn_k_b.weight q4_0 +blk.45.attn_k_b.weight q4_0 +blk.46.attn_k_b.weight q4_0 +blk.47.attn_k_b.weight q4_0 +blk.48.attn_k_b.weight q4_0 +blk.49.attn_k_b.weight q4_0 +blk.50.attn_k_b.weight q4_0 +blk.51.attn_k_b.weight q4_0 +blk.52.attn_k_b.weight q4_0 +blk.53.attn_k_b.weight q4_0 +blk.54.attn_k_b.weight q4_0 +blk.55.attn_k_b.weight q4_0 +blk.56.attn_k_b.weight q4_0 +blk.57.attn_k_b.weight q4_0 +blk.58.attn_k_b.weight q4_0 +blk.59.attn_k_b.weight q4_0 +blk.60.attn_k_b.weight q4_0 [Q3_K_M] q3_K output.weight q6_K +blk.0.attn_k_b.weight q4_0 blk.0.attn_output.weight q4_K blk.0.ffn_down.weight q5_K +blk.1.attn_k_b.weight q4_0 blk.1.attn_output.weight q4_K blk.1.ffn_down.weight q5_K +blk.2.attn_k_b.weight q4_0 blk.2.attn_output.weight q4_K blk.2.ffn_down.weight q5_K +blk.3.attn_k_b.weight q4_0 blk.3.attn_output.weight q4_K blk.3.ffn_down_exps.weight q4_K blk.3.ffn_down_shexp.weight q4_K +blk.4.attn_k_b.weight q4_0 blk.4.attn_output.weight q4_K blk.4.ffn_down_exps.weight q4_K blk.4.ffn_down_shexp.weight q4_K +blk.5.attn_k_b.weight q4_0 blk.5.attn_output.weight q4_K blk.5.ffn_down_exps.weight q4_K blk.5.ffn_down_shexp.weight q4_K +blk.6.attn_k_b.weight q4_0 blk.6.attn_output.weight q4_K blk.6.ffn_down_exps.weight q4_K blk.6.ffn_down_shexp.weight q4_K +blk.7.attn_k_b.weight q4_0 blk.7.attn_output.weight q4_K blk.7.ffn_down_exps.weight q4_K blk.7.ffn_down_shexp.weight q4_K +blk.8.attn_k_b.weight q4_0 blk.8.attn_output.weight q4_K blk.8.ffn_down_exps.weight q4_K blk.8.ffn_down_shexp.weight q4_K +blk.9.attn_k_b.weight q4_0 blk.9.attn_output.weight q4_K blk.9.ffn_down_exps.weight q4_K blk.9.ffn_down_shexp.weight q4_K +blk.10.attn_k_b.weight q4_0 blk.10.attn_output.weight q4_K blk.10.ffn_down_exps.weight q4_K blk.10.ffn_down_shexp.weight q4_K +blk.11.attn_k_b.weight q4_0 blk.11.attn_output.weight q4_K blk.11.ffn_down_exps.weight q4_K blk.11.ffn_down_shexp.weight q4_K +blk.12.attn_k_b.weight q4_0 blk.12.attn_output.weight q4_K blk.12.ffn_down_exps.weight q4_K blk.12.ffn_down_shexp.weight q4_K +blk.13.attn_k_b.weight q4_0 blk.13.attn_output.weight q4_K blk.13.ffn_down_exps.weight q4_K blk.13.ffn_down_shexp.weight q4_K +blk.14.attn_k_b.weight q4_0 blk.14.attn_output.weight q4_K blk.14.ffn_down_exps.weight q4_K blk.14.ffn_down_shexp.weight q4_K +blk.15.attn_k_b.weight q4_0 blk.15.attn_output.weight q4_K blk.15.ffn_down_exps.weight q4_K blk.15.ffn_down_shexp.weight q4_K +blk.16.attn_k_b.weight q4_0 blk.16.attn_output.weight q4_K blk.16.ffn_down_exps.weight q4_K blk.16.ffn_down_shexp.weight q4_K +blk.17.attn_k_b.weight q4_0 blk.17.attn_output.weight q4_K blk.17.ffn_down_exps.weight q4_K blk.17.ffn_down_shexp.weight q4_K +blk.18.attn_k_b.weight q4_0 blk.18.attn_output.weight q4_K blk.18.ffn_down_exps.weight q4_K blk.18.ffn_down_shexp.weight q4_K +blk.19.attn_k_b.weight q4_0 blk.19.attn_output.weight q4_K blk.19.ffn_down_exps.weight q4_K blk.19.ffn_down_shexp.weight q4_K +blk.20.attn_k_b.weight q4_0 blk.20.attn_output.weight q4_K blk.20.ffn_down_exps.weight q4_K blk.20.ffn_down_shexp.weight q4_K +blk.21.attn_k_b.weight q4_0 blk.21.attn_output.weight q4_K blk.21.ffn_down_exps.weight q4_K blk.21.ffn_down_shexp.weight q4_K +blk.22.attn_k_b.weight q4_0 blk.22.attn_output.weight q4_K blk.22.ffn_down_exps.weight q4_K blk.22.ffn_down_shexp.weight q4_K +blk.23.attn_k_b.weight q4_0 blk.23.attn_output.weight q4_K blk.23.ffn_down_exps.weight q4_K blk.23.ffn_down_shexp.weight q4_K +blk.24.attn_k_b.weight q4_0 blk.24.attn_output.weight q4_K blk.24.ffn_down_exps.weight q4_K blk.24.ffn_down_shexp.weight q4_K +blk.25.attn_k_b.weight q4_0 blk.25.attn_output.weight q4_K blk.25.ffn_down_exps.weight q4_K blk.25.ffn_down_shexp.weight q4_K +blk.26.attn_k_b.weight q4_0 blk.26.attn_output.weight q4_K blk.26.ffn_down_exps.weight q4_K blk.26.ffn_down_shexp.weight q4_K +blk.27.attn_k_b.weight q4_0 blk.27.attn_output.weight q4_K blk.27.ffn_down_exps.weight q4_K blk.27.ffn_down_shexp.weight q4_K +blk.28.attn_k_b.weight q4_0 blk.28.attn_output.weight q4_K blk.28.ffn_down_exps.weight q4_K blk.28.ffn_down_shexp.weight q4_K +blk.29.attn_k_b.weight q4_0 blk.29.attn_output.weight q4_K blk.29.ffn_down_exps.weight q4_K blk.29.ffn_down_shexp.weight q4_K +blk.30.attn_k_b.weight q4_0 blk.30.attn_output.weight q4_K blk.30.ffn_down_exps.weight q4_K blk.30.ffn_down_shexp.weight q4_K +blk.31.attn_k_b.weight q4_0 blk.31.attn_output.weight q4_K blk.31.ffn_down_exps.weight q4_K blk.31.ffn_down_shexp.weight q4_K +blk.32.attn_k_b.weight q4_0 blk.32.attn_output.weight q4_K blk.32.ffn_down_exps.weight q4_K blk.32.ffn_down_shexp.weight q4_K +blk.33.attn_k_b.weight q4_0 blk.33.attn_output.weight q4_K blk.33.ffn_down_exps.weight q4_K blk.33.ffn_down_shexp.weight q4_K +blk.34.attn_k_b.weight q4_0 blk.34.attn_output.weight q4_K blk.34.ffn_down_exps.weight q4_K blk.34.ffn_down_shexp.weight q4_K +blk.35.attn_k_b.weight q4_0 blk.35.attn_output.weight q4_K blk.35.ffn_down_exps.weight q4_K blk.35.ffn_down_shexp.weight q4_K +blk.36.attn_k_b.weight q4_0 blk.36.attn_output.weight q4_K blk.36.ffn_down_exps.weight q4_K blk.36.ffn_down_shexp.weight q4_K +blk.37.attn_k_b.weight q4_0 blk.37.attn_output.weight q4_K blk.37.ffn_down_exps.weight q4_K blk.37.ffn_down_shexp.weight q4_K +blk.38.attn_k_b.weight q4_0 blk.38.attn_output.weight q4_K blk.38.ffn_down_exps.weight q4_K blk.38.ffn_down_shexp.weight q4_K +blk.39.attn_k_b.weight q4_0 blk.39.attn_output.weight q4_K blk.39.ffn_down_exps.weight q4_K blk.39.ffn_down_shexp.weight q4_K +blk.40.attn_k_b.weight q4_0 blk.40.attn_output.weight q4_K blk.40.ffn_down_exps.weight q4_K blk.40.ffn_down_shexp.weight q4_K +blk.41.attn_k_b.weight q4_0 blk.41.attn_output.weight q4_K blk.41.ffn_down_exps.weight q4_K blk.41.ffn_down_shexp.weight q4_K +blk.42.attn_k_b.weight q4_0 blk.42.attn_output.weight q4_K blk.42.ffn_down_exps.weight q4_K blk.42.ffn_down_shexp.weight q4_K +blk.43.attn_k_b.weight q4_0 blk.43.attn_output.weight q4_K blk.43.ffn_down_exps.weight q4_K blk.43.ffn_down_shexp.weight q4_K +blk.44.attn_k_b.weight q4_0 blk.44.attn_output.weight q4_K blk.44.ffn_down_exps.weight q4_K blk.44.ffn_down_shexp.weight q4_K +blk.45.attn_k_b.weight q4_0 blk.45.attn_output.weight q4_K blk.45.ffn_down_exps.weight q4_K blk.45.ffn_down_shexp.weight q4_K +blk.46.attn_k_b.weight q4_0 blk.46.attn_output.weight q4_K blk.46.ffn_down_exps.weight q4_K blk.46.ffn_down_shexp.weight q4_K +blk.47.attn_k_b.weight q4_0 blk.47.attn_output.weight q4_K blk.47.ffn_down_exps.weight q4_K blk.47.ffn_down_shexp.weight q4_K +blk.48.attn_k_b.weight q4_0 blk.48.attn_output.weight q4_K blk.48.ffn_down_exps.weight q4_K blk.48.ffn_down_shexp.weight q4_K +blk.49.attn_k_b.weight q4_0 blk.49.attn_output.weight q4_K blk.49.ffn_down_exps.weight q4_K blk.49.ffn_down_shexp.weight q4_K +blk.50.attn_k_b.weight q4_0 blk.50.attn_output.weight q4_K blk.50.ffn_down_exps.weight q4_K blk.50.ffn_down_shexp.weight q4_K +blk.51.attn_k_b.weight q4_0 blk.51.attn_output.weight q4_K blk.51.ffn_down_exps.weight q4_K blk.51.ffn_down_shexp.weight q4_K +blk.52.attn_k_b.weight q4_0 blk.52.attn_output.weight q4_K blk.52.ffn_down_exps.weight q4_K blk.52.ffn_down_shexp.weight q4_K +blk.53.attn_k_b.weight q4_0 blk.53.attn_output.weight q4_K blk.53.ffn_down_exps.weight q4_K blk.53.ffn_down_shexp.weight q4_K +blk.54.attn_k_b.weight q4_0 blk.54.attn_output.weight q4_K blk.54.ffn_down_exps.weight q4_K blk.54.ffn_down_shexp.weight q4_K +blk.55.attn_k_b.weight q4_0 blk.55.attn_output.weight q4_K blk.55.ffn_down_exps.weight q4_K blk.55.ffn_down_shexp.weight q4_K +blk.56.attn_k_b.weight q4_0 blk.56.attn_output.weight q4_K blk.56.ffn_down_exps.weight q4_K blk.56.ffn_down_shexp.weight q4_K +blk.57.attn_k_b.weight q4_0 blk.57.attn_output.weight q4_K blk.57.ffn_down_exps.weight q4_K blk.57.ffn_down_shexp.weight q4_K +blk.58.attn_k_b.weight q4_0 blk.58.attn_output.weight q4_K blk.58.ffn_down_exps.weight q4_K blk.58.ffn_down_shexp.weight q4_K +blk.59.attn_k_b.weight q4_0 blk.59.attn_output.weight q4_K blk.59.ffn_down_exps.weight q4_K blk.59.ffn_down_shexp.weight q4_K +blk.60.attn_k_b.weight q4_0 blk.60.attn_output.weight q4_K blk.60.ffn_down_exps.weight q4_K blk.60.ffn_down_shexp.weight q4_K [Q3_K_L] q3_K output.weight q6_K +blk.0.attn_k_b.weight q4_0 blk.0.attn_output.weight q5_K blk.0.ffn_down.weight q5_K +blk.1.attn_k_b.weight q4_0 blk.1.attn_output.weight q5_K blk.1.ffn_down.weight q5_K +blk.2.attn_k_b.weight q4_0 blk.2.attn_output.weight q5_K blk.2.ffn_down.weight q5_K +blk.3.attn_k_b.weight q4_0 blk.3.attn_output.weight q5_K blk.3.ffn_down_exps.weight q5_K blk.3.ffn_down_shexp.weight q5_K +blk.4.attn_k_b.weight q4_0 blk.4.attn_output.weight q5_K blk.4.ffn_down_exps.weight q5_K blk.4.ffn_down_shexp.weight q5_K +blk.5.attn_k_b.weight q4_0 blk.5.attn_output.weight q5_K blk.5.ffn_down_exps.weight q5_K blk.5.ffn_down_shexp.weight q5_K +blk.6.attn_k_b.weight q4_0 blk.6.attn_output.weight q5_K blk.6.ffn_down_exps.weight q5_K blk.6.ffn_down_shexp.weight q5_K +blk.7.attn_k_b.weight q4_0 blk.7.attn_output.weight q5_K blk.7.ffn_down_exps.weight q5_K blk.7.ffn_down_shexp.weight q5_K +blk.8.attn_k_b.weight q4_0 blk.8.attn_output.weight q5_K blk.8.ffn_down_exps.weight q5_K blk.8.ffn_down_shexp.weight q5_K +blk.9.attn_k_b.weight q4_0 blk.9.attn_output.weight q5_K blk.9.ffn_down_exps.weight q5_K blk.9.ffn_down_shexp.weight q5_K +blk.10.attn_k_b.weight q4_0 blk.10.attn_output.weight q5_K blk.10.ffn_down_exps.weight q5_K blk.10.ffn_down_shexp.weight q5_K +blk.11.attn_k_b.weight q4_0 blk.11.attn_output.weight q5_K blk.11.ffn_down_exps.weight q5_K blk.11.ffn_down_shexp.weight q5_K +blk.12.attn_k_b.weight q4_0 blk.12.attn_output.weight q5_K blk.12.ffn_down_exps.weight q5_K blk.12.ffn_down_shexp.weight q5_K +blk.13.attn_k_b.weight q4_0 blk.13.attn_output.weight q5_K blk.13.ffn_down_exps.weight q5_K blk.13.ffn_down_shexp.weight q5_K +blk.14.attn_k_b.weight q4_0 blk.14.attn_output.weight q5_K blk.14.ffn_down_exps.weight q5_K blk.14.ffn_down_shexp.weight q5_K +blk.15.attn_k_b.weight q4_0 blk.15.attn_output.weight q5_K blk.15.ffn_down_exps.weight q5_K blk.15.ffn_down_shexp.weight q5_K +blk.16.attn_k_b.weight q4_0 blk.16.attn_output.weight q5_K blk.16.ffn_down_exps.weight q5_K blk.16.ffn_down_shexp.weight q5_K +blk.17.attn_k_b.weight q4_0 blk.17.attn_output.weight q5_K blk.17.ffn_down_exps.weight q5_K blk.17.ffn_down_shexp.weight q5_K +blk.18.attn_k_b.weight q4_0 blk.18.attn_output.weight q5_K blk.18.ffn_down_exps.weight q5_K blk.18.ffn_down_shexp.weight q5_K +blk.19.attn_k_b.weight q4_0 blk.19.attn_output.weight q5_K blk.19.ffn_down_exps.weight q5_K blk.19.ffn_down_shexp.weight q5_K +blk.20.attn_k_b.weight q4_0 blk.20.attn_output.weight q5_K blk.20.ffn_down_exps.weight q5_K blk.20.ffn_down_shexp.weight q5_K +blk.21.attn_k_b.weight q4_0 blk.21.attn_output.weight q5_K blk.21.ffn_down_exps.weight q5_K blk.21.ffn_down_shexp.weight q5_K +blk.22.attn_k_b.weight q4_0 blk.22.attn_output.weight q5_K blk.22.ffn_down_exps.weight q5_K blk.22.ffn_down_shexp.weight q5_K +blk.23.attn_k_b.weight q4_0 blk.23.attn_output.weight q5_K blk.23.ffn_down_exps.weight q5_K blk.23.ffn_down_shexp.weight q5_K +blk.24.attn_k_b.weight q4_0 blk.24.attn_output.weight q5_K blk.24.ffn_down_exps.weight q5_K blk.24.ffn_down_shexp.weight q5_K +blk.25.attn_k_b.weight q4_0 blk.25.attn_output.weight q5_K blk.25.ffn_down_exps.weight q5_K blk.25.ffn_down_shexp.weight q5_K +blk.26.attn_k_b.weight q4_0 blk.26.attn_output.weight q5_K blk.26.ffn_down_exps.weight q5_K blk.26.ffn_down_shexp.weight q5_K +blk.27.attn_k_b.weight q4_0 blk.27.attn_output.weight q5_K blk.27.ffn_down_exps.weight q5_K blk.27.ffn_down_shexp.weight q5_K +blk.28.attn_k_b.weight q4_0 blk.28.attn_output.weight q5_K blk.28.ffn_down_exps.weight q5_K blk.28.ffn_down_shexp.weight q5_K +blk.29.attn_k_b.weight q4_0 blk.29.attn_output.weight q5_K blk.29.ffn_down_exps.weight q5_K blk.29.ffn_down_shexp.weight q5_K +blk.30.attn_k_b.weight q4_0 blk.30.attn_output.weight q5_K blk.30.ffn_down_exps.weight q5_K blk.30.ffn_down_shexp.weight q5_K +blk.31.attn_k_b.weight q4_0 blk.31.attn_output.weight q5_K blk.31.ffn_down_exps.weight q5_K blk.31.ffn_down_shexp.weight q5_K +blk.32.attn_k_b.weight q4_0 blk.32.attn_output.weight q5_K blk.32.ffn_down_exps.weight q5_K blk.32.ffn_down_shexp.weight q5_K +blk.33.attn_k_b.weight q4_0 blk.33.attn_output.weight q5_K blk.33.ffn_down_exps.weight q5_K blk.33.ffn_down_shexp.weight q5_K +blk.34.attn_k_b.weight q4_0 blk.34.attn_output.weight q5_K blk.34.ffn_down_exps.weight q5_K blk.34.ffn_down_shexp.weight q5_K +blk.35.attn_k_b.weight q4_0 blk.35.attn_output.weight q5_K blk.35.ffn_down_exps.weight q5_K blk.35.ffn_down_shexp.weight q5_K +blk.36.attn_k_b.weight q4_0 blk.36.attn_output.weight q5_K blk.36.ffn_down_exps.weight q5_K blk.36.ffn_down_shexp.weight q5_K +blk.37.attn_k_b.weight q4_0 blk.37.attn_output.weight q5_K blk.37.ffn_down_exps.weight q5_K blk.37.ffn_down_shexp.weight q5_K +blk.38.attn_k_b.weight q4_0 blk.38.attn_output.weight q5_K blk.38.ffn_down_exps.weight q5_K blk.38.ffn_down_shexp.weight q5_K +blk.39.attn_k_b.weight q4_0 blk.39.attn_output.weight q5_K blk.39.ffn_down_exps.weight q5_K blk.39.ffn_down_shexp.weight q5_K +blk.40.attn_k_b.weight q4_0 blk.40.attn_output.weight q5_K blk.40.ffn_down_exps.weight q5_K blk.40.ffn_down_shexp.weight q5_K +blk.41.attn_k_b.weight q4_0 blk.41.attn_output.weight q5_K blk.41.ffn_down_exps.weight q5_K blk.41.ffn_down_shexp.weight q5_K +blk.42.attn_k_b.weight q4_0 blk.42.attn_output.weight q5_K blk.42.ffn_down_exps.weight q5_K blk.42.ffn_down_shexp.weight q5_K +blk.43.attn_k_b.weight q4_0 blk.43.attn_output.weight q5_K blk.43.ffn_down_exps.weight q5_K blk.43.ffn_down_shexp.weight q5_K +blk.44.attn_k_b.weight q4_0 blk.44.attn_output.weight q5_K blk.44.ffn_down_exps.weight q5_K blk.44.ffn_down_shexp.weight q5_K +blk.45.attn_k_b.weight q4_0 blk.45.attn_output.weight q5_K blk.45.ffn_down_exps.weight q5_K blk.45.ffn_down_shexp.weight q5_K +blk.46.attn_k_b.weight q4_0 blk.46.attn_output.weight q5_K blk.46.ffn_down_exps.weight q5_K blk.46.ffn_down_shexp.weight q5_K +blk.47.attn_k_b.weight q4_0 blk.47.attn_output.weight q5_K blk.47.ffn_down_exps.weight q5_K blk.47.ffn_down_shexp.weight q5_K +blk.48.attn_k_b.weight q4_0 blk.48.attn_output.weight q5_K blk.48.ffn_down_exps.weight q5_K blk.48.ffn_down_shexp.weight q5_K +blk.49.attn_k_b.weight q4_0 blk.49.attn_output.weight q5_K blk.49.ffn_down_exps.weight q5_K blk.49.ffn_down_shexp.weight q5_K +blk.50.attn_k_b.weight q4_0 blk.50.attn_output.weight q5_K blk.50.ffn_down_exps.weight q5_K blk.50.ffn_down_shexp.weight q5_K +blk.51.attn_k_b.weight q4_0 blk.51.attn_output.weight q5_K blk.51.ffn_down_exps.weight q5_K blk.51.ffn_down_shexp.weight q5_K +blk.52.attn_k_b.weight q4_0 blk.52.attn_output.weight q5_K blk.52.ffn_down_exps.weight q5_K blk.52.ffn_down_shexp.weight q5_K +blk.53.attn_k_b.weight q4_0 blk.53.attn_output.weight q5_K blk.53.ffn_down_exps.weight q5_K blk.53.ffn_down_shexp.weight q5_K +blk.54.attn_k_b.weight q4_0 blk.54.attn_output.weight q5_K blk.54.ffn_down_exps.weight q5_K blk.54.ffn_down_shexp.weight q5_K +blk.55.attn_k_b.weight q4_0 blk.55.attn_output.weight q5_K blk.55.ffn_down_exps.weight q5_K blk.55.ffn_down_shexp.weight q5_K +blk.56.attn_k_b.weight q4_0 blk.56.attn_output.weight q5_K blk.56.ffn_down_exps.weight q5_K blk.56.ffn_down_shexp.weight q5_K +blk.57.attn_k_b.weight q4_0 blk.57.attn_output.weight q5_K blk.57.ffn_down_exps.weight q5_K blk.57.ffn_down_shexp.weight q5_K +blk.58.attn_k_b.weight q4_0 blk.58.attn_output.weight q5_K blk.58.ffn_down_exps.weight q5_K blk.58.ffn_down_shexp.weight q5_K +blk.59.attn_k_b.weight q4_0 blk.59.attn_output.weight q5_K blk.59.ffn_down_exps.weight q5_K blk.59.ffn_down_shexp.weight q5_K +blk.60.attn_k_b.weight q4_0 blk.60.attn_output.weight q5_K blk.60.ffn_down_exps.weight q5_K blk.60.ffn_down_shexp.weight q5_K [Q4_K_S] q4_K output.weight q6_K +blk.0.attn_k_b.weight q5_0 blk.0.ffn_down.weight q5_K +blk.1.attn_k_b.weight q5_0 blk.1.ffn_down.weight q5_K +blk.2.attn_k_b.weight q5_0 blk.2.ffn_down.weight q5_K +blk.3.attn_k_b.weight q5_0 blk.3.ffn_down_exps.weight q5_K blk.3.ffn_down_shexp.weight q5_K +blk.4.attn_k_b.weight q5_0 blk.4.ffn_down_exps.weight q5_K blk.4.ffn_down_shexp.weight q5_K +blk.5.attn_k_b.weight q5_0 blk.5.ffn_down_exps.weight q5_K blk.5.ffn_down_shexp.weight q5_K +blk.6.attn_k_b.weight q5_0 blk.6.ffn_down_exps.weight q5_K blk.6.ffn_down_shexp.weight q5_K +blk.7.attn_k_b.weight q5_0 +blk.8.attn_k_b.weight q5_0 +blk.9.attn_k_b.weight q5_0 +blk.10.attn_k_b.weight q5_0 +blk.11.attn_k_b.weight q5_0 +blk.12.attn_k_b.weight q5_0 +blk.13.attn_k_b.weight q5_0 +blk.14.attn_k_b.weight q5_0 +blk.15.attn_k_b.weight q5_0 +blk.16.attn_k_b.weight q5_0 +blk.17.attn_k_b.weight q5_0 +blk.18.attn_k_b.weight q5_0 +blk.19.attn_k_b.weight q5_0 +blk.20.attn_k_b.weight q5_0 +blk.21.attn_k_b.weight q5_0 +blk.22.attn_k_b.weight q5_0 +blk.23.attn_k_b.weight q5_0 +blk.24.attn_k_b.weight q5_0 +blk.25.attn_k_b.weight q5_0 +blk.26.attn_k_b.weight q5_0 +blk.27.attn_k_b.weight q5_0 +blk.28.attn_k_b.weight q5_0 +blk.29.attn_k_b.weight q5_0 +blk.30.attn_k_b.weight q5_0 +blk.31.attn_k_b.weight q5_0 +blk.32.attn_k_b.weight q5_0 +blk.33.attn_k_b.weight q5_0 +blk.34.attn_k_b.weight q5_0 +blk.35.attn_k_b.weight q5_0 +blk.36.attn_k_b.weight q5_0 +blk.37.attn_k_b.weight q5_0 +blk.38.attn_k_b.weight q5_0 +blk.39.attn_k_b.weight q5_0 +blk.40.attn_k_b.weight q5_0 +blk.41.attn_k_b.weight q5_0 +blk.42.attn_k_b.weight q5_0 +blk.43.attn_k_b.weight q5_0 +blk.44.attn_k_b.weight q5_0 +blk.45.attn_k_b.weight q5_0 +blk.46.attn_k_b.weight q5_0 +blk.47.attn_k_b.weight q5_0 +blk.48.attn_k_b.weight q5_0 +blk.49.attn_k_b.weight q5_0 +blk.50.attn_k_b.weight q5_0 +blk.51.attn_k_b.weight q5_0 +blk.52.attn_k_b.weight q5_0 +blk.53.attn_k_b.weight q5_0 +blk.54.attn_k_b.weight q5_0 +blk.55.attn_k_b.weight q5_0 +blk.56.attn_k_b.weight q5_0 +blk.57.attn_k_b.weight q5_0 +blk.58.attn_k_b.weight q5_0 +blk.59.attn_k_b.weight q5_0 +blk.60.attn_k_b.weight q5_0 [Q4_K_M] q4_K output.weight q6_K +blk.0.attn_k_b.weight q5_0 blk.0.ffn_down.weight q6_K +blk.1.attn_k_b.weight q5_0 blk.1.ffn_down.weight q6_K +blk.2.attn_k_b.weight q5_0 blk.2.ffn_down.weight q6_K +blk.3.attn_k_b.weight q5_0 blk.3.ffn_down_exps.weight q6_K blk.3.ffn_down_shexp.weight q6_K +blk.4.attn_k_b.weight q5_0 blk.4.ffn_down_exps.weight q6_K blk.4.ffn_down_shexp.weight q6_K +blk.5.attn_k_b.weight q5_0 blk.5.ffn_down_exps.weight q6_K blk.5.ffn_down_shexp.weight q6_K +blk.6.attn_k_b.weight q5_0 blk.6.ffn_down_exps.weight q6_K blk.6.ffn_down_shexp.weight q6_K +blk.7.attn_k_b.weight q5_0 +blk.8.attn_k_b.weight q5_0 +blk.9.attn_k_b.weight q5_0 blk.9.ffn_down_exps.weight q6_K blk.9.ffn_down_shexp.weight q6_K +blk.10.attn_k_b.weight q5_0 +blk.11.attn_k_b.weight q5_0 +blk.12.attn_k_b.weight q5_0 blk.12.ffn_down_exps.weight q6_K blk.12.ffn_down_shexp.weight q6_K +blk.13.attn_k_b.weight q5_0 +blk.14.attn_k_b.weight q5_0 +blk.15.attn_k_b.weight q5_0 blk.15.ffn_down_exps.weight q6_K blk.15.ffn_down_shexp.weight q6_K +blk.16.attn_k_b.weight q5_0 +blk.17.attn_k_b.weight q5_0 +blk.18.attn_k_b.weight q5_0 blk.18.ffn_down_exps.weight q6_K blk.18.ffn_down_shexp.weight q6_K +blk.19.attn_k_b.weight q5_0 +blk.20.attn_k_b.weight q5_0 +blk.21.attn_k_b.weight q5_0 blk.21.ffn_down_exps.weight q6_K blk.21.ffn_down_shexp.weight q6_K +blk.22.attn_k_b.weight q5_0 +blk.23.attn_k_b.weight q5_0 +blk.24.attn_k_b.weight q5_0 blk.24.ffn_down_exps.weight q6_K blk.24.ffn_down_shexp.weight q6_K +blk.25.attn_k_b.weight q5_0 +blk.26.attn_k_b.weight q5_0 +blk.27.attn_k_b.weight q5_0 blk.27.ffn_down_exps.weight q6_K blk.27.ffn_down_shexp.weight q6_K +blk.28.attn_k_b.weight q5_0 +blk.29.attn_k_b.weight q5_0 +blk.30.attn_k_b.weight q5_0 blk.30.ffn_down_exps.weight q6_K blk.30.ffn_down_shexp.weight q6_K +blk.31.attn_k_b.weight q5_0 +blk.32.attn_k_b.weight q5_0 +blk.33.attn_k_b.weight q5_0 blk.33.ffn_down_exps.weight q6_K blk.33.ffn_down_shexp.weight q6_K +blk.34.attn_k_b.weight q5_0 +blk.35.attn_k_b.weight q5_0 +blk.36.attn_k_b.weight q5_0 blk.36.ffn_down_exps.weight q6_K blk.36.ffn_down_shexp.weight q6_K +blk.37.attn_k_b.weight q5_0 +blk.38.attn_k_b.weight q5_0 +blk.39.attn_k_b.weight q5_0 blk.39.ffn_down_exps.weight q6_K blk.39.ffn_down_shexp.weight q6_K +blk.40.attn_k_b.weight q5_0 +blk.41.attn_k_b.weight q5_0 +blk.42.attn_k_b.weight q5_0 blk.42.ffn_down_exps.weight q6_K blk.42.ffn_down_shexp.weight q6_K +blk.43.attn_k_b.weight q5_0 +blk.44.attn_k_b.weight q5_0 +blk.45.attn_k_b.weight q5_0 blk.45.ffn_down_exps.weight q6_K blk.45.ffn_down_shexp.weight q6_K +blk.46.attn_k_b.weight q5_0 +blk.47.attn_k_b.weight q5_0 +blk.48.attn_k_b.weight q5_0 blk.48.ffn_down_exps.weight q6_K blk.48.ffn_down_shexp.weight q6_K +blk.49.attn_k_b.weight q5_0 +blk.50.attn_k_b.weight q5_0 +blk.51.attn_k_b.weight q5_0 blk.51.ffn_down_exps.weight q6_K blk.51.ffn_down_shexp.weight q6_K +blk.52.attn_k_b.weight q5_0 +blk.53.attn_k_b.weight q5_0 blk.53.ffn_down_exps.weight q6_K blk.53.ffn_down_shexp.weight q6_K +blk.54.attn_k_b.weight q5_0 blk.54.ffn_down_exps.weight q6_K blk.54.ffn_down_shexp.weight q6_K +blk.55.attn_k_b.weight q5_0 blk.55.ffn_down_exps.weight q6_K blk.55.ffn_down_shexp.weight q6_K +blk.56.attn_k_b.weight q5_0 blk.56.ffn_down_exps.weight q6_K blk.56.ffn_down_shexp.weight q6_K +blk.57.attn_k_b.weight q5_0 blk.57.ffn_down_exps.weight q6_K blk.57.ffn_down_shexp.weight q6_K +blk.58.attn_k_b.weight q5_0 blk.58.ffn_down_exps.weight q6_K blk.58.ffn_down_shexp.weight q6_K +blk.59.attn_k_b.weight q5_0 blk.59.ffn_down_exps.weight q6_K blk.59.ffn_down_shexp.weight q6_K +blk.60.attn_k_b.weight q5_0 blk.60.ffn_down_exps.weight q6_K blk.60.ffn_down_shexp.weight q6_K [Q5_K_S] q5_K output.weight q6_K +blk.0.attn_k_b.weight q5_1 +blk.1.attn_k_b.weight q5_1 +blk.2.attn_k_b.weight q5_1 +blk.3.attn_k_b.weight q5_1 +blk.4.attn_k_b.weight q5_1 +blk.5.attn_k_b.weight q5_1 +blk.6.attn_k_b.weight q5_1 +blk.7.attn_k_b.weight q5_1 +blk.8.attn_k_b.weight q5_1 +blk.9.attn_k_b.weight q5_1 +blk.10.attn_k_b.weight q5_1 +blk.11.attn_k_b.weight q5_1 +blk.12.attn_k_b.weight q5_1 +blk.13.attn_k_b.weight q5_1 +blk.14.attn_k_b.weight q5_1 +blk.15.attn_k_b.weight q5_1 +blk.16.attn_k_b.weight q5_1 +blk.17.attn_k_b.weight q5_1 +blk.18.attn_k_b.weight q5_1 +blk.19.attn_k_b.weight q5_1 +blk.20.attn_k_b.weight q5_1 +blk.21.attn_k_b.weight q5_1 +blk.22.attn_k_b.weight q5_1 +blk.23.attn_k_b.weight q5_1 +blk.24.attn_k_b.weight q5_1 +blk.25.attn_k_b.weight q5_1 +blk.26.attn_k_b.weight q5_1 +blk.27.attn_k_b.weight q5_1 +blk.28.attn_k_b.weight q5_1 +blk.29.attn_k_b.weight q5_1 +blk.30.attn_k_b.weight q5_1 +blk.31.attn_k_b.weight q5_1 +blk.32.attn_k_b.weight q5_1 +blk.33.attn_k_b.weight q5_1 +blk.34.attn_k_b.weight q5_1 +blk.35.attn_k_b.weight q5_1 +blk.36.attn_k_b.weight q5_1 +blk.37.attn_k_b.weight q5_1 +blk.38.attn_k_b.weight q5_1 +blk.39.attn_k_b.weight q5_1 +blk.40.attn_k_b.weight q5_1 +blk.41.attn_k_b.weight q5_1 +blk.42.attn_k_b.weight q5_1 +blk.43.attn_k_b.weight q5_1 +blk.44.attn_k_b.weight q5_1 +blk.45.attn_k_b.weight q5_1 +blk.46.attn_k_b.weight q5_1 +blk.47.attn_k_b.weight q5_1 +blk.48.attn_k_b.weight q5_1 +blk.49.attn_k_b.weight q5_1 +blk.50.attn_k_b.weight q5_1 +blk.51.attn_k_b.weight q5_1 +blk.52.attn_k_b.weight q5_1 +blk.53.attn_k_b.weight q5_1 +blk.54.attn_k_b.weight q5_1 +blk.55.attn_k_b.weight q5_1 +blk.56.attn_k_b.weight q5_1 +blk.57.attn_k_b.weight q5_1 +blk.58.attn_k_b.weight q5_1 +blk.59.attn_k_b.weight q5_1 +blk.60.attn_k_b.weight q5_1 [Q5_K_M] q5_K output.weight q6_K +blk.0.attn_k_b.weight q5_1 blk.0.ffn_down.weight q6_K +blk.1.attn_k_b.weight q5_1 blk.1.ffn_down.weight q6_K +blk.2.attn_k_b.weight q5_1 blk.2.ffn_down.weight q6_K +blk.3.attn_k_b.weight q5_1 blk.3.ffn_down_exps.weight q6_K blk.3.ffn_down_shexp.weight q6_K +blk.4.attn_k_b.weight q5_1 blk.4.ffn_down_exps.weight q6_K blk.4.ffn_down_shexp.weight q6_K +blk.5.attn_k_b.weight q5_1 blk.5.ffn_down_exps.weight q6_K blk.5.ffn_down_shexp.weight q6_K +blk.6.attn_k_b.weight q5_1 blk.6.ffn_down_exps.weight q6_K blk.6.ffn_down_shexp.weight q6_K +blk.7.attn_k_b.weight q5_1 +blk.8.attn_k_b.weight q5_1 +blk.9.attn_k_b.weight q5_1 blk.9.ffn_down_exps.weight q6_K blk.9.ffn_down_shexp.weight q6_K +blk.10.attn_k_b.weight q5_1 +blk.11.attn_k_b.weight q5_1 +blk.12.attn_k_b.weight q5_1 blk.12.ffn_down_exps.weight q6_K blk.12.ffn_down_shexp.weight q6_K +blk.13.attn_k_b.weight q5_1 +blk.14.attn_k_b.weight q5_1 +blk.15.attn_k_b.weight q5_1 blk.15.ffn_down_exps.weight q6_K blk.15.ffn_down_shexp.weight q6_K +blk.16.attn_k_b.weight q5_1 +blk.17.attn_k_b.weight q5_1 +blk.18.attn_k_b.weight q5_1 blk.18.ffn_down_exps.weight q6_K blk.18.ffn_down_shexp.weight q6_K +blk.19.attn_k_b.weight q5_1 +blk.20.attn_k_b.weight q5_1 +blk.21.attn_k_b.weight q5_1 blk.21.ffn_down_exps.weight q6_K blk.21.ffn_down_shexp.weight q6_K +blk.22.attn_k_b.weight q5_1 +blk.23.attn_k_b.weight q5_1 +blk.24.attn_k_b.weight q5_1 blk.24.ffn_down_exps.weight q6_K blk.24.ffn_down_shexp.weight q6_K +blk.25.attn_k_b.weight q5_1 +blk.26.attn_k_b.weight q5_1 +blk.27.attn_k_b.weight q5_1 blk.27.ffn_down_exps.weight q6_K blk.27.ffn_down_shexp.weight q6_K +blk.28.attn_k_b.weight q5_1 +blk.29.attn_k_b.weight q5_1 +blk.30.attn_k_b.weight q5_1 blk.30.ffn_down_exps.weight q6_K blk.30.ffn_down_shexp.weight q6_K +blk.31.attn_k_b.weight q5_1 +blk.32.attn_k_b.weight q5_1 +blk.33.attn_k_b.weight q5_1 blk.33.ffn_down_exps.weight q6_K blk.33.ffn_down_shexp.weight q6_K +blk.34.attn_k_b.weight q5_1 +blk.35.attn_k_b.weight q5_1 +blk.36.attn_k_b.weight q5_1 blk.36.ffn_down_exps.weight q6_K blk.36.ffn_down_shexp.weight q6_K +blk.37.attn_k_b.weight q5_1 +blk.38.attn_k_b.weight q5_1 +blk.39.attn_k_b.weight q5_1 blk.39.ffn_down_exps.weight q6_K blk.39.ffn_down_shexp.weight q6_K +blk.40.attn_k_b.weight q5_1 +blk.41.attn_k_b.weight q5_1 +blk.42.attn_k_b.weight q5_1 blk.42.ffn_down_exps.weight q6_K blk.42.ffn_down_shexp.weight q6_K +blk.43.attn_k_b.weight q5_1 +blk.44.attn_k_b.weight q5_1 +blk.45.attn_k_b.weight q5_1 blk.45.ffn_down_exps.weight q6_K blk.45.ffn_down_shexp.weight q6_K +blk.46.attn_k_b.weight q5_1 +blk.47.attn_k_b.weight q5_1 +blk.48.attn_k_b.weight q5_1 blk.48.ffn_down_exps.weight q6_K blk.48.ffn_down_shexp.weight q6_K +blk.49.attn_k_b.weight q5_1 +blk.50.attn_k_b.weight q5_1 +blk.51.attn_k_b.weight q5_1 blk.51.ffn_down_exps.weight q6_K blk.51.ffn_down_shexp.weight q6_K +blk.52.attn_k_b.weight q5_1 +blk.53.attn_k_b.weight q5_1 blk.53.ffn_down_exps.weight q6_K blk.53.ffn_down_shexp.weight q6_K +blk.54.attn_k_b.weight q5_1 blk.54.ffn_down_exps.weight q6_K blk.54.ffn_down_shexp.weight q6_K +blk.55.attn_k_b.weight q5_1 blk.55.ffn_down_exps.weight q6_K blk.55.ffn_down_shexp.weight q6_K +blk.56.attn_k_b.weight q5_1 blk.56.ffn_down_exps.weight q6_K blk.56.ffn_down_shexp.weight q6_K +blk.57.attn_k_b.weight q5_1 blk.57.ffn_down_exps.weight q6_K blk.57.ffn_down_shexp.weight q6_K +blk.58.attn_k_b.weight q5_1 blk.58.ffn_down_exps.weight q6_K blk.58.ffn_down_shexp.weight q6_K +blk.59.attn_k_b.weight q5_1 blk.59.ffn_down_exps.weight q6_K blk.59.ffn_down_shexp.weight q6_K +blk.60.attn_k_b.weight q5_1 blk.60.ffn_down_exps.weight q6_K blk.60.ffn_down_shexp.weight q6_K [Q6_K] q6_K +blk.0.attn_k_b.weight q8_0 +blk.1.attn_k_b.weight q8_0 +blk.2.attn_k_b.weight q8_0 +blk.3.attn_k_b.weight q8_0 +blk.4.attn_k_b.weight q8_0 +blk.5.attn_k_b.weight q8_0 +blk.6.attn_k_b.weight q8_0 +blk.7.attn_k_b.weight q8_0 +blk.8.attn_k_b.weight q8_0 +blk.9.attn_k_b.weight q8_0 +blk.10.attn_k_b.weight q8_0 +blk.11.attn_k_b.weight q8_0 +blk.12.attn_k_b.weight q8_0 +blk.13.attn_k_b.weight q8_0 +blk.14.attn_k_b.weight q8_0 +blk.15.attn_k_b.weight q8_0 +blk.16.attn_k_b.weight q8_0 +blk.17.attn_k_b.weight q8_0 +blk.18.attn_k_b.weight q8_0 +blk.19.attn_k_b.weight q8_0 +blk.20.attn_k_b.weight q8_0 +blk.21.attn_k_b.weight q8_0 +blk.22.attn_k_b.weight q8_0 +blk.23.attn_k_b.weight q8_0 +blk.24.attn_k_b.weight q8_0 +blk.25.attn_k_b.weight q8_0 +blk.26.attn_k_b.weight q8_0 +blk.27.attn_k_b.weight q8_0 +blk.28.attn_k_b.weight q8_0 +blk.29.attn_k_b.weight q8_0 +blk.30.attn_k_b.weight q8_0 +blk.31.attn_k_b.weight q8_0 +blk.32.attn_k_b.weight q8_0 +blk.33.attn_k_b.weight q8_0 +blk.34.attn_k_b.weight q8_0 +blk.35.attn_k_b.weight q8_0 +blk.36.attn_k_b.weight q8_0 +blk.37.attn_k_b.weight q8_0 +blk.38.attn_k_b.weight q8_0 +blk.39.attn_k_b.weight q8_0 +blk.40.attn_k_b.weight q8_0 +blk.41.attn_k_b.weight q8_0 +blk.42.attn_k_b.weight q8_0 +blk.43.attn_k_b.weight q8_0 +blk.44.attn_k_b.weight q8_0 +blk.45.attn_k_b.weight q8_0 +blk.46.attn_k_b.weight q8_0 +blk.47.attn_k_b.weight q8_0 +blk.48.attn_k_b.weight q8_0 +blk.49.attn_k_b.weight q8_0 +blk.50.attn_k_b.weight q8_0 +blk.51.attn_k_b.weight q8_0 +blk.52.attn_k_b.weight q8_0 +blk.53.attn_k_b.weight q8_0 +blk.54.attn_k_b.weight q8_0 +blk.55.attn_k_b.weight q8_0 +blk.56.attn_k_b.weight q8_0 +blk.57.attn_k_b.weight q8_0 +blk.58.attn_k_b.weight q8_0 +blk.59.attn_k_b.weight q8_0 +blk.60.attn_k_b.weight q8_0 [IQ2_XXS] iq2_xxs output.weight q5_K token_embd.weight q2_K +blk.0.attn_k_b.weight iq4_nl blk.0.ffn_down.weight q2_K +blk.1.attn_k_b.weight iq4_nl blk.1.ffn_down.weight q2_K +blk.2.attn_k_b.weight iq4_nl blk.2.ffn_down.weight q2_K +blk.3.attn_k_b.weight iq4_nl blk.3.ffn_down_exps.weight q2_K blk.3.ffn_down_shexp.weight q2_K +blk.4.attn_k_b.weight iq4_nl blk.4.ffn_down_exps.weight q2_K blk.4.ffn_down_shexp.weight q2_K +blk.5.attn_k_b.weight iq4_nl +blk.6.attn_k_b.weight iq4_nl +blk.7.attn_k_b.weight iq4_nl +blk.8.attn_k_b.weight iq4_nl +blk.9.attn_k_b.weight iq4_nl +blk.10.attn_k_b.weight iq4_nl +blk.11.attn_k_b.weight iq4_nl +blk.12.attn_k_b.weight iq4_nl +blk.13.attn_k_b.weight iq4_nl +blk.14.attn_k_b.weight iq4_nl +blk.15.attn_k_b.weight iq4_nl +blk.16.attn_k_b.weight iq4_nl +blk.17.attn_k_b.weight iq4_nl +blk.18.attn_k_b.weight iq4_nl +blk.19.attn_k_b.weight iq4_nl +blk.20.attn_k_b.weight iq4_nl +blk.21.attn_k_b.weight iq4_nl +blk.22.attn_k_b.weight iq4_nl +blk.23.attn_k_b.weight iq4_nl +blk.24.attn_k_b.weight iq4_nl +blk.25.attn_k_b.weight iq4_nl +blk.26.attn_k_b.weight iq4_nl +blk.27.attn_k_b.weight iq4_nl +blk.28.attn_k_b.weight iq4_nl +blk.29.attn_k_b.weight iq4_nl +blk.30.attn_k_b.weight iq4_nl +blk.31.attn_k_b.weight iq4_nl +blk.32.attn_k_b.weight iq4_nl +blk.33.attn_k_b.weight iq4_nl +blk.34.attn_k_b.weight iq4_nl +blk.35.attn_k_b.weight iq4_nl +blk.36.attn_k_b.weight iq4_nl +blk.37.attn_k_b.weight iq4_nl +blk.38.attn_k_b.weight iq4_nl +blk.39.attn_k_b.weight iq4_nl +blk.40.attn_k_b.weight iq4_nl +blk.41.attn_k_b.weight iq4_nl +blk.42.attn_k_b.weight iq4_nl +blk.43.attn_k_b.weight iq4_nl +blk.44.attn_k_b.weight iq4_nl +blk.45.attn_k_b.weight iq4_nl +blk.46.attn_k_b.weight iq4_nl +blk.47.attn_k_b.weight iq4_nl +blk.48.attn_k_b.weight iq4_nl +blk.49.attn_k_b.weight iq4_nl +blk.50.attn_k_b.weight iq4_nl +blk.51.attn_k_b.weight iq4_nl +blk.52.attn_k_b.weight iq4_nl +blk.53.attn_k_b.weight iq4_nl +blk.54.attn_k_b.weight iq4_nl +blk.55.attn_k_b.weight iq4_nl +blk.56.attn_k_b.weight iq4_nl +blk.57.attn_k_b.weight iq4_nl +blk.58.attn_k_b.weight iq4_nl +blk.59.attn_k_b.weight iq4_nl +blk.60.attn_k_b.weight iq4_nl [IQ2_XS] iq2_xs output.weight q5_K token_embd.weight q2_K +blk.0.attn_k_b.weight iq4_nl blk.0.ffn_down.weight q2_K +blk.1.attn_k_b.weight iq4_nl blk.1.ffn_down.weight q2_K +blk.2.attn_k_b.weight iq4_nl blk.2.ffn_down.weight q2_K +blk.3.attn_k_b.weight iq4_nl blk.3.ffn_down_exps.weight q2_K blk.3.ffn_down_shexp.weight q2_K +blk.4.attn_k_b.weight iq4_nl blk.4.ffn_down_exps.weight q2_K blk.4.ffn_down_shexp.weight q2_K +blk.5.attn_k_b.weight iq4_nl +blk.6.attn_k_b.weight iq4_nl +blk.7.attn_k_b.weight iq4_nl +blk.8.attn_k_b.weight iq4_nl +blk.9.attn_k_b.weight iq4_nl +blk.10.attn_k_b.weight iq4_nl +blk.11.attn_k_b.weight iq4_nl +blk.12.attn_k_b.weight iq4_nl +blk.13.attn_k_b.weight iq4_nl +blk.14.attn_k_b.weight iq4_nl +blk.15.attn_k_b.weight iq4_nl +blk.16.attn_k_b.weight iq4_nl +blk.17.attn_k_b.weight iq4_nl +blk.18.attn_k_b.weight iq4_nl +blk.19.attn_k_b.weight iq4_nl +blk.20.attn_k_b.weight iq4_nl +blk.21.attn_k_b.weight iq4_nl +blk.22.attn_k_b.weight iq4_nl +blk.23.attn_k_b.weight iq4_nl +blk.24.attn_k_b.weight iq4_nl +blk.25.attn_k_b.weight iq4_nl +blk.26.attn_k_b.weight iq4_nl +blk.27.attn_k_b.weight iq4_nl +blk.28.attn_k_b.weight iq4_nl +blk.29.attn_k_b.weight iq4_nl +blk.30.attn_k_b.weight iq4_nl +blk.31.attn_k_b.weight iq4_nl +blk.32.attn_k_b.weight iq4_nl +blk.33.attn_k_b.weight iq4_nl +blk.34.attn_k_b.weight iq4_nl +blk.35.attn_k_b.weight iq4_nl +blk.36.attn_k_b.weight iq4_nl +blk.37.attn_k_b.weight iq4_nl +blk.38.attn_k_b.weight iq4_nl +blk.39.attn_k_b.weight iq4_nl +blk.40.attn_k_b.weight iq4_nl +blk.41.attn_k_b.weight iq4_nl +blk.42.attn_k_b.weight iq4_nl +blk.43.attn_k_b.weight iq4_nl +blk.44.attn_k_b.weight iq4_nl +blk.45.attn_k_b.weight iq4_nl +blk.46.attn_k_b.weight iq4_nl +blk.47.attn_k_b.weight iq4_nl +blk.48.attn_k_b.weight iq4_nl +blk.49.attn_k_b.weight iq4_nl +blk.50.attn_k_b.weight iq4_nl +blk.51.attn_k_b.weight iq4_nl +blk.52.attn_k_b.weight iq4_nl +blk.53.attn_k_b.weight iq4_nl +blk.54.attn_k_b.weight iq4_nl +blk.55.attn_k_b.weight iq4_nl +blk.56.attn_k_b.weight iq4_nl +blk.57.attn_k_b.weight iq4_nl +blk.58.attn_k_b.weight iq4_nl +blk.59.attn_k_b.weight iq4_nl +blk.60.attn_k_b.weight iq4_nl [Q2_K_S] q2_K output.weight q6_K +blk.0.attn_k_b.weight q4_0 blk.0.ffn_down.weight q4_K +blk.1.attn_k_b.weight q4_0 blk.1.ffn_down.weight q4_K +blk.2.attn_k_b.weight q4_0 blk.2.ffn_down.weight q4_K +blk.3.attn_k_b.weight q4_0 blk.3.ffn_down_exps.weight q4_K blk.3.ffn_down_shexp.weight q4_K +blk.4.attn_k_b.weight q4_0 blk.4.ffn_down_exps.weight q4_K blk.4.ffn_down_shexp.weight q4_K +blk.5.attn_k_b.weight q4_0 blk.5.ffn_down_exps.weight q4_K blk.5.ffn_down_shexp.weight q4_K +blk.6.attn_k_b.weight q4_0 blk.6.ffn_down_exps.weight q4_K blk.6.ffn_down_shexp.weight q4_K +blk.7.attn_k_b.weight q4_0 +blk.8.attn_k_b.weight q4_0 +blk.9.attn_k_b.weight q4_0 +blk.10.attn_k_b.weight q4_0 +blk.11.attn_k_b.weight q4_0 +blk.12.attn_k_b.weight q4_0 +blk.13.attn_k_b.weight q4_0 +blk.14.attn_k_b.weight q4_0 +blk.15.attn_k_b.weight q4_0 +blk.16.attn_k_b.weight q4_0 +blk.17.attn_k_b.weight q4_0 +blk.18.attn_k_b.weight q4_0 +blk.19.attn_k_b.weight q4_0 +blk.20.attn_k_b.weight q4_0 +blk.21.attn_k_b.weight q4_0 +blk.22.attn_k_b.weight q4_0 +blk.23.attn_k_b.weight q4_0 +blk.24.attn_k_b.weight q4_0 +blk.25.attn_k_b.weight q4_0 +blk.26.attn_k_b.weight q4_0 +blk.27.attn_k_b.weight q4_0 +blk.28.attn_k_b.weight q4_0 +blk.29.attn_k_b.weight q4_0 +blk.30.attn_k_b.weight q4_0 +blk.31.attn_k_b.weight q4_0 +blk.32.attn_k_b.weight q4_0 +blk.33.attn_k_b.weight q4_0 +blk.34.attn_k_b.weight q4_0 +blk.35.attn_k_b.weight q4_0 +blk.36.attn_k_b.weight q4_0 +blk.37.attn_k_b.weight q4_0 +blk.38.attn_k_b.weight q4_0 +blk.39.attn_k_b.weight q4_0 +blk.40.attn_k_b.weight q4_0 +blk.41.attn_k_b.weight q4_0 +blk.42.attn_k_b.weight q4_0 +blk.43.attn_k_b.weight q4_0 +blk.44.attn_k_b.weight q4_0 +blk.45.attn_k_b.weight q4_0 +blk.46.attn_k_b.weight q4_0 +blk.47.attn_k_b.weight q4_0 +blk.48.attn_k_b.weight q4_0 +blk.49.attn_k_b.weight q4_0 +blk.50.attn_k_b.weight q4_0 +blk.51.attn_k_b.weight q4_0 +blk.52.attn_k_b.weight q4_0 +blk.53.attn_k_b.weight q4_0 +blk.54.attn_k_b.weight q4_0 +blk.55.attn_k_b.weight q4_0 +blk.56.attn_k_b.weight q4_0 +blk.57.attn_k_b.weight q4_0 +blk.58.attn_k_b.weight q4_0 +blk.59.attn_k_b.weight q4_0 +blk.60.attn_k_b.weight q4_0 [IQ3_XS] iq3_s output.weight q6_K +blk.0.attn_k_b.weight iq4_nl +blk.1.attn_k_b.weight iq4_nl +blk.2.attn_k_b.weight iq4_nl +blk.3.attn_k_b.weight iq4_nl +blk.4.attn_k_b.weight iq4_nl +blk.5.attn_k_b.weight iq4_nl +blk.6.attn_k_b.weight iq4_nl +blk.7.attn_k_b.weight iq4_nl blk.7.ffn_gate_exps.weight iq3_xxs blk.7.ffn_gate_shexp.weight iq3_xxs blk.7.ffn_up_exps.weight iq3_xxs blk.7.ffn_up_shexp.weight iq3_xxs +blk.8.attn_k_b.weight iq4_nl blk.8.ffn_gate_exps.weight iq3_xxs blk.8.ffn_gate_shexp.weight iq3_xxs blk.8.ffn_up_exps.weight iq3_xxs blk.8.ffn_up_shexp.weight iq3_xxs +blk.9.attn_k_b.weight iq4_nl blk.9.ffn_gate_exps.weight iq3_xxs blk.9.ffn_gate_shexp.weight iq3_xxs blk.9.ffn_up_exps.weight iq3_xxs blk.9.ffn_up_shexp.weight iq3_xxs +blk.10.attn_k_b.weight iq4_nl blk.10.ffn_gate_exps.weight iq3_xxs blk.10.ffn_gate_shexp.weight iq3_xxs blk.10.ffn_up_exps.weight iq3_xxs blk.10.ffn_up_shexp.weight iq3_xxs +blk.11.attn_k_b.weight iq4_nl blk.11.ffn_gate_exps.weight iq3_xxs blk.11.ffn_gate_shexp.weight iq3_xxs blk.11.ffn_up_exps.weight iq3_xxs blk.11.ffn_up_shexp.weight iq3_xxs +blk.12.attn_k_b.weight iq4_nl blk.12.ffn_gate_exps.weight iq3_xxs blk.12.ffn_gate_shexp.weight iq3_xxs blk.12.ffn_up_exps.weight iq3_xxs blk.12.ffn_up_shexp.weight iq3_xxs +blk.13.attn_k_b.weight iq4_nl blk.13.ffn_gate_exps.weight iq3_xxs blk.13.ffn_gate_shexp.weight iq3_xxs blk.13.ffn_up_exps.weight iq3_xxs blk.13.ffn_up_shexp.weight iq3_xxs +blk.14.attn_k_b.weight iq4_nl blk.14.ffn_gate_exps.weight iq3_xxs blk.14.ffn_gate_shexp.weight iq3_xxs blk.14.ffn_up_exps.weight iq3_xxs blk.14.ffn_up_shexp.weight iq3_xxs +blk.15.attn_k_b.weight iq4_nl blk.15.ffn_gate_exps.weight iq3_xxs blk.15.ffn_gate_shexp.weight iq3_xxs blk.15.ffn_up_exps.weight iq3_xxs blk.15.ffn_up_shexp.weight iq3_xxs +blk.16.attn_k_b.weight iq4_nl blk.16.ffn_gate_exps.weight iq3_xxs blk.16.ffn_gate_shexp.weight iq3_xxs blk.16.ffn_up_exps.weight iq3_xxs blk.16.ffn_up_shexp.weight iq3_xxs +blk.17.attn_k_b.weight iq4_nl blk.17.ffn_gate_exps.weight iq3_xxs blk.17.ffn_gate_shexp.weight iq3_xxs blk.17.ffn_up_exps.weight iq3_xxs blk.17.ffn_up_shexp.weight iq3_xxs +blk.18.attn_k_b.weight iq4_nl blk.18.ffn_gate_exps.weight iq3_xxs blk.18.ffn_gate_shexp.weight iq3_xxs blk.18.ffn_up_exps.weight iq3_xxs blk.18.ffn_up_shexp.weight iq3_xxs +blk.19.attn_k_b.weight iq4_nl blk.19.ffn_gate_exps.weight iq3_xxs blk.19.ffn_gate_shexp.weight iq3_xxs blk.19.ffn_up_exps.weight iq3_xxs blk.19.ffn_up_shexp.weight iq3_xxs +blk.20.attn_k_b.weight iq4_nl blk.20.ffn_gate_exps.weight iq3_xxs blk.20.ffn_gate_shexp.weight iq3_xxs blk.20.ffn_up_exps.weight iq3_xxs blk.20.ffn_up_shexp.weight iq3_xxs +blk.21.attn_k_b.weight iq4_nl blk.21.ffn_gate_exps.weight iq3_xxs blk.21.ffn_gate_shexp.weight iq3_xxs blk.21.ffn_up_exps.weight iq3_xxs blk.21.ffn_up_shexp.weight iq3_xxs +blk.22.attn_k_b.weight iq4_nl blk.22.ffn_gate_exps.weight iq3_xxs blk.22.ffn_gate_shexp.weight iq3_xxs blk.22.ffn_up_exps.weight iq3_xxs blk.22.ffn_up_shexp.weight iq3_xxs +blk.23.attn_k_b.weight iq4_nl blk.23.ffn_gate_exps.weight iq3_xxs blk.23.ffn_gate_shexp.weight iq3_xxs blk.23.ffn_up_exps.weight iq3_xxs blk.23.ffn_up_shexp.weight iq3_xxs +blk.24.attn_k_b.weight iq4_nl blk.24.ffn_gate_exps.weight iq3_xxs blk.24.ffn_gate_shexp.weight iq3_xxs blk.24.ffn_up_exps.weight iq3_xxs blk.24.ffn_up_shexp.weight iq3_xxs +blk.25.attn_k_b.weight iq4_nl blk.25.ffn_gate_exps.weight iq3_xxs blk.25.ffn_gate_shexp.weight iq3_xxs blk.25.ffn_up_exps.weight iq3_xxs blk.25.ffn_up_shexp.weight iq3_xxs +blk.26.attn_k_b.weight iq4_nl blk.26.ffn_gate_exps.weight iq3_xxs blk.26.ffn_gate_shexp.weight iq3_xxs blk.26.ffn_up_exps.weight iq3_xxs blk.26.ffn_up_shexp.weight iq3_xxs +blk.27.attn_k_b.weight iq4_nl blk.27.ffn_gate_exps.weight iq3_xxs blk.27.ffn_gate_shexp.weight iq3_xxs blk.27.ffn_up_exps.weight iq3_xxs blk.27.ffn_up_shexp.weight iq3_xxs +blk.28.attn_k_b.weight iq4_nl blk.28.ffn_gate_exps.weight iq3_xxs blk.28.ffn_gate_shexp.weight iq3_xxs blk.28.ffn_up_exps.weight iq3_xxs blk.28.ffn_up_shexp.weight iq3_xxs +blk.29.attn_k_b.weight iq4_nl blk.29.ffn_gate_exps.weight iq3_xxs blk.29.ffn_gate_shexp.weight iq3_xxs blk.29.ffn_up_exps.weight iq3_xxs blk.29.ffn_up_shexp.weight iq3_xxs +blk.30.attn_k_b.weight iq4_nl blk.30.ffn_gate_exps.weight iq3_xxs blk.30.ffn_gate_shexp.weight iq3_xxs blk.30.ffn_up_exps.weight iq3_xxs blk.30.ffn_up_shexp.weight iq3_xxs +blk.31.attn_k_b.weight iq4_nl blk.31.ffn_gate_exps.weight iq3_xxs blk.31.ffn_gate_shexp.weight iq3_xxs blk.31.ffn_up_exps.weight iq3_xxs blk.31.ffn_up_shexp.weight iq3_xxs +blk.32.attn_k_b.weight iq4_nl blk.32.ffn_gate_exps.weight iq3_xxs blk.32.ffn_gate_shexp.weight iq3_xxs blk.32.ffn_up_exps.weight iq3_xxs blk.32.ffn_up_shexp.weight iq3_xxs +blk.33.attn_k_b.weight iq4_nl blk.33.ffn_gate_exps.weight iq3_xxs blk.33.ffn_gate_shexp.weight iq3_xxs blk.33.ffn_up_exps.weight iq3_xxs blk.33.ffn_up_shexp.weight iq3_xxs +blk.34.attn_k_b.weight iq4_nl blk.34.ffn_gate_exps.weight iq3_xxs blk.34.ffn_gate_shexp.weight iq3_xxs blk.34.ffn_up_exps.weight iq3_xxs blk.34.ffn_up_shexp.weight iq3_xxs +blk.35.attn_k_b.weight iq4_nl blk.35.ffn_gate_exps.weight iq3_xxs blk.35.ffn_gate_shexp.weight iq3_xxs blk.35.ffn_up_exps.weight iq3_xxs blk.35.ffn_up_shexp.weight iq3_xxs +blk.36.attn_k_b.weight iq4_nl blk.36.ffn_gate_exps.weight iq3_xxs blk.36.ffn_gate_shexp.weight iq3_xxs blk.36.ffn_up_exps.weight iq3_xxs blk.36.ffn_up_shexp.weight iq3_xxs +blk.37.attn_k_b.weight iq4_nl blk.37.ffn_gate_exps.weight iq3_xxs blk.37.ffn_gate_shexp.weight iq3_xxs blk.37.ffn_up_exps.weight iq3_xxs blk.37.ffn_up_shexp.weight iq3_xxs +blk.38.attn_k_b.weight iq4_nl blk.38.ffn_gate_exps.weight iq3_xxs blk.38.ffn_gate_shexp.weight iq3_xxs blk.38.ffn_up_exps.weight iq3_xxs blk.38.ffn_up_shexp.weight iq3_xxs +blk.39.attn_k_b.weight iq4_nl blk.39.ffn_gate_exps.weight iq3_xxs blk.39.ffn_gate_shexp.weight iq3_xxs blk.39.ffn_up_exps.weight iq3_xxs blk.39.ffn_up_shexp.weight iq3_xxs +blk.40.attn_k_b.weight iq4_nl blk.40.ffn_gate_exps.weight iq3_xxs blk.40.ffn_gate_shexp.weight iq3_xxs blk.40.ffn_up_exps.weight iq3_xxs blk.40.ffn_up_shexp.weight iq3_xxs +blk.41.attn_k_b.weight iq4_nl blk.41.ffn_gate_exps.weight iq3_xxs blk.41.ffn_gate_shexp.weight iq3_xxs blk.41.ffn_up_exps.weight iq3_xxs blk.41.ffn_up_shexp.weight iq3_xxs +blk.42.attn_k_b.weight iq4_nl blk.42.ffn_gate_exps.weight iq3_xxs blk.42.ffn_gate_shexp.weight iq3_xxs blk.42.ffn_up_exps.weight iq3_xxs blk.42.ffn_up_shexp.weight iq3_xxs +blk.43.attn_k_b.weight iq4_nl blk.43.ffn_gate_exps.weight iq3_xxs blk.43.ffn_gate_shexp.weight iq3_xxs blk.43.ffn_up_exps.weight iq3_xxs blk.43.ffn_up_shexp.weight iq3_xxs +blk.44.attn_k_b.weight iq4_nl blk.44.ffn_gate_exps.weight iq3_xxs blk.44.ffn_gate_shexp.weight iq3_xxs blk.44.ffn_up_exps.weight iq3_xxs blk.44.ffn_up_shexp.weight iq3_xxs +blk.45.attn_k_b.weight iq4_nl blk.45.ffn_gate_exps.weight iq3_xxs blk.45.ffn_gate_shexp.weight iq3_xxs blk.45.ffn_up_exps.weight iq3_xxs blk.45.ffn_up_shexp.weight iq3_xxs +blk.46.attn_k_b.weight iq4_nl blk.46.ffn_gate_exps.weight iq3_xxs blk.46.ffn_gate_shexp.weight iq3_xxs blk.46.ffn_up_exps.weight iq3_xxs blk.46.ffn_up_shexp.weight iq3_xxs +blk.47.attn_k_b.weight iq4_nl blk.47.ffn_gate_exps.weight iq3_xxs blk.47.ffn_gate_shexp.weight iq3_xxs blk.47.ffn_up_exps.weight iq3_xxs blk.47.ffn_up_shexp.weight iq3_xxs +blk.48.attn_k_b.weight iq4_nl blk.48.ffn_gate_exps.weight iq3_xxs blk.48.ffn_gate_shexp.weight iq3_xxs blk.48.ffn_up_exps.weight iq3_xxs blk.48.ffn_up_shexp.weight iq3_xxs +blk.49.attn_k_b.weight iq4_nl blk.49.ffn_gate_exps.weight iq3_xxs blk.49.ffn_gate_shexp.weight iq3_xxs blk.49.ffn_up_exps.weight iq3_xxs blk.49.ffn_up_shexp.weight iq3_xxs +blk.50.attn_k_b.weight iq4_nl blk.50.ffn_gate_exps.weight iq3_xxs blk.50.ffn_gate_shexp.weight iq3_xxs blk.50.ffn_up_exps.weight iq3_xxs blk.50.ffn_up_shexp.weight iq3_xxs +blk.51.attn_k_b.weight iq4_nl blk.51.ffn_gate_exps.weight iq3_xxs blk.51.ffn_gate_shexp.weight iq3_xxs blk.51.ffn_up_exps.weight iq3_xxs blk.51.ffn_up_shexp.weight iq3_xxs +blk.52.attn_k_b.weight iq4_nl blk.52.ffn_gate_exps.weight iq3_xxs blk.52.ffn_gate_shexp.weight iq3_xxs blk.52.ffn_up_exps.weight iq3_xxs blk.52.ffn_up_shexp.weight iq3_xxs +blk.53.attn_k_b.weight iq4_nl +blk.54.attn_k_b.weight iq4_nl +blk.55.attn_k_b.weight iq4_nl +blk.56.attn_k_b.weight iq4_nl +blk.57.attn_k_b.weight iq4_nl +blk.58.attn_k_b.weight iq4_nl +blk.59.attn_k_b.weight iq4_nl +blk.60.attn_k_b.weight iq4_nl [IQ3_XXS] iq3_xxs output.weight q5_K token_embd.weight iq3_s +blk.0.attn_k_b.weight iq4_nl blk.0.attn_output.weight iq3_s blk.0.ffn_down.weight q4_K +blk.1.attn_k_b.weight iq4_nl blk.1.attn_output.weight iq3_s blk.1.ffn_down.weight q4_K +blk.2.attn_k_b.weight iq4_nl blk.2.attn_output.weight iq3_s blk.2.ffn_down.weight q4_K +blk.3.attn_k_b.weight iq4_nl blk.3.attn_output.weight iq3_s blk.3.ffn_down_exps.weight q4_K blk.3.ffn_down_shexp.weight q4_K +blk.4.attn_k_b.weight iq4_nl blk.4.attn_output.weight iq3_s blk.4.ffn_down_exps.weight q4_K blk.4.ffn_down_shexp.weight q4_K +blk.5.attn_k_b.weight iq4_nl blk.5.attn_output.weight iq3_s blk.5.ffn_down_exps.weight q4_K blk.5.ffn_down_shexp.weight q4_K +blk.6.attn_k_b.weight iq4_nl blk.6.attn_output.weight iq3_s blk.6.ffn_down_exps.weight q4_K blk.6.ffn_down_shexp.weight q4_K +blk.7.attn_k_b.weight iq4_nl blk.7.attn_output.weight iq3_s blk.7.ffn_down_exps.weight q3_K blk.7.ffn_down_shexp.weight q3_K +blk.8.attn_k_b.weight iq4_nl blk.8.attn_output.weight iq3_s blk.8.ffn_down_exps.weight q3_K blk.8.ffn_down_shexp.weight q3_K +blk.9.attn_k_b.weight iq4_nl blk.9.attn_output.weight iq3_s blk.9.ffn_down_exps.weight q3_K blk.9.ffn_down_shexp.weight q3_K +blk.10.attn_k_b.weight iq4_nl blk.10.attn_output.weight iq3_s blk.10.ffn_down_exps.weight q3_K blk.10.ffn_down_shexp.weight q3_K +blk.11.attn_k_b.weight iq4_nl blk.11.attn_output.weight iq3_s blk.11.ffn_down_exps.weight q3_K blk.11.ffn_down_shexp.weight q3_K +blk.12.attn_k_b.weight iq4_nl blk.12.attn_output.weight iq3_s blk.12.ffn_down_exps.weight q3_K blk.12.ffn_down_shexp.weight q3_K +blk.13.attn_k_b.weight iq4_nl blk.13.attn_output.weight iq3_s blk.13.ffn_down_exps.weight q3_K blk.13.ffn_down_shexp.weight q3_K +blk.14.attn_k_b.weight iq4_nl blk.14.attn_output.weight iq3_s blk.14.ffn_down_exps.weight q3_K blk.14.ffn_down_shexp.weight q3_K +blk.15.attn_k_b.weight iq4_nl blk.15.attn_output.weight iq3_s blk.15.ffn_down_exps.weight q3_K blk.15.ffn_down_shexp.weight q3_K +blk.16.attn_k_b.weight iq4_nl blk.16.attn_output.weight iq3_s blk.16.ffn_down_exps.weight q3_K blk.16.ffn_down_shexp.weight q3_K +blk.17.attn_k_b.weight iq4_nl blk.17.attn_output.weight iq3_s blk.17.ffn_down_exps.weight q3_K blk.17.ffn_down_shexp.weight q3_K +blk.18.attn_k_b.weight iq4_nl blk.18.attn_output.weight iq3_s blk.18.ffn_down_exps.weight q3_K blk.18.ffn_down_shexp.weight q3_K +blk.19.attn_k_b.weight iq4_nl blk.19.attn_output.weight iq3_s blk.19.ffn_down_exps.weight q3_K blk.19.ffn_down_shexp.weight q3_K +blk.20.attn_k_b.weight iq4_nl blk.20.attn_output.weight iq3_s blk.20.ffn_down_exps.weight q3_K blk.20.ffn_down_shexp.weight q3_K +blk.21.attn_k_b.weight iq4_nl blk.21.attn_output.weight iq3_s blk.21.ffn_down_exps.weight q3_K blk.21.ffn_down_shexp.weight q3_K +blk.22.attn_k_b.weight iq4_nl blk.22.attn_output.weight iq3_s blk.22.ffn_down_exps.weight q3_K blk.22.ffn_down_shexp.weight q3_K +blk.23.attn_k_b.weight iq4_nl blk.23.attn_output.weight iq3_s blk.23.ffn_down_exps.weight q3_K blk.23.ffn_down_shexp.weight q3_K +blk.24.attn_k_b.weight iq4_nl blk.24.attn_output.weight iq3_s blk.24.ffn_down_exps.weight q3_K blk.24.ffn_down_shexp.weight q3_K +blk.25.attn_k_b.weight iq4_nl blk.25.attn_output.weight iq3_s blk.25.ffn_down_exps.weight q3_K blk.25.ffn_down_shexp.weight q3_K +blk.26.attn_k_b.weight iq4_nl blk.26.attn_output.weight iq3_s blk.26.ffn_down_exps.weight q3_K blk.26.ffn_down_shexp.weight q3_K +blk.27.attn_k_b.weight iq4_nl blk.27.attn_output.weight iq3_s blk.27.ffn_down_exps.weight q3_K blk.27.ffn_down_shexp.weight q3_K +blk.28.attn_k_b.weight iq4_nl blk.28.attn_output.weight iq3_s blk.28.ffn_down_exps.weight q3_K blk.28.ffn_down_shexp.weight q3_K +blk.29.attn_k_b.weight iq4_nl blk.29.attn_output.weight iq3_s blk.29.ffn_down_exps.weight q3_K blk.29.ffn_down_shexp.weight q3_K +blk.30.attn_k_b.weight iq4_nl blk.30.attn_output.weight iq3_s blk.30.ffn_down_exps.weight q3_K blk.30.ffn_down_shexp.weight q3_K +blk.31.attn_k_b.weight iq4_nl blk.31.attn_output.weight iq3_s blk.31.ffn_down_exps.weight q3_K blk.31.ffn_down_shexp.weight q3_K +blk.32.attn_k_b.weight iq4_nl blk.32.attn_output.weight iq3_s blk.32.ffn_down_exps.weight q3_K blk.32.ffn_down_shexp.weight q3_K +blk.33.attn_k_b.weight iq4_nl blk.33.attn_output.weight iq3_s blk.33.ffn_down_exps.weight q3_K blk.33.ffn_down_shexp.weight q3_K +blk.34.attn_k_b.weight iq4_nl blk.34.attn_output.weight iq3_s blk.34.ffn_down_exps.weight q3_K blk.34.ffn_down_shexp.weight q3_K +blk.35.attn_k_b.weight iq4_nl blk.35.attn_output.weight iq3_s blk.35.ffn_down_exps.weight q3_K blk.35.ffn_down_shexp.weight q3_K +blk.36.attn_k_b.weight iq4_nl blk.36.attn_output.weight iq3_s blk.36.ffn_down_exps.weight q3_K blk.36.ffn_down_shexp.weight q3_K +blk.37.attn_k_b.weight iq4_nl blk.37.attn_output.weight iq3_s blk.37.ffn_down_exps.weight q3_K blk.37.ffn_down_shexp.weight q3_K +blk.38.attn_k_b.weight iq4_nl blk.38.attn_output.weight iq3_s blk.38.ffn_down_exps.weight q3_K blk.38.ffn_down_shexp.weight q3_K +blk.39.attn_k_b.weight iq4_nl blk.39.attn_output.weight iq3_s blk.39.ffn_down_exps.weight q3_K blk.39.ffn_down_shexp.weight q3_K +blk.40.attn_k_b.weight iq4_nl blk.40.attn_output.weight iq3_s blk.40.ffn_down_exps.weight q3_K blk.40.ffn_down_shexp.weight q3_K +blk.41.attn_k_b.weight iq4_nl blk.41.attn_output.weight iq3_s blk.41.ffn_down_exps.weight q3_K blk.41.ffn_down_shexp.weight q3_K +blk.42.attn_k_b.weight iq4_nl blk.42.attn_output.weight iq3_s blk.42.ffn_down_exps.weight q3_K blk.42.ffn_down_shexp.weight q3_K +blk.43.attn_k_b.weight iq4_nl blk.43.attn_output.weight iq3_s blk.43.ffn_down_exps.weight q3_K blk.43.ffn_down_shexp.weight q3_K +blk.44.attn_k_b.weight iq4_nl blk.44.attn_output.weight iq3_s blk.44.ffn_down_exps.weight q3_K blk.44.ffn_down_shexp.weight q3_K +blk.45.attn_k_b.weight iq4_nl blk.45.attn_output.weight iq3_s blk.45.ffn_down_exps.weight q3_K blk.45.ffn_down_shexp.weight q3_K +blk.46.attn_k_b.weight iq4_nl blk.46.attn_output.weight iq3_s blk.46.ffn_down_exps.weight q3_K blk.46.ffn_down_shexp.weight q3_K +blk.47.attn_k_b.weight iq4_nl blk.47.attn_output.weight iq3_s blk.47.ffn_down_exps.weight q3_K blk.47.ffn_down_shexp.weight q3_K +blk.48.attn_k_b.weight iq4_nl blk.48.attn_output.weight iq3_s blk.48.ffn_down_exps.weight q3_K blk.48.ffn_down_shexp.weight q3_K +blk.49.attn_k_b.weight iq4_nl blk.49.attn_output.weight iq3_s blk.49.ffn_down_exps.weight q3_K blk.49.ffn_down_shexp.weight q3_K +blk.50.attn_k_b.weight iq4_nl blk.50.attn_output.weight iq3_s blk.50.ffn_down_exps.weight q3_K blk.50.ffn_down_shexp.weight q3_K +blk.51.attn_k_b.weight iq4_nl blk.51.attn_output.weight iq3_s blk.51.ffn_down_exps.weight q3_K blk.51.ffn_down_shexp.weight q3_K +blk.52.attn_k_b.weight iq4_nl blk.52.attn_output.weight iq3_s blk.52.ffn_down_exps.weight q3_K blk.52.ffn_down_shexp.weight q3_K +blk.53.attn_k_b.weight iq4_nl blk.53.attn_output.weight iq3_s blk.53.ffn_down_exps.weight q3_K blk.53.ffn_down_shexp.weight q3_K +blk.54.attn_k_b.weight iq4_nl blk.54.attn_output.weight iq3_s blk.54.ffn_down_exps.weight q3_K blk.54.ffn_down_shexp.weight q3_K +blk.55.attn_k_b.weight iq4_nl blk.55.attn_output.weight iq3_s blk.55.ffn_down_exps.weight q3_K blk.55.ffn_down_shexp.weight q3_K +blk.56.attn_k_b.weight iq4_nl blk.56.attn_output.weight iq3_s blk.56.ffn_down_exps.weight q3_K blk.56.ffn_down_shexp.weight q3_K +blk.57.attn_k_b.weight iq4_nl blk.57.attn_output.weight iq3_s blk.57.ffn_down_exps.weight q3_K blk.57.ffn_down_shexp.weight q3_K +blk.58.attn_k_b.weight iq4_nl blk.58.attn_output.weight iq3_s blk.58.ffn_down_exps.weight q3_K blk.58.ffn_down_shexp.weight q3_K +blk.59.attn_k_b.weight iq4_nl blk.59.attn_output.weight iq3_s blk.59.ffn_down_exps.weight q3_K blk.59.ffn_down_shexp.weight q3_K +blk.60.attn_k_b.weight iq4_nl blk.60.attn_output.weight iq3_s blk.60.ffn_down_exps.weight q3_K blk.60.ffn_down_shexp.weight q3_K @@ -1122,73 +1974,134 @@ blk.60.ffn_down_shexp.weight q3_K [IQ1_S] iq1_s output.weight q5_K token_embd.weight q2_K +blk.0.attn_k_b.weight iq4_nl blk.0.attn_output.weight iq2_xxs blk.0.ffn_down.weight q2_K +blk.1.attn_k_b.weight iq4_nl blk.1.attn_output.weight iq2_xxs blk.1.ffn_down.weight q2_K +blk.2.attn_k_b.weight iq4_nl blk.2.attn_output.weight iq2_xxs blk.2.ffn_down.weight q2_K +blk.3.attn_k_b.weight iq4_nl blk.3.attn_output.weight iq2_xxs blk.3.ffn_down_exps.weight q2_K blk.3.ffn_down_shexp.weight q2_K +blk.4.attn_k_b.weight iq4_nl blk.4.attn_output.weight iq2_xxs blk.4.ffn_down_exps.weight q2_K blk.4.ffn_down_shexp.weight q2_K +blk.5.attn_k_b.weight iq4_nl blk.5.attn_output.weight iq2_xxs +blk.6.attn_k_b.weight iq4_nl blk.6.attn_output.weight iq2_xxs +blk.7.attn_k_b.weight iq4_nl blk.7.attn_output.weight iq2_xxs +blk.8.attn_k_b.weight iq4_nl blk.8.attn_output.weight iq2_xxs +blk.9.attn_k_b.weight iq4_nl blk.9.attn_output.weight iq2_xxs +blk.10.attn_k_b.weight iq4_nl blk.10.attn_output.weight iq2_xxs +blk.11.attn_k_b.weight iq4_nl blk.11.attn_output.weight iq2_xxs +blk.12.attn_k_b.weight iq4_nl blk.12.attn_output.weight iq2_xxs +blk.13.attn_k_b.weight iq4_nl blk.13.attn_output.weight iq2_xxs +blk.14.attn_k_b.weight iq4_nl blk.14.attn_output.weight iq2_xxs +blk.15.attn_k_b.weight iq4_nl blk.15.attn_output.weight iq2_xxs +blk.16.attn_k_b.weight iq4_nl blk.16.attn_output.weight iq2_xxs +blk.17.attn_k_b.weight iq4_nl blk.17.attn_output.weight iq2_xxs +blk.18.attn_k_b.weight iq4_nl blk.18.attn_output.weight iq2_xxs +blk.19.attn_k_b.weight iq4_nl blk.19.attn_output.weight iq2_xxs +blk.20.attn_k_b.weight iq4_nl blk.20.attn_output.weight iq2_xxs +blk.21.attn_k_b.weight iq4_nl blk.21.attn_output.weight iq2_xxs +blk.22.attn_k_b.weight iq4_nl blk.22.attn_output.weight iq2_xxs +blk.23.attn_k_b.weight iq4_nl blk.23.attn_output.weight iq2_xxs +blk.24.attn_k_b.weight iq4_nl blk.24.attn_output.weight iq2_xxs +blk.25.attn_k_b.weight iq4_nl blk.25.attn_output.weight iq2_xxs +blk.26.attn_k_b.weight iq4_nl blk.26.attn_output.weight iq2_xxs +blk.27.attn_k_b.weight iq4_nl blk.27.attn_output.weight iq2_xxs +blk.28.attn_k_b.weight iq4_nl blk.28.attn_output.weight iq2_xxs +blk.29.attn_k_b.weight iq4_nl blk.29.attn_output.weight iq2_xxs +blk.30.attn_k_b.weight iq4_nl blk.30.attn_output.weight iq2_xxs +blk.31.attn_k_b.weight iq4_nl blk.31.attn_output.weight iq2_xxs +blk.32.attn_k_b.weight iq4_nl blk.32.attn_output.weight iq2_xxs +blk.33.attn_k_b.weight iq4_nl blk.33.attn_output.weight iq2_xxs +blk.34.attn_k_b.weight iq4_nl blk.34.attn_output.weight iq2_xxs +blk.35.attn_k_b.weight iq4_nl blk.35.attn_output.weight iq2_xxs +blk.36.attn_k_b.weight iq4_nl blk.36.attn_output.weight iq2_xxs +blk.37.attn_k_b.weight iq4_nl blk.37.attn_output.weight iq2_xxs +blk.38.attn_k_b.weight iq4_nl blk.38.attn_output.weight iq2_xxs +blk.39.attn_k_b.weight iq4_nl blk.39.attn_output.weight iq2_xxs +blk.40.attn_k_b.weight iq4_nl blk.40.attn_output.weight iq2_xxs +blk.41.attn_k_b.weight iq4_nl blk.41.attn_output.weight iq2_xxs +blk.42.attn_k_b.weight iq4_nl blk.42.attn_output.weight iq2_xxs +blk.43.attn_k_b.weight iq4_nl blk.43.attn_output.weight iq2_xxs +blk.44.attn_k_b.weight iq4_nl blk.44.attn_output.weight iq2_xxs +blk.45.attn_k_b.weight iq4_nl blk.45.attn_output.weight iq2_xxs +blk.46.attn_k_b.weight iq4_nl blk.46.attn_output.weight iq2_xxs +blk.47.attn_k_b.weight iq4_nl blk.47.attn_output.weight iq2_xxs +blk.48.attn_k_b.weight iq4_nl blk.48.attn_output.weight iq2_xxs +blk.49.attn_k_b.weight iq4_nl blk.49.attn_output.weight iq2_xxs +blk.50.attn_k_b.weight iq4_nl blk.50.attn_output.weight iq2_xxs +blk.51.attn_k_b.weight iq4_nl blk.51.attn_output.weight iq2_xxs +blk.52.attn_k_b.weight iq4_nl blk.52.attn_output.weight iq2_xxs +blk.53.attn_k_b.weight iq4_nl blk.53.attn_output.weight iq2_xxs +blk.54.attn_k_b.weight iq4_nl blk.54.attn_output.weight iq2_xxs +blk.55.attn_k_b.weight iq4_nl blk.55.attn_output.weight iq2_xxs +blk.56.attn_k_b.weight iq4_nl blk.56.attn_output.weight iq2_xxs +blk.57.attn_k_b.weight iq4_nl blk.57.attn_output.weight iq2_xxs +blk.58.attn_k_b.weight iq4_nl blk.58.attn_output.weight iq2_xxs +blk.59.attn_k_b.weight iq4_nl blk.59.attn_output.weight iq2_xxs +blk.60.attn_k_b.weight iq4_nl blk.60.attn_output.weight iq2_xxs [IQ4_NL] iq4_nl @@ -1207,322 +2120,809 @@ blk.6.ffn_down_shexp.weight q5_K [IQ3_S] iq3_s output.weight q6_K +blk.0.attn_k_b.weight iq4_nl +blk.1.attn_k_b.weight iq4_nl +blk.2.attn_k_b.weight iq4_nl +blk.3.attn_k_b.weight iq4_nl +blk.4.attn_k_b.weight iq4_nl +blk.5.attn_k_b.weight iq4_nl +blk.6.attn_k_b.weight iq4_nl +blk.7.attn_k_b.weight iq4_nl +blk.8.attn_k_b.weight iq4_nl +blk.9.attn_k_b.weight iq4_nl +blk.10.attn_k_b.weight iq4_nl +blk.11.attn_k_b.weight iq4_nl +blk.12.attn_k_b.weight iq4_nl +blk.13.attn_k_b.weight iq4_nl +blk.14.attn_k_b.weight iq4_nl +blk.15.attn_k_b.weight iq4_nl +blk.16.attn_k_b.weight iq4_nl +blk.17.attn_k_b.weight iq4_nl +blk.18.attn_k_b.weight iq4_nl +blk.19.attn_k_b.weight iq4_nl +blk.20.attn_k_b.weight iq4_nl +blk.21.attn_k_b.weight iq4_nl +blk.22.attn_k_b.weight iq4_nl +blk.23.attn_k_b.weight iq4_nl +blk.24.attn_k_b.weight iq4_nl +blk.25.attn_k_b.weight iq4_nl +blk.26.attn_k_b.weight iq4_nl +blk.27.attn_k_b.weight iq4_nl +blk.28.attn_k_b.weight iq4_nl +blk.29.attn_k_b.weight iq4_nl +blk.30.attn_k_b.weight iq4_nl +blk.31.attn_k_b.weight iq4_nl +blk.32.attn_k_b.weight iq4_nl +blk.33.attn_k_b.weight iq4_nl +blk.34.attn_k_b.weight iq4_nl +blk.35.attn_k_b.weight iq4_nl +blk.36.attn_k_b.weight iq4_nl +blk.37.attn_k_b.weight iq4_nl +blk.38.attn_k_b.weight iq4_nl +blk.39.attn_k_b.weight iq4_nl +blk.40.attn_k_b.weight iq4_nl +blk.41.attn_k_b.weight iq4_nl +blk.42.attn_k_b.weight iq4_nl +blk.43.attn_k_b.weight iq4_nl +blk.44.attn_k_b.weight iq4_nl +blk.45.attn_k_b.weight iq4_nl +blk.46.attn_k_b.weight iq4_nl +blk.47.attn_k_b.weight iq4_nl +blk.48.attn_k_b.weight iq4_nl +blk.49.attn_k_b.weight iq4_nl +blk.50.attn_k_b.weight iq4_nl +blk.51.attn_k_b.weight iq4_nl +blk.52.attn_k_b.weight iq4_nl +blk.53.attn_k_b.weight iq4_nl +blk.54.attn_k_b.weight iq4_nl +blk.55.attn_k_b.weight iq4_nl +blk.56.attn_k_b.weight iq4_nl +blk.57.attn_k_b.weight iq4_nl +blk.58.attn_k_b.weight iq4_nl +blk.59.attn_k_b.weight iq4_nl +blk.60.attn_k_b.weight iq4_nl [IQ3_M] iq3_s output.weight q6_K +blk.0.attn_k_b.weight iq4_nl blk.0.attn_output.weight q4_K blk.0.ffn_down.weight q4_K +blk.1.attn_k_b.weight iq4_nl blk.1.attn_output.weight q4_K blk.1.ffn_down.weight q4_K +blk.2.attn_k_b.weight iq4_nl blk.2.attn_output.weight q4_K blk.2.ffn_down.weight q4_K +blk.3.attn_k_b.weight iq4_nl blk.3.attn_output.weight q4_K blk.3.ffn_down_exps.weight q4_K blk.3.ffn_down_shexp.weight q4_K +blk.4.attn_k_b.weight iq4_nl blk.4.attn_output.weight q4_K blk.4.ffn_down_exps.weight q4_K blk.4.ffn_down_shexp.weight q4_K +blk.5.attn_k_b.weight iq4_nl blk.5.attn_output.weight q4_K blk.5.ffn_down_exps.weight q4_K blk.5.ffn_down_shexp.weight q4_K +blk.6.attn_k_b.weight iq4_nl blk.6.attn_output.weight q4_K blk.6.ffn_down_exps.weight q4_K blk.6.ffn_down_shexp.weight q4_K +blk.7.attn_k_b.weight iq4_nl blk.7.attn_output.weight q4_K +blk.8.attn_k_b.weight iq4_nl blk.8.attn_output.weight q4_K +blk.9.attn_k_b.weight iq4_nl blk.9.attn_output.weight q4_K +blk.10.attn_k_b.weight iq4_nl blk.10.attn_output.weight q4_K +blk.11.attn_k_b.weight iq4_nl blk.11.attn_output.weight q4_K +blk.12.attn_k_b.weight iq4_nl blk.12.attn_output.weight q4_K +blk.13.attn_k_b.weight iq4_nl blk.13.attn_output.weight q4_K +blk.14.attn_k_b.weight iq4_nl blk.14.attn_output.weight q4_K +blk.15.attn_k_b.weight iq4_nl blk.15.attn_output.weight q4_K +blk.16.attn_k_b.weight iq4_nl blk.16.attn_output.weight q4_K +blk.17.attn_k_b.weight iq4_nl blk.17.attn_output.weight q4_K +blk.18.attn_k_b.weight iq4_nl blk.18.attn_output.weight q4_K +blk.19.attn_k_b.weight iq4_nl blk.19.attn_output.weight q4_K +blk.20.attn_k_b.weight iq4_nl blk.20.attn_output.weight q4_K +blk.21.attn_k_b.weight iq4_nl blk.21.attn_output.weight q4_K +blk.22.attn_k_b.weight iq4_nl blk.22.attn_output.weight q4_K +blk.23.attn_k_b.weight iq4_nl blk.23.attn_output.weight q4_K +blk.24.attn_k_b.weight iq4_nl blk.24.attn_output.weight q4_K +blk.25.attn_k_b.weight iq4_nl blk.25.attn_output.weight q4_K +blk.26.attn_k_b.weight iq4_nl blk.26.attn_output.weight q4_K +blk.27.attn_k_b.weight iq4_nl blk.27.attn_output.weight q4_K +blk.28.attn_k_b.weight iq4_nl blk.28.attn_output.weight q4_K +blk.29.attn_k_b.weight iq4_nl blk.29.attn_output.weight q4_K +blk.30.attn_k_b.weight iq4_nl blk.30.attn_output.weight q4_K +blk.31.attn_k_b.weight iq4_nl blk.31.attn_output.weight q4_K +blk.32.attn_k_b.weight iq4_nl blk.32.attn_output.weight q4_K +blk.33.attn_k_b.weight iq4_nl blk.33.attn_output.weight q4_K +blk.34.attn_k_b.weight iq4_nl blk.34.attn_output.weight q4_K +blk.35.attn_k_b.weight iq4_nl blk.35.attn_output.weight q4_K +blk.36.attn_k_b.weight iq4_nl blk.36.attn_output.weight q4_K +blk.37.attn_k_b.weight iq4_nl blk.37.attn_output.weight q4_K +blk.38.attn_k_b.weight iq4_nl blk.38.attn_output.weight q4_K +blk.39.attn_k_b.weight iq4_nl blk.39.attn_output.weight q4_K +blk.40.attn_k_b.weight iq4_nl blk.40.attn_output.weight q4_K +blk.41.attn_k_b.weight iq4_nl blk.41.attn_output.weight q4_K +blk.42.attn_k_b.weight iq4_nl blk.42.attn_output.weight q4_K +blk.43.attn_k_b.weight iq4_nl blk.43.attn_output.weight q4_K +blk.44.attn_k_b.weight iq4_nl blk.44.attn_output.weight q4_K +blk.45.attn_k_b.weight iq4_nl blk.45.attn_output.weight q4_K +blk.46.attn_k_b.weight iq4_nl blk.46.attn_output.weight q4_K +blk.47.attn_k_b.weight iq4_nl blk.47.attn_output.weight q4_K +blk.48.attn_k_b.weight iq4_nl blk.48.attn_output.weight q4_K +blk.49.attn_k_b.weight iq4_nl blk.49.attn_output.weight q4_K +blk.50.attn_k_b.weight iq4_nl blk.50.attn_output.weight q4_K +blk.51.attn_k_b.weight iq4_nl blk.51.attn_output.weight q4_K +blk.52.attn_k_b.weight iq4_nl blk.52.attn_output.weight q4_K +blk.53.attn_k_b.weight iq4_nl blk.53.attn_output.weight q4_K +blk.54.attn_k_b.weight iq4_nl blk.54.attn_output.weight q4_K +blk.55.attn_k_b.weight iq4_nl blk.55.attn_output.weight q4_K +blk.56.attn_k_b.weight iq4_nl blk.56.attn_output.weight q4_K +blk.57.attn_k_b.weight iq4_nl blk.57.attn_output.weight q4_K +blk.58.attn_k_b.weight iq4_nl blk.58.attn_output.weight q4_K +blk.59.attn_k_b.weight iq4_nl blk.59.attn_output.weight q4_K +blk.60.attn_k_b.weight iq4_nl blk.60.attn_output.weight q4_K [IQ2_S] iq2_xs output.weight q5_K token_embd.weight iq3_s +blk.0.attn_k_b.weight iq4_nl blk.0.attn_output.weight iq3_s blk.0.ffn_down.weight iq3_s +blk.1.attn_k_b.weight iq4_nl blk.1.attn_output.weight iq3_s blk.1.ffn_down.weight iq3_s +blk.2.attn_k_b.weight iq4_nl blk.2.attn_output.weight iq3_s blk.2.ffn_down.weight iq3_s +blk.3.attn_k_b.weight iq4_nl blk.3.attn_output.weight iq3_s blk.3.ffn_down_exps.weight iq3_s blk.3.ffn_down_shexp.weight iq3_s +blk.4.attn_k_b.weight iq4_nl blk.4.attn_output.weight iq3_s blk.4.ffn_down_exps.weight iq3_s blk.4.ffn_down_shexp.weight iq3_s +blk.5.attn_k_b.weight iq4_nl blk.5.attn_output.weight iq3_s +blk.6.attn_k_b.weight iq4_nl blk.6.attn_output.weight iq3_s +blk.7.attn_k_b.weight iq4_nl blk.7.attn_output.weight iq3_s +blk.8.attn_k_b.weight iq4_nl blk.8.attn_output.weight iq3_s +blk.9.attn_k_b.weight iq4_nl blk.9.attn_output.weight iq3_s +blk.10.attn_k_b.weight iq4_nl blk.10.attn_output.weight iq3_s +blk.11.attn_k_b.weight iq4_nl blk.11.attn_output.weight iq3_s +blk.12.attn_k_b.weight iq4_nl blk.12.attn_output.weight iq3_s +blk.13.attn_k_b.weight iq4_nl blk.13.attn_output.weight iq3_s +blk.14.attn_k_b.weight iq4_nl blk.14.attn_output.weight iq3_s +blk.15.attn_k_b.weight iq4_nl blk.15.attn_output.weight iq3_s +blk.16.attn_k_b.weight iq4_nl blk.16.attn_output.weight iq3_s +blk.17.attn_k_b.weight iq4_nl blk.17.attn_output.weight iq3_s +blk.18.attn_k_b.weight iq4_nl blk.18.attn_output.weight iq3_s +blk.19.attn_k_b.weight iq4_nl blk.19.attn_output.weight iq3_s +blk.20.attn_k_b.weight iq4_nl blk.20.attn_output.weight iq3_s +blk.21.attn_k_b.weight iq4_nl blk.21.attn_output.weight iq3_s +blk.22.attn_k_b.weight iq4_nl blk.22.attn_output.weight iq3_s +blk.23.attn_k_b.weight iq4_nl blk.23.attn_output.weight iq3_s +blk.24.attn_k_b.weight iq4_nl blk.24.attn_output.weight iq3_s +blk.25.attn_k_b.weight iq4_nl blk.25.attn_output.weight iq3_s +blk.26.attn_k_b.weight iq4_nl blk.26.attn_output.weight iq3_s +blk.27.attn_k_b.weight iq4_nl blk.27.attn_output.weight iq3_s +blk.28.attn_k_b.weight iq4_nl blk.28.attn_output.weight iq3_s +blk.29.attn_k_b.weight iq4_nl blk.29.attn_output.weight iq3_s +blk.30.attn_k_b.weight iq4_nl blk.30.attn_output.weight iq3_s +blk.31.attn_k_b.weight iq4_nl blk.31.attn_output.weight iq3_s +blk.32.attn_k_b.weight iq4_nl blk.32.attn_output.weight iq3_s +blk.33.attn_k_b.weight iq4_nl blk.33.attn_output.weight iq3_s +blk.34.attn_k_b.weight iq4_nl blk.34.attn_output.weight iq3_s +blk.35.attn_k_b.weight iq4_nl blk.35.attn_output.weight iq3_s +blk.36.attn_k_b.weight iq4_nl blk.36.attn_output.weight iq3_s +blk.37.attn_k_b.weight iq4_nl blk.37.attn_output.weight iq3_s +blk.38.attn_k_b.weight iq4_nl blk.38.attn_output.weight iq3_s +blk.39.attn_k_b.weight iq4_nl blk.39.attn_output.weight iq3_s +blk.40.attn_k_b.weight iq4_nl blk.40.attn_output.weight iq3_s +blk.41.attn_k_b.weight iq4_nl blk.41.attn_output.weight iq3_s +blk.42.attn_k_b.weight iq4_nl blk.42.attn_output.weight iq3_s +blk.43.attn_k_b.weight iq4_nl blk.43.attn_output.weight iq3_s +blk.44.attn_k_b.weight iq4_nl blk.44.attn_output.weight iq3_s +blk.45.attn_k_b.weight iq4_nl blk.45.attn_output.weight iq3_s +blk.46.attn_k_b.weight iq4_nl blk.46.attn_output.weight iq3_s +blk.47.attn_k_b.weight iq4_nl blk.47.attn_output.weight iq3_s +blk.48.attn_k_b.weight iq4_nl blk.48.attn_output.weight iq3_s +blk.49.attn_k_b.weight iq4_nl blk.49.attn_output.weight iq3_s +blk.50.attn_k_b.weight iq4_nl blk.50.attn_output.weight iq3_s +blk.51.attn_k_b.weight iq4_nl blk.51.attn_output.weight iq3_s +blk.52.attn_k_b.weight iq4_nl blk.52.attn_output.weight iq3_s +blk.53.attn_k_b.weight iq4_nl blk.53.attn_output.weight iq3_s +blk.54.attn_k_b.weight iq4_nl blk.54.attn_output.weight iq3_s +blk.55.attn_k_b.weight iq4_nl blk.55.attn_output.weight iq3_s +blk.56.attn_k_b.weight iq4_nl blk.56.attn_output.weight iq3_s +blk.57.attn_k_b.weight iq4_nl blk.57.attn_output.weight iq3_s +blk.58.attn_k_b.weight iq4_nl blk.58.attn_output.weight iq3_s +blk.59.attn_k_b.weight iq4_nl blk.59.attn_output.weight iq3_s +blk.60.attn_k_b.weight iq4_nl blk.60.attn_output.weight iq3_s [IQ2_M] iq2_s output.weight q5_K token_embd.weight iq3_s +blk.0.attn_k_b.weight iq4_nl blk.0.attn_output.weight iq3_s blk.0.ffn_down.weight iq3_s +blk.1.attn_k_b.weight iq4_nl blk.1.attn_output.weight iq3_s blk.1.ffn_down.weight iq3_s +blk.2.attn_k_b.weight iq4_nl blk.2.attn_output.weight iq3_s blk.2.ffn_down.weight iq3_s +blk.3.attn_k_b.weight iq4_nl blk.3.attn_output.weight iq3_s blk.3.ffn_down_exps.weight iq3_s blk.3.ffn_down_shexp.weight iq3_s +blk.4.attn_k_b.weight iq4_nl blk.4.attn_output.weight iq3_s blk.4.ffn_down_exps.weight iq3_s blk.4.ffn_down_shexp.weight iq3_s +blk.5.attn_k_b.weight iq4_nl blk.5.attn_output.weight iq3_s +blk.6.attn_k_b.weight iq4_nl blk.6.attn_output.weight iq3_s +blk.7.attn_k_b.weight iq4_nl blk.7.attn_output.weight iq3_s +blk.8.attn_k_b.weight iq4_nl blk.8.attn_output.weight iq3_s +blk.9.attn_k_b.weight iq4_nl blk.9.attn_output.weight iq3_s +blk.10.attn_k_b.weight iq4_nl blk.10.attn_output.weight iq3_s +blk.11.attn_k_b.weight iq4_nl blk.11.attn_output.weight iq3_s +blk.12.attn_k_b.weight iq4_nl blk.12.attn_output.weight iq3_s +blk.13.attn_k_b.weight iq4_nl blk.13.attn_output.weight iq3_s +blk.14.attn_k_b.weight iq4_nl blk.14.attn_output.weight iq3_s +blk.15.attn_k_b.weight iq4_nl blk.15.attn_output.weight iq3_s +blk.16.attn_k_b.weight iq4_nl blk.16.attn_output.weight iq3_s +blk.17.attn_k_b.weight iq4_nl blk.17.attn_output.weight iq3_s +blk.18.attn_k_b.weight iq4_nl blk.18.attn_output.weight iq3_s +blk.19.attn_k_b.weight iq4_nl blk.19.attn_output.weight iq3_s +blk.20.attn_k_b.weight iq4_nl blk.20.attn_output.weight iq3_s +blk.21.attn_k_b.weight iq4_nl blk.21.attn_output.weight iq3_s +blk.22.attn_k_b.weight iq4_nl blk.22.attn_output.weight iq3_s +blk.23.attn_k_b.weight iq4_nl blk.23.attn_output.weight iq3_s +blk.24.attn_k_b.weight iq4_nl blk.24.attn_output.weight iq3_s +blk.25.attn_k_b.weight iq4_nl blk.25.attn_output.weight iq3_s +blk.26.attn_k_b.weight iq4_nl blk.26.attn_output.weight iq3_s +blk.27.attn_k_b.weight iq4_nl blk.27.attn_output.weight iq3_s +blk.28.attn_k_b.weight iq4_nl blk.28.attn_output.weight iq3_s +blk.29.attn_k_b.weight iq4_nl blk.29.attn_output.weight iq3_s +blk.30.attn_k_b.weight iq4_nl blk.30.attn_output.weight iq3_s +blk.31.attn_k_b.weight iq4_nl blk.31.attn_output.weight iq3_s +blk.32.attn_k_b.weight iq4_nl blk.32.attn_output.weight iq3_s +blk.33.attn_k_b.weight iq4_nl blk.33.attn_output.weight iq3_s +blk.34.attn_k_b.weight iq4_nl blk.34.attn_output.weight iq3_s +blk.35.attn_k_b.weight iq4_nl blk.35.attn_output.weight iq3_s +blk.36.attn_k_b.weight iq4_nl blk.36.attn_output.weight iq3_s +blk.37.attn_k_b.weight iq4_nl blk.37.attn_output.weight iq3_s +blk.38.attn_k_b.weight iq4_nl blk.38.attn_output.weight iq3_s +blk.39.attn_k_b.weight iq4_nl blk.39.attn_output.weight iq3_s +blk.40.attn_k_b.weight iq4_nl blk.40.attn_output.weight iq3_s +blk.41.attn_k_b.weight iq4_nl blk.41.attn_output.weight iq3_s +blk.42.attn_k_b.weight iq4_nl blk.42.attn_output.weight iq3_s +blk.43.attn_k_b.weight iq4_nl blk.43.attn_output.weight iq3_s +blk.44.attn_k_b.weight iq4_nl blk.44.attn_output.weight iq3_s +blk.45.attn_k_b.weight iq4_nl blk.45.attn_output.weight iq3_s +blk.46.attn_k_b.weight iq4_nl blk.46.attn_output.weight iq3_s +blk.47.attn_k_b.weight iq4_nl blk.47.attn_output.weight iq3_s +blk.48.attn_k_b.weight iq4_nl blk.48.attn_output.weight iq3_s +blk.49.attn_k_b.weight iq4_nl blk.49.attn_output.weight iq3_s +blk.50.attn_k_b.weight iq4_nl blk.50.attn_output.weight iq3_s +blk.51.attn_k_b.weight iq4_nl blk.51.attn_output.weight iq3_s +blk.52.attn_k_b.weight iq4_nl blk.52.attn_output.weight iq3_s +blk.53.attn_k_b.weight iq4_nl blk.53.attn_output.weight iq3_s +blk.54.attn_k_b.weight iq4_nl blk.54.attn_output.weight iq3_s +blk.55.attn_k_b.weight iq4_nl blk.55.attn_output.weight iq3_s +blk.56.attn_k_b.weight iq4_nl blk.56.attn_output.weight iq3_s +blk.57.attn_k_b.weight iq4_nl blk.57.attn_output.weight iq3_s +blk.58.attn_k_b.weight iq4_nl blk.58.attn_output.weight iq3_s +blk.59.attn_k_b.weight iq4_nl blk.59.attn_output.weight iq3_s +blk.60.attn_k_b.weight iq4_nl blk.60.attn_output.weight iq3_s [IQ4_XS] iq4_xs output.weight q6_K +blk.0.attn_k_b.weight iq4_nl blk.0.ffn_down.weight q5_K +blk.1.attn_k_b.weight iq4_nl blk.1.ffn_down.weight q5_K +blk.2.attn_k_b.weight iq4_nl blk.2.ffn_down.weight q5_K +blk.3.attn_k_b.weight iq4_nl blk.3.ffn_down_exps.weight q5_K blk.3.ffn_down_shexp.weight q5_K +blk.4.attn_k_b.weight iq4_nl blk.4.ffn_down_exps.weight q5_K blk.4.ffn_down_shexp.weight q5_K +blk.5.attn_k_b.weight iq4_nl blk.5.ffn_down_exps.weight q5_K blk.5.ffn_down_shexp.weight q5_K +blk.6.attn_k_b.weight iq4_nl blk.6.ffn_down_exps.weight q5_K blk.6.ffn_down_shexp.weight q5_K +blk.7.attn_k_b.weight iq4_nl +blk.8.attn_k_b.weight iq4_nl +blk.9.attn_k_b.weight iq4_nl +blk.10.attn_k_b.weight iq4_nl +blk.11.attn_k_b.weight iq4_nl +blk.12.attn_k_b.weight iq4_nl +blk.13.attn_k_b.weight iq4_nl +blk.14.attn_k_b.weight iq4_nl +blk.15.attn_k_b.weight iq4_nl +blk.16.attn_k_b.weight iq4_nl +blk.17.attn_k_b.weight iq4_nl +blk.18.attn_k_b.weight iq4_nl +blk.19.attn_k_b.weight iq4_nl +blk.20.attn_k_b.weight iq4_nl +blk.21.attn_k_b.weight iq4_nl +blk.22.attn_k_b.weight iq4_nl +blk.23.attn_k_b.weight iq4_nl +blk.24.attn_k_b.weight iq4_nl +blk.25.attn_k_b.weight iq4_nl +blk.26.attn_k_b.weight iq4_nl +blk.27.attn_k_b.weight iq4_nl +blk.28.attn_k_b.weight iq4_nl +blk.29.attn_k_b.weight iq4_nl +blk.30.attn_k_b.weight iq4_nl +blk.31.attn_k_b.weight iq4_nl +blk.32.attn_k_b.weight iq4_nl +blk.33.attn_k_b.weight iq4_nl +blk.34.attn_k_b.weight iq4_nl +blk.35.attn_k_b.weight iq4_nl +blk.36.attn_k_b.weight iq4_nl +blk.37.attn_k_b.weight iq4_nl +blk.38.attn_k_b.weight iq4_nl +blk.39.attn_k_b.weight iq4_nl +blk.40.attn_k_b.weight iq4_nl +blk.41.attn_k_b.weight iq4_nl +blk.42.attn_k_b.weight iq4_nl +blk.43.attn_k_b.weight iq4_nl +blk.44.attn_k_b.weight iq4_nl +blk.45.attn_k_b.weight iq4_nl +blk.46.attn_k_b.weight iq4_nl +blk.47.attn_k_b.weight iq4_nl +blk.48.attn_k_b.weight iq4_nl +blk.49.attn_k_b.weight iq4_nl +blk.50.attn_k_b.weight iq4_nl +blk.51.attn_k_b.weight iq4_nl +blk.52.attn_k_b.weight iq4_nl +blk.53.attn_k_b.weight iq4_nl +blk.54.attn_k_b.weight iq4_nl +blk.55.attn_k_b.weight iq4_nl +blk.56.attn_k_b.weight iq4_nl +blk.57.attn_k_b.weight iq4_nl +blk.58.attn_k_b.weight iq4_nl +blk.59.attn_k_b.weight iq4_nl +blk.60.attn_k_b.weight iq4_nl [IQ1_M] iq1_m output.weight q5_K token_embd.weight q2_K +blk.0.attn_k_b.weight iq4_nl blk.0.attn_output.weight iq2_xxs blk.0.ffn_down.weight q2_K +blk.1.attn_k_b.weight iq4_nl blk.1.attn_output.weight iq2_xxs blk.1.ffn_down.weight q2_K +blk.2.attn_k_b.weight iq4_nl blk.2.attn_output.weight iq2_xxs blk.2.ffn_down.weight q2_K +blk.3.attn_k_b.weight iq4_nl blk.3.attn_output.weight iq2_xxs blk.3.ffn_down_exps.weight q2_K blk.3.ffn_down_shexp.weight q2_K +blk.4.attn_k_b.weight iq4_nl blk.4.attn_output.weight iq2_xxs blk.4.ffn_down_exps.weight q2_K blk.4.ffn_down_shexp.weight q2_K +blk.5.attn_k_b.weight iq4_nl blk.5.attn_output.weight iq2_xxs +blk.6.attn_k_b.weight iq4_nl blk.6.attn_output.weight iq2_xxs +blk.7.attn_k_b.weight iq4_nl blk.7.attn_output.weight iq2_xxs +blk.8.attn_k_b.weight iq4_nl blk.8.attn_output.weight iq2_xxs +blk.9.attn_k_b.weight iq4_nl blk.9.attn_output.weight iq2_xxs +blk.10.attn_k_b.weight iq4_nl blk.10.attn_output.weight iq2_xxs +blk.11.attn_k_b.weight iq4_nl blk.11.attn_output.weight iq2_xxs +blk.12.attn_k_b.weight iq4_nl blk.12.attn_output.weight iq2_xxs +blk.13.attn_k_b.weight iq4_nl blk.13.attn_output.weight iq2_xxs +blk.14.attn_k_b.weight iq4_nl blk.14.attn_output.weight iq2_xxs +blk.15.attn_k_b.weight iq4_nl blk.15.attn_output.weight iq2_xxs +blk.16.attn_k_b.weight iq4_nl blk.16.attn_output.weight iq2_xxs +blk.17.attn_k_b.weight iq4_nl blk.17.attn_output.weight iq2_xxs +blk.18.attn_k_b.weight iq4_nl blk.18.attn_output.weight iq2_xxs +blk.19.attn_k_b.weight iq4_nl blk.19.attn_output.weight iq2_xxs +blk.20.attn_k_b.weight iq4_nl blk.20.attn_output.weight iq2_xxs +blk.21.attn_k_b.weight iq4_nl blk.21.attn_output.weight iq2_xxs +blk.22.attn_k_b.weight iq4_nl blk.22.attn_output.weight iq2_xxs +blk.23.attn_k_b.weight iq4_nl blk.23.attn_output.weight iq2_xxs +blk.24.attn_k_b.weight iq4_nl blk.24.attn_output.weight iq2_xxs +blk.25.attn_k_b.weight iq4_nl blk.25.attn_output.weight iq2_xxs +blk.26.attn_k_b.weight iq4_nl blk.26.attn_output.weight iq2_xxs +blk.27.attn_k_b.weight iq4_nl blk.27.attn_output.weight iq2_xxs +blk.28.attn_k_b.weight iq4_nl blk.28.attn_output.weight iq2_xxs +blk.29.attn_k_b.weight iq4_nl blk.29.attn_output.weight iq2_xxs +blk.30.attn_k_b.weight iq4_nl blk.30.attn_output.weight iq2_xxs +blk.31.attn_k_b.weight iq4_nl blk.31.attn_output.weight iq2_xxs +blk.32.attn_k_b.weight iq4_nl blk.32.attn_output.weight iq2_xxs +blk.33.attn_k_b.weight iq4_nl blk.33.attn_output.weight iq2_xxs +blk.34.attn_k_b.weight iq4_nl blk.34.attn_output.weight iq2_xxs +blk.35.attn_k_b.weight iq4_nl blk.35.attn_output.weight iq2_xxs +blk.36.attn_k_b.weight iq4_nl blk.36.attn_output.weight iq2_xxs +blk.37.attn_k_b.weight iq4_nl blk.37.attn_output.weight iq2_xxs +blk.38.attn_k_b.weight iq4_nl blk.38.attn_output.weight iq2_xxs +blk.39.attn_k_b.weight iq4_nl blk.39.attn_output.weight iq2_xxs +blk.40.attn_k_b.weight iq4_nl blk.40.attn_output.weight iq2_xxs +blk.41.attn_k_b.weight iq4_nl blk.41.attn_output.weight iq2_xxs +blk.42.attn_k_b.weight iq4_nl blk.42.attn_output.weight iq2_xxs +blk.43.attn_k_b.weight iq4_nl blk.43.attn_output.weight iq2_xxs +blk.44.attn_k_b.weight iq4_nl blk.44.attn_output.weight iq2_xxs +blk.45.attn_k_b.weight iq4_nl blk.45.attn_output.weight iq2_xxs +blk.46.attn_k_b.weight iq4_nl blk.46.attn_output.weight iq2_xxs +blk.47.attn_k_b.weight iq4_nl blk.47.attn_output.weight iq2_xxs +blk.48.attn_k_b.weight iq4_nl blk.48.attn_output.weight iq2_xxs +blk.49.attn_k_b.weight iq4_nl blk.49.attn_output.weight iq2_xxs +blk.50.attn_k_b.weight iq4_nl blk.50.attn_output.weight iq2_xxs +blk.51.attn_k_b.weight iq4_nl blk.51.attn_output.weight iq2_xxs +blk.52.attn_k_b.weight iq4_nl blk.52.attn_output.weight iq2_xxs +blk.53.attn_k_b.weight iq4_nl blk.53.attn_output.weight iq2_xxs +blk.54.attn_k_b.weight iq4_nl blk.54.attn_output.weight iq2_xxs +blk.55.attn_k_b.weight iq4_nl blk.55.attn_output.weight iq2_xxs +blk.56.attn_k_b.weight iq4_nl blk.56.attn_output.weight iq2_xxs +blk.57.attn_k_b.weight iq4_nl blk.57.attn_output.weight iq2_xxs +blk.58.attn_k_b.weight iq4_nl blk.58.attn_output.weight iq2_xxs +blk.59.attn_k_b.weight iq4_nl blk.59.attn_output.weight iq2_xxs +blk.60.attn_k_b.weight iq4_nl blk.60.attn_output.weight iq2_xxs [BF16] bf16 -output.weight q6_K [TQ1_0] tq1_0 output.weight q6_K token_embd.weight q4_K +blk.0.attn_k_b.weight q4_0 +blk.1.attn_k_b.weight q4_0 +blk.2.attn_k_b.weight q4_0 +blk.3.attn_k_b.weight q4_0 +blk.4.attn_k_b.weight q4_0 +blk.5.attn_k_b.weight q4_0 +blk.6.attn_k_b.weight q4_0 +blk.7.attn_k_b.weight q4_0 +blk.8.attn_k_b.weight q4_0 +blk.9.attn_k_b.weight q4_0 +blk.10.attn_k_b.weight q4_0 +blk.11.attn_k_b.weight q4_0 +blk.12.attn_k_b.weight q4_0 +blk.13.attn_k_b.weight q4_0 +blk.14.attn_k_b.weight q4_0 +blk.15.attn_k_b.weight q4_0 +blk.16.attn_k_b.weight q4_0 +blk.17.attn_k_b.weight q4_0 +blk.18.attn_k_b.weight q4_0 +blk.19.attn_k_b.weight q4_0 +blk.20.attn_k_b.weight q4_0 +blk.21.attn_k_b.weight q4_0 +blk.22.attn_k_b.weight q4_0 +blk.23.attn_k_b.weight q4_0 +blk.24.attn_k_b.weight q4_0 +blk.25.attn_k_b.weight q4_0 +blk.26.attn_k_b.weight q4_0 +blk.27.attn_k_b.weight q4_0 +blk.28.attn_k_b.weight q4_0 +blk.29.attn_k_b.weight q4_0 +blk.30.attn_k_b.weight q4_0 +blk.31.attn_k_b.weight q4_0 +blk.32.attn_k_b.weight q4_0 +blk.33.attn_k_b.weight q4_0 +blk.34.attn_k_b.weight q4_0 +blk.35.attn_k_b.weight q4_0 +blk.36.attn_k_b.weight q4_0 +blk.37.attn_k_b.weight q4_0 +blk.38.attn_k_b.weight q4_0 +blk.39.attn_k_b.weight q4_0 +blk.40.attn_k_b.weight q4_0 +blk.41.attn_k_b.weight q4_0 +blk.42.attn_k_b.weight q4_0 +blk.43.attn_k_b.weight q4_0 +blk.44.attn_k_b.weight q4_0 +blk.45.attn_k_b.weight q4_0 +blk.46.attn_k_b.weight q4_0 +blk.47.attn_k_b.weight q4_0 +blk.48.attn_k_b.weight q4_0 +blk.49.attn_k_b.weight q4_0 +blk.50.attn_k_b.weight q4_0 +blk.51.attn_k_b.weight q4_0 +blk.52.attn_k_b.weight q4_0 +blk.53.attn_k_b.weight q4_0 +blk.54.attn_k_b.weight q4_0 +blk.55.attn_k_b.weight q4_0 +blk.56.attn_k_b.weight q4_0 +blk.57.attn_k_b.weight q4_0 +blk.58.attn_k_b.weight q4_0 +blk.59.attn_k_b.weight q4_0 +blk.60.attn_k_b.weight q4_0 [TQ2_0] tq2_0 output.weight q6_K token_embd.weight q4_K +blk.0.attn_k_b.weight q4_0 +blk.1.attn_k_b.weight q4_0 +blk.2.attn_k_b.weight q4_0 +blk.3.attn_k_b.weight q4_0 +blk.4.attn_k_b.weight q4_0 +blk.5.attn_k_b.weight q4_0 +blk.6.attn_k_b.weight q4_0 +blk.7.attn_k_b.weight q4_0 +blk.8.attn_k_b.weight q4_0 +blk.9.attn_k_b.weight q4_0 +blk.10.attn_k_b.weight q4_0 +blk.11.attn_k_b.weight q4_0 +blk.12.attn_k_b.weight q4_0 +blk.13.attn_k_b.weight q4_0 +blk.14.attn_k_b.weight q4_0 +blk.15.attn_k_b.weight q4_0 +blk.16.attn_k_b.weight q4_0 +blk.17.attn_k_b.weight q4_0 +blk.18.attn_k_b.weight q4_0 +blk.19.attn_k_b.weight q4_0 +blk.20.attn_k_b.weight q4_0 +blk.21.attn_k_b.weight q4_0 +blk.22.attn_k_b.weight q4_0 +blk.23.attn_k_b.weight q4_0 +blk.24.attn_k_b.weight q4_0 +blk.25.attn_k_b.weight q4_0 +blk.26.attn_k_b.weight q4_0 +blk.27.attn_k_b.weight q4_0 +blk.28.attn_k_b.weight q4_0 +blk.29.attn_k_b.weight q4_0 +blk.30.attn_k_b.weight q4_0 +blk.31.attn_k_b.weight q4_0 +blk.32.attn_k_b.weight q4_0 +blk.33.attn_k_b.weight q4_0 +blk.34.attn_k_b.weight q4_0 +blk.35.attn_k_b.weight q4_0 +blk.36.attn_k_b.weight q4_0 +blk.37.attn_k_b.weight q4_0 +blk.38.attn_k_b.weight q4_0 +blk.39.attn_k_b.weight q4_0 +blk.40.attn_k_b.weight q4_0 +blk.41.attn_k_b.weight q4_0 +blk.42.attn_k_b.weight q4_0 +blk.43.attn_k_b.weight q4_0 +blk.44.attn_k_b.weight q4_0 +blk.45.attn_k_b.weight q4_0 +blk.46.attn_k_b.weight q4_0 +blk.47.attn_k_b.weight q4_0 +blk.48.attn_k_b.weight q4_0 +blk.49.attn_k_b.weight q4_0 +blk.50.attn_k_b.weight q4_0 +blk.51.attn_k_b.weight q4_0 +blk.52.attn_k_b.weight q4_0 +blk.53.attn_k_b.weight q4_0 +blk.54.attn_k_b.weight q4_0 +blk.55.attn_k_b.weight q4_0 +blk.56.attn_k_b.weight q4_0 +blk.57.attn_k_b.weight q4_0 +blk.58.attn_k_b.weight q4_0 +blk.59.attn_k_b.weight q4_0 +blk.60.attn_k_b.weight q4_0 [MXFP4_MOE] mxfp4 output.weight q8_0 diff --git a/tests/snapshots/gemma-3-4b-it.schema b/tests/snapshots/gemma-3-4b-it.schema index 1bec284c87..cf5032a24d 100644 --- a/tests/snapshots/gemma-3-4b-it.schema +++ b/tests/snapshots/gemma-3-4b-it.schema @@ -2,10 +2,8 @@ # n_embd=2560, n_ff=10240, n_vocab=262144, n_layer=34, n_head=8, n_head_kv=4 [F32] f32 -token_embd.weight q6_K [F16] f16 -token_embd.weight q6_K [Q4_0] q4_0 token_embd.weight q6_K @@ -1205,7 +1203,6 @@ blk.33.attn_output.weight iq2_xxs blk.33.attn_v.weight q2_K [BF16] bf16 -token_embd.weight q6_K [TQ1_0] tq1_0 token_embd.weight q6_K diff --git a/tests/snapshots/glm-4.6v.schema b/tests/snapshots/glm-4.6v.schema index 841667a1b3..560745c0fd 100644 --- a/tests/snapshots/glm-4.6v.schema +++ b/tests/snapshots/glm-4.6v.schema @@ -2,10 +2,8 @@ # n_embd=4096, n_ff=10944, n_vocab=151552, n_layer=46, n_head=96, n_head_kv=8, n_expert=128 [F32] f32 -output.weight q6_K [F16] f16 -output.weight q6_K [Q4_0] q4_0 output.weight q6_K @@ -22,1446 +20,2242 @@ output.weight q6_K output.weight q6_K [Q2_K] q2_K -blk.0.ffn_down.weight q3_K +blk.0.ffn_down.weight q4_0 blk.0.attn_output.weight q3_K blk.0.attn_v.weight q4_K -blk.1.ffn_down_exps.weight q3_K -blk.1.ffn_down_shexp.weight q3_K +blk.1.ffn_down_exps.weight q4_0 +blk.1.ffn_down_shexp.weight q4_0 blk.1.attn_output.weight q3_K blk.1.attn_v.weight q4_K -blk.2.ffn_down_exps.weight q3_K -blk.2.ffn_down_shexp.weight q3_K +blk.2.ffn_down_exps.weight q4_0 +blk.2.ffn_down_shexp.weight q4_0 blk.2.attn_output.weight q3_K blk.2.attn_v.weight q4_K -blk.3.ffn_down_exps.weight q3_K -blk.3.ffn_down_shexp.weight q3_K +blk.3.ffn_down_exps.weight q4_0 +blk.3.ffn_down_shexp.weight q4_0 blk.3.attn_output.weight q3_K blk.3.attn_v.weight q4_K -blk.4.ffn_down_exps.weight q3_K -blk.4.ffn_down_shexp.weight q3_K +blk.4.ffn_down_exps.weight q4_0 +blk.4.ffn_down_shexp.weight q4_0 blk.4.attn_output.weight q3_K blk.4.attn_v.weight q4_K -blk.5.ffn_down_exps.weight q3_K -blk.5.ffn_down_shexp.weight q3_K +blk.5.ffn_down_exps.weight q4_0 +blk.5.ffn_down_shexp.weight q4_0 blk.5.attn_output.weight q3_K blk.5.attn_v.weight q4_K -blk.6.ffn_down_exps.weight q3_K -blk.6.ffn_down_shexp.weight q3_K +blk.6.ffn_down_exps.weight q4_0 +blk.6.ffn_down_shexp.weight q4_0 blk.6.attn_output.weight q3_K blk.6.attn_v.weight q4_K -blk.7.ffn_down_exps.weight q3_K -blk.7.ffn_down_shexp.weight q3_K +blk.7.ffn_down_exps.weight q4_0 +blk.7.ffn_down_shexp.weight q4_0 blk.7.attn_output.weight q3_K blk.7.attn_v.weight q4_K -blk.8.ffn_down_exps.weight q3_K -blk.8.ffn_down_shexp.weight q3_K +blk.8.ffn_down_exps.weight q4_0 +blk.8.ffn_down_shexp.weight q4_0 blk.8.attn_output.weight q3_K blk.8.attn_v.weight q4_K -blk.9.ffn_down_exps.weight q3_K -blk.9.ffn_down_shexp.weight q3_K +blk.9.ffn_down_exps.weight q4_0 +blk.9.ffn_down_shexp.weight q4_0 blk.9.attn_output.weight q3_K blk.9.attn_v.weight q4_K -blk.10.ffn_down_exps.weight q3_K -blk.10.ffn_down_shexp.weight q3_K +blk.10.ffn_down_exps.weight q4_0 +blk.10.ffn_down_shexp.weight q4_0 blk.10.attn_output.weight q3_K blk.10.attn_v.weight q4_K -blk.11.ffn_down_exps.weight q3_K -blk.11.ffn_down_shexp.weight q3_K +blk.11.ffn_down_exps.weight q4_0 +blk.11.ffn_down_shexp.weight q4_0 blk.11.attn_output.weight q3_K blk.11.attn_v.weight q4_K -blk.12.ffn_down_exps.weight q3_K -blk.12.ffn_down_shexp.weight q3_K +blk.12.ffn_down_exps.weight q4_0 +blk.12.ffn_down_shexp.weight q4_0 blk.12.attn_output.weight q3_K blk.12.attn_v.weight q4_K -blk.13.ffn_down_exps.weight q3_K -blk.13.ffn_down_shexp.weight q3_K +blk.13.ffn_down_exps.weight q4_0 +blk.13.ffn_down_shexp.weight q4_0 blk.13.attn_output.weight q3_K blk.13.attn_v.weight q4_K -blk.14.ffn_down_exps.weight q3_K -blk.14.ffn_down_shexp.weight q3_K +blk.14.ffn_down_exps.weight q4_0 +blk.14.ffn_down_shexp.weight q4_0 blk.14.attn_output.weight q3_K blk.14.attn_v.weight q4_K -blk.15.ffn_down_exps.weight q3_K -blk.15.ffn_down_shexp.weight q3_K +blk.15.ffn_down_exps.weight q4_0 +blk.15.ffn_down_shexp.weight q4_0 blk.15.attn_output.weight q3_K blk.15.attn_v.weight q4_K -blk.16.ffn_down_exps.weight q3_K -blk.16.ffn_down_shexp.weight q3_K +blk.16.ffn_down_exps.weight q4_0 +blk.16.ffn_down_shexp.weight q4_0 blk.16.attn_output.weight q3_K blk.16.attn_v.weight q4_K -blk.17.ffn_down_exps.weight q3_K -blk.17.ffn_down_shexp.weight q3_K +blk.17.ffn_down_exps.weight q4_0 +blk.17.ffn_down_shexp.weight q4_0 blk.17.attn_output.weight q3_K blk.17.attn_v.weight q4_K -blk.18.ffn_down_exps.weight q3_K -blk.18.ffn_down_shexp.weight q3_K +blk.18.ffn_down_exps.weight q4_0 +blk.18.ffn_down_shexp.weight q4_0 blk.18.attn_output.weight q3_K blk.18.attn_v.weight q4_K -blk.19.ffn_down_exps.weight q3_K -blk.19.ffn_down_shexp.weight q3_K +blk.19.ffn_down_exps.weight q4_0 +blk.19.ffn_down_shexp.weight q4_0 blk.19.attn_output.weight q3_K blk.19.attn_v.weight q4_K -blk.20.ffn_down_exps.weight q3_K -blk.20.ffn_down_shexp.weight q3_K +blk.20.ffn_down_exps.weight q4_0 +blk.20.ffn_down_shexp.weight q4_0 blk.20.attn_output.weight q3_K blk.20.attn_v.weight q4_K -blk.21.ffn_down_exps.weight q3_K -blk.21.ffn_down_shexp.weight q3_K +blk.21.ffn_down_exps.weight q4_0 +blk.21.ffn_down_shexp.weight q4_0 blk.21.attn_output.weight q3_K blk.21.attn_v.weight q4_K -blk.22.ffn_down_exps.weight q3_K -blk.22.ffn_down_shexp.weight q3_K +blk.22.ffn_down_exps.weight q4_0 +blk.22.ffn_down_shexp.weight q4_0 blk.22.attn_output.weight q3_K blk.22.attn_v.weight q4_K -blk.23.ffn_down_exps.weight q3_K -blk.23.ffn_down_shexp.weight q3_K +blk.23.ffn_down_exps.weight q4_0 +blk.23.ffn_down_shexp.weight q4_0 blk.23.attn_output.weight q3_K blk.23.attn_v.weight q4_K -blk.24.ffn_down_exps.weight q3_K -blk.24.ffn_down_shexp.weight q3_K +blk.24.ffn_down_exps.weight q4_0 +blk.24.ffn_down_shexp.weight q4_0 blk.24.attn_output.weight q3_K blk.24.attn_v.weight q4_K -blk.25.ffn_down_exps.weight q3_K -blk.25.ffn_down_shexp.weight q3_K +blk.25.ffn_down_exps.weight q4_0 +blk.25.ffn_down_shexp.weight q4_0 blk.25.attn_output.weight q3_K blk.25.attn_v.weight q4_K -blk.26.ffn_down_exps.weight q3_K -blk.26.ffn_down_shexp.weight q3_K +blk.26.ffn_down_exps.weight q4_0 +blk.26.ffn_down_shexp.weight q4_0 blk.26.attn_output.weight q3_K blk.26.attn_v.weight q4_K -blk.27.ffn_down_exps.weight q3_K -blk.27.ffn_down_shexp.weight q3_K +blk.27.ffn_down_exps.weight q4_0 +blk.27.ffn_down_shexp.weight q4_0 blk.27.attn_output.weight q3_K blk.27.attn_v.weight q4_K -blk.28.ffn_down_exps.weight q3_K -blk.28.ffn_down_shexp.weight q3_K +blk.28.ffn_down_exps.weight q4_0 +blk.28.ffn_down_shexp.weight q4_0 blk.28.attn_output.weight q3_K blk.28.attn_v.weight q4_K -blk.29.ffn_down_exps.weight q3_K -blk.29.ffn_down_shexp.weight q3_K +blk.29.ffn_down_exps.weight q4_0 +blk.29.ffn_down_shexp.weight q4_0 blk.29.attn_output.weight q3_K blk.29.attn_v.weight q4_K -blk.30.ffn_down_exps.weight q3_K -blk.30.ffn_down_shexp.weight q3_K +blk.30.ffn_down_exps.weight q4_0 +blk.30.ffn_down_shexp.weight q4_0 blk.30.attn_output.weight q3_K blk.30.attn_v.weight q4_K -blk.31.ffn_down_exps.weight q3_K -blk.31.ffn_down_shexp.weight q3_K +blk.31.ffn_down_exps.weight q4_0 +blk.31.ffn_down_shexp.weight q4_0 blk.31.attn_output.weight q3_K blk.31.attn_v.weight q4_K -blk.32.ffn_down_exps.weight q3_K -blk.32.ffn_down_shexp.weight q3_K +blk.32.ffn_down_exps.weight q4_0 +blk.32.ffn_down_shexp.weight q4_0 blk.32.attn_output.weight q3_K blk.32.attn_v.weight q4_K -blk.33.ffn_down_exps.weight q3_K -blk.33.ffn_down_shexp.weight q3_K +blk.33.ffn_down_exps.weight q4_0 +blk.33.ffn_down_shexp.weight q4_0 blk.33.attn_output.weight q3_K blk.33.attn_v.weight q4_K -blk.34.ffn_down_exps.weight q3_K +blk.34.ffn_down_exps.weight q4_0 blk.34.attn_output.weight q3_K -blk.34.ffn_down_shexp.weight q3_K +blk.34.ffn_down_shexp.weight q4_0 blk.34.attn_v.weight q4_K -blk.35.ffn_down_exps.weight q3_K -blk.35.ffn_down_shexp.weight q3_K +blk.35.ffn_down_exps.weight q4_0 +blk.35.ffn_down_shexp.weight q4_0 blk.35.attn_output.weight q3_K blk.35.attn_v.weight q4_K -blk.36.ffn_down_exps.weight q3_K -blk.36.ffn_down_shexp.weight q3_K +blk.36.ffn_down_exps.weight q4_0 +blk.36.ffn_down_shexp.weight q4_0 blk.36.attn_output.weight q3_K blk.36.attn_v.weight q4_K -blk.37.ffn_down_exps.weight q3_K -blk.37.ffn_down_shexp.weight q3_K +blk.37.ffn_down_exps.weight q4_0 +blk.37.ffn_down_shexp.weight q4_0 blk.37.attn_output.weight q3_K blk.37.attn_v.weight q4_K -blk.38.ffn_down_exps.weight q3_K -blk.38.ffn_down_shexp.weight q3_K +blk.38.ffn_down_exps.weight q4_0 +blk.38.ffn_down_shexp.weight q4_0 blk.38.attn_output.weight q3_K blk.38.attn_v.weight q4_K -blk.39.ffn_down_exps.weight q3_K -blk.39.ffn_down_shexp.weight q3_K +blk.39.ffn_down_exps.weight q4_0 +blk.39.ffn_down_shexp.weight q4_0 blk.39.attn_output.weight q3_K blk.39.attn_v.weight q4_K -blk.40.ffn_down_exps.weight q3_K -blk.40.ffn_down_shexp.weight q3_K +blk.40.ffn_down_exps.weight q4_0 +blk.40.ffn_down_shexp.weight q4_0 blk.40.attn_output.weight q3_K blk.40.attn_v.weight q4_K -blk.41.ffn_down_exps.weight q3_K -blk.41.ffn_down_shexp.weight q3_K +blk.41.ffn_down_exps.weight q4_0 +blk.41.ffn_down_shexp.weight q4_0 blk.41.attn_output.weight q3_K blk.41.attn_v.weight q4_K -blk.42.ffn_down_exps.weight q3_K -blk.42.ffn_down_shexp.weight q3_K +blk.42.ffn_down_exps.weight q4_0 +blk.42.ffn_down_shexp.weight q4_0 blk.42.attn_v.weight q4_K blk.42.attn_output.weight q3_K -blk.43.ffn_down_exps.weight q3_K -blk.43.ffn_down_shexp.weight q3_K +blk.43.ffn_down_exps.weight q4_0 +blk.43.ffn_down_shexp.weight q4_0 blk.43.attn_output.weight q3_K blk.43.attn_v.weight q4_K -blk.44.ffn_down_exps.weight q3_K -blk.44.ffn_down_shexp.weight q3_K +blk.44.ffn_down_exps.weight q4_0 +blk.44.ffn_down_shexp.weight q4_0 blk.44.attn_output.weight q3_K blk.44.attn_v.weight q4_K output.weight q6_K -blk.45.ffn_down_exps.weight q3_K -blk.45.ffn_down_shexp.weight q3_K +blk.45.ffn_down_exps.weight q4_0 +blk.45.ffn_down_shexp.weight q4_0 blk.45.attn_output.weight q3_K blk.45.attn_v.weight q4_K [Q3_K_S] q3_K +blk.0.ffn_down.weight q4_0 +blk.1.ffn_down_exps.weight q4_0 +blk.1.ffn_down_shexp.weight q4_0 +blk.2.ffn_down_exps.weight q4_0 +blk.2.ffn_down_shexp.weight q4_0 +blk.3.ffn_down_exps.weight q4_0 +blk.3.ffn_down_shexp.weight q4_0 +blk.4.ffn_down_exps.weight q4_0 +blk.4.ffn_down_shexp.weight q4_0 +blk.5.ffn_down_exps.weight q4_0 +blk.5.ffn_down_shexp.weight q4_0 +blk.6.ffn_down_exps.weight q4_0 +blk.6.ffn_down_shexp.weight q4_0 +blk.7.ffn_down_exps.weight q4_0 +blk.7.ffn_down_shexp.weight q4_0 +blk.8.ffn_down_exps.weight q4_0 +blk.8.ffn_down_shexp.weight q4_0 +blk.9.ffn_down_exps.weight q4_0 +blk.9.ffn_down_shexp.weight q4_0 +blk.10.ffn_down_exps.weight q4_0 +blk.10.ffn_down_shexp.weight q4_0 +blk.11.ffn_down_exps.weight q4_0 +blk.11.ffn_down_shexp.weight q4_0 +blk.12.ffn_down_exps.weight q4_0 +blk.12.ffn_down_shexp.weight q4_0 +blk.13.ffn_down_exps.weight q4_0 +blk.13.ffn_down_shexp.weight q4_0 +blk.14.ffn_down_exps.weight q4_0 +blk.14.ffn_down_shexp.weight q4_0 +blk.15.ffn_down_exps.weight q4_0 +blk.15.ffn_down_shexp.weight q4_0 +blk.16.ffn_down_exps.weight q4_0 +blk.16.ffn_down_shexp.weight q4_0 +blk.17.ffn_down_exps.weight q4_0 +blk.17.ffn_down_shexp.weight q4_0 +blk.18.ffn_down_exps.weight q4_0 +blk.18.ffn_down_shexp.weight q4_0 +blk.19.ffn_down_exps.weight q4_0 +blk.19.ffn_down_shexp.weight q4_0 +blk.20.ffn_down_exps.weight q4_0 +blk.20.ffn_down_shexp.weight q4_0 +blk.21.ffn_down_exps.weight q4_0 +blk.21.ffn_down_shexp.weight q4_0 +blk.22.ffn_down_exps.weight q4_0 +blk.22.ffn_down_shexp.weight q4_0 +blk.23.ffn_down_exps.weight q4_0 +blk.23.ffn_down_shexp.weight q4_0 +blk.24.ffn_down_exps.weight q4_0 +blk.24.ffn_down_shexp.weight q4_0 +blk.25.ffn_down_exps.weight q4_0 +blk.25.ffn_down_shexp.weight q4_0 +blk.26.ffn_down_exps.weight q4_0 +blk.26.ffn_down_shexp.weight q4_0 +blk.27.ffn_down_exps.weight q4_0 +blk.27.ffn_down_shexp.weight q4_0 +blk.28.ffn_down_exps.weight q4_0 +blk.28.ffn_down_shexp.weight q4_0 +blk.29.ffn_down_exps.weight q4_0 +blk.29.ffn_down_shexp.weight q4_0 +blk.30.ffn_down_exps.weight q4_0 +blk.30.ffn_down_shexp.weight q4_0 +blk.31.ffn_down_exps.weight q4_0 +blk.31.ffn_down_shexp.weight q4_0 +blk.32.ffn_down_exps.weight q4_0 +blk.32.ffn_down_shexp.weight q4_0 +blk.33.ffn_down_exps.weight q4_0 +blk.33.ffn_down_shexp.weight q4_0 +blk.34.ffn_down_exps.weight q4_0 +blk.34.ffn_down_shexp.weight q4_0 +blk.35.ffn_down_exps.weight q4_0 +blk.35.ffn_down_shexp.weight q4_0 +blk.36.ffn_down_exps.weight q4_0 +blk.36.ffn_down_shexp.weight q4_0 +blk.37.ffn_down_exps.weight q4_0 +blk.37.ffn_down_shexp.weight q4_0 +blk.38.ffn_down_exps.weight q4_0 +blk.38.ffn_down_shexp.weight q4_0 +blk.39.ffn_down_exps.weight q4_0 +blk.39.ffn_down_shexp.weight q4_0 +blk.40.ffn_down_exps.weight q4_0 +blk.40.ffn_down_shexp.weight q4_0 +blk.41.ffn_down_exps.weight q4_0 +blk.41.ffn_down_shexp.weight q4_0 +blk.42.ffn_down_exps.weight q4_0 +blk.42.ffn_down_shexp.weight q4_0 +blk.43.ffn_down_exps.weight q4_0 +blk.43.ffn_down_shexp.weight q4_0 +blk.44.ffn_down_exps.weight q4_0 +blk.44.ffn_down_shexp.weight q4_0 output.weight q6_K +blk.45.ffn_down_exps.weight q4_0 +blk.45.ffn_down_shexp.weight q4_0 [Q3_K_M] q3_K -blk.0.ffn_down.weight q5_K +blk.0.ffn_down.weight q5_1 blk.0.attn_output.weight q4_K blk.0.attn_v.weight q5_K -blk.1.ffn_down_exps.weight q5_K -blk.1.ffn_down_shexp.weight q5_K +blk.1.ffn_down_exps.weight q5_1 +blk.1.ffn_down_shexp.weight q5_1 blk.1.attn_output.weight q4_K blk.1.attn_v.weight q5_K -blk.2.ffn_down_exps.weight q4_K -blk.2.ffn_down_shexp.weight q4_K +blk.2.ffn_down_exps.weight q5_0 +blk.2.ffn_down_shexp.weight q5_0 blk.2.attn_output.weight q4_K blk.2.attn_v.weight q4_K -blk.3.ffn_down_exps.weight q4_K -blk.3.ffn_down_shexp.weight q4_K +blk.3.ffn_down_exps.weight q5_0 +blk.3.ffn_down_shexp.weight q5_0 blk.3.attn_output.weight q4_K blk.3.attn_v.weight q4_K -blk.4.ffn_down_exps.weight q4_K -blk.4.ffn_down_shexp.weight q4_K +blk.4.ffn_down_exps.weight q5_0 +blk.4.ffn_down_shexp.weight q5_0 blk.4.attn_output.weight q4_K blk.4.attn_v.weight q4_K -blk.5.ffn_down_exps.weight q4_K -blk.5.ffn_down_shexp.weight q4_K +blk.5.ffn_down_exps.weight q5_0 +blk.5.ffn_down_shexp.weight q5_0 blk.5.attn_output.weight q4_K blk.5.attn_v.weight q4_K -blk.6.ffn_down_exps.weight q4_K -blk.6.ffn_down_shexp.weight q4_K +blk.6.ffn_down_exps.weight q5_0 +blk.6.ffn_down_shexp.weight q5_0 blk.6.attn_output.weight q4_K blk.6.attn_v.weight q4_K -blk.7.ffn_down_exps.weight q4_K -blk.7.ffn_down_shexp.weight q4_K +blk.7.ffn_down_exps.weight q5_0 +blk.7.ffn_down_shexp.weight q5_0 blk.7.attn_output.weight q4_K blk.7.attn_v.weight q4_K -blk.8.ffn_down_exps.weight q4_K -blk.8.ffn_down_shexp.weight q4_K +blk.8.ffn_down_exps.weight q5_0 +blk.8.ffn_down_shexp.weight q5_0 blk.8.attn_output.weight q4_K blk.8.attn_v.weight q4_K -blk.9.ffn_down_exps.weight q4_K -blk.9.ffn_down_shexp.weight q4_K +blk.9.ffn_down_exps.weight q5_0 +blk.9.ffn_down_shexp.weight q5_0 blk.9.attn_output.weight q4_K blk.9.attn_v.weight q4_K -blk.10.ffn_down_exps.weight q4_K -blk.10.ffn_down_shexp.weight q4_K +blk.10.ffn_down_exps.weight q5_0 +blk.10.ffn_down_shexp.weight q5_0 blk.10.attn_output.weight q4_K blk.10.attn_v.weight q4_K -blk.11.ffn_down_exps.weight q4_K -blk.11.ffn_down_shexp.weight q4_K +blk.11.ffn_down_exps.weight q5_0 +blk.11.ffn_down_shexp.weight q5_0 blk.11.attn_output.weight q4_K blk.11.attn_v.weight q4_K -blk.12.ffn_down_exps.weight q4_K -blk.12.ffn_down_shexp.weight q4_K +blk.12.ffn_down_exps.weight q5_0 +blk.12.ffn_down_shexp.weight q5_0 blk.12.attn_output.weight q4_K blk.12.attn_v.weight q4_K -blk.13.ffn_down_exps.weight q4_K -blk.13.ffn_down_shexp.weight q4_K +blk.13.ffn_down_exps.weight q5_0 +blk.13.ffn_down_shexp.weight q5_0 blk.13.attn_output.weight q4_K blk.13.attn_v.weight q4_K -blk.14.ffn_down_exps.weight q4_K -blk.14.ffn_down_shexp.weight q4_K +blk.14.ffn_down_exps.weight q5_0 +blk.14.ffn_down_shexp.weight q5_0 blk.14.attn_output.weight q4_K blk.14.attn_v.weight q4_K -blk.15.ffn_down_exps.weight q4_K -blk.15.ffn_down_shexp.weight q4_K +blk.15.ffn_down_exps.weight q5_0 +blk.15.ffn_down_shexp.weight q5_0 blk.15.attn_output.weight q4_K blk.15.attn_v.weight q4_K -blk.16.ffn_down_exps.weight q4_K -blk.16.ffn_down_shexp.weight q4_K +blk.16.ffn_down_exps.weight q5_0 +blk.16.ffn_down_shexp.weight q5_0 blk.16.attn_output.weight q4_K blk.16.attn_v.weight q4_K -blk.17.ffn_down_exps.weight q4_K -blk.17.ffn_down_shexp.weight q4_K +blk.17.ffn_down_exps.weight q5_0 +blk.17.ffn_down_shexp.weight q5_0 blk.17.attn_output.weight q4_K blk.17.attn_v.weight q4_K -blk.18.ffn_down_exps.weight q4_K -blk.18.ffn_down_shexp.weight q4_K +blk.18.ffn_down_exps.weight q5_0 +blk.18.ffn_down_shexp.weight q5_0 blk.18.attn_output.weight q4_K blk.18.attn_v.weight q4_K -blk.19.ffn_down_exps.weight q4_K -blk.19.ffn_down_shexp.weight q4_K +blk.19.ffn_down_exps.weight q5_0 +blk.19.ffn_down_shexp.weight q5_0 blk.19.attn_output.weight q4_K blk.19.attn_v.weight q4_K -blk.20.ffn_down_exps.weight q4_K -blk.20.ffn_down_shexp.weight q4_K +blk.20.ffn_down_exps.weight q5_0 +blk.20.ffn_down_shexp.weight q5_0 blk.20.attn_output.weight q4_K blk.20.attn_v.weight q4_K -blk.21.ffn_down_exps.weight q4_K -blk.21.ffn_down_shexp.weight q4_K +blk.21.ffn_down_exps.weight q5_0 +blk.21.ffn_down_shexp.weight q5_0 blk.21.attn_output.weight q4_K blk.21.attn_v.weight q4_K -blk.22.ffn_down_exps.weight q4_K -blk.22.ffn_down_shexp.weight q4_K +blk.22.ffn_down_exps.weight q5_0 +blk.22.ffn_down_shexp.weight q5_0 blk.22.attn_output.weight q4_K blk.22.attn_v.weight q4_K -blk.23.ffn_down_exps.weight q4_K -blk.23.ffn_down_shexp.weight q4_K +blk.23.ffn_down_exps.weight q5_0 +blk.23.ffn_down_shexp.weight q5_0 blk.23.attn_output.weight q4_K blk.23.attn_v.weight q4_K -blk.24.ffn_down_exps.weight q4_K -blk.24.ffn_down_shexp.weight q4_K +blk.24.ffn_down_exps.weight q5_0 +blk.24.ffn_down_shexp.weight q5_0 blk.24.attn_output.weight q4_K blk.24.attn_v.weight q4_K -blk.25.ffn_down_exps.weight q4_K -blk.25.ffn_down_shexp.weight q4_K +blk.25.ffn_down_exps.weight q5_0 +blk.25.ffn_down_shexp.weight q5_0 blk.25.attn_output.weight q4_K blk.25.attn_v.weight q4_K -blk.26.ffn_down_exps.weight q4_K -blk.26.ffn_down_shexp.weight q4_K +blk.26.ffn_down_exps.weight q5_0 +blk.26.ffn_down_shexp.weight q5_0 blk.26.attn_output.weight q4_K blk.26.attn_v.weight q4_K -blk.27.ffn_down_exps.weight q4_K -blk.27.ffn_down_shexp.weight q4_K +blk.27.ffn_down_exps.weight q5_0 +blk.27.ffn_down_shexp.weight q5_0 blk.27.attn_output.weight q4_K blk.27.attn_v.weight q4_K -blk.28.ffn_down_exps.weight q4_K -blk.28.ffn_down_shexp.weight q4_K +blk.28.ffn_down_exps.weight q5_0 +blk.28.ffn_down_shexp.weight q5_0 blk.28.attn_output.weight q4_K blk.28.attn_v.weight q4_K -blk.29.ffn_down_exps.weight q4_K -blk.29.ffn_down_shexp.weight q4_K +blk.29.ffn_down_exps.weight q5_0 +blk.29.ffn_down_shexp.weight q5_0 blk.29.attn_output.weight q4_K blk.29.attn_v.weight q4_K -blk.30.ffn_down_exps.weight q4_K -blk.30.ffn_down_shexp.weight q4_K +blk.30.ffn_down_exps.weight q5_0 +blk.30.ffn_down_shexp.weight q5_0 blk.30.attn_output.weight q4_K blk.30.attn_v.weight q4_K -blk.31.ffn_down_exps.weight q4_K -blk.31.ffn_down_shexp.weight q4_K +blk.31.ffn_down_exps.weight q5_0 +blk.31.ffn_down_shexp.weight q5_0 blk.31.attn_output.weight q4_K blk.31.attn_v.weight q4_K -blk.32.ffn_down_exps.weight q4_K -blk.32.ffn_down_shexp.weight q4_K +blk.32.ffn_down_exps.weight q5_0 +blk.32.ffn_down_shexp.weight q5_0 blk.32.attn_output.weight q4_K blk.32.attn_v.weight q4_K -blk.33.ffn_down_exps.weight q4_K -blk.33.ffn_down_shexp.weight q4_K +blk.33.ffn_down_exps.weight q5_0 +blk.33.ffn_down_shexp.weight q5_0 blk.33.attn_output.weight q4_K blk.33.attn_v.weight q4_K -blk.34.ffn_down_exps.weight q4_K +blk.34.ffn_down_exps.weight q5_0 blk.34.attn_output.weight q4_K -blk.34.ffn_down_shexp.weight q4_K +blk.34.ffn_down_shexp.weight q5_0 blk.34.attn_v.weight q4_K -blk.35.ffn_down_exps.weight q4_K -blk.35.ffn_down_shexp.weight q4_K +blk.35.ffn_down_exps.weight q5_0 +blk.35.ffn_down_shexp.weight q5_0 blk.35.attn_output.weight q4_K blk.35.attn_v.weight q4_K -blk.36.ffn_down_exps.weight q4_K -blk.36.ffn_down_shexp.weight q4_K +blk.36.ffn_down_exps.weight q5_0 +blk.36.ffn_down_shexp.weight q5_0 blk.36.attn_output.weight q4_K blk.36.attn_v.weight q4_K -blk.37.ffn_down_exps.weight q4_K -blk.37.ffn_down_shexp.weight q4_K +blk.37.ffn_down_exps.weight q5_0 +blk.37.ffn_down_shexp.weight q5_0 blk.37.attn_output.weight q4_K blk.37.attn_v.weight q4_K -blk.38.ffn_down_exps.weight q4_K -blk.38.ffn_down_shexp.weight q4_K +blk.38.ffn_down_exps.weight q5_0 +blk.38.ffn_down_shexp.weight q5_0 blk.38.attn_output.weight q4_K blk.38.attn_v.weight q4_K -blk.39.ffn_down_exps.weight q4_K -blk.39.ffn_down_shexp.weight q4_K +blk.39.ffn_down_exps.weight q5_0 +blk.39.ffn_down_shexp.weight q5_0 blk.39.attn_output.weight q4_K blk.39.attn_v.weight q4_K -blk.40.ffn_down_exps.weight q4_K -blk.40.ffn_down_shexp.weight q4_K +blk.40.ffn_down_exps.weight q5_0 +blk.40.ffn_down_shexp.weight q5_0 blk.40.attn_output.weight q4_K blk.40.attn_v.weight q4_K -blk.41.ffn_down_exps.weight q4_K -blk.41.ffn_down_shexp.weight q4_K +blk.41.ffn_down_exps.weight q5_0 +blk.41.ffn_down_shexp.weight q5_0 blk.41.attn_output.weight q4_K blk.41.attn_v.weight q4_K -blk.42.ffn_down_exps.weight q4_K -blk.42.ffn_down_shexp.weight q4_K +blk.42.ffn_down_exps.weight q5_0 +blk.42.ffn_down_shexp.weight q5_0 blk.42.attn_v.weight q4_K blk.42.attn_output.weight q4_K -blk.43.ffn_down_exps.weight q4_K -blk.43.ffn_down_shexp.weight q4_K +blk.43.ffn_down_exps.weight q5_0 +blk.43.ffn_down_shexp.weight q5_0 blk.43.attn_output.weight q4_K blk.43.attn_v.weight q4_K -blk.44.ffn_down_exps.weight q4_K -blk.44.ffn_down_shexp.weight q4_K +blk.44.ffn_down_exps.weight q5_0 +blk.44.ffn_down_shexp.weight q5_0 blk.44.attn_output.weight q4_K blk.44.attn_v.weight q4_K output.weight q6_K -blk.45.ffn_down_exps.weight q4_K -blk.45.ffn_down_shexp.weight q4_K +blk.45.ffn_down_exps.weight q5_0 +blk.45.ffn_down_shexp.weight q5_0 blk.45.attn_output.weight q4_K blk.45.attn_v.weight q4_K [Q3_K_L] q3_K -blk.0.ffn_down.weight q5_K +blk.0.ffn_down.weight q5_1 blk.0.attn_output.weight q5_K blk.0.attn_v.weight q5_K -blk.1.ffn_down_exps.weight q5_K -blk.1.ffn_down_shexp.weight q5_K +blk.1.ffn_down_exps.weight q5_1 +blk.1.ffn_down_shexp.weight q5_1 blk.1.attn_output.weight q5_K blk.1.attn_v.weight q5_K -blk.2.ffn_down_exps.weight q5_K -blk.2.ffn_down_shexp.weight q5_K +blk.2.ffn_down_exps.weight q5_1 +blk.2.ffn_down_shexp.weight q5_1 blk.2.attn_output.weight q5_K blk.2.attn_v.weight q5_K -blk.3.ffn_down_exps.weight q5_K -blk.3.ffn_down_shexp.weight q5_K +blk.3.ffn_down_exps.weight q5_1 +blk.3.ffn_down_shexp.weight q5_1 blk.3.attn_output.weight q5_K blk.3.attn_v.weight q5_K -blk.4.ffn_down_exps.weight q5_K -blk.4.ffn_down_shexp.weight q5_K +blk.4.ffn_down_exps.weight q5_1 +blk.4.ffn_down_shexp.weight q5_1 blk.4.attn_output.weight q5_K blk.4.attn_v.weight q5_K -blk.5.ffn_down_exps.weight q5_K -blk.5.ffn_down_shexp.weight q5_K +blk.5.ffn_down_exps.weight q5_1 +blk.5.ffn_down_shexp.weight q5_1 blk.5.attn_output.weight q5_K blk.5.attn_v.weight q5_K -blk.6.ffn_down_exps.weight q5_K -blk.6.ffn_down_shexp.weight q5_K +blk.6.ffn_down_exps.weight q5_1 +blk.6.ffn_down_shexp.weight q5_1 blk.6.attn_output.weight q5_K blk.6.attn_v.weight q5_K -blk.7.ffn_down_exps.weight q5_K -blk.7.ffn_down_shexp.weight q5_K +blk.7.ffn_down_exps.weight q5_1 +blk.7.ffn_down_shexp.weight q5_1 blk.7.attn_output.weight q5_K blk.7.attn_v.weight q5_K -blk.8.ffn_down_exps.weight q5_K -blk.8.ffn_down_shexp.weight q5_K +blk.8.ffn_down_exps.weight q5_1 +blk.8.ffn_down_shexp.weight q5_1 blk.8.attn_output.weight q5_K blk.8.attn_v.weight q5_K -blk.9.ffn_down_exps.weight q5_K -blk.9.ffn_down_shexp.weight q5_K +blk.9.ffn_down_exps.weight q5_1 +blk.9.ffn_down_shexp.weight q5_1 blk.9.attn_output.weight q5_K blk.9.attn_v.weight q5_K -blk.10.ffn_down_exps.weight q5_K -blk.10.ffn_down_shexp.weight q5_K +blk.10.ffn_down_exps.weight q5_1 +blk.10.ffn_down_shexp.weight q5_1 blk.10.attn_output.weight q5_K blk.10.attn_v.weight q5_K -blk.11.ffn_down_exps.weight q5_K -blk.11.ffn_down_shexp.weight q5_K +blk.11.ffn_down_exps.weight q5_1 +blk.11.ffn_down_shexp.weight q5_1 blk.11.attn_output.weight q5_K blk.11.attn_v.weight q5_K -blk.12.ffn_down_exps.weight q5_K -blk.12.ffn_down_shexp.weight q5_K +blk.12.ffn_down_exps.weight q5_1 +blk.12.ffn_down_shexp.weight q5_1 blk.12.attn_output.weight q5_K blk.12.attn_v.weight q5_K -blk.13.ffn_down_exps.weight q5_K -blk.13.ffn_down_shexp.weight q5_K +blk.13.ffn_down_exps.weight q5_1 +blk.13.ffn_down_shexp.weight q5_1 blk.13.attn_output.weight q5_K blk.13.attn_v.weight q5_K -blk.14.ffn_down_exps.weight q5_K -blk.14.ffn_down_shexp.weight q5_K +blk.14.ffn_down_exps.weight q5_1 +blk.14.ffn_down_shexp.weight q5_1 blk.14.attn_output.weight q5_K blk.14.attn_v.weight q5_K -blk.15.ffn_down_exps.weight q5_K -blk.15.ffn_down_shexp.weight q5_K +blk.15.ffn_down_exps.weight q5_1 +blk.15.ffn_down_shexp.weight q5_1 blk.15.attn_output.weight q5_K blk.15.attn_v.weight q5_K -blk.16.ffn_down_exps.weight q5_K -blk.16.ffn_down_shexp.weight q5_K +blk.16.ffn_down_exps.weight q5_1 +blk.16.ffn_down_shexp.weight q5_1 blk.16.attn_output.weight q5_K blk.16.attn_v.weight q5_K -blk.17.ffn_down_exps.weight q5_K -blk.17.ffn_down_shexp.weight q5_K +blk.17.ffn_down_exps.weight q5_1 +blk.17.ffn_down_shexp.weight q5_1 blk.17.attn_output.weight q5_K blk.17.attn_v.weight q5_K -blk.18.ffn_down_exps.weight q5_K -blk.18.ffn_down_shexp.weight q5_K +blk.18.ffn_down_exps.weight q5_1 +blk.18.ffn_down_shexp.weight q5_1 blk.18.attn_output.weight q5_K blk.18.attn_v.weight q5_K -blk.19.ffn_down_exps.weight q5_K -blk.19.ffn_down_shexp.weight q5_K +blk.19.ffn_down_exps.weight q5_1 +blk.19.ffn_down_shexp.weight q5_1 blk.19.attn_output.weight q5_K blk.19.attn_v.weight q5_K -blk.20.ffn_down_exps.weight q5_K -blk.20.ffn_down_shexp.weight q5_K +blk.20.ffn_down_exps.weight q5_1 +blk.20.ffn_down_shexp.weight q5_1 blk.20.attn_output.weight q5_K blk.20.attn_v.weight q5_K -blk.21.ffn_down_exps.weight q5_K -blk.21.ffn_down_shexp.weight q5_K +blk.21.ffn_down_exps.weight q5_1 +blk.21.ffn_down_shexp.weight q5_1 blk.21.attn_output.weight q5_K blk.21.attn_v.weight q5_K -blk.22.ffn_down_exps.weight q5_K -blk.22.ffn_down_shexp.weight q5_K +blk.22.ffn_down_exps.weight q5_1 +blk.22.ffn_down_shexp.weight q5_1 blk.22.attn_output.weight q5_K blk.22.attn_v.weight q5_K -blk.23.ffn_down_exps.weight q5_K -blk.23.ffn_down_shexp.weight q5_K +blk.23.ffn_down_exps.weight q5_1 +blk.23.ffn_down_shexp.weight q5_1 blk.23.attn_output.weight q5_K blk.23.attn_v.weight q5_K -blk.24.ffn_down_exps.weight q5_K -blk.24.ffn_down_shexp.weight q5_K +blk.24.ffn_down_exps.weight q5_1 +blk.24.ffn_down_shexp.weight q5_1 blk.24.attn_output.weight q5_K blk.24.attn_v.weight q5_K -blk.25.ffn_down_exps.weight q5_K -blk.25.ffn_down_shexp.weight q5_K +blk.25.ffn_down_exps.weight q5_1 +blk.25.ffn_down_shexp.weight q5_1 blk.25.attn_output.weight q5_K blk.25.attn_v.weight q5_K -blk.26.ffn_down_exps.weight q5_K -blk.26.ffn_down_shexp.weight q5_K +blk.26.ffn_down_exps.weight q5_1 +blk.26.ffn_down_shexp.weight q5_1 blk.26.attn_output.weight q5_K blk.26.attn_v.weight q5_K -blk.27.ffn_down_exps.weight q5_K -blk.27.ffn_down_shexp.weight q5_K +blk.27.ffn_down_exps.weight q5_1 +blk.27.ffn_down_shexp.weight q5_1 blk.27.attn_output.weight q5_K blk.27.attn_v.weight q5_K -blk.28.ffn_down_exps.weight q5_K -blk.28.ffn_down_shexp.weight q5_K +blk.28.ffn_down_exps.weight q5_1 +blk.28.ffn_down_shexp.weight q5_1 blk.28.attn_output.weight q5_K blk.28.attn_v.weight q5_K -blk.29.ffn_down_exps.weight q5_K -blk.29.ffn_down_shexp.weight q5_K +blk.29.ffn_down_exps.weight q5_1 +blk.29.ffn_down_shexp.weight q5_1 blk.29.attn_output.weight q5_K blk.29.attn_v.weight q5_K -blk.30.ffn_down_exps.weight q5_K -blk.30.ffn_down_shexp.weight q5_K +blk.30.ffn_down_exps.weight q5_1 +blk.30.ffn_down_shexp.weight q5_1 blk.30.attn_output.weight q5_K blk.30.attn_v.weight q5_K -blk.31.ffn_down_exps.weight q5_K -blk.31.ffn_down_shexp.weight q5_K +blk.31.ffn_down_exps.weight q5_1 +blk.31.ffn_down_shexp.weight q5_1 blk.31.attn_output.weight q5_K blk.31.attn_v.weight q5_K -blk.32.ffn_down_exps.weight q5_K -blk.32.ffn_down_shexp.weight q5_K +blk.32.ffn_down_exps.weight q5_1 +blk.32.ffn_down_shexp.weight q5_1 blk.32.attn_output.weight q5_K blk.32.attn_v.weight q5_K -blk.33.ffn_down_exps.weight q5_K -blk.33.ffn_down_shexp.weight q5_K +blk.33.ffn_down_exps.weight q5_1 +blk.33.ffn_down_shexp.weight q5_1 blk.33.attn_output.weight q5_K blk.33.attn_v.weight q5_K -blk.34.ffn_down_exps.weight q5_K +blk.34.ffn_down_exps.weight q5_1 blk.34.attn_output.weight q5_K -blk.34.ffn_down_shexp.weight q5_K +blk.34.ffn_down_shexp.weight q5_1 blk.34.attn_v.weight q5_K -blk.35.ffn_down_exps.weight q5_K -blk.35.ffn_down_shexp.weight q5_K +blk.35.ffn_down_exps.weight q5_1 +blk.35.ffn_down_shexp.weight q5_1 blk.35.attn_output.weight q5_K blk.35.attn_v.weight q5_K -blk.36.ffn_down_exps.weight q5_K -blk.36.ffn_down_shexp.weight q5_K +blk.36.ffn_down_exps.weight q5_1 +blk.36.ffn_down_shexp.weight q5_1 blk.36.attn_output.weight q5_K blk.36.attn_v.weight q5_K -blk.37.ffn_down_exps.weight q5_K -blk.37.ffn_down_shexp.weight q5_K +blk.37.ffn_down_exps.weight q5_1 +blk.37.ffn_down_shexp.weight q5_1 blk.37.attn_output.weight q5_K blk.37.attn_v.weight q5_K -blk.38.ffn_down_exps.weight q5_K -blk.38.ffn_down_shexp.weight q5_K +blk.38.ffn_down_exps.weight q5_1 +blk.38.ffn_down_shexp.weight q5_1 blk.38.attn_output.weight q5_K blk.38.attn_v.weight q5_K -blk.39.ffn_down_exps.weight q5_K -blk.39.ffn_down_shexp.weight q5_K +blk.39.ffn_down_exps.weight q5_1 +blk.39.ffn_down_shexp.weight q5_1 blk.39.attn_output.weight q5_K blk.39.attn_v.weight q5_K -blk.40.ffn_down_exps.weight q5_K -blk.40.ffn_down_shexp.weight q5_K +blk.40.ffn_down_exps.weight q5_1 +blk.40.ffn_down_shexp.weight q5_1 blk.40.attn_output.weight q5_K blk.40.attn_v.weight q5_K -blk.41.ffn_down_exps.weight q5_K -blk.41.ffn_down_shexp.weight q5_K +blk.41.ffn_down_exps.weight q5_1 +blk.41.ffn_down_shexp.weight q5_1 blk.41.attn_output.weight q5_K blk.41.attn_v.weight q5_K -blk.42.ffn_down_exps.weight q5_K -blk.42.ffn_down_shexp.weight q5_K +blk.42.ffn_down_exps.weight q5_1 +blk.42.ffn_down_shexp.weight q5_1 blk.42.attn_v.weight q5_K blk.42.attn_output.weight q5_K -blk.43.ffn_down_exps.weight q5_K -blk.43.ffn_down_shexp.weight q5_K +blk.43.ffn_down_exps.weight q5_1 +blk.43.ffn_down_shexp.weight q5_1 blk.43.attn_output.weight q5_K blk.43.attn_v.weight q5_K -blk.44.ffn_down_exps.weight q5_K -blk.44.ffn_down_shexp.weight q5_K +blk.44.ffn_down_exps.weight q5_1 +blk.44.ffn_down_shexp.weight q5_1 blk.44.attn_output.weight q5_K blk.44.attn_v.weight q5_K output.weight q6_K -blk.45.ffn_down_exps.weight q5_K -blk.45.ffn_down_shexp.weight q5_K +blk.45.ffn_down_exps.weight q5_1 +blk.45.ffn_down_shexp.weight q5_1 blk.45.attn_output.weight q5_K blk.45.attn_v.weight q5_K [Q4_K_S] q4_K -blk.0.ffn_down.weight q5_K +blk.0.ffn_down.weight q5_1 blk.0.attn_v.weight q5_K -blk.1.ffn_down_exps.weight q5_K -blk.1.ffn_down_shexp.weight q5_K +blk.1.ffn_down_exps.weight q5_1 +blk.1.ffn_down_shexp.weight q5_1 blk.1.attn_v.weight q5_K -blk.2.ffn_down_exps.weight q5_K -blk.2.ffn_down_shexp.weight q5_K +blk.2.ffn_down_exps.weight q5_1 +blk.2.ffn_down_shexp.weight q5_1 blk.2.attn_v.weight q5_K -blk.3.ffn_down_exps.weight q5_K -blk.3.ffn_down_shexp.weight q5_K +blk.3.ffn_down_exps.weight q5_1 +blk.3.ffn_down_shexp.weight q5_1 blk.3.attn_v.weight q5_K -blk.4.ffn_down_exps.weight q5_K -blk.4.ffn_down_shexp.weight q5_K +blk.4.ffn_down_exps.weight q5_1 +blk.4.ffn_down_shexp.weight q5_1 +blk.5.ffn_down_exps.weight q5_0 +blk.5.ffn_down_shexp.weight q5_0 +blk.6.ffn_down_exps.weight q5_0 +blk.6.ffn_down_shexp.weight q5_0 +blk.7.ffn_down_exps.weight q5_0 +blk.7.ffn_down_shexp.weight q5_0 +blk.8.ffn_down_exps.weight q5_0 +blk.8.ffn_down_shexp.weight q5_0 +blk.9.ffn_down_exps.weight q5_0 +blk.9.ffn_down_shexp.weight q5_0 +blk.10.ffn_down_exps.weight q5_0 +blk.10.ffn_down_shexp.weight q5_0 +blk.11.ffn_down_exps.weight q5_0 +blk.11.ffn_down_shexp.weight q5_0 +blk.12.ffn_down_exps.weight q5_0 +blk.12.ffn_down_shexp.weight q5_0 +blk.13.ffn_down_exps.weight q5_0 +blk.13.ffn_down_shexp.weight q5_0 +blk.14.ffn_down_exps.weight q5_0 +blk.14.ffn_down_shexp.weight q5_0 +blk.15.ffn_down_exps.weight q5_0 +blk.15.ffn_down_shexp.weight q5_0 +blk.16.ffn_down_exps.weight q5_0 +blk.16.ffn_down_shexp.weight q5_0 +blk.17.ffn_down_exps.weight q5_0 +blk.17.ffn_down_shexp.weight q5_0 +blk.18.ffn_down_exps.weight q5_0 +blk.18.ffn_down_shexp.weight q5_0 +blk.19.ffn_down_exps.weight q5_0 +blk.19.ffn_down_shexp.weight q5_0 +blk.20.ffn_down_exps.weight q5_0 +blk.20.ffn_down_shexp.weight q5_0 +blk.21.ffn_down_exps.weight q5_0 +blk.21.ffn_down_shexp.weight q5_0 +blk.22.ffn_down_exps.weight q5_0 +blk.22.ffn_down_shexp.weight q5_0 +blk.23.ffn_down_exps.weight q5_0 +blk.23.ffn_down_shexp.weight q5_0 +blk.24.ffn_down_exps.weight q5_0 +blk.24.ffn_down_shexp.weight q5_0 +blk.25.ffn_down_exps.weight q5_0 +blk.25.ffn_down_shexp.weight q5_0 +blk.26.ffn_down_exps.weight q5_0 +blk.26.ffn_down_shexp.weight q5_0 +blk.27.ffn_down_exps.weight q5_0 +blk.27.ffn_down_shexp.weight q5_0 +blk.28.ffn_down_exps.weight q5_0 +blk.28.ffn_down_shexp.weight q5_0 +blk.29.ffn_down_exps.weight q5_0 +blk.29.ffn_down_shexp.weight q5_0 +blk.30.ffn_down_exps.weight q5_0 +blk.30.ffn_down_shexp.weight q5_0 +blk.31.ffn_down_exps.weight q5_0 +blk.31.ffn_down_shexp.weight q5_0 +blk.32.ffn_down_exps.weight q5_0 +blk.32.ffn_down_shexp.weight q5_0 +blk.33.ffn_down_exps.weight q5_0 +blk.33.ffn_down_shexp.weight q5_0 +blk.34.ffn_down_exps.weight q5_0 +blk.34.ffn_down_shexp.weight q5_0 +blk.35.ffn_down_exps.weight q5_0 +blk.35.ffn_down_shexp.weight q5_0 +blk.36.ffn_down_exps.weight q5_0 +blk.36.ffn_down_shexp.weight q5_0 +blk.37.ffn_down_exps.weight q5_0 +blk.37.ffn_down_shexp.weight q5_0 +blk.38.ffn_down_exps.weight q5_0 +blk.38.ffn_down_shexp.weight q5_0 +blk.39.ffn_down_exps.weight q5_0 +blk.39.ffn_down_shexp.weight q5_0 +blk.40.ffn_down_exps.weight q5_0 +blk.40.ffn_down_shexp.weight q5_0 +blk.41.ffn_down_exps.weight q5_0 +blk.41.ffn_down_shexp.weight q5_0 +blk.42.ffn_down_exps.weight q5_0 +blk.42.ffn_down_shexp.weight q5_0 +blk.43.ffn_down_exps.weight q5_0 +blk.43.ffn_down_shexp.weight q5_0 +blk.44.ffn_down_exps.weight q5_0 +blk.44.ffn_down_shexp.weight q5_0 output.weight q6_K +blk.45.ffn_down_exps.weight q5_0 +blk.45.ffn_down_shexp.weight q5_0 [Q4_K_M] q4_K -blk.0.ffn_down.weight q6_K +blk.0.ffn_down.weight q8_0 blk.0.attn_v.weight q6_K -blk.1.ffn_down_exps.weight q6_K -blk.1.ffn_down_shexp.weight q6_K +blk.1.ffn_down_exps.weight q8_0 +blk.1.ffn_down_shexp.weight q8_0 blk.1.attn_v.weight q6_K -blk.2.ffn_down_exps.weight q6_K -blk.2.ffn_down_shexp.weight q6_K +blk.2.ffn_down_exps.weight q8_0 +blk.2.ffn_down_shexp.weight q8_0 blk.2.attn_v.weight q6_K -blk.3.ffn_down_exps.weight q6_K -blk.3.ffn_down_shexp.weight q6_K +blk.3.ffn_down_exps.weight q8_0 +blk.3.ffn_down_shexp.weight q8_0 blk.3.attn_v.weight q6_K -blk.4.ffn_down_exps.weight q6_K -blk.4.ffn_down_shexp.weight q6_K +blk.4.ffn_down_exps.weight q8_0 +blk.4.ffn_down_shexp.weight q8_0 blk.4.attn_v.weight q6_K -blk.7.ffn_down_exps.weight q6_K -blk.7.ffn_down_shexp.weight q6_K +blk.5.ffn_down_exps.weight q5_0 +blk.5.ffn_down_shexp.weight q5_0 +blk.6.ffn_down_exps.weight q5_0 +blk.6.ffn_down_shexp.weight q5_0 +blk.7.ffn_down_exps.weight q8_0 +blk.7.ffn_down_shexp.weight q8_0 blk.7.attn_v.weight q6_K -blk.10.ffn_down_exps.weight q6_K -blk.10.ffn_down_shexp.weight q6_K +blk.8.ffn_down_exps.weight q5_0 +blk.8.ffn_down_shexp.weight q5_0 +blk.9.ffn_down_exps.weight q5_0 +blk.9.ffn_down_shexp.weight q5_0 +blk.10.ffn_down_exps.weight q8_0 +blk.10.ffn_down_shexp.weight q8_0 blk.10.attn_v.weight q6_K -blk.13.ffn_down_exps.weight q6_K -blk.13.ffn_down_shexp.weight q6_K +blk.11.ffn_down_exps.weight q5_0 +blk.11.ffn_down_shexp.weight q5_0 +blk.12.ffn_down_exps.weight q5_0 +blk.12.ffn_down_shexp.weight q5_0 +blk.13.ffn_down_exps.weight q8_0 +blk.13.ffn_down_shexp.weight q8_0 blk.13.attn_v.weight q6_K -blk.16.ffn_down_exps.weight q6_K -blk.16.ffn_down_shexp.weight q6_K +blk.14.ffn_down_exps.weight q5_0 +blk.14.ffn_down_shexp.weight q5_0 +blk.15.ffn_down_exps.weight q5_0 +blk.15.ffn_down_shexp.weight q5_0 +blk.16.ffn_down_exps.weight q8_0 +blk.16.ffn_down_shexp.weight q8_0 blk.16.attn_v.weight q6_K -blk.19.ffn_down_exps.weight q6_K -blk.19.ffn_down_shexp.weight q6_K +blk.17.ffn_down_exps.weight q5_0 +blk.17.ffn_down_shexp.weight q5_0 +blk.18.ffn_down_exps.weight q5_0 +blk.18.ffn_down_shexp.weight q5_0 +blk.19.ffn_down_exps.weight q8_0 +blk.19.ffn_down_shexp.weight q8_0 blk.19.attn_v.weight q6_K -blk.22.ffn_down_exps.weight q6_K -blk.22.ffn_down_shexp.weight q6_K +blk.20.ffn_down_exps.weight q5_0 +blk.20.ffn_down_shexp.weight q5_0 +blk.21.ffn_down_exps.weight q5_0 +blk.21.ffn_down_shexp.weight q5_0 +blk.22.ffn_down_exps.weight q8_0 +blk.22.ffn_down_shexp.weight q8_0 blk.22.attn_v.weight q6_K -blk.25.ffn_down_exps.weight q6_K -blk.25.ffn_down_shexp.weight q6_K +blk.23.ffn_down_exps.weight q5_0 +blk.23.ffn_down_shexp.weight q5_0 +blk.24.ffn_down_exps.weight q5_0 +blk.24.ffn_down_shexp.weight q5_0 +blk.25.ffn_down_exps.weight q8_0 +blk.25.ffn_down_shexp.weight q8_0 blk.25.attn_v.weight q6_K -blk.28.ffn_down_exps.weight q6_K -blk.28.ffn_down_shexp.weight q6_K +blk.26.ffn_down_exps.weight q5_0 +blk.26.ffn_down_shexp.weight q5_0 +blk.27.ffn_down_exps.weight q5_0 +blk.27.ffn_down_shexp.weight q5_0 +blk.28.ffn_down_exps.weight q8_0 +blk.28.ffn_down_shexp.weight q8_0 blk.28.attn_v.weight q6_K -blk.31.ffn_down_exps.weight q6_K -blk.31.ffn_down_shexp.weight q6_K +blk.29.ffn_down_exps.weight q5_0 +blk.29.ffn_down_shexp.weight q5_0 +blk.30.ffn_down_exps.weight q5_0 +blk.30.ffn_down_shexp.weight q5_0 +blk.31.ffn_down_exps.weight q8_0 +blk.31.ffn_down_shexp.weight q8_0 blk.31.attn_v.weight q6_K -blk.34.ffn_down_exps.weight q6_K -blk.34.ffn_down_shexp.weight q6_K +blk.32.ffn_down_exps.weight q5_0 +blk.32.ffn_down_shexp.weight q5_0 +blk.33.ffn_down_exps.weight q5_0 +blk.33.ffn_down_shexp.weight q5_0 +blk.34.ffn_down_exps.weight q8_0 +blk.34.ffn_down_shexp.weight q8_0 blk.34.attn_v.weight q6_K -blk.37.ffn_down_exps.weight q6_K -blk.37.ffn_down_shexp.weight q6_K +blk.35.ffn_down_exps.weight q5_0 +blk.35.ffn_down_shexp.weight q5_0 +blk.36.ffn_down_exps.weight q5_0 +blk.36.ffn_down_shexp.weight q5_0 +blk.37.ffn_down_exps.weight q8_0 +blk.37.ffn_down_shexp.weight q8_0 blk.37.attn_v.weight q6_K -blk.40.ffn_down_exps.weight q6_K -blk.40.ffn_down_shexp.weight q6_K +blk.38.ffn_down_exps.weight q5_0 +blk.38.ffn_down_shexp.weight q5_0 +blk.39.ffn_down_exps.weight q5_0 +blk.39.ffn_down_shexp.weight q5_0 +blk.40.ffn_down_exps.weight q8_0 +blk.40.ffn_down_shexp.weight q8_0 blk.40.attn_v.weight q6_K -blk.41.ffn_down_exps.weight q6_K -blk.41.ffn_down_shexp.weight q6_K +blk.41.ffn_down_exps.weight q8_0 +blk.41.ffn_down_shexp.weight q8_0 blk.41.attn_v.weight q6_K -blk.42.ffn_down_exps.weight q6_K -blk.42.ffn_down_shexp.weight q6_K +blk.42.ffn_down_exps.weight q8_0 +blk.42.ffn_down_shexp.weight q8_0 blk.42.attn_v.weight q6_K -blk.43.ffn_down_exps.weight q6_K -blk.43.ffn_down_shexp.weight q6_K +blk.43.ffn_down_exps.weight q8_0 +blk.43.ffn_down_shexp.weight q8_0 blk.43.attn_v.weight q6_K -blk.44.ffn_down_exps.weight q6_K -blk.44.ffn_down_shexp.weight q6_K +blk.44.ffn_down_exps.weight q8_0 +blk.44.ffn_down_shexp.weight q8_0 blk.44.attn_v.weight q6_K output.weight q6_K -blk.45.ffn_down_exps.weight q6_K -blk.45.ffn_down_shexp.weight q6_K +blk.45.ffn_down_exps.weight q8_0 +blk.45.ffn_down_shexp.weight q8_0 blk.45.attn_v.weight q6_K [Q5_K_S] q5_K +blk.0.ffn_down.weight q5_1 +blk.1.ffn_down_exps.weight q5_1 +blk.1.ffn_down_shexp.weight q5_1 +blk.2.ffn_down_exps.weight q5_1 +blk.2.ffn_down_shexp.weight q5_1 +blk.3.ffn_down_exps.weight q5_1 +blk.3.ffn_down_shexp.weight q5_1 +blk.4.ffn_down_exps.weight q5_1 +blk.4.ffn_down_shexp.weight q5_1 +blk.5.ffn_down_exps.weight q5_1 +blk.5.ffn_down_shexp.weight q5_1 +blk.6.ffn_down_exps.weight q5_1 +blk.6.ffn_down_shexp.weight q5_1 +blk.7.ffn_down_exps.weight q5_1 +blk.7.ffn_down_shexp.weight q5_1 +blk.8.ffn_down_exps.weight q5_1 +blk.8.ffn_down_shexp.weight q5_1 +blk.9.ffn_down_exps.weight q5_1 +blk.9.ffn_down_shexp.weight q5_1 +blk.10.ffn_down_exps.weight q5_1 +blk.10.ffn_down_shexp.weight q5_1 +blk.11.ffn_down_exps.weight q5_1 +blk.11.ffn_down_shexp.weight q5_1 +blk.12.ffn_down_exps.weight q5_1 +blk.12.ffn_down_shexp.weight q5_1 +blk.13.ffn_down_exps.weight q5_1 +blk.13.ffn_down_shexp.weight q5_1 +blk.14.ffn_down_exps.weight q5_1 +blk.14.ffn_down_shexp.weight q5_1 +blk.15.ffn_down_exps.weight q5_1 +blk.15.ffn_down_shexp.weight q5_1 +blk.16.ffn_down_exps.weight q5_1 +blk.16.ffn_down_shexp.weight q5_1 +blk.17.ffn_down_exps.weight q5_1 +blk.17.ffn_down_shexp.weight q5_1 +blk.18.ffn_down_exps.weight q5_1 +blk.18.ffn_down_shexp.weight q5_1 +blk.19.ffn_down_exps.weight q5_1 +blk.19.ffn_down_shexp.weight q5_1 +blk.20.ffn_down_exps.weight q5_1 +blk.20.ffn_down_shexp.weight q5_1 +blk.21.ffn_down_exps.weight q5_1 +blk.21.ffn_down_shexp.weight q5_1 +blk.22.ffn_down_exps.weight q5_1 +blk.22.ffn_down_shexp.weight q5_1 +blk.23.ffn_down_exps.weight q5_1 +blk.23.ffn_down_shexp.weight q5_1 +blk.24.ffn_down_exps.weight q5_1 +blk.24.ffn_down_shexp.weight q5_1 +blk.25.ffn_down_exps.weight q5_1 +blk.25.ffn_down_shexp.weight q5_1 +blk.26.ffn_down_exps.weight q5_1 +blk.26.ffn_down_shexp.weight q5_1 +blk.27.ffn_down_exps.weight q5_1 +blk.27.ffn_down_shexp.weight q5_1 +blk.28.ffn_down_exps.weight q5_1 +blk.28.ffn_down_shexp.weight q5_1 +blk.29.ffn_down_exps.weight q5_1 +blk.29.ffn_down_shexp.weight q5_1 +blk.30.ffn_down_exps.weight q5_1 +blk.30.ffn_down_shexp.weight q5_1 +blk.31.ffn_down_exps.weight q5_1 +blk.31.ffn_down_shexp.weight q5_1 +blk.32.ffn_down_exps.weight q5_1 +blk.32.ffn_down_shexp.weight q5_1 +blk.33.ffn_down_exps.weight q5_1 +blk.33.ffn_down_shexp.weight q5_1 +blk.34.ffn_down_exps.weight q5_1 +blk.34.ffn_down_shexp.weight q5_1 +blk.35.ffn_down_exps.weight q5_1 +blk.35.ffn_down_shexp.weight q5_1 +blk.36.ffn_down_exps.weight q5_1 +blk.36.ffn_down_shexp.weight q5_1 +blk.37.ffn_down_exps.weight q5_1 +blk.37.ffn_down_shexp.weight q5_1 +blk.38.ffn_down_exps.weight q5_1 +blk.38.ffn_down_shexp.weight q5_1 +blk.39.ffn_down_exps.weight q5_1 +blk.39.ffn_down_shexp.weight q5_1 +blk.40.ffn_down_exps.weight q5_1 +blk.40.ffn_down_shexp.weight q5_1 +blk.41.ffn_down_exps.weight q5_1 +blk.41.ffn_down_shexp.weight q5_1 +blk.42.ffn_down_exps.weight q5_1 +blk.42.ffn_down_shexp.weight q5_1 +blk.43.ffn_down_exps.weight q5_1 +blk.43.ffn_down_shexp.weight q5_1 +blk.44.ffn_down_exps.weight q5_1 +blk.44.ffn_down_shexp.weight q5_1 output.weight q6_K +blk.45.ffn_down_exps.weight q5_1 +blk.45.ffn_down_shexp.weight q5_1 [Q5_K_M] q5_K -blk.0.ffn_down.weight q6_K +blk.0.ffn_down.weight q8_0 blk.0.attn_v.weight q6_K -blk.1.ffn_down_exps.weight q6_K -blk.1.ffn_down_shexp.weight q6_K +blk.1.ffn_down_exps.weight q8_0 +blk.1.ffn_down_shexp.weight q8_0 blk.1.attn_v.weight q6_K -blk.2.ffn_down_exps.weight q6_K -blk.2.ffn_down_shexp.weight q6_K +blk.2.ffn_down_exps.weight q8_0 +blk.2.ffn_down_shexp.weight q8_0 blk.2.attn_v.weight q6_K -blk.3.ffn_down_exps.weight q6_K -blk.3.ffn_down_shexp.weight q6_K +blk.3.ffn_down_exps.weight q8_0 +blk.3.ffn_down_shexp.weight q8_0 blk.3.attn_v.weight q6_K -blk.4.ffn_down_exps.weight q6_K -blk.4.ffn_down_shexp.weight q6_K +blk.4.ffn_down_exps.weight q8_0 +blk.4.ffn_down_shexp.weight q8_0 blk.4.attn_v.weight q6_K -blk.7.ffn_down_exps.weight q6_K -blk.7.ffn_down_shexp.weight q6_K +blk.5.ffn_down_exps.weight q5_1 +blk.5.ffn_down_shexp.weight q5_1 +blk.6.ffn_down_exps.weight q5_1 +blk.6.ffn_down_shexp.weight q5_1 +blk.7.ffn_down_exps.weight q8_0 +blk.7.ffn_down_shexp.weight q8_0 blk.7.attn_v.weight q6_K -blk.10.ffn_down_exps.weight q6_K -blk.10.ffn_down_shexp.weight q6_K +blk.8.ffn_down_exps.weight q5_1 +blk.8.ffn_down_shexp.weight q5_1 +blk.9.ffn_down_exps.weight q5_1 +blk.9.ffn_down_shexp.weight q5_1 +blk.10.ffn_down_exps.weight q8_0 +blk.10.ffn_down_shexp.weight q8_0 blk.10.attn_v.weight q6_K -blk.13.ffn_down_exps.weight q6_K -blk.13.ffn_down_shexp.weight q6_K +blk.11.ffn_down_exps.weight q5_1 +blk.11.ffn_down_shexp.weight q5_1 +blk.12.ffn_down_exps.weight q5_1 +blk.12.ffn_down_shexp.weight q5_1 +blk.13.ffn_down_exps.weight q8_0 +blk.13.ffn_down_shexp.weight q8_0 blk.13.attn_v.weight q6_K -blk.16.ffn_down_exps.weight q6_K -blk.16.ffn_down_shexp.weight q6_K +blk.14.ffn_down_exps.weight q5_1 +blk.14.ffn_down_shexp.weight q5_1 +blk.15.ffn_down_exps.weight q5_1 +blk.15.ffn_down_shexp.weight q5_1 +blk.16.ffn_down_exps.weight q8_0 +blk.16.ffn_down_shexp.weight q8_0 blk.16.attn_v.weight q6_K -blk.19.ffn_down_exps.weight q6_K -blk.19.ffn_down_shexp.weight q6_K +blk.17.ffn_down_exps.weight q5_1 +blk.17.ffn_down_shexp.weight q5_1 +blk.18.ffn_down_exps.weight q5_1 +blk.18.ffn_down_shexp.weight q5_1 +blk.19.ffn_down_exps.weight q8_0 +blk.19.ffn_down_shexp.weight q8_0 blk.19.attn_v.weight q6_K -blk.22.ffn_down_exps.weight q6_K -blk.22.ffn_down_shexp.weight q6_K +blk.20.ffn_down_exps.weight q5_1 +blk.20.ffn_down_shexp.weight q5_1 +blk.21.ffn_down_exps.weight q5_1 +blk.21.ffn_down_shexp.weight q5_1 +blk.22.ffn_down_exps.weight q8_0 +blk.22.ffn_down_shexp.weight q8_0 blk.22.attn_v.weight q6_K -blk.25.ffn_down_exps.weight q6_K -blk.25.ffn_down_shexp.weight q6_K +blk.23.ffn_down_exps.weight q5_1 +blk.23.ffn_down_shexp.weight q5_1 +blk.24.ffn_down_exps.weight q5_1 +blk.24.ffn_down_shexp.weight q5_1 +blk.25.ffn_down_exps.weight q8_0 +blk.25.ffn_down_shexp.weight q8_0 blk.25.attn_v.weight q6_K -blk.28.ffn_down_exps.weight q6_K -blk.28.ffn_down_shexp.weight q6_K +blk.26.ffn_down_exps.weight q5_1 +blk.26.ffn_down_shexp.weight q5_1 +blk.27.ffn_down_exps.weight q5_1 +blk.27.ffn_down_shexp.weight q5_1 +blk.28.ffn_down_exps.weight q8_0 +blk.28.ffn_down_shexp.weight q8_0 blk.28.attn_v.weight q6_K -blk.31.ffn_down_exps.weight q6_K -blk.31.ffn_down_shexp.weight q6_K +blk.29.ffn_down_exps.weight q5_1 +blk.29.ffn_down_shexp.weight q5_1 +blk.30.ffn_down_exps.weight q5_1 +blk.30.ffn_down_shexp.weight q5_1 +blk.31.ffn_down_exps.weight q8_0 +blk.31.ffn_down_shexp.weight q8_0 blk.31.attn_v.weight q6_K -blk.34.ffn_down_exps.weight q6_K -blk.34.ffn_down_shexp.weight q6_K +blk.32.ffn_down_exps.weight q5_1 +blk.32.ffn_down_shexp.weight q5_1 +blk.33.ffn_down_exps.weight q5_1 +blk.33.ffn_down_shexp.weight q5_1 +blk.34.ffn_down_exps.weight q8_0 +blk.34.ffn_down_shexp.weight q8_0 blk.34.attn_v.weight q6_K -blk.37.ffn_down_exps.weight q6_K -blk.37.ffn_down_shexp.weight q6_K +blk.35.ffn_down_exps.weight q5_1 +blk.35.ffn_down_shexp.weight q5_1 +blk.36.ffn_down_exps.weight q5_1 +blk.36.ffn_down_shexp.weight q5_1 +blk.37.ffn_down_exps.weight q8_0 +blk.37.ffn_down_shexp.weight q8_0 blk.37.attn_v.weight q6_K -blk.40.ffn_down_exps.weight q6_K -blk.40.ffn_down_shexp.weight q6_K +blk.38.ffn_down_exps.weight q5_1 +blk.38.ffn_down_shexp.weight q5_1 +blk.39.ffn_down_exps.weight q5_1 +blk.39.ffn_down_shexp.weight q5_1 +blk.40.ffn_down_exps.weight q8_0 +blk.40.ffn_down_shexp.weight q8_0 blk.40.attn_v.weight q6_K -blk.41.ffn_down_exps.weight q6_K -blk.41.ffn_down_shexp.weight q6_K +blk.41.ffn_down_exps.weight q8_0 +blk.41.ffn_down_shexp.weight q8_0 blk.41.attn_v.weight q6_K -blk.42.ffn_down_exps.weight q6_K -blk.42.ffn_down_shexp.weight q6_K +blk.42.ffn_down_exps.weight q8_0 +blk.42.ffn_down_shexp.weight q8_0 blk.42.attn_v.weight q6_K -blk.43.ffn_down_exps.weight q6_K -blk.43.ffn_down_shexp.weight q6_K +blk.43.ffn_down_exps.weight q8_0 +blk.43.ffn_down_shexp.weight q8_0 blk.43.attn_v.weight q6_K -blk.44.ffn_down_exps.weight q6_K -blk.44.ffn_down_shexp.weight q6_K +blk.44.ffn_down_exps.weight q8_0 +blk.44.ffn_down_shexp.weight q8_0 blk.44.attn_v.weight q6_K output.weight q6_K -blk.45.ffn_down_exps.weight q6_K -blk.45.ffn_down_shexp.weight q6_K +blk.45.ffn_down_exps.weight q8_0 +blk.45.ffn_down_shexp.weight q8_0 blk.45.attn_v.weight q6_K [Q6_K] q6_K +blk.0.ffn_down.weight q8_0 +blk.1.ffn_down_exps.weight q8_0 +blk.1.ffn_down_shexp.weight q8_0 +blk.2.ffn_down_exps.weight q8_0 +blk.2.ffn_down_shexp.weight q8_0 +blk.3.ffn_down_exps.weight q8_0 +blk.3.ffn_down_shexp.weight q8_0 +blk.4.ffn_down_exps.weight q8_0 +blk.4.ffn_down_shexp.weight q8_0 +blk.5.ffn_down_exps.weight q8_0 +blk.5.ffn_down_shexp.weight q8_0 +blk.6.ffn_down_exps.weight q8_0 +blk.6.ffn_down_shexp.weight q8_0 +blk.7.ffn_down_exps.weight q8_0 +blk.7.ffn_down_shexp.weight q8_0 +blk.8.ffn_down_exps.weight q8_0 +blk.8.ffn_down_shexp.weight q8_0 +blk.9.ffn_down_exps.weight q8_0 +blk.9.ffn_down_shexp.weight q8_0 +blk.10.ffn_down_exps.weight q8_0 +blk.10.ffn_down_shexp.weight q8_0 +blk.11.ffn_down_exps.weight q8_0 +blk.11.ffn_down_shexp.weight q8_0 +blk.12.ffn_down_exps.weight q8_0 +blk.12.ffn_down_shexp.weight q8_0 +blk.13.ffn_down_exps.weight q8_0 +blk.13.ffn_down_shexp.weight q8_0 +blk.14.ffn_down_exps.weight q8_0 +blk.14.ffn_down_shexp.weight q8_0 +blk.15.ffn_down_exps.weight q8_0 +blk.15.ffn_down_shexp.weight q8_0 +blk.16.ffn_down_exps.weight q8_0 +blk.16.ffn_down_shexp.weight q8_0 +blk.17.ffn_down_exps.weight q8_0 +blk.17.ffn_down_shexp.weight q8_0 +blk.18.ffn_down_exps.weight q8_0 +blk.18.ffn_down_shexp.weight q8_0 +blk.19.ffn_down_exps.weight q8_0 +blk.19.ffn_down_shexp.weight q8_0 +blk.20.ffn_down_exps.weight q8_0 +blk.20.ffn_down_shexp.weight q8_0 +blk.21.ffn_down_exps.weight q8_0 +blk.21.ffn_down_shexp.weight q8_0 +blk.22.ffn_down_exps.weight q8_0 +blk.22.ffn_down_shexp.weight q8_0 +blk.23.ffn_down_exps.weight q8_0 +blk.23.ffn_down_shexp.weight q8_0 +blk.24.ffn_down_exps.weight q8_0 +blk.24.ffn_down_shexp.weight q8_0 +blk.25.ffn_down_exps.weight q8_0 +blk.25.ffn_down_shexp.weight q8_0 +blk.26.ffn_down_exps.weight q8_0 +blk.26.ffn_down_shexp.weight q8_0 +blk.27.ffn_down_exps.weight q8_0 +blk.27.ffn_down_shexp.weight q8_0 +blk.28.ffn_down_exps.weight q8_0 +blk.28.ffn_down_shexp.weight q8_0 +blk.29.ffn_down_exps.weight q8_0 +blk.29.ffn_down_shexp.weight q8_0 +blk.30.ffn_down_exps.weight q8_0 +blk.30.ffn_down_shexp.weight q8_0 +blk.31.ffn_down_exps.weight q8_0 +blk.31.ffn_down_shexp.weight q8_0 +blk.32.ffn_down_exps.weight q8_0 +blk.32.ffn_down_shexp.weight q8_0 +blk.33.ffn_down_exps.weight q8_0 +blk.33.ffn_down_shexp.weight q8_0 +blk.34.ffn_down_exps.weight q8_0 +blk.34.ffn_down_shexp.weight q8_0 +blk.35.ffn_down_exps.weight q8_0 +blk.35.ffn_down_shexp.weight q8_0 +blk.36.ffn_down_exps.weight q8_0 +blk.36.ffn_down_shexp.weight q8_0 +blk.37.ffn_down_exps.weight q8_0 +blk.37.ffn_down_shexp.weight q8_0 +blk.38.ffn_down_exps.weight q8_0 +blk.38.ffn_down_shexp.weight q8_0 +blk.39.ffn_down_exps.weight q8_0 +blk.39.ffn_down_shexp.weight q8_0 +blk.40.ffn_down_exps.weight q8_0 +blk.40.ffn_down_shexp.weight q8_0 +blk.41.ffn_down_exps.weight q8_0 +blk.41.ffn_down_shexp.weight q8_0 +blk.42.ffn_down_exps.weight q8_0 +blk.42.ffn_down_shexp.weight q8_0 +blk.43.ffn_down_exps.weight q8_0 +blk.43.ffn_down_shexp.weight q8_0 +blk.44.ffn_down_exps.weight q8_0 +blk.44.ffn_down_shexp.weight q8_0 +blk.45.ffn_down_exps.weight q8_0 +blk.45.ffn_down_shexp.weight q8_0 [IQ2_XXS] iq2_xxs token_embd.weight q2_K -blk.0.ffn_down.weight q2_K +blk.0.ffn_down.weight q4_0 blk.0.attn_v.weight q4_K -blk.1.ffn_down_exps.weight q2_K -blk.1.ffn_down_shexp.weight q2_K +blk.1.ffn_down_exps.weight q4_0 +blk.1.ffn_down_shexp.weight q4_0 blk.1.attn_v.weight q4_K -blk.2.ffn_down_exps.weight q2_K -blk.2.ffn_down_shexp.weight q2_K +blk.2.ffn_down_exps.weight q4_0 +blk.2.ffn_down_shexp.weight q4_0 blk.2.attn_v.weight q4_K +blk.3.ffn_down_exps.weight iq4_nl +blk.3.ffn_down_shexp.weight iq4_nl blk.3.attn_v.weight q4_K +blk.4.ffn_down_exps.weight iq4_nl +blk.4.ffn_down_shexp.weight iq4_nl blk.4.attn_v.weight q4_K +blk.5.ffn_down_exps.weight iq4_nl +blk.5.ffn_down_shexp.weight iq4_nl blk.5.attn_v.weight q4_K +blk.6.ffn_down_exps.weight iq4_nl +blk.6.ffn_down_shexp.weight iq4_nl blk.6.attn_v.weight q4_K +blk.7.ffn_down_exps.weight iq4_nl +blk.7.ffn_down_shexp.weight iq4_nl blk.7.attn_v.weight q4_K +blk.8.ffn_down_exps.weight iq4_nl +blk.8.ffn_down_shexp.weight iq4_nl blk.8.attn_v.weight q4_K +blk.9.ffn_down_exps.weight iq4_nl +blk.9.ffn_down_shexp.weight iq4_nl blk.9.attn_v.weight q4_K +blk.10.ffn_down_exps.weight iq4_nl +blk.10.ffn_down_shexp.weight iq4_nl blk.10.attn_v.weight q4_K +blk.11.ffn_down_exps.weight iq4_nl +blk.11.ffn_down_shexp.weight iq4_nl blk.11.attn_v.weight q4_K +blk.12.ffn_down_exps.weight iq4_nl +blk.12.ffn_down_shexp.weight iq4_nl blk.12.attn_v.weight q4_K +blk.13.ffn_down_exps.weight iq4_nl +blk.13.ffn_down_shexp.weight iq4_nl blk.13.attn_v.weight q4_K +blk.14.ffn_down_exps.weight iq4_nl +blk.14.ffn_down_shexp.weight iq4_nl blk.14.attn_v.weight q4_K +blk.15.ffn_down_exps.weight iq4_nl +blk.15.ffn_down_shexp.weight iq4_nl blk.15.attn_v.weight q4_K +blk.16.ffn_down_exps.weight iq4_nl +blk.16.ffn_down_shexp.weight iq4_nl blk.16.attn_v.weight q4_K +blk.17.ffn_down_exps.weight iq4_nl +blk.17.ffn_down_shexp.weight iq4_nl blk.17.attn_v.weight q4_K +blk.18.ffn_down_exps.weight iq4_nl +blk.18.ffn_down_shexp.weight iq4_nl blk.18.attn_v.weight q4_K +blk.19.ffn_down_exps.weight iq4_nl +blk.19.ffn_down_shexp.weight iq4_nl blk.19.attn_v.weight q4_K +blk.20.ffn_down_exps.weight iq4_nl +blk.20.ffn_down_shexp.weight iq4_nl blk.20.attn_v.weight q4_K +blk.21.ffn_down_exps.weight iq4_nl +blk.21.ffn_down_shexp.weight iq4_nl blk.21.attn_v.weight q4_K +blk.22.ffn_down_exps.weight iq4_nl +blk.22.ffn_down_shexp.weight iq4_nl blk.22.attn_v.weight q4_K +blk.23.ffn_down_exps.weight iq4_nl +blk.23.ffn_down_shexp.weight iq4_nl blk.23.attn_v.weight q4_K +blk.24.ffn_down_exps.weight iq4_nl +blk.24.ffn_down_shexp.weight iq4_nl blk.24.attn_v.weight q4_K +blk.25.ffn_down_exps.weight iq4_nl +blk.25.ffn_down_shexp.weight iq4_nl blk.25.attn_v.weight q4_K +blk.26.ffn_down_exps.weight iq4_nl +blk.26.ffn_down_shexp.weight iq4_nl blk.26.attn_v.weight q4_K +blk.27.ffn_down_exps.weight iq4_nl +blk.27.ffn_down_shexp.weight iq4_nl blk.27.attn_v.weight q4_K +blk.28.ffn_down_exps.weight iq4_nl +blk.28.ffn_down_shexp.weight iq4_nl blk.28.attn_v.weight q4_K +blk.29.ffn_down_exps.weight iq4_nl +blk.29.ffn_down_shexp.weight iq4_nl blk.29.attn_v.weight q4_K +blk.30.ffn_down_exps.weight iq4_nl +blk.30.ffn_down_shexp.weight iq4_nl blk.30.attn_v.weight q4_K +blk.31.ffn_down_exps.weight iq4_nl +blk.31.ffn_down_shexp.weight iq4_nl blk.31.attn_v.weight q4_K +blk.32.ffn_down_exps.weight iq4_nl +blk.32.ffn_down_shexp.weight iq4_nl blk.32.attn_v.weight q4_K +blk.33.ffn_down_exps.weight iq4_nl +blk.33.ffn_down_shexp.weight iq4_nl blk.33.attn_v.weight q4_K +blk.34.ffn_down_exps.weight iq4_nl +blk.34.ffn_down_shexp.weight iq4_nl blk.34.attn_v.weight q4_K +blk.35.ffn_down_exps.weight iq4_nl +blk.35.ffn_down_shexp.weight iq4_nl blk.35.attn_v.weight q4_K +blk.36.ffn_down_exps.weight iq4_nl +blk.36.ffn_down_shexp.weight iq4_nl blk.36.attn_v.weight q4_K +blk.37.ffn_down_exps.weight iq4_nl +blk.37.ffn_down_shexp.weight iq4_nl blk.37.attn_v.weight q4_K +blk.38.ffn_down_exps.weight iq4_nl +blk.38.ffn_down_shexp.weight iq4_nl blk.38.attn_v.weight q4_K +blk.39.ffn_down_exps.weight iq4_nl +blk.39.ffn_down_shexp.weight iq4_nl blk.39.attn_v.weight q4_K +blk.40.ffn_down_exps.weight iq4_nl +blk.40.ffn_down_shexp.weight iq4_nl blk.40.attn_v.weight q4_K +blk.41.ffn_down_exps.weight iq4_nl +blk.41.ffn_down_shexp.weight iq4_nl blk.41.attn_v.weight q4_K +blk.42.ffn_down_exps.weight iq4_nl +blk.42.ffn_down_shexp.weight iq4_nl blk.42.attn_v.weight q4_K +blk.43.ffn_down_exps.weight iq4_nl +blk.43.ffn_down_shexp.weight iq4_nl blk.43.attn_v.weight q4_K +blk.44.ffn_down_exps.weight iq4_nl +blk.44.ffn_down_shexp.weight iq4_nl blk.44.attn_v.weight q4_K output.weight q5_K +blk.45.ffn_down_exps.weight iq4_nl +blk.45.ffn_down_shexp.weight iq4_nl blk.45.attn_v.weight q4_K [IQ2_XS] iq2_xs token_embd.weight q2_K -blk.0.ffn_down.weight q2_K +blk.0.ffn_down.weight q4_0 blk.0.attn_v.weight q4_K -blk.1.ffn_down_exps.weight q2_K -blk.1.ffn_down_shexp.weight q2_K +blk.1.ffn_down_exps.weight q4_0 +blk.1.ffn_down_shexp.weight q4_0 blk.1.attn_v.weight q4_K -blk.2.ffn_down_exps.weight q2_K -blk.2.ffn_down_shexp.weight q2_K +blk.2.ffn_down_exps.weight q4_0 +blk.2.ffn_down_shexp.weight q4_0 blk.2.attn_v.weight q4_K +blk.3.ffn_down_exps.weight iq4_nl +blk.3.ffn_down_shexp.weight iq4_nl blk.3.attn_v.weight q4_K +blk.4.ffn_down_exps.weight iq4_nl +blk.4.ffn_down_shexp.weight iq4_nl blk.4.attn_v.weight q4_K +blk.5.ffn_down_exps.weight iq4_nl +blk.5.ffn_down_shexp.weight iq4_nl blk.5.attn_v.weight q4_K +blk.6.ffn_down_exps.weight iq4_nl +blk.6.ffn_down_shexp.weight iq4_nl blk.6.attn_v.weight q4_K +blk.7.ffn_down_exps.weight iq4_nl +blk.7.ffn_down_shexp.weight iq4_nl blk.7.attn_v.weight q4_K +blk.8.ffn_down_exps.weight iq4_nl +blk.8.ffn_down_shexp.weight iq4_nl blk.8.attn_v.weight q4_K +blk.9.ffn_down_exps.weight iq4_nl +blk.9.ffn_down_shexp.weight iq4_nl blk.9.attn_v.weight q4_K +blk.10.ffn_down_exps.weight iq4_nl +blk.10.ffn_down_shexp.weight iq4_nl blk.10.attn_v.weight q4_K +blk.11.ffn_down_exps.weight iq4_nl +blk.11.ffn_down_shexp.weight iq4_nl blk.11.attn_v.weight q4_K +blk.12.ffn_down_exps.weight iq4_nl +blk.12.ffn_down_shexp.weight iq4_nl blk.12.attn_v.weight q4_K +blk.13.ffn_down_exps.weight iq4_nl +blk.13.ffn_down_shexp.weight iq4_nl blk.13.attn_v.weight q4_K +blk.14.ffn_down_exps.weight iq4_nl +blk.14.ffn_down_shexp.weight iq4_nl blk.14.attn_v.weight q4_K +blk.15.ffn_down_exps.weight iq4_nl +blk.15.ffn_down_shexp.weight iq4_nl blk.15.attn_v.weight q4_K +blk.16.ffn_down_exps.weight iq4_nl +blk.16.ffn_down_shexp.weight iq4_nl blk.16.attn_v.weight q4_K +blk.17.ffn_down_exps.weight iq4_nl +blk.17.ffn_down_shexp.weight iq4_nl blk.17.attn_v.weight q4_K +blk.18.ffn_down_exps.weight iq4_nl +blk.18.ffn_down_shexp.weight iq4_nl blk.18.attn_v.weight q4_K +blk.19.ffn_down_exps.weight iq4_nl +blk.19.ffn_down_shexp.weight iq4_nl blk.19.attn_v.weight q4_K +blk.20.ffn_down_exps.weight iq4_nl +blk.20.ffn_down_shexp.weight iq4_nl blk.20.attn_v.weight q4_K +blk.21.ffn_down_exps.weight iq4_nl +blk.21.ffn_down_shexp.weight iq4_nl blk.21.attn_v.weight q4_K +blk.22.ffn_down_exps.weight iq4_nl +blk.22.ffn_down_shexp.weight iq4_nl blk.22.attn_v.weight q4_K +blk.23.ffn_down_exps.weight iq4_nl +blk.23.ffn_down_shexp.weight iq4_nl blk.23.attn_v.weight q4_K +blk.24.ffn_down_exps.weight iq4_nl +blk.24.ffn_down_shexp.weight iq4_nl blk.24.attn_v.weight q4_K +blk.25.ffn_down_exps.weight iq4_nl +blk.25.ffn_down_shexp.weight iq4_nl blk.25.attn_v.weight q4_K +blk.26.ffn_down_exps.weight iq4_nl +blk.26.ffn_down_shexp.weight iq4_nl blk.26.attn_v.weight q4_K +blk.27.ffn_down_exps.weight iq4_nl +blk.27.ffn_down_shexp.weight iq4_nl blk.27.attn_v.weight q4_K +blk.28.ffn_down_exps.weight iq4_nl +blk.28.ffn_down_shexp.weight iq4_nl blk.28.attn_v.weight q4_K +blk.29.ffn_down_exps.weight iq4_nl +blk.29.ffn_down_shexp.weight iq4_nl blk.29.attn_v.weight q4_K +blk.30.ffn_down_exps.weight iq4_nl +blk.30.ffn_down_shexp.weight iq4_nl blk.30.attn_v.weight q4_K +blk.31.ffn_down_exps.weight iq4_nl +blk.31.ffn_down_shexp.weight iq4_nl blk.31.attn_v.weight q4_K +blk.32.ffn_down_exps.weight iq4_nl +blk.32.ffn_down_shexp.weight iq4_nl blk.32.attn_v.weight q4_K +blk.33.ffn_down_exps.weight iq4_nl +blk.33.ffn_down_shexp.weight iq4_nl blk.33.attn_v.weight q4_K +blk.34.ffn_down_exps.weight iq4_nl +blk.34.ffn_down_shexp.weight iq4_nl blk.34.attn_v.weight q4_K +blk.35.ffn_down_exps.weight iq4_nl +blk.35.ffn_down_shexp.weight iq4_nl blk.35.attn_v.weight q4_K +blk.36.ffn_down_exps.weight iq4_nl +blk.36.ffn_down_shexp.weight iq4_nl blk.36.attn_v.weight q4_K +blk.37.ffn_down_exps.weight iq4_nl +blk.37.ffn_down_shexp.weight iq4_nl blk.37.attn_v.weight q4_K +blk.38.ffn_down_exps.weight iq4_nl +blk.38.ffn_down_shexp.weight iq4_nl blk.38.attn_v.weight q4_K +blk.39.ffn_down_exps.weight iq4_nl +blk.39.ffn_down_shexp.weight iq4_nl blk.39.attn_v.weight q4_K +blk.40.ffn_down_exps.weight iq4_nl +blk.40.ffn_down_shexp.weight iq4_nl blk.40.attn_v.weight q4_K +blk.41.ffn_down_exps.weight iq4_nl +blk.41.ffn_down_shexp.weight iq4_nl blk.41.attn_v.weight q4_K +blk.42.ffn_down_exps.weight iq4_nl +blk.42.ffn_down_shexp.weight iq4_nl blk.42.attn_v.weight q4_K +blk.43.ffn_down_exps.weight iq4_nl +blk.43.ffn_down_shexp.weight iq4_nl blk.43.attn_v.weight q4_K +blk.44.ffn_down_exps.weight iq4_nl +blk.44.ffn_down_shexp.weight iq4_nl blk.44.attn_v.weight q4_K output.weight q5_K +blk.45.ffn_down_exps.weight iq4_nl +blk.45.ffn_down_shexp.weight iq4_nl blk.45.attn_v.weight q4_K [Q2_K_S] q2_K -blk.0.ffn_down.weight q4_K +blk.0.ffn_down.weight q5_0 blk.0.attn_v.weight q4_K -blk.1.ffn_down_exps.weight q4_K -blk.1.ffn_down_shexp.weight q4_K +blk.1.ffn_down_exps.weight q5_0 +blk.1.ffn_down_shexp.weight q5_0 blk.1.attn_v.weight q4_K -blk.2.ffn_down_exps.weight q4_K -blk.2.ffn_down_shexp.weight q4_K +blk.2.ffn_down_exps.weight q5_0 +blk.2.ffn_down_shexp.weight q5_0 blk.2.attn_v.weight q4_K -blk.3.ffn_down_exps.weight q4_K -blk.3.ffn_down_shexp.weight q4_K +blk.3.ffn_down_exps.weight q5_0 +blk.3.ffn_down_shexp.weight q5_0 blk.3.attn_v.weight q4_K -blk.4.ffn_down_exps.weight q4_K -blk.4.ffn_down_shexp.weight q4_K +blk.4.ffn_down_exps.weight q5_0 +blk.4.ffn_down_shexp.weight q5_0 blk.4.attn_v.weight q4_K +blk.5.ffn_down_exps.weight q4_0 +blk.5.ffn_down_shexp.weight q4_0 blk.5.attn_v.weight q4_K +blk.6.ffn_down_exps.weight q4_0 +blk.6.ffn_down_shexp.weight q4_0 blk.6.attn_v.weight q4_K +blk.7.ffn_down_exps.weight q4_0 +blk.7.ffn_down_shexp.weight q4_0 blk.7.attn_v.weight q4_K +blk.8.ffn_down_exps.weight q4_0 +blk.8.ffn_down_shexp.weight q4_0 blk.8.attn_v.weight q4_K +blk.9.ffn_down_exps.weight q4_0 +blk.9.ffn_down_shexp.weight q4_0 blk.9.attn_v.weight q4_K +blk.10.ffn_down_exps.weight q4_0 +blk.10.ffn_down_shexp.weight q4_0 blk.10.attn_v.weight q4_K +blk.11.ffn_down_exps.weight q4_0 +blk.11.ffn_down_shexp.weight q4_0 blk.11.attn_v.weight q4_K +blk.12.ffn_down_exps.weight q4_0 +blk.12.ffn_down_shexp.weight q4_0 blk.12.attn_v.weight q4_K +blk.13.ffn_down_exps.weight q4_0 +blk.13.ffn_down_shexp.weight q4_0 blk.13.attn_v.weight q4_K +blk.14.ffn_down_exps.weight q4_0 +blk.14.ffn_down_shexp.weight q4_0 blk.14.attn_v.weight q4_K +blk.15.ffn_down_exps.weight q4_0 +blk.15.ffn_down_shexp.weight q4_0 blk.15.attn_v.weight q4_K +blk.16.ffn_down_exps.weight q4_0 +blk.16.ffn_down_shexp.weight q4_0 blk.16.attn_v.weight q4_K +blk.17.ffn_down_exps.weight q4_0 +blk.17.ffn_down_shexp.weight q4_0 blk.17.attn_v.weight q4_K +blk.18.ffn_down_exps.weight q4_0 +blk.18.ffn_down_shexp.weight q4_0 blk.18.attn_v.weight q4_K +blk.19.ffn_down_exps.weight q4_0 +blk.19.ffn_down_shexp.weight q4_0 blk.19.attn_v.weight q4_K +blk.20.ffn_down_exps.weight q4_0 +blk.20.ffn_down_shexp.weight q4_0 blk.20.attn_v.weight q4_K +blk.21.ffn_down_exps.weight q4_0 +blk.21.ffn_down_shexp.weight q4_0 blk.21.attn_v.weight q4_K +blk.22.ffn_down_exps.weight q4_0 +blk.22.ffn_down_shexp.weight q4_0 blk.22.attn_v.weight q4_K +blk.23.ffn_down_exps.weight q4_0 +blk.23.ffn_down_shexp.weight q4_0 blk.23.attn_v.weight q4_K +blk.24.ffn_down_exps.weight q4_0 +blk.24.ffn_down_shexp.weight q4_0 blk.24.attn_v.weight q4_K +blk.25.ffn_down_exps.weight q4_0 +blk.25.ffn_down_shexp.weight q4_0 blk.25.attn_v.weight q4_K +blk.26.ffn_down_exps.weight q4_0 +blk.26.ffn_down_shexp.weight q4_0 blk.26.attn_v.weight q4_K +blk.27.ffn_down_exps.weight q4_0 +blk.27.ffn_down_shexp.weight q4_0 blk.27.attn_v.weight q4_K +blk.28.ffn_down_exps.weight q4_0 +blk.28.ffn_down_shexp.weight q4_0 blk.28.attn_v.weight q4_K +blk.29.ffn_down_exps.weight q4_0 +blk.29.ffn_down_shexp.weight q4_0 blk.29.attn_v.weight q4_K +blk.30.ffn_down_exps.weight q4_0 +blk.30.ffn_down_shexp.weight q4_0 blk.30.attn_v.weight q4_K +blk.31.ffn_down_exps.weight q4_0 +blk.31.ffn_down_shexp.weight q4_0 blk.31.attn_v.weight q4_K +blk.32.ffn_down_exps.weight q4_0 +blk.32.ffn_down_shexp.weight q4_0 blk.32.attn_v.weight q4_K +blk.33.ffn_down_exps.weight q4_0 +blk.33.ffn_down_shexp.weight q4_0 blk.33.attn_v.weight q4_K +blk.34.ffn_down_exps.weight q4_0 +blk.34.ffn_down_shexp.weight q4_0 blk.34.attn_v.weight q4_K +blk.35.ffn_down_exps.weight q4_0 +blk.35.ffn_down_shexp.weight q4_0 blk.35.attn_v.weight q4_K +blk.36.ffn_down_exps.weight q4_0 +blk.36.ffn_down_shexp.weight q4_0 blk.36.attn_v.weight q4_K +blk.37.ffn_down_exps.weight q4_0 +blk.37.ffn_down_shexp.weight q4_0 blk.37.attn_v.weight q4_K +blk.38.ffn_down_exps.weight q4_0 +blk.38.ffn_down_shexp.weight q4_0 blk.38.attn_v.weight q4_K +blk.39.ffn_down_exps.weight q4_0 +blk.39.ffn_down_shexp.weight q4_0 blk.39.attn_v.weight q4_K +blk.40.ffn_down_exps.weight q4_0 +blk.40.ffn_down_shexp.weight q4_0 blk.40.attn_v.weight q4_K +blk.41.ffn_down_exps.weight q4_0 +blk.41.ffn_down_shexp.weight q4_0 blk.41.attn_v.weight q4_K +blk.42.ffn_down_exps.weight q4_0 +blk.42.ffn_down_shexp.weight q4_0 blk.42.attn_v.weight q4_K +blk.43.ffn_down_exps.weight q4_0 +blk.43.ffn_down_shexp.weight q4_0 blk.43.attn_v.weight q4_K +blk.44.ffn_down_exps.weight q4_0 +blk.44.ffn_down_shexp.weight q4_0 blk.44.attn_v.weight q4_K output.weight q6_K +blk.45.ffn_down_exps.weight q4_0 +blk.45.ffn_down_shexp.weight q4_0 blk.45.attn_v.weight q4_K [IQ3_XS] iq3_s +blk.0.ffn_down.weight iq4_nl blk.0.attn_k.weight iq3_xxs blk.0.attn_q.weight iq3_xxs blk.0.attn_v.weight q4_K +blk.1.ffn_down_exps.weight iq4_nl +blk.1.ffn_down_shexp.weight iq4_nl blk.1.attn_k.weight iq3_xxs blk.1.attn_q.weight iq3_xxs blk.1.attn_v.weight q4_K +blk.2.ffn_down_exps.weight iq4_nl +blk.2.ffn_down_shexp.weight iq4_nl blk.2.attn_k.weight iq3_xxs blk.2.attn_q.weight iq3_xxs blk.2.attn_v.weight q4_K +blk.3.ffn_down_exps.weight iq4_nl +blk.3.ffn_down_shexp.weight iq4_nl blk.3.attn_k.weight iq3_xxs blk.3.attn_q.weight iq3_xxs blk.3.attn_v.weight q4_K +blk.4.ffn_down_exps.weight iq4_nl +blk.4.ffn_down_shexp.weight iq4_nl blk.4.attn_k.weight iq3_xxs blk.4.attn_q.weight iq3_xxs blk.4.attn_v.weight q4_K +blk.5.ffn_down_exps.weight iq4_nl blk.5.ffn_gate_exps.weight iq3_xxs blk.5.ffn_up_exps.weight iq3_xxs +blk.5.ffn_down_shexp.weight iq4_nl blk.5.ffn_gate_shexp.weight iq3_xxs blk.5.ffn_up_shexp.weight iq3_xxs blk.5.attn_k.weight iq3_xxs blk.5.attn_q.weight iq3_xxs blk.5.attn_v.weight q4_K +blk.6.ffn_down_exps.weight iq4_nl blk.6.ffn_gate_exps.weight iq3_xxs blk.6.ffn_up_exps.weight iq3_xxs +blk.6.ffn_down_shexp.weight iq4_nl blk.6.ffn_gate_shexp.weight iq3_xxs blk.6.ffn_up_shexp.weight iq3_xxs blk.6.attn_k.weight iq3_xxs blk.6.attn_q.weight iq3_xxs blk.6.attn_v.weight q4_K +blk.7.ffn_down_exps.weight iq4_nl blk.7.ffn_gate_exps.weight iq3_xxs blk.7.ffn_up_exps.weight iq3_xxs +blk.7.ffn_down_shexp.weight iq4_nl blk.7.ffn_gate_shexp.weight iq3_xxs blk.7.ffn_up_shexp.weight iq3_xxs blk.7.attn_k.weight iq3_xxs blk.7.attn_q.weight iq3_xxs blk.7.attn_v.weight q4_K +blk.8.ffn_down_exps.weight iq4_nl blk.8.ffn_gate_exps.weight iq3_xxs blk.8.ffn_up_exps.weight iq3_xxs +blk.8.ffn_down_shexp.weight iq4_nl blk.8.ffn_gate_shexp.weight iq3_xxs blk.8.ffn_up_shexp.weight iq3_xxs blk.8.attn_k.weight iq3_xxs blk.8.attn_q.weight iq3_xxs blk.8.attn_v.weight q4_K +blk.9.ffn_down_exps.weight iq4_nl blk.9.ffn_gate_exps.weight iq3_xxs blk.9.ffn_up_exps.weight iq3_xxs +blk.9.ffn_down_shexp.weight iq4_nl blk.9.ffn_gate_shexp.weight iq3_xxs blk.9.ffn_up_shexp.weight iq3_xxs blk.9.attn_k.weight iq3_xxs blk.9.attn_q.weight iq3_xxs blk.9.attn_v.weight q4_K +blk.10.ffn_down_exps.weight iq4_nl blk.10.ffn_gate_exps.weight iq3_xxs blk.10.ffn_up_exps.weight iq3_xxs +blk.10.ffn_down_shexp.weight iq4_nl blk.10.ffn_gate_shexp.weight iq3_xxs blk.10.ffn_up_shexp.weight iq3_xxs blk.10.attn_k.weight iq3_xxs blk.10.attn_q.weight iq3_xxs blk.10.attn_v.weight q4_K +blk.11.ffn_down_exps.weight iq4_nl blk.11.ffn_gate_exps.weight iq3_xxs blk.11.ffn_up_exps.weight iq3_xxs +blk.11.ffn_down_shexp.weight iq4_nl blk.11.ffn_gate_shexp.weight iq3_xxs blk.11.ffn_up_shexp.weight iq3_xxs blk.11.attn_k.weight iq3_xxs blk.11.attn_q.weight iq3_xxs blk.11.attn_v.weight q4_K +blk.12.ffn_down_exps.weight iq4_nl blk.12.ffn_gate_exps.weight iq3_xxs blk.12.ffn_up_exps.weight iq3_xxs +blk.12.ffn_down_shexp.weight iq4_nl blk.12.ffn_gate_shexp.weight iq3_xxs blk.12.ffn_up_shexp.weight iq3_xxs blk.12.attn_k.weight iq3_xxs blk.12.attn_q.weight iq3_xxs blk.12.attn_v.weight q4_K +blk.13.ffn_down_exps.weight iq4_nl blk.13.ffn_gate_exps.weight iq3_xxs blk.13.ffn_up_exps.weight iq3_xxs +blk.13.ffn_down_shexp.weight iq4_nl blk.13.ffn_gate_shexp.weight iq3_xxs blk.13.ffn_up_shexp.weight iq3_xxs blk.13.attn_k.weight iq3_xxs blk.13.attn_q.weight iq3_xxs blk.13.attn_v.weight q4_K +blk.14.ffn_down_exps.weight iq4_nl blk.14.ffn_gate_exps.weight iq3_xxs blk.14.ffn_up_exps.weight iq3_xxs +blk.14.ffn_down_shexp.weight iq4_nl blk.14.ffn_gate_shexp.weight iq3_xxs blk.14.ffn_up_shexp.weight iq3_xxs blk.14.attn_k.weight iq3_xxs blk.14.attn_q.weight iq3_xxs blk.14.attn_v.weight q4_K +blk.15.ffn_down_exps.weight iq4_nl blk.15.ffn_gate_exps.weight iq3_xxs blk.15.ffn_up_exps.weight iq3_xxs +blk.15.ffn_down_shexp.weight iq4_nl blk.15.ffn_gate_shexp.weight iq3_xxs blk.15.ffn_up_shexp.weight iq3_xxs blk.15.attn_k.weight iq3_xxs blk.15.attn_q.weight iq3_xxs blk.15.attn_v.weight q4_K +blk.16.ffn_down_exps.weight iq4_nl blk.16.ffn_gate_exps.weight iq3_xxs blk.16.ffn_up_exps.weight iq3_xxs +blk.16.ffn_down_shexp.weight iq4_nl blk.16.ffn_gate_shexp.weight iq3_xxs blk.16.ffn_up_shexp.weight iq3_xxs blk.16.attn_k.weight iq3_xxs blk.16.attn_q.weight iq3_xxs blk.16.attn_v.weight q4_K +blk.17.ffn_down_exps.weight iq4_nl blk.17.ffn_gate_exps.weight iq3_xxs blk.17.ffn_up_exps.weight iq3_xxs +blk.17.ffn_down_shexp.weight iq4_nl blk.17.ffn_gate_shexp.weight iq3_xxs blk.17.ffn_up_shexp.weight iq3_xxs blk.17.attn_k.weight iq3_xxs blk.17.attn_q.weight iq3_xxs blk.17.attn_v.weight q4_K +blk.18.ffn_down_exps.weight iq4_nl blk.18.ffn_gate_exps.weight iq3_xxs blk.18.ffn_up_exps.weight iq3_xxs +blk.18.ffn_down_shexp.weight iq4_nl blk.18.ffn_gate_shexp.weight iq3_xxs blk.18.ffn_up_shexp.weight iq3_xxs blk.18.attn_k.weight iq3_xxs blk.18.attn_q.weight iq3_xxs blk.18.attn_v.weight q4_K +blk.19.ffn_down_exps.weight iq4_nl blk.19.ffn_gate_exps.weight iq3_xxs blk.19.ffn_up_exps.weight iq3_xxs +blk.19.ffn_down_shexp.weight iq4_nl blk.19.ffn_gate_shexp.weight iq3_xxs blk.19.ffn_up_shexp.weight iq3_xxs blk.19.attn_k.weight iq3_xxs blk.19.attn_q.weight iq3_xxs blk.19.attn_v.weight q4_K +blk.20.ffn_down_exps.weight iq4_nl blk.20.ffn_gate_exps.weight iq3_xxs blk.20.ffn_up_exps.weight iq3_xxs +blk.20.ffn_down_shexp.weight iq4_nl blk.20.ffn_gate_shexp.weight iq3_xxs blk.20.ffn_up_shexp.weight iq3_xxs blk.20.attn_k.weight iq3_xxs blk.20.attn_q.weight iq3_xxs blk.20.attn_v.weight q4_K +blk.21.ffn_down_exps.weight iq4_nl blk.21.ffn_gate_exps.weight iq3_xxs blk.21.ffn_up_exps.weight iq3_xxs +blk.21.ffn_down_shexp.weight iq4_nl blk.21.ffn_gate_shexp.weight iq3_xxs blk.21.ffn_up_shexp.weight iq3_xxs blk.21.attn_k.weight iq3_xxs blk.21.attn_q.weight iq3_xxs blk.21.attn_v.weight q4_K +blk.22.ffn_down_exps.weight iq4_nl blk.22.ffn_gate_exps.weight iq3_xxs blk.22.ffn_up_exps.weight iq3_xxs +blk.22.ffn_down_shexp.weight iq4_nl blk.22.ffn_gate_shexp.weight iq3_xxs blk.22.ffn_up_shexp.weight iq3_xxs blk.22.attn_k.weight iq3_xxs blk.22.attn_q.weight iq3_xxs blk.22.attn_v.weight q4_K +blk.23.ffn_down_exps.weight iq4_nl blk.23.ffn_gate_exps.weight iq3_xxs blk.23.ffn_up_exps.weight iq3_xxs +blk.23.ffn_down_shexp.weight iq4_nl blk.23.ffn_gate_shexp.weight iq3_xxs blk.23.ffn_up_shexp.weight iq3_xxs blk.23.attn_k.weight iq3_xxs blk.23.attn_q.weight iq3_xxs blk.23.attn_v.weight q4_K +blk.24.ffn_down_exps.weight iq4_nl blk.24.ffn_gate_exps.weight iq3_xxs blk.24.ffn_up_exps.weight iq3_xxs +blk.24.ffn_down_shexp.weight iq4_nl blk.24.ffn_gate_shexp.weight iq3_xxs blk.24.ffn_up_shexp.weight iq3_xxs blk.24.attn_k.weight iq3_xxs blk.24.attn_q.weight iq3_xxs blk.24.attn_v.weight q4_K +blk.25.ffn_down_exps.weight iq4_nl blk.25.ffn_gate_exps.weight iq3_xxs blk.25.ffn_up_exps.weight iq3_xxs +blk.25.ffn_down_shexp.weight iq4_nl blk.25.ffn_gate_shexp.weight iq3_xxs blk.25.ffn_up_shexp.weight iq3_xxs blk.25.attn_k.weight iq3_xxs blk.25.attn_q.weight iq3_xxs blk.25.attn_v.weight q4_K +blk.26.ffn_down_exps.weight iq4_nl blk.26.ffn_gate_exps.weight iq3_xxs blk.26.ffn_up_exps.weight iq3_xxs +blk.26.ffn_down_shexp.weight iq4_nl blk.26.ffn_gate_shexp.weight iq3_xxs blk.26.ffn_up_shexp.weight iq3_xxs blk.26.attn_k.weight iq3_xxs blk.26.attn_q.weight iq3_xxs blk.26.attn_v.weight q4_K +blk.27.ffn_down_exps.weight iq4_nl blk.27.ffn_gate_exps.weight iq3_xxs blk.27.ffn_up_exps.weight iq3_xxs +blk.27.ffn_down_shexp.weight iq4_nl blk.27.ffn_gate_shexp.weight iq3_xxs blk.27.ffn_up_shexp.weight iq3_xxs blk.27.attn_k.weight iq3_xxs blk.27.attn_q.weight iq3_xxs blk.27.attn_v.weight q4_K +blk.28.ffn_down_exps.weight iq4_nl blk.28.ffn_gate_exps.weight iq3_xxs blk.28.ffn_up_exps.weight iq3_xxs +blk.28.ffn_down_shexp.weight iq4_nl blk.28.ffn_gate_shexp.weight iq3_xxs blk.28.ffn_up_shexp.weight iq3_xxs blk.28.attn_k.weight iq3_xxs blk.28.attn_q.weight iq3_xxs blk.28.attn_v.weight q4_K +blk.29.ffn_down_exps.weight iq4_nl blk.29.ffn_gate_exps.weight iq3_xxs blk.29.ffn_up_exps.weight iq3_xxs +blk.29.ffn_down_shexp.weight iq4_nl blk.29.ffn_gate_shexp.weight iq3_xxs blk.29.ffn_up_shexp.weight iq3_xxs blk.29.attn_k.weight iq3_xxs blk.29.attn_q.weight iq3_xxs blk.29.attn_v.weight q4_K +blk.30.ffn_down_exps.weight iq4_nl blk.30.ffn_gate_exps.weight iq3_xxs blk.30.ffn_up_exps.weight iq3_xxs +blk.30.ffn_down_shexp.weight iq4_nl blk.30.ffn_gate_shexp.weight iq3_xxs blk.30.ffn_up_shexp.weight iq3_xxs blk.30.attn_k.weight iq3_xxs blk.30.attn_q.weight iq3_xxs blk.30.attn_v.weight q4_K +blk.31.ffn_down_exps.weight iq4_nl blk.31.ffn_gate_exps.weight iq3_xxs blk.31.ffn_up_exps.weight iq3_xxs +blk.31.ffn_down_shexp.weight iq4_nl blk.31.ffn_gate_shexp.weight iq3_xxs blk.31.ffn_up_shexp.weight iq3_xxs blk.31.attn_k.weight iq3_xxs blk.31.attn_q.weight iq3_xxs blk.31.attn_v.weight q4_K +blk.32.ffn_down_exps.weight iq4_nl blk.32.ffn_gate_exps.weight iq3_xxs blk.32.ffn_up_exps.weight iq3_xxs +blk.32.ffn_down_shexp.weight iq4_nl blk.32.ffn_gate_shexp.weight iq3_xxs blk.32.ffn_up_shexp.weight iq3_xxs blk.32.attn_k.weight iq3_xxs blk.32.attn_q.weight iq3_xxs blk.32.attn_v.weight q4_K +blk.33.ffn_down_exps.weight iq4_nl blk.33.ffn_gate_exps.weight iq3_xxs blk.33.ffn_up_exps.weight iq3_xxs +blk.33.ffn_down_shexp.weight iq4_nl blk.33.ffn_gate_shexp.weight iq3_xxs blk.33.ffn_up_shexp.weight iq3_xxs blk.33.attn_k.weight iq3_xxs blk.33.attn_q.weight iq3_xxs blk.33.attn_v.weight q4_K +blk.34.ffn_down_exps.weight iq4_nl blk.34.ffn_gate_exps.weight iq3_xxs blk.34.ffn_up_exps.weight iq3_xxs blk.34.ffn_gate_shexp.weight iq3_xxs +blk.34.ffn_down_shexp.weight iq4_nl blk.34.ffn_up_shexp.weight iq3_xxs blk.34.attn_k.weight iq3_xxs blk.34.attn_q.weight iq3_xxs blk.34.attn_v.weight q4_K +blk.35.ffn_down_exps.weight iq4_nl blk.35.ffn_gate_exps.weight iq3_xxs blk.35.ffn_up_exps.weight iq3_xxs +blk.35.ffn_down_shexp.weight iq4_nl blk.35.ffn_gate_shexp.weight iq3_xxs blk.35.ffn_up_shexp.weight iq3_xxs blk.35.attn_k.weight iq3_xxs blk.35.attn_q.weight iq3_xxs blk.35.attn_v.weight q4_K +blk.36.ffn_down_exps.weight iq4_nl blk.36.ffn_gate_exps.weight iq3_xxs blk.36.ffn_up_exps.weight iq3_xxs +blk.36.ffn_down_shexp.weight iq4_nl blk.36.ffn_gate_shexp.weight iq3_xxs blk.36.ffn_up_shexp.weight iq3_xxs blk.36.attn_k.weight iq3_xxs blk.36.attn_q.weight iq3_xxs blk.36.attn_v.weight q4_K +blk.37.ffn_down_exps.weight iq4_nl blk.37.ffn_gate_exps.weight iq3_xxs blk.37.ffn_up_exps.weight iq3_xxs +blk.37.ffn_down_shexp.weight iq4_nl blk.37.ffn_gate_shexp.weight iq3_xxs blk.37.ffn_up_shexp.weight iq3_xxs blk.37.attn_k.weight iq3_xxs blk.37.attn_q.weight iq3_xxs blk.37.attn_v.weight q4_K +blk.38.ffn_down_exps.weight iq4_nl blk.38.ffn_gate_exps.weight iq3_xxs blk.38.ffn_up_exps.weight iq3_xxs +blk.38.ffn_down_shexp.weight iq4_nl blk.38.ffn_gate_shexp.weight iq3_xxs blk.38.ffn_up_shexp.weight iq3_xxs blk.38.attn_k.weight iq3_xxs blk.38.attn_q.weight iq3_xxs blk.38.attn_v.weight q4_K +blk.39.ffn_down_exps.weight iq4_nl blk.39.ffn_gate_exps.weight iq3_xxs blk.39.ffn_up_exps.weight iq3_xxs +blk.39.ffn_down_shexp.weight iq4_nl blk.39.ffn_gate_shexp.weight iq3_xxs blk.39.ffn_up_shexp.weight iq3_xxs blk.39.attn_k.weight iq3_xxs blk.39.attn_q.weight iq3_xxs blk.39.attn_v.weight q4_K +blk.40.ffn_down_exps.weight iq4_nl +blk.40.ffn_down_shexp.weight iq4_nl blk.40.attn_k.weight iq3_xxs blk.40.attn_q.weight iq3_xxs blk.40.attn_v.weight q4_K +blk.41.ffn_down_exps.weight iq4_nl +blk.41.ffn_down_shexp.weight iq4_nl blk.41.attn_k.weight iq3_xxs blk.41.attn_q.weight iq3_xxs blk.41.attn_v.weight q4_K +blk.42.ffn_down_exps.weight iq4_nl +blk.42.ffn_down_shexp.weight iq4_nl blk.42.attn_k.weight iq3_xxs blk.42.attn_q.weight iq3_xxs blk.42.attn_v.weight q4_K +blk.43.ffn_down_exps.weight iq4_nl +blk.43.ffn_down_shexp.weight iq4_nl blk.43.attn_k.weight iq3_xxs blk.43.attn_q.weight iq3_xxs blk.43.attn_v.weight q4_K +blk.44.ffn_down_exps.weight iq4_nl +blk.44.ffn_down_shexp.weight iq4_nl blk.44.attn_k.weight iq3_xxs blk.44.attn_q.weight iq3_xxs blk.44.attn_v.weight q4_K output.weight q6_K +blk.45.ffn_down_exps.weight iq4_nl +blk.45.ffn_down_shexp.weight iq4_nl blk.45.attn_k.weight iq3_xxs blk.45.attn_q.weight iq3_xxs blk.45.attn_v.weight q4_K [IQ3_XXS] iq3_xxs token_embd.weight iq3_s -blk.0.ffn_down.weight q4_K +blk.0.ffn_down.weight q5_0 blk.0.attn_k.weight iq2_s blk.0.attn_output.weight iq3_s blk.0.attn_q.weight iq2_s blk.0.attn_v.weight q4_K -blk.1.ffn_down_exps.weight q4_K -blk.1.ffn_down_shexp.weight q4_K +blk.1.ffn_down_exps.weight q5_0 +blk.1.ffn_down_shexp.weight q5_0 blk.1.attn_k.weight iq2_s blk.1.attn_output.weight iq3_s blk.1.attn_q.weight iq2_s blk.1.attn_v.weight q4_K -blk.2.ffn_down_exps.weight q4_K -blk.2.ffn_down_shexp.weight q4_K +blk.2.ffn_down_exps.weight q5_0 +blk.2.ffn_down_shexp.weight q5_0 blk.2.attn_k.weight iq2_s blk.2.attn_output.weight iq3_s blk.2.attn_q.weight iq2_s blk.2.attn_v.weight q4_K -blk.3.ffn_down_exps.weight q4_K -blk.3.ffn_down_shexp.weight q4_K +blk.3.ffn_down_exps.weight q5_0 +blk.3.ffn_down_shexp.weight q5_0 blk.3.attn_k.weight iq2_s blk.3.attn_output.weight iq3_s blk.3.attn_q.weight iq2_s blk.3.attn_v.weight q4_K -blk.4.ffn_down_exps.weight q4_K -blk.4.ffn_down_shexp.weight q4_K +blk.4.ffn_down_exps.weight q5_0 +blk.4.ffn_down_shexp.weight q5_0 blk.4.attn_k.weight iq2_s blk.4.attn_output.weight iq3_s blk.4.attn_q.weight iq2_s blk.4.attn_v.weight q4_K -blk.5.ffn_down_exps.weight q3_K -blk.5.ffn_down_shexp.weight q3_K +blk.5.ffn_down_exps.weight q4_0 +blk.5.ffn_down_shexp.weight q4_0 blk.5.attn_k.weight iq2_s blk.5.attn_output.weight iq3_s blk.5.attn_q.weight iq2_s blk.5.attn_v.weight q4_K -blk.6.ffn_down_exps.weight q3_K -blk.6.ffn_down_shexp.weight q3_K +blk.6.ffn_down_exps.weight q4_0 +blk.6.ffn_down_shexp.weight q4_0 blk.6.attn_k.weight iq2_s blk.6.attn_output.weight iq3_s blk.6.attn_q.weight iq2_s blk.6.attn_v.weight q4_K -blk.7.ffn_down_exps.weight q3_K -blk.7.ffn_down_shexp.weight q3_K +blk.7.ffn_down_exps.weight q4_0 +blk.7.ffn_down_shexp.weight q4_0 blk.7.attn_k.weight iq2_s blk.7.attn_output.weight iq3_s blk.7.attn_q.weight iq2_s blk.7.attn_v.weight q4_K -blk.8.ffn_down_exps.weight q3_K -blk.8.ffn_down_shexp.weight q3_K +blk.8.ffn_down_exps.weight q4_0 +blk.8.ffn_down_shexp.weight q4_0 blk.8.attn_k.weight iq2_s blk.8.attn_output.weight iq3_s blk.8.attn_q.weight iq2_s blk.8.attn_v.weight q4_K -blk.9.ffn_down_exps.weight q3_K -blk.9.ffn_down_shexp.weight q3_K +blk.9.ffn_down_exps.weight q4_0 +blk.9.ffn_down_shexp.weight q4_0 blk.9.attn_k.weight iq2_s blk.9.attn_output.weight iq3_s blk.9.attn_q.weight iq2_s blk.9.attn_v.weight q4_K -blk.10.ffn_down_exps.weight q3_K -blk.10.ffn_down_shexp.weight q3_K +blk.10.ffn_down_exps.weight q4_0 +blk.10.ffn_down_shexp.weight q4_0 blk.10.attn_k.weight iq2_s blk.10.attn_output.weight iq3_s blk.10.attn_q.weight iq2_s blk.10.attn_v.weight q4_K -blk.11.ffn_down_exps.weight q3_K -blk.11.ffn_down_shexp.weight q3_K +blk.11.ffn_down_exps.weight q4_0 +blk.11.ffn_down_shexp.weight q4_0 blk.11.attn_k.weight iq2_s blk.11.attn_output.weight iq3_s blk.11.attn_q.weight iq2_s blk.11.attn_v.weight q4_K -blk.12.ffn_down_exps.weight q3_K -blk.12.ffn_down_shexp.weight q3_K +blk.12.ffn_down_exps.weight q4_0 +blk.12.ffn_down_shexp.weight q4_0 blk.12.attn_k.weight iq2_s blk.12.attn_output.weight iq3_s blk.12.attn_q.weight iq2_s blk.12.attn_v.weight q4_K -blk.13.ffn_down_exps.weight q3_K -blk.13.ffn_down_shexp.weight q3_K +blk.13.ffn_down_exps.weight q4_0 +blk.13.ffn_down_shexp.weight q4_0 blk.13.attn_k.weight iq2_s blk.13.attn_output.weight iq3_s blk.13.attn_q.weight iq2_s blk.13.attn_v.weight q4_K -blk.14.ffn_down_exps.weight q3_K -blk.14.ffn_down_shexp.weight q3_K +blk.14.ffn_down_exps.weight q4_0 +blk.14.ffn_down_shexp.weight q4_0 blk.14.attn_k.weight iq2_s blk.14.attn_output.weight iq3_s blk.14.attn_q.weight iq2_s blk.14.attn_v.weight q4_K -blk.15.ffn_down_exps.weight q3_K -blk.15.ffn_down_shexp.weight q3_K +blk.15.ffn_down_exps.weight q4_0 +blk.15.ffn_down_shexp.weight q4_0 blk.15.attn_k.weight iq2_s blk.15.attn_output.weight iq3_s blk.15.attn_q.weight iq2_s blk.15.attn_v.weight q4_K -blk.16.ffn_down_exps.weight q3_K -blk.16.ffn_down_shexp.weight q3_K +blk.16.ffn_down_exps.weight q4_0 +blk.16.ffn_down_shexp.weight q4_0 blk.16.attn_k.weight iq2_s blk.16.attn_output.weight iq3_s blk.16.attn_q.weight iq2_s blk.16.attn_v.weight q4_K -blk.17.ffn_down_exps.weight q3_K -blk.17.ffn_down_shexp.weight q3_K +blk.17.ffn_down_exps.weight q4_0 +blk.17.ffn_down_shexp.weight q4_0 blk.17.attn_k.weight iq2_s blk.17.attn_output.weight iq3_s blk.17.attn_q.weight iq2_s blk.17.attn_v.weight q4_K -blk.18.ffn_down_exps.weight q3_K -blk.18.ffn_down_shexp.weight q3_K +blk.18.ffn_down_exps.weight q4_0 +blk.18.ffn_down_shexp.weight q4_0 blk.18.attn_k.weight iq2_s blk.18.attn_output.weight iq3_s blk.18.attn_q.weight iq2_s blk.18.attn_v.weight q4_K -blk.19.ffn_down_exps.weight q3_K -blk.19.ffn_down_shexp.weight q3_K +blk.19.ffn_down_exps.weight q4_0 +blk.19.ffn_down_shexp.weight q4_0 blk.19.attn_k.weight iq2_s blk.19.attn_output.weight iq3_s blk.19.attn_q.weight iq2_s blk.19.attn_v.weight q4_K -blk.20.ffn_down_exps.weight q3_K -blk.20.ffn_down_shexp.weight q3_K +blk.20.ffn_down_exps.weight q4_0 +blk.20.ffn_down_shexp.weight q4_0 blk.20.attn_k.weight iq2_s blk.20.attn_output.weight iq3_s blk.20.attn_q.weight iq2_s blk.20.attn_v.weight q4_K -blk.21.ffn_down_exps.weight q3_K -blk.21.ffn_down_shexp.weight q3_K +blk.21.ffn_down_exps.weight q4_0 +blk.21.ffn_down_shexp.weight q4_0 blk.21.attn_k.weight iq2_s blk.21.attn_output.weight iq3_s blk.21.attn_q.weight iq2_s blk.21.attn_v.weight q4_K -blk.22.ffn_down_exps.weight q3_K -blk.22.ffn_down_shexp.weight q3_K +blk.22.ffn_down_exps.weight q4_0 +blk.22.ffn_down_shexp.weight q4_0 blk.22.attn_k.weight iq2_s blk.22.attn_output.weight iq3_s blk.22.attn_q.weight iq2_s blk.22.attn_v.weight q4_K -blk.23.ffn_down_exps.weight q3_K -blk.23.ffn_down_shexp.weight q3_K +blk.23.ffn_down_exps.weight q4_0 +blk.23.ffn_down_shexp.weight q4_0 blk.23.attn_k.weight iq2_s blk.23.attn_output.weight iq3_s blk.23.attn_q.weight iq2_s blk.23.attn_v.weight q4_K -blk.24.ffn_down_exps.weight q3_K -blk.24.ffn_down_shexp.weight q3_K +blk.24.ffn_down_exps.weight q4_0 +blk.24.ffn_down_shexp.weight q4_0 blk.24.attn_k.weight iq2_s blk.24.attn_output.weight iq3_s blk.24.attn_q.weight iq2_s blk.24.attn_v.weight q4_K -blk.25.ffn_down_exps.weight q3_K -blk.25.ffn_down_shexp.weight q3_K +blk.25.ffn_down_exps.weight q4_0 +blk.25.ffn_down_shexp.weight q4_0 blk.25.attn_k.weight iq2_s blk.25.attn_output.weight iq3_s blk.25.attn_q.weight iq2_s blk.25.attn_v.weight q4_K -blk.26.ffn_down_exps.weight q3_K -blk.26.ffn_down_shexp.weight q3_K +blk.26.ffn_down_exps.weight q4_0 +blk.26.ffn_down_shexp.weight q4_0 blk.26.attn_k.weight iq2_s blk.26.attn_output.weight iq3_s blk.26.attn_q.weight iq2_s blk.26.attn_v.weight q4_K -blk.27.ffn_down_exps.weight q3_K -blk.27.ffn_down_shexp.weight q3_K +blk.27.ffn_down_exps.weight q4_0 +blk.27.ffn_down_shexp.weight q4_0 blk.27.attn_k.weight iq2_s blk.27.attn_output.weight iq3_s blk.27.attn_q.weight iq2_s blk.27.attn_v.weight q4_K -blk.28.ffn_down_exps.weight q3_K -blk.28.ffn_down_shexp.weight q3_K +blk.28.ffn_down_exps.weight q4_0 +blk.28.ffn_down_shexp.weight q4_0 blk.28.attn_k.weight iq2_s blk.28.attn_output.weight iq3_s blk.28.attn_q.weight iq2_s blk.28.attn_v.weight q4_K -blk.29.ffn_down_exps.weight q3_K -blk.29.ffn_down_shexp.weight q3_K +blk.29.ffn_down_exps.weight q4_0 +blk.29.ffn_down_shexp.weight q4_0 blk.29.attn_k.weight iq2_s blk.29.attn_output.weight iq3_s blk.29.attn_q.weight iq2_s blk.29.attn_v.weight q4_K -blk.30.ffn_down_exps.weight q3_K -blk.30.ffn_down_shexp.weight q3_K +blk.30.ffn_down_exps.weight q4_0 +blk.30.ffn_down_shexp.weight q4_0 blk.30.attn_k.weight iq2_s blk.30.attn_output.weight iq3_s blk.30.attn_q.weight iq2_s blk.30.attn_v.weight q4_K -blk.31.ffn_down_exps.weight q3_K -blk.31.ffn_down_shexp.weight q3_K +blk.31.ffn_down_exps.weight q4_0 +blk.31.ffn_down_shexp.weight q4_0 blk.31.attn_k.weight iq2_s blk.31.attn_output.weight iq3_s blk.31.attn_q.weight iq2_s blk.31.attn_v.weight q4_K -blk.32.ffn_down_exps.weight q3_K -blk.32.ffn_down_shexp.weight q3_K +blk.32.ffn_down_exps.weight q4_0 +blk.32.ffn_down_shexp.weight q4_0 blk.32.attn_k.weight iq2_s blk.32.attn_output.weight iq3_s blk.32.attn_q.weight iq2_s blk.32.attn_v.weight q4_K -blk.33.ffn_down_exps.weight q3_K -blk.33.ffn_down_shexp.weight q3_K +blk.33.ffn_down_exps.weight q4_0 +blk.33.ffn_down_shexp.weight q4_0 blk.33.attn_k.weight iq2_s blk.33.attn_output.weight iq3_s blk.33.attn_q.weight iq2_s blk.33.attn_v.weight q4_K -blk.34.ffn_down_exps.weight q3_K +blk.34.ffn_down_exps.weight q4_0 blk.34.attn_output.weight iq3_s -blk.34.ffn_down_shexp.weight q3_K +blk.34.ffn_down_shexp.weight q4_0 blk.34.attn_k.weight iq2_s blk.34.attn_q.weight iq2_s blk.34.attn_v.weight q4_K -blk.35.ffn_down_exps.weight q3_K -blk.35.ffn_down_shexp.weight q3_K +blk.35.ffn_down_exps.weight q4_0 +blk.35.ffn_down_shexp.weight q4_0 blk.35.attn_k.weight iq2_s blk.35.attn_output.weight iq3_s blk.35.attn_q.weight iq2_s blk.35.attn_v.weight q4_K -blk.36.ffn_down_exps.weight q3_K -blk.36.ffn_down_shexp.weight q3_K +blk.36.ffn_down_exps.weight q4_0 +blk.36.ffn_down_shexp.weight q4_0 blk.36.attn_k.weight iq2_s blk.36.attn_output.weight iq3_s blk.36.attn_q.weight iq2_s blk.36.attn_v.weight q4_K -blk.37.ffn_down_exps.weight q3_K -blk.37.ffn_down_shexp.weight q3_K +blk.37.ffn_down_exps.weight q4_0 +blk.37.ffn_down_shexp.weight q4_0 blk.37.attn_k.weight iq2_s blk.37.attn_output.weight iq3_s blk.37.attn_q.weight iq2_s blk.37.attn_v.weight q4_K -blk.38.ffn_down_exps.weight q3_K -blk.38.ffn_down_shexp.weight q3_K +blk.38.ffn_down_exps.weight q4_0 +blk.38.ffn_down_shexp.weight q4_0 blk.38.attn_k.weight iq2_s blk.38.attn_output.weight iq3_s blk.38.attn_q.weight iq2_s blk.38.attn_v.weight q4_K -blk.39.ffn_down_exps.weight q3_K -blk.39.ffn_down_shexp.weight q3_K +blk.39.ffn_down_exps.weight q4_0 +blk.39.ffn_down_shexp.weight q4_0 blk.39.attn_k.weight iq2_s blk.39.attn_output.weight iq3_s blk.39.attn_q.weight iq2_s blk.39.attn_v.weight q4_K -blk.40.ffn_down_exps.weight q3_K -blk.40.ffn_down_shexp.weight q3_K +blk.40.ffn_down_exps.weight q4_0 +blk.40.ffn_down_shexp.weight q4_0 blk.40.attn_k.weight iq2_s blk.40.attn_output.weight iq3_s blk.40.attn_q.weight iq2_s blk.40.attn_v.weight q4_K -blk.41.ffn_down_exps.weight q3_K -blk.41.ffn_down_shexp.weight q3_K +blk.41.ffn_down_exps.weight q4_0 +blk.41.ffn_down_shexp.weight q4_0 blk.41.attn_k.weight iq2_s blk.41.attn_output.weight iq3_s blk.41.attn_q.weight iq2_s blk.41.attn_v.weight q4_K -blk.42.ffn_down_exps.weight q3_K -blk.42.ffn_down_shexp.weight q3_K +blk.42.ffn_down_exps.weight q4_0 +blk.42.ffn_down_shexp.weight q4_0 blk.42.attn_k.weight iq2_s blk.42.attn_q.weight iq2_s blk.42.attn_v.weight q4_K blk.42.attn_output.weight iq3_s -blk.43.ffn_down_exps.weight q3_K -blk.43.ffn_down_shexp.weight q3_K +blk.43.ffn_down_exps.weight q4_0 +blk.43.ffn_down_shexp.weight q4_0 blk.43.attn_k.weight iq2_s blk.43.attn_output.weight iq3_s blk.43.attn_q.weight iq2_s blk.43.attn_v.weight q4_K -blk.44.ffn_down_exps.weight q3_K -blk.44.ffn_down_shexp.weight q3_K +blk.44.ffn_down_exps.weight q4_0 +blk.44.ffn_down_shexp.weight q4_0 blk.44.attn_k.weight iq2_s blk.44.attn_output.weight iq3_s blk.44.attn_q.weight iq2_s blk.44.attn_v.weight q4_K output.weight q5_K -blk.45.ffn_down_exps.weight q3_K -blk.45.ffn_down_shexp.weight q3_K +blk.45.ffn_down_exps.weight q4_0 +blk.45.ffn_down_shexp.weight q4_0 blk.45.attn_k.weight iq2_s blk.45.attn_output.weight iq3_s blk.45.attn_q.weight iq2_s @@ -1469,119 +2263,205 @@ blk.45.attn_v.weight q4_K [IQ1_S] iq1_s token_embd.weight q2_K -blk.0.ffn_down.weight q2_K +blk.0.ffn_down.weight q4_0 blk.0.attn_output.weight iq2_xxs blk.0.attn_v.weight q4_K -blk.1.ffn_down_exps.weight q2_K -blk.1.ffn_down_shexp.weight q2_K +blk.1.ffn_down_exps.weight q4_0 +blk.1.ffn_down_shexp.weight q4_0 blk.1.attn_output.weight iq2_xxs blk.1.attn_v.weight q4_K -blk.2.ffn_down_exps.weight q2_K -blk.2.ffn_down_shexp.weight q2_K +blk.2.ffn_down_exps.weight q4_0 +blk.2.ffn_down_shexp.weight q4_0 blk.2.attn_output.weight iq2_xxs blk.2.attn_v.weight q4_K +blk.3.ffn_down_exps.weight iq4_nl +blk.3.ffn_down_shexp.weight iq4_nl blk.3.attn_output.weight iq2_xxs blk.3.attn_v.weight q4_K +blk.4.ffn_down_exps.weight iq4_nl +blk.4.ffn_down_shexp.weight iq4_nl blk.4.attn_output.weight iq2_xxs blk.4.attn_v.weight q4_K +blk.5.ffn_down_exps.weight iq4_nl +blk.5.ffn_down_shexp.weight iq4_nl blk.5.attn_output.weight iq2_xxs blk.5.attn_v.weight q4_K +blk.6.ffn_down_exps.weight iq4_nl +blk.6.ffn_down_shexp.weight iq4_nl blk.6.attn_output.weight iq2_xxs blk.6.attn_v.weight q4_K +blk.7.ffn_down_exps.weight iq4_nl +blk.7.ffn_down_shexp.weight iq4_nl blk.7.attn_output.weight iq2_xxs blk.7.attn_v.weight q4_K +blk.8.ffn_down_exps.weight iq4_nl +blk.8.ffn_down_shexp.weight iq4_nl blk.8.attn_output.weight iq2_xxs blk.8.attn_v.weight q4_K +blk.9.ffn_down_exps.weight iq4_nl +blk.9.ffn_down_shexp.weight iq4_nl blk.9.attn_output.weight iq2_xxs blk.9.attn_v.weight q4_K +blk.10.ffn_down_exps.weight iq4_nl +blk.10.ffn_down_shexp.weight iq4_nl blk.10.attn_output.weight iq2_xxs blk.10.attn_v.weight q4_K +blk.11.ffn_down_exps.weight iq4_nl +blk.11.ffn_down_shexp.weight iq4_nl blk.11.attn_output.weight iq2_xxs blk.11.attn_v.weight q4_K +blk.12.ffn_down_exps.weight iq4_nl +blk.12.ffn_down_shexp.weight iq4_nl blk.12.attn_output.weight iq2_xxs blk.12.attn_v.weight q4_K +blk.13.ffn_down_exps.weight iq4_nl +blk.13.ffn_down_shexp.weight iq4_nl blk.13.attn_output.weight iq2_xxs blk.13.attn_v.weight q4_K +blk.14.ffn_down_exps.weight iq4_nl +blk.14.ffn_down_shexp.weight iq4_nl blk.14.attn_output.weight iq2_xxs blk.14.attn_v.weight q4_K +blk.15.ffn_down_exps.weight iq4_nl +blk.15.ffn_down_shexp.weight iq4_nl blk.15.attn_output.weight iq2_xxs blk.15.attn_v.weight q4_K +blk.16.ffn_down_exps.weight iq4_nl +blk.16.ffn_down_shexp.weight iq4_nl blk.16.attn_output.weight iq2_xxs blk.16.attn_v.weight q4_K +blk.17.ffn_down_exps.weight iq4_nl +blk.17.ffn_down_shexp.weight iq4_nl blk.17.attn_output.weight iq2_xxs blk.17.attn_v.weight q4_K +blk.18.ffn_down_exps.weight iq4_nl +blk.18.ffn_down_shexp.weight iq4_nl blk.18.attn_output.weight iq2_xxs blk.18.attn_v.weight q4_K +blk.19.ffn_down_exps.weight iq4_nl +blk.19.ffn_down_shexp.weight iq4_nl blk.19.attn_output.weight iq2_xxs blk.19.attn_v.weight q4_K +blk.20.ffn_down_exps.weight iq4_nl +blk.20.ffn_down_shexp.weight iq4_nl blk.20.attn_output.weight iq2_xxs blk.20.attn_v.weight q4_K +blk.21.ffn_down_exps.weight iq4_nl +blk.21.ffn_down_shexp.weight iq4_nl blk.21.attn_output.weight iq2_xxs blk.21.attn_v.weight q4_K +blk.22.ffn_down_exps.weight iq4_nl +blk.22.ffn_down_shexp.weight iq4_nl blk.22.attn_output.weight iq2_xxs blk.22.attn_v.weight q4_K +blk.23.ffn_down_exps.weight iq4_nl +blk.23.ffn_down_shexp.weight iq4_nl blk.23.attn_output.weight iq2_xxs blk.23.attn_v.weight q4_K +blk.24.ffn_down_exps.weight iq4_nl +blk.24.ffn_down_shexp.weight iq4_nl blk.24.attn_output.weight iq2_xxs blk.24.attn_v.weight q4_K +blk.25.ffn_down_exps.weight iq4_nl +blk.25.ffn_down_shexp.weight iq4_nl blk.25.attn_output.weight iq2_xxs blk.25.attn_v.weight q4_K +blk.26.ffn_down_exps.weight iq4_nl +blk.26.ffn_down_shexp.weight iq4_nl blk.26.attn_output.weight iq2_xxs blk.26.attn_v.weight q4_K +blk.27.ffn_down_exps.weight iq4_nl +blk.27.ffn_down_shexp.weight iq4_nl blk.27.attn_output.weight iq2_xxs blk.27.attn_v.weight q4_K +blk.28.ffn_down_exps.weight iq4_nl +blk.28.ffn_down_shexp.weight iq4_nl blk.28.attn_output.weight iq2_xxs blk.28.attn_v.weight q4_K +blk.29.ffn_down_exps.weight iq4_nl +blk.29.ffn_down_shexp.weight iq4_nl blk.29.attn_output.weight iq2_xxs blk.29.attn_v.weight q4_K +blk.30.ffn_down_exps.weight iq4_nl +blk.30.ffn_down_shexp.weight iq4_nl blk.30.attn_output.weight iq2_xxs blk.30.attn_v.weight q4_K +blk.31.ffn_down_exps.weight iq4_nl +blk.31.ffn_down_shexp.weight iq4_nl blk.31.attn_output.weight iq2_xxs blk.31.attn_v.weight q4_K +blk.32.ffn_down_exps.weight iq4_nl +blk.32.ffn_down_shexp.weight iq4_nl blk.32.attn_output.weight iq2_xxs blk.32.attn_v.weight q4_K +blk.33.ffn_down_exps.weight iq4_nl +blk.33.ffn_down_shexp.weight iq4_nl blk.33.attn_output.weight iq2_xxs blk.33.attn_v.weight q4_K +blk.34.ffn_down_exps.weight iq4_nl blk.34.attn_output.weight iq2_xxs +blk.34.ffn_down_shexp.weight iq4_nl blk.34.attn_v.weight q4_K +blk.35.ffn_down_exps.weight iq4_nl +blk.35.ffn_down_shexp.weight iq4_nl blk.35.attn_output.weight iq2_xxs blk.35.attn_v.weight q4_K +blk.36.ffn_down_exps.weight iq4_nl +blk.36.ffn_down_shexp.weight iq4_nl blk.36.attn_output.weight iq2_xxs blk.36.attn_v.weight q4_K +blk.37.ffn_down_exps.weight iq4_nl +blk.37.ffn_down_shexp.weight iq4_nl blk.37.attn_output.weight iq2_xxs blk.37.attn_v.weight q4_K +blk.38.ffn_down_exps.weight iq4_nl +blk.38.ffn_down_shexp.weight iq4_nl blk.38.attn_output.weight iq2_xxs blk.38.attn_v.weight q4_K +blk.39.ffn_down_exps.weight iq4_nl +blk.39.ffn_down_shexp.weight iq4_nl blk.39.attn_output.weight iq2_xxs blk.39.attn_v.weight q4_K +blk.40.ffn_down_exps.weight iq4_nl +blk.40.ffn_down_shexp.weight iq4_nl blk.40.attn_output.weight iq2_xxs blk.40.attn_v.weight q4_K +blk.41.ffn_down_exps.weight iq4_nl +blk.41.ffn_down_shexp.weight iq4_nl blk.41.attn_output.weight iq2_xxs blk.41.attn_v.weight q4_K +blk.42.ffn_down_exps.weight iq4_nl +blk.42.ffn_down_shexp.weight iq4_nl blk.42.attn_v.weight q4_K blk.42.attn_output.weight iq2_xxs +blk.43.ffn_down_exps.weight iq4_nl +blk.43.ffn_down_shexp.weight iq4_nl blk.43.attn_output.weight iq2_xxs blk.43.attn_v.weight q4_K +blk.44.ffn_down_exps.weight iq4_nl +blk.44.ffn_down_shexp.weight iq4_nl blk.44.attn_output.weight iq2_xxs blk.44.attn_v.weight q4_K output.weight q5_K +blk.45.ffn_down_exps.weight iq4_nl +blk.45.ffn_down_shexp.weight iq4_nl blk.45.attn_output.weight iq2_xxs blk.45.attn_v.weight q4_K [IQ4_NL] iq4_nl -blk.0.ffn_down.weight q5_K +blk.0.ffn_down.weight q5_1 blk.0.attn_v.weight q5_K -blk.1.ffn_down_exps.weight q5_K -blk.1.ffn_down_shexp.weight q5_K +blk.1.ffn_down_exps.weight q5_1 +blk.1.ffn_down_shexp.weight q5_1 blk.1.attn_v.weight q5_K -blk.2.ffn_down_exps.weight q5_K -blk.2.ffn_down_shexp.weight q5_K +blk.2.ffn_down_exps.weight q5_1 +blk.2.ffn_down_shexp.weight q5_1 blk.2.attn_v.weight q5_K -blk.3.ffn_down_exps.weight q5_K -blk.3.ffn_down_shexp.weight q5_K +blk.3.ffn_down_exps.weight q5_1 +blk.3.ffn_down_shexp.weight q5_1 blk.3.attn_v.weight q5_K -blk.4.ffn_down_exps.weight q5_K -blk.4.ffn_down_shexp.weight q5_K +blk.4.ffn_down_exps.weight q5_1 +blk.4.ffn_down_shexp.weight q5_1 blk.4.attn_v.weight q5_K blk.5.attn_v.weight q5_K blk.6.attn_v.weight q5_K @@ -1627,529 +2507,1223 @@ output.weight q6_K blk.45.attn_v.weight q5_K [IQ3_S] iq3_s +blk.0.ffn_down.weight iq4_nl blk.0.attn_v.weight q4_K +blk.1.ffn_down_exps.weight iq4_nl +blk.1.ffn_down_shexp.weight iq4_nl blk.1.attn_v.weight q4_K +blk.2.ffn_down_exps.weight iq4_nl +blk.2.ffn_down_shexp.weight iq4_nl blk.2.attn_v.weight q4_K +blk.3.ffn_down_exps.weight iq4_nl +blk.3.ffn_down_shexp.weight iq4_nl blk.3.attn_v.weight q4_K +blk.4.ffn_down_exps.weight iq4_nl +blk.4.ffn_down_shexp.weight iq4_nl blk.4.attn_v.weight q4_K +blk.5.ffn_down_exps.weight iq4_nl +blk.5.ffn_down_shexp.weight iq4_nl blk.5.attn_v.weight q4_K +blk.6.ffn_down_exps.weight iq4_nl +blk.6.ffn_down_shexp.weight iq4_nl blk.6.attn_v.weight q4_K +blk.7.ffn_down_exps.weight iq4_nl +blk.7.ffn_down_shexp.weight iq4_nl blk.7.attn_v.weight q4_K +blk.8.ffn_down_exps.weight iq4_nl +blk.8.ffn_down_shexp.weight iq4_nl blk.8.attn_v.weight q4_K +blk.9.ffn_down_exps.weight iq4_nl +blk.9.ffn_down_shexp.weight iq4_nl blk.9.attn_v.weight q4_K +blk.10.ffn_down_exps.weight iq4_nl +blk.10.ffn_down_shexp.weight iq4_nl blk.10.attn_v.weight q4_K +blk.11.ffn_down_exps.weight iq4_nl +blk.11.ffn_down_shexp.weight iq4_nl blk.11.attn_v.weight q4_K +blk.12.ffn_down_exps.weight iq4_nl +blk.12.ffn_down_shexp.weight iq4_nl blk.12.attn_v.weight q4_K +blk.13.ffn_down_exps.weight iq4_nl +blk.13.ffn_down_shexp.weight iq4_nl blk.13.attn_v.weight q4_K +blk.14.ffn_down_exps.weight iq4_nl +blk.14.ffn_down_shexp.weight iq4_nl blk.14.attn_v.weight q4_K +blk.15.ffn_down_exps.weight iq4_nl +blk.15.ffn_down_shexp.weight iq4_nl blk.15.attn_v.weight q4_K +blk.16.ffn_down_exps.weight iq4_nl +blk.16.ffn_down_shexp.weight iq4_nl blk.16.attn_v.weight q4_K +blk.17.ffn_down_exps.weight iq4_nl +blk.17.ffn_down_shexp.weight iq4_nl blk.17.attn_v.weight q4_K +blk.18.ffn_down_exps.weight iq4_nl +blk.18.ffn_down_shexp.weight iq4_nl blk.18.attn_v.weight q4_K +blk.19.ffn_down_exps.weight iq4_nl +blk.19.ffn_down_shexp.weight iq4_nl blk.19.attn_v.weight q4_K +blk.20.ffn_down_exps.weight iq4_nl +blk.20.ffn_down_shexp.weight iq4_nl blk.20.attn_v.weight q4_K +blk.21.ffn_down_exps.weight iq4_nl +blk.21.ffn_down_shexp.weight iq4_nl blk.21.attn_v.weight q4_K +blk.22.ffn_down_exps.weight iq4_nl +blk.22.ffn_down_shexp.weight iq4_nl blk.22.attn_v.weight q4_K +blk.23.ffn_down_exps.weight iq4_nl +blk.23.ffn_down_shexp.weight iq4_nl blk.23.attn_v.weight q4_K +blk.24.ffn_down_exps.weight iq4_nl +blk.24.ffn_down_shexp.weight iq4_nl blk.24.attn_v.weight q4_K +blk.25.ffn_down_exps.weight iq4_nl +blk.25.ffn_down_shexp.weight iq4_nl blk.25.attn_v.weight q4_K +blk.26.ffn_down_exps.weight iq4_nl +blk.26.ffn_down_shexp.weight iq4_nl blk.26.attn_v.weight q4_K +blk.27.ffn_down_exps.weight iq4_nl +blk.27.ffn_down_shexp.weight iq4_nl blk.27.attn_v.weight q4_K +blk.28.ffn_down_exps.weight iq4_nl +blk.28.ffn_down_shexp.weight iq4_nl blk.28.attn_v.weight q4_K +blk.29.ffn_down_exps.weight iq4_nl +blk.29.ffn_down_shexp.weight iq4_nl blk.29.attn_v.weight q4_K +blk.30.ffn_down_exps.weight iq4_nl +blk.30.ffn_down_shexp.weight iq4_nl blk.30.attn_v.weight q4_K +blk.31.ffn_down_exps.weight iq4_nl +blk.31.ffn_down_shexp.weight iq4_nl blk.31.attn_v.weight q4_K +blk.32.ffn_down_exps.weight iq4_nl +blk.32.ffn_down_shexp.weight iq4_nl blk.32.attn_v.weight q4_K +blk.33.ffn_down_exps.weight iq4_nl +blk.33.ffn_down_shexp.weight iq4_nl blk.33.attn_v.weight q4_K +blk.34.ffn_down_exps.weight iq4_nl +blk.34.ffn_down_shexp.weight iq4_nl blk.34.attn_v.weight q4_K +blk.35.ffn_down_exps.weight iq4_nl +blk.35.ffn_down_shexp.weight iq4_nl blk.35.attn_v.weight q4_K +blk.36.ffn_down_exps.weight iq4_nl +blk.36.ffn_down_shexp.weight iq4_nl blk.36.attn_v.weight q4_K +blk.37.ffn_down_exps.weight iq4_nl +blk.37.ffn_down_shexp.weight iq4_nl blk.37.attn_v.weight q4_K +blk.38.ffn_down_exps.weight iq4_nl +blk.38.ffn_down_shexp.weight iq4_nl blk.38.attn_v.weight q4_K +blk.39.ffn_down_exps.weight iq4_nl +blk.39.ffn_down_shexp.weight iq4_nl blk.39.attn_v.weight q4_K +blk.40.ffn_down_exps.weight iq4_nl +blk.40.ffn_down_shexp.weight iq4_nl blk.40.attn_v.weight q4_K +blk.41.ffn_down_exps.weight iq4_nl +blk.41.ffn_down_shexp.weight iq4_nl blk.41.attn_v.weight q4_K +blk.42.ffn_down_exps.weight iq4_nl +blk.42.ffn_down_shexp.weight iq4_nl blk.42.attn_v.weight q4_K +blk.43.ffn_down_exps.weight iq4_nl +blk.43.ffn_down_shexp.weight iq4_nl blk.43.attn_v.weight q4_K +blk.44.ffn_down_exps.weight iq4_nl +blk.44.ffn_down_shexp.weight iq4_nl blk.44.attn_v.weight q4_K output.weight q6_K +blk.45.ffn_down_exps.weight iq4_nl +blk.45.ffn_down_shexp.weight iq4_nl blk.45.attn_v.weight q4_K [IQ3_M] iq3_s -blk.0.ffn_down.weight q4_K +blk.0.ffn_down.weight q5_0 blk.0.attn_output.weight q4_K blk.0.attn_v.weight q4_K -blk.1.ffn_down_exps.weight q4_K -blk.1.ffn_down_shexp.weight q4_K +blk.1.ffn_down_exps.weight q5_0 +blk.1.ffn_down_shexp.weight q5_0 blk.1.attn_output.weight q4_K blk.1.attn_v.weight q4_K -blk.2.ffn_down_exps.weight q4_K -blk.2.ffn_down_shexp.weight q4_K +blk.2.ffn_down_exps.weight q5_0 +blk.2.ffn_down_shexp.weight q5_0 blk.2.attn_output.weight q4_K blk.2.attn_v.weight q4_K -blk.3.ffn_down_exps.weight q4_K -blk.3.ffn_down_shexp.weight q4_K +blk.3.ffn_down_exps.weight q5_0 +blk.3.ffn_down_shexp.weight q5_0 blk.3.attn_output.weight q4_K blk.3.attn_v.weight q4_K -blk.4.ffn_down_exps.weight q4_K -blk.4.ffn_down_shexp.weight q4_K +blk.4.ffn_down_exps.weight q5_0 +blk.4.ffn_down_shexp.weight q5_0 blk.4.attn_output.weight q4_K blk.4.attn_v.weight q4_K +blk.5.ffn_down_exps.weight iq4_nl +blk.5.ffn_down_shexp.weight iq4_nl blk.5.attn_output.weight q4_K blk.5.attn_v.weight q4_K +blk.6.ffn_down_exps.weight iq4_nl +blk.6.ffn_down_shexp.weight iq4_nl blk.6.attn_output.weight q4_K blk.6.attn_v.weight q4_K +blk.7.ffn_down_exps.weight iq4_nl +blk.7.ffn_down_shexp.weight iq4_nl blk.7.attn_output.weight q4_K blk.7.attn_v.weight q4_K +blk.8.ffn_down_exps.weight iq4_nl +blk.8.ffn_down_shexp.weight iq4_nl blk.8.attn_output.weight q4_K blk.8.attn_v.weight q4_K +blk.9.ffn_down_exps.weight iq4_nl +blk.9.ffn_down_shexp.weight iq4_nl blk.9.attn_output.weight q4_K blk.9.attn_v.weight q4_K +blk.10.ffn_down_exps.weight iq4_nl +blk.10.ffn_down_shexp.weight iq4_nl blk.10.attn_output.weight q4_K blk.10.attn_v.weight q4_K +blk.11.ffn_down_exps.weight iq4_nl +blk.11.ffn_down_shexp.weight iq4_nl blk.11.attn_output.weight q4_K blk.11.attn_v.weight q4_K +blk.12.ffn_down_exps.weight iq4_nl +blk.12.ffn_down_shexp.weight iq4_nl blk.12.attn_output.weight q4_K blk.12.attn_v.weight q4_K +blk.13.ffn_down_exps.weight iq4_nl +blk.13.ffn_down_shexp.weight iq4_nl blk.13.attn_output.weight q4_K blk.13.attn_v.weight q4_K +blk.14.ffn_down_exps.weight iq4_nl +blk.14.ffn_down_shexp.weight iq4_nl blk.14.attn_output.weight q4_K blk.14.attn_v.weight q4_K +blk.15.ffn_down_exps.weight iq4_nl +blk.15.ffn_down_shexp.weight iq4_nl blk.15.attn_output.weight q4_K blk.15.attn_v.weight q4_K +blk.16.ffn_down_exps.weight iq4_nl +blk.16.ffn_down_shexp.weight iq4_nl blk.16.attn_output.weight q4_K blk.16.attn_v.weight q4_K +blk.17.ffn_down_exps.weight iq4_nl +blk.17.ffn_down_shexp.weight iq4_nl blk.17.attn_output.weight q4_K blk.17.attn_v.weight q4_K +blk.18.ffn_down_exps.weight iq4_nl +blk.18.ffn_down_shexp.weight iq4_nl blk.18.attn_output.weight q4_K blk.18.attn_v.weight q4_K +blk.19.ffn_down_exps.weight iq4_nl +blk.19.ffn_down_shexp.weight iq4_nl blk.19.attn_output.weight q4_K blk.19.attn_v.weight q4_K +blk.20.ffn_down_exps.weight iq4_nl +blk.20.ffn_down_shexp.weight iq4_nl blk.20.attn_output.weight q4_K blk.20.attn_v.weight q4_K +blk.21.ffn_down_exps.weight iq4_nl +blk.21.ffn_down_shexp.weight iq4_nl blk.21.attn_output.weight q4_K blk.21.attn_v.weight q4_K +blk.22.ffn_down_exps.weight iq4_nl +blk.22.ffn_down_shexp.weight iq4_nl blk.22.attn_output.weight q4_K blk.22.attn_v.weight q4_K +blk.23.ffn_down_exps.weight iq4_nl +blk.23.ffn_down_shexp.weight iq4_nl blk.23.attn_output.weight q4_K blk.23.attn_v.weight q4_K +blk.24.ffn_down_exps.weight iq4_nl +blk.24.ffn_down_shexp.weight iq4_nl blk.24.attn_output.weight q4_K blk.24.attn_v.weight q4_K +blk.25.ffn_down_exps.weight iq4_nl +blk.25.ffn_down_shexp.weight iq4_nl blk.25.attn_output.weight q4_K blk.25.attn_v.weight q4_K +blk.26.ffn_down_exps.weight iq4_nl +blk.26.ffn_down_shexp.weight iq4_nl blk.26.attn_output.weight q4_K blk.26.attn_v.weight q4_K +blk.27.ffn_down_exps.weight iq4_nl +blk.27.ffn_down_shexp.weight iq4_nl blk.27.attn_output.weight q4_K blk.27.attn_v.weight q4_K +blk.28.ffn_down_exps.weight iq4_nl +blk.28.ffn_down_shexp.weight iq4_nl blk.28.attn_output.weight q4_K blk.28.attn_v.weight q4_K +blk.29.ffn_down_exps.weight iq4_nl +blk.29.ffn_down_shexp.weight iq4_nl blk.29.attn_output.weight q4_K blk.29.attn_v.weight q4_K +blk.30.ffn_down_exps.weight iq4_nl +blk.30.ffn_down_shexp.weight iq4_nl blk.30.attn_output.weight q4_K blk.30.attn_v.weight q4_K +blk.31.ffn_down_exps.weight iq4_nl +blk.31.ffn_down_shexp.weight iq4_nl blk.31.attn_output.weight q4_K blk.31.attn_v.weight q4_K +blk.32.ffn_down_exps.weight iq4_nl +blk.32.ffn_down_shexp.weight iq4_nl blk.32.attn_output.weight q4_K blk.32.attn_v.weight q4_K +blk.33.ffn_down_exps.weight iq4_nl +blk.33.ffn_down_shexp.weight iq4_nl blk.33.attn_output.weight q4_K blk.33.attn_v.weight q4_K +blk.34.ffn_down_exps.weight iq4_nl blk.34.attn_output.weight q4_K +blk.34.ffn_down_shexp.weight iq4_nl blk.34.attn_v.weight q4_K +blk.35.ffn_down_exps.weight iq4_nl +blk.35.ffn_down_shexp.weight iq4_nl blk.35.attn_output.weight q4_K blk.35.attn_v.weight q4_K +blk.36.ffn_down_exps.weight iq4_nl +blk.36.ffn_down_shexp.weight iq4_nl blk.36.attn_output.weight q4_K blk.36.attn_v.weight q4_K +blk.37.ffn_down_exps.weight iq4_nl +blk.37.ffn_down_shexp.weight iq4_nl blk.37.attn_output.weight q4_K blk.37.attn_v.weight q4_K +blk.38.ffn_down_exps.weight iq4_nl +blk.38.ffn_down_shexp.weight iq4_nl blk.38.attn_output.weight q4_K blk.38.attn_v.weight q4_K +blk.39.ffn_down_exps.weight iq4_nl +blk.39.ffn_down_shexp.weight iq4_nl blk.39.attn_output.weight q4_K blk.39.attn_v.weight q4_K +blk.40.ffn_down_exps.weight iq4_nl +blk.40.ffn_down_shexp.weight iq4_nl blk.40.attn_output.weight q4_K blk.40.attn_v.weight q4_K +blk.41.ffn_down_exps.weight iq4_nl +blk.41.ffn_down_shexp.weight iq4_nl blk.41.attn_output.weight q4_K blk.41.attn_v.weight q4_K +blk.42.ffn_down_exps.weight iq4_nl +blk.42.ffn_down_shexp.weight iq4_nl blk.42.attn_v.weight q4_K blk.42.attn_output.weight q4_K +blk.43.ffn_down_exps.weight iq4_nl +blk.43.ffn_down_shexp.weight iq4_nl blk.43.attn_output.weight q4_K blk.43.attn_v.weight q4_K +blk.44.ffn_down_exps.weight iq4_nl +blk.44.ffn_down_shexp.weight iq4_nl blk.44.attn_output.weight q4_K blk.44.attn_v.weight q4_K output.weight q6_K +blk.45.ffn_down_exps.weight iq4_nl +blk.45.ffn_down_shexp.weight iq4_nl blk.45.attn_output.weight q4_K blk.45.attn_v.weight q4_K [IQ2_S] iq2_xs token_embd.weight iq3_s -blk.0.ffn_down.weight iq3_s +blk.0.ffn_down.weight iq4_nl blk.0.attn_output.weight iq3_s blk.0.attn_v.weight q4_K -blk.1.ffn_down_exps.weight iq3_s -blk.1.ffn_down_shexp.weight iq3_s +blk.1.ffn_down_exps.weight iq4_nl +blk.1.ffn_down_shexp.weight iq4_nl blk.1.attn_output.weight iq3_s blk.1.attn_v.weight q4_K -blk.2.ffn_down_exps.weight iq3_s -blk.2.ffn_down_shexp.weight iq3_s +blk.2.ffn_down_exps.weight iq4_nl +blk.2.ffn_down_shexp.weight iq4_nl blk.2.attn_output.weight iq3_s blk.2.attn_v.weight q4_K +blk.3.ffn_down_exps.weight iq4_nl +blk.3.ffn_down_shexp.weight iq4_nl blk.3.attn_output.weight iq3_s blk.3.attn_v.weight q4_K +blk.4.ffn_down_exps.weight iq4_nl +blk.4.ffn_down_shexp.weight iq4_nl blk.4.attn_output.weight iq3_s blk.4.attn_v.weight q4_K +blk.5.ffn_down_exps.weight iq4_nl +blk.5.ffn_down_shexp.weight iq4_nl blk.5.attn_output.weight iq3_s blk.5.attn_v.weight q4_K +blk.6.ffn_down_exps.weight iq4_nl +blk.6.ffn_down_shexp.weight iq4_nl blk.6.attn_output.weight iq3_s blk.6.attn_v.weight q4_K +blk.7.ffn_down_exps.weight iq4_nl +blk.7.ffn_down_shexp.weight iq4_nl blk.7.attn_output.weight iq3_s blk.7.attn_v.weight q4_K +blk.8.ffn_down_exps.weight iq4_nl +blk.8.ffn_down_shexp.weight iq4_nl blk.8.attn_output.weight iq3_s blk.8.attn_v.weight q4_K +blk.9.ffn_down_exps.weight iq4_nl +blk.9.ffn_down_shexp.weight iq4_nl blk.9.attn_output.weight iq3_s blk.9.attn_v.weight q4_K +blk.10.ffn_down_exps.weight iq4_nl +blk.10.ffn_down_shexp.weight iq4_nl blk.10.attn_output.weight iq3_s blk.10.attn_v.weight q4_K +blk.11.ffn_down_exps.weight iq4_nl +blk.11.ffn_down_shexp.weight iq4_nl blk.11.attn_output.weight iq3_s blk.11.attn_v.weight q4_K +blk.12.ffn_down_exps.weight iq4_nl +blk.12.ffn_down_shexp.weight iq4_nl blk.12.attn_output.weight iq3_s blk.12.attn_v.weight q4_K +blk.13.ffn_down_exps.weight iq4_nl +blk.13.ffn_down_shexp.weight iq4_nl blk.13.attn_output.weight iq3_s blk.13.attn_v.weight q4_K +blk.14.ffn_down_exps.weight iq4_nl +blk.14.ffn_down_shexp.weight iq4_nl blk.14.attn_output.weight iq3_s blk.14.attn_v.weight q4_K +blk.15.ffn_down_exps.weight iq4_nl +blk.15.ffn_down_shexp.weight iq4_nl blk.15.attn_output.weight iq3_s blk.15.attn_v.weight q4_K +blk.16.ffn_down_exps.weight iq4_nl +blk.16.ffn_down_shexp.weight iq4_nl blk.16.attn_output.weight iq3_s blk.16.attn_v.weight q4_K +blk.17.ffn_down_exps.weight iq4_nl +blk.17.ffn_down_shexp.weight iq4_nl blk.17.attn_output.weight iq3_s blk.17.attn_v.weight q4_K +blk.18.ffn_down_exps.weight iq4_nl +blk.18.ffn_down_shexp.weight iq4_nl blk.18.attn_output.weight iq3_s blk.18.attn_v.weight q4_K +blk.19.ffn_down_exps.weight iq4_nl +blk.19.ffn_down_shexp.weight iq4_nl blk.19.attn_output.weight iq3_s blk.19.attn_v.weight q4_K +blk.20.ffn_down_exps.weight iq4_nl +blk.20.ffn_down_shexp.weight iq4_nl blk.20.attn_output.weight iq3_s blk.20.attn_v.weight q4_K +blk.21.ffn_down_exps.weight iq4_nl +blk.21.ffn_down_shexp.weight iq4_nl blk.21.attn_output.weight iq3_s blk.21.attn_v.weight q4_K +blk.22.ffn_down_exps.weight iq4_nl +blk.22.ffn_down_shexp.weight iq4_nl blk.22.attn_output.weight iq3_s blk.22.attn_v.weight q4_K +blk.23.ffn_down_exps.weight iq4_nl +blk.23.ffn_down_shexp.weight iq4_nl blk.23.attn_output.weight iq3_s blk.23.attn_v.weight q4_K +blk.24.ffn_down_exps.weight iq4_nl +blk.24.ffn_down_shexp.weight iq4_nl blk.24.attn_output.weight iq3_s blk.24.attn_v.weight q4_K +blk.25.ffn_down_exps.weight iq4_nl +blk.25.ffn_down_shexp.weight iq4_nl blk.25.attn_output.weight iq3_s blk.25.attn_v.weight q4_K +blk.26.ffn_down_exps.weight iq4_nl +blk.26.ffn_down_shexp.weight iq4_nl blk.26.attn_output.weight iq3_s blk.26.attn_v.weight q4_K +blk.27.ffn_down_exps.weight iq4_nl +blk.27.ffn_down_shexp.weight iq4_nl blk.27.attn_output.weight iq3_s blk.27.attn_v.weight q4_K +blk.28.ffn_down_exps.weight iq4_nl +blk.28.ffn_down_shexp.weight iq4_nl blk.28.attn_output.weight iq3_s blk.28.attn_v.weight q4_K +blk.29.ffn_down_exps.weight iq4_nl +blk.29.ffn_down_shexp.weight iq4_nl blk.29.attn_output.weight iq3_s blk.29.attn_v.weight q4_K +blk.30.ffn_down_exps.weight iq4_nl +blk.30.ffn_down_shexp.weight iq4_nl blk.30.attn_output.weight iq3_s blk.30.attn_v.weight q4_K +blk.31.ffn_down_exps.weight iq4_nl +blk.31.ffn_down_shexp.weight iq4_nl blk.31.attn_output.weight iq3_s blk.31.attn_v.weight q4_K +blk.32.ffn_down_exps.weight iq4_nl +blk.32.ffn_down_shexp.weight iq4_nl blk.32.attn_output.weight iq3_s blk.32.attn_v.weight q4_K +blk.33.ffn_down_exps.weight iq4_nl +blk.33.ffn_down_shexp.weight iq4_nl blk.33.attn_output.weight iq3_s blk.33.attn_v.weight q4_K +blk.34.ffn_down_exps.weight iq4_nl blk.34.attn_output.weight iq3_s +blk.34.ffn_down_shexp.weight iq4_nl blk.34.attn_v.weight q4_K +blk.35.ffn_down_exps.weight iq4_nl +blk.35.ffn_down_shexp.weight iq4_nl blk.35.attn_output.weight iq3_s blk.35.attn_v.weight q4_K +blk.36.ffn_down_exps.weight iq4_nl +blk.36.ffn_down_shexp.weight iq4_nl blk.36.attn_output.weight iq3_s blk.36.attn_v.weight q4_K +blk.37.ffn_down_exps.weight iq4_nl +blk.37.ffn_down_shexp.weight iq4_nl blk.37.attn_output.weight iq3_s blk.37.attn_v.weight q4_K +blk.38.ffn_down_exps.weight iq4_nl +blk.38.ffn_down_shexp.weight iq4_nl blk.38.attn_output.weight iq3_s blk.38.attn_v.weight q4_K +blk.39.ffn_down_exps.weight iq4_nl +blk.39.ffn_down_shexp.weight iq4_nl blk.39.attn_output.weight iq3_s blk.39.attn_v.weight q4_K +blk.40.ffn_down_exps.weight iq4_nl +blk.40.ffn_down_shexp.weight iq4_nl blk.40.attn_output.weight iq3_s blk.40.attn_v.weight q4_K +blk.41.ffn_down_exps.weight iq4_nl +blk.41.ffn_down_shexp.weight iq4_nl blk.41.attn_output.weight iq3_s blk.41.attn_v.weight q4_K +blk.42.ffn_down_exps.weight iq4_nl +blk.42.ffn_down_shexp.weight iq4_nl blk.42.attn_v.weight q4_K blk.42.attn_output.weight iq3_s +blk.43.ffn_down_exps.weight iq4_nl +blk.43.ffn_down_shexp.weight iq4_nl blk.43.attn_output.weight iq3_s blk.43.attn_v.weight q4_K +blk.44.ffn_down_exps.weight iq4_nl +blk.44.ffn_down_shexp.weight iq4_nl blk.44.attn_output.weight iq3_s blk.44.attn_v.weight q4_K output.weight q5_K +blk.45.ffn_down_exps.weight iq4_nl +blk.45.ffn_down_shexp.weight iq4_nl blk.45.attn_output.weight iq3_s blk.45.attn_v.weight q4_K [IQ2_M] iq2_s token_embd.weight iq3_s -blk.0.ffn_down.weight iq3_s +blk.0.ffn_down.weight iq4_nl blk.0.attn_output.weight iq3_s blk.0.attn_v.weight q4_K -blk.1.ffn_down_exps.weight iq3_s -blk.1.ffn_down_shexp.weight iq3_s +blk.1.ffn_down_exps.weight iq4_nl +blk.1.ffn_down_shexp.weight iq4_nl blk.1.attn_output.weight iq3_s blk.1.attn_v.weight q4_K -blk.2.ffn_down_exps.weight iq3_s -blk.2.ffn_down_shexp.weight iq3_s +blk.2.ffn_down_exps.weight iq4_nl +blk.2.ffn_down_shexp.weight iq4_nl blk.2.attn_output.weight iq3_s blk.2.attn_v.weight q4_K +blk.3.ffn_down_exps.weight iq4_nl +blk.3.ffn_down_shexp.weight iq4_nl blk.3.attn_output.weight iq3_s blk.3.attn_v.weight q4_K +blk.4.ffn_down_exps.weight iq4_nl +blk.4.ffn_down_shexp.weight iq4_nl blk.4.attn_output.weight iq3_s blk.4.attn_v.weight q4_K +blk.5.ffn_down_exps.weight iq4_nl +blk.5.ffn_down_shexp.weight iq4_nl blk.5.attn_output.weight iq3_s blk.5.attn_v.weight q4_K +blk.6.ffn_down_exps.weight iq4_nl +blk.6.ffn_down_shexp.weight iq4_nl blk.6.attn_output.weight iq3_s blk.6.attn_v.weight q4_K +blk.7.ffn_down_exps.weight iq4_nl +blk.7.ffn_down_shexp.weight iq4_nl blk.7.attn_output.weight iq3_s blk.7.attn_v.weight q4_K +blk.8.ffn_down_exps.weight iq4_nl +blk.8.ffn_down_shexp.weight iq4_nl blk.8.attn_output.weight iq3_s blk.8.attn_v.weight q4_K +blk.9.ffn_down_exps.weight iq4_nl +blk.9.ffn_down_shexp.weight iq4_nl blk.9.attn_output.weight iq3_s blk.9.attn_v.weight q4_K +blk.10.ffn_down_exps.weight iq4_nl +blk.10.ffn_down_shexp.weight iq4_nl blk.10.attn_output.weight iq3_s blk.10.attn_v.weight q4_K +blk.11.ffn_down_exps.weight iq4_nl +blk.11.ffn_down_shexp.weight iq4_nl blk.11.attn_output.weight iq3_s blk.11.attn_v.weight q4_K +blk.12.ffn_down_exps.weight iq4_nl +blk.12.ffn_down_shexp.weight iq4_nl blk.12.attn_output.weight iq3_s blk.12.attn_v.weight q4_K +blk.13.ffn_down_exps.weight iq4_nl +blk.13.ffn_down_shexp.weight iq4_nl blk.13.attn_output.weight iq3_s blk.13.attn_v.weight q4_K +blk.14.ffn_down_exps.weight iq4_nl +blk.14.ffn_down_shexp.weight iq4_nl blk.14.attn_output.weight iq3_s blk.14.attn_v.weight q4_K +blk.15.ffn_down_exps.weight iq4_nl +blk.15.ffn_down_shexp.weight iq4_nl blk.15.attn_output.weight iq3_s blk.15.attn_v.weight q4_K +blk.16.ffn_down_exps.weight iq4_nl +blk.16.ffn_down_shexp.weight iq4_nl blk.16.attn_output.weight iq3_s blk.16.attn_v.weight q4_K +blk.17.ffn_down_exps.weight iq4_nl +blk.17.ffn_down_shexp.weight iq4_nl blk.17.attn_output.weight iq3_s blk.17.attn_v.weight q4_K +blk.18.ffn_down_exps.weight iq4_nl +blk.18.ffn_down_shexp.weight iq4_nl blk.18.attn_output.weight iq3_s blk.18.attn_v.weight q4_K +blk.19.ffn_down_exps.weight iq4_nl +blk.19.ffn_down_shexp.weight iq4_nl blk.19.attn_output.weight iq3_s blk.19.attn_v.weight q4_K +blk.20.ffn_down_exps.weight iq4_nl +blk.20.ffn_down_shexp.weight iq4_nl blk.20.attn_output.weight iq3_s blk.20.attn_v.weight q4_K +blk.21.ffn_down_exps.weight iq4_nl +blk.21.ffn_down_shexp.weight iq4_nl blk.21.attn_output.weight iq3_s blk.21.attn_v.weight q4_K +blk.22.ffn_down_exps.weight iq4_nl +blk.22.ffn_down_shexp.weight iq4_nl blk.22.attn_output.weight iq3_s blk.22.attn_v.weight q4_K +blk.23.ffn_down_exps.weight iq4_nl +blk.23.ffn_down_shexp.weight iq4_nl blk.23.attn_output.weight iq3_s blk.23.attn_v.weight q4_K +blk.24.ffn_down_exps.weight iq4_nl +blk.24.ffn_down_shexp.weight iq4_nl blk.24.attn_output.weight iq3_s blk.24.attn_v.weight q4_K +blk.25.ffn_down_exps.weight iq4_nl +blk.25.ffn_down_shexp.weight iq4_nl blk.25.attn_output.weight iq3_s blk.25.attn_v.weight q4_K +blk.26.ffn_down_exps.weight iq4_nl +blk.26.ffn_down_shexp.weight iq4_nl blk.26.attn_output.weight iq3_s blk.26.attn_v.weight q4_K +blk.27.ffn_down_exps.weight iq4_nl +blk.27.ffn_down_shexp.weight iq4_nl blk.27.attn_output.weight iq3_s blk.27.attn_v.weight q4_K +blk.28.ffn_down_exps.weight iq4_nl +blk.28.ffn_down_shexp.weight iq4_nl blk.28.attn_output.weight iq3_s blk.28.attn_v.weight q4_K +blk.29.ffn_down_exps.weight iq4_nl +blk.29.ffn_down_shexp.weight iq4_nl blk.29.attn_output.weight iq3_s blk.29.attn_v.weight q4_K +blk.30.ffn_down_exps.weight iq4_nl +blk.30.ffn_down_shexp.weight iq4_nl blk.30.attn_output.weight iq3_s blk.30.attn_v.weight q4_K +blk.31.ffn_down_exps.weight iq4_nl +blk.31.ffn_down_shexp.weight iq4_nl blk.31.attn_output.weight iq3_s blk.31.attn_v.weight q4_K +blk.32.ffn_down_exps.weight iq4_nl +blk.32.ffn_down_shexp.weight iq4_nl blk.32.attn_output.weight iq3_s blk.32.attn_v.weight q4_K +blk.33.ffn_down_exps.weight iq4_nl +blk.33.ffn_down_shexp.weight iq4_nl blk.33.attn_output.weight iq3_s blk.33.attn_v.weight q4_K +blk.34.ffn_down_exps.weight iq4_nl blk.34.attn_output.weight iq3_s +blk.34.ffn_down_shexp.weight iq4_nl blk.34.attn_v.weight q4_K +blk.35.ffn_down_exps.weight iq4_nl +blk.35.ffn_down_shexp.weight iq4_nl blk.35.attn_output.weight iq3_s blk.35.attn_v.weight q4_K +blk.36.ffn_down_exps.weight iq4_nl +blk.36.ffn_down_shexp.weight iq4_nl blk.36.attn_output.weight iq3_s blk.36.attn_v.weight q4_K +blk.37.ffn_down_exps.weight iq4_nl +blk.37.ffn_down_shexp.weight iq4_nl blk.37.attn_output.weight iq3_s blk.37.attn_v.weight q4_K +blk.38.ffn_down_exps.weight iq4_nl +blk.38.ffn_down_shexp.weight iq4_nl blk.38.attn_output.weight iq3_s blk.38.attn_v.weight q4_K +blk.39.ffn_down_exps.weight iq4_nl +blk.39.ffn_down_shexp.weight iq4_nl blk.39.attn_output.weight iq3_s blk.39.attn_v.weight q4_K +blk.40.ffn_down_exps.weight iq4_nl +blk.40.ffn_down_shexp.weight iq4_nl blk.40.attn_output.weight iq3_s blk.40.attn_v.weight q4_K +blk.41.ffn_down_exps.weight iq4_nl +blk.41.ffn_down_shexp.weight iq4_nl blk.41.attn_output.weight iq3_s blk.41.attn_v.weight q4_K +blk.42.ffn_down_exps.weight iq4_nl +blk.42.ffn_down_shexp.weight iq4_nl blk.42.attn_v.weight q4_K blk.42.attn_output.weight iq3_s +blk.43.ffn_down_exps.weight iq4_nl +blk.43.ffn_down_shexp.weight iq4_nl blk.43.attn_output.weight iq3_s blk.43.attn_v.weight q4_K +blk.44.ffn_down_exps.weight iq4_nl +blk.44.ffn_down_shexp.weight iq4_nl blk.44.attn_output.weight iq3_s blk.44.attn_v.weight q4_K output.weight q5_K +blk.45.ffn_down_exps.weight iq4_nl +blk.45.ffn_down_shexp.weight iq4_nl blk.45.attn_output.weight iq3_s blk.45.attn_v.weight q4_K [IQ4_XS] iq4_xs -blk.0.ffn_down.weight q5_K +blk.0.ffn_down.weight q5_1 blk.0.attn_v.weight q5_K -blk.1.ffn_down_exps.weight q5_K -blk.1.ffn_down_shexp.weight q5_K +blk.1.ffn_down_exps.weight q5_1 +blk.1.ffn_down_shexp.weight q5_1 blk.1.attn_v.weight q5_K -blk.2.ffn_down_exps.weight q5_K -blk.2.ffn_down_shexp.weight q5_K +blk.2.ffn_down_exps.weight q5_1 +blk.2.ffn_down_shexp.weight q5_1 blk.2.attn_v.weight q5_K -blk.3.ffn_down_exps.weight q5_K -blk.3.ffn_down_shexp.weight q5_K +blk.3.ffn_down_exps.weight q5_1 +blk.3.ffn_down_shexp.weight q5_1 blk.3.attn_v.weight q5_K -blk.4.ffn_down_exps.weight q5_K -blk.4.ffn_down_shexp.weight q5_K +blk.4.ffn_down_exps.weight q5_1 +blk.4.ffn_down_shexp.weight q5_1 blk.4.attn_v.weight q5_K +blk.5.ffn_down_exps.weight iq4_nl +blk.5.ffn_down_shexp.weight iq4_nl blk.5.attn_v.weight q5_K +blk.6.ffn_down_exps.weight iq4_nl +blk.6.ffn_down_shexp.weight iq4_nl blk.6.attn_v.weight q5_K +blk.7.ffn_down_exps.weight iq4_nl +blk.7.ffn_down_shexp.weight iq4_nl blk.7.attn_v.weight q5_K +blk.8.ffn_down_exps.weight iq4_nl +blk.8.ffn_down_shexp.weight iq4_nl blk.8.attn_v.weight q5_K +blk.9.ffn_down_exps.weight iq4_nl +blk.9.ffn_down_shexp.weight iq4_nl blk.9.attn_v.weight q5_K +blk.10.ffn_down_exps.weight iq4_nl +blk.10.ffn_down_shexp.weight iq4_nl blk.10.attn_v.weight q5_K +blk.11.ffn_down_exps.weight iq4_nl +blk.11.ffn_down_shexp.weight iq4_nl blk.11.attn_v.weight q5_K +blk.12.ffn_down_exps.weight iq4_nl +blk.12.ffn_down_shexp.weight iq4_nl blk.12.attn_v.weight q5_K +blk.13.ffn_down_exps.weight iq4_nl +blk.13.ffn_down_shexp.weight iq4_nl blk.13.attn_v.weight q5_K +blk.14.ffn_down_exps.weight iq4_nl +blk.14.ffn_down_shexp.weight iq4_nl blk.14.attn_v.weight q5_K +blk.15.ffn_down_exps.weight iq4_nl +blk.15.ffn_down_shexp.weight iq4_nl blk.15.attn_v.weight q5_K +blk.16.ffn_down_exps.weight iq4_nl +blk.16.ffn_down_shexp.weight iq4_nl blk.16.attn_v.weight q5_K +blk.17.ffn_down_exps.weight iq4_nl +blk.17.ffn_down_shexp.weight iq4_nl blk.17.attn_v.weight q5_K +blk.18.ffn_down_exps.weight iq4_nl +blk.18.ffn_down_shexp.weight iq4_nl blk.18.attn_v.weight q5_K +blk.19.ffn_down_exps.weight iq4_nl +blk.19.ffn_down_shexp.weight iq4_nl blk.19.attn_v.weight q5_K +blk.20.ffn_down_exps.weight iq4_nl +blk.20.ffn_down_shexp.weight iq4_nl blk.20.attn_v.weight q5_K +blk.21.ffn_down_exps.weight iq4_nl +blk.21.ffn_down_shexp.weight iq4_nl blk.21.attn_v.weight q5_K +blk.22.ffn_down_exps.weight iq4_nl +blk.22.ffn_down_shexp.weight iq4_nl blk.22.attn_v.weight q5_K +blk.23.ffn_down_exps.weight iq4_nl +blk.23.ffn_down_shexp.weight iq4_nl blk.23.attn_v.weight q5_K +blk.24.ffn_down_exps.weight iq4_nl +blk.24.ffn_down_shexp.weight iq4_nl blk.24.attn_v.weight q5_K +blk.25.ffn_down_exps.weight iq4_nl +blk.25.ffn_down_shexp.weight iq4_nl blk.25.attn_v.weight q5_K +blk.26.ffn_down_exps.weight iq4_nl +blk.26.ffn_down_shexp.weight iq4_nl blk.26.attn_v.weight q5_K +blk.27.ffn_down_exps.weight iq4_nl +blk.27.ffn_down_shexp.weight iq4_nl blk.27.attn_v.weight q5_K +blk.28.ffn_down_exps.weight iq4_nl +blk.28.ffn_down_shexp.weight iq4_nl blk.28.attn_v.weight q5_K +blk.29.ffn_down_exps.weight iq4_nl +blk.29.ffn_down_shexp.weight iq4_nl blk.29.attn_v.weight q5_K +blk.30.ffn_down_exps.weight iq4_nl +blk.30.ffn_down_shexp.weight iq4_nl blk.30.attn_v.weight q5_K +blk.31.ffn_down_exps.weight iq4_nl +blk.31.ffn_down_shexp.weight iq4_nl blk.31.attn_v.weight q5_K +blk.32.ffn_down_exps.weight iq4_nl +blk.32.ffn_down_shexp.weight iq4_nl blk.32.attn_v.weight q5_K +blk.33.ffn_down_exps.weight iq4_nl +blk.33.ffn_down_shexp.weight iq4_nl blk.33.attn_v.weight q5_K +blk.34.ffn_down_exps.weight iq4_nl +blk.34.ffn_down_shexp.weight iq4_nl blk.34.attn_v.weight q5_K +blk.35.ffn_down_exps.weight iq4_nl +blk.35.ffn_down_shexp.weight iq4_nl blk.35.attn_v.weight q5_K +blk.36.ffn_down_exps.weight iq4_nl +blk.36.ffn_down_shexp.weight iq4_nl blk.36.attn_v.weight q5_K +blk.37.ffn_down_exps.weight iq4_nl +blk.37.ffn_down_shexp.weight iq4_nl blk.37.attn_v.weight q5_K +blk.38.ffn_down_exps.weight iq4_nl +blk.38.ffn_down_shexp.weight iq4_nl blk.38.attn_v.weight q5_K +blk.39.ffn_down_exps.weight iq4_nl +blk.39.ffn_down_shexp.weight iq4_nl blk.39.attn_v.weight q5_K +blk.40.ffn_down_exps.weight iq4_nl +blk.40.ffn_down_shexp.weight iq4_nl blk.40.attn_v.weight q5_K +blk.41.ffn_down_exps.weight iq4_nl +blk.41.ffn_down_shexp.weight iq4_nl blk.41.attn_v.weight q5_K +blk.42.ffn_down_exps.weight iq4_nl +blk.42.ffn_down_shexp.weight iq4_nl blk.42.attn_v.weight q5_K +blk.43.ffn_down_exps.weight iq4_nl +blk.43.ffn_down_shexp.weight iq4_nl blk.43.attn_v.weight q5_K +blk.44.ffn_down_exps.weight iq4_nl +blk.44.ffn_down_shexp.weight iq4_nl blk.44.attn_v.weight q5_K output.weight q6_K +blk.45.ffn_down_exps.weight iq4_nl +blk.45.ffn_down_shexp.weight iq4_nl blk.45.attn_v.weight q5_K [IQ1_M] iq1_m token_embd.weight q2_K -blk.0.ffn_down.weight q2_K +blk.0.ffn_down.weight q4_0 blk.0.attn_output.weight iq2_xxs blk.0.attn_v.weight q4_K -blk.1.ffn_down_exps.weight q2_K -blk.1.ffn_down_shexp.weight q2_K +blk.1.ffn_down_exps.weight q4_0 +blk.1.ffn_down_shexp.weight q4_0 blk.1.attn_output.weight iq2_xxs blk.1.attn_v.weight q4_K -blk.2.ffn_down_exps.weight q2_K -blk.2.ffn_down_shexp.weight q2_K +blk.2.ffn_down_exps.weight q4_0 +blk.2.ffn_down_shexp.weight q4_0 blk.2.attn_output.weight iq2_xxs blk.2.attn_v.weight q4_K +blk.3.ffn_down_exps.weight iq4_nl +blk.3.ffn_down_shexp.weight iq4_nl blk.3.attn_output.weight iq2_xxs blk.3.attn_v.weight q4_K +blk.4.ffn_down_exps.weight iq4_nl +blk.4.ffn_down_shexp.weight iq4_nl blk.4.attn_output.weight iq2_xxs blk.4.attn_v.weight q4_K +blk.5.ffn_down_exps.weight iq4_nl +blk.5.ffn_down_shexp.weight iq4_nl blk.5.attn_output.weight iq2_xxs blk.5.attn_v.weight q4_K +blk.6.ffn_down_exps.weight iq4_nl +blk.6.ffn_down_shexp.weight iq4_nl blk.6.attn_output.weight iq2_xxs blk.6.attn_v.weight q4_K +blk.7.ffn_down_exps.weight iq4_nl +blk.7.ffn_down_shexp.weight iq4_nl blk.7.attn_output.weight iq2_xxs blk.7.attn_v.weight q4_K +blk.8.ffn_down_exps.weight iq4_nl +blk.8.ffn_down_shexp.weight iq4_nl blk.8.attn_output.weight iq2_xxs blk.8.attn_v.weight q4_K +blk.9.ffn_down_exps.weight iq4_nl +blk.9.ffn_down_shexp.weight iq4_nl blk.9.attn_output.weight iq2_xxs blk.9.attn_v.weight q4_K +blk.10.ffn_down_exps.weight iq4_nl +blk.10.ffn_down_shexp.weight iq4_nl blk.10.attn_output.weight iq2_xxs blk.10.attn_v.weight q4_K +blk.11.ffn_down_exps.weight iq4_nl +blk.11.ffn_down_shexp.weight iq4_nl blk.11.attn_output.weight iq2_xxs blk.11.attn_v.weight q4_K +blk.12.ffn_down_exps.weight iq4_nl +blk.12.ffn_down_shexp.weight iq4_nl blk.12.attn_output.weight iq2_xxs blk.12.attn_v.weight q4_K +blk.13.ffn_down_exps.weight iq4_nl +blk.13.ffn_down_shexp.weight iq4_nl blk.13.attn_output.weight iq2_xxs blk.13.attn_v.weight q4_K +blk.14.ffn_down_exps.weight iq4_nl +blk.14.ffn_down_shexp.weight iq4_nl blk.14.attn_output.weight iq2_xxs blk.14.attn_v.weight q4_K +blk.15.ffn_down_exps.weight iq4_nl +blk.15.ffn_down_shexp.weight iq4_nl blk.15.attn_output.weight iq2_xxs blk.15.attn_v.weight q4_K +blk.16.ffn_down_exps.weight iq4_nl +blk.16.ffn_down_shexp.weight iq4_nl blk.16.attn_output.weight iq2_xxs blk.16.attn_v.weight q4_K +blk.17.ffn_down_exps.weight iq4_nl +blk.17.ffn_down_shexp.weight iq4_nl blk.17.attn_output.weight iq2_xxs blk.17.attn_v.weight q4_K +blk.18.ffn_down_exps.weight iq4_nl +blk.18.ffn_down_shexp.weight iq4_nl blk.18.attn_output.weight iq2_xxs blk.18.attn_v.weight q4_K +blk.19.ffn_down_exps.weight iq4_nl +blk.19.ffn_down_shexp.weight iq4_nl blk.19.attn_output.weight iq2_xxs blk.19.attn_v.weight q4_K +blk.20.ffn_down_exps.weight iq4_nl +blk.20.ffn_down_shexp.weight iq4_nl blk.20.attn_output.weight iq2_xxs blk.20.attn_v.weight q4_K +blk.21.ffn_down_exps.weight iq4_nl +blk.21.ffn_down_shexp.weight iq4_nl blk.21.attn_output.weight iq2_xxs blk.21.attn_v.weight q4_K +blk.22.ffn_down_exps.weight iq4_nl +blk.22.ffn_down_shexp.weight iq4_nl blk.22.attn_output.weight iq2_xxs blk.22.attn_v.weight q4_K +blk.23.ffn_down_exps.weight iq4_nl +blk.23.ffn_down_shexp.weight iq4_nl blk.23.attn_output.weight iq2_xxs blk.23.attn_v.weight q4_K +blk.24.ffn_down_exps.weight iq4_nl +blk.24.ffn_down_shexp.weight iq4_nl blk.24.attn_output.weight iq2_xxs blk.24.attn_v.weight q4_K +blk.25.ffn_down_exps.weight iq4_nl +blk.25.ffn_down_shexp.weight iq4_nl blk.25.attn_output.weight iq2_xxs blk.25.attn_v.weight q4_K +blk.26.ffn_down_exps.weight iq4_nl +blk.26.ffn_down_shexp.weight iq4_nl blk.26.attn_output.weight iq2_xxs blk.26.attn_v.weight q4_K +blk.27.ffn_down_exps.weight iq4_nl +blk.27.ffn_down_shexp.weight iq4_nl blk.27.attn_output.weight iq2_xxs blk.27.attn_v.weight q4_K +blk.28.ffn_down_exps.weight iq4_nl +blk.28.ffn_down_shexp.weight iq4_nl blk.28.attn_output.weight iq2_xxs blk.28.attn_v.weight q4_K +blk.29.ffn_down_exps.weight iq4_nl +blk.29.ffn_down_shexp.weight iq4_nl blk.29.attn_output.weight iq2_xxs blk.29.attn_v.weight q4_K +blk.30.ffn_down_exps.weight iq4_nl +blk.30.ffn_down_shexp.weight iq4_nl blk.30.attn_output.weight iq2_xxs blk.30.attn_v.weight q4_K +blk.31.ffn_down_exps.weight iq4_nl +blk.31.ffn_down_shexp.weight iq4_nl blk.31.attn_output.weight iq2_xxs blk.31.attn_v.weight q4_K +blk.32.ffn_down_exps.weight iq4_nl +blk.32.ffn_down_shexp.weight iq4_nl blk.32.attn_output.weight iq2_xxs blk.32.attn_v.weight q4_K +blk.33.ffn_down_exps.weight iq4_nl +blk.33.ffn_down_shexp.weight iq4_nl blk.33.attn_output.weight iq2_xxs blk.33.attn_v.weight q4_K +blk.34.ffn_down_exps.weight iq4_nl blk.34.attn_output.weight iq2_xxs +blk.34.ffn_down_shexp.weight iq4_nl blk.34.attn_v.weight q4_K +blk.35.ffn_down_exps.weight iq4_nl +blk.35.ffn_down_shexp.weight iq4_nl blk.35.attn_output.weight iq2_xxs blk.35.attn_v.weight q4_K +blk.36.ffn_down_exps.weight iq4_nl +blk.36.ffn_down_shexp.weight iq4_nl blk.36.attn_output.weight iq2_xxs blk.36.attn_v.weight q4_K +blk.37.ffn_down_exps.weight iq4_nl +blk.37.ffn_down_shexp.weight iq4_nl blk.37.attn_output.weight iq2_xxs blk.37.attn_v.weight q4_K +blk.38.ffn_down_exps.weight iq4_nl +blk.38.ffn_down_shexp.weight iq4_nl blk.38.attn_output.weight iq2_xxs blk.38.attn_v.weight q4_K +blk.39.ffn_down_exps.weight iq4_nl +blk.39.ffn_down_shexp.weight iq4_nl blk.39.attn_output.weight iq2_xxs blk.39.attn_v.weight q4_K +blk.40.ffn_down_exps.weight iq4_nl +blk.40.ffn_down_shexp.weight iq4_nl blk.40.attn_output.weight iq2_xxs blk.40.attn_v.weight q4_K +blk.41.ffn_down_exps.weight iq4_nl +blk.41.ffn_down_shexp.weight iq4_nl blk.41.attn_output.weight iq2_xxs blk.41.attn_v.weight q4_K +blk.42.ffn_down_exps.weight iq4_nl +blk.42.ffn_down_shexp.weight iq4_nl blk.42.attn_v.weight q4_K blk.42.attn_output.weight iq2_xxs +blk.43.ffn_down_exps.weight iq4_nl +blk.43.ffn_down_shexp.weight iq4_nl blk.43.attn_output.weight iq2_xxs blk.43.attn_v.weight q4_K +blk.44.ffn_down_exps.weight iq4_nl +blk.44.ffn_down_shexp.weight iq4_nl blk.44.attn_output.weight iq2_xxs blk.44.attn_v.weight q4_K output.weight q5_K +blk.45.ffn_down_exps.weight iq4_nl +blk.45.ffn_down_shexp.weight iq4_nl blk.45.attn_output.weight iq2_xxs blk.45.attn_v.weight q4_K [BF16] bf16 -output.weight q6_K [TQ1_0] tq1_0 token_embd.weight q4_K +blk.0.ffn_down.weight q4_0 +blk.1.ffn_down_exps.weight q4_0 +blk.1.ffn_down_shexp.weight q4_0 +blk.2.ffn_down_exps.weight q4_0 +blk.2.ffn_down_shexp.weight q4_0 +blk.3.ffn_down_exps.weight q4_0 +blk.3.ffn_down_shexp.weight q4_0 +blk.4.ffn_down_exps.weight q4_0 +blk.4.ffn_down_shexp.weight q4_0 +blk.5.ffn_down_exps.weight q4_0 +blk.5.ffn_down_shexp.weight q4_0 +blk.6.ffn_down_exps.weight q4_0 +blk.6.ffn_down_shexp.weight q4_0 +blk.7.ffn_down_exps.weight q4_0 +blk.7.ffn_down_shexp.weight q4_0 +blk.8.ffn_down_exps.weight q4_0 +blk.8.ffn_down_shexp.weight q4_0 +blk.9.ffn_down_exps.weight q4_0 +blk.9.ffn_down_shexp.weight q4_0 +blk.10.ffn_down_exps.weight q4_0 +blk.10.ffn_down_shexp.weight q4_0 +blk.11.ffn_down_exps.weight q4_0 +blk.11.ffn_down_shexp.weight q4_0 +blk.12.ffn_down_exps.weight q4_0 +blk.12.ffn_down_shexp.weight q4_0 +blk.13.ffn_down_exps.weight q4_0 +blk.13.ffn_down_shexp.weight q4_0 +blk.14.ffn_down_exps.weight q4_0 +blk.14.ffn_down_shexp.weight q4_0 +blk.15.ffn_down_exps.weight q4_0 +blk.15.ffn_down_shexp.weight q4_0 +blk.16.ffn_down_exps.weight q4_0 +blk.16.ffn_down_shexp.weight q4_0 +blk.17.ffn_down_exps.weight q4_0 +blk.17.ffn_down_shexp.weight q4_0 +blk.18.ffn_down_exps.weight q4_0 +blk.18.ffn_down_shexp.weight q4_0 +blk.19.ffn_down_exps.weight q4_0 +blk.19.ffn_down_shexp.weight q4_0 +blk.20.ffn_down_exps.weight q4_0 +blk.20.ffn_down_shexp.weight q4_0 +blk.21.ffn_down_exps.weight q4_0 +blk.21.ffn_down_shexp.weight q4_0 +blk.22.ffn_down_exps.weight q4_0 +blk.22.ffn_down_shexp.weight q4_0 +blk.23.ffn_down_exps.weight q4_0 +blk.23.ffn_down_shexp.weight q4_0 +blk.24.ffn_down_exps.weight q4_0 +blk.24.ffn_down_shexp.weight q4_0 +blk.25.ffn_down_exps.weight q4_0 +blk.25.ffn_down_shexp.weight q4_0 +blk.26.ffn_down_exps.weight q4_0 +blk.26.ffn_down_shexp.weight q4_0 +blk.27.ffn_down_exps.weight q4_0 +blk.27.ffn_down_shexp.weight q4_0 +blk.28.ffn_down_exps.weight q4_0 +blk.28.ffn_down_shexp.weight q4_0 +blk.29.ffn_down_exps.weight q4_0 +blk.29.ffn_down_shexp.weight q4_0 +blk.30.ffn_down_exps.weight q4_0 +blk.30.ffn_down_shexp.weight q4_0 +blk.31.ffn_down_exps.weight q4_0 +blk.31.ffn_down_shexp.weight q4_0 +blk.32.ffn_down_exps.weight q4_0 +blk.32.ffn_down_shexp.weight q4_0 +blk.33.ffn_down_exps.weight q4_0 +blk.33.ffn_down_shexp.weight q4_0 +blk.34.ffn_down_exps.weight q4_0 +blk.34.ffn_down_shexp.weight q4_0 +blk.35.ffn_down_exps.weight q4_0 +blk.35.ffn_down_shexp.weight q4_0 +blk.36.ffn_down_exps.weight q4_0 +blk.36.ffn_down_shexp.weight q4_0 +blk.37.ffn_down_exps.weight q4_0 +blk.37.ffn_down_shexp.weight q4_0 +blk.38.ffn_down_exps.weight q4_0 +blk.38.ffn_down_shexp.weight q4_0 +blk.39.ffn_down_exps.weight q4_0 +blk.39.ffn_down_shexp.weight q4_0 +blk.40.ffn_down_exps.weight q4_0 +blk.40.ffn_down_shexp.weight q4_0 +blk.41.ffn_down_exps.weight q4_0 +blk.41.ffn_down_shexp.weight q4_0 +blk.42.ffn_down_exps.weight q4_0 +blk.42.ffn_down_shexp.weight q4_0 +blk.43.ffn_down_exps.weight q4_0 +blk.43.ffn_down_shexp.weight q4_0 +blk.44.ffn_down_exps.weight q4_0 +blk.44.ffn_down_shexp.weight q4_0 output.weight q6_K +blk.45.ffn_down_exps.weight q4_0 +blk.45.ffn_down_shexp.weight q4_0 [TQ2_0] tq2_0 token_embd.weight q4_K +blk.0.ffn_down.weight q4_0 +blk.1.ffn_down_exps.weight q4_0 +blk.1.ffn_down_shexp.weight q4_0 +blk.2.ffn_down_exps.weight q4_0 +blk.2.ffn_down_shexp.weight q4_0 +blk.3.ffn_down_exps.weight q4_0 +blk.3.ffn_down_shexp.weight q4_0 +blk.4.ffn_down_exps.weight q4_0 +blk.4.ffn_down_shexp.weight q4_0 +blk.5.ffn_down_exps.weight q4_0 +blk.5.ffn_down_shexp.weight q4_0 +blk.6.ffn_down_exps.weight q4_0 +blk.6.ffn_down_shexp.weight q4_0 +blk.7.ffn_down_exps.weight q4_0 +blk.7.ffn_down_shexp.weight q4_0 +blk.8.ffn_down_exps.weight q4_0 +blk.8.ffn_down_shexp.weight q4_0 +blk.9.ffn_down_exps.weight q4_0 +blk.9.ffn_down_shexp.weight q4_0 +blk.10.ffn_down_exps.weight q4_0 +blk.10.ffn_down_shexp.weight q4_0 +blk.11.ffn_down_exps.weight q4_0 +blk.11.ffn_down_shexp.weight q4_0 +blk.12.ffn_down_exps.weight q4_0 +blk.12.ffn_down_shexp.weight q4_0 +blk.13.ffn_down_exps.weight q4_0 +blk.13.ffn_down_shexp.weight q4_0 +blk.14.ffn_down_exps.weight q4_0 +blk.14.ffn_down_shexp.weight q4_0 +blk.15.ffn_down_exps.weight q4_0 +blk.15.ffn_down_shexp.weight q4_0 +blk.16.ffn_down_exps.weight q4_0 +blk.16.ffn_down_shexp.weight q4_0 +blk.17.ffn_down_exps.weight q4_0 +blk.17.ffn_down_shexp.weight q4_0 +blk.18.ffn_down_exps.weight q4_0 +blk.18.ffn_down_shexp.weight q4_0 +blk.19.ffn_down_exps.weight q4_0 +blk.19.ffn_down_shexp.weight q4_0 +blk.20.ffn_down_exps.weight q4_0 +blk.20.ffn_down_shexp.weight q4_0 +blk.21.ffn_down_exps.weight q4_0 +blk.21.ffn_down_shexp.weight q4_0 +blk.22.ffn_down_exps.weight q4_0 +blk.22.ffn_down_shexp.weight q4_0 +blk.23.ffn_down_exps.weight q4_0 +blk.23.ffn_down_shexp.weight q4_0 +blk.24.ffn_down_exps.weight q4_0 +blk.24.ffn_down_shexp.weight q4_0 +blk.25.ffn_down_exps.weight q4_0 +blk.25.ffn_down_shexp.weight q4_0 +blk.26.ffn_down_exps.weight q4_0 +blk.26.ffn_down_shexp.weight q4_0 +blk.27.ffn_down_exps.weight q4_0 +blk.27.ffn_down_shexp.weight q4_0 +blk.28.ffn_down_exps.weight q4_0 +blk.28.ffn_down_shexp.weight q4_0 +blk.29.ffn_down_exps.weight q4_0 +blk.29.ffn_down_shexp.weight q4_0 +blk.30.ffn_down_exps.weight q4_0 +blk.30.ffn_down_shexp.weight q4_0 +blk.31.ffn_down_exps.weight q4_0 +blk.31.ffn_down_shexp.weight q4_0 +blk.32.ffn_down_exps.weight q4_0 +blk.32.ffn_down_shexp.weight q4_0 +blk.33.ffn_down_exps.weight q4_0 +blk.33.ffn_down_shexp.weight q4_0 +blk.34.ffn_down_exps.weight q4_0 +blk.34.ffn_down_shexp.weight q4_0 +blk.35.ffn_down_exps.weight q4_0 +blk.35.ffn_down_shexp.weight q4_0 +blk.36.ffn_down_exps.weight q4_0 +blk.36.ffn_down_shexp.weight q4_0 +blk.37.ffn_down_exps.weight q4_0 +blk.37.ffn_down_shexp.weight q4_0 +blk.38.ffn_down_exps.weight q4_0 +blk.38.ffn_down_shexp.weight q4_0 +blk.39.ffn_down_exps.weight q4_0 +blk.39.ffn_down_shexp.weight q4_0 +blk.40.ffn_down_exps.weight q4_0 +blk.40.ffn_down_shexp.weight q4_0 +blk.41.ffn_down_exps.weight q4_0 +blk.41.ffn_down_shexp.weight q4_0 +blk.42.ffn_down_exps.weight q4_0 +blk.42.ffn_down_shexp.weight q4_0 +blk.43.ffn_down_exps.weight q4_0 +blk.43.ffn_down_shexp.weight q4_0 +blk.44.ffn_down_exps.weight q4_0 +blk.44.ffn_down_shexp.weight q4_0 output.weight q6_K +blk.45.ffn_down_exps.weight q4_0 +blk.45.ffn_down_shexp.weight q4_0 [MXFP4_MOE] mxfp4 token_embd.weight q8_0 diff --git a/tests/snapshots/gpt-oss-120b.schema b/tests/snapshots/gpt-oss-120b.schema index e66fbcfd2b..25c98f23cd 100644 --- a/tests/snapshots/gpt-oss-120b.schema +++ b/tests/snapshots/gpt-oss-120b.schema @@ -2,1470 +2,5451 @@ # n_embd=2880, n_ff=2880, n_vocab=0, n_layer=36, n_head=64, n_head_kv=8, n_expert=128 [F32] f32 -output.weight q6_K [F16] f16 -output.weight q6_K [Q4_0] q4_0 -output.weight q6_K +output.weight q8_0 [Q4_1] q4_1 -output.weight q6_K +output.weight q8_0 [Q8_0] q8_0 [Q5_0] q5_0 -output.weight q6_K +output.weight q8_0 [Q5_1] q5_1 -output.weight q6_K +output.weight q8_0 [Q2_K] q2_K output.weight q8_0 +token_embd.weight q4_0 +blk.0.attn_k.weight q4_0 blk.0.attn_output.weight q3_K -blk.0.attn_v.weight q4_K -blk.0.ffn_down_exps.weight q3_K +blk.0.attn_q.weight q4_0 +blk.0.attn_v.weight q5_0 +blk.0.ffn_down_exps.weight q4_0 +blk.0.ffn_gate_exps.weight q4_0 +blk.0.ffn_up_exps.weight q4_0 +blk.1.attn_k.weight q4_0 blk.1.attn_output.weight q3_K -blk.1.attn_v.weight q4_K -blk.1.ffn_down_exps.weight q3_K +blk.1.attn_q.weight q4_0 +blk.1.attn_v.weight q5_0 +blk.1.ffn_down_exps.weight q4_0 +blk.1.ffn_gate_exps.weight q4_0 +blk.1.ffn_up_exps.weight q4_0 +blk.2.attn_k.weight q4_0 blk.2.attn_output.weight q3_K -blk.2.attn_v.weight q4_K -blk.2.ffn_down_exps.weight q3_K +blk.2.attn_q.weight q4_0 +blk.2.attn_v.weight q5_0 +blk.2.ffn_down_exps.weight q4_0 +blk.2.ffn_gate_exps.weight q4_0 +blk.2.ffn_up_exps.weight q4_0 +blk.3.attn_k.weight q4_0 blk.3.attn_output.weight q3_K -blk.3.attn_v.weight q4_K -blk.3.ffn_down_exps.weight q3_K +blk.3.attn_q.weight q4_0 +blk.3.attn_v.weight q5_0 +blk.3.ffn_down_exps.weight q4_0 +blk.3.ffn_gate_exps.weight q4_0 +blk.3.ffn_up_exps.weight q4_0 +blk.4.attn_k.weight q4_0 blk.4.attn_output.weight q3_K -blk.4.attn_v.weight q4_K -blk.4.ffn_down_exps.weight q3_K +blk.4.attn_q.weight q4_0 +blk.4.attn_v.weight q5_0 +blk.4.ffn_down_exps.weight q4_0 +blk.4.ffn_gate_exps.weight q4_0 +blk.4.ffn_up_exps.weight q4_0 +blk.5.attn_k.weight q4_0 blk.5.attn_output.weight q3_K -blk.5.attn_v.weight q4_K -blk.5.ffn_down_exps.weight q3_K +blk.5.attn_q.weight q4_0 +blk.5.attn_v.weight q5_0 +blk.5.ffn_down_exps.weight q4_0 +blk.5.ffn_gate_exps.weight q4_0 +blk.5.ffn_up_exps.weight q4_0 +blk.6.attn_k.weight q4_0 blk.6.attn_output.weight q3_K -blk.6.attn_v.weight q4_K -blk.6.ffn_down_exps.weight q3_K +blk.6.attn_q.weight q4_0 +blk.6.attn_v.weight q5_0 +blk.6.ffn_down_exps.weight q4_0 +blk.6.ffn_gate_exps.weight q4_0 +blk.6.ffn_up_exps.weight q4_0 +blk.7.attn_k.weight q4_0 blk.7.attn_output.weight q3_K -blk.7.attn_v.weight q4_K -blk.7.ffn_down_exps.weight q3_K +blk.7.attn_q.weight q4_0 +blk.7.attn_v.weight q5_0 +blk.7.ffn_down_exps.weight q4_0 +blk.7.ffn_gate_exps.weight q4_0 +blk.7.ffn_up_exps.weight q4_0 +blk.8.attn_k.weight q4_0 blk.8.attn_output.weight q3_K -blk.8.attn_v.weight q4_K -blk.8.ffn_down_exps.weight q3_K +blk.8.attn_q.weight q4_0 +blk.8.attn_v.weight q5_0 +blk.8.ffn_down_exps.weight q4_0 +blk.8.ffn_gate_exps.weight q4_0 +blk.8.ffn_up_exps.weight q4_0 +blk.9.attn_k.weight q4_0 blk.9.attn_output.weight q3_K -blk.9.attn_v.weight q4_K -blk.9.ffn_down_exps.weight q3_K +blk.9.attn_q.weight q4_0 +blk.9.attn_v.weight q5_0 +blk.9.ffn_down_exps.weight q4_0 +blk.9.ffn_gate_exps.weight q4_0 +blk.9.ffn_up_exps.weight q4_0 +blk.10.attn_k.weight q4_0 blk.10.attn_output.weight q3_K -blk.10.attn_v.weight q4_K -blk.10.ffn_down_exps.weight q3_K +blk.10.attn_q.weight q4_0 +blk.10.attn_v.weight q5_0 +blk.10.ffn_down_exps.weight q4_0 +blk.10.ffn_gate_exps.weight q4_0 +blk.10.ffn_up_exps.weight q4_0 +blk.11.attn_k.weight q4_0 blk.11.attn_output.weight q3_K -blk.11.attn_v.weight q4_K -blk.11.ffn_down_exps.weight q3_K +blk.11.attn_q.weight q4_0 +blk.11.attn_v.weight q5_0 +blk.11.ffn_down_exps.weight q4_0 +blk.11.ffn_gate_exps.weight q4_0 +blk.11.ffn_up_exps.weight q4_0 +blk.12.attn_k.weight q4_0 blk.12.attn_output.weight q3_K -blk.12.attn_v.weight q4_K -blk.12.ffn_down_exps.weight q3_K +blk.12.attn_q.weight q4_0 +blk.12.attn_v.weight q5_0 +blk.12.ffn_down_exps.weight q4_0 +blk.12.ffn_gate_exps.weight q4_0 +blk.12.ffn_up_exps.weight q4_0 +blk.13.attn_k.weight q4_0 blk.13.attn_output.weight q3_K -blk.13.attn_v.weight q4_K -blk.13.ffn_down_exps.weight q3_K +blk.13.attn_q.weight q4_0 +blk.13.attn_v.weight q5_0 +blk.13.ffn_down_exps.weight q4_0 +blk.13.ffn_gate_exps.weight q4_0 +blk.13.ffn_up_exps.weight q4_0 +blk.14.attn_k.weight q4_0 blk.14.attn_output.weight q3_K -blk.14.attn_v.weight q4_K -blk.14.ffn_down_exps.weight q3_K +blk.14.attn_q.weight q4_0 +blk.14.attn_v.weight q5_0 +blk.14.ffn_down_exps.weight q4_0 +blk.14.ffn_gate_exps.weight q4_0 +blk.14.ffn_up_exps.weight q4_0 +blk.15.attn_k.weight q4_0 blk.15.attn_output.weight q3_K -blk.15.attn_v.weight q4_K -blk.15.ffn_down_exps.weight q3_K +blk.15.attn_q.weight q4_0 +blk.15.attn_v.weight q5_0 +blk.15.ffn_down_exps.weight q4_0 +blk.15.ffn_gate_exps.weight q4_0 +blk.15.ffn_up_exps.weight q4_0 +blk.16.attn_k.weight q4_0 blk.16.attn_output.weight q3_K -blk.16.attn_v.weight q4_K -blk.16.ffn_down_exps.weight q3_K +blk.16.attn_q.weight q4_0 +blk.16.attn_v.weight q5_0 +blk.16.ffn_down_exps.weight q4_0 +blk.16.ffn_gate_exps.weight q4_0 +blk.16.ffn_up_exps.weight q4_0 +blk.17.attn_k.weight q4_0 blk.17.attn_output.weight q3_K -blk.17.attn_v.weight q4_K -blk.17.ffn_down_exps.weight q3_K +blk.17.attn_q.weight q4_0 +blk.17.attn_v.weight q5_0 +blk.17.ffn_down_exps.weight q4_0 +blk.17.ffn_gate_exps.weight q4_0 +blk.17.ffn_up_exps.weight q4_0 +blk.18.attn_k.weight q4_0 blk.18.attn_output.weight q3_K -blk.18.attn_v.weight q4_K -blk.18.ffn_down_exps.weight q3_K +blk.18.attn_q.weight q4_0 +blk.18.attn_v.weight q5_0 +blk.18.ffn_down_exps.weight q4_0 +blk.18.ffn_gate_exps.weight q4_0 +blk.18.ffn_up_exps.weight q4_0 +blk.19.attn_k.weight q4_0 blk.19.attn_output.weight q3_K -blk.19.attn_v.weight q4_K -blk.19.ffn_down_exps.weight q3_K +blk.19.attn_q.weight q4_0 +blk.19.attn_v.weight q5_0 +blk.19.ffn_down_exps.weight q4_0 +blk.19.ffn_gate_exps.weight q4_0 +blk.19.ffn_up_exps.weight q4_0 +blk.20.attn_k.weight q4_0 blk.20.attn_output.weight q3_K -blk.20.attn_v.weight q4_K -blk.20.ffn_down_exps.weight q3_K +blk.20.attn_q.weight q4_0 +blk.20.attn_v.weight q5_0 +blk.20.ffn_down_exps.weight q4_0 +blk.20.ffn_gate_exps.weight q4_0 +blk.20.ffn_up_exps.weight q4_0 +blk.21.attn_k.weight q4_0 blk.21.attn_output.weight q3_K -blk.21.attn_v.weight q4_K -blk.21.ffn_down_exps.weight q3_K +blk.21.attn_q.weight q4_0 +blk.21.attn_v.weight q5_0 +blk.21.ffn_down_exps.weight q4_0 +blk.21.ffn_gate_exps.weight q4_0 +blk.21.ffn_up_exps.weight q4_0 +blk.22.attn_k.weight q4_0 blk.22.attn_output.weight q3_K -blk.22.attn_v.weight q4_K -blk.22.ffn_down_exps.weight q3_K +blk.22.attn_q.weight q4_0 +blk.22.attn_v.weight q5_0 +blk.22.ffn_down_exps.weight q4_0 +blk.22.ffn_gate_exps.weight q4_0 +blk.22.ffn_up_exps.weight q4_0 +blk.23.attn_k.weight q4_0 blk.23.attn_output.weight q3_K -blk.23.attn_v.weight q4_K -blk.23.ffn_down_exps.weight q3_K +blk.23.attn_q.weight q4_0 +blk.23.attn_v.weight q5_0 +blk.23.ffn_down_exps.weight q4_0 +blk.23.ffn_gate_exps.weight q4_0 +blk.23.ffn_up_exps.weight q4_0 +blk.24.attn_k.weight q4_0 blk.24.attn_output.weight q3_K -blk.24.attn_v.weight q4_K -blk.24.ffn_down_exps.weight q3_K +blk.24.attn_q.weight q4_0 +blk.24.attn_v.weight q5_0 +blk.24.ffn_down_exps.weight q4_0 +blk.24.ffn_gate_exps.weight q4_0 +blk.24.ffn_up_exps.weight q4_0 +blk.25.attn_k.weight q4_0 blk.25.attn_output.weight q3_K -blk.25.attn_v.weight q4_K -blk.25.ffn_down_exps.weight q3_K +blk.25.attn_q.weight q4_0 +blk.25.attn_v.weight q5_0 +blk.25.ffn_down_exps.weight q4_0 +blk.25.ffn_gate_exps.weight q4_0 +blk.25.ffn_up_exps.weight q4_0 +blk.26.attn_k.weight q4_0 blk.26.attn_output.weight q3_K -blk.26.attn_v.weight q4_K -blk.26.ffn_down_exps.weight q3_K +blk.26.attn_q.weight q4_0 +blk.26.attn_v.weight q5_0 +blk.26.ffn_down_exps.weight q4_0 +blk.26.ffn_gate_exps.weight q4_0 +blk.26.ffn_up_exps.weight q4_0 +blk.27.attn_k.weight q4_0 blk.27.attn_output.weight q3_K -blk.27.attn_v.weight q4_K -blk.27.ffn_down_exps.weight q3_K +blk.27.attn_q.weight q4_0 +blk.27.attn_v.weight q5_0 +blk.27.ffn_down_exps.weight q4_0 +blk.27.ffn_gate_exps.weight q4_0 +blk.27.ffn_up_exps.weight q4_0 +blk.28.attn_k.weight q4_0 blk.28.attn_output.weight q3_K -blk.28.attn_v.weight q4_K -blk.28.ffn_down_exps.weight q3_K +blk.28.attn_q.weight q4_0 +blk.28.attn_v.weight q5_0 +blk.28.ffn_down_exps.weight q4_0 +blk.28.ffn_gate_exps.weight q4_0 +blk.28.ffn_up_exps.weight q4_0 +blk.29.attn_k.weight q4_0 blk.29.attn_output.weight q3_K -blk.29.attn_v.weight q4_K -blk.29.ffn_down_exps.weight q3_K +blk.29.attn_q.weight q4_0 +blk.29.attn_v.weight q5_0 +blk.29.ffn_down_exps.weight q4_0 +blk.29.ffn_gate_exps.weight q4_0 +blk.29.ffn_up_exps.weight q4_0 +blk.30.attn_k.weight q4_0 blk.30.attn_output.weight q3_K -blk.30.attn_v.weight q4_K -blk.30.ffn_down_exps.weight q3_K +blk.30.attn_q.weight q4_0 +blk.30.attn_v.weight q5_0 +blk.30.ffn_down_exps.weight q4_0 +blk.30.ffn_gate_exps.weight q4_0 +blk.30.ffn_up_exps.weight q4_0 +blk.31.attn_k.weight q4_0 blk.31.attn_output.weight q3_K -blk.31.attn_v.weight q4_K -blk.31.ffn_down_exps.weight q3_K +blk.31.attn_q.weight q4_0 +blk.31.attn_v.weight q5_0 +blk.31.ffn_down_exps.weight q4_0 +blk.31.ffn_gate_exps.weight q4_0 +blk.31.ffn_up_exps.weight q4_0 +blk.32.attn_k.weight q4_0 blk.32.attn_output.weight q3_K -blk.32.attn_v.weight q4_K -blk.32.ffn_down_exps.weight q3_K +blk.32.attn_q.weight q4_0 +blk.32.attn_v.weight q5_0 +blk.32.ffn_down_exps.weight q4_0 +blk.32.ffn_gate_exps.weight q4_0 +blk.32.ffn_up_exps.weight q4_0 +blk.33.attn_k.weight q4_0 blk.33.attn_output.weight q3_K -blk.33.attn_v.weight q4_K -blk.33.ffn_down_exps.weight q3_K +blk.33.attn_q.weight q4_0 +blk.33.attn_v.weight q5_0 +blk.33.ffn_down_exps.weight q4_0 +blk.33.ffn_gate_exps.weight q4_0 +blk.33.ffn_up_exps.weight q4_0 +blk.34.attn_k.weight q4_0 blk.34.attn_output.weight q3_K -blk.34.attn_v.weight q4_K -blk.34.ffn_down_exps.weight q3_K +blk.34.attn_q.weight q4_0 +blk.34.attn_v.weight q5_0 +blk.34.ffn_down_exps.weight q4_0 +blk.34.ffn_gate_exps.weight q4_0 +blk.34.ffn_up_exps.weight q4_0 +blk.35.attn_k.weight q4_0 blk.35.attn_output.weight q3_K -blk.35.attn_v.weight q4_K -blk.35.ffn_down_exps.weight q3_K +blk.35.attn_q.weight q4_0 +blk.35.attn_v.weight q5_0 +blk.35.ffn_down_exps.weight q4_0 +blk.35.ffn_gate_exps.weight q4_0 +blk.35.ffn_up_exps.weight q4_0 [Q3_K_S] q3_K output.weight q8_0 +token_embd.weight q4_0 +blk.0.attn_k.weight q4_0 +blk.0.attn_q.weight q4_0 +blk.0.attn_v.weight q4_0 +blk.0.ffn_down_exps.weight q4_0 +blk.0.ffn_gate_exps.weight q4_0 +blk.0.ffn_up_exps.weight q4_0 +blk.1.attn_k.weight q4_0 +blk.1.attn_q.weight q4_0 +blk.1.attn_v.weight q4_0 +blk.1.ffn_down_exps.weight q4_0 +blk.1.ffn_gate_exps.weight q4_0 +blk.1.ffn_up_exps.weight q4_0 +blk.2.attn_k.weight q4_0 +blk.2.attn_q.weight q4_0 +blk.2.attn_v.weight q4_0 +blk.2.ffn_down_exps.weight q4_0 +blk.2.ffn_gate_exps.weight q4_0 +blk.2.ffn_up_exps.weight q4_0 +blk.3.attn_k.weight q4_0 +blk.3.attn_q.weight q4_0 +blk.3.attn_v.weight q4_0 +blk.3.ffn_down_exps.weight q4_0 +blk.3.ffn_gate_exps.weight q4_0 +blk.3.ffn_up_exps.weight q4_0 +blk.4.attn_k.weight q4_0 +blk.4.attn_q.weight q4_0 +blk.4.attn_v.weight q4_0 +blk.4.ffn_down_exps.weight q4_0 +blk.4.ffn_gate_exps.weight q4_0 +blk.4.ffn_up_exps.weight q4_0 +blk.5.attn_k.weight q4_0 +blk.5.attn_q.weight q4_0 +blk.5.attn_v.weight q4_0 +blk.5.ffn_down_exps.weight q4_0 +blk.5.ffn_gate_exps.weight q4_0 +blk.5.ffn_up_exps.weight q4_0 +blk.6.attn_k.weight q4_0 +blk.6.attn_q.weight q4_0 +blk.6.attn_v.weight q4_0 +blk.6.ffn_down_exps.weight q4_0 +blk.6.ffn_gate_exps.weight q4_0 +blk.6.ffn_up_exps.weight q4_0 +blk.7.attn_k.weight q4_0 +blk.7.attn_q.weight q4_0 +blk.7.attn_v.weight q4_0 +blk.7.ffn_down_exps.weight q4_0 +blk.7.ffn_gate_exps.weight q4_0 +blk.7.ffn_up_exps.weight q4_0 +blk.8.attn_k.weight q4_0 +blk.8.attn_q.weight q4_0 +blk.8.attn_v.weight q4_0 +blk.8.ffn_down_exps.weight q4_0 +blk.8.ffn_gate_exps.weight q4_0 +blk.8.ffn_up_exps.weight q4_0 +blk.9.attn_k.weight q4_0 +blk.9.attn_q.weight q4_0 +blk.9.attn_v.weight q4_0 +blk.9.ffn_down_exps.weight q4_0 +blk.9.ffn_gate_exps.weight q4_0 +blk.9.ffn_up_exps.weight q4_0 +blk.10.attn_k.weight q4_0 +blk.10.attn_q.weight q4_0 +blk.10.attn_v.weight q4_0 +blk.10.ffn_down_exps.weight q4_0 +blk.10.ffn_gate_exps.weight q4_0 +blk.10.ffn_up_exps.weight q4_0 +blk.11.attn_k.weight q4_0 +blk.11.attn_q.weight q4_0 +blk.11.attn_v.weight q4_0 +blk.11.ffn_down_exps.weight q4_0 +blk.11.ffn_gate_exps.weight q4_0 +blk.11.ffn_up_exps.weight q4_0 +blk.12.attn_k.weight q4_0 +blk.12.attn_q.weight q4_0 +blk.12.attn_v.weight q4_0 +blk.12.ffn_down_exps.weight q4_0 +blk.12.ffn_gate_exps.weight q4_0 +blk.12.ffn_up_exps.weight q4_0 +blk.13.attn_k.weight q4_0 +blk.13.attn_q.weight q4_0 +blk.13.attn_v.weight q4_0 +blk.13.ffn_down_exps.weight q4_0 +blk.13.ffn_gate_exps.weight q4_0 +blk.13.ffn_up_exps.weight q4_0 +blk.14.attn_k.weight q4_0 +blk.14.attn_q.weight q4_0 +blk.14.attn_v.weight q4_0 +blk.14.ffn_down_exps.weight q4_0 +blk.14.ffn_gate_exps.weight q4_0 +blk.14.ffn_up_exps.weight q4_0 +blk.15.attn_k.weight q4_0 +blk.15.attn_q.weight q4_0 +blk.15.attn_v.weight q4_0 +blk.15.ffn_down_exps.weight q4_0 +blk.15.ffn_gate_exps.weight q4_0 +blk.15.ffn_up_exps.weight q4_0 +blk.16.attn_k.weight q4_0 +blk.16.attn_q.weight q4_0 +blk.16.attn_v.weight q4_0 +blk.16.ffn_down_exps.weight q4_0 +blk.16.ffn_gate_exps.weight q4_0 +blk.16.ffn_up_exps.weight q4_0 +blk.17.attn_k.weight q4_0 +blk.17.attn_q.weight q4_0 +blk.17.attn_v.weight q4_0 +blk.17.ffn_down_exps.weight q4_0 +blk.17.ffn_gate_exps.weight q4_0 +blk.17.ffn_up_exps.weight q4_0 +blk.18.attn_k.weight q4_0 +blk.18.attn_q.weight q4_0 +blk.18.attn_v.weight q4_0 +blk.18.ffn_down_exps.weight q4_0 +blk.18.ffn_gate_exps.weight q4_0 +blk.18.ffn_up_exps.weight q4_0 +blk.19.attn_k.weight q4_0 +blk.19.attn_q.weight q4_0 +blk.19.attn_v.weight q4_0 +blk.19.ffn_down_exps.weight q4_0 +blk.19.ffn_gate_exps.weight q4_0 +blk.19.ffn_up_exps.weight q4_0 +blk.20.attn_k.weight q4_0 +blk.20.attn_q.weight q4_0 +blk.20.attn_v.weight q4_0 +blk.20.ffn_down_exps.weight q4_0 +blk.20.ffn_gate_exps.weight q4_0 +blk.20.ffn_up_exps.weight q4_0 +blk.21.attn_k.weight q4_0 +blk.21.attn_q.weight q4_0 +blk.21.attn_v.weight q4_0 +blk.21.ffn_down_exps.weight q4_0 +blk.21.ffn_gate_exps.weight q4_0 +blk.21.ffn_up_exps.weight q4_0 +blk.22.attn_k.weight q4_0 +blk.22.attn_q.weight q4_0 +blk.22.attn_v.weight q4_0 +blk.22.ffn_down_exps.weight q4_0 +blk.22.ffn_gate_exps.weight q4_0 +blk.22.ffn_up_exps.weight q4_0 +blk.23.attn_k.weight q4_0 +blk.23.attn_q.weight q4_0 +blk.23.attn_v.weight q4_0 +blk.23.ffn_down_exps.weight q4_0 +blk.23.ffn_gate_exps.weight q4_0 +blk.23.ffn_up_exps.weight q4_0 +blk.24.attn_k.weight q4_0 +blk.24.attn_q.weight q4_0 +blk.24.attn_v.weight q4_0 +blk.24.ffn_down_exps.weight q4_0 +blk.24.ffn_gate_exps.weight q4_0 +blk.24.ffn_up_exps.weight q4_0 +blk.25.attn_k.weight q4_0 +blk.25.attn_q.weight q4_0 +blk.25.attn_v.weight q4_0 +blk.25.ffn_down_exps.weight q4_0 +blk.25.ffn_gate_exps.weight q4_0 +blk.25.ffn_up_exps.weight q4_0 +blk.26.attn_k.weight q4_0 +blk.26.attn_q.weight q4_0 +blk.26.attn_v.weight q4_0 +blk.26.ffn_down_exps.weight q4_0 +blk.26.ffn_gate_exps.weight q4_0 +blk.26.ffn_up_exps.weight q4_0 +blk.27.attn_k.weight q4_0 +blk.27.attn_q.weight q4_0 +blk.27.attn_v.weight q4_0 +blk.27.ffn_down_exps.weight q4_0 +blk.27.ffn_gate_exps.weight q4_0 +blk.27.ffn_up_exps.weight q4_0 +blk.28.attn_k.weight q4_0 +blk.28.attn_q.weight q4_0 +blk.28.attn_v.weight q4_0 +blk.28.ffn_down_exps.weight q4_0 +blk.28.ffn_gate_exps.weight q4_0 +blk.28.ffn_up_exps.weight q4_0 +blk.29.attn_k.weight q4_0 +blk.29.attn_q.weight q4_0 +blk.29.attn_v.weight q4_0 +blk.29.ffn_down_exps.weight q4_0 +blk.29.ffn_gate_exps.weight q4_0 +blk.29.ffn_up_exps.weight q4_0 +blk.30.attn_k.weight q4_0 +blk.30.attn_q.weight q4_0 +blk.30.attn_v.weight q4_0 +blk.30.ffn_down_exps.weight q4_0 +blk.30.ffn_gate_exps.weight q4_0 +blk.30.ffn_up_exps.weight q4_0 +blk.31.attn_k.weight q4_0 +blk.31.attn_q.weight q4_0 +blk.31.attn_v.weight q4_0 +blk.31.ffn_down_exps.weight q4_0 +blk.31.ffn_gate_exps.weight q4_0 +blk.31.ffn_up_exps.weight q4_0 +blk.32.attn_k.weight q4_0 +blk.32.attn_q.weight q4_0 +blk.32.attn_v.weight q4_0 +blk.32.ffn_down_exps.weight q4_0 +blk.32.ffn_gate_exps.weight q4_0 +blk.32.ffn_up_exps.weight q4_0 +blk.33.attn_k.weight q4_0 +blk.33.attn_q.weight q4_0 +blk.33.attn_v.weight q4_0 +blk.33.ffn_down_exps.weight q4_0 +blk.33.ffn_gate_exps.weight q4_0 +blk.33.ffn_up_exps.weight q4_0 +blk.34.attn_k.weight q4_0 +blk.34.attn_q.weight q4_0 +blk.34.attn_v.weight q4_0 +blk.34.ffn_down_exps.weight q4_0 +blk.34.ffn_gate_exps.weight q4_0 +blk.34.ffn_up_exps.weight q4_0 +blk.35.attn_k.weight q4_0 +blk.35.attn_q.weight q4_0 +blk.35.attn_v.weight q4_0 +blk.35.ffn_down_exps.weight q4_0 +blk.35.ffn_gate_exps.weight q4_0 +blk.35.ffn_up_exps.weight q4_0 [Q3_K_M] q3_K output.weight q8_0 +token_embd.weight q4_0 +blk.0.attn_k.weight q4_0 blk.0.attn_output.weight q4_K -blk.0.attn_v.weight q5_K -blk.0.ffn_down_exps.weight q5_K +blk.0.attn_q.weight q4_0 +blk.0.attn_v.weight q5_1 +blk.0.ffn_down_exps.weight q5_1 +blk.0.ffn_gate_exps.weight q4_0 +blk.0.ffn_up_exps.weight q4_0 +blk.1.attn_k.weight q4_0 blk.1.attn_output.weight q4_K -blk.1.attn_v.weight q5_K -blk.1.ffn_down_exps.weight q5_K +blk.1.attn_q.weight q4_0 +blk.1.attn_v.weight q5_1 +blk.1.ffn_down_exps.weight q5_1 +blk.1.ffn_gate_exps.weight q4_0 +blk.1.ffn_up_exps.weight q4_0 +blk.2.attn_k.weight q4_0 blk.2.attn_output.weight q4_K -blk.2.attn_v.weight q4_K -blk.2.ffn_down_exps.weight q4_K +blk.2.attn_q.weight q4_0 +blk.2.attn_v.weight q5_0 +blk.2.ffn_down_exps.weight q5_0 +blk.2.ffn_gate_exps.weight q4_0 +blk.2.ffn_up_exps.weight q4_0 +blk.3.attn_k.weight q4_0 blk.3.attn_output.weight q4_K -blk.3.attn_v.weight q4_K -blk.3.ffn_down_exps.weight q4_K +blk.3.attn_q.weight q4_0 +blk.3.attn_v.weight q5_0 +blk.3.ffn_down_exps.weight q5_0 +blk.3.ffn_gate_exps.weight q4_0 +blk.3.ffn_up_exps.weight q4_0 +blk.4.attn_k.weight q4_0 blk.4.attn_output.weight q4_K -blk.4.attn_v.weight q4_K -blk.4.ffn_down_exps.weight q4_K +blk.4.attn_q.weight q4_0 +blk.4.attn_v.weight q5_0 +blk.4.ffn_down_exps.weight q5_0 +blk.4.ffn_gate_exps.weight q4_0 +blk.4.ffn_up_exps.weight q4_0 +blk.5.attn_k.weight q4_0 blk.5.attn_output.weight q4_K -blk.5.attn_v.weight q4_K -blk.5.ffn_down_exps.weight q4_K +blk.5.attn_q.weight q4_0 +blk.5.attn_v.weight q5_0 +blk.5.ffn_down_exps.weight q5_0 +blk.5.ffn_gate_exps.weight q4_0 +blk.5.ffn_up_exps.weight q4_0 +blk.6.attn_k.weight q4_0 blk.6.attn_output.weight q4_K -blk.6.attn_v.weight q4_K -blk.6.ffn_down_exps.weight q4_K +blk.6.attn_q.weight q4_0 +blk.6.attn_v.weight q5_0 +blk.6.ffn_down_exps.weight q5_0 +blk.6.ffn_gate_exps.weight q4_0 +blk.6.ffn_up_exps.weight q4_0 +blk.7.attn_k.weight q4_0 blk.7.attn_output.weight q4_K -blk.7.attn_v.weight q4_K -blk.7.ffn_down_exps.weight q4_K +blk.7.attn_q.weight q4_0 +blk.7.attn_v.weight q5_0 +blk.7.ffn_down_exps.weight q5_0 +blk.7.ffn_gate_exps.weight q4_0 +blk.7.ffn_up_exps.weight q4_0 +blk.8.attn_k.weight q4_0 blk.8.attn_output.weight q4_K -blk.8.attn_v.weight q4_K -blk.8.ffn_down_exps.weight q4_K +blk.8.attn_q.weight q4_0 +blk.8.attn_v.weight q5_0 +blk.8.ffn_down_exps.weight q5_0 +blk.8.ffn_gate_exps.weight q4_0 +blk.8.ffn_up_exps.weight q4_0 +blk.9.attn_k.weight q4_0 blk.9.attn_output.weight q4_K -blk.9.attn_v.weight q4_K -blk.9.ffn_down_exps.weight q4_K +blk.9.attn_q.weight q4_0 +blk.9.attn_v.weight q5_0 +blk.9.ffn_down_exps.weight q5_0 +blk.9.ffn_gate_exps.weight q4_0 +blk.9.ffn_up_exps.weight q4_0 +blk.10.attn_k.weight q4_0 blk.10.attn_output.weight q4_K -blk.10.attn_v.weight q4_K -blk.10.ffn_down_exps.weight q4_K +blk.10.attn_q.weight q4_0 +blk.10.attn_v.weight q5_0 +blk.10.ffn_down_exps.weight q5_0 +blk.10.ffn_gate_exps.weight q4_0 +blk.10.ffn_up_exps.weight q4_0 +blk.11.attn_k.weight q4_0 blk.11.attn_output.weight q4_K -blk.11.attn_v.weight q4_K -blk.11.ffn_down_exps.weight q4_K +blk.11.attn_q.weight q4_0 +blk.11.attn_v.weight q5_0 +blk.11.ffn_down_exps.weight q5_0 +blk.11.ffn_gate_exps.weight q4_0 +blk.11.ffn_up_exps.weight q4_0 +blk.12.attn_k.weight q4_0 blk.12.attn_output.weight q4_K -blk.12.attn_v.weight q4_K -blk.12.ffn_down_exps.weight q4_K +blk.12.attn_q.weight q4_0 +blk.12.attn_v.weight q5_0 +blk.12.ffn_down_exps.weight q5_0 +blk.12.ffn_gate_exps.weight q4_0 +blk.12.ffn_up_exps.weight q4_0 +blk.13.attn_k.weight q4_0 blk.13.attn_output.weight q4_K -blk.13.attn_v.weight q4_K -blk.13.ffn_down_exps.weight q4_K +blk.13.attn_q.weight q4_0 +blk.13.attn_v.weight q5_0 +blk.13.ffn_down_exps.weight q5_0 +blk.13.ffn_gate_exps.weight q4_0 +blk.13.ffn_up_exps.weight q4_0 +blk.14.attn_k.weight q4_0 blk.14.attn_output.weight q4_K -blk.14.attn_v.weight q4_K -blk.14.ffn_down_exps.weight q4_K +blk.14.attn_q.weight q4_0 +blk.14.attn_v.weight q5_0 +blk.14.ffn_down_exps.weight q5_0 +blk.14.ffn_gate_exps.weight q4_0 +blk.14.ffn_up_exps.weight q4_0 +blk.15.attn_k.weight q4_0 blk.15.attn_output.weight q4_K -blk.15.attn_v.weight q4_K -blk.15.ffn_down_exps.weight q4_K +blk.15.attn_q.weight q4_0 +blk.15.attn_v.weight q5_0 +blk.15.ffn_down_exps.weight q5_0 +blk.15.ffn_gate_exps.weight q4_0 +blk.15.ffn_up_exps.weight q4_0 +blk.16.attn_k.weight q4_0 blk.16.attn_output.weight q4_K -blk.16.attn_v.weight q4_K -blk.16.ffn_down_exps.weight q4_K +blk.16.attn_q.weight q4_0 +blk.16.attn_v.weight q5_0 +blk.16.ffn_down_exps.weight q5_0 +blk.16.ffn_gate_exps.weight q4_0 +blk.16.ffn_up_exps.weight q4_0 +blk.17.attn_k.weight q4_0 blk.17.attn_output.weight q4_K -blk.17.attn_v.weight q4_K -blk.17.ffn_down_exps.weight q4_K +blk.17.attn_q.weight q4_0 +blk.17.attn_v.weight q5_0 +blk.17.ffn_down_exps.weight q5_0 +blk.17.ffn_gate_exps.weight q4_0 +blk.17.ffn_up_exps.weight q4_0 +blk.18.attn_k.weight q4_0 blk.18.attn_output.weight q4_K -blk.18.attn_v.weight q4_K -blk.18.ffn_down_exps.weight q4_K +blk.18.attn_q.weight q4_0 +blk.18.attn_v.weight q5_0 +blk.18.ffn_down_exps.weight q5_0 +blk.18.ffn_gate_exps.weight q4_0 +blk.18.ffn_up_exps.weight q4_0 +blk.19.attn_k.weight q4_0 blk.19.attn_output.weight q4_K -blk.19.attn_v.weight q4_K -blk.19.ffn_down_exps.weight q4_K +blk.19.attn_q.weight q4_0 +blk.19.attn_v.weight q5_0 +blk.19.ffn_down_exps.weight q5_0 +blk.19.ffn_gate_exps.weight q4_0 +blk.19.ffn_up_exps.weight q4_0 +blk.20.attn_k.weight q4_0 blk.20.attn_output.weight q4_K -blk.20.attn_v.weight q4_K -blk.20.ffn_down_exps.weight q4_K +blk.20.attn_q.weight q4_0 +blk.20.attn_v.weight q5_0 +blk.20.ffn_down_exps.weight q5_0 +blk.20.ffn_gate_exps.weight q4_0 +blk.20.ffn_up_exps.weight q4_0 +blk.21.attn_k.weight q4_0 blk.21.attn_output.weight q4_K -blk.21.attn_v.weight q4_K -blk.21.ffn_down_exps.weight q4_K +blk.21.attn_q.weight q4_0 +blk.21.attn_v.weight q5_0 +blk.21.ffn_down_exps.weight q5_0 +blk.21.ffn_gate_exps.weight q4_0 +blk.21.ffn_up_exps.weight q4_0 +blk.22.attn_k.weight q4_0 blk.22.attn_output.weight q4_K -blk.22.attn_v.weight q4_K -blk.22.ffn_down_exps.weight q4_K +blk.22.attn_q.weight q4_0 +blk.22.attn_v.weight q5_0 +blk.22.ffn_down_exps.weight q5_0 +blk.22.ffn_gate_exps.weight q4_0 +blk.22.ffn_up_exps.weight q4_0 +blk.23.attn_k.weight q4_0 blk.23.attn_output.weight q4_K -blk.23.attn_v.weight q4_K -blk.23.ffn_down_exps.weight q4_K +blk.23.attn_q.weight q4_0 +blk.23.attn_v.weight q5_0 +blk.23.ffn_down_exps.weight q5_0 +blk.23.ffn_gate_exps.weight q4_0 +blk.23.ffn_up_exps.weight q4_0 +blk.24.attn_k.weight q4_0 blk.24.attn_output.weight q4_K -blk.24.attn_v.weight q4_K -blk.24.ffn_down_exps.weight q4_K +blk.24.attn_q.weight q4_0 +blk.24.attn_v.weight q5_0 +blk.24.ffn_down_exps.weight q5_0 +blk.24.ffn_gate_exps.weight q4_0 +blk.24.ffn_up_exps.weight q4_0 +blk.25.attn_k.weight q4_0 blk.25.attn_output.weight q4_K -blk.25.attn_v.weight q4_K -blk.25.ffn_down_exps.weight q4_K +blk.25.attn_q.weight q4_0 +blk.25.attn_v.weight q5_0 +blk.25.ffn_down_exps.weight q5_0 +blk.25.ffn_gate_exps.weight q4_0 +blk.25.ffn_up_exps.weight q4_0 +blk.26.attn_k.weight q4_0 blk.26.attn_output.weight q4_K -blk.26.attn_v.weight q4_K -blk.26.ffn_down_exps.weight q4_K +blk.26.attn_q.weight q4_0 +blk.26.attn_v.weight q5_0 +blk.26.ffn_down_exps.weight q5_0 +blk.26.ffn_gate_exps.weight q4_0 +blk.26.ffn_up_exps.weight q4_0 +blk.27.attn_k.weight q4_0 blk.27.attn_output.weight q4_K -blk.27.attn_v.weight q4_K -blk.27.ffn_down_exps.weight q4_K +blk.27.attn_q.weight q4_0 +blk.27.attn_v.weight q5_0 +blk.27.ffn_down_exps.weight q5_0 +blk.27.ffn_gate_exps.weight q4_0 +blk.27.ffn_up_exps.weight q4_0 +blk.28.attn_k.weight q4_0 blk.28.attn_output.weight q4_K -blk.28.attn_v.weight q4_K -blk.28.ffn_down_exps.weight q4_K +blk.28.attn_q.weight q4_0 +blk.28.attn_v.weight q5_0 +blk.28.ffn_down_exps.weight q5_0 +blk.28.ffn_gate_exps.weight q4_0 +blk.28.ffn_up_exps.weight q4_0 +blk.29.attn_k.weight q4_0 blk.29.attn_output.weight q4_K -blk.29.attn_v.weight q4_K -blk.29.ffn_down_exps.weight q4_K +blk.29.attn_q.weight q4_0 +blk.29.attn_v.weight q5_0 +blk.29.ffn_down_exps.weight q5_0 +blk.29.ffn_gate_exps.weight q4_0 +blk.29.ffn_up_exps.weight q4_0 +blk.30.attn_k.weight q4_0 blk.30.attn_output.weight q4_K -blk.30.attn_v.weight q4_K -blk.30.ffn_down_exps.weight q4_K +blk.30.attn_q.weight q4_0 +blk.30.attn_v.weight q5_0 +blk.30.ffn_down_exps.weight q5_0 +blk.30.ffn_gate_exps.weight q4_0 +blk.30.ffn_up_exps.weight q4_0 +blk.31.attn_k.weight q4_0 blk.31.attn_output.weight q4_K -blk.31.attn_v.weight q4_K -blk.31.ffn_down_exps.weight q4_K +blk.31.attn_q.weight q4_0 +blk.31.attn_v.weight q5_0 +blk.31.ffn_down_exps.weight q5_0 +blk.31.ffn_gate_exps.weight q4_0 +blk.31.ffn_up_exps.weight q4_0 +blk.32.attn_k.weight q4_0 blk.32.attn_output.weight q4_K -blk.32.attn_v.weight q4_K -blk.32.ffn_down_exps.weight q4_K +blk.32.attn_q.weight q4_0 +blk.32.attn_v.weight q5_0 +blk.32.ffn_down_exps.weight q5_0 +blk.32.ffn_gate_exps.weight q4_0 +blk.32.ffn_up_exps.weight q4_0 +blk.33.attn_k.weight q4_0 blk.33.attn_output.weight q4_K -blk.33.attn_v.weight q4_K -blk.33.ffn_down_exps.weight q4_K +blk.33.attn_q.weight q4_0 +blk.33.attn_v.weight q5_0 +blk.33.ffn_down_exps.weight q5_0 +blk.33.ffn_gate_exps.weight q4_0 +blk.33.ffn_up_exps.weight q4_0 +blk.34.attn_k.weight q4_0 blk.34.attn_output.weight q4_K -blk.34.attn_v.weight q4_K -blk.34.ffn_down_exps.weight q4_K +blk.34.attn_q.weight q4_0 +blk.34.attn_v.weight q5_0 +blk.34.ffn_down_exps.weight q5_0 +blk.34.ffn_gate_exps.weight q4_0 +blk.34.ffn_up_exps.weight q4_0 +blk.35.attn_k.weight q4_0 blk.35.attn_output.weight q4_K -blk.35.attn_v.weight q4_K -blk.35.ffn_down_exps.weight q4_K +blk.35.attn_q.weight q4_0 +blk.35.attn_v.weight q5_0 +blk.35.ffn_down_exps.weight q5_0 +blk.35.ffn_gate_exps.weight q4_0 +blk.35.ffn_up_exps.weight q4_0 [Q3_K_L] q3_K output.weight q8_0 +token_embd.weight q4_0 +blk.0.attn_k.weight q4_0 blk.0.attn_output.weight q5_K -blk.0.attn_v.weight q5_K -blk.0.ffn_down_exps.weight q5_K +blk.0.attn_q.weight q4_0 +blk.0.attn_v.weight q5_1 +blk.0.ffn_down_exps.weight q5_1 +blk.0.ffn_gate_exps.weight q4_0 +blk.0.ffn_up_exps.weight q4_0 +blk.1.attn_k.weight q4_0 blk.1.attn_output.weight q5_K -blk.1.attn_v.weight q5_K -blk.1.ffn_down_exps.weight q5_K +blk.1.attn_q.weight q4_0 +blk.1.attn_v.weight q5_1 +blk.1.ffn_down_exps.weight q5_1 +blk.1.ffn_gate_exps.weight q4_0 +blk.1.ffn_up_exps.weight q4_0 +blk.2.attn_k.weight q4_0 blk.2.attn_output.weight q5_K -blk.2.attn_v.weight q5_K -blk.2.ffn_down_exps.weight q5_K +blk.2.attn_q.weight q4_0 +blk.2.attn_v.weight q5_1 +blk.2.ffn_down_exps.weight q5_1 +blk.2.ffn_gate_exps.weight q4_0 +blk.2.ffn_up_exps.weight q4_0 +blk.3.attn_k.weight q4_0 blk.3.attn_output.weight q5_K -blk.3.attn_v.weight q5_K -blk.3.ffn_down_exps.weight q5_K +blk.3.attn_q.weight q4_0 +blk.3.attn_v.weight q5_1 +blk.3.ffn_down_exps.weight q5_1 +blk.3.ffn_gate_exps.weight q4_0 +blk.3.ffn_up_exps.weight q4_0 +blk.4.attn_k.weight q4_0 blk.4.attn_output.weight q5_K -blk.4.attn_v.weight q5_K -blk.4.ffn_down_exps.weight q5_K +blk.4.attn_q.weight q4_0 +blk.4.attn_v.weight q5_1 +blk.4.ffn_down_exps.weight q5_1 +blk.4.ffn_gate_exps.weight q4_0 +blk.4.ffn_up_exps.weight q4_0 +blk.5.attn_k.weight q4_0 blk.5.attn_output.weight q5_K -blk.5.attn_v.weight q5_K -blk.5.ffn_down_exps.weight q5_K +blk.5.attn_q.weight q4_0 +blk.5.attn_v.weight q5_1 +blk.5.ffn_down_exps.weight q5_1 +blk.5.ffn_gate_exps.weight q4_0 +blk.5.ffn_up_exps.weight q4_0 +blk.6.attn_k.weight q4_0 blk.6.attn_output.weight q5_K -blk.6.attn_v.weight q5_K -blk.6.ffn_down_exps.weight q5_K +blk.6.attn_q.weight q4_0 +blk.6.attn_v.weight q5_1 +blk.6.ffn_down_exps.weight q5_1 +blk.6.ffn_gate_exps.weight q4_0 +blk.6.ffn_up_exps.weight q4_0 +blk.7.attn_k.weight q4_0 blk.7.attn_output.weight q5_K -blk.7.attn_v.weight q5_K -blk.7.ffn_down_exps.weight q5_K +blk.7.attn_q.weight q4_0 +blk.7.attn_v.weight q5_1 +blk.7.ffn_down_exps.weight q5_1 +blk.7.ffn_gate_exps.weight q4_0 +blk.7.ffn_up_exps.weight q4_0 +blk.8.attn_k.weight q4_0 blk.8.attn_output.weight q5_K -blk.8.attn_v.weight q5_K -blk.8.ffn_down_exps.weight q5_K +blk.8.attn_q.weight q4_0 +blk.8.attn_v.weight q5_1 +blk.8.ffn_down_exps.weight q5_1 +blk.8.ffn_gate_exps.weight q4_0 +blk.8.ffn_up_exps.weight q4_0 +blk.9.attn_k.weight q4_0 blk.9.attn_output.weight q5_K -blk.9.attn_v.weight q5_K -blk.9.ffn_down_exps.weight q5_K +blk.9.attn_q.weight q4_0 +blk.9.attn_v.weight q5_1 +blk.9.ffn_down_exps.weight q5_1 +blk.9.ffn_gate_exps.weight q4_0 +blk.9.ffn_up_exps.weight q4_0 +blk.10.attn_k.weight q4_0 blk.10.attn_output.weight q5_K -blk.10.attn_v.weight q5_K -blk.10.ffn_down_exps.weight q5_K +blk.10.attn_q.weight q4_0 +blk.10.attn_v.weight q5_1 +blk.10.ffn_down_exps.weight q5_1 +blk.10.ffn_gate_exps.weight q4_0 +blk.10.ffn_up_exps.weight q4_0 +blk.11.attn_k.weight q4_0 blk.11.attn_output.weight q5_K -blk.11.attn_v.weight q5_K -blk.11.ffn_down_exps.weight q5_K +blk.11.attn_q.weight q4_0 +blk.11.attn_v.weight q5_1 +blk.11.ffn_down_exps.weight q5_1 +blk.11.ffn_gate_exps.weight q4_0 +blk.11.ffn_up_exps.weight q4_0 +blk.12.attn_k.weight q4_0 blk.12.attn_output.weight q5_K -blk.12.attn_v.weight q5_K -blk.12.ffn_down_exps.weight q5_K +blk.12.attn_q.weight q4_0 +blk.12.attn_v.weight q5_1 +blk.12.ffn_down_exps.weight q5_1 +blk.12.ffn_gate_exps.weight q4_0 +blk.12.ffn_up_exps.weight q4_0 +blk.13.attn_k.weight q4_0 blk.13.attn_output.weight q5_K -blk.13.attn_v.weight q5_K -blk.13.ffn_down_exps.weight q5_K +blk.13.attn_q.weight q4_0 +blk.13.attn_v.weight q5_1 +blk.13.ffn_down_exps.weight q5_1 +blk.13.ffn_gate_exps.weight q4_0 +blk.13.ffn_up_exps.weight q4_0 +blk.14.attn_k.weight q4_0 blk.14.attn_output.weight q5_K -blk.14.attn_v.weight q5_K -blk.14.ffn_down_exps.weight q5_K +blk.14.attn_q.weight q4_0 +blk.14.attn_v.weight q5_1 +blk.14.ffn_down_exps.weight q5_1 +blk.14.ffn_gate_exps.weight q4_0 +blk.14.ffn_up_exps.weight q4_0 +blk.15.attn_k.weight q4_0 blk.15.attn_output.weight q5_K -blk.15.attn_v.weight q5_K -blk.15.ffn_down_exps.weight q5_K +blk.15.attn_q.weight q4_0 +blk.15.attn_v.weight q5_1 +blk.15.ffn_down_exps.weight q5_1 +blk.15.ffn_gate_exps.weight q4_0 +blk.15.ffn_up_exps.weight q4_0 +blk.16.attn_k.weight q4_0 blk.16.attn_output.weight q5_K -blk.16.attn_v.weight q5_K -blk.16.ffn_down_exps.weight q5_K +blk.16.attn_q.weight q4_0 +blk.16.attn_v.weight q5_1 +blk.16.ffn_down_exps.weight q5_1 +blk.16.ffn_gate_exps.weight q4_0 +blk.16.ffn_up_exps.weight q4_0 +blk.17.attn_k.weight q4_0 blk.17.attn_output.weight q5_K -blk.17.attn_v.weight q5_K -blk.17.ffn_down_exps.weight q5_K +blk.17.attn_q.weight q4_0 +blk.17.attn_v.weight q5_1 +blk.17.ffn_down_exps.weight q5_1 +blk.17.ffn_gate_exps.weight q4_0 +blk.17.ffn_up_exps.weight q4_0 +blk.18.attn_k.weight q4_0 blk.18.attn_output.weight q5_K -blk.18.attn_v.weight q5_K -blk.18.ffn_down_exps.weight q5_K +blk.18.attn_q.weight q4_0 +blk.18.attn_v.weight q5_1 +blk.18.ffn_down_exps.weight q5_1 +blk.18.ffn_gate_exps.weight q4_0 +blk.18.ffn_up_exps.weight q4_0 +blk.19.attn_k.weight q4_0 blk.19.attn_output.weight q5_K -blk.19.attn_v.weight q5_K -blk.19.ffn_down_exps.weight q5_K +blk.19.attn_q.weight q4_0 +blk.19.attn_v.weight q5_1 +blk.19.ffn_down_exps.weight q5_1 +blk.19.ffn_gate_exps.weight q4_0 +blk.19.ffn_up_exps.weight q4_0 +blk.20.attn_k.weight q4_0 blk.20.attn_output.weight q5_K -blk.20.attn_v.weight q5_K -blk.20.ffn_down_exps.weight q5_K +blk.20.attn_q.weight q4_0 +blk.20.attn_v.weight q5_1 +blk.20.ffn_down_exps.weight q5_1 +blk.20.ffn_gate_exps.weight q4_0 +blk.20.ffn_up_exps.weight q4_0 +blk.21.attn_k.weight q4_0 blk.21.attn_output.weight q5_K -blk.21.attn_v.weight q5_K -blk.21.ffn_down_exps.weight q5_K +blk.21.attn_q.weight q4_0 +blk.21.attn_v.weight q5_1 +blk.21.ffn_down_exps.weight q5_1 +blk.21.ffn_gate_exps.weight q4_0 +blk.21.ffn_up_exps.weight q4_0 +blk.22.attn_k.weight q4_0 blk.22.attn_output.weight q5_K -blk.22.attn_v.weight q5_K -blk.22.ffn_down_exps.weight q5_K +blk.22.attn_q.weight q4_0 +blk.22.attn_v.weight q5_1 +blk.22.ffn_down_exps.weight q5_1 +blk.22.ffn_gate_exps.weight q4_0 +blk.22.ffn_up_exps.weight q4_0 +blk.23.attn_k.weight q4_0 blk.23.attn_output.weight q5_K -blk.23.attn_v.weight q5_K -blk.23.ffn_down_exps.weight q5_K +blk.23.attn_q.weight q4_0 +blk.23.attn_v.weight q5_1 +blk.23.ffn_down_exps.weight q5_1 +blk.23.ffn_gate_exps.weight q4_0 +blk.23.ffn_up_exps.weight q4_0 +blk.24.attn_k.weight q4_0 blk.24.attn_output.weight q5_K -blk.24.attn_v.weight q5_K -blk.24.ffn_down_exps.weight q5_K +blk.24.attn_q.weight q4_0 +blk.24.attn_v.weight q5_1 +blk.24.ffn_down_exps.weight q5_1 +blk.24.ffn_gate_exps.weight q4_0 +blk.24.ffn_up_exps.weight q4_0 +blk.25.attn_k.weight q4_0 blk.25.attn_output.weight q5_K -blk.25.attn_v.weight q5_K -blk.25.ffn_down_exps.weight q5_K +blk.25.attn_q.weight q4_0 +blk.25.attn_v.weight q5_1 +blk.25.ffn_down_exps.weight q5_1 +blk.25.ffn_gate_exps.weight q4_0 +blk.25.ffn_up_exps.weight q4_0 +blk.26.attn_k.weight q4_0 blk.26.attn_output.weight q5_K -blk.26.attn_v.weight q5_K -blk.26.ffn_down_exps.weight q5_K +blk.26.attn_q.weight q4_0 +blk.26.attn_v.weight q5_1 +blk.26.ffn_down_exps.weight q5_1 +blk.26.ffn_gate_exps.weight q4_0 +blk.26.ffn_up_exps.weight q4_0 +blk.27.attn_k.weight q4_0 blk.27.attn_output.weight q5_K -blk.27.attn_v.weight q5_K -blk.27.ffn_down_exps.weight q5_K +blk.27.attn_q.weight q4_0 +blk.27.attn_v.weight q5_1 +blk.27.ffn_down_exps.weight q5_1 +blk.27.ffn_gate_exps.weight q4_0 +blk.27.ffn_up_exps.weight q4_0 +blk.28.attn_k.weight q4_0 blk.28.attn_output.weight q5_K -blk.28.attn_v.weight q5_K -blk.28.ffn_down_exps.weight q5_K +blk.28.attn_q.weight q4_0 +blk.28.attn_v.weight q5_1 +blk.28.ffn_down_exps.weight q5_1 +blk.28.ffn_gate_exps.weight q4_0 +blk.28.ffn_up_exps.weight q4_0 +blk.29.attn_k.weight q4_0 blk.29.attn_output.weight q5_K -blk.29.attn_v.weight q5_K -blk.29.ffn_down_exps.weight q5_K +blk.29.attn_q.weight q4_0 +blk.29.attn_v.weight q5_1 +blk.29.ffn_down_exps.weight q5_1 +blk.29.ffn_gate_exps.weight q4_0 +blk.29.ffn_up_exps.weight q4_0 +blk.30.attn_k.weight q4_0 blk.30.attn_output.weight q5_K -blk.30.attn_v.weight q5_K -blk.30.ffn_down_exps.weight q5_K +blk.30.attn_q.weight q4_0 +blk.30.attn_v.weight q5_1 +blk.30.ffn_down_exps.weight q5_1 +blk.30.ffn_gate_exps.weight q4_0 +blk.30.ffn_up_exps.weight q4_0 +blk.31.attn_k.weight q4_0 blk.31.attn_output.weight q5_K -blk.31.attn_v.weight q5_K -blk.31.ffn_down_exps.weight q5_K +blk.31.attn_q.weight q4_0 +blk.31.attn_v.weight q5_1 +blk.31.ffn_down_exps.weight q5_1 +blk.31.ffn_gate_exps.weight q4_0 +blk.31.ffn_up_exps.weight q4_0 +blk.32.attn_k.weight q4_0 blk.32.attn_output.weight q5_K -blk.32.attn_v.weight q5_K -blk.32.ffn_down_exps.weight q5_K +blk.32.attn_q.weight q4_0 +blk.32.attn_v.weight q5_1 +blk.32.ffn_down_exps.weight q5_1 +blk.32.ffn_gate_exps.weight q4_0 +blk.32.ffn_up_exps.weight q4_0 +blk.33.attn_k.weight q4_0 blk.33.attn_output.weight q5_K -blk.33.attn_v.weight q5_K -blk.33.ffn_down_exps.weight q5_K +blk.33.attn_q.weight q4_0 +blk.33.attn_v.weight q5_1 +blk.33.ffn_down_exps.weight q5_1 +blk.33.ffn_gate_exps.weight q4_0 +blk.33.ffn_up_exps.weight q4_0 +blk.34.attn_k.weight q4_0 blk.34.attn_output.weight q5_K -blk.34.attn_v.weight q5_K -blk.34.ffn_down_exps.weight q5_K +blk.34.attn_q.weight q4_0 +blk.34.attn_v.weight q5_1 +blk.34.ffn_down_exps.weight q5_1 +blk.34.ffn_gate_exps.weight q4_0 +blk.34.ffn_up_exps.weight q4_0 +blk.35.attn_k.weight q4_0 blk.35.attn_output.weight q5_K -blk.35.attn_v.weight q5_K -blk.35.ffn_down_exps.weight q5_K +blk.35.attn_q.weight q4_0 +blk.35.attn_v.weight q5_1 +blk.35.ffn_down_exps.weight q5_1 +blk.35.ffn_gate_exps.weight q4_0 +blk.35.ffn_up_exps.weight q4_0 [Q4_K_S] q4_K output.weight q8_0 -blk.0.attn_v.weight q5_K -blk.0.ffn_down_exps.weight q5_K -blk.1.attn_v.weight q5_K -blk.1.ffn_down_exps.weight q5_K -blk.2.attn_v.weight q5_K -blk.2.ffn_down_exps.weight q5_K -blk.3.attn_v.weight q5_K -blk.3.ffn_down_exps.weight q5_K +token_embd.weight q5_0 +blk.0.attn_k.weight q5_0 +blk.0.attn_q.weight q5_0 +blk.0.attn_v.weight q5_1 +blk.0.ffn_down_exps.weight q5_1 +blk.0.ffn_gate_exps.weight q5_0 +blk.0.ffn_up_exps.weight q5_0 +blk.1.attn_k.weight q5_0 +blk.1.attn_q.weight q5_0 +blk.1.attn_v.weight q5_1 +blk.1.ffn_down_exps.weight q5_1 +blk.1.ffn_gate_exps.weight q5_0 +blk.1.ffn_up_exps.weight q5_0 +blk.2.attn_k.weight q5_0 +blk.2.attn_q.weight q5_0 +blk.2.attn_v.weight q5_1 +blk.2.ffn_down_exps.weight q5_1 +blk.2.ffn_gate_exps.weight q5_0 +blk.2.ffn_up_exps.weight q5_0 +blk.3.attn_k.weight q5_0 +blk.3.attn_q.weight q5_0 +blk.3.attn_v.weight q5_1 +blk.3.ffn_down_exps.weight q5_1 +blk.3.ffn_gate_exps.weight q5_0 +blk.3.ffn_up_exps.weight q5_0 +blk.4.attn_k.weight q5_0 +blk.4.attn_q.weight q5_0 +blk.4.attn_v.weight q5_0 +blk.4.ffn_down_exps.weight q5_0 +blk.4.ffn_gate_exps.weight q5_0 +blk.4.ffn_up_exps.weight q5_0 +blk.5.attn_k.weight q5_0 +blk.5.attn_q.weight q5_0 +blk.5.attn_v.weight q5_0 +blk.5.ffn_down_exps.weight q5_0 +blk.5.ffn_gate_exps.weight q5_0 +blk.5.ffn_up_exps.weight q5_0 +blk.6.attn_k.weight q5_0 +blk.6.attn_q.weight q5_0 +blk.6.attn_v.weight q5_0 +blk.6.ffn_down_exps.weight q5_0 +blk.6.ffn_gate_exps.weight q5_0 +blk.6.ffn_up_exps.weight q5_0 +blk.7.attn_k.weight q5_0 +blk.7.attn_q.weight q5_0 +blk.7.attn_v.weight q5_0 +blk.7.ffn_down_exps.weight q5_0 +blk.7.ffn_gate_exps.weight q5_0 +blk.7.ffn_up_exps.weight q5_0 +blk.8.attn_k.weight q5_0 +blk.8.attn_q.weight q5_0 +blk.8.attn_v.weight q5_0 +blk.8.ffn_down_exps.weight q5_0 +blk.8.ffn_gate_exps.weight q5_0 +blk.8.ffn_up_exps.weight q5_0 +blk.9.attn_k.weight q5_0 +blk.9.attn_q.weight q5_0 +blk.9.attn_v.weight q5_0 +blk.9.ffn_down_exps.weight q5_0 +blk.9.ffn_gate_exps.weight q5_0 +blk.9.ffn_up_exps.weight q5_0 +blk.10.attn_k.weight q5_0 +blk.10.attn_q.weight q5_0 +blk.10.attn_v.weight q5_0 +blk.10.ffn_down_exps.weight q5_0 +blk.10.ffn_gate_exps.weight q5_0 +blk.10.ffn_up_exps.weight q5_0 +blk.11.attn_k.weight q5_0 +blk.11.attn_q.weight q5_0 +blk.11.attn_v.weight q5_0 +blk.11.ffn_down_exps.weight q5_0 +blk.11.ffn_gate_exps.weight q5_0 +blk.11.ffn_up_exps.weight q5_0 +blk.12.attn_k.weight q5_0 +blk.12.attn_q.weight q5_0 +blk.12.attn_v.weight q5_0 +blk.12.ffn_down_exps.weight q5_0 +blk.12.ffn_gate_exps.weight q5_0 +blk.12.ffn_up_exps.weight q5_0 +blk.13.attn_k.weight q5_0 +blk.13.attn_q.weight q5_0 +blk.13.attn_v.weight q5_0 +blk.13.ffn_down_exps.weight q5_0 +blk.13.ffn_gate_exps.weight q5_0 +blk.13.ffn_up_exps.weight q5_0 +blk.14.attn_k.weight q5_0 +blk.14.attn_q.weight q5_0 +blk.14.attn_v.weight q5_0 +blk.14.ffn_down_exps.weight q5_0 +blk.14.ffn_gate_exps.weight q5_0 +blk.14.ffn_up_exps.weight q5_0 +blk.15.attn_k.weight q5_0 +blk.15.attn_q.weight q5_0 +blk.15.attn_v.weight q5_0 +blk.15.ffn_down_exps.weight q5_0 +blk.15.ffn_gate_exps.weight q5_0 +blk.15.ffn_up_exps.weight q5_0 +blk.16.attn_k.weight q5_0 +blk.16.attn_q.weight q5_0 +blk.16.attn_v.weight q5_0 +blk.16.ffn_down_exps.weight q5_0 +blk.16.ffn_gate_exps.weight q5_0 +blk.16.ffn_up_exps.weight q5_0 +blk.17.attn_k.weight q5_0 +blk.17.attn_q.weight q5_0 +blk.17.attn_v.weight q5_0 +blk.17.ffn_down_exps.weight q5_0 +blk.17.ffn_gate_exps.weight q5_0 +blk.17.ffn_up_exps.weight q5_0 +blk.18.attn_k.weight q5_0 +blk.18.attn_q.weight q5_0 +blk.18.attn_v.weight q5_0 +blk.18.ffn_down_exps.weight q5_0 +blk.18.ffn_gate_exps.weight q5_0 +blk.18.ffn_up_exps.weight q5_0 +blk.19.attn_k.weight q5_0 +blk.19.attn_q.weight q5_0 +blk.19.attn_v.weight q5_0 +blk.19.ffn_down_exps.weight q5_0 +blk.19.ffn_gate_exps.weight q5_0 +blk.19.ffn_up_exps.weight q5_0 +blk.20.attn_k.weight q5_0 +blk.20.attn_q.weight q5_0 +blk.20.attn_v.weight q5_0 +blk.20.ffn_down_exps.weight q5_0 +blk.20.ffn_gate_exps.weight q5_0 +blk.20.ffn_up_exps.weight q5_0 +blk.21.attn_k.weight q5_0 +blk.21.attn_q.weight q5_0 +blk.21.attn_v.weight q5_0 +blk.21.ffn_down_exps.weight q5_0 +blk.21.ffn_gate_exps.weight q5_0 +blk.21.ffn_up_exps.weight q5_0 +blk.22.attn_k.weight q5_0 +blk.22.attn_q.weight q5_0 +blk.22.attn_v.weight q5_0 +blk.22.ffn_down_exps.weight q5_0 +blk.22.ffn_gate_exps.weight q5_0 +blk.22.ffn_up_exps.weight q5_0 +blk.23.attn_k.weight q5_0 +blk.23.attn_q.weight q5_0 +blk.23.attn_v.weight q5_0 +blk.23.ffn_down_exps.weight q5_0 +blk.23.ffn_gate_exps.weight q5_0 +blk.23.ffn_up_exps.weight q5_0 +blk.24.attn_k.weight q5_0 +blk.24.attn_q.weight q5_0 +blk.24.attn_v.weight q5_0 +blk.24.ffn_down_exps.weight q5_0 +blk.24.ffn_gate_exps.weight q5_0 +blk.24.ffn_up_exps.weight q5_0 +blk.25.attn_k.weight q5_0 +blk.25.attn_q.weight q5_0 +blk.25.attn_v.weight q5_0 +blk.25.ffn_down_exps.weight q5_0 +blk.25.ffn_gate_exps.weight q5_0 +blk.25.ffn_up_exps.weight q5_0 +blk.26.attn_k.weight q5_0 +blk.26.attn_q.weight q5_0 +blk.26.attn_v.weight q5_0 +blk.26.ffn_down_exps.weight q5_0 +blk.26.ffn_gate_exps.weight q5_0 +blk.26.ffn_up_exps.weight q5_0 +blk.27.attn_k.weight q5_0 +blk.27.attn_q.weight q5_0 +blk.27.attn_v.weight q5_0 +blk.27.ffn_down_exps.weight q5_0 +blk.27.ffn_gate_exps.weight q5_0 +blk.27.ffn_up_exps.weight q5_0 +blk.28.attn_k.weight q5_0 +blk.28.attn_q.weight q5_0 +blk.28.attn_v.weight q5_0 +blk.28.ffn_down_exps.weight q5_0 +blk.28.ffn_gate_exps.weight q5_0 +blk.28.ffn_up_exps.weight q5_0 +blk.29.attn_k.weight q5_0 +blk.29.attn_q.weight q5_0 +blk.29.attn_v.weight q5_0 +blk.29.ffn_down_exps.weight q5_0 +blk.29.ffn_gate_exps.weight q5_0 +blk.29.ffn_up_exps.weight q5_0 +blk.30.attn_k.weight q5_0 +blk.30.attn_q.weight q5_0 +blk.30.attn_v.weight q5_0 +blk.30.ffn_down_exps.weight q5_0 +blk.30.ffn_gate_exps.weight q5_0 +blk.30.ffn_up_exps.weight q5_0 +blk.31.attn_k.weight q5_0 +blk.31.attn_q.weight q5_0 +blk.31.attn_v.weight q5_0 +blk.31.ffn_down_exps.weight q5_0 +blk.31.ffn_gate_exps.weight q5_0 +blk.31.ffn_up_exps.weight q5_0 +blk.32.attn_k.weight q5_0 +blk.32.attn_q.weight q5_0 +blk.32.attn_v.weight q5_0 +blk.32.ffn_down_exps.weight q5_0 +blk.32.ffn_gate_exps.weight q5_0 +blk.32.ffn_up_exps.weight q5_0 +blk.33.attn_k.weight q5_0 +blk.33.attn_q.weight q5_0 +blk.33.attn_v.weight q5_0 +blk.33.ffn_down_exps.weight q5_0 +blk.33.ffn_gate_exps.weight q5_0 +blk.33.ffn_up_exps.weight q5_0 +blk.34.attn_k.weight q5_0 +blk.34.attn_q.weight q5_0 +blk.34.attn_v.weight q5_0 +blk.34.ffn_down_exps.weight q5_0 +blk.34.ffn_gate_exps.weight q5_0 +blk.34.ffn_up_exps.weight q5_0 +blk.35.attn_k.weight q5_0 +blk.35.attn_q.weight q5_0 +blk.35.attn_v.weight q5_0 +blk.35.ffn_down_exps.weight q5_0 +blk.35.ffn_gate_exps.weight q5_0 +blk.35.ffn_up_exps.weight q5_0 [Q4_K_M] q4_K output.weight q8_0 -blk.0.attn_v.weight q6_K -blk.0.ffn_down_exps.weight q6_K -blk.1.attn_v.weight q6_K -blk.1.ffn_down_exps.weight q6_K -blk.2.attn_v.weight q6_K -blk.2.ffn_down_exps.weight q6_K -blk.3.attn_v.weight q6_K -blk.3.ffn_down_exps.weight q6_K -blk.6.attn_v.weight q6_K -blk.6.ffn_down_exps.weight q6_K -blk.9.attn_v.weight q6_K -blk.9.ffn_down_exps.weight q6_K -blk.12.attn_v.weight q6_K -blk.12.ffn_down_exps.weight q6_K -blk.15.attn_v.weight q6_K -blk.15.ffn_down_exps.weight q6_K -blk.18.attn_v.weight q6_K -blk.18.ffn_down_exps.weight q6_K -blk.21.attn_v.weight q6_K -blk.21.ffn_down_exps.weight q6_K -blk.24.attn_v.weight q6_K -blk.24.ffn_down_exps.weight q6_K -blk.27.attn_v.weight q6_K -blk.27.ffn_down_exps.weight q6_K -blk.30.attn_v.weight q6_K -blk.30.ffn_down_exps.weight q6_K -blk.31.attn_v.weight q6_K -blk.31.ffn_down_exps.weight q6_K -blk.32.attn_v.weight q6_K -blk.32.ffn_down_exps.weight q6_K -blk.33.attn_v.weight q6_K -blk.33.ffn_down_exps.weight q6_K -blk.34.attn_v.weight q6_K -blk.34.ffn_down_exps.weight q6_K -blk.35.attn_v.weight q6_K -blk.35.ffn_down_exps.weight q6_K +token_embd.weight q5_0 +blk.0.attn_k.weight q5_0 +blk.0.attn_q.weight q5_0 +blk.0.attn_v.weight q8_0 +blk.0.ffn_down_exps.weight q8_0 +blk.0.ffn_gate_exps.weight q5_0 +blk.0.ffn_up_exps.weight q5_0 +blk.1.attn_k.weight q5_0 +blk.1.attn_q.weight q5_0 +blk.1.attn_v.weight q8_0 +blk.1.ffn_down_exps.weight q8_0 +blk.1.ffn_gate_exps.weight q5_0 +blk.1.ffn_up_exps.weight q5_0 +blk.2.attn_k.weight q5_0 +blk.2.attn_q.weight q5_0 +blk.2.attn_v.weight q8_0 +blk.2.ffn_down_exps.weight q8_0 +blk.2.ffn_gate_exps.weight q5_0 +blk.2.ffn_up_exps.weight q5_0 +blk.3.attn_k.weight q5_0 +blk.3.attn_q.weight q5_0 +blk.3.attn_v.weight q8_0 +blk.3.ffn_down_exps.weight q8_0 +blk.3.ffn_gate_exps.weight q5_0 +blk.3.ffn_up_exps.weight q5_0 +blk.4.attn_k.weight q5_0 +blk.4.attn_q.weight q5_0 +blk.4.attn_v.weight q5_0 +blk.4.ffn_down_exps.weight q5_0 +blk.4.ffn_gate_exps.weight q5_0 +blk.4.ffn_up_exps.weight q5_0 +blk.5.attn_k.weight q5_0 +blk.5.attn_q.weight q5_0 +blk.5.attn_v.weight q5_0 +blk.5.ffn_down_exps.weight q5_0 +blk.5.ffn_gate_exps.weight q5_0 +blk.5.ffn_up_exps.weight q5_0 +blk.6.attn_k.weight q5_0 +blk.6.attn_q.weight q5_0 +blk.6.attn_v.weight q8_0 +blk.6.ffn_down_exps.weight q8_0 +blk.6.ffn_gate_exps.weight q5_0 +blk.6.ffn_up_exps.weight q5_0 +blk.7.attn_k.weight q5_0 +blk.7.attn_q.weight q5_0 +blk.7.attn_v.weight q5_0 +blk.7.ffn_down_exps.weight q5_0 +blk.7.ffn_gate_exps.weight q5_0 +blk.7.ffn_up_exps.weight q5_0 +blk.8.attn_k.weight q5_0 +blk.8.attn_q.weight q5_0 +blk.8.attn_v.weight q5_0 +blk.8.ffn_down_exps.weight q5_0 +blk.8.ffn_gate_exps.weight q5_0 +blk.8.ffn_up_exps.weight q5_0 +blk.9.attn_k.weight q5_0 +blk.9.attn_q.weight q5_0 +blk.9.attn_v.weight q8_0 +blk.9.ffn_down_exps.weight q8_0 +blk.9.ffn_gate_exps.weight q5_0 +blk.9.ffn_up_exps.weight q5_0 +blk.10.attn_k.weight q5_0 +blk.10.attn_q.weight q5_0 +blk.10.attn_v.weight q5_0 +blk.10.ffn_down_exps.weight q5_0 +blk.10.ffn_gate_exps.weight q5_0 +blk.10.ffn_up_exps.weight q5_0 +blk.11.attn_k.weight q5_0 +blk.11.attn_q.weight q5_0 +blk.11.attn_v.weight q5_0 +blk.11.ffn_down_exps.weight q5_0 +blk.11.ffn_gate_exps.weight q5_0 +blk.11.ffn_up_exps.weight q5_0 +blk.12.attn_k.weight q5_0 +blk.12.attn_q.weight q5_0 +blk.12.attn_v.weight q8_0 +blk.12.ffn_down_exps.weight q8_0 +blk.12.ffn_gate_exps.weight q5_0 +blk.12.ffn_up_exps.weight q5_0 +blk.13.attn_k.weight q5_0 +blk.13.attn_q.weight q5_0 +blk.13.attn_v.weight q5_0 +blk.13.ffn_down_exps.weight q5_0 +blk.13.ffn_gate_exps.weight q5_0 +blk.13.ffn_up_exps.weight q5_0 +blk.14.attn_k.weight q5_0 +blk.14.attn_q.weight q5_0 +blk.14.attn_v.weight q5_0 +blk.14.ffn_down_exps.weight q5_0 +blk.14.ffn_gate_exps.weight q5_0 +blk.14.ffn_up_exps.weight q5_0 +blk.15.attn_k.weight q5_0 +blk.15.attn_q.weight q5_0 +blk.15.attn_v.weight q8_0 +blk.15.ffn_down_exps.weight q8_0 +blk.15.ffn_gate_exps.weight q5_0 +blk.15.ffn_up_exps.weight q5_0 +blk.16.attn_k.weight q5_0 +blk.16.attn_q.weight q5_0 +blk.16.attn_v.weight q5_0 +blk.16.ffn_down_exps.weight q5_0 +blk.16.ffn_gate_exps.weight q5_0 +blk.16.ffn_up_exps.weight q5_0 +blk.17.attn_k.weight q5_0 +blk.17.attn_q.weight q5_0 +blk.17.attn_v.weight q5_0 +blk.17.ffn_down_exps.weight q5_0 +blk.17.ffn_gate_exps.weight q5_0 +blk.17.ffn_up_exps.weight q5_0 +blk.18.attn_k.weight q5_0 +blk.18.attn_q.weight q5_0 +blk.18.attn_v.weight q8_0 +blk.18.ffn_down_exps.weight q8_0 +blk.18.ffn_gate_exps.weight q5_0 +blk.18.ffn_up_exps.weight q5_0 +blk.19.attn_k.weight q5_0 +blk.19.attn_q.weight q5_0 +blk.19.attn_v.weight q5_0 +blk.19.ffn_down_exps.weight q5_0 +blk.19.ffn_gate_exps.weight q5_0 +blk.19.ffn_up_exps.weight q5_0 +blk.20.attn_k.weight q5_0 +blk.20.attn_q.weight q5_0 +blk.20.attn_v.weight q5_0 +blk.20.ffn_down_exps.weight q5_0 +blk.20.ffn_gate_exps.weight q5_0 +blk.20.ffn_up_exps.weight q5_0 +blk.21.attn_k.weight q5_0 +blk.21.attn_q.weight q5_0 +blk.21.attn_v.weight q8_0 +blk.21.ffn_down_exps.weight q8_0 +blk.21.ffn_gate_exps.weight q5_0 +blk.21.ffn_up_exps.weight q5_0 +blk.22.attn_k.weight q5_0 +blk.22.attn_q.weight q5_0 +blk.22.attn_v.weight q5_0 +blk.22.ffn_down_exps.weight q5_0 +blk.22.ffn_gate_exps.weight q5_0 +blk.22.ffn_up_exps.weight q5_0 +blk.23.attn_k.weight q5_0 +blk.23.attn_q.weight q5_0 +blk.23.attn_v.weight q5_0 +blk.23.ffn_down_exps.weight q5_0 +blk.23.ffn_gate_exps.weight q5_0 +blk.23.ffn_up_exps.weight q5_0 +blk.24.attn_k.weight q5_0 +blk.24.attn_q.weight q5_0 +blk.24.attn_v.weight q8_0 +blk.24.ffn_down_exps.weight q8_0 +blk.24.ffn_gate_exps.weight q5_0 +blk.24.ffn_up_exps.weight q5_0 +blk.25.attn_k.weight q5_0 +blk.25.attn_q.weight q5_0 +blk.25.attn_v.weight q5_0 +blk.25.ffn_down_exps.weight q5_0 +blk.25.ffn_gate_exps.weight q5_0 +blk.25.ffn_up_exps.weight q5_0 +blk.26.attn_k.weight q5_0 +blk.26.attn_q.weight q5_0 +blk.26.attn_v.weight q5_0 +blk.26.ffn_down_exps.weight q5_0 +blk.26.ffn_gate_exps.weight q5_0 +blk.26.ffn_up_exps.weight q5_0 +blk.27.attn_k.weight q5_0 +blk.27.attn_q.weight q5_0 +blk.27.attn_v.weight q8_0 +blk.27.ffn_down_exps.weight q8_0 +blk.27.ffn_gate_exps.weight q5_0 +blk.27.ffn_up_exps.weight q5_0 +blk.28.attn_k.weight q5_0 +blk.28.attn_q.weight q5_0 +blk.28.attn_v.weight q5_0 +blk.28.ffn_down_exps.weight q5_0 +blk.28.ffn_gate_exps.weight q5_0 +blk.28.ffn_up_exps.weight q5_0 +blk.29.attn_k.weight q5_0 +blk.29.attn_q.weight q5_0 +blk.29.attn_v.weight q5_0 +blk.29.ffn_down_exps.weight q5_0 +blk.29.ffn_gate_exps.weight q5_0 +blk.29.ffn_up_exps.weight q5_0 +blk.30.attn_k.weight q5_0 +blk.30.attn_q.weight q5_0 +blk.30.attn_v.weight q8_0 +blk.30.ffn_down_exps.weight q8_0 +blk.30.ffn_gate_exps.weight q5_0 +blk.30.ffn_up_exps.weight q5_0 +blk.31.attn_k.weight q5_0 +blk.31.attn_q.weight q5_0 +blk.31.attn_v.weight q8_0 +blk.31.ffn_down_exps.weight q8_0 +blk.31.ffn_gate_exps.weight q5_0 +blk.31.ffn_up_exps.weight q5_0 +blk.32.attn_k.weight q5_0 +blk.32.attn_q.weight q5_0 +blk.32.attn_v.weight q8_0 +blk.32.ffn_down_exps.weight q8_0 +blk.32.ffn_gate_exps.weight q5_0 +blk.32.ffn_up_exps.weight q5_0 +blk.33.attn_k.weight q5_0 +blk.33.attn_q.weight q5_0 +blk.33.attn_v.weight q8_0 +blk.33.ffn_down_exps.weight q8_0 +blk.33.ffn_gate_exps.weight q5_0 +blk.33.ffn_up_exps.weight q5_0 +blk.34.attn_k.weight q5_0 +blk.34.attn_q.weight q5_0 +blk.34.attn_v.weight q8_0 +blk.34.ffn_down_exps.weight q8_0 +blk.34.ffn_gate_exps.weight q5_0 +blk.34.ffn_up_exps.weight q5_0 +blk.35.attn_k.weight q5_0 +blk.35.attn_q.weight q5_0 +blk.35.attn_v.weight q8_0 +blk.35.ffn_down_exps.weight q8_0 +blk.35.ffn_gate_exps.weight q5_0 +blk.35.ffn_up_exps.weight q5_0 [Q5_K_S] q5_K output.weight q8_0 +token_embd.weight q5_1 +blk.0.attn_k.weight q5_1 +blk.0.attn_q.weight q5_1 +blk.0.attn_v.weight q5_1 +blk.0.ffn_down_exps.weight q5_1 +blk.0.ffn_gate_exps.weight q5_1 +blk.0.ffn_up_exps.weight q5_1 +blk.1.attn_k.weight q5_1 +blk.1.attn_q.weight q5_1 +blk.1.attn_v.weight q5_1 +blk.1.ffn_down_exps.weight q5_1 +blk.1.ffn_gate_exps.weight q5_1 +blk.1.ffn_up_exps.weight q5_1 +blk.2.attn_k.weight q5_1 +blk.2.attn_q.weight q5_1 +blk.2.attn_v.weight q5_1 +blk.2.ffn_down_exps.weight q5_1 +blk.2.ffn_gate_exps.weight q5_1 +blk.2.ffn_up_exps.weight q5_1 +blk.3.attn_k.weight q5_1 +blk.3.attn_q.weight q5_1 +blk.3.attn_v.weight q5_1 +blk.3.ffn_down_exps.weight q5_1 +blk.3.ffn_gate_exps.weight q5_1 +blk.3.ffn_up_exps.weight q5_1 +blk.4.attn_k.weight q5_1 +blk.4.attn_q.weight q5_1 +blk.4.attn_v.weight q5_1 +blk.4.ffn_down_exps.weight q5_1 +blk.4.ffn_gate_exps.weight q5_1 +blk.4.ffn_up_exps.weight q5_1 +blk.5.attn_k.weight q5_1 +blk.5.attn_q.weight q5_1 +blk.5.attn_v.weight q5_1 +blk.5.ffn_down_exps.weight q5_1 +blk.5.ffn_gate_exps.weight q5_1 +blk.5.ffn_up_exps.weight q5_1 +blk.6.attn_k.weight q5_1 +blk.6.attn_q.weight q5_1 +blk.6.attn_v.weight q5_1 +blk.6.ffn_down_exps.weight q5_1 +blk.6.ffn_gate_exps.weight q5_1 +blk.6.ffn_up_exps.weight q5_1 +blk.7.attn_k.weight q5_1 +blk.7.attn_q.weight q5_1 +blk.7.attn_v.weight q5_1 +blk.7.ffn_down_exps.weight q5_1 +blk.7.ffn_gate_exps.weight q5_1 +blk.7.ffn_up_exps.weight q5_1 +blk.8.attn_k.weight q5_1 +blk.8.attn_q.weight q5_1 +blk.8.attn_v.weight q5_1 +blk.8.ffn_down_exps.weight q5_1 +blk.8.ffn_gate_exps.weight q5_1 +blk.8.ffn_up_exps.weight q5_1 +blk.9.attn_k.weight q5_1 +blk.9.attn_q.weight q5_1 +blk.9.attn_v.weight q5_1 +blk.9.ffn_down_exps.weight q5_1 +blk.9.ffn_gate_exps.weight q5_1 +blk.9.ffn_up_exps.weight q5_1 +blk.10.attn_k.weight q5_1 +blk.10.attn_q.weight q5_1 +blk.10.attn_v.weight q5_1 +blk.10.ffn_down_exps.weight q5_1 +blk.10.ffn_gate_exps.weight q5_1 +blk.10.ffn_up_exps.weight q5_1 +blk.11.attn_k.weight q5_1 +blk.11.attn_q.weight q5_1 +blk.11.attn_v.weight q5_1 +blk.11.ffn_down_exps.weight q5_1 +blk.11.ffn_gate_exps.weight q5_1 +blk.11.ffn_up_exps.weight q5_1 +blk.12.attn_k.weight q5_1 +blk.12.attn_q.weight q5_1 +blk.12.attn_v.weight q5_1 +blk.12.ffn_down_exps.weight q5_1 +blk.12.ffn_gate_exps.weight q5_1 +blk.12.ffn_up_exps.weight q5_1 +blk.13.attn_k.weight q5_1 +blk.13.attn_q.weight q5_1 +blk.13.attn_v.weight q5_1 +blk.13.ffn_down_exps.weight q5_1 +blk.13.ffn_gate_exps.weight q5_1 +blk.13.ffn_up_exps.weight q5_1 +blk.14.attn_k.weight q5_1 +blk.14.attn_q.weight q5_1 +blk.14.attn_v.weight q5_1 +blk.14.ffn_down_exps.weight q5_1 +blk.14.ffn_gate_exps.weight q5_1 +blk.14.ffn_up_exps.weight q5_1 +blk.15.attn_k.weight q5_1 +blk.15.attn_q.weight q5_1 +blk.15.attn_v.weight q5_1 +blk.15.ffn_down_exps.weight q5_1 +blk.15.ffn_gate_exps.weight q5_1 +blk.15.ffn_up_exps.weight q5_1 +blk.16.attn_k.weight q5_1 +blk.16.attn_q.weight q5_1 +blk.16.attn_v.weight q5_1 +blk.16.ffn_down_exps.weight q5_1 +blk.16.ffn_gate_exps.weight q5_1 +blk.16.ffn_up_exps.weight q5_1 +blk.17.attn_k.weight q5_1 +blk.17.attn_q.weight q5_1 +blk.17.attn_v.weight q5_1 +blk.17.ffn_down_exps.weight q5_1 +blk.17.ffn_gate_exps.weight q5_1 +blk.17.ffn_up_exps.weight q5_1 +blk.18.attn_k.weight q5_1 +blk.18.attn_q.weight q5_1 +blk.18.attn_v.weight q5_1 +blk.18.ffn_down_exps.weight q5_1 +blk.18.ffn_gate_exps.weight q5_1 +blk.18.ffn_up_exps.weight q5_1 +blk.19.attn_k.weight q5_1 +blk.19.attn_q.weight q5_1 +blk.19.attn_v.weight q5_1 +blk.19.ffn_down_exps.weight q5_1 +blk.19.ffn_gate_exps.weight q5_1 +blk.19.ffn_up_exps.weight q5_1 +blk.20.attn_k.weight q5_1 +blk.20.attn_q.weight q5_1 +blk.20.attn_v.weight q5_1 +blk.20.ffn_down_exps.weight q5_1 +blk.20.ffn_gate_exps.weight q5_1 +blk.20.ffn_up_exps.weight q5_1 +blk.21.attn_k.weight q5_1 +blk.21.attn_q.weight q5_1 +blk.21.attn_v.weight q5_1 +blk.21.ffn_down_exps.weight q5_1 +blk.21.ffn_gate_exps.weight q5_1 +blk.21.ffn_up_exps.weight q5_1 +blk.22.attn_k.weight q5_1 +blk.22.attn_q.weight q5_1 +blk.22.attn_v.weight q5_1 +blk.22.ffn_down_exps.weight q5_1 +blk.22.ffn_gate_exps.weight q5_1 +blk.22.ffn_up_exps.weight q5_1 +blk.23.attn_k.weight q5_1 +blk.23.attn_q.weight q5_1 +blk.23.attn_v.weight q5_1 +blk.23.ffn_down_exps.weight q5_1 +blk.23.ffn_gate_exps.weight q5_1 +blk.23.ffn_up_exps.weight q5_1 +blk.24.attn_k.weight q5_1 +blk.24.attn_q.weight q5_1 +blk.24.attn_v.weight q5_1 +blk.24.ffn_down_exps.weight q5_1 +blk.24.ffn_gate_exps.weight q5_1 +blk.24.ffn_up_exps.weight q5_1 +blk.25.attn_k.weight q5_1 +blk.25.attn_q.weight q5_1 +blk.25.attn_v.weight q5_1 +blk.25.ffn_down_exps.weight q5_1 +blk.25.ffn_gate_exps.weight q5_1 +blk.25.ffn_up_exps.weight q5_1 +blk.26.attn_k.weight q5_1 +blk.26.attn_q.weight q5_1 +blk.26.attn_v.weight q5_1 +blk.26.ffn_down_exps.weight q5_1 +blk.26.ffn_gate_exps.weight q5_1 +blk.26.ffn_up_exps.weight q5_1 +blk.27.attn_k.weight q5_1 +blk.27.attn_q.weight q5_1 +blk.27.attn_v.weight q5_1 +blk.27.ffn_down_exps.weight q5_1 +blk.27.ffn_gate_exps.weight q5_1 +blk.27.ffn_up_exps.weight q5_1 +blk.28.attn_k.weight q5_1 +blk.28.attn_q.weight q5_1 +blk.28.attn_v.weight q5_1 +blk.28.ffn_down_exps.weight q5_1 +blk.28.ffn_gate_exps.weight q5_1 +blk.28.ffn_up_exps.weight q5_1 +blk.29.attn_k.weight q5_1 +blk.29.attn_q.weight q5_1 +blk.29.attn_v.weight q5_1 +blk.29.ffn_down_exps.weight q5_1 +blk.29.ffn_gate_exps.weight q5_1 +blk.29.ffn_up_exps.weight q5_1 +blk.30.attn_k.weight q5_1 +blk.30.attn_q.weight q5_1 +blk.30.attn_v.weight q5_1 +blk.30.ffn_down_exps.weight q5_1 +blk.30.ffn_gate_exps.weight q5_1 +blk.30.ffn_up_exps.weight q5_1 +blk.31.attn_k.weight q5_1 +blk.31.attn_q.weight q5_1 +blk.31.attn_v.weight q5_1 +blk.31.ffn_down_exps.weight q5_1 +blk.31.ffn_gate_exps.weight q5_1 +blk.31.ffn_up_exps.weight q5_1 +blk.32.attn_k.weight q5_1 +blk.32.attn_q.weight q5_1 +blk.32.attn_v.weight q5_1 +blk.32.ffn_down_exps.weight q5_1 +blk.32.ffn_gate_exps.weight q5_1 +blk.32.ffn_up_exps.weight q5_1 +blk.33.attn_k.weight q5_1 +blk.33.attn_q.weight q5_1 +blk.33.attn_v.weight q5_1 +blk.33.ffn_down_exps.weight q5_1 +blk.33.ffn_gate_exps.weight q5_1 +blk.33.ffn_up_exps.weight q5_1 +blk.34.attn_k.weight q5_1 +blk.34.attn_q.weight q5_1 +blk.34.attn_v.weight q5_1 +blk.34.ffn_down_exps.weight q5_1 +blk.34.ffn_gate_exps.weight q5_1 +blk.34.ffn_up_exps.weight q5_1 +blk.35.attn_k.weight q5_1 +blk.35.attn_q.weight q5_1 +blk.35.attn_v.weight q5_1 +blk.35.ffn_down_exps.weight q5_1 +blk.35.ffn_gate_exps.weight q5_1 +blk.35.ffn_up_exps.weight q5_1 [Q5_K_M] q5_K output.weight q8_0 -blk.0.attn_v.weight q6_K -blk.0.ffn_down_exps.weight q6_K -blk.1.attn_v.weight q6_K -blk.1.ffn_down_exps.weight q6_K -blk.2.attn_v.weight q6_K -blk.2.ffn_down_exps.weight q6_K -blk.3.attn_v.weight q6_K -blk.3.ffn_down_exps.weight q6_K -blk.6.attn_v.weight q6_K -blk.6.ffn_down_exps.weight q6_K -blk.9.attn_v.weight q6_K -blk.9.ffn_down_exps.weight q6_K -blk.12.attn_v.weight q6_K -blk.12.ffn_down_exps.weight q6_K -blk.15.attn_v.weight q6_K -blk.15.ffn_down_exps.weight q6_K -blk.18.attn_v.weight q6_K -blk.18.ffn_down_exps.weight q6_K -blk.21.attn_v.weight q6_K -blk.21.ffn_down_exps.weight q6_K -blk.24.attn_v.weight q6_K -blk.24.ffn_down_exps.weight q6_K -blk.27.attn_v.weight q6_K -blk.27.ffn_down_exps.weight q6_K -blk.30.attn_v.weight q6_K -blk.30.ffn_down_exps.weight q6_K -blk.31.attn_v.weight q6_K -blk.31.ffn_down_exps.weight q6_K -blk.32.attn_v.weight q6_K -blk.32.ffn_down_exps.weight q6_K -blk.33.attn_v.weight q6_K -blk.33.ffn_down_exps.weight q6_K -blk.34.attn_v.weight q6_K -blk.34.ffn_down_exps.weight q6_K -blk.35.attn_v.weight q6_K -blk.35.ffn_down_exps.weight q6_K +token_embd.weight q5_1 +blk.0.attn_k.weight q5_1 +blk.0.attn_q.weight q5_1 +blk.0.attn_v.weight q8_0 +blk.0.ffn_down_exps.weight q8_0 +blk.0.ffn_gate_exps.weight q5_1 +blk.0.ffn_up_exps.weight q5_1 +blk.1.attn_k.weight q5_1 +blk.1.attn_q.weight q5_1 +blk.1.attn_v.weight q8_0 +blk.1.ffn_down_exps.weight q8_0 +blk.1.ffn_gate_exps.weight q5_1 +blk.1.ffn_up_exps.weight q5_1 +blk.2.attn_k.weight q5_1 +blk.2.attn_q.weight q5_1 +blk.2.attn_v.weight q8_0 +blk.2.ffn_down_exps.weight q8_0 +blk.2.ffn_gate_exps.weight q5_1 +blk.2.ffn_up_exps.weight q5_1 +blk.3.attn_k.weight q5_1 +blk.3.attn_q.weight q5_1 +blk.3.attn_v.weight q8_0 +blk.3.ffn_down_exps.weight q8_0 +blk.3.ffn_gate_exps.weight q5_1 +blk.3.ffn_up_exps.weight q5_1 +blk.4.attn_k.weight q5_1 +blk.4.attn_q.weight q5_1 +blk.4.attn_v.weight q5_1 +blk.4.ffn_down_exps.weight q5_1 +blk.4.ffn_gate_exps.weight q5_1 +blk.4.ffn_up_exps.weight q5_1 +blk.5.attn_k.weight q5_1 +blk.5.attn_q.weight q5_1 +blk.5.attn_v.weight q5_1 +blk.5.ffn_down_exps.weight q5_1 +blk.5.ffn_gate_exps.weight q5_1 +blk.5.ffn_up_exps.weight q5_1 +blk.6.attn_k.weight q5_1 +blk.6.attn_q.weight q5_1 +blk.6.attn_v.weight q8_0 +blk.6.ffn_down_exps.weight q8_0 +blk.6.ffn_gate_exps.weight q5_1 +blk.6.ffn_up_exps.weight q5_1 +blk.7.attn_k.weight q5_1 +blk.7.attn_q.weight q5_1 +blk.7.attn_v.weight q5_1 +blk.7.ffn_down_exps.weight q5_1 +blk.7.ffn_gate_exps.weight q5_1 +blk.7.ffn_up_exps.weight q5_1 +blk.8.attn_k.weight q5_1 +blk.8.attn_q.weight q5_1 +blk.8.attn_v.weight q5_1 +blk.8.ffn_down_exps.weight q5_1 +blk.8.ffn_gate_exps.weight q5_1 +blk.8.ffn_up_exps.weight q5_1 +blk.9.attn_k.weight q5_1 +blk.9.attn_q.weight q5_1 +blk.9.attn_v.weight q8_0 +blk.9.ffn_down_exps.weight q8_0 +blk.9.ffn_gate_exps.weight q5_1 +blk.9.ffn_up_exps.weight q5_1 +blk.10.attn_k.weight q5_1 +blk.10.attn_q.weight q5_1 +blk.10.attn_v.weight q5_1 +blk.10.ffn_down_exps.weight q5_1 +blk.10.ffn_gate_exps.weight q5_1 +blk.10.ffn_up_exps.weight q5_1 +blk.11.attn_k.weight q5_1 +blk.11.attn_q.weight q5_1 +blk.11.attn_v.weight q5_1 +blk.11.ffn_down_exps.weight q5_1 +blk.11.ffn_gate_exps.weight q5_1 +blk.11.ffn_up_exps.weight q5_1 +blk.12.attn_k.weight q5_1 +blk.12.attn_q.weight q5_1 +blk.12.attn_v.weight q8_0 +blk.12.ffn_down_exps.weight q8_0 +blk.12.ffn_gate_exps.weight q5_1 +blk.12.ffn_up_exps.weight q5_1 +blk.13.attn_k.weight q5_1 +blk.13.attn_q.weight q5_1 +blk.13.attn_v.weight q5_1 +blk.13.ffn_down_exps.weight q5_1 +blk.13.ffn_gate_exps.weight q5_1 +blk.13.ffn_up_exps.weight q5_1 +blk.14.attn_k.weight q5_1 +blk.14.attn_q.weight q5_1 +blk.14.attn_v.weight q5_1 +blk.14.ffn_down_exps.weight q5_1 +blk.14.ffn_gate_exps.weight q5_1 +blk.14.ffn_up_exps.weight q5_1 +blk.15.attn_k.weight q5_1 +blk.15.attn_q.weight q5_1 +blk.15.attn_v.weight q8_0 +blk.15.ffn_down_exps.weight q8_0 +blk.15.ffn_gate_exps.weight q5_1 +blk.15.ffn_up_exps.weight q5_1 +blk.16.attn_k.weight q5_1 +blk.16.attn_q.weight q5_1 +blk.16.attn_v.weight q5_1 +blk.16.ffn_down_exps.weight q5_1 +blk.16.ffn_gate_exps.weight q5_1 +blk.16.ffn_up_exps.weight q5_1 +blk.17.attn_k.weight q5_1 +blk.17.attn_q.weight q5_1 +blk.17.attn_v.weight q5_1 +blk.17.ffn_down_exps.weight q5_1 +blk.17.ffn_gate_exps.weight q5_1 +blk.17.ffn_up_exps.weight q5_1 +blk.18.attn_k.weight q5_1 +blk.18.attn_q.weight q5_1 +blk.18.attn_v.weight q8_0 +blk.18.ffn_down_exps.weight q8_0 +blk.18.ffn_gate_exps.weight q5_1 +blk.18.ffn_up_exps.weight q5_1 +blk.19.attn_k.weight q5_1 +blk.19.attn_q.weight q5_1 +blk.19.attn_v.weight q5_1 +blk.19.ffn_down_exps.weight q5_1 +blk.19.ffn_gate_exps.weight q5_1 +blk.19.ffn_up_exps.weight q5_1 +blk.20.attn_k.weight q5_1 +blk.20.attn_q.weight q5_1 +blk.20.attn_v.weight q5_1 +blk.20.ffn_down_exps.weight q5_1 +blk.20.ffn_gate_exps.weight q5_1 +blk.20.ffn_up_exps.weight q5_1 +blk.21.attn_k.weight q5_1 +blk.21.attn_q.weight q5_1 +blk.21.attn_v.weight q8_0 +blk.21.ffn_down_exps.weight q8_0 +blk.21.ffn_gate_exps.weight q5_1 +blk.21.ffn_up_exps.weight q5_1 +blk.22.attn_k.weight q5_1 +blk.22.attn_q.weight q5_1 +blk.22.attn_v.weight q5_1 +blk.22.ffn_down_exps.weight q5_1 +blk.22.ffn_gate_exps.weight q5_1 +blk.22.ffn_up_exps.weight q5_1 +blk.23.attn_k.weight q5_1 +blk.23.attn_q.weight q5_1 +blk.23.attn_v.weight q5_1 +blk.23.ffn_down_exps.weight q5_1 +blk.23.ffn_gate_exps.weight q5_1 +blk.23.ffn_up_exps.weight q5_1 +blk.24.attn_k.weight q5_1 +blk.24.attn_q.weight q5_1 +blk.24.attn_v.weight q8_0 +blk.24.ffn_down_exps.weight q8_0 +blk.24.ffn_gate_exps.weight q5_1 +blk.24.ffn_up_exps.weight q5_1 +blk.25.attn_k.weight q5_1 +blk.25.attn_q.weight q5_1 +blk.25.attn_v.weight q5_1 +blk.25.ffn_down_exps.weight q5_1 +blk.25.ffn_gate_exps.weight q5_1 +blk.25.ffn_up_exps.weight q5_1 +blk.26.attn_k.weight q5_1 +blk.26.attn_q.weight q5_1 +blk.26.attn_v.weight q5_1 +blk.26.ffn_down_exps.weight q5_1 +blk.26.ffn_gate_exps.weight q5_1 +blk.26.ffn_up_exps.weight q5_1 +blk.27.attn_k.weight q5_1 +blk.27.attn_q.weight q5_1 +blk.27.attn_v.weight q8_0 +blk.27.ffn_down_exps.weight q8_0 +blk.27.ffn_gate_exps.weight q5_1 +blk.27.ffn_up_exps.weight q5_1 +blk.28.attn_k.weight q5_1 +blk.28.attn_q.weight q5_1 +blk.28.attn_v.weight q5_1 +blk.28.ffn_down_exps.weight q5_1 +blk.28.ffn_gate_exps.weight q5_1 +blk.28.ffn_up_exps.weight q5_1 +blk.29.attn_k.weight q5_1 +blk.29.attn_q.weight q5_1 +blk.29.attn_v.weight q5_1 +blk.29.ffn_down_exps.weight q5_1 +blk.29.ffn_gate_exps.weight q5_1 +blk.29.ffn_up_exps.weight q5_1 +blk.30.attn_k.weight q5_1 +blk.30.attn_q.weight q5_1 +blk.30.attn_v.weight q8_0 +blk.30.ffn_down_exps.weight q8_0 +blk.30.ffn_gate_exps.weight q5_1 +blk.30.ffn_up_exps.weight q5_1 +blk.31.attn_k.weight q5_1 +blk.31.attn_q.weight q5_1 +blk.31.attn_v.weight q8_0 +blk.31.ffn_down_exps.weight q8_0 +blk.31.ffn_gate_exps.weight q5_1 +blk.31.ffn_up_exps.weight q5_1 +blk.32.attn_k.weight q5_1 +blk.32.attn_q.weight q5_1 +blk.32.attn_v.weight q8_0 +blk.32.ffn_down_exps.weight q8_0 +blk.32.ffn_gate_exps.weight q5_1 +blk.32.ffn_up_exps.weight q5_1 +blk.33.attn_k.weight q5_1 +blk.33.attn_q.weight q5_1 +blk.33.attn_v.weight q8_0 +blk.33.ffn_down_exps.weight q8_0 +blk.33.ffn_gate_exps.weight q5_1 +blk.33.ffn_up_exps.weight q5_1 +blk.34.attn_k.weight q5_1 +blk.34.attn_q.weight q5_1 +blk.34.attn_v.weight q8_0 +blk.34.ffn_down_exps.weight q8_0 +blk.34.ffn_gate_exps.weight q5_1 +blk.34.ffn_up_exps.weight q5_1 +blk.35.attn_k.weight q5_1 +blk.35.attn_q.weight q5_1 +blk.35.attn_v.weight q8_0 +blk.35.ffn_down_exps.weight q8_0 +blk.35.ffn_gate_exps.weight q5_1 +blk.35.ffn_up_exps.weight q5_1 [Q6_K] q6_K output.weight q8_0 +token_embd.weight q8_0 +blk.0.attn_k.weight q8_0 +blk.0.attn_q.weight q8_0 +blk.0.attn_v.weight q8_0 +blk.0.ffn_down_exps.weight q8_0 +blk.0.ffn_gate_exps.weight q8_0 +blk.0.ffn_up_exps.weight q8_0 +blk.1.attn_k.weight q8_0 +blk.1.attn_q.weight q8_0 +blk.1.attn_v.weight q8_0 +blk.1.ffn_down_exps.weight q8_0 +blk.1.ffn_gate_exps.weight q8_0 +blk.1.ffn_up_exps.weight q8_0 +blk.2.attn_k.weight q8_0 +blk.2.attn_q.weight q8_0 +blk.2.attn_v.weight q8_0 +blk.2.ffn_down_exps.weight q8_0 +blk.2.ffn_gate_exps.weight q8_0 +blk.2.ffn_up_exps.weight q8_0 +blk.3.attn_k.weight q8_0 +blk.3.attn_q.weight q8_0 +blk.3.attn_v.weight q8_0 +blk.3.ffn_down_exps.weight q8_0 +blk.3.ffn_gate_exps.weight q8_0 +blk.3.ffn_up_exps.weight q8_0 +blk.4.attn_k.weight q8_0 +blk.4.attn_q.weight q8_0 +blk.4.attn_v.weight q8_0 +blk.4.ffn_down_exps.weight q8_0 +blk.4.ffn_gate_exps.weight q8_0 +blk.4.ffn_up_exps.weight q8_0 +blk.5.attn_k.weight q8_0 +blk.5.attn_q.weight q8_0 +blk.5.attn_v.weight q8_0 +blk.5.ffn_down_exps.weight q8_0 +blk.5.ffn_gate_exps.weight q8_0 +blk.5.ffn_up_exps.weight q8_0 +blk.6.attn_k.weight q8_0 +blk.6.attn_q.weight q8_0 +blk.6.attn_v.weight q8_0 +blk.6.ffn_down_exps.weight q8_0 +blk.6.ffn_gate_exps.weight q8_0 +blk.6.ffn_up_exps.weight q8_0 +blk.7.attn_k.weight q8_0 +blk.7.attn_q.weight q8_0 +blk.7.attn_v.weight q8_0 +blk.7.ffn_down_exps.weight q8_0 +blk.7.ffn_gate_exps.weight q8_0 +blk.7.ffn_up_exps.weight q8_0 +blk.8.attn_k.weight q8_0 +blk.8.attn_q.weight q8_0 +blk.8.attn_v.weight q8_0 +blk.8.ffn_down_exps.weight q8_0 +blk.8.ffn_gate_exps.weight q8_0 +blk.8.ffn_up_exps.weight q8_0 +blk.9.attn_k.weight q8_0 +blk.9.attn_q.weight q8_0 +blk.9.attn_v.weight q8_0 +blk.9.ffn_down_exps.weight q8_0 +blk.9.ffn_gate_exps.weight q8_0 +blk.9.ffn_up_exps.weight q8_0 +blk.10.attn_k.weight q8_0 +blk.10.attn_q.weight q8_0 +blk.10.attn_v.weight q8_0 +blk.10.ffn_down_exps.weight q8_0 +blk.10.ffn_gate_exps.weight q8_0 +blk.10.ffn_up_exps.weight q8_0 +blk.11.attn_k.weight q8_0 +blk.11.attn_q.weight q8_0 +blk.11.attn_v.weight q8_0 +blk.11.ffn_down_exps.weight q8_0 +blk.11.ffn_gate_exps.weight q8_0 +blk.11.ffn_up_exps.weight q8_0 +blk.12.attn_k.weight q8_0 +blk.12.attn_q.weight q8_0 +blk.12.attn_v.weight q8_0 +blk.12.ffn_down_exps.weight q8_0 +blk.12.ffn_gate_exps.weight q8_0 +blk.12.ffn_up_exps.weight q8_0 +blk.13.attn_k.weight q8_0 +blk.13.attn_q.weight q8_0 +blk.13.attn_v.weight q8_0 +blk.13.ffn_down_exps.weight q8_0 +blk.13.ffn_gate_exps.weight q8_0 +blk.13.ffn_up_exps.weight q8_0 +blk.14.attn_k.weight q8_0 +blk.14.attn_q.weight q8_0 +blk.14.attn_v.weight q8_0 +blk.14.ffn_down_exps.weight q8_0 +blk.14.ffn_gate_exps.weight q8_0 +blk.14.ffn_up_exps.weight q8_0 +blk.15.attn_k.weight q8_0 +blk.15.attn_q.weight q8_0 +blk.15.attn_v.weight q8_0 +blk.15.ffn_down_exps.weight q8_0 +blk.15.ffn_gate_exps.weight q8_0 +blk.15.ffn_up_exps.weight q8_0 +blk.16.attn_k.weight q8_0 +blk.16.attn_q.weight q8_0 +blk.16.attn_v.weight q8_0 +blk.16.ffn_down_exps.weight q8_0 +blk.16.ffn_gate_exps.weight q8_0 +blk.16.ffn_up_exps.weight q8_0 +blk.17.attn_k.weight q8_0 +blk.17.attn_q.weight q8_0 +blk.17.attn_v.weight q8_0 +blk.17.ffn_down_exps.weight q8_0 +blk.17.ffn_gate_exps.weight q8_0 +blk.17.ffn_up_exps.weight q8_0 +blk.18.attn_k.weight q8_0 +blk.18.attn_q.weight q8_0 +blk.18.attn_v.weight q8_0 +blk.18.ffn_down_exps.weight q8_0 +blk.18.ffn_gate_exps.weight q8_0 +blk.18.ffn_up_exps.weight q8_0 +blk.19.attn_k.weight q8_0 +blk.19.attn_q.weight q8_0 +blk.19.attn_v.weight q8_0 +blk.19.ffn_down_exps.weight q8_0 +blk.19.ffn_gate_exps.weight q8_0 +blk.19.ffn_up_exps.weight q8_0 +blk.20.attn_k.weight q8_0 +blk.20.attn_q.weight q8_0 +blk.20.attn_v.weight q8_0 +blk.20.ffn_down_exps.weight q8_0 +blk.20.ffn_gate_exps.weight q8_0 +blk.20.ffn_up_exps.weight q8_0 +blk.21.attn_k.weight q8_0 +blk.21.attn_q.weight q8_0 +blk.21.attn_v.weight q8_0 +blk.21.ffn_down_exps.weight q8_0 +blk.21.ffn_gate_exps.weight q8_0 +blk.21.ffn_up_exps.weight q8_0 +blk.22.attn_k.weight q8_0 +blk.22.attn_q.weight q8_0 +blk.22.attn_v.weight q8_0 +blk.22.ffn_down_exps.weight q8_0 +blk.22.ffn_gate_exps.weight q8_0 +blk.22.ffn_up_exps.weight q8_0 +blk.23.attn_k.weight q8_0 +blk.23.attn_q.weight q8_0 +blk.23.attn_v.weight q8_0 +blk.23.ffn_down_exps.weight q8_0 +blk.23.ffn_gate_exps.weight q8_0 +blk.23.ffn_up_exps.weight q8_0 +blk.24.attn_k.weight q8_0 +blk.24.attn_q.weight q8_0 +blk.24.attn_v.weight q8_0 +blk.24.ffn_down_exps.weight q8_0 +blk.24.ffn_gate_exps.weight q8_0 +blk.24.ffn_up_exps.weight q8_0 +blk.25.attn_k.weight q8_0 +blk.25.attn_q.weight q8_0 +blk.25.attn_v.weight q8_0 +blk.25.ffn_down_exps.weight q8_0 +blk.25.ffn_gate_exps.weight q8_0 +blk.25.ffn_up_exps.weight q8_0 +blk.26.attn_k.weight q8_0 +blk.26.attn_q.weight q8_0 +blk.26.attn_v.weight q8_0 +blk.26.ffn_down_exps.weight q8_0 +blk.26.ffn_gate_exps.weight q8_0 +blk.26.ffn_up_exps.weight q8_0 +blk.27.attn_k.weight q8_0 +blk.27.attn_q.weight q8_0 +blk.27.attn_v.weight q8_0 +blk.27.ffn_down_exps.weight q8_0 +blk.27.ffn_gate_exps.weight q8_0 +blk.27.ffn_up_exps.weight q8_0 +blk.28.attn_k.weight q8_0 +blk.28.attn_q.weight q8_0 +blk.28.attn_v.weight q8_0 +blk.28.ffn_down_exps.weight q8_0 +blk.28.ffn_gate_exps.weight q8_0 +blk.28.ffn_up_exps.weight q8_0 +blk.29.attn_k.weight q8_0 +blk.29.attn_q.weight q8_0 +blk.29.attn_v.weight q8_0 +blk.29.ffn_down_exps.weight q8_0 +blk.29.ffn_gate_exps.weight q8_0 +blk.29.ffn_up_exps.weight q8_0 +blk.30.attn_k.weight q8_0 +blk.30.attn_q.weight q8_0 +blk.30.attn_v.weight q8_0 +blk.30.ffn_down_exps.weight q8_0 +blk.30.ffn_gate_exps.weight q8_0 +blk.30.ffn_up_exps.weight q8_0 +blk.31.attn_k.weight q8_0 +blk.31.attn_q.weight q8_0 +blk.31.attn_v.weight q8_0 +blk.31.ffn_down_exps.weight q8_0 +blk.31.ffn_gate_exps.weight q8_0 +blk.31.ffn_up_exps.weight q8_0 +blk.32.attn_k.weight q8_0 +blk.32.attn_q.weight q8_0 +blk.32.attn_v.weight q8_0 +blk.32.ffn_down_exps.weight q8_0 +blk.32.ffn_gate_exps.weight q8_0 +blk.32.ffn_up_exps.weight q8_0 +blk.33.attn_k.weight q8_0 +blk.33.attn_q.weight q8_0 +blk.33.attn_v.weight q8_0 +blk.33.ffn_down_exps.weight q8_0 +blk.33.ffn_gate_exps.weight q8_0 +blk.33.ffn_up_exps.weight q8_0 +blk.34.attn_k.weight q8_0 +blk.34.attn_q.weight q8_0 +blk.34.attn_v.weight q8_0 +blk.34.ffn_down_exps.weight q8_0 +blk.34.ffn_gate_exps.weight q8_0 +blk.34.ffn_up_exps.weight q8_0 +blk.35.attn_k.weight q8_0 +blk.35.attn_q.weight q8_0 +blk.35.attn_v.weight q8_0 +blk.35.ffn_down_exps.weight q8_0 +blk.35.ffn_gate_exps.weight q8_0 +blk.35.ffn_up_exps.weight q8_0 [IQ2_XXS] iq2_xxs output.weight q8_0 -token_embd.weight q2_K -blk.0.attn_v.weight q4_K -blk.0.ffn_down_exps.weight q2_K -blk.1.attn_v.weight q4_K -blk.1.ffn_down_exps.weight q2_K -blk.2.attn_v.weight q4_K -blk.2.ffn_down_exps.weight q2_K -blk.3.attn_v.weight q4_K -blk.3.ffn_down_exps.weight q2_K -blk.4.attn_v.weight q4_K -blk.5.attn_v.weight q4_K -blk.6.attn_v.weight q4_K -blk.7.attn_v.weight q4_K -blk.8.attn_v.weight q4_K -blk.9.attn_v.weight q4_K -blk.10.attn_v.weight q4_K -blk.11.attn_v.weight q4_K -blk.12.attn_v.weight q4_K -blk.13.attn_v.weight q4_K -blk.14.attn_v.weight q4_K -blk.15.attn_v.weight q4_K -blk.16.attn_v.weight q4_K -blk.17.attn_v.weight q4_K -blk.18.attn_v.weight q4_K -blk.19.attn_v.weight q4_K -blk.20.attn_v.weight q4_K -blk.21.attn_v.weight q4_K -blk.22.attn_v.weight q4_K -blk.23.attn_v.weight q4_K -blk.24.attn_v.weight q4_K -blk.25.attn_v.weight q4_K -blk.26.attn_v.weight q4_K -blk.27.attn_v.weight q4_K -blk.28.attn_v.weight q4_K -blk.29.attn_v.weight q4_K -blk.30.attn_v.weight q4_K -blk.31.attn_v.weight q4_K -blk.32.attn_v.weight q4_K -blk.33.attn_v.weight q4_K -blk.34.attn_v.weight q4_K -blk.35.attn_v.weight q4_K +token_embd.weight q4_0 +blk.0.attn_k.weight iq4_nl +blk.0.attn_q.weight iq4_nl +blk.0.attn_v.weight q5_0 +blk.0.ffn_down_exps.weight q4_0 +blk.0.ffn_gate_exps.weight iq4_nl +blk.0.ffn_up_exps.weight iq4_nl +blk.1.attn_k.weight iq4_nl +blk.1.attn_q.weight iq4_nl +blk.1.attn_v.weight q5_0 +blk.1.ffn_down_exps.weight q4_0 +blk.1.ffn_gate_exps.weight iq4_nl +blk.1.ffn_up_exps.weight iq4_nl +blk.2.attn_k.weight iq4_nl +blk.2.attn_q.weight iq4_nl +blk.2.attn_v.weight q5_0 +blk.2.ffn_down_exps.weight q4_0 +blk.2.ffn_gate_exps.weight iq4_nl +blk.2.ffn_up_exps.weight iq4_nl +blk.3.attn_k.weight iq4_nl +blk.3.attn_q.weight iq4_nl +blk.3.attn_v.weight q5_0 +blk.3.ffn_down_exps.weight q4_0 +blk.3.ffn_gate_exps.weight iq4_nl +blk.3.ffn_up_exps.weight iq4_nl +blk.4.attn_k.weight iq4_nl +blk.4.attn_q.weight iq4_nl +blk.4.attn_v.weight q5_0 +blk.4.ffn_down_exps.weight iq4_nl +blk.4.ffn_gate_exps.weight iq4_nl +blk.4.ffn_up_exps.weight iq4_nl +blk.5.attn_k.weight iq4_nl +blk.5.attn_q.weight iq4_nl +blk.5.attn_v.weight q5_0 +blk.5.ffn_down_exps.weight iq4_nl +blk.5.ffn_gate_exps.weight iq4_nl +blk.5.ffn_up_exps.weight iq4_nl +blk.6.attn_k.weight iq4_nl +blk.6.attn_q.weight iq4_nl +blk.6.attn_v.weight q5_0 +blk.6.ffn_down_exps.weight iq4_nl +blk.6.ffn_gate_exps.weight iq4_nl +blk.6.ffn_up_exps.weight iq4_nl +blk.7.attn_k.weight iq4_nl +blk.7.attn_q.weight iq4_nl +blk.7.attn_v.weight q5_0 +blk.7.ffn_down_exps.weight iq4_nl +blk.7.ffn_gate_exps.weight iq4_nl +blk.7.ffn_up_exps.weight iq4_nl +blk.8.attn_k.weight iq4_nl +blk.8.attn_q.weight iq4_nl +blk.8.attn_v.weight q5_0 +blk.8.ffn_down_exps.weight iq4_nl +blk.8.ffn_gate_exps.weight iq4_nl +blk.8.ffn_up_exps.weight iq4_nl +blk.9.attn_k.weight iq4_nl +blk.9.attn_q.weight iq4_nl +blk.9.attn_v.weight q5_0 +blk.9.ffn_down_exps.weight iq4_nl +blk.9.ffn_gate_exps.weight iq4_nl +blk.9.ffn_up_exps.weight iq4_nl +blk.10.attn_k.weight iq4_nl +blk.10.attn_q.weight iq4_nl +blk.10.attn_v.weight q5_0 +blk.10.ffn_down_exps.weight iq4_nl +blk.10.ffn_gate_exps.weight iq4_nl +blk.10.ffn_up_exps.weight iq4_nl +blk.11.attn_k.weight iq4_nl +blk.11.attn_q.weight iq4_nl +blk.11.attn_v.weight q5_0 +blk.11.ffn_down_exps.weight iq4_nl +blk.11.ffn_gate_exps.weight iq4_nl +blk.11.ffn_up_exps.weight iq4_nl +blk.12.attn_k.weight iq4_nl +blk.12.attn_q.weight iq4_nl +blk.12.attn_v.weight q5_0 +blk.12.ffn_down_exps.weight iq4_nl +blk.12.ffn_gate_exps.weight iq4_nl +blk.12.ffn_up_exps.weight iq4_nl +blk.13.attn_k.weight iq4_nl +blk.13.attn_q.weight iq4_nl +blk.13.attn_v.weight q5_0 +blk.13.ffn_down_exps.weight iq4_nl +blk.13.ffn_gate_exps.weight iq4_nl +blk.13.ffn_up_exps.weight iq4_nl +blk.14.attn_k.weight iq4_nl +blk.14.attn_q.weight iq4_nl +blk.14.attn_v.weight q5_0 +blk.14.ffn_down_exps.weight iq4_nl +blk.14.ffn_gate_exps.weight iq4_nl +blk.14.ffn_up_exps.weight iq4_nl +blk.15.attn_k.weight iq4_nl +blk.15.attn_q.weight iq4_nl +blk.15.attn_v.weight q5_0 +blk.15.ffn_down_exps.weight iq4_nl +blk.15.ffn_gate_exps.weight iq4_nl +blk.15.ffn_up_exps.weight iq4_nl +blk.16.attn_k.weight iq4_nl +blk.16.attn_q.weight iq4_nl +blk.16.attn_v.weight q5_0 +blk.16.ffn_down_exps.weight iq4_nl +blk.16.ffn_gate_exps.weight iq4_nl +blk.16.ffn_up_exps.weight iq4_nl +blk.17.attn_k.weight iq4_nl +blk.17.attn_q.weight iq4_nl +blk.17.attn_v.weight q5_0 +blk.17.ffn_down_exps.weight iq4_nl +blk.17.ffn_gate_exps.weight iq4_nl +blk.17.ffn_up_exps.weight iq4_nl +blk.18.attn_k.weight iq4_nl +blk.18.attn_q.weight iq4_nl +blk.18.attn_v.weight q5_0 +blk.18.ffn_down_exps.weight iq4_nl +blk.18.ffn_gate_exps.weight iq4_nl +blk.18.ffn_up_exps.weight iq4_nl +blk.19.attn_k.weight iq4_nl +blk.19.attn_q.weight iq4_nl +blk.19.attn_v.weight q5_0 +blk.19.ffn_down_exps.weight iq4_nl +blk.19.ffn_gate_exps.weight iq4_nl +blk.19.ffn_up_exps.weight iq4_nl +blk.20.attn_k.weight iq4_nl +blk.20.attn_q.weight iq4_nl +blk.20.attn_v.weight q5_0 +blk.20.ffn_down_exps.weight iq4_nl +blk.20.ffn_gate_exps.weight iq4_nl +blk.20.ffn_up_exps.weight iq4_nl +blk.21.attn_k.weight iq4_nl +blk.21.attn_q.weight iq4_nl +blk.21.attn_v.weight q5_0 +blk.21.ffn_down_exps.weight iq4_nl +blk.21.ffn_gate_exps.weight iq4_nl +blk.21.ffn_up_exps.weight iq4_nl +blk.22.attn_k.weight iq4_nl +blk.22.attn_q.weight iq4_nl +blk.22.attn_v.weight q5_0 +blk.22.ffn_down_exps.weight iq4_nl +blk.22.ffn_gate_exps.weight iq4_nl +blk.22.ffn_up_exps.weight iq4_nl +blk.23.attn_k.weight iq4_nl +blk.23.attn_q.weight iq4_nl +blk.23.attn_v.weight q5_0 +blk.23.ffn_down_exps.weight iq4_nl +blk.23.ffn_gate_exps.weight iq4_nl +blk.23.ffn_up_exps.weight iq4_nl +blk.24.attn_k.weight iq4_nl +blk.24.attn_q.weight iq4_nl +blk.24.attn_v.weight q5_0 +blk.24.ffn_down_exps.weight iq4_nl +blk.24.ffn_gate_exps.weight iq4_nl +blk.24.ffn_up_exps.weight iq4_nl +blk.25.attn_k.weight iq4_nl +blk.25.attn_q.weight iq4_nl +blk.25.attn_v.weight q5_0 +blk.25.ffn_down_exps.weight iq4_nl +blk.25.ffn_gate_exps.weight iq4_nl +blk.25.ffn_up_exps.weight iq4_nl +blk.26.attn_k.weight iq4_nl +blk.26.attn_q.weight iq4_nl +blk.26.attn_v.weight q5_0 +blk.26.ffn_down_exps.weight iq4_nl +blk.26.ffn_gate_exps.weight iq4_nl +blk.26.ffn_up_exps.weight iq4_nl +blk.27.attn_k.weight iq4_nl +blk.27.attn_q.weight iq4_nl +blk.27.attn_v.weight q5_0 +blk.27.ffn_down_exps.weight iq4_nl +blk.27.ffn_gate_exps.weight iq4_nl +blk.27.ffn_up_exps.weight iq4_nl +blk.28.attn_k.weight iq4_nl +blk.28.attn_q.weight iq4_nl +blk.28.attn_v.weight q5_0 +blk.28.ffn_down_exps.weight iq4_nl +blk.28.ffn_gate_exps.weight iq4_nl +blk.28.ffn_up_exps.weight iq4_nl +blk.29.attn_k.weight iq4_nl +blk.29.attn_q.weight iq4_nl +blk.29.attn_v.weight q5_0 +blk.29.ffn_down_exps.weight iq4_nl +blk.29.ffn_gate_exps.weight iq4_nl +blk.29.ffn_up_exps.weight iq4_nl +blk.30.attn_k.weight iq4_nl +blk.30.attn_q.weight iq4_nl +blk.30.attn_v.weight q5_0 +blk.30.ffn_down_exps.weight iq4_nl +blk.30.ffn_gate_exps.weight iq4_nl +blk.30.ffn_up_exps.weight iq4_nl +blk.31.attn_k.weight iq4_nl +blk.31.attn_q.weight iq4_nl +blk.31.attn_v.weight q5_0 +blk.31.ffn_down_exps.weight iq4_nl +blk.31.ffn_gate_exps.weight iq4_nl +blk.31.ffn_up_exps.weight iq4_nl +blk.32.attn_k.weight iq4_nl +blk.32.attn_q.weight iq4_nl +blk.32.attn_v.weight q5_0 +blk.32.ffn_down_exps.weight iq4_nl +blk.32.ffn_gate_exps.weight iq4_nl +blk.32.ffn_up_exps.weight iq4_nl +blk.33.attn_k.weight iq4_nl +blk.33.attn_q.weight iq4_nl +blk.33.attn_v.weight q5_0 +blk.33.ffn_down_exps.weight iq4_nl +blk.33.ffn_gate_exps.weight iq4_nl +blk.33.ffn_up_exps.weight iq4_nl +blk.34.attn_k.weight iq4_nl +blk.34.attn_q.weight iq4_nl +blk.34.attn_v.weight q5_0 +blk.34.ffn_down_exps.weight iq4_nl +blk.34.ffn_gate_exps.weight iq4_nl +blk.34.ffn_up_exps.weight iq4_nl +blk.35.attn_k.weight iq4_nl +blk.35.attn_q.weight iq4_nl +blk.35.attn_v.weight q5_0 +blk.35.ffn_down_exps.weight iq4_nl +blk.35.ffn_gate_exps.weight iq4_nl +blk.35.ffn_up_exps.weight iq4_nl [IQ2_XS] iq2_xs output.weight q8_0 -token_embd.weight q2_K -blk.0.attn_v.weight q4_K -blk.0.ffn_down_exps.weight q2_K -blk.1.attn_v.weight q4_K -blk.1.ffn_down_exps.weight q2_K -blk.2.attn_v.weight q4_K -blk.2.ffn_down_exps.weight q2_K -blk.3.attn_v.weight q4_K -blk.3.ffn_down_exps.weight q2_K -blk.4.attn_v.weight q4_K -blk.5.attn_v.weight q4_K -blk.6.attn_v.weight q4_K -blk.7.attn_v.weight q4_K -blk.8.attn_v.weight q4_K -blk.9.attn_v.weight q4_K -blk.10.attn_v.weight q4_K -blk.11.attn_v.weight q4_K -blk.12.attn_v.weight q4_K -blk.13.attn_v.weight q4_K -blk.14.attn_v.weight q4_K -blk.15.attn_v.weight q4_K -blk.16.attn_v.weight q4_K -blk.17.attn_v.weight q4_K -blk.18.attn_v.weight q4_K -blk.19.attn_v.weight q4_K -blk.20.attn_v.weight q4_K -blk.21.attn_v.weight q4_K -blk.22.attn_v.weight q4_K -blk.23.attn_v.weight q4_K -blk.24.attn_v.weight q4_K -blk.25.attn_v.weight q4_K -blk.26.attn_v.weight q4_K -blk.27.attn_v.weight q4_K -blk.28.attn_v.weight q4_K -blk.29.attn_v.weight q4_K -blk.30.attn_v.weight q4_K -blk.31.attn_v.weight q4_K -blk.32.attn_v.weight q4_K -blk.33.attn_v.weight q4_K -blk.34.attn_v.weight q4_K -blk.35.attn_v.weight q4_K +token_embd.weight q4_0 +blk.0.attn_k.weight iq4_nl +blk.0.attn_q.weight iq4_nl +blk.0.attn_v.weight q5_0 +blk.0.ffn_down_exps.weight q4_0 +blk.0.ffn_gate_exps.weight iq4_nl +blk.0.ffn_up_exps.weight iq4_nl +blk.1.attn_k.weight iq4_nl +blk.1.attn_q.weight iq4_nl +blk.1.attn_v.weight q5_0 +blk.1.ffn_down_exps.weight q4_0 +blk.1.ffn_gate_exps.weight iq4_nl +blk.1.ffn_up_exps.weight iq4_nl +blk.2.attn_k.weight iq4_nl +blk.2.attn_q.weight iq4_nl +blk.2.attn_v.weight q5_0 +blk.2.ffn_down_exps.weight q4_0 +blk.2.ffn_gate_exps.weight iq4_nl +blk.2.ffn_up_exps.weight iq4_nl +blk.3.attn_k.weight iq4_nl +blk.3.attn_q.weight iq4_nl +blk.3.attn_v.weight q5_0 +blk.3.ffn_down_exps.weight q4_0 +blk.3.ffn_gate_exps.weight iq4_nl +blk.3.ffn_up_exps.weight iq4_nl +blk.4.attn_k.weight iq4_nl +blk.4.attn_q.weight iq4_nl +blk.4.attn_v.weight q5_0 +blk.4.ffn_down_exps.weight iq4_nl +blk.4.ffn_gate_exps.weight iq4_nl +blk.4.ffn_up_exps.weight iq4_nl +blk.5.attn_k.weight iq4_nl +blk.5.attn_q.weight iq4_nl +blk.5.attn_v.weight q5_0 +blk.5.ffn_down_exps.weight iq4_nl +blk.5.ffn_gate_exps.weight iq4_nl +blk.5.ffn_up_exps.weight iq4_nl +blk.6.attn_k.weight iq4_nl +blk.6.attn_q.weight iq4_nl +blk.6.attn_v.weight q5_0 +blk.6.ffn_down_exps.weight iq4_nl +blk.6.ffn_gate_exps.weight iq4_nl +blk.6.ffn_up_exps.weight iq4_nl +blk.7.attn_k.weight iq4_nl +blk.7.attn_q.weight iq4_nl +blk.7.attn_v.weight q5_0 +blk.7.ffn_down_exps.weight iq4_nl +blk.7.ffn_gate_exps.weight iq4_nl +blk.7.ffn_up_exps.weight iq4_nl +blk.8.attn_k.weight iq4_nl +blk.8.attn_q.weight iq4_nl +blk.8.attn_v.weight q5_0 +blk.8.ffn_down_exps.weight iq4_nl +blk.8.ffn_gate_exps.weight iq4_nl +blk.8.ffn_up_exps.weight iq4_nl +blk.9.attn_k.weight iq4_nl +blk.9.attn_q.weight iq4_nl +blk.9.attn_v.weight q5_0 +blk.9.ffn_down_exps.weight iq4_nl +blk.9.ffn_gate_exps.weight iq4_nl +blk.9.ffn_up_exps.weight iq4_nl +blk.10.attn_k.weight iq4_nl +blk.10.attn_q.weight iq4_nl +blk.10.attn_v.weight q5_0 +blk.10.ffn_down_exps.weight iq4_nl +blk.10.ffn_gate_exps.weight iq4_nl +blk.10.ffn_up_exps.weight iq4_nl +blk.11.attn_k.weight iq4_nl +blk.11.attn_q.weight iq4_nl +blk.11.attn_v.weight q5_0 +blk.11.ffn_down_exps.weight iq4_nl +blk.11.ffn_gate_exps.weight iq4_nl +blk.11.ffn_up_exps.weight iq4_nl +blk.12.attn_k.weight iq4_nl +blk.12.attn_q.weight iq4_nl +blk.12.attn_v.weight q5_0 +blk.12.ffn_down_exps.weight iq4_nl +blk.12.ffn_gate_exps.weight iq4_nl +blk.12.ffn_up_exps.weight iq4_nl +blk.13.attn_k.weight iq4_nl +blk.13.attn_q.weight iq4_nl +blk.13.attn_v.weight q5_0 +blk.13.ffn_down_exps.weight iq4_nl +blk.13.ffn_gate_exps.weight iq4_nl +blk.13.ffn_up_exps.weight iq4_nl +blk.14.attn_k.weight iq4_nl +blk.14.attn_q.weight iq4_nl +blk.14.attn_v.weight q5_0 +blk.14.ffn_down_exps.weight iq4_nl +blk.14.ffn_gate_exps.weight iq4_nl +blk.14.ffn_up_exps.weight iq4_nl +blk.15.attn_k.weight iq4_nl +blk.15.attn_q.weight iq4_nl +blk.15.attn_v.weight q5_0 +blk.15.ffn_down_exps.weight iq4_nl +blk.15.ffn_gate_exps.weight iq4_nl +blk.15.ffn_up_exps.weight iq4_nl +blk.16.attn_k.weight iq4_nl +blk.16.attn_q.weight iq4_nl +blk.16.attn_v.weight q5_0 +blk.16.ffn_down_exps.weight iq4_nl +blk.16.ffn_gate_exps.weight iq4_nl +blk.16.ffn_up_exps.weight iq4_nl +blk.17.attn_k.weight iq4_nl +blk.17.attn_q.weight iq4_nl +blk.17.attn_v.weight q5_0 +blk.17.ffn_down_exps.weight iq4_nl +blk.17.ffn_gate_exps.weight iq4_nl +blk.17.ffn_up_exps.weight iq4_nl +blk.18.attn_k.weight iq4_nl +blk.18.attn_q.weight iq4_nl +blk.18.attn_v.weight q5_0 +blk.18.ffn_down_exps.weight iq4_nl +blk.18.ffn_gate_exps.weight iq4_nl +blk.18.ffn_up_exps.weight iq4_nl +blk.19.attn_k.weight iq4_nl +blk.19.attn_q.weight iq4_nl +blk.19.attn_v.weight q5_0 +blk.19.ffn_down_exps.weight iq4_nl +blk.19.ffn_gate_exps.weight iq4_nl +blk.19.ffn_up_exps.weight iq4_nl +blk.20.attn_k.weight iq4_nl +blk.20.attn_q.weight iq4_nl +blk.20.attn_v.weight q5_0 +blk.20.ffn_down_exps.weight iq4_nl +blk.20.ffn_gate_exps.weight iq4_nl +blk.20.ffn_up_exps.weight iq4_nl +blk.21.attn_k.weight iq4_nl +blk.21.attn_q.weight iq4_nl +blk.21.attn_v.weight q5_0 +blk.21.ffn_down_exps.weight iq4_nl +blk.21.ffn_gate_exps.weight iq4_nl +blk.21.ffn_up_exps.weight iq4_nl +blk.22.attn_k.weight iq4_nl +blk.22.attn_q.weight iq4_nl +blk.22.attn_v.weight q5_0 +blk.22.ffn_down_exps.weight iq4_nl +blk.22.ffn_gate_exps.weight iq4_nl +blk.22.ffn_up_exps.weight iq4_nl +blk.23.attn_k.weight iq4_nl +blk.23.attn_q.weight iq4_nl +blk.23.attn_v.weight q5_0 +blk.23.ffn_down_exps.weight iq4_nl +blk.23.ffn_gate_exps.weight iq4_nl +blk.23.ffn_up_exps.weight iq4_nl +blk.24.attn_k.weight iq4_nl +blk.24.attn_q.weight iq4_nl +blk.24.attn_v.weight q5_0 +blk.24.ffn_down_exps.weight iq4_nl +blk.24.ffn_gate_exps.weight iq4_nl +blk.24.ffn_up_exps.weight iq4_nl +blk.25.attn_k.weight iq4_nl +blk.25.attn_q.weight iq4_nl +blk.25.attn_v.weight q5_0 +blk.25.ffn_down_exps.weight iq4_nl +blk.25.ffn_gate_exps.weight iq4_nl +blk.25.ffn_up_exps.weight iq4_nl +blk.26.attn_k.weight iq4_nl +blk.26.attn_q.weight iq4_nl +blk.26.attn_v.weight q5_0 +blk.26.ffn_down_exps.weight iq4_nl +blk.26.ffn_gate_exps.weight iq4_nl +blk.26.ffn_up_exps.weight iq4_nl +blk.27.attn_k.weight iq4_nl +blk.27.attn_q.weight iq4_nl +blk.27.attn_v.weight q5_0 +blk.27.ffn_down_exps.weight iq4_nl +blk.27.ffn_gate_exps.weight iq4_nl +blk.27.ffn_up_exps.weight iq4_nl +blk.28.attn_k.weight iq4_nl +blk.28.attn_q.weight iq4_nl +blk.28.attn_v.weight q5_0 +blk.28.ffn_down_exps.weight iq4_nl +blk.28.ffn_gate_exps.weight iq4_nl +blk.28.ffn_up_exps.weight iq4_nl +blk.29.attn_k.weight iq4_nl +blk.29.attn_q.weight iq4_nl +blk.29.attn_v.weight q5_0 +blk.29.ffn_down_exps.weight iq4_nl +blk.29.ffn_gate_exps.weight iq4_nl +blk.29.ffn_up_exps.weight iq4_nl +blk.30.attn_k.weight iq4_nl +blk.30.attn_q.weight iq4_nl +blk.30.attn_v.weight q5_0 +blk.30.ffn_down_exps.weight iq4_nl +blk.30.ffn_gate_exps.weight iq4_nl +blk.30.ffn_up_exps.weight iq4_nl +blk.31.attn_k.weight iq4_nl +blk.31.attn_q.weight iq4_nl +blk.31.attn_v.weight q5_0 +blk.31.ffn_down_exps.weight iq4_nl +blk.31.ffn_gate_exps.weight iq4_nl +blk.31.ffn_up_exps.weight iq4_nl +blk.32.attn_k.weight iq4_nl +blk.32.attn_q.weight iq4_nl +blk.32.attn_v.weight q5_0 +blk.32.ffn_down_exps.weight iq4_nl +blk.32.ffn_gate_exps.weight iq4_nl +blk.32.ffn_up_exps.weight iq4_nl +blk.33.attn_k.weight iq4_nl +blk.33.attn_q.weight iq4_nl +blk.33.attn_v.weight q5_0 +blk.33.ffn_down_exps.weight iq4_nl +blk.33.ffn_gate_exps.weight iq4_nl +blk.33.ffn_up_exps.weight iq4_nl +blk.34.attn_k.weight iq4_nl +blk.34.attn_q.weight iq4_nl +blk.34.attn_v.weight q5_0 +blk.34.ffn_down_exps.weight iq4_nl +blk.34.ffn_gate_exps.weight iq4_nl +blk.34.ffn_up_exps.weight iq4_nl +blk.35.attn_k.weight iq4_nl +blk.35.attn_q.weight iq4_nl +blk.35.attn_v.weight q5_0 +blk.35.ffn_down_exps.weight iq4_nl +blk.35.ffn_gate_exps.weight iq4_nl +blk.35.ffn_up_exps.weight iq4_nl [Q2_K_S] q2_K output.weight q8_0 -blk.0.attn_v.weight q4_K -blk.0.ffn_down_exps.weight q4_K -blk.1.attn_v.weight q4_K -blk.1.ffn_down_exps.weight q4_K -blk.2.attn_v.weight q4_K -blk.2.ffn_down_exps.weight q4_K -blk.3.attn_v.weight q4_K -blk.3.ffn_down_exps.weight q4_K -blk.4.attn_v.weight q4_K -blk.5.attn_v.weight q4_K -blk.6.attn_v.weight q4_K -blk.7.attn_v.weight q4_K -blk.8.attn_v.weight q4_K -blk.9.attn_v.weight q4_K -blk.10.attn_v.weight q4_K -blk.11.attn_v.weight q4_K -blk.12.attn_v.weight q4_K -blk.13.attn_v.weight q4_K -blk.14.attn_v.weight q4_K -blk.15.attn_v.weight q4_K -blk.16.attn_v.weight q4_K -blk.17.attn_v.weight q4_K -blk.18.attn_v.weight q4_K -blk.19.attn_v.weight q4_K -blk.20.attn_v.weight q4_K -blk.21.attn_v.weight q4_K -blk.22.attn_v.weight q4_K -blk.23.attn_v.weight q4_K -blk.24.attn_v.weight q4_K -blk.25.attn_v.weight q4_K -blk.26.attn_v.weight q4_K -blk.27.attn_v.weight q4_K -blk.28.attn_v.weight q4_K -blk.29.attn_v.weight q4_K -blk.30.attn_v.weight q4_K -blk.31.attn_v.weight q4_K -blk.32.attn_v.weight q4_K -blk.33.attn_v.weight q4_K -blk.34.attn_v.weight q4_K -blk.35.attn_v.weight q4_K +token_embd.weight q4_0 +blk.0.attn_k.weight q4_0 +blk.0.attn_q.weight q4_0 +blk.0.attn_v.weight q5_0 +blk.0.ffn_down_exps.weight q5_0 +blk.0.ffn_gate_exps.weight q4_0 +blk.0.ffn_up_exps.weight q4_0 +blk.1.attn_k.weight q4_0 +blk.1.attn_q.weight q4_0 +blk.1.attn_v.weight q5_0 +blk.1.ffn_down_exps.weight q5_0 +blk.1.ffn_gate_exps.weight q4_0 +blk.1.ffn_up_exps.weight q4_0 +blk.2.attn_k.weight q4_0 +blk.2.attn_q.weight q4_0 +blk.2.attn_v.weight q5_0 +blk.2.ffn_down_exps.weight q5_0 +blk.2.ffn_gate_exps.weight q4_0 +blk.2.ffn_up_exps.weight q4_0 +blk.3.attn_k.weight q4_0 +blk.3.attn_q.weight q4_0 +blk.3.attn_v.weight q5_0 +blk.3.ffn_down_exps.weight q5_0 +blk.3.ffn_gate_exps.weight q4_0 +blk.3.ffn_up_exps.weight q4_0 +blk.4.attn_k.weight q4_0 +blk.4.attn_q.weight q4_0 +blk.4.attn_v.weight q5_0 +blk.4.ffn_down_exps.weight q4_0 +blk.4.ffn_gate_exps.weight q4_0 +blk.4.ffn_up_exps.weight q4_0 +blk.5.attn_k.weight q4_0 +blk.5.attn_q.weight q4_0 +blk.5.attn_v.weight q5_0 +blk.5.ffn_down_exps.weight q4_0 +blk.5.ffn_gate_exps.weight q4_0 +blk.5.ffn_up_exps.weight q4_0 +blk.6.attn_k.weight q4_0 +blk.6.attn_q.weight q4_0 +blk.6.attn_v.weight q5_0 +blk.6.ffn_down_exps.weight q4_0 +blk.6.ffn_gate_exps.weight q4_0 +blk.6.ffn_up_exps.weight q4_0 +blk.7.attn_k.weight q4_0 +blk.7.attn_q.weight q4_0 +blk.7.attn_v.weight q5_0 +blk.7.ffn_down_exps.weight q4_0 +blk.7.ffn_gate_exps.weight q4_0 +blk.7.ffn_up_exps.weight q4_0 +blk.8.attn_k.weight q4_0 +blk.8.attn_q.weight q4_0 +blk.8.attn_v.weight q5_0 +blk.8.ffn_down_exps.weight q4_0 +blk.8.ffn_gate_exps.weight q4_0 +blk.8.ffn_up_exps.weight q4_0 +blk.9.attn_k.weight q4_0 +blk.9.attn_q.weight q4_0 +blk.9.attn_v.weight q5_0 +blk.9.ffn_down_exps.weight q4_0 +blk.9.ffn_gate_exps.weight q4_0 +blk.9.ffn_up_exps.weight q4_0 +blk.10.attn_k.weight q4_0 +blk.10.attn_q.weight q4_0 +blk.10.attn_v.weight q5_0 +blk.10.ffn_down_exps.weight q4_0 +blk.10.ffn_gate_exps.weight q4_0 +blk.10.ffn_up_exps.weight q4_0 +blk.11.attn_k.weight q4_0 +blk.11.attn_q.weight q4_0 +blk.11.attn_v.weight q5_0 +blk.11.ffn_down_exps.weight q4_0 +blk.11.ffn_gate_exps.weight q4_0 +blk.11.ffn_up_exps.weight q4_0 +blk.12.attn_k.weight q4_0 +blk.12.attn_q.weight q4_0 +blk.12.attn_v.weight q5_0 +blk.12.ffn_down_exps.weight q4_0 +blk.12.ffn_gate_exps.weight q4_0 +blk.12.ffn_up_exps.weight q4_0 +blk.13.attn_k.weight q4_0 +blk.13.attn_q.weight q4_0 +blk.13.attn_v.weight q5_0 +blk.13.ffn_down_exps.weight q4_0 +blk.13.ffn_gate_exps.weight q4_0 +blk.13.ffn_up_exps.weight q4_0 +blk.14.attn_k.weight q4_0 +blk.14.attn_q.weight q4_0 +blk.14.attn_v.weight q5_0 +blk.14.ffn_down_exps.weight q4_0 +blk.14.ffn_gate_exps.weight q4_0 +blk.14.ffn_up_exps.weight q4_0 +blk.15.attn_k.weight q4_0 +blk.15.attn_q.weight q4_0 +blk.15.attn_v.weight q5_0 +blk.15.ffn_down_exps.weight q4_0 +blk.15.ffn_gate_exps.weight q4_0 +blk.15.ffn_up_exps.weight q4_0 +blk.16.attn_k.weight q4_0 +blk.16.attn_q.weight q4_0 +blk.16.attn_v.weight q5_0 +blk.16.ffn_down_exps.weight q4_0 +blk.16.ffn_gate_exps.weight q4_0 +blk.16.ffn_up_exps.weight q4_0 +blk.17.attn_k.weight q4_0 +blk.17.attn_q.weight q4_0 +blk.17.attn_v.weight q5_0 +blk.17.ffn_down_exps.weight q4_0 +blk.17.ffn_gate_exps.weight q4_0 +blk.17.ffn_up_exps.weight q4_0 +blk.18.attn_k.weight q4_0 +blk.18.attn_q.weight q4_0 +blk.18.attn_v.weight q5_0 +blk.18.ffn_down_exps.weight q4_0 +blk.18.ffn_gate_exps.weight q4_0 +blk.18.ffn_up_exps.weight q4_0 +blk.19.attn_k.weight q4_0 +blk.19.attn_q.weight q4_0 +blk.19.attn_v.weight q5_0 +blk.19.ffn_down_exps.weight q4_0 +blk.19.ffn_gate_exps.weight q4_0 +blk.19.ffn_up_exps.weight q4_0 +blk.20.attn_k.weight q4_0 +blk.20.attn_q.weight q4_0 +blk.20.attn_v.weight q5_0 +blk.20.ffn_down_exps.weight q4_0 +blk.20.ffn_gate_exps.weight q4_0 +blk.20.ffn_up_exps.weight q4_0 +blk.21.attn_k.weight q4_0 +blk.21.attn_q.weight q4_0 +blk.21.attn_v.weight q5_0 +blk.21.ffn_down_exps.weight q4_0 +blk.21.ffn_gate_exps.weight q4_0 +blk.21.ffn_up_exps.weight q4_0 +blk.22.attn_k.weight q4_0 +blk.22.attn_q.weight q4_0 +blk.22.attn_v.weight q5_0 +blk.22.ffn_down_exps.weight q4_0 +blk.22.ffn_gate_exps.weight q4_0 +blk.22.ffn_up_exps.weight q4_0 +blk.23.attn_k.weight q4_0 +blk.23.attn_q.weight q4_0 +blk.23.attn_v.weight q5_0 +blk.23.ffn_down_exps.weight q4_0 +blk.23.ffn_gate_exps.weight q4_0 +blk.23.ffn_up_exps.weight q4_0 +blk.24.attn_k.weight q4_0 +blk.24.attn_q.weight q4_0 +blk.24.attn_v.weight q5_0 +blk.24.ffn_down_exps.weight q4_0 +blk.24.ffn_gate_exps.weight q4_0 +blk.24.ffn_up_exps.weight q4_0 +blk.25.attn_k.weight q4_0 +blk.25.attn_q.weight q4_0 +blk.25.attn_v.weight q5_0 +blk.25.ffn_down_exps.weight q4_0 +blk.25.ffn_gate_exps.weight q4_0 +blk.25.ffn_up_exps.weight q4_0 +blk.26.attn_k.weight q4_0 +blk.26.attn_q.weight q4_0 +blk.26.attn_v.weight q5_0 +blk.26.ffn_down_exps.weight q4_0 +blk.26.ffn_gate_exps.weight q4_0 +blk.26.ffn_up_exps.weight q4_0 +blk.27.attn_k.weight q4_0 +blk.27.attn_q.weight q4_0 +blk.27.attn_v.weight q5_0 +blk.27.ffn_down_exps.weight q4_0 +blk.27.ffn_gate_exps.weight q4_0 +blk.27.ffn_up_exps.weight q4_0 +blk.28.attn_k.weight q4_0 +blk.28.attn_q.weight q4_0 +blk.28.attn_v.weight q5_0 +blk.28.ffn_down_exps.weight q4_0 +blk.28.ffn_gate_exps.weight q4_0 +blk.28.ffn_up_exps.weight q4_0 +blk.29.attn_k.weight q4_0 +blk.29.attn_q.weight q4_0 +blk.29.attn_v.weight q5_0 +blk.29.ffn_down_exps.weight q4_0 +blk.29.ffn_gate_exps.weight q4_0 +blk.29.ffn_up_exps.weight q4_0 +blk.30.attn_k.weight q4_0 +blk.30.attn_q.weight q4_0 +blk.30.attn_v.weight q5_0 +blk.30.ffn_down_exps.weight q4_0 +blk.30.ffn_gate_exps.weight q4_0 +blk.30.ffn_up_exps.weight q4_0 +blk.31.attn_k.weight q4_0 +blk.31.attn_q.weight q4_0 +blk.31.attn_v.weight q5_0 +blk.31.ffn_down_exps.weight q4_0 +blk.31.ffn_gate_exps.weight q4_0 +blk.31.ffn_up_exps.weight q4_0 +blk.32.attn_k.weight q4_0 +blk.32.attn_q.weight q4_0 +blk.32.attn_v.weight q5_0 +blk.32.ffn_down_exps.weight q4_0 +blk.32.ffn_gate_exps.weight q4_0 +blk.32.ffn_up_exps.weight q4_0 +blk.33.attn_k.weight q4_0 +blk.33.attn_q.weight q4_0 +blk.33.attn_v.weight q5_0 +blk.33.ffn_down_exps.weight q4_0 +blk.33.ffn_gate_exps.weight q4_0 +blk.33.ffn_up_exps.weight q4_0 +blk.34.attn_k.weight q4_0 +blk.34.attn_q.weight q4_0 +blk.34.attn_v.weight q5_0 +blk.34.ffn_down_exps.weight q4_0 +blk.34.ffn_gate_exps.weight q4_0 +blk.34.ffn_up_exps.weight q4_0 +blk.35.attn_k.weight q4_0 +blk.35.attn_q.weight q4_0 +blk.35.attn_v.weight q5_0 +blk.35.ffn_down_exps.weight q4_0 +blk.35.ffn_gate_exps.weight q4_0 +blk.35.ffn_up_exps.weight q4_0 [IQ3_XS] iq3_s output.weight q8_0 -blk.0.attn_k.weight iq3_xxs -blk.0.attn_q.weight iq3_xxs -blk.0.attn_v.weight q4_K -blk.1.attn_k.weight iq3_xxs -blk.1.attn_q.weight iq3_xxs -blk.1.attn_v.weight q4_K -blk.2.attn_k.weight iq3_xxs -blk.2.attn_q.weight iq3_xxs -blk.2.attn_v.weight q4_K -blk.3.attn_k.weight iq3_xxs -blk.3.attn_q.weight iq3_xxs -blk.3.attn_v.weight q4_K -blk.4.attn_k.weight iq3_xxs -blk.4.attn_q.weight iq3_xxs -blk.4.attn_v.weight q4_K -blk.4.ffn_gate_exps.weight iq3_xxs -blk.4.ffn_up_exps.weight iq3_xxs -blk.5.attn_k.weight iq3_xxs -blk.5.attn_q.weight iq3_xxs -blk.5.attn_v.weight q4_K -blk.5.ffn_gate_exps.weight iq3_xxs -blk.5.ffn_up_exps.weight iq3_xxs -blk.6.attn_k.weight iq3_xxs -blk.6.attn_q.weight iq3_xxs -blk.6.attn_v.weight q4_K -blk.6.ffn_gate_exps.weight iq3_xxs -blk.6.ffn_up_exps.weight iq3_xxs -blk.7.attn_k.weight iq3_xxs -blk.7.attn_q.weight iq3_xxs -blk.7.attn_v.weight q4_K -blk.7.ffn_gate_exps.weight iq3_xxs -blk.7.ffn_up_exps.weight iq3_xxs -blk.8.attn_k.weight iq3_xxs -blk.8.attn_q.weight iq3_xxs -blk.8.attn_v.weight q4_K -blk.8.ffn_gate_exps.weight iq3_xxs -blk.8.ffn_up_exps.weight iq3_xxs -blk.9.attn_k.weight iq3_xxs -blk.9.attn_q.weight iq3_xxs -blk.9.attn_v.weight q4_K -blk.9.ffn_gate_exps.weight iq3_xxs -blk.9.ffn_up_exps.weight iq3_xxs -blk.10.attn_k.weight iq3_xxs -blk.10.attn_q.weight iq3_xxs -blk.10.attn_v.weight q4_K -blk.10.ffn_gate_exps.weight iq3_xxs -blk.10.ffn_up_exps.weight iq3_xxs -blk.11.attn_k.weight iq3_xxs -blk.11.attn_q.weight iq3_xxs -blk.11.attn_v.weight q4_K -blk.11.ffn_gate_exps.weight iq3_xxs -blk.11.ffn_up_exps.weight iq3_xxs -blk.12.attn_k.weight iq3_xxs -blk.12.attn_q.weight iq3_xxs -blk.12.attn_v.weight q4_K -blk.12.ffn_gate_exps.weight iq3_xxs -blk.12.ffn_up_exps.weight iq3_xxs -blk.13.attn_k.weight iq3_xxs -blk.13.attn_q.weight iq3_xxs -blk.13.attn_v.weight q4_K -blk.13.ffn_gate_exps.weight iq3_xxs -blk.13.ffn_up_exps.weight iq3_xxs -blk.14.attn_k.weight iq3_xxs -blk.14.attn_q.weight iq3_xxs -blk.14.attn_v.weight q4_K -blk.14.ffn_gate_exps.weight iq3_xxs -blk.14.ffn_up_exps.weight iq3_xxs -blk.15.attn_k.weight iq3_xxs -blk.15.attn_q.weight iq3_xxs -blk.15.attn_v.weight q4_K -blk.15.ffn_gate_exps.weight iq3_xxs -blk.15.ffn_up_exps.weight iq3_xxs -blk.16.attn_k.weight iq3_xxs -blk.16.attn_q.weight iq3_xxs -blk.16.attn_v.weight q4_K -blk.16.ffn_gate_exps.weight iq3_xxs -blk.16.ffn_up_exps.weight iq3_xxs -blk.17.attn_k.weight iq3_xxs -blk.17.attn_q.weight iq3_xxs -blk.17.attn_v.weight q4_K -blk.17.ffn_gate_exps.weight iq3_xxs -blk.17.ffn_up_exps.weight iq3_xxs -blk.18.attn_k.weight iq3_xxs -blk.18.attn_q.weight iq3_xxs -blk.18.attn_v.weight q4_K -blk.18.ffn_gate_exps.weight iq3_xxs -blk.18.ffn_up_exps.weight iq3_xxs -blk.19.attn_k.weight iq3_xxs -blk.19.attn_q.weight iq3_xxs -blk.19.attn_v.weight q4_K -blk.19.ffn_gate_exps.weight iq3_xxs -blk.19.ffn_up_exps.weight iq3_xxs -blk.20.attn_k.weight iq3_xxs -blk.20.attn_q.weight iq3_xxs -blk.20.attn_v.weight q4_K -blk.20.ffn_gate_exps.weight iq3_xxs -blk.20.ffn_up_exps.weight iq3_xxs -blk.21.attn_k.weight iq3_xxs -blk.21.attn_q.weight iq3_xxs -blk.21.attn_v.weight q4_K -blk.21.ffn_gate_exps.weight iq3_xxs -blk.21.ffn_up_exps.weight iq3_xxs -blk.22.attn_k.weight iq3_xxs -blk.22.attn_q.weight iq3_xxs -blk.22.attn_v.weight q4_K -blk.22.ffn_gate_exps.weight iq3_xxs -blk.22.ffn_up_exps.weight iq3_xxs -blk.23.attn_k.weight iq3_xxs -blk.23.attn_q.weight iq3_xxs -blk.23.attn_v.weight q4_K -blk.23.ffn_gate_exps.weight iq3_xxs -blk.23.ffn_up_exps.weight iq3_xxs -blk.24.attn_k.weight iq3_xxs -blk.24.attn_q.weight iq3_xxs -blk.24.attn_v.weight q4_K -blk.24.ffn_gate_exps.weight iq3_xxs -blk.24.ffn_up_exps.weight iq3_xxs -blk.25.attn_k.weight iq3_xxs -blk.25.attn_q.weight iq3_xxs -blk.25.attn_v.weight q4_K -blk.25.ffn_gate_exps.weight iq3_xxs -blk.25.ffn_up_exps.weight iq3_xxs -blk.26.attn_k.weight iq3_xxs -blk.26.attn_q.weight iq3_xxs -blk.26.attn_v.weight q4_K -blk.26.ffn_gate_exps.weight iq3_xxs -blk.26.ffn_up_exps.weight iq3_xxs -blk.27.attn_k.weight iq3_xxs -blk.27.attn_q.weight iq3_xxs -blk.27.attn_v.weight q4_K -blk.27.ffn_gate_exps.weight iq3_xxs -blk.27.ffn_up_exps.weight iq3_xxs -blk.28.attn_k.weight iq3_xxs -blk.28.attn_q.weight iq3_xxs -blk.28.attn_v.weight q4_K -blk.28.ffn_gate_exps.weight iq3_xxs -blk.28.ffn_up_exps.weight iq3_xxs -blk.29.attn_k.weight iq3_xxs -blk.29.attn_q.weight iq3_xxs -blk.29.attn_v.weight q4_K -blk.29.ffn_gate_exps.weight iq3_xxs -blk.29.ffn_up_exps.weight iq3_xxs -blk.30.attn_k.weight iq3_xxs -blk.30.attn_q.weight iq3_xxs -blk.30.attn_v.weight q4_K -blk.30.ffn_gate_exps.weight iq3_xxs -blk.30.ffn_up_exps.weight iq3_xxs -blk.31.attn_k.weight iq3_xxs -blk.31.attn_q.weight iq3_xxs -blk.31.attn_v.weight q4_K -blk.32.attn_k.weight iq3_xxs -blk.32.attn_q.weight iq3_xxs -blk.32.attn_v.weight q4_K -blk.33.attn_k.weight iq3_xxs -blk.33.attn_q.weight iq3_xxs -blk.33.attn_v.weight q4_K -blk.34.attn_k.weight iq3_xxs -blk.34.attn_q.weight iq3_xxs -blk.34.attn_v.weight q4_K -blk.35.attn_k.weight iq3_xxs -blk.35.attn_q.weight iq3_xxs -blk.35.attn_v.weight q4_K +token_embd.weight iq4_nl +blk.0.attn_k.weight iq4_nl +blk.0.attn_q.weight iq4_nl +blk.0.attn_v.weight q5_0 +blk.0.ffn_down_exps.weight iq4_nl +blk.0.ffn_gate_exps.weight iq4_nl +blk.0.ffn_up_exps.weight iq4_nl +blk.1.attn_k.weight iq4_nl +blk.1.attn_q.weight iq4_nl +blk.1.attn_v.weight q5_0 +blk.1.ffn_down_exps.weight iq4_nl +blk.1.ffn_gate_exps.weight iq4_nl +blk.1.ffn_up_exps.weight iq4_nl +blk.2.attn_k.weight iq4_nl +blk.2.attn_q.weight iq4_nl +blk.2.attn_v.weight q5_0 +blk.2.ffn_down_exps.weight iq4_nl +blk.2.ffn_gate_exps.weight iq4_nl +blk.2.ffn_up_exps.weight iq4_nl +blk.3.attn_k.weight iq4_nl +blk.3.attn_q.weight iq4_nl +blk.3.attn_v.weight q5_0 +blk.3.ffn_down_exps.weight iq4_nl +blk.3.ffn_gate_exps.weight iq4_nl +blk.3.ffn_up_exps.weight iq4_nl +blk.4.attn_k.weight iq4_nl +blk.4.attn_q.weight iq4_nl +blk.4.attn_v.weight q5_0 +blk.4.ffn_down_exps.weight iq4_nl +blk.4.ffn_gate_exps.weight iq4_nl +blk.4.ffn_up_exps.weight iq4_nl +blk.5.attn_k.weight iq4_nl +blk.5.attn_q.weight iq4_nl +blk.5.attn_v.weight q5_0 +blk.5.ffn_down_exps.weight iq4_nl +blk.5.ffn_gate_exps.weight iq4_nl +blk.5.ffn_up_exps.weight iq4_nl +blk.6.attn_k.weight iq4_nl +blk.6.attn_q.weight iq4_nl +blk.6.attn_v.weight q5_0 +blk.6.ffn_down_exps.weight iq4_nl +blk.6.ffn_gate_exps.weight iq4_nl +blk.6.ffn_up_exps.weight iq4_nl +blk.7.attn_k.weight iq4_nl +blk.7.attn_q.weight iq4_nl +blk.7.attn_v.weight q5_0 +blk.7.ffn_down_exps.weight iq4_nl +blk.7.ffn_gate_exps.weight iq4_nl +blk.7.ffn_up_exps.weight iq4_nl +blk.8.attn_k.weight iq4_nl +blk.8.attn_q.weight iq4_nl +blk.8.attn_v.weight q5_0 +blk.8.ffn_down_exps.weight iq4_nl +blk.8.ffn_gate_exps.weight iq4_nl +blk.8.ffn_up_exps.weight iq4_nl +blk.9.attn_k.weight iq4_nl +blk.9.attn_q.weight iq4_nl +blk.9.attn_v.weight q5_0 +blk.9.ffn_down_exps.weight iq4_nl +blk.9.ffn_gate_exps.weight iq4_nl +blk.9.ffn_up_exps.weight iq4_nl +blk.10.attn_k.weight iq4_nl +blk.10.attn_q.weight iq4_nl +blk.10.attn_v.weight q5_0 +blk.10.ffn_down_exps.weight iq4_nl +blk.10.ffn_gate_exps.weight iq4_nl +blk.10.ffn_up_exps.weight iq4_nl +blk.11.attn_k.weight iq4_nl +blk.11.attn_q.weight iq4_nl +blk.11.attn_v.weight q5_0 +blk.11.ffn_down_exps.weight iq4_nl +blk.11.ffn_gate_exps.weight iq4_nl +blk.11.ffn_up_exps.weight iq4_nl +blk.12.attn_k.weight iq4_nl +blk.12.attn_q.weight iq4_nl +blk.12.attn_v.weight q5_0 +blk.12.ffn_down_exps.weight iq4_nl +blk.12.ffn_gate_exps.weight iq4_nl +blk.12.ffn_up_exps.weight iq4_nl +blk.13.attn_k.weight iq4_nl +blk.13.attn_q.weight iq4_nl +blk.13.attn_v.weight q5_0 +blk.13.ffn_down_exps.weight iq4_nl +blk.13.ffn_gate_exps.weight iq4_nl +blk.13.ffn_up_exps.weight iq4_nl +blk.14.attn_k.weight iq4_nl +blk.14.attn_q.weight iq4_nl +blk.14.attn_v.weight q5_0 +blk.14.ffn_down_exps.weight iq4_nl +blk.14.ffn_gate_exps.weight iq4_nl +blk.14.ffn_up_exps.weight iq4_nl +blk.15.attn_k.weight iq4_nl +blk.15.attn_q.weight iq4_nl +blk.15.attn_v.weight q5_0 +blk.15.ffn_down_exps.weight iq4_nl +blk.15.ffn_gate_exps.weight iq4_nl +blk.15.ffn_up_exps.weight iq4_nl +blk.16.attn_k.weight iq4_nl +blk.16.attn_q.weight iq4_nl +blk.16.attn_v.weight q5_0 +blk.16.ffn_down_exps.weight iq4_nl +blk.16.ffn_gate_exps.weight iq4_nl +blk.16.ffn_up_exps.weight iq4_nl +blk.17.attn_k.weight iq4_nl +blk.17.attn_q.weight iq4_nl +blk.17.attn_v.weight q5_0 +blk.17.ffn_down_exps.weight iq4_nl +blk.17.ffn_gate_exps.weight iq4_nl +blk.17.ffn_up_exps.weight iq4_nl +blk.18.attn_k.weight iq4_nl +blk.18.attn_q.weight iq4_nl +blk.18.attn_v.weight q5_0 +blk.18.ffn_down_exps.weight iq4_nl +blk.18.ffn_gate_exps.weight iq4_nl +blk.18.ffn_up_exps.weight iq4_nl +blk.19.attn_k.weight iq4_nl +blk.19.attn_q.weight iq4_nl +blk.19.attn_v.weight q5_0 +blk.19.ffn_down_exps.weight iq4_nl +blk.19.ffn_gate_exps.weight iq4_nl +blk.19.ffn_up_exps.weight iq4_nl +blk.20.attn_k.weight iq4_nl +blk.20.attn_q.weight iq4_nl +blk.20.attn_v.weight q5_0 +blk.20.ffn_down_exps.weight iq4_nl +blk.20.ffn_gate_exps.weight iq4_nl +blk.20.ffn_up_exps.weight iq4_nl +blk.21.attn_k.weight iq4_nl +blk.21.attn_q.weight iq4_nl +blk.21.attn_v.weight q5_0 +blk.21.ffn_down_exps.weight iq4_nl +blk.21.ffn_gate_exps.weight iq4_nl +blk.21.ffn_up_exps.weight iq4_nl +blk.22.attn_k.weight iq4_nl +blk.22.attn_q.weight iq4_nl +blk.22.attn_v.weight q5_0 +blk.22.ffn_down_exps.weight iq4_nl +blk.22.ffn_gate_exps.weight iq4_nl +blk.22.ffn_up_exps.weight iq4_nl +blk.23.attn_k.weight iq4_nl +blk.23.attn_q.weight iq4_nl +blk.23.attn_v.weight q5_0 +blk.23.ffn_down_exps.weight iq4_nl +blk.23.ffn_gate_exps.weight iq4_nl +blk.23.ffn_up_exps.weight iq4_nl +blk.24.attn_k.weight iq4_nl +blk.24.attn_q.weight iq4_nl +blk.24.attn_v.weight q5_0 +blk.24.ffn_down_exps.weight iq4_nl +blk.24.ffn_gate_exps.weight iq4_nl +blk.24.ffn_up_exps.weight iq4_nl +blk.25.attn_k.weight iq4_nl +blk.25.attn_q.weight iq4_nl +blk.25.attn_v.weight q5_0 +blk.25.ffn_down_exps.weight iq4_nl +blk.25.ffn_gate_exps.weight iq4_nl +blk.25.ffn_up_exps.weight iq4_nl +blk.26.attn_k.weight iq4_nl +blk.26.attn_q.weight iq4_nl +blk.26.attn_v.weight q5_0 +blk.26.ffn_down_exps.weight iq4_nl +blk.26.ffn_gate_exps.weight iq4_nl +blk.26.ffn_up_exps.weight iq4_nl +blk.27.attn_k.weight iq4_nl +blk.27.attn_q.weight iq4_nl +blk.27.attn_v.weight q5_0 +blk.27.ffn_down_exps.weight iq4_nl +blk.27.ffn_gate_exps.weight iq4_nl +blk.27.ffn_up_exps.weight iq4_nl +blk.28.attn_k.weight iq4_nl +blk.28.attn_q.weight iq4_nl +blk.28.attn_v.weight q5_0 +blk.28.ffn_down_exps.weight iq4_nl +blk.28.ffn_gate_exps.weight iq4_nl +blk.28.ffn_up_exps.weight iq4_nl +blk.29.attn_k.weight iq4_nl +blk.29.attn_q.weight iq4_nl +blk.29.attn_v.weight q5_0 +blk.29.ffn_down_exps.weight iq4_nl +blk.29.ffn_gate_exps.weight iq4_nl +blk.29.ffn_up_exps.weight iq4_nl +blk.30.attn_k.weight iq4_nl +blk.30.attn_q.weight iq4_nl +blk.30.attn_v.weight q5_0 +blk.30.ffn_down_exps.weight iq4_nl +blk.30.ffn_gate_exps.weight iq4_nl +blk.30.ffn_up_exps.weight iq4_nl +blk.31.attn_k.weight iq4_nl +blk.31.attn_q.weight iq4_nl +blk.31.attn_v.weight q5_0 +blk.31.ffn_down_exps.weight iq4_nl +blk.31.ffn_gate_exps.weight iq4_nl +blk.31.ffn_up_exps.weight iq4_nl +blk.32.attn_k.weight iq4_nl +blk.32.attn_q.weight iq4_nl +blk.32.attn_v.weight q5_0 +blk.32.ffn_down_exps.weight iq4_nl +blk.32.ffn_gate_exps.weight iq4_nl +blk.32.ffn_up_exps.weight iq4_nl +blk.33.attn_k.weight iq4_nl +blk.33.attn_q.weight iq4_nl +blk.33.attn_v.weight q5_0 +blk.33.ffn_down_exps.weight iq4_nl +blk.33.ffn_gate_exps.weight iq4_nl +blk.33.ffn_up_exps.weight iq4_nl +blk.34.attn_k.weight iq4_nl +blk.34.attn_q.weight iq4_nl +blk.34.attn_v.weight q5_0 +blk.34.ffn_down_exps.weight iq4_nl +blk.34.ffn_gate_exps.weight iq4_nl +blk.34.ffn_up_exps.weight iq4_nl +blk.35.attn_k.weight iq4_nl +blk.35.attn_q.weight iq4_nl +blk.35.attn_v.weight q5_0 +blk.35.ffn_down_exps.weight iq4_nl +blk.35.ffn_gate_exps.weight iq4_nl +blk.35.ffn_up_exps.weight iq4_nl [IQ3_XXS] iq3_xxs output.weight q8_0 -token_embd.weight iq3_s -blk.0.attn_k.weight iq2_s +token_embd.weight iq4_nl +blk.0.attn_k.weight iq4_nl blk.0.attn_output.weight iq3_s -blk.0.attn_q.weight iq2_s -blk.0.attn_v.weight q4_K -blk.0.ffn_down_exps.weight q4_K -blk.1.attn_k.weight iq2_s +blk.0.attn_q.weight iq4_nl +blk.0.attn_v.weight q5_0 +blk.0.ffn_down_exps.weight q5_0 +blk.0.ffn_gate_exps.weight iq4_nl +blk.0.ffn_up_exps.weight iq4_nl +blk.1.attn_k.weight iq4_nl blk.1.attn_output.weight iq3_s -blk.1.attn_q.weight iq2_s -blk.1.attn_v.weight q4_K -blk.1.ffn_down_exps.weight q4_K -blk.2.attn_k.weight iq2_s +blk.1.attn_q.weight iq4_nl +blk.1.attn_v.weight q5_0 +blk.1.ffn_down_exps.weight q5_0 +blk.1.ffn_gate_exps.weight iq4_nl +blk.1.ffn_up_exps.weight iq4_nl +blk.2.attn_k.weight iq4_nl blk.2.attn_output.weight iq3_s -blk.2.attn_q.weight iq2_s -blk.2.attn_v.weight q4_K -blk.2.ffn_down_exps.weight q4_K -blk.3.attn_k.weight iq2_s +blk.2.attn_q.weight iq4_nl +blk.2.attn_v.weight q5_0 +blk.2.ffn_down_exps.weight q5_0 +blk.2.ffn_gate_exps.weight iq4_nl +blk.2.ffn_up_exps.weight iq4_nl +blk.3.attn_k.weight iq4_nl blk.3.attn_output.weight iq3_s -blk.3.attn_q.weight iq2_s -blk.3.attn_v.weight q4_K -blk.3.ffn_down_exps.weight q4_K -blk.4.attn_k.weight iq2_s +blk.3.attn_q.weight iq4_nl +blk.3.attn_v.weight q5_0 +blk.3.ffn_down_exps.weight q5_0 +blk.3.ffn_gate_exps.weight iq4_nl +blk.3.ffn_up_exps.weight iq4_nl +blk.4.attn_k.weight iq4_nl blk.4.attn_output.weight iq3_s -blk.4.attn_q.weight iq2_s -blk.4.attn_v.weight q4_K -blk.4.ffn_down_exps.weight q3_K -blk.5.attn_k.weight iq2_s +blk.4.attn_q.weight iq4_nl +blk.4.attn_v.weight q5_0 +blk.4.ffn_down_exps.weight q4_0 +blk.4.ffn_gate_exps.weight iq4_nl +blk.4.ffn_up_exps.weight iq4_nl +blk.5.attn_k.weight iq4_nl blk.5.attn_output.weight iq3_s -blk.5.attn_q.weight iq2_s -blk.5.attn_v.weight q4_K -blk.5.ffn_down_exps.weight q3_K -blk.6.attn_k.weight iq2_s +blk.5.attn_q.weight iq4_nl +blk.5.attn_v.weight q5_0 +blk.5.ffn_down_exps.weight q4_0 +blk.5.ffn_gate_exps.weight iq4_nl +blk.5.ffn_up_exps.weight iq4_nl +blk.6.attn_k.weight iq4_nl blk.6.attn_output.weight iq3_s -blk.6.attn_q.weight iq2_s -blk.6.attn_v.weight q4_K -blk.6.ffn_down_exps.weight q3_K -blk.7.attn_k.weight iq2_s +blk.6.attn_q.weight iq4_nl +blk.6.attn_v.weight q5_0 +blk.6.ffn_down_exps.weight q4_0 +blk.6.ffn_gate_exps.weight iq4_nl +blk.6.ffn_up_exps.weight iq4_nl +blk.7.attn_k.weight iq4_nl blk.7.attn_output.weight iq3_s -blk.7.attn_q.weight iq2_s -blk.7.attn_v.weight q4_K -blk.7.ffn_down_exps.weight q3_K -blk.8.attn_k.weight iq2_s +blk.7.attn_q.weight iq4_nl +blk.7.attn_v.weight q5_0 +blk.7.ffn_down_exps.weight q4_0 +blk.7.ffn_gate_exps.weight iq4_nl +blk.7.ffn_up_exps.weight iq4_nl +blk.8.attn_k.weight iq4_nl blk.8.attn_output.weight iq3_s -blk.8.attn_q.weight iq2_s -blk.8.attn_v.weight q4_K -blk.8.ffn_down_exps.weight q3_K -blk.9.attn_k.weight iq2_s +blk.8.attn_q.weight iq4_nl +blk.8.attn_v.weight q5_0 +blk.8.ffn_down_exps.weight q4_0 +blk.8.ffn_gate_exps.weight iq4_nl +blk.8.ffn_up_exps.weight iq4_nl +blk.9.attn_k.weight iq4_nl blk.9.attn_output.weight iq3_s -blk.9.attn_q.weight iq2_s -blk.9.attn_v.weight q4_K -blk.9.ffn_down_exps.weight q3_K -blk.10.attn_k.weight iq2_s +blk.9.attn_q.weight iq4_nl +blk.9.attn_v.weight q5_0 +blk.9.ffn_down_exps.weight q4_0 +blk.9.ffn_gate_exps.weight iq4_nl +blk.9.ffn_up_exps.weight iq4_nl +blk.10.attn_k.weight iq4_nl blk.10.attn_output.weight iq3_s -blk.10.attn_q.weight iq2_s -blk.10.attn_v.weight q4_K -blk.10.ffn_down_exps.weight q3_K -blk.11.attn_k.weight iq2_s +blk.10.attn_q.weight iq4_nl +blk.10.attn_v.weight q5_0 +blk.10.ffn_down_exps.weight q4_0 +blk.10.ffn_gate_exps.weight iq4_nl +blk.10.ffn_up_exps.weight iq4_nl +blk.11.attn_k.weight iq4_nl blk.11.attn_output.weight iq3_s -blk.11.attn_q.weight iq2_s -blk.11.attn_v.weight q4_K -blk.11.ffn_down_exps.weight q3_K -blk.12.attn_k.weight iq2_s +blk.11.attn_q.weight iq4_nl +blk.11.attn_v.weight q5_0 +blk.11.ffn_down_exps.weight q4_0 +blk.11.ffn_gate_exps.weight iq4_nl +blk.11.ffn_up_exps.weight iq4_nl +blk.12.attn_k.weight iq4_nl blk.12.attn_output.weight iq3_s -blk.12.attn_q.weight iq2_s -blk.12.attn_v.weight q4_K -blk.12.ffn_down_exps.weight q3_K -blk.13.attn_k.weight iq2_s +blk.12.attn_q.weight iq4_nl +blk.12.attn_v.weight q5_0 +blk.12.ffn_down_exps.weight q4_0 +blk.12.ffn_gate_exps.weight iq4_nl +blk.12.ffn_up_exps.weight iq4_nl +blk.13.attn_k.weight iq4_nl blk.13.attn_output.weight iq3_s -blk.13.attn_q.weight iq2_s -blk.13.attn_v.weight q4_K -blk.13.ffn_down_exps.weight q3_K -blk.14.attn_k.weight iq2_s +blk.13.attn_q.weight iq4_nl +blk.13.attn_v.weight q5_0 +blk.13.ffn_down_exps.weight q4_0 +blk.13.ffn_gate_exps.weight iq4_nl +blk.13.ffn_up_exps.weight iq4_nl +blk.14.attn_k.weight iq4_nl blk.14.attn_output.weight iq3_s -blk.14.attn_q.weight iq2_s -blk.14.attn_v.weight q4_K -blk.14.ffn_down_exps.weight q3_K -blk.15.attn_k.weight iq2_s +blk.14.attn_q.weight iq4_nl +blk.14.attn_v.weight q5_0 +blk.14.ffn_down_exps.weight q4_0 +blk.14.ffn_gate_exps.weight iq4_nl +blk.14.ffn_up_exps.weight iq4_nl +blk.15.attn_k.weight iq4_nl blk.15.attn_output.weight iq3_s -blk.15.attn_q.weight iq2_s -blk.15.attn_v.weight q4_K -blk.15.ffn_down_exps.weight q3_K -blk.16.attn_k.weight iq2_s +blk.15.attn_q.weight iq4_nl +blk.15.attn_v.weight q5_0 +blk.15.ffn_down_exps.weight q4_0 +blk.15.ffn_gate_exps.weight iq4_nl +blk.15.ffn_up_exps.weight iq4_nl +blk.16.attn_k.weight iq4_nl blk.16.attn_output.weight iq3_s -blk.16.attn_q.weight iq2_s -blk.16.attn_v.weight q4_K -blk.16.ffn_down_exps.weight q3_K -blk.17.attn_k.weight iq2_s +blk.16.attn_q.weight iq4_nl +blk.16.attn_v.weight q5_0 +blk.16.ffn_down_exps.weight q4_0 +blk.16.ffn_gate_exps.weight iq4_nl +blk.16.ffn_up_exps.weight iq4_nl +blk.17.attn_k.weight iq4_nl blk.17.attn_output.weight iq3_s -blk.17.attn_q.weight iq2_s -blk.17.attn_v.weight q4_K -blk.17.ffn_down_exps.weight q3_K -blk.18.attn_k.weight iq2_s +blk.17.attn_q.weight iq4_nl +blk.17.attn_v.weight q5_0 +blk.17.ffn_down_exps.weight q4_0 +blk.17.ffn_gate_exps.weight iq4_nl +blk.17.ffn_up_exps.weight iq4_nl +blk.18.attn_k.weight iq4_nl blk.18.attn_output.weight iq3_s -blk.18.attn_q.weight iq2_s -blk.18.attn_v.weight q4_K -blk.18.ffn_down_exps.weight q3_K -blk.19.attn_k.weight iq2_s +blk.18.attn_q.weight iq4_nl +blk.18.attn_v.weight q5_0 +blk.18.ffn_down_exps.weight q4_0 +blk.18.ffn_gate_exps.weight iq4_nl +blk.18.ffn_up_exps.weight iq4_nl +blk.19.attn_k.weight iq4_nl blk.19.attn_output.weight iq3_s -blk.19.attn_q.weight iq2_s -blk.19.attn_v.weight q4_K -blk.19.ffn_down_exps.weight q3_K -blk.20.attn_k.weight iq2_s +blk.19.attn_q.weight iq4_nl +blk.19.attn_v.weight q5_0 +blk.19.ffn_down_exps.weight q4_0 +blk.19.ffn_gate_exps.weight iq4_nl +blk.19.ffn_up_exps.weight iq4_nl +blk.20.attn_k.weight iq4_nl blk.20.attn_output.weight iq3_s -blk.20.attn_q.weight iq2_s -blk.20.attn_v.weight q4_K -blk.20.ffn_down_exps.weight q3_K -blk.21.attn_k.weight iq2_s +blk.20.attn_q.weight iq4_nl +blk.20.attn_v.weight q5_0 +blk.20.ffn_down_exps.weight q4_0 +blk.20.ffn_gate_exps.weight iq4_nl +blk.20.ffn_up_exps.weight iq4_nl +blk.21.attn_k.weight iq4_nl blk.21.attn_output.weight iq3_s -blk.21.attn_q.weight iq2_s -blk.21.attn_v.weight q4_K -blk.21.ffn_down_exps.weight q3_K -blk.22.attn_k.weight iq2_s +blk.21.attn_q.weight iq4_nl +blk.21.attn_v.weight q5_0 +blk.21.ffn_down_exps.weight q4_0 +blk.21.ffn_gate_exps.weight iq4_nl +blk.21.ffn_up_exps.weight iq4_nl +blk.22.attn_k.weight iq4_nl blk.22.attn_output.weight iq3_s -blk.22.attn_q.weight iq2_s -blk.22.attn_v.weight q4_K -blk.22.ffn_down_exps.weight q3_K -blk.23.attn_k.weight iq2_s +blk.22.attn_q.weight iq4_nl +blk.22.attn_v.weight q5_0 +blk.22.ffn_down_exps.weight q4_0 +blk.22.ffn_gate_exps.weight iq4_nl +blk.22.ffn_up_exps.weight iq4_nl +blk.23.attn_k.weight iq4_nl blk.23.attn_output.weight iq3_s -blk.23.attn_q.weight iq2_s -blk.23.attn_v.weight q4_K -blk.23.ffn_down_exps.weight q3_K -blk.24.attn_k.weight iq2_s +blk.23.attn_q.weight iq4_nl +blk.23.attn_v.weight q5_0 +blk.23.ffn_down_exps.weight q4_0 +blk.23.ffn_gate_exps.weight iq4_nl +blk.23.ffn_up_exps.weight iq4_nl +blk.24.attn_k.weight iq4_nl blk.24.attn_output.weight iq3_s -blk.24.attn_q.weight iq2_s -blk.24.attn_v.weight q4_K -blk.24.ffn_down_exps.weight q3_K -blk.25.attn_k.weight iq2_s +blk.24.attn_q.weight iq4_nl +blk.24.attn_v.weight q5_0 +blk.24.ffn_down_exps.weight q4_0 +blk.24.ffn_gate_exps.weight iq4_nl +blk.24.ffn_up_exps.weight iq4_nl +blk.25.attn_k.weight iq4_nl blk.25.attn_output.weight iq3_s -blk.25.attn_q.weight iq2_s -blk.25.attn_v.weight q4_K -blk.25.ffn_down_exps.weight q3_K -blk.26.attn_k.weight iq2_s +blk.25.attn_q.weight iq4_nl +blk.25.attn_v.weight q5_0 +blk.25.ffn_down_exps.weight q4_0 +blk.25.ffn_gate_exps.weight iq4_nl +blk.25.ffn_up_exps.weight iq4_nl +blk.26.attn_k.weight iq4_nl blk.26.attn_output.weight iq3_s -blk.26.attn_q.weight iq2_s -blk.26.attn_v.weight q4_K -blk.26.ffn_down_exps.weight q3_K -blk.27.attn_k.weight iq2_s +blk.26.attn_q.weight iq4_nl +blk.26.attn_v.weight q5_0 +blk.26.ffn_down_exps.weight q4_0 +blk.26.ffn_gate_exps.weight iq4_nl +blk.26.ffn_up_exps.weight iq4_nl +blk.27.attn_k.weight iq4_nl blk.27.attn_output.weight iq3_s -blk.27.attn_q.weight iq2_s -blk.27.attn_v.weight q4_K -blk.27.ffn_down_exps.weight q3_K -blk.28.attn_k.weight iq2_s +blk.27.attn_q.weight iq4_nl +blk.27.attn_v.weight q5_0 +blk.27.ffn_down_exps.weight q4_0 +blk.27.ffn_gate_exps.weight iq4_nl +blk.27.ffn_up_exps.weight iq4_nl +blk.28.attn_k.weight iq4_nl blk.28.attn_output.weight iq3_s -blk.28.attn_q.weight iq2_s -blk.28.attn_v.weight q4_K -blk.28.ffn_down_exps.weight q3_K -blk.29.attn_k.weight iq2_s +blk.28.attn_q.weight iq4_nl +blk.28.attn_v.weight q5_0 +blk.28.ffn_down_exps.weight q4_0 +blk.28.ffn_gate_exps.weight iq4_nl +blk.28.ffn_up_exps.weight iq4_nl +blk.29.attn_k.weight iq4_nl blk.29.attn_output.weight iq3_s -blk.29.attn_q.weight iq2_s -blk.29.attn_v.weight q4_K -blk.29.ffn_down_exps.weight q3_K -blk.30.attn_k.weight iq2_s +blk.29.attn_q.weight iq4_nl +blk.29.attn_v.weight q5_0 +blk.29.ffn_down_exps.weight q4_0 +blk.29.ffn_gate_exps.weight iq4_nl +blk.29.ffn_up_exps.weight iq4_nl +blk.30.attn_k.weight iq4_nl blk.30.attn_output.weight iq3_s -blk.30.attn_q.weight iq2_s -blk.30.attn_v.weight q4_K -blk.30.ffn_down_exps.weight q3_K -blk.31.attn_k.weight iq2_s +blk.30.attn_q.weight iq4_nl +blk.30.attn_v.weight q5_0 +blk.30.ffn_down_exps.weight q4_0 +blk.30.ffn_gate_exps.weight iq4_nl +blk.30.ffn_up_exps.weight iq4_nl +blk.31.attn_k.weight iq4_nl blk.31.attn_output.weight iq3_s -blk.31.attn_q.weight iq2_s -blk.31.attn_v.weight q4_K -blk.31.ffn_down_exps.weight q3_K -blk.32.attn_k.weight iq2_s +blk.31.attn_q.weight iq4_nl +blk.31.attn_v.weight q5_0 +blk.31.ffn_down_exps.weight q4_0 +blk.31.ffn_gate_exps.weight iq4_nl +blk.31.ffn_up_exps.weight iq4_nl +blk.32.attn_k.weight iq4_nl blk.32.attn_output.weight iq3_s -blk.32.attn_q.weight iq2_s -blk.32.attn_v.weight q4_K -blk.32.ffn_down_exps.weight q3_K -blk.33.attn_k.weight iq2_s +blk.32.attn_q.weight iq4_nl +blk.32.attn_v.weight q5_0 +blk.32.ffn_down_exps.weight q4_0 +blk.32.ffn_gate_exps.weight iq4_nl +blk.32.ffn_up_exps.weight iq4_nl +blk.33.attn_k.weight iq4_nl blk.33.attn_output.weight iq3_s -blk.33.attn_q.weight iq2_s -blk.33.attn_v.weight q4_K -blk.33.ffn_down_exps.weight q3_K -blk.34.attn_k.weight iq2_s +blk.33.attn_q.weight iq4_nl +blk.33.attn_v.weight q5_0 +blk.33.ffn_down_exps.weight q4_0 +blk.33.ffn_gate_exps.weight iq4_nl +blk.33.ffn_up_exps.weight iq4_nl +blk.34.attn_k.weight iq4_nl blk.34.attn_output.weight iq3_s -blk.34.attn_q.weight iq2_s -blk.34.attn_v.weight q4_K -blk.34.ffn_down_exps.weight q3_K -blk.35.attn_k.weight iq2_s +blk.34.attn_q.weight iq4_nl +blk.34.attn_v.weight q5_0 +blk.34.ffn_down_exps.weight q4_0 +blk.34.ffn_gate_exps.weight iq4_nl +blk.34.ffn_up_exps.weight iq4_nl +blk.35.attn_k.weight iq4_nl blk.35.attn_output.weight iq3_s -blk.35.attn_q.weight iq2_s -blk.35.attn_v.weight q4_K -blk.35.ffn_down_exps.weight q3_K +blk.35.attn_q.weight iq4_nl +blk.35.attn_v.weight q5_0 +blk.35.ffn_down_exps.weight q4_0 +blk.35.ffn_gate_exps.weight iq4_nl +blk.35.ffn_up_exps.weight iq4_nl [IQ1_S] iq1_s output.weight q8_0 -token_embd.weight q2_K +token_embd.weight q4_0 +blk.0.attn_k.weight iq4_nl blk.0.attn_output.weight iq2_xxs -blk.0.attn_v.weight q4_K -blk.0.ffn_down_exps.weight q2_K +blk.0.attn_q.weight iq4_nl +blk.0.attn_v.weight q5_0 +blk.0.ffn_down_exps.weight q4_0 +blk.0.ffn_gate_exps.weight iq4_nl +blk.0.ffn_up_exps.weight iq4_nl +blk.1.attn_k.weight iq4_nl blk.1.attn_output.weight iq2_xxs -blk.1.attn_v.weight q4_K -blk.1.ffn_down_exps.weight q2_K +blk.1.attn_q.weight iq4_nl +blk.1.attn_v.weight q5_0 +blk.1.ffn_down_exps.weight q4_0 +blk.1.ffn_gate_exps.weight iq4_nl +blk.1.ffn_up_exps.weight iq4_nl +blk.2.attn_k.weight iq4_nl blk.2.attn_output.weight iq2_xxs -blk.2.attn_v.weight q4_K -blk.2.ffn_down_exps.weight q2_K +blk.2.attn_q.weight iq4_nl +blk.2.attn_v.weight q5_0 +blk.2.ffn_down_exps.weight q4_0 +blk.2.ffn_gate_exps.weight iq4_nl +blk.2.ffn_up_exps.weight iq4_nl +blk.3.attn_k.weight iq4_nl blk.3.attn_output.weight iq2_xxs -blk.3.attn_v.weight q4_K -blk.3.ffn_down_exps.weight q2_K +blk.3.attn_q.weight iq4_nl +blk.3.attn_v.weight q5_0 +blk.3.ffn_down_exps.weight q4_0 +blk.3.ffn_gate_exps.weight iq4_nl +blk.3.ffn_up_exps.weight iq4_nl +blk.4.attn_k.weight iq4_nl blk.4.attn_output.weight iq2_xxs -blk.4.attn_v.weight q4_K +blk.4.attn_q.weight iq4_nl +blk.4.attn_v.weight q5_0 +blk.4.ffn_down_exps.weight iq4_nl +blk.4.ffn_gate_exps.weight iq4_nl +blk.4.ffn_up_exps.weight iq4_nl +blk.5.attn_k.weight iq4_nl blk.5.attn_output.weight iq2_xxs -blk.5.attn_v.weight q4_K +blk.5.attn_q.weight iq4_nl +blk.5.attn_v.weight q5_0 +blk.5.ffn_down_exps.weight iq4_nl +blk.5.ffn_gate_exps.weight iq4_nl +blk.5.ffn_up_exps.weight iq4_nl +blk.6.attn_k.weight iq4_nl blk.6.attn_output.weight iq2_xxs -blk.6.attn_v.weight q4_K +blk.6.attn_q.weight iq4_nl +blk.6.attn_v.weight q5_0 +blk.6.ffn_down_exps.weight iq4_nl +blk.6.ffn_gate_exps.weight iq4_nl +blk.6.ffn_up_exps.weight iq4_nl +blk.7.attn_k.weight iq4_nl blk.7.attn_output.weight iq2_xxs -blk.7.attn_v.weight q4_K +blk.7.attn_q.weight iq4_nl +blk.7.attn_v.weight q5_0 +blk.7.ffn_down_exps.weight iq4_nl +blk.7.ffn_gate_exps.weight iq4_nl +blk.7.ffn_up_exps.weight iq4_nl +blk.8.attn_k.weight iq4_nl blk.8.attn_output.weight iq2_xxs -blk.8.attn_v.weight q4_K +blk.8.attn_q.weight iq4_nl +blk.8.attn_v.weight q5_0 +blk.8.ffn_down_exps.weight iq4_nl +blk.8.ffn_gate_exps.weight iq4_nl +blk.8.ffn_up_exps.weight iq4_nl +blk.9.attn_k.weight iq4_nl blk.9.attn_output.weight iq2_xxs -blk.9.attn_v.weight q4_K +blk.9.attn_q.weight iq4_nl +blk.9.attn_v.weight q5_0 +blk.9.ffn_down_exps.weight iq4_nl +blk.9.ffn_gate_exps.weight iq4_nl +blk.9.ffn_up_exps.weight iq4_nl +blk.10.attn_k.weight iq4_nl blk.10.attn_output.weight iq2_xxs -blk.10.attn_v.weight q4_K +blk.10.attn_q.weight iq4_nl +blk.10.attn_v.weight q5_0 +blk.10.ffn_down_exps.weight iq4_nl +blk.10.ffn_gate_exps.weight iq4_nl +blk.10.ffn_up_exps.weight iq4_nl +blk.11.attn_k.weight iq4_nl blk.11.attn_output.weight iq2_xxs -blk.11.attn_v.weight q4_K +blk.11.attn_q.weight iq4_nl +blk.11.attn_v.weight q5_0 +blk.11.ffn_down_exps.weight iq4_nl +blk.11.ffn_gate_exps.weight iq4_nl +blk.11.ffn_up_exps.weight iq4_nl +blk.12.attn_k.weight iq4_nl blk.12.attn_output.weight iq2_xxs -blk.12.attn_v.weight q4_K +blk.12.attn_q.weight iq4_nl +blk.12.attn_v.weight q5_0 +blk.12.ffn_down_exps.weight iq4_nl +blk.12.ffn_gate_exps.weight iq4_nl +blk.12.ffn_up_exps.weight iq4_nl +blk.13.attn_k.weight iq4_nl blk.13.attn_output.weight iq2_xxs -blk.13.attn_v.weight q4_K +blk.13.attn_q.weight iq4_nl +blk.13.attn_v.weight q5_0 +blk.13.ffn_down_exps.weight iq4_nl +blk.13.ffn_gate_exps.weight iq4_nl +blk.13.ffn_up_exps.weight iq4_nl +blk.14.attn_k.weight iq4_nl blk.14.attn_output.weight iq2_xxs -blk.14.attn_v.weight q4_K +blk.14.attn_q.weight iq4_nl +blk.14.attn_v.weight q5_0 +blk.14.ffn_down_exps.weight iq4_nl +blk.14.ffn_gate_exps.weight iq4_nl +blk.14.ffn_up_exps.weight iq4_nl +blk.15.attn_k.weight iq4_nl blk.15.attn_output.weight iq2_xxs -blk.15.attn_v.weight q4_K +blk.15.attn_q.weight iq4_nl +blk.15.attn_v.weight q5_0 +blk.15.ffn_down_exps.weight iq4_nl +blk.15.ffn_gate_exps.weight iq4_nl +blk.15.ffn_up_exps.weight iq4_nl +blk.16.attn_k.weight iq4_nl blk.16.attn_output.weight iq2_xxs -blk.16.attn_v.weight q4_K +blk.16.attn_q.weight iq4_nl +blk.16.attn_v.weight q5_0 +blk.16.ffn_down_exps.weight iq4_nl +blk.16.ffn_gate_exps.weight iq4_nl +blk.16.ffn_up_exps.weight iq4_nl +blk.17.attn_k.weight iq4_nl blk.17.attn_output.weight iq2_xxs -blk.17.attn_v.weight q4_K +blk.17.attn_q.weight iq4_nl +blk.17.attn_v.weight q5_0 +blk.17.ffn_down_exps.weight iq4_nl +blk.17.ffn_gate_exps.weight iq4_nl +blk.17.ffn_up_exps.weight iq4_nl +blk.18.attn_k.weight iq4_nl blk.18.attn_output.weight iq2_xxs -blk.18.attn_v.weight q4_K +blk.18.attn_q.weight iq4_nl +blk.18.attn_v.weight q5_0 +blk.18.ffn_down_exps.weight iq4_nl +blk.18.ffn_gate_exps.weight iq4_nl +blk.18.ffn_up_exps.weight iq4_nl +blk.19.attn_k.weight iq4_nl blk.19.attn_output.weight iq2_xxs -blk.19.attn_v.weight q4_K +blk.19.attn_q.weight iq4_nl +blk.19.attn_v.weight q5_0 +blk.19.ffn_down_exps.weight iq4_nl +blk.19.ffn_gate_exps.weight iq4_nl +blk.19.ffn_up_exps.weight iq4_nl +blk.20.attn_k.weight iq4_nl blk.20.attn_output.weight iq2_xxs -blk.20.attn_v.weight q4_K +blk.20.attn_q.weight iq4_nl +blk.20.attn_v.weight q5_0 +blk.20.ffn_down_exps.weight iq4_nl +blk.20.ffn_gate_exps.weight iq4_nl +blk.20.ffn_up_exps.weight iq4_nl +blk.21.attn_k.weight iq4_nl blk.21.attn_output.weight iq2_xxs -blk.21.attn_v.weight q4_K +blk.21.attn_q.weight iq4_nl +blk.21.attn_v.weight q5_0 +blk.21.ffn_down_exps.weight iq4_nl +blk.21.ffn_gate_exps.weight iq4_nl +blk.21.ffn_up_exps.weight iq4_nl +blk.22.attn_k.weight iq4_nl blk.22.attn_output.weight iq2_xxs -blk.22.attn_v.weight q4_K +blk.22.attn_q.weight iq4_nl +blk.22.attn_v.weight q5_0 +blk.22.ffn_down_exps.weight iq4_nl +blk.22.ffn_gate_exps.weight iq4_nl +blk.22.ffn_up_exps.weight iq4_nl +blk.23.attn_k.weight iq4_nl blk.23.attn_output.weight iq2_xxs -blk.23.attn_v.weight q4_K +blk.23.attn_q.weight iq4_nl +blk.23.attn_v.weight q5_0 +blk.23.ffn_down_exps.weight iq4_nl +blk.23.ffn_gate_exps.weight iq4_nl +blk.23.ffn_up_exps.weight iq4_nl +blk.24.attn_k.weight iq4_nl blk.24.attn_output.weight iq2_xxs -blk.24.attn_v.weight q4_K +blk.24.attn_q.weight iq4_nl +blk.24.attn_v.weight q5_0 +blk.24.ffn_down_exps.weight iq4_nl +blk.24.ffn_gate_exps.weight iq4_nl +blk.24.ffn_up_exps.weight iq4_nl +blk.25.attn_k.weight iq4_nl blk.25.attn_output.weight iq2_xxs -blk.25.attn_v.weight q4_K +blk.25.attn_q.weight iq4_nl +blk.25.attn_v.weight q5_0 +blk.25.ffn_down_exps.weight iq4_nl +blk.25.ffn_gate_exps.weight iq4_nl +blk.25.ffn_up_exps.weight iq4_nl +blk.26.attn_k.weight iq4_nl blk.26.attn_output.weight iq2_xxs -blk.26.attn_v.weight q4_K +blk.26.attn_q.weight iq4_nl +blk.26.attn_v.weight q5_0 +blk.26.ffn_down_exps.weight iq4_nl +blk.26.ffn_gate_exps.weight iq4_nl +blk.26.ffn_up_exps.weight iq4_nl +blk.27.attn_k.weight iq4_nl blk.27.attn_output.weight iq2_xxs -blk.27.attn_v.weight q4_K +blk.27.attn_q.weight iq4_nl +blk.27.attn_v.weight q5_0 +blk.27.ffn_down_exps.weight iq4_nl +blk.27.ffn_gate_exps.weight iq4_nl +blk.27.ffn_up_exps.weight iq4_nl +blk.28.attn_k.weight iq4_nl blk.28.attn_output.weight iq2_xxs -blk.28.attn_v.weight q4_K +blk.28.attn_q.weight iq4_nl +blk.28.attn_v.weight q5_0 +blk.28.ffn_down_exps.weight iq4_nl +blk.28.ffn_gate_exps.weight iq4_nl +blk.28.ffn_up_exps.weight iq4_nl +blk.29.attn_k.weight iq4_nl blk.29.attn_output.weight iq2_xxs -blk.29.attn_v.weight q4_K +blk.29.attn_q.weight iq4_nl +blk.29.attn_v.weight q5_0 +blk.29.ffn_down_exps.weight iq4_nl +blk.29.ffn_gate_exps.weight iq4_nl +blk.29.ffn_up_exps.weight iq4_nl +blk.30.attn_k.weight iq4_nl blk.30.attn_output.weight iq2_xxs -blk.30.attn_v.weight q4_K +blk.30.attn_q.weight iq4_nl +blk.30.attn_v.weight q5_0 +blk.30.ffn_down_exps.weight iq4_nl +blk.30.ffn_gate_exps.weight iq4_nl +blk.30.ffn_up_exps.weight iq4_nl +blk.31.attn_k.weight iq4_nl blk.31.attn_output.weight iq2_xxs -blk.31.attn_v.weight q4_K +blk.31.attn_q.weight iq4_nl +blk.31.attn_v.weight q5_0 +blk.31.ffn_down_exps.weight iq4_nl +blk.31.ffn_gate_exps.weight iq4_nl +blk.31.ffn_up_exps.weight iq4_nl +blk.32.attn_k.weight iq4_nl blk.32.attn_output.weight iq2_xxs -blk.32.attn_v.weight q4_K +blk.32.attn_q.weight iq4_nl +blk.32.attn_v.weight q5_0 +blk.32.ffn_down_exps.weight iq4_nl +blk.32.ffn_gate_exps.weight iq4_nl +blk.32.ffn_up_exps.weight iq4_nl +blk.33.attn_k.weight iq4_nl blk.33.attn_output.weight iq2_xxs -blk.33.attn_v.weight q4_K +blk.33.attn_q.weight iq4_nl +blk.33.attn_v.weight q5_0 +blk.33.ffn_down_exps.weight iq4_nl +blk.33.ffn_gate_exps.weight iq4_nl +blk.33.ffn_up_exps.weight iq4_nl +blk.34.attn_k.weight iq4_nl blk.34.attn_output.weight iq2_xxs -blk.34.attn_v.weight q4_K +blk.34.attn_q.weight iq4_nl +blk.34.attn_v.weight q5_0 +blk.34.ffn_down_exps.weight iq4_nl +blk.34.ffn_gate_exps.weight iq4_nl +blk.34.ffn_up_exps.weight iq4_nl +blk.35.attn_k.weight iq4_nl blk.35.attn_output.weight iq2_xxs -blk.35.attn_v.weight q4_K +blk.35.attn_q.weight iq4_nl +blk.35.attn_v.weight q5_0 +blk.35.ffn_down_exps.weight iq4_nl +blk.35.ffn_gate_exps.weight iq4_nl +blk.35.ffn_up_exps.weight iq4_nl [IQ4_NL] iq4_nl -output.weight q6_K -blk.0.attn_v.weight q5_K -blk.0.ffn_down_exps.weight q5_K -blk.1.attn_v.weight q5_K -blk.1.ffn_down_exps.weight q5_K -blk.2.attn_v.weight q5_K -blk.2.ffn_down_exps.weight q5_K -blk.3.attn_v.weight q5_K -blk.3.ffn_down_exps.weight q5_K -blk.4.attn_v.weight q5_K -blk.5.attn_v.weight q5_K -blk.6.attn_v.weight q5_K -blk.7.attn_v.weight q5_K -blk.8.attn_v.weight q5_K -blk.9.attn_v.weight q5_K -blk.10.attn_v.weight q5_K -blk.11.attn_v.weight q5_K -blk.12.attn_v.weight q5_K -blk.13.attn_v.weight q5_K -blk.14.attn_v.weight q5_K -blk.15.attn_v.weight q5_K -blk.16.attn_v.weight q5_K -blk.17.attn_v.weight q5_K -blk.18.attn_v.weight q5_K -blk.19.attn_v.weight q5_K -blk.20.attn_v.weight q5_K -blk.21.attn_v.weight q5_K -blk.22.attn_v.weight q5_K -blk.23.attn_v.weight q5_K -blk.24.attn_v.weight q5_K -blk.25.attn_v.weight q5_K -blk.26.attn_v.weight q5_K -blk.27.attn_v.weight q5_K -blk.28.attn_v.weight q5_K -blk.29.attn_v.weight q5_K -blk.30.attn_v.weight q5_K -blk.31.attn_v.weight q5_K -blk.32.attn_v.weight q5_K -blk.33.attn_v.weight q5_K -blk.34.attn_v.weight q5_K -blk.35.attn_v.weight q5_K +output.weight q8_0 +blk.0.attn_v.weight q5_1 +blk.0.ffn_down_exps.weight q5_1 +blk.1.attn_v.weight q5_1 +blk.1.ffn_down_exps.weight q5_1 +blk.2.attn_v.weight q5_1 +blk.2.ffn_down_exps.weight q5_1 +blk.3.attn_v.weight q5_1 +blk.3.ffn_down_exps.weight q5_1 +blk.4.attn_v.weight q5_1 +blk.5.attn_v.weight q5_1 +blk.6.attn_v.weight q5_1 +blk.7.attn_v.weight q5_1 +blk.8.attn_v.weight q5_1 +blk.9.attn_v.weight q5_1 +blk.10.attn_v.weight q5_1 +blk.11.attn_v.weight q5_1 +blk.12.attn_v.weight q5_1 +blk.13.attn_v.weight q5_1 +blk.14.attn_v.weight q5_1 +blk.15.attn_v.weight q5_1 +blk.16.attn_v.weight q5_1 +blk.17.attn_v.weight q5_1 +blk.18.attn_v.weight q5_1 +blk.19.attn_v.weight q5_1 +blk.20.attn_v.weight q5_1 +blk.21.attn_v.weight q5_1 +blk.22.attn_v.weight q5_1 +blk.23.attn_v.weight q5_1 +blk.24.attn_v.weight q5_1 +blk.25.attn_v.weight q5_1 +blk.26.attn_v.weight q5_1 +blk.27.attn_v.weight q5_1 +blk.28.attn_v.weight q5_1 +blk.29.attn_v.weight q5_1 +blk.30.attn_v.weight q5_1 +blk.31.attn_v.weight q5_1 +blk.32.attn_v.weight q5_1 +blk.33.attn_v.weight q5_1 +blk.34.attn_v.weight q5_1 +blk.35.attn_v.weight q5_1 [IQ3_S] iq3_s output.weight q8_0 -blk.0.attn_v.weight q4_K -blk.1.attn_v.weight q4_K -blk.2.attn_v.weight q4_K -blk.3.attn_v.weight q4_K -blk.4.attn_v.weight q4_K -blk.5.attn_v.weight q4_K -blk.6.attn_v.weight q4_K -blk.7.attn_v.weight q4_K -blk.8.attn_v.weight q4_K -blk.9.attn_v.weight q4_K -blk.10.attn_v.weight q4_K -blk.11.attn_v.weight q4_K -blk.12.attn_v.weight q4_K -blk.13.attn_v.weight q4_K -blk.14.attn_v.weight q4_K -blk.15.attn_v.weight q4_K -blk.16.attn_v.weight q4_K -blk.17.attn_v.weight q4_K -blk.18.attn_v.weight q4_K -blk.19.attn_v.weight q4_K -blk.20.attn_v.weight q4_K -blk.21.attn_v.weight q4_K -blk.22.attn_v.weight q4_K -blk.23.attn_v.weight q4_K -blk.24.attn_v.weight q4_K -blk.25.attn_v.weight q4_K -blk.26.attn_v.weight q4_K -blk.27.attn_v.weight q4_K -blk.28.attn_v.weight q4_K -blk.29.attn_v.weight q4_K -blk.30.attn_v.weight q4_K -blk.31.attn_v.weight q4_K -blk.32.attn_v.weight q4_K -blk.33.attn_v.weight q4_K -blk.34.attn_v.weight q4_K -blk.35.attn_v.weight q4_K +token_embd.weight iq4_nl +blk.0.attn_k.weight iq4_nl +blk.0.attn_q.weight iq4_nl +blk.0.attn_v.weight q5_0 +blk.0.ffn_down_exps.weight iq4_nl +blk.0.ffn_gate_exps.weight iq4_nl +blk.0.ffn_up_exps.weight iq4_nl +blk.1.attn_k.weight iq4_nl +blk.1.attn_q.weight iq4_nl +blk.1.attn_v.weight q5_0 +blk.1.ffn_down_exps.weight iq4_nl +blk.1.ffn_gate_exps.weight iq4_nl +blk.1.ffn_up_exps.weight iq4_nl +blk.2.attn_k.weight iq4_nl +blk.2.attn_q.weight iq4_nl +blk.2.attn_v.weight q5_0 +blk.2.ffn_down_exps.weight iq4_nl +blk.2.ffn_gate_exps.weight iq4_nl +blk.2.ffn_up_exps.weight iq4_nl +blk.3.attn_k.weight iq4_nl +blk.3.attn_q.weight iq4_nl +blk.3.attn_v.weight q5_0 +blk.3.ffn_down_exps.weight iq4_nl +blk.3.ffn_gate_exps.weight iq4_nl +blk.3.ffn_up_exps.weight iq4_nl +blk.4.attn_k.weight iq4_nl +blk.4.attn_q.weight iq4_nl +blk.4.attn_v.weight q5_0 +blk.4.ffn_down_exps.weight iq4_nl +blk.4.ffn_gate_exps.weight iq4_nl +blk.4.ffn_up_exps.weight iq4_nl +blk.5.attn_k.weight iq4_nl +blk.5.attn_q.weight iq4_nl +blk.5.attn_v.weight q5_0 +blk.5.ffn_down_exps.weight iq4_nl +blk.5.ffn_gate_exps.weight iq4_nl +blk.5.ffn_up_exps.weight iq4_nl +blk.6.attn_k.weight iq4_nl +blk.6.attn_q.weight iq4_nl +blk.6.attn_v.weight q5_0 +blk.6.ffn_down_exps.weight iq4_nl +blk.6.ffn_gate_exps.weight iq4_nl +blk.6.ffn_up_exps.weight iq4_nl +blk.7.attn_k.weight iq4_nl +blk.7.attn_q.weight iq4_nl +blk.7.attn_v.weight q5_0 +blk.7.ffn_down_exps.weight iq4_nl +blk.7.ffn_gate_exps.weight iq4_nl +blk.7.ffn_up_exps.weight iq4_nl +blk.8.attn_k.weight iq4_nl +blk.8.attn_q.weight iq4_nl +blk.8.attn_v.weight q5_0 +blk.8.ffn_down_exps.weight iq4_nl +blk.8.ffn_gate_exps.weight iq4_nl +blk.8.ffn_up_exps.weight iq4_nl +blk.9.attn_k.weight iq4_nl +blk.9.attn_q.weight iq4_nl +blk.9.attn_v.weight q5_0 +blk.9.ffn_down_exps.weight iq4_nl +blk.9.ffn_gate_exps.weight iq4_nl +blk.9.ffn_up_exps.weight iq4_nl +blk.10.attn_k.weight iq4_nl +blk.10.attn_q.weight iq4_nl +blk.10.attn_v.weight q5_0 +blk.10.ffn_down_exps.weight iq4_nl +blk.10.ffn_gate_exps.weight iq4_nl +blk.10.ffn_up_exps.weight iq4_nl +blk.11.attn_k.weight iq4_nl +blk.11.attn_q.weight iq4_nl +blk.11.attn_v.weight q5_0 +blk.11.ffn_down_exps.weight iq4_nl +blk.11.ffn_gate_exps.weight iq4_nl +blk.11.ffn_up_exps.weight iq4_nl +blk.12.attn_k.weight iq4_nl +blk.12.attn_q.weight iq4_nl +blk.12.attn_v.weight q5_0 +blk.12.ffn_down_exps.weight iq4_nl +blk.12.ffn_gate_exps.weight iq4_nl +blk.12.ffn_up_exps.weight iq4_nl +blk.13.attn_k.weight iq4_nl +blk.13.attn_q.weight iq4_nl +blk.13.attn_v.weight q5_0 +blk.13.ffn_down_exps.weight iq4_nl +blk.13.ffn_gate_exps.weight iq4_nl +blk.13.ffn_up_exps.weight iq4_nl +blk.14.attn_k.weight iq4_nl +blk.14.attn_q.weight iq4_nl +blk.14.attn_v.weight q5_0 +blk.14.ffn_down_exps.weight iq4_nl +blk.14.ffn_gate_exps.weight iq4_nl +blk.14.ffn_up_exps.weight iq4_nl +blk.15.attn_k.weight iq4_nl +blk.15.attn_q.weight iq4_nl +blk.15.attn_v.weight q5_0 +blk.15.ffn_down_exps.weight iq4_nl +blk.15.ffn_gate_exps.weight iq4_nl +blk.15.ffn_up_exps.weight iq4_nl +blk.16.attn_k.weight iq4_nl +blk.16.attn_q.weight iq4_nl +blk.16.attn_v.weight q5_0 +blk.16.ffn_down_exps.weight iq4_nl +blk.16.ffn_gate_exps.weight iq4_nl +blk.16.ffn_up_exps.weight iq4_nl +blk.17.attn_k.weight iq4_nl +blk.17.attn_q.weight iq4_nl +blk.17.attn_v.weight q5_0 +blk.17.ffn_down_exps.weight iq4_nl +blk.17.ffn_gate_exps.weight iq4_nl +blk.17.ffn_up_exps.weight iq4_nl +blk.18.attn_k.weight iq4_nl +blk.18.attn_q.weight iq4_nl +blk.18.attn_v.weight q5_0 +blk.18.ffn_down_exps.weight iq4_nl +blk.18.ffn_gate_exps.weight iq4_nl +blk.18.ffn_up_exps.weight iq4_nl +blk.19.attn_k.weight iq4_nl +blk.19.attn_q.weight iq4_nl +blk.19.attn_v.weight q5_0 +blk.19.ffn_down_exps.weight iq4_nl +blk.19.ffn_gate_exps.weight iq4_nl +blk.19.ffn_up_exps.weight iq4_nl +blk.20.attn_k.weight iq4_nl +blk.20.attn_q.weight iq4_nl +blk.20.attn_v.weight q5_0 +blk.20.ffn_down_exps.weight iq4_nl +blk.20.ffn_gate_exps.weight iq4_nl +blk.20.ffn_up_exps.weight iq4_nl +blk.21.attn_k.weight iq4_nl +blk.21.attn_q.weight iq4_nl +blk.21.attn_v.weight q5_0 +blk.21.ffn_down_exps.weight iq4_nl +blk.21.ffn_gate_exps.weight iq4_nl +blk.21.ffn_up_exps.weight iq4_nl +blk.22.attn_k.weight iq4_nl +blk.22.attn_q.weight iq4_nl +blk.22.attn_v.weight q5_0 +blk.22.ffn_down_exps.weight iq4_nl +blk.22.ffn_gate_exps.weight iq4_nl +blk.22.ffn_up_exps.weight iq4_nl +blk.23.attn_k.weight iq4_nl +blk.23.attn_q.weight iq4_nl +blk.23.attn_v.weight q5_0 +blk.23.ffn_down_exps.weight iq4_nl +blk.23.ffn_gate_exps.weight iq4_nl +blk.23.ffn_up_exps.weight iq4_nl +blk.24.attn_k.weight iq4_nl +blk.24.attn_q.weight iq4_nl +blk.24.attn_v.weight q5_0 +blk.24.ffn_down_exps.weight iq4_nl +blk.24.ffn_gate_exps.weight iq4_nl +blk.24.ffn_up_exps.weight iq4_nl +blk.25.attn_k.weight iq4_nl +blk.25.attn_q.weight iq4_nl +blk.25.attn_v.weight q5_0 +blk.25.ffn_down_exps.weight iq4_nl +blk.25.ffn_gate_exps.weight iq4_nl +blk.25.ffn_up_exps.weight iq4_nl +blk.26.attn_k.weight iq4_nl +blk.26.attn_q.weight iq4_nl +blk.26.attn_v.weight q5_0 +blk.26.ffn_down_exps.weight iq4_nl +blk.26.ffn_gate_exps.weight iq4_nl +blk.26.ffn_up_exps.weight iq4_nl +blk.27.attn_k.weight iq4_nl +blk.27.attn_q.weight iq4_nl +blk.27.attn_v.weight q5_0 +blk.27.ffn_down_exps.weight iq4_nl +blk.27.ffn_gate_exps.weight iq4_nl +blk.27.ffn_up_exps.weight iq4_nl +blk.28.attn_k.weight iq4_nl +blk.28.attn_q.weight iq4_nl +blk.28.attn_v.weight q5_0 +blk.28.ffn_down_exps.weight iq4_nl +blk.28.ffn_gate_exps.weight iq4_nl +blk.28.ffn_up_exps.weight iq4_nl +blk.29.attn_k.weight iq4_nl +blk.29.attn_q.weight iq4_nl +blk.29.attn_v.weight q5_0 +blk.29.ffn_down_exps.weight iq4_nl +blk.29.ffn_gate_exps.weight iq4_nl +blk.29.ffn_up_exps.weight iq4_nl +blk.30.attn_k.weight iq4_nl +blk.30.attn_q.weight iq4_nl +blk.30.attn_v.weight q5_0 +blk.30.ffn_down_exps.weight iq4_nl +blk.30.ffn_gate_exps.weight iq4_nl +blk.30.ffn_up_exps.weight iq4_nl +blk.31.attn_k.weight iq4_nl +blk.31.attn_q.weight iq4_nl +blk.31.attn_v.weight q5_0 +blk.31.ffn_down_exps.weight iq4_nl +blk.31.ffn_gate_exps.weight iq4_nl +blk.31.ffn_up_exps.weight iq4_nl +blk.32.attn_k.weight iq4_nl +blk.32.attn_q.weight iq4_nl +blk.32.attn_v.weight q5_0 +blk.32.ffn_down_exps.weight iq4_nl +blk.32.ffn_gate_exps.weight iq4_nl +blk.32.ffn_up_exps.weight iq4_nl +blk.33.attn_k.weight iq4_nl +blk.33.attn_q.weight iq4_nl +blk.33.attn_v.weight q5_0 +blk.33.ffn_down_exps.weight iq4_nl +blk.33.ffn_gate_exps.weight iq4_nl +blk.33.ffn_up_exps.weight iq4_nl +blk.34.attn_k.weight iq4_nl +blk.34.attn_q.weight iq4_nl +blk.34.attn_v.weight q5_0 +blk.34.ffn_down_exps.weight iq4_nl +blk.34.ffn_gate_exps.weight iq4_nl +blk.34.ffn_up_exps.weight iq4_nl +blk.35.attn_k.weight iq4_nl +blk.35.attn_q.weight iq4_nl +blk.35.attn_v.weight q5_0 +blk.35.ffn_down_exps.weight iq4_nl +blk.35.ffn_gate_exps.weight iq4_nl +blk.35.ffn_up_exps.weight iq4_nl [IQ3_M] iq3_s output.weight q8_0 +token_embd.weight iq4_nl +blk.0.attn_k.weight iq4_nl blk.0.attn_output.weight q4_K -blk.0.attn_v.weight q4_K -blk.0.ffn_down_exps.weight q4_K +blk.0.attn_q.weight iq4_nl +blk.0.attn_v.weight q5_0 +blk.0.ffn_down_exps.weight q5_0 +blk.0.ffn_gate_exps.weight iq4_nl +blk.0.ffn_up_exps.weight iq4_nl +blk.1.attn_k.weight iq4_nl blk.1.attn_output.weight q4_K -blk.1.attn_v.weight q4_K -blk.1.ffn_down_exps.weight q4_K +blk.1.attn_q.weight iq4_nl +blk.1.attn_v.weight q5_0 +blk.1.ffn_down_exps.weight q5_0 +blk.1.ffn_gate_exps.weight iq4_nl +blk.1.ffn_up_exps.weight iq4_nl +blk.2.attn_k.weight iq4_nl blk.2.attn_output.weight q4_K -blk.2.attn_v.weight q4_K -blk.2.ffn_down_exps.weight q4_K +blk.2.attn_q.weight iq4_nl +blk.2.attn_v.weight q5_0 +blk.2.ffn_down_exps.weight q5_0 +blk.2.ffn_gate_exps.weight iq4_nl +blk.2.ffn_up_exps.weight iq4_nl +blk.3.attn_k.weight iq4_nl blk.3.attn_output.weight q4_K -blk.3.attn_v.weight q4_K -blk.3.ffn_down_exps.weight q4_K +blk.3.attn_q.weight iq4_nl +blk.3.attn_v.weight q5_0 +blk.3.ffn_down_exps.weight q5_0 +blk.3.ffn_gate_exps.weight iq4_nl +blk.3.ffn_up_exps.weight iq4_nl +blk.4.attn_k.weight iq4_nl blk.4.attn_output.weight q4_K -blk.4.attn_v.weight q4_K +blk.4.attn_q.weight iq4_nl +blk.4.attn_v.weight q5_0 +blk.4.ffn_down_exps.weight iq4_nl +blk.4.ffn_gate_exps.weight iq4_nl +blk.4.ffn_up_exps.weight iq4_nl +blk.5.attn_k.weight iq4_nl blk.5.attn_output.weight q4_K -blk.5.attn_v.weight q4_K +blk.5.attn_q.weight iq4_nl +blk.5.attn_v.weight q5_0 +blk.5.ffn_down_exps.weight iq4_nl +blk.5.ffn_gate_exps.weight iq4_nl +blk.5.ffn_up_exps.weight iq4_nl +blk.6.attn_k.weight iq4_nl blk.6.attn_output.weight q4_K -blk.6.attn_v.weight q4_K +blk.6.attn_q.weight iq4_nl +blk.6.attn_v.weight q5_0 +blk.6.ffn_down_exps.weight iq4_nl +blk.6.ffn_gate_exps.weight iq4_nl +blk.6.ffn_up_exps.weight iq4_nl +blk.7.attn_k.weight iq4_nl blk.7.attn_output.weight q4_K -blk.7.attn_v.weight q4_K +blk.7.attn_q.weight iq4_nl +blk.7.attn_v.weight q5_0 +blk.7.ffn_down_exps.weight iq4_nl +blk.7.ffn_gate_exps.weight iq4_nl +blk.7.ffn_up_exps.weight iq4_nl +blk.8.attn_k.weight iq4_nl blk.8.attn_output.weight q4_K -blk.8.attn_v.weight q4_K +blk.8.attn_q.weight iq4_nl +blk.8.attn_v.weight q5_0 +blk.8.ffn_down_exps.weight iq4_nl +blk.8.ffn_gate_exps.weight iq4_nl +blk.8.ffn_up_exps.weight iq4_nl +blk.9.attn_k.weight iq4_nl blk.9.attn_output.weight q4_K -blk.9.attn_v.weight q4_K +blk.9.attn_q.weight iq4_nl +blk.9.attn_v.weight q5_0 +blk.9.ffn_down_exps.weight iq4_nl +blk.9.ffn_gate_exps.weight iq4_nl +blk.9.ffn_up_exps.weight iq4_nl +blk.10.attn_k.weight iq4_nl blk.10.attn_output.weight q4_K -blk.10.attn_v.weight q4_K +blk.10.attn_q.weight iq4_nl +blk.10.attn_v.weight q5_0 +blk.10.ffn_down_exps.weight iq4_nl +blk.10.ffn_gate_exps.weight iq4_nl +blk.10.ffn_up_exps.weight iq4_nl +blk.11.attn_k.weight iq4_nl blk.11.attn_output.weight q4_K -blk.11.attn_v.weight q4_K +blk.11.attn_q.weight iq4_nl +blk.11.attn_v.weight q5_0 +blk.11.ffn_down_exps.weight iq4_nl +blk.11.ffn_gate_exps.weight iq4_nl +blk.11.ffn_up_exps.weight iq4_nl +blk.12.attn_k.weight iq4_nl blk.12.attn_output.weight q4_K -blk.12.attn_v.weight q4_K +blk.12.attn_q.weight iq4_nl +blk.12.attn_v.weight q5_0 +blk.12.ffn_down_exps.weight iq4_nl +blk.12.ffn_gate_exps.weight iq4_nl +blk.12.ffn_up_exps.weight iq4_nl +blk.13.attn_k.weight iq4_nl blk.13.attn_output.weight q4_K -blk.13.attn_v.weight q4_K +blk.13.attn_q.weight iq4_nl +blk.13.attn_v.weight q5_0 +blk.13.ffn_down_exps.weight iq4_nl +blk.13.ffn_gate_exps.weight iq4_nl +blk.13.ffn_up_exps.weight iq4_nl +blk.14.attn_k.weight iq4_nl blk.14.attn_output.weight q4_K -blk.14.attn_v.weight q4_K +blk.14.attn_q.weight iq4_nl +blk.14.attn_v.weight q5_0 +blk.14.ffn_down_exps.weight iq4_nl +blk.14.ffn_gate_exps.weight iq4_nl +blk.14.ffn_up_exps.weight iq4_nl +blk.15.attn_k.weight iq4_nl blk.15.attn_output.weight q4_K -blk.15.attn_v.weight q4_K +blk.15.attn_q.weight iq4_nl +blk.15.attn_v.weight q5_0 +blk.15.ffn_down_exps.weight iq4_nl +blk.15.ffn_gate_exps.weight iq4_nl +blk.15.ffn_up_exps.weight iq4_nl +blk.16.attn_k.weight iq4_nl blk.16.attn_output.weight q4_K -blk.16.attn_v.weight q4_K +blk.16.attn_q.weight iq4_nl +blk.16.attn_v.weight q5_0 +blk.16.ffn_down_exps.weight iq4_nl +blk.16.ffn_gate_exps.weight iq4_nl +blk.16.ffn_up_exps.weight iq4_nl +blk.17.attn_k.weight iq4_nl blk.17.attn_output.weight q4_K -blk.17.attn_v.weight q4_K +blk.17.attn_q.weight iq4_nl +blk.17.attn_v.weight q5_0 +blk.17.ffn_down_exps.weight iq4_nl +blk.17.ffn_gate_exps.weight iq4_nl +blk.17.ffn_up_exps.weight iq4_nl +blk.18.attn_k.weight iq4_nl blk.18.attn_output.weight q4_K -blk.18.attn_v.weight q4_K +blk.18.attn_q.weight iq4_nl +blk.18.attn_v.weight q5_0 +blk.18.ffn_down_exps.weight iq4_nl +blk.18.ffn_gate_exps.weight iq4_nl +blk.18.ffn_up_exps.weight iq4_nl +blk.19.attn_k.weight iq4_nl blk.19.attn_output.weight q4_K -blk.19.attn_v.weight q4_K +blk.19.attn_q.weight iq4_nl +blk.19.attn_v.weight q5_0 +blk.19.ffn_down_exps.weight iq4_nl +blk.19.ffn_gate_exps.weight iq4_nl +blk.19.ffn_up_exps.weight iq4_nl +blk.20.attn_k.weight iq4_nl blk.20.attn_output.weight q4_K -blk.20.attn_v.weight q4_K +blk.20.attn_q.weight iq4_nl +blk.20.attn_v.weight q5_0 +blk.20.ffn_down_exps.weight iq4_nl +blk.20.ffn_gate_exps.weight iq4_nl +blk.20.ffn_up_exps.weight iq4_nl +blk.21.attn_k.weight iq4_nl blk.21.attn_output.weight q4_K -blk.21.attn_v.weight q4_K +blk.21.attn_q.weight iq4_nl +blk.21.attn_v.weight q5_0 +blk.21.ffn_down_exps.weight iq4_nl +blk.21.ffn_gate_exps.weight iq4_nl +blk.21.ffn_up_exps.weight iq4_nl +blk.22.attn_k.weight iq4_nl blk.22.attn_output.weight q4_K -blk.22.attn_v.weight q4_K +blk.22.attn_q.weight iq4_nl +blk.22.attn_v.weight q5_0 +blk.22.ffn_down_exps.weight iq4_nl +blk.22.ffn_gate_exps.weight iq4_nl +blk.22.ffn_up_exps.weight iq4_nl +blk.23.attn_k.weight iq4_nl blk.23.attn_output.weight q4_K -blk.23.attn_v.weight q4_K +blk.23.attn_q.weight iq4_nl +blk.23.attn_v.weight q5_0 +blk.23.ffn_down_exps.weight iq4_nl +blk.23.ffn_gate_exps.weight iq4_nl +blk.23.ffn_up_exps.weight iq4_nl +blk.24.attn_k.weight iq4_nl blk.24.attn_output.weight q4_K -blk.24.attn_v.weight q4_K +blk.24.attn_q.weight iq4_nl +blk.24.attn_v.weight q5_0 +blk.24.ffn_down_exps.weight iq4_nl +blk.24.ffn_gate_exps.weight iq4_nl +blk.24.ffn_up_exps.weight iq4_nl +blk.25.attn_k.weight iq4_nl blk.25.attn_output.weight q4_K -blk.25.attn_v.weight q4_K +blk.25.attn_q.weight iq4_nl +blk.25.attn_v.weight q5_0 +blk.25.ffn_down_exps.weight iq4_nl +blk.25.ffn_gate_exps.weight iq4_nl +blk.25.ffn_up_exps.weight iq4_nl +blk.26.attn_k.weight iq4_nl blk.26.attn_output.weight q4_K -blk.26.attn_v.weight q4_K +blk.26.attn_q.weight iq4_nl +blk.26.attn_v.weight q5_0 +blk.26.ffn_down_exps.weight iq4_nl +blk.26.ffn_gate_exps.weight iq4_nl +blk.26.ffn_up_exps.weight iq4_nl +blk.27.attn_k.weight iq4_nl blk.27.attn_output.weight q4_K -blk.27.attn_v.weight q4_K +blk.27.attn_q.weight iq4_nl +blk.27.attn_v.weight q5_0 +blk.27.ffn_down_exps.weight iq4_nl +blk.27.ffn_gate_exps.weight iq4_nl +blk.27.ffn_up_exps.weight iq4_nl +blk.28.attn_k.weight iq4_nl blk.28.attn_output.weight q4_K -blk.28.attn_v.weight q4_K +blk.28.attn_q.weight iq4_nl +blk.28.attn_v.weight q5_0 +blk.28.ffn_down_exps.weight iq4_nl +blk.28.ffn_gate_exps.weight iq4_nl +blk.28.ffn_up_exps.weight iq4_nl +blk.29.attn_k.weight iq4_nl blk.29.attn_output.weight q4_K -blk.29.attn_v.weight q4_K +blk.29.attn_q.weight iq4_nl +blk.29.attn_v.weight q5_0 +blk.29.ffn_down_exps.weight iq4_nl +blk.29.ffn_gate_exps.weight iq4_nl +blk.29.ffn_up_exps.weight iq4_nl +blk.30.attn_k.weight iq4_nl blk.30.attn_output.weight q4_K -blk.30.attn_v.weight q4_K +blk.30.attn_q.weight iq4_nl +blk.30.attn_v.weight q5_0 +blk.30.ffn_down_exps.weight iq4_nl +blk.30.ffn_gate_exps.weight iq4_nl +blk.30.ffn_up_exps.weight iq4_nl +blk.31.attn_k.weight iq4_nl blk.31.attn_output.weight q4_K -blk.31.attn_v.weight q4_K +blk.31.attn_q.weight iq4_nl +blk.31.attn_v.weight q5_0 +blk.31.ffn_down_exps.weight iq4_nl +blk.31.ffn_gate_exps.weight iq4_nl +blk.31.ffn_up_exps.weight iq4_nl +blk.32.attn_k.weight iq4_nl blk.32.attn_output.weight q4_K -blk.32.attn_v.weight q4_K +blk.32.attn_q.weight iq4_nl +blk.32.attn_v.weight q5_0 +blk.32.ffn_down_exps.weight iq4_nl +blk.32.ffn_gate_exps.weight iq4_nl +blk.32.ffn_up_exps.weight iq4_nl +blk.33.attn_k.weight iq4_nl blk.33.attn_output.weight q4_K -blk.33.attn_v.weight q4_K +blk.33.attn_q.weight iq4_nl +blk.33.attn_v.weight q5_0 +blk.33.ffn_down_exps.weight iq4_nl +blk.33.ffn_gate_exps.weight iq4_nl +blk.33.ffn_up_exps.weight iq4_nl +blk.34.attn_k.weight iq4_nl blk.34.attn_output.weight q4_K -blk.34.attn_v.weight q4_K +blk.34.attn_q.weight iq4_nl +blk.34.attn_v.weight q5_0 +blk.34.ffn_down_exps.weight iq4_nl +blk.34.ffn_gate_exps.weight iq4_nl +blk.34.ffn_up_exps.weight iq4_nl +blk.35.attn_k.weight iq4_nl blk.35.attn_output.weight q4_K -blk.35.attn_v.weight q4_K +blk.35.attn_q.weight iq4_nl +blk.35.attn_v.weight q5_0 +blk.35.ffn_down_exps.weight iq4_nl +blk.35.ffn_gate_exps.weight iq4_nl +blk.35.ffn_up_exps.weight iq4_nl [IQ2_S] iq2_xs output.weight q8_0 -token_embd.weight iq3_s +token_embd.weight iq4_nl +blk.0.attn_k.weight iq4_nl blk.0.attn_output.weight iq3_s -blk.0.attn_v.weight q4_K -blk.0.ffn_down_exps.weight iq3_s +blk.0.attn_q.weight iq4_nl +blk.0.attn_v.weight q5_0 +blk.0.ffn_down_exps.weight iq4_nl +blk.0.ffn_gate_exps.weight iq4_nl +blk.0.ffn_up_exps.weight iq4_nl +blk.1.attn_k.weight iq4_nl blk.1.attn_output.weight iq3_s -blk.1.attn_v.weight q4_K -blk.1.ffn_down_exps.weight iq3_s +blk.1.attn_q.weight iq4_nl +blk.1.attn_v.weight q5_0 +blk.1.ffn_down_exps.weight iq4_nl +blk.1.ffn_gate_exps.weight iq4_nl +blk.1.ffn_up_exps.weight iq4_nl +blk.2.attn_k.weight iq4_nl blk.2.attn_output.weight iq3_s -blk.2.attn_v.weight q4_K -blk.2.ffn_down_exps.weight iq3_s +blk.2.attn_q.weight iq4_nl +blk.2.attn_v.weight q5_0 +blk.2.ffn_down_exps.weight iq4_nl +blk.2.ffn_gate_exps.weight iq4_nl +blk.2.ffn_up_exps.weight iq4_nl +blk.3.attn_k.weight iq4_nl blk.3.attn_output.weight iq3_s -blk.3.attn_v.weight q4_K -blk.3.ffn_down_exps.weight iq3_s +blk.3.attn_q.weight iq4_nl +blk.3.attn_v.weight q5_0 +blk.3.ffn_down_exps.weight iq4_nl +blk.3.ffn_gate_exps.weight iq4_nl +blk.3.ffn_up_exps.weight iq4_nl +blk.4.attn_k.weight iq4_nl blk.4.attn_output.weight iq3_s -blk.4.attn_v.weight q4_K +blk.4.attn_q.weight iq4_nl +blk.4.attn_v.weight q5_0 +blk.4.ffn_down_exps.weight iq4_nl +blk.4.ffn_gate_exps.weight iq4_nl +blk.4.ffn_up_exps.weight iq4_nl +blk.5.attn_k.weight iq4_nl blk.5.attn_output.weight iq3_s -blk.5.attn_v.weight q4_K +blk.5.attn_q.weight iq4_nl +blk.5.attn_v.weight q5_0 +blk.5.ffn_down_exps.weight iq4_nl +blk.5.ffn_gate_exps.weight iq4_nl +blk.5.ffn_up_exps.weight iq4_nl +blk.6.attn_k.weight iq4_nl blk.6.attn_output.weight iq3_s -blk.6.attn_v.weight q4_K +blk.6.attn_q.weight iq4_nl +blk.6.attn_v.weight q5_0 +blk.6.ffn_down_exps.weight iq4_nl +blk.6.ffn_gate_exps.weight iq4_nl +blk.6.ffn_up_exps.weight iq4_nl +blk.7.attn_k.weight iq4_nl blk.7.attn_output.weight iq3_s -blk.7.attn_v.weight q4_K +blk.7.attn_q.weight iq4_nl +blk.7.attn_v.weight q5_0 +blk.7.ffn_down_exps.weight iq4_nl +blk.7.ffn_gate_exps.weight iq4_nl +blk.7.ffn_up_exps.weight iq4_nl +blk.8.attn_k.weight iq4_nl blk.8.attn_output.weight iq3_s -blk.8.attn_v.weight q4_K +blk.8.attn_q.weight iq4_nl +blk.8.attn_v.weight q5_0 +blk.8.ffn_down_exps.weight iq4_nl +blk.8.ffn_gate_exps.weight iq4_nl +blk.8.ffn_up_exps.weight iq4_nl +blk.9.attn_k.weight iq4_nl blk.9.attn_output.weight iq3_s -blk.9.attn_v.weight q4_K +blk.9.attn_q.weight iq4_nl +blk.9.attn_v.weight q5_0 +blk.9.ffn_down_exps.weight iq4_nl +blk.9.ffn_gate_exps.weight iq4_nl +blk.9.ffn_up_exps.weight iq4_nl +blk.10.attn_k.weight iq4_nl blk.10.attn_output.weight iq3_s -blk.10.attn_v.weight q4_K +blk.10.attn_q.weight iq4_nl +blk.10.attn_v.weight q5_0 +blk.10.ffn_down_exps.weight iq4_nl +blk.10.ffn_gate_exps.weight iq4_nl +blk.10.ffn_up_exps.weight iq4_nl +blk.11.attn_k.weight iq4_nl blk.11.attn_output.weight iq3_s -blk.11.attn_v.weight q4_K +blk.11.attn_q.weight iq4_nl +blk.11.attn_v.weight q5_0 +blk.11.ffn_down_exps.weight iq4_nl +blk.11.ffn_gate_exps.weight iq4_nl +blk.11.ffn_up_exps.weight iq4_nl +blk.12.attn_k.weight iq4_nl blk.12.attn_output.weight iq3_s -blk.12.attn_v.weight q4_K +blk.12.attn_q.weight iq4_nl +blk.12.attn_v.weight q5_0 +blk.12.ffn_down_exps.weight iq4_nl +blk.12.ffn_gate_exps.weight iq4_nl +blk.12.ffn_up_exps.weight iq4_nl +blk.13.attn_k.weight iq4_nl blk.13.attn_output.weight iq3_s -blk.13.attn_v.weight q4_K +blk.13.attn_q.weight iq4_nl +blk.13.attn_v.weight q5_0 +blk.13.ffn_down_exps.weight iq4_nl +blk.13.ffn_gate_exps.weight iq4_nl +blk.13.ffn_up_exps.weight iq4_nl +blk.14.attn_k.weight iq4_nl blk.14.attn_output.weight iq3_s -blk.14.attn_v.weight q4_K +blk.14.attn_q.weight iq4_nl +blk.14.attn_v.weight q5_0 +blk.14.ffn_down_exps.weight iq4_nl +blk.14.ffn_gate_exps.weight iq4_nl +blk.14.ffn_up_exps.weight iq4_nl +blk.15.attn_k.weight iq4_nl blk.15.attn_output.weight iq3_s -blk.15.attn_v.weight q4_K +blk.15.attn_q.weight iq4_nl +blk.15.attn_v.weight q5_0 +blk.15.ffn_down_exps.weight iq4_nl +blk.15.ffn_gate_exps.weight iq4_nl +blk.15.ffn_up_exps.weight iq4_nl +blk.16.attn_k.weight iq4_nl blk.16.attn_output.weight iq3_s -blk.16.attn_v.weight q4_K +blk.16.attn_q.weight iq4_nl +blk.16.attn_v.weight q5_0 +blk.16.ffn_down_exps.weight iq4_nl +blk.16.ffn_gate_exps.weight iq4_nl +blk.16.ffn_up_exps.weight iq4_nl +blk.17.attn_k.weight iq4_nl blk.17.attn_output.weight iq3_s -blk.17.attn_v.weight q4_K +blk.17.attn_q.weight iq4_nl +blk.17.attn_v.weight q5_0 +blk.17.ffn_down_exps.weight iq4_nl +blk.17.ffn_gate_exps.weight iq4_nl +blk.17.ffn_up_exps.weight iq4_nl +blk.18.attn_k.weight iq4_nl blk.18.attn_output.weight iq3_s -blk.18.attn_v.weight q4_K +blk.18.attn_q.weight iq4_nl +blk.18.attn_v.weight q5_0 +blk.18.ffn_down_exps.weight iq4_nl +blk.18.ffn_gate_exps.weight iq4_nl +blk.18.ffn_up_exps.weight iq4_nl +blk.19.attn_k.weight iq4_nl blk.19.attn_output.weight iq3_s -blk.19.attn_v.weight q4_K +blk.19.attn_q.weight iq4_nl +blk.19.attn_v.weight q5_0 +blk.19.ffn_down_exps.weight iq4_nl +blk.19.ffn_gate_exps.weight iq4_nl +blk.19.ffn_up_exps.weight iq4_nl +blk.20.attn_k.weight iq4_nl blk.20.attn_output.weight iq3_s -blk.20.attn_v.weight q4_K +blk.20.attn_q.weight iq4_nl +blk.20.attn_v.weight q5_0 +blk.20.ffn_down_exps.weight iq4_nl +blk.20.ffn_gate_exps.weight iq4_nl +blk.20.ffn_up_exps.weight iq4_nl +blk.21.attn_k.weight iq4_nl blk.21.attn_output.weight iq3_s -blk.21.attn_v.weight q4_K +blk.21.attn_q.weight iq4_nl +blk.21.attn_v.weight q5_0 +blk.21.ffn_down_exps.weight iq4_nl +blk.21.ffn_gate_exps.weight iq4_nl +blk.21.ffn_up_exps.weight iq4_nl +blk.22.attn_k.weight iq4_nl blk.22.attn_output.weight iq3_s -blk.22.attn_v.weight q4_K +blk.22.attn_q.weight iq4_nl +blk.22.attn_v.weight q5_0 +blk.22.ffn_down_exps.weight iq4_nl +blk.22.ffn_gate_exps.weight iq4_nl +blk.22.ffn_up_exps.weight iq4_nl +blk.23.attn_k.weight iq4_nl blk.23.attn_output.weight iq3_s -blk.23.attn_v.weight q4_K +blk.23.attn_q.weight iq4_nl +blk.23.attn_v.weight q5_0 +blk.23.ffn_down_exps.weight iq4_nl +blk.23.ffn_gate_exps.weight iq4_nl +blk.23.ffn_up_exps.weight iq4_nl +blk.24.attn_k.weight iq4_nl blk.24.attn_output.weight iq3_s -blk.24.attn_v.weight q4_K +blk.24.attn_q.weight iq4_nl +blk.24.attn_v.weight q5_0 +blk.24.ffn_down_exps.weight iq4_nl +blk.24.ffn_gate_exps.weight iq4_nl +blk.24.ffn_up_exps.weight iq4_nl +blk.25.attn_k.weight iq4_nl blk.25.attn_output.weight iq3_s -blk.25.attn_v.weight q4_K +blk.25.attn_q.weight iq4_nl +blk.25.attn_v.weight q5_0 +blk.25.ffn_down_exps.weight iq4_nl +blk.25.ffn_gate_exps.weight iq4_nl +blk.25.ffn_up_exps.weight iq4_nl +blk.26.attn_k.weight iq4_nl blk.26.attn_output.weight iq3_s -blk.26.attn_v.weight q4_K +blk.26.attn_q.weight iq4_nl +blk.26.attn_v.weight q5_0 +blk.26.ffn_down_exps.weight iq4_nl +blk.26.ffn_gate_exps.weight iq4_nl +blk.26.ffn_up_exps.weight iq4_nl +blk.27.attn_k.weight iq4_nl blk.27.attn_output.weight iq3_s -blk.27.attn_v.weight q4_K +blk.27.attn_q.weight iq4_nl +blk.27.attn_v.weight q5_0 +blk.27.ffn_down_exps.weight iq4_nl +blk.27.ffn_gate_exps.weight iq4_nl +blk.27.ffn_up_exps.weight iq4_nl +blk.28.attn_k.weight iq4_nl blk.28.attn_output.weight iq3_s -blk.28.attn_v.weight q4_K +blk.28.attn_q.weight iq4_nl +blk.28.attn_v.weight q5_0 +blk.28.ffn_down_exps.weight iq4_nl +blk.28.ffn_gate_exps.weight iq4_nl +blk.28.ffn_up_exps.weight iq4_nl +blk.29.attn_k.weight iq4_nl blk.29.attn_output.weight iq3_s -blk.29.attn_v.weight q4_K +blk.29.attn_q.weight iq4_nl +blk.29.attn_v.weight q5_0 +blk.29.ffn_down_exps.weight iq4_nl +blk.29.ffn_gate_exps.weight iq4_nl +blk.29.ffn_up_exps.weight iq4_nl +blk.30.attn_k.weight iq4_nl blk.30.attn_output.weight iq3_s -blk.30.attn_v.weight q4_K +blk.30.attn_q.weight iq4_nl +blk.30.attn_v.weight q5_0 +blk.30.ffn_down_exps.weight iq4_nl +blk.30.ffn_gate_exps.weight iq4_nl +blk.30.ffn_up_exps.weight iq4_nl +blk.31.attn_k.weight iq4_nl blk.31.attn_output.weight iq3_s -blk.31.attn_v.weight q4_K +blk.31.attn_q.weight iq4_nl +blk.31.attn_v.weight q5_0 +blk.31.ffn_down_exps.weight iq4_nl +blk.31.ffn_gate_exps.weight iq4_nl +blk.31.ffn_up_exps.weight iq4_nl +blk.32.attn_k.weight iq4_nl blk.32.attn_output.weight iq3_s -blk.32.attn_v.weight q4_K +blk.32.attn_q.weight iq4_nl +blk.32.attn_v.weight q5_0 +blk.32.ffn_down_exps.weight iq4_nl +blk.32.ffn_gate_exps.weight iq4_nl +blk.32.ffn_up_exps.weight iq4_nl +blk.33.attn_k.weight iq4_nl blk.33.attn_output.weight iq3_s -blk.33.attn_v.weight q4_K +blk.33.attn_q.weight iq4_nl +blk.33.attn_v.weight q5_0 +blk.33.ffn_down_exps.weight iq4_nl +blk.33.ffn_gate_exps.weight iq4_nl +blk.33.ffn_up_exps.weight iq4_nl +blk.34.attn_k.weight iq4_nl blk.34.attn_output.weight iq3_s -blk.34.attn_v.weight q4_K +blk.34.attn_q.weight iq4_nl +blk.34.attn_v.weight q5_0 +blk.34.ffn_down_exps.weight iq4_nl +blk.34.ffn_gate_exps.weight iq4_nl +blk.34.ffn_up_exps.weight iq4_nl +blk.35.attn_k.weight iq4_nl blk.35.attn_output.weight iq3_s -blk.35.attn_v.weight q4_K +blk.35.attn_q.weight iq4_nl +blk.35.attn_v.weight q5_0 +blk.35.ffn_down_exps.weight iq4_nl +blk.35.ffn_gate_exps.weight iq4_nl +blk.35.ffn_up_exps.weight iq4_nl [IQ2_M] iq2_s output.weight q8_0 -token_embd.weight iq3_s +token_embd.weight iq4_nl +blk.0.attn_k.weight iq4_nl blk.0.attn_output.weight iq3_s -blk.0.attn_v.weight q4_K -blk.0.ffn_down_exps.weight iq3_s +blk.0.attn_q.weight iq4_nl +blk.0.attn_v.weight q5_0 +blk.0.ffn_down_exps.weight iq4_nl +blk.0.ffn_gate_exps.weight iq4_nl +blk.0.ffn_up_exps.weight iq4_nl +blk.1.attn_k.weight iq4_nl blk.1.attn_output.weight iq3_s -blk.1.attn_v.weight q4_K -blk.1.ffn_down_exps.weight iq3_s +blk.1.attn_q.weight iq4_nl +blk.1.attn_v.weight q5_0 +blk.1.ffn_down_exps.weight iq4_nl +blk.1.ffn_gate_exps.weight iq4_nl +blk.1.ffn_up_exps.weight iq4_nl +blk.2.attn_k.weight iq4_nl blk.2.attn_output.weight iq3_s -blk.2.attn_v.weight q4_K -blk.2.ffn_down_exps.weight iq3_s +blk.2.attn_q.weight iq4_nl +blk.2.attn_v.weight q5_0 +blk.2.ffn_down_exps.weight iq4_nl +blk.2.ffn_gate_exps.weight iq4_nl +blk.2.ffn_up_exps.weight iq4_nl +blk.3.attn_k.weight iq4_nl blk.3.attn_output.weight iq3_s -blk.3.attn_v.weight q4_K -blk.3.ffn_down_exps.weight iq3_s +blk.3.attn_q.weight iq4_nl +blk.3.attn_v.weight q5_0 +blk.3.ffn_down_exps.weight iq4_nl +blk.3.ffn_gate_exps.weight iq4_nl +blk.3.ffn_up_exps.weight iq4_nl +blk.4.attn_k.weight iq4_nl blk.4.attn_output.weight iq3_s -blk.4.attn_v.weight q4_K +blk.4.attn_q.weight iq4_nl +blk.4.attn_v.weight q5_0 +blk.4.ffn_down_exps.weight iq4_nl +blk.4.ffn_gate_exps.weight iq4_nl +blk.4.ffn_up_exps.weight iq4_nl +blk.5.attn_k.weight iq4_nl blk.5.attn_output.weight iq3_s -blk.5.attn_v.weight q4_K +blk.5.attn_q.weight iq4_nl +blk.5.attn_v.weight q5_0 +blk.5.ffn_down_exps.weight iq4_nl +blk.5.ffn_gate_exps.weight iq4_nl +blk.5.ffn_up_exps.weight iq4_nl +blk.6.attn_k.weight iq4_nl blk.6.attn_output.weight iq3_s -blk.6.attn_v.weight q4_K +blk.6.attn_q.weight iq4_nl +blk.6.attn_v.weight q5_0 +blk.6.ffn_down_exps.weight iq4_nl +blk.6.ffn_gate_exps.weight iq4_nl +blk.6.ffn_up_exps.weight iq4_nl +blk.7.attn_k.weight iq4_nl blk.7.attn_output.weight iq3_s -blk.7.attn_v.weight q4_K +blk.7.attn_q.weight iq4_nl +blk.7.attn_v.weight q5_0 +blk.7.ffn_down_exps.weight iq4_nl +blk.7.ffn_gate_exps.weight iq4_nl +blk.7.ffn_up_exps.weight iq4_nl +blk.8.attn_k.weight iq4_nl blk.8.attn_output.weight iq3_s -blk.8.attn_v.weight q4_K +blk.8.attn_q.weight iq4_nl +blk.8.attn_v.weight q5_0 +blk.8.ffn_down_exps.weight iq4_nl +blk.8.ffn_gate_exps.weight iq4_nl +blk.8.ffn_up_exps.weight iq4_nl +blk.9.attn_k.weight iq4_nl blk.9.attn_output.weight iq3_s -blk.9.attn_v.weight q4_K +blk.9.attn_q.weight iq4_nl +blk.9.attn_v.weight q5_0 +blk.9.ffn_down_exps.weight iq4_nl +blk.9.ffn_gate_exps.weight iq4_nl +blk.9.ffn_up_exps.weight iq4_nl +blk.10.attn_k.weight iq4_nl blk.10.attn_output.weight iq3_s -blk.10.attn_v.weight q4_K +blk.10.attn_q.weight iq4_nl +blk.10.attn_v.weight q5_0 +blk.10.ffn_down_exps.weight iq4_nl +blk.10.ffn_gate_exps.weight iq4_nl +blk.10.ffn_up_exps.weight iq4_nl +blk.11.attn_k.weight iq4_nl blk.11.attn_output.weight iq3_s -blk.11.attn_v.weight q4_K +blk.11.attn_q.weight iq4_nl +blk.11.attn_v.weight q5_0 +blk.11.ffn_down_exps.weight iq4_nl +blk.11.ffn_gate_exps.weight iq4_nl +blk.11.ffn_up_exps.weight iq4_nl +blk.12.attn_k.weight iq4_nl blk.12.attn_output.weight iq3_s -blk.12.attn_v.weight q4_K +blk.12.attn_q.weight iq4_nl +blk.12.attn_v.weight q5_0 +blk.12.ffn_down_exps.weight iq4_nl +blk.12.ffn_gate_exps.weight iq4_nl +blk.12.ffn_up_exps.weight iq4_nl +blk.13.attn_k.weight iq4_nl blk.13.attn_output.weight iq3_s -blk.13.attn_v.weight q4_K +blk.13.attn_q.weight iq4_nl +blk.13.attn_v.weight q5_0 +blk.13.ffn_down_exps.weight iq4_nl +blk.13.ffn_gate_exps.weight iq4_nl +blk.13.ffn_up_exps.weight iq4_nl +blk.14.attn_k.weight iq4_nl blk.14.attn_output.weight iq3_s -blk.14.attn_v.weight q4_K +blk.14.attn_q.weight iq4_nl +blk.14.attn_v.weight q5_0 +blk.14.ffn_down_exps.weight iq4_nl +blk.14.ffn_gate_exps.weight iq4_nl +blk.14.ffn_up_exps.weight iq4_nl +blk.15.attn_k.weight iq4_nl blk.15.attn_output.weight iq3_s -blk.15.attn_v.weight q4_K +blk.15.attn_q.weight iq4_nl +blk.15.attn_v.weight q5_0 +blk.15.ffn_down_exps.weight iq4_nl +blk.15.ffn_gate_exps.weight iq4_nl +blk.15.ffn_up_exps.weight iq4_nl +blk.16.attn_k.weight iq4_nl blk.16.attn_output.weight iq3_s -blk.16.attn_v.weight q4_K +blk.16.attn_q.weight iq4_nl +blk.16.attn_v.weight q5_0 +blk.16.ffn_down_exps.weight iq4_nl +blk.16.ffn_gate_exps.weight iq4_nl +blk.16.ffn_up_exps.weight iq4_nl +blk.17.attn_k.weight iq4_nl blk.17.attn_output.weight iq3_s -blk.17.attn_v.weight q4_K +blk.17.attn_q.weight iq4_nl +blk.17.attn_v.weight q5_0 +blk.17.ffn_down_exps.weight iq4_nl +blk.17.ffn_gate_exps.weight iq4_nl +blk.17.ffn_up_exps.weight iq4_nl +blk.18.attn_k.weight iq4_nl blk.18.attn_output.weight iq3_s -blk.18.attn_v.weight q4_K +blk.18.attn_q.weight iq4_nl +blk.18.attn_v.weight q5_0 +blk.18.ffn_down_exps.weight iq4_nl +blk.18.ffn_gate_exps.weight iq4_nl +blk.18.ffn_up_exps.weight iq4_nl +blk.19.attn_k.weight iq4_nl blk.19.attn_output.weight iq3_s -blk.19.attn_v.weight q4_K +blk.19.attn_q.weight iq4_nl +blk.19.attn_v.weight q5_0 +blk.19.ffn_down_exps.weight iq4_nl +blk.19.ffn_gate_exps.weight iq4_nl +blk.19.ffn_up_exps.weight iq4_nl +blk.20.attn_k.weight iq4_nl blk.20.attn_output.weight iq3_s -blk.20.attn_v.weight q4_K +blk.20.attn_q.weight iq4_nl +blk.20.attn_v.weight q5_0 +blk.20.ffn_down_exps.weight iq4_nl +blk.20.ffn_gate_exps.weight iq4_nl +blk.20.ffn_up_exps.weight iq4_nl +blk.21.attn_k.weight iq4_nl blk.21.attn_output.weight iq3_s -blk.21.attn_v.weight q4_K +blk.21.attn_q.weight iq4_nl +blk.21.attn_v.weight q5_0 +blk.21.ffn_down_exps.weight iq4_nl +blk.21.ffn_gate_exps.weight iq4_nl +blk.21.ffn_up_exps.weight iq4_nl +blk.22.attn_k.weight iq4_nl blk.22.attn_output.weight iq3_s -blk.22.attn_v.weight q4_K +blk.22.attn_q.weight iq4_nl +blk.22.attn_v.weight q5_0 +blk.22.ffn_down_exps.weight iq4_nl +blk.22.ffn_gate_exps.weight iq4_nl +blk.22.ffn_up_exps.weight iq4_nl +blk.23.attn_k.weight iq4_nl blk.23.attn_output.weight iq3_s -blk.23.attn_v.weight q4_K +blk.23.attn_q.weight iq4_nl +blk.23.attn_v.weight q5_0 +blk.23.ffn_down_exps.weight iq4_nl +blk.23.ffn_gate_exps.weight iq4_nl +blk.23.ffn_up_exps.weight iq4_nl +blk.24.attn_k.weight iq4_nl blk.24.attn_output.weight iq3_s -blk.24.attn_v.weight q4_K +blk.24.attn_q.weight iq4_nl +blk.24.attn_v.weight q5_0 +blk.24.ffn_down_exps.weight iq4_nl +blk.24.ffn_gate_exps.weight iq4_nl +blk.24.ffn_up_exps.weight iq4_nl +blk.25.attn_k.weight iq4_nl blk.25.attn_output.weight iq3_s -blk.25.attn_v.weight q4_K +blk.25.attn_q.weight iq4_nl +blk.25.attn_v.weight q5_0 +blk.25.ffn_down_exps.weight iq4_nl +blk.25.ffn_gate_exps.weight iq4_nl +blk.25.ffn_up_exps.weight iq4_nl +blk.26.attn_k.weight iq4_nl blk.26.attn_output.weight iq3_s -blk.26.attn_v.weight q4_K +blk.26.attn_q.weight iq4_nl +blk.26.attn_v.weight q5_0 +blk.26.ffn_down_exps.weight iq4_nl +blk.26.ffn_gate_exps.weight iq4_nl +blk.26.ffn_up_exps.weight iq4_nl +blk.27.attn_k.weight iq4_nl blk.27.attn_output.weight iq3_s -blk.27.attn_v.weight q4_K +blk.27.attn_q.weight iq4_nl +blk.27.attn_v.weight q5_0 +blk.27.ffn_down_exps.weight iq4_nl +blk.27.ffn_gate_exps.weight iq4_nl +blk.27.ffn_up_exps.weight iq4_nl +blk.28.attn_k.weight iq4_nl blk.28.attn_output.weight iq3_s -blk.28.attn_v.weight q4_K +blk.28.attn_q.weight iq4_nl +blk.28.attn_v.weight q5_0 +blk.28.ffn_down_exps.weight iq4_nl +blk.28.ffn_gate_exps.weight iq4_nl +blk.28.ffn_up_exps.weight iq4_nl +blk.29.attn_k.weight iq4_nl blk.29.attn_output.weight iq3_s -blk.29.attn_v.weight q4_K +blk.29.attn_q.weight iq4_nl +blk.29.attn_v.weight q5_0 +blk.29.ffn_down_exps.weight iq4_nl +blk.29.ffn_gate_exps.weight iq4_nl +blk.29.ffn_up_exps.weight iq4_nl +blk.30.attn_k.weight iq4_nl blk.30.attn_output.weight iq3_s -blk.30.attn_v.weight q4_K +blk.30.attn_q.weight iq4_nl +blk.30.attn_v.weight q5_0 +blk.30.ffn_down_exps.weight iq4_nl +blk.30.ffn_gate_exps.weight iq4_nl +blk.30.ffn_up_exps.weight iq4_nl +blk.31.attn_k.weight iq4_nl blk.31.attn_output.weight iq3_s -blk.31.attn_v.weight q4_K +blk.31.attn_q.weight iq4_nl +blk.31.attn_v.weight q5_0 +blk.31.ffn_down_exps.weight iq4_nl +blk.31.ffn_gate_exps.weight iq4_nl +blk.31.ffn_up_exps.weight iq4_nl +blk.32.attn_k.weight iq4_nl blk.32.attn_output.weight iq3_s -blk.32.attn_v.weight q4_K +blk.32.attn_q.weight iq4_nl +blk.32.attn_v.weight q5_0 +blk.32.ffn_down_exps.weight iq4_nl +blk.32.ffn_gate_exps.weight iq4_nl +blk.32.ffn_up_exps.weight iq4_nl +blk.33.attn_k.weight iq4_nl blk.33.attn_output.weight iq3_s -blk.33.attn_v.weight q4_K +blk.33.attn_q.weight iq4_nl +blk.33.attn_v.weight q5_0 +blk.33.ffn_down_exps.weight iq4_nl +blk.33.ffn_gate_exps.weight iq4_nl +blk.33.ffn_up_exps.weight iq4_nl +blk.34.attn_k.weight iq4_nl blk.34.attn_output.weight iq3_s -blk.34.attn_v.weight q4_K +blk.34.attn_q.weight iq4_nl +blk.34.attn_v.weight q5_0 +blk.34.ffn_down_exps.weight iq4_nl +blk.34.ffn_gate_exps.weight iq4_nl +blk.34.ffn_up_exps.weight iq4_nl +blk.35.attn_k.weight iq4_nl blk.35.attn_output.weight iq3_s -blk.35.attn_v.weight q4_K +blk.35.attn_q.weight iq4_nl +blk.35.attn_v.weight q5_0 +blk.35.ffn_down_exps.weight iq4_nl +blk.35.ffn_gate_exps.weight iq4_nl +blk.35.ffn_up_exps.weight iq4_nl [IQ4_XS] iq4_xs output.weight q8_0 -blk.0.attn_v.weight q5_K -blk.0.ffn_down_exps.weight q5_K -blk.1.attn_v.weight q5_K -blk.1.ffn_down_exps.weight q5_K -blk.2.attn_v.weight q5_K -blk.2.ffn_down_exps.weight q5_K -blk.3.attn_v.weight q5_K -blk.3.ffn_down_exps.weight q5_K -blk.4.attn_v.weight q5_K -blk.5.attn_v.weight q5_K -blk.6.attn_v.weight q5_K -blk.7.attn_v.weight q5_K -blk.8.attn_v.weight q5_K -blk.9.attn_v.weight q5_K -blk.10.attn_v.weight q5_K -blk.11.attn_v.weight q5_K -blk.12.attn_v.weight q5_K -blk.13.attn_v.weight q5_K -blk.14.attn_v.weight q5_K -blk.15.attn_v.weight q5_K -blk.16.attn_v.weight q5_K -blk.17.attn_v.weight q5_K -blk.18.attn_v.weight q5_K -blk.19.attn_v.weight q5_K -blk.20.attn_v.weight q5_K -blk.21.attn_v.weight q5_K -blk.22.attn_v.weight q5_K -blk.23.attn_v.weight q5_K -blk.24.attn_v.weight q5_K -blk.25.attn_v.weight q5_K -blk.26.attn_v.weight q5_K -blk.27.attn_v.weight q5_K -blk.28.attn_v.weight q5_K -blk.29.attn_v.weight q5_K -blk.30.attn_v.weight q5_K -blk.31.attn_v.weight q5_K -blk.32.attn_v.weight q5_K -blk.33.attn_v.weight q5_K -blk.34.attn_v.weight q5_K -blk.35.attn_v.weight q5_K +token_embd.weight iq4_nl +blk.0.attn_k.weight iq4_nl +blk.0.attn_q.weight iq4_nl +blk.0.attn_v.weight q5_1 +blk.0.ffn_down_exps.weight q5_1 +blk.0.ffn_gate_exps.weight iq4_nl +blk.0.ffn_up_exps.weight iq4_nl +blk.1.attn_k.weight iq4_nl +blk.1.attn_q.weight iq4_nl +blk.1.attn_v.weight q5_1 +blk.1.ffn_down_exps.weight q5_1 +blk.1.ffn_gate_exps.weight iq4_nl +blk.1.ffn_up_exps.weight iq4_nl +blk.2.attn_k.weight iq4_nl +blk.2.attn_q.weight iq4_nl +blk.2.attn_v.weight q5_1 +blk.2.ffn_down_exps.weight q5_1 +blk.2.ffn_gate_exps.weight iq4_nl +blk.2.ffn_up_exps.weight iq4_nl +blk.3.attn_k.weight iq4_nl +blk.3.attn_q.weight iq4_nl +blk.3.attn_v.weight q5_1 +blk.3.ffn_down_exps.weight q5_1 +blk.3.ffn_gate_exps.weight iq4_nl +blk.3.ffn_up_exps.weight iq4_nl +blk.4.attn_k.weight iq4_nl +blk.4.attn_q.weight iq4_nl +blk.4.attn_v.weight q5_1 +blk.4.ffn_down_exps.weight iq4_nl +blk.4.ffn_gate_exps.weight iq4_nl +blk.4.ffn_up_exps.weight iq4_nl +blk.5.attn_k.weight iq4_nl +blk.5.attn_q.weight iq4_nl +blk.5.attn_v.weight q5_1 +blk.5.ffn_down_exps.weight iq4_nl +blk.5.ffn_gate_exps.weight iq4_nl +blk.5.ffn_up_exps.weight iq4_nl +blk.6.attn_k.weight iq4_nl +blk.6.attn_q.weight iq4_nl +blk.6.attn_v.weight q5_1 +blk.6.ffn_down_exps.weight iq4_nl +blk.6.ffn_gate_exps.weight iq4_nl +blk.6.ffn_up_exps.weight iq4_nl +blk.7.attn_k.weight iq4_nl +blk.7.attn_q.weight iq4_nl +blk.7.attn_v.weight q5_1 +blk.7.ffn_down_exps.weight iq4_nl +blk.7.ffn_gate_exps.weight iq4_nl +blk.7.ffn_up_exps.weight iq4_nl +blk.8.attn_k.weight iq4_nl +blk.8.attn_q.weight iq4_nl +blk.8.attn_v.weight q5_1 +blk.8.ffn_down_exps.weight iq4_nl +blk.8.ffn_gate_exps.weight iq4_nl +blk.8.ffn_up_exps.weight iq4_nl +blk.9.attn_k.weight iq4_nl +blk.9.attn_q.weight iq4_nl +blk.9.attn_v.weight q5_1 +blk.9.ffn_down_exps.weight iq4_nl +blk.9.ffn_gate_exps.weight iq4_nl +blk.9.ffn_up_exps.weight iq4_nl +blk.10.attn_k.weight iq4_nl +blk.10.attn_q.weight iq4_nl +blk.10.attn_v.weight q5_1 +blk.10.ffn_down_exps.weight iq4_nl +blk.10.ffn_gate_exps.weight iq4_nl +blk.10.ffn_up_exps.weight iq4_nl +blk.11.attn_k.weight iq4_nl +blk.11.attn_q.weight iq4_nl +blk.11.attn_v.weight q5_1 +blk.11.ffn_down_exps.weight iq4_nl +blk.11.ffn_gate_exps.weight iq4_nl +blk.11.ffn_up_exps.weight iq4_nl +blk.12.attn_k.weight iq4_nl +blk.12.attn_q.weight iq4_nl +blk.12.attn_v.weight q5_1 +blk.12.ffn_down_exps.weight iq4_nl +blk.12.ffn_gate_exps.weight iq4_nl +blk.12.ffn_up_exps.weight iq4_nl +blk.13.attn_k.weight iq4_nl +blk.13.attn_q.weight iq4_nl +blk.13.attn_v.weight q5_1 +blk.13.ffn_down_exps.weight iq4_nl +blk.13.ffn_gate_exps.weight iq4_nl +blk.13.ffn_up_exps.weight iq4_nl +blk.14.attn_k.weight iq4_nl +blk.14.attn_q.weight iq4_nl +blk.14.attn_v.weight q5_1 +blk.14.ffn_down_exps.weight iq4_nl +blk.14.ffn_gate_exps.weight iq4_nl +blk.14.ffn_up_exps.weight iq4_nl +blk.15.attn_k.weight iq4_nl +blk.15.attn_q.weight iq4_nl +blk.15.attn_v.weight q5_1 +blk.15.ffn_down_exps.weight iq4_nl +blk.15.ffn_gate_exps.weight iq4_nl +blk.15.ffn_up_exps.weight iq4_nl +blk.16.attn_k.weight iq4_nl +blk.16.attn_q.weight iq4_nl +blk.16.attn_v.weight q5_1 +blk.16.ffn_down_exps.weight iq4_nl +blk.16.ffn_gate_exps.weight iq4_nl +blk.16.ffn_up_exps.weight iq4_nl +blk.17.attn_k.weight iq4_nl +blk.17.attn_q.weight iq4_nl +blk.17.attn_v.weight q5_1 +blk.17.ffn_down_exps.weight iq4_nl +blk.17.ffn_gate_exps.weight iq4_nl +blk.17.ffn_up_exps.weight iq4_nl +blk.18.attn_k.weight iq4_nl +blk.18.attn_q.weight iq4_nl +blk.18.attn_v.weight q5_1 +blk.18.ffn_down_exps.weight iq4_nl +blk.18.ffn_gate_exps.weight iq4_nl +blk.18.ffn_up_exps.weight iq4_nl +blk.19.attn_k.weight iq4_nl +blk.19.attn_q.weight iq4_nl +blk.19.attn_v.weight q5_1 +blk.19.ffn_down_exps.weight iq4_nl +blk.19.ffn_gate_exps.weight iq4_nl +blk.19.ffn_up_exps.weight iq4_nl +blk.20.attn_k.weight iq4_nl +blk.20.attn_q.weight iq4_nl +blk.20.attn_v.weight q5_1 +blk.20.ffn_down_exps.weight iq4_nl +blk.20.ffn_gate_exps.weight iq4_nl +blk.20.ffn_up_exps.weight iq4_nl +blk.21.attn_k.weight iq4_nl +blk.21.attn_q.weight iq4_nl +blk.21.attn_v.weight q5_1 +blk.21.ffn_down_exps.weight iq4_nl +blk.21.ffn_gate_exps.weight iq4_nl +blk.21.ffn_up_exps.weight iq4_nl +blk.22.attn_k.weight iq4_nl +blk.22.attn_q.weight iq4_nl +blk.22.attn_v.weight q5_1 +blk.22.ffn_down_exps.weight iq4_nl +blk.22.ffn_gate_exps.weight iq4_nl +blk.22.ffn_up_exps.weight iq4_nl +blk.23.attn_k.weight iq4_nl +blk.23.attn_q.weight iq4_nl +blk.23.attn_v.weight q5_1 +blk.23.ffn_down_exps.weight iq4_nl +blk.23.ffn_gate_exps.weight iq4_nl +blk.23.ffn_up_exps.weight iq4_nl +blk.24.attn_k.weight iq4_nl +blk.24.attn_q.weight iq4_nl +blk.24.attn_v.weight q5_1 +blk.24.ffn_down_exps.weight iq4_nl +blk.24.ffn_gate_exps.weight iq4_nl +blk.24.ffn_up_exps.weight iq4_nl +blk.25.attn_k.weight iq4_nl +blk.25.attn_q.weight iq4_nl +blk.25.attn_v.weight q5_1 +blk.25.ffn_down_exps.weight iq4_nl +blk.25.ffn_gate_exps.weight iq4_nl +blk.25.ffn_up_exps.weight iq4_nl +blk.26.attn_k.weight iq4_nl +blk.26.attn_q.weight iq4_nl +blk.26.attn_v.weight q5_1 +blk.26.ffn_down_exps.weight iq4_nl +blk.26.ffn_gate_exps.weight iq4_nl +blk.26.ffn_up_exps.weight iq4_nl +blk.27.attn_k.weight iq4_nl +blk.27.attn_q.weight iq4_nl +blk.27.attn_v.weight q5_1 +blk.27.ffn_down_exps.weight iq4_nl +blk.27.ffn_gate_exps.weight iq4_nl +blk.27.ffn_up_exps.weight iq4_nl +blk.28.attn_k.weight iq4_nl +blk.28.attn_q.weight iq4_nl +blk.28.attn_v.weight q5_1 +blk.28.ffn_down_exps.weight iq4_nl +blk.28.ffn_gate_exps.weight iq4_nl +blk.28.ffn_up_exps.weight iq4_nl +blk.29.attn_k.weight iq4_nl +blk.29.attn_q.weight iq4_nl +blk.29.attn_v.weight q5_1 +blk.29.ffn_down_exps.weight iq4_nl +blk.29.ffn_gate_exps.weight iq4_nl +blk.29.ffn_up_exps.weight iq4_nl +blk.30.attn_k.weight iq4_nl +blk.30.attn_q.weight iq4_nl +blk.30.attn_v.weight q5_1 +blk.30.ffn_down_exps.weight iq4_nl +blk.30.ffn_gate_exps.weight iq4_nl +blk.30.ffn_up_exps.weight iq4_nl +blk.31.attn_k.weight iq4_nl +blk.31.attn_q.weight iq4_nl +blk.31.attn_v.weight q5_1 +blk.31.ffn_down_exps.weight iq4_nl +blk.31.ffn_gate_exps.weight iq4_nl +blk.31.ffn_up_exps.weight iq4_nl +blk.32.attn_k.weight iq4_nl +blk.32.attn_q.weight iq4_nl +blk.32.attn_v.weight q5_1 +blk.32.ffn_down_exps.weight iq4_nl +blk.32.ffn_gate_exps.weight iq4_nl +blk.32.ffn_up_exps.weight iq4_nl +blk.33.attn_k.weight iq4_nl +blk.33.attn_q.weight iq4_nl +blk.33.attn_v.weight q5_1 +blk.33.ffn_down_exps.weight iq4_nl +blk.33.ffn_gate_exps.weight iq4_nl +blk.33.ffn_up_exps.weight iq4_nl +blk.34.attn_k.weight iq4_nl +blk.34.attn_q.weight iq4_nl +blk.34.attn_v.weight q5_1 +blk.34.ffn_down_exps.weight iq4_nl +blk.34.ffn_gate_exps.weight iq4_nl +blk.34.ffn_up_exps.weight iq4_nl +blk.35.attn_k.weight iq4_nl +blk.35.attn_q.weight iq4_nl +blk.35.attn_v.weight q5_1 +blk.35.ffn_down_exps.weight iq4_nl +blk.35.ffn_gate_exps.weight iq4_nl +blk.35.ffn_up_exps.weight iq4_nl [IQ1_M] iq1_m output.weight q8_0 -token_embd.weight q2_K +token_embd.weight q4_0 +blk.0.attn_k.weight iq4_nl blk.0.attn_output.weight iq2_xxs -blk.0.attn_v.weight q4_K -blk.0.ffn_down_exps.weight q2_K +blk.0.attn_q.weight iq4_nl +blk.0.attn_v.weight q5_0 +blk.0.ffn_down_exps.weight q4_0 +blk.0.ffn_gate_exps.weight iq4_nl +blk.0.ffn_up_exps.weight iq4_nl +blk.1.attn_k.weight iq4_nl blk.1.attn_output.weight iq2_xxs -blk.1.attn_v.weight q4_K -blk.1.ffn_down_exps.weight q2_K +blk.1.attn_q.weight iq4_nl +blk.1.attn_v.weight q5_0 +blk.1.ffn_down_exps.weight q4_0 +blk.1.ffn_gate_exps.weight iq4_nl +blk.1.ffn_up_exps.weight iq4_nl +blk.2.attn_k.weight iq4_nl blk.2.attn_output.weight iq2_xxs -blk.2.attn_v.weight q4_K -blk.2.ffn_down_exps.weight q2_K +blk.2.attn_q.weight iq4_nl +blk.2.attn_v.weight q5_0 +blk.2.ffn_down_exps.weight q4_0 +blk.2.ffn_gate_exps.weight iq4_nl +blk.2.ffn_up_exps.weight iq4_nl +blk.3.attn_k.weight iq4_nl blk.3.attn_output.weight iq2_xxs -blk.3.attn_v.weight q4_K -blk.3.ffn_down_exps.weight q2_K +blk.3.attn_q.weight iq4_nl +blk.3.attn_v.weight q5_0 +blk.3.ffn_down_exps.weight q4_0 +blk.3.ffn_gate_exps.weight iq4_nl +blk.3.ffn_up_exps.weight iq4_nl +blk.4.attn_k.weight iq4_nl blk.4.attn_output.weight iq2_xxs -blk.4.attn_v.weight q4_K +blk.4.attn_q.weight iq4_nl +blk.4.attn_v.weight q5_0 +blk.4.ffn_down_exps.weight iq4_nl +blk.4.ffn_gate_exps.weight iq4_nl +blk.4.ffn_up_exps.weight iq4_nl +blk.5.attn_k.weight iq4_nl blk.5.attn_output.weight iq2_xxs -blk.5.attn_v.weight q4_K +blk.5.attn_q.weight iq4_nl +blk.5.attn_v.weight q5_0 +blk.5.ffn_down_exps.weight iq4_nl +blk.5.ffn_gate_exps.weight iq4_nl +blk.5.ffn_up_exps.weight iq4_nl +blk.6.attn_k.weight iq4_nl blk.6.attn_output.weight iq2_xxs -blk.6.attn_v.weight q4_K +blk.6.attn_q.weight iq4_nl +blk.6.attn_v.weight q5_0 +blk.6.ffn_down_exps.weight iq4_nl +blk.6.ffn_gate_exps.weight iq4_nl +blk.6.ffn_up_exps.weight iq4_nl +blk.7.attn_k.weight iq4_nl blk.7.attn_output.weight iq2_xxs -blk.7.attn_v.weight q4_K +blk.7.attn_q.weight iq4_nl +blk.7.attn_v.weight q5_0 +blk.7.ffn_down_exps.weight iq4_nl +blk.7.ffn_gate_exps.weight iq4_nl +blk.7.ffn_up_exps.weight iq4_nl +blk.8.attn_k.weight iq4_nl blk.8.attn_output.weight iq2_xxs -blk.8.attn_v.weight q4_K +blk.8.attn_q.weight iq4_nl +blk.8.attn_v.weight q5_0 +blk.8.ffn_down_exps.weight iq4_nl +blk.8.ffn_gate_exps.weight iq4_nl +blk.8.ffn_up_exps.weight iq4_nl +blk.9.attn_k.weight iq4_nl blk.9.attn_output.weight iq2_xxs -blk.9.attn_v.weight q4_K +blk.9.attn_q.weight iq4_nl +blk.9.attn_v.weight q5_0 +blk.9.ffn_down_exps.weight iq4_nl +blk.9.ffn_gate_exps.weight iq4_nl +blk.9.ffn_up_exps.weight iq4_nl +blk.10.attn_k.weight iq4_nl blk.10.attn_output.weight iq2_xxs -blk.10.attn_v.weight q4_K +blk.10.attn_q.weight iq4_nl +blk.10.attn_v.weight q5_0 +blk.10.ffn_down_exps.weight iq4_nl +blk.10.ffn_gate_exps.weight iq4_nl +blk.10.ffn_up_exps.weight iq4_nl +blk.11.attn_k.weight iq4_nl blk.11.attn_output.weight iq2_xxs -blk.11.attn_v.weight q4_K +blk.11.attn_q.weight iq4_nl +blk.11.attn_v.weight q5_0 +blk.11.ffn_down_exps.weight iq4_nl +blk.11.ffn_gate_exps.weight iq4_nl +blk.11.ffn_up_exps.weight iq4_nl +blk.12.attn_k.weight iq4_nl blk.12.attn_output.weight iq2_xxs -blk.12.attn_v.weight q4_K +blk.12.attn_q.weight iq4_nl +blk.12.attn_v.weight q5_0 +blk.12.ffn_down_exps.weight iq4_nl +blk.12.ffn_gate_exps.weight iq4_nl +blk.12.ffn_up_exps.weight iq4_nl +blk.13.attn_k.weight iq4_nl blk.13.attn_output.weight iq2_xxs -blk.13.attn_v.weight q4_K +blk.13.attn_q.weight iq4_nl +blk.13.attn_v.weight q5_0 +blk.13.ffn_down_exps.weight iq4_nl +blk.13.ffn_gate_exps.weight iq4_nl +blk.13.ffn_up_exps.weight iq4_nl +blk.14.attn_k.weight iq4_nl blk.14.attn_output.weight iq2_xxs -blk.14.attn_v.weight q4_K +blk.14.attn_q.weight iq4_nl +blk.14.attn_v.weight q5_0 +blk.14.ffn_down_exps.weight iq4_nl +blk.14.ffn_gate_exps.weight iq4_nl +blk.14.ffn_up_exps.weight iq4_nl +blk.15.attn_k.weight iq4_nl blk.15.attn_output.weight iq2_xxs -blk.15.attn_v.weight q4_K +blk.15.attn_q.weight iq4_nl +blk.15.attn_v.weight q5_0 +blk.15.ffn_down_exps.weight iq4_nl +blk.15.ffn_gate_exps.weight iq4_nl +blk.15.ffn_up_exps.weight iq4_nl +blk.16.attn_k.weight iq4_nl blk.16.attn_output.weight iq2_xxs -blk.16.attn_v.weight q4_K +blk.16.attn_q.weight iq4_nl +blk.16.attn_v.weight q5_0 +blk.16.ffn_down_exps.weight iq4_nl +blk.16.ffn_gate_exps.weight iq4_nl +blk.16.ffn_up_exps.weight iq4_nl +blk.17.attn_k.weight iq4_nl blk.17.attn_output.weight iq2_xxs -blk.17.attn_v.weight q4_K +blk.17.attn_q.weight iq4_nl +blk.17.attn_v.weight q5_0 +blk.17.ffn_down_exps.weight iq4_nl +blk.17.ffn_gate_exps.weight iq4_nl +blk.17.ffn_up_exps.weight iq4_nl +blk.18.attn_k.weight iq4_nl blk.18.attn_output.weight iq2_xxs -blk.18.attn_v.weight q4_K +blk.18.attn_q.weight iq4_nl +blk.18.attn_v.weight q5_0 +blk.18.ffn_down_exps.weight iq4_nl +blk.18.ffn_gate_exps.weight iq4_nl +blk.18.ffn_up_exps.weight iq4_nl +blk.19.attn_k.weight iq4_nl blk.19.attn_output.weight iq2_xxs -blk.19.attn_v.weight q4_K +blk.19.attn_q.weight iq4_nl +blk.19.attn_v.weight q5_0 +blk.19.ffn_down_exps.weight iq4_nl +blk.19.ffn_gate_exps.weight iq4_nl +blk.19.ffn_up_exps.weight iq4_nl +blk.20.attn_k.weight iq4_nl blk.20.attn_output.weight iq2_xxs -blk.20.attn_v.weight q4_K +blk.20.attn_q.weight iq4_nl +blk.20.attn_v.weight q5_0 +blk.20.ffn_down_exps.weight iq4_nl +blk.20.ffn_gate_exps.weight iq4_nl +blk.20.ffn_up_exps.weight iq4_nl +blk.21.attn_k.weight iq4_nl blk.21.attn_output.weight iq2_xxs -blk.21.attn_v.weight q4_K +blk.21.attn_q.weight iq4_nl +blk.21.attn_v.weight q5_0 +blk.21.ffn_down_exps.weight iq4_nl +blk.21.ffn_gate_exps.weight iq4_nl +blk.21.ffn_up_exps.weight iq4_nl +blk.22.attn_k.weight iq4_nl blk.22.attn_output.weight iq2_xxs -blk.22.attn_v.weight q4_K +blk.22.attn_q.weight iq4_nl +blk.22.attn_v.weight q5_0 +blk.22.ffn_down_exps.weight iq4_nl +blk.22.ffn_gate_exps.weight iq4_nl +blk.22.ffn_up_exps.weight iq4_nl +blk.23.attn_k.weight iq4_nl blk.23.attn_output.weight iq2_xxs -blk.23.attn_v.weight q4_K +blk.23.attn_q.weight iq4_nl +blk.23.attn_v.weight q5_0 +blk.23.ffn_down_exps.weight iq4_nl +blk.23.ffn_gate_exps.weight iq4_nl +blk.23.ffn_up_exps.weight iq4_nl +blk.24.attn_k.weight iq4_nl blk.24.attn_output.weight iq2_xxs -blk.24.attn_v.weight q4_K +blk.24.attn_q.weight iq4_nl +blk.24.attn_v.weight q5_0 +blk.24.ffn_down_exps.weight iq4_nl +blk.24.ffn_gate_exps.weight iq4_nl +blk.24.ffn_up_exps.weight iq4_nl +blk.25.attn_k.weight iq4_nl blk.25.attn_output.weight iq2_xxs -blk.25.attn_v.weight q4_K +blk.25.attn_q.weight iq4_nl +blk.25.attn_v.weight q5_0 +blk.25.ffn_down_exps.weight iq4_nl +blk.25.ffn_gate_exps.weight iq4_nl +blk.25.ffn_up_exps.weight iq4_nl +blk.26.attn_k.weight iq4_nl blk.26.attn_output.weight iq2_xxs -blk.26.attn_v.weight q4_K +blk.26.attn_q.weight iq4_nl +blk.26.attn_v.weight q5_0 +blk.26.ffn_down_exps.weight iq4_nl +blk.26.ffn_gate_exps.weight iq4_nl +blk.26.ffn_up_exps.weight iq4_nl +blk.27.attn_k.weight iq4_nl blk.27.attn_output.weight iq2_xxs -blk.27.attn_v.weight q4_K +blk.27.attn_q.weight iq4_nl +blk.27.attn_v.weight q5_0 +blk.27.ffn_down_exps.weight iq4_nl +blk.27.ffn_gate_exps.weight iq4_nl +blk.27.ffn_up_exps.weight iq4_nl +blk.28.attn_k.weight iq4_nl blk.28.attn_output.weight iq2_xxs -blk.28.attn_v.weight q4_K +blk.28.attn_q.weight iq4_nl +blk.28.attn_v.weight q5_0 +blk.28.ffn_down_exps.weight iq4_nl +blk.28.ffn_gate_exps.weight iq4_nl +blk.28.ffn_up_exps.weight iq4_nl +blk.29.attn_k.weight iq4_nl blk.29.attn_output.weight iq2_xxs -blk.29.attn_v.weight q4_K +blk.29.attn_q.weight iq4_nl +blk.29.attn_v.weight q5_0 +blk.29.ffn_down_exps.weight iq4_nl +blk.29.ffn_gate_exps.weight iq4_nl +blk.29.ffn_up_exps.weight iq4_nl +blk.30.attn_k.weight iq4_nl blk.30.attn_output.weight iq2_xxs -blk.30.attn_v.weight q4_K +blk.30.attn_q.weight iq4_nl +blk.30.attn_v.weight q5_0 +blk.30.ffn_down_exps.weight iq4_nl +blk.30.ffn_gate_exps.weight iq4_nl +blk.30.ffn_up_exps.weight iq4_nl +blk.31.attn_k.weight iq4_nl blk.31.attn_output.weight iq2_xxs -blk.31.attn_v.weight q4_K +blk.31.attn_q.weight iq4_nl +blk.31.attn_v.weight q5_0 +blk.31.ffn_down_exps.weight iq4_nl +blk.31.ffn_gate_exps.weight iq4_nl +blk.31.ffn_up_exps.weight iq4_nl +blk.32.attn_k.weight iq4_nl blk.32.attn_output.weight iq2_xxs -blk.32.attn_v.weight q4_K +blk.32.attn_q.weight iq4_nl +blk.32.attn_v.weight q5_0 +blk.32.ffn_down_exps.weight iq4_nl +blk.32.ffn_gate_exps.weight iq4_nl +blk.32.ffn_up_exps.weight iq4_nl +blk.33.attn_k.weight iq4_nl blk.33.attn_output.weight iq2_xxs -blk.33.attn_v.weight q4_K +blk.33.attn_q.weight iq4_nl +blk.33.attn_v.weight q5_0 +blk.33.ffn_down_exps.weight iq4_nl +blk.33.ffn_gate_exps.weight iq4_nl +blk.33.ffn_up_exps.weight iq4_nl +blk.34.attn_k.weight iq4_nl blk.34.attn_output.weight iq2_xxs -blk.34.attn_v.weight q4_K +blk.34.attn_q.weight iq4_nl +blk.34.attn_v.weight q5_0 +blk.34.ffn_down_exps.weight iq4_nl +blk.34.ffn_gate_exps.weight iq4_nl +blk.34.ffn_up_exps.weight iq4_nl +blk.35.attn_k.weight iq4_nl blk.35.attn_output.weight iq2_xxs -blk.35.attn_v.weight q4_K +blk.35.attn_q.weight iq4_nl +blk.35.attn_v.weight q5_0 +blk.35.ffn_down_exps.weight iq4_nl +blk.35.ffn_gate_exps.weight iq4_nl +blk.35.ffn_up_exps.weight iq4_nl [BF16] bf16 -output.weight q6_K [TQ1_0] tq1_0 output.weight q8_0 -token_embd.weight q4_K +token_embd.weight q5_0 +blk.0.attn_k.weight q4_0 +blk.0.attn_q.weight q4_0 +blk.0.attn_v.weight q4_0 +blk.0.ffn_down_exps.weight q4_0 +blk.0.ffn_gate_exps.weight q4_0 +blk.0.ffn_up_exps.weight q4_0 +blk.1.attn_k.weight q4_0 +blk.1.attn_q.weight q4_0 +blk.1.attn_v.weight q4_0 +blk.1.ffn_down_exps.weight q4_0 +blk.1.ffn_gate_exps.weight q4_0 +blk.1.ffn_up_exps.weight q4_0 +blk.2.attn_k.weight q4_0 +blk.2.attn_q.weight q4_0 +blk.2.attn_v.weight q4_0 +blk.2.ffn_down_exps.weight q4_0 +blk.2.ffn_gate_exps.weight q4_0 +blk.2.ffn_up_exps.weight q4_0 +blk.3.attn_k.weight q4_0 +blk.3.attn_q.weight q4_0 +blk.3.attn_v.weight q4_0 +blk.3.ffn_down_exps.weight q4_0 +blk.3.ffn_gate_exps.weight q4_0 +blk.3.ffn_up_exps.weight q4_0 +blk.4.attn_k.weight q4_0 +blk.4.attn_q.weight q4_0 +blk.4.attn_v.weight q4_0 +blk.4.ffn_down_exps.weight q4_0 +blk.4.ffn_gate_exps.weight q4_0 +blk.4.ffn_up_exps.weight q4_0 +blk.5.attn_k.weight q4_0 +blk.5.attn_q.weight q4_0 +blk.5.attn_v.weight q4_0 +blk.5.ffn_down_exps.weight q4_0 +blk.5.ffn_gate_exps.weight q4_0 +blk.5.ffn_up_exps.weight q4_0 +blk.6.attn_k.weight q4_0 +blk.6.attn_q.weight q4_0 +blk.6.attn_v.weight q4_0 +blk.6.ffn_down_exps.weight q4_0 +blk.6.ffn_gate_exps.weight q4_0 +blk.6.ffn_up_exps.weight q4_0 +blk.7.attn_k.weight q4_0 +blk.7.attn_q.weight q4_0 +blk.7.attn_v.weight q4_0 +blk.7.ffn_down_exps.weight q4_0 +blk.7.ffn_gate_exps.weight q4_0 +blk.7.ffn_up_exps.weight q4_0 +blk.8.attn_k.weight q4_0 +blk.8.attn_q.weight q4_0 +blk.8.attn_v.weight q4_0 +blk.8.ffn_down_exps.weight q4_0 +blk.8.ffn_gate_exps.weight q4_0 +blk.8.ffn_up_exps.weight q4_0 +blk.9.attn_k.weight q4_0 +blk.9.attn_q.weight q4_0 +blk.9.attn_v.weight q4_0 +blk.9.ffn_down_exps.weight q4_0 +blk.9.ffn_gate_exps.weight q4_0 +blk.9.ffn_up_exps.weight q4_0 +blk.10.attn_k.weight q4_0 +blk.10.attn_q.weight q4_0 +blk.10.attn_v.weight q4_0 +blk.10.ffn_down_exps.weight q4_0 +blk.10.ffn_gate_exps.weight q4_0 +blk.10.ffn_up_exps.weight q4_0 +blk.11.attn_k.weight q4_0 +blk.11.attn_q.weight q4_0 +blk.11.attn_v.weight q4_0 +blk.11.ffn_down_exps.weight q4_0 +blk.11.ffn_gate_exps.weight q4_0 +blk.11.ffn_up_exps.weight q4_0 +blk.12.attn_k.weight q4_0 +blk.12.attn_q.weight q4_0 +blk.12.attn_v.weight q4_0 +blk.12.ffn_down_exps.weight q4_0 +blk.12.ffn_gate_exps.weight q4_0 +blk.12.ffn_up_exps.weight q4_0 +blk.13.attn_k.weight q4_0 +blk.13.attn_q.weight q4_0 +blk.13.attn_v.weight q4_0 +blk.13.ffn_down_exps.weight q4_0 +blk.13.ffn_gate_exps.weight q4_0 +blk.13.ffn_up_exps.weight q4_0 +blk.14.attn_k.weight q4_0 +blk.14.attn_q.weight q4_0 +blk.14.attn_v.weight q4_0 +blk.14.ffn_down_exps.weight q4_0 +blk.14.ffn_gate_exps.weight q4_0 +blk.14.ffn_up_exps.weight q4_0 +blk.15.attn_k.weight q4_0 +blk.15.attn_q.weight q4_0 +blk.15.attn_v.weight q4_0 +blk.15.ffn_down_exps.weight q4_0 +blk.15.ffn_gate_exps.weight q4_0 +blk.15.ffn_up_exps.weight q4_0 +blk.16.attn_k.weight q4_0 +blk.16.attn_q.weight q4_0 +blk.16.attn_v.weight q4_0 +blk.16.ffn_down_exps.weight q4_0 +blk.16.ffn_gate_exps.weight q4_0 +blk.16.ffn_up_exps.weight q4_0 +blk.17.attn_k.weight q4_0 +blk.17.attn_q.weight q4_0 +blk.17.attn_v.weight q4_0 +blk.17.ffn_down_exps.weight q4_0 +blk.17.ffn_gate_exps.weight q4_0 +blk.17.ffn_up_exps.weight q4_0 +blk.18.attn_k.weight q4_0 +blk.18.attn_q.weight q4_0 +blk.18.attn_v.weight q4_0 +blk.18.ffn_down_exps.weight q4_0 +blk.18.ffn_gate_exps.weight q4_0 +blk.18.ffn_up_exps.weight q4_0 +blk.19.attn_k.weight q4_0 +blk.19.attn_q.weight q4_0 +blk.19.attn_v.weight q4_0 +blk.19.ffn_down_exps.weight q4_0 +blk.19.ffn_gate_exps.weight q4_0 +blk.19.ffn_up_exps.weight q4_0 +blk.20.attn_k.weight q4_0 +blk.20.attn_q.weight q4_0 +blk.20.attn_v.weight q4_0 +blk.20.ffn_down_exps.weight q4_0 +blk.20.ffn_gate_exps.weight q4_0 +blk.20.ffn_up_exps.weight q4_0 +blk.21.attn_k.weight q4_0 +blk.21.attn_q.weight q4_0 +blk.21.attn_v.weight q4_0 +blk.21.ffn_down_exps.weight q4_0 +blk.21.ffn_gate_exps.weight q4_0 +blk.21.ffn_up_exps.weight q4_0 +blk.22.attn_k.weight q4_0 +blk.22.attn_q.weight q4_0 +blk.22.attn_v.weight q4_0 +blk.22.ffn_down_exps.weight q4_0 +blk.22.ffn_gate_exps.weight q4_0 +blk.22.ffn_up_exps.weight q4_0 +blk.23.attn_k.weight q4_0 +blk.23.attn_q.weight q4_0 +blk.23.attn_v.weight q4_0 +blk.23.ffn_down_exps.weight q4_0 +blk.23.ffn_gate_exps.weight q4_0 +blk.23.ffn_up_exps.weight q4_0 +blk.24.attn_k.weight q4_0 +blk.24.attn_q.weight q4_0 +blk.24.attn_v.weight q4_0 +blk.24.ffn_down_exps.weight q4_0 +blk.24.ffn_gate_exps.weight q4_0 +blk.24.ffn_up_exps.weight q4_0 +blk.25.attn_k.weight q4_0 +blk.25.attn_q.weight q4_0 +blk.25.attn_v.weight q4_0 +blk.25.ffn_down_exps.weight q4_0 +blk.25.ffn_gate_exps.weight q4_0 +blk.25.ffn_up_exps.weight q4_0 +blk.26.attn_k.weight q4_0 +blk.26.attn_q.weight q4_0 +blk.26.attn_v.weight q4_0 +blk.26.ffn_down_exps.weight q4_0 +blk.26.ffn_gate_exps.weight q4_0 +blk.26.ffn_up_exps.weight q4_0 +blk.27.attn_k.weight q4_0 +blk.27.attn_q.weight q4_0 +blk.27.attn_v.weight q4_0 +blk.27.ffn_down_exps.weight q4_0 +blk.27.ffn_gate_exps.weight q4_0 +blk.27.ffn_up_exps.weight q4_0 +blk.28.attn_k.weight q4_0 +blk.28.attn_q.weight q4_0 +blk.28.attn_v.weight q4_0 +blk.28.ffn_down_exps.weight q4_0 +blk.28.ffn_gate_exps.weight q4_0 +blk.28.ffn_up_exps.weight q4_0 +blk.29.attn_k.weight q4_0 +blk.29.attn_q.weight q4_0 +blk.29.attn_v.weight q4_0 +blk.29.ffn_down_exps.weight q4_0 +blk.29.ffn_gate_exps.weight q4_0 +blk.29.ffn_up_exps.weight q4_0 +blk.30.attn_k.weight q4_0 +blk.30.attn_q.weight q4_0 +blk.30.attn_v.weight q4_0 +blk.30.ffn_down_exps.weight q4_0 +blk.30.ffn_gate_exps.weight q4_0 +blk.30.ffn_up_exps.weight q4_0 +blk.31.attn_k.weight q4_0 +blk.31.attn_q.weight q4_0 +blk.31.attn_v.weight q4_0 +blk.31.ffn_down_exps.weight q4_0 +blk.31.ffn_gate_exps.weight q4_0 +blk.31.ffn_up_exps.weight q4_0 +blk.32.attn_k.weight q4_0 +blk.32.attn_q.weight q4_0 +blk.32.attn_v.weight q4_0 +blk.32.ffn_down_exps.weight q4_0 +blk.32.ffn_gate_exps.weight q4_0 +blk.32.ffn_up_exps.weight q4_0 +blk.33.attn_k.weight q4_0 +blk.33.attn_q.weight q4_0 +blk.33.attn_v.weight q4_0 +blk.33.ffn_down_exps.weight q4_0 +blk.33.ffn_gate_exps.weight q4_0 +blk.33.ffn_up_exps.weight q4_0 +blk.34.attn_k.weight q4_0 +blk.34.attn_q.weight q4_0 +blk.34.attn_v.weight q4_0 +blk.34.ffn_down_exps.weight q4_0 +blk.34.ffn_gate_exps.weight q4_0 +blk.34.ffn_up_exps.weight q4_0 +blk.35.attn_k.weight q4_0 +blk.35.attn_q.weight q4_0 +blk.35.attn_v.weight q4_0 +blk.35.ffn_down_exps.weight q4_0 +blk.35.ffn_gate_exps.weight q4_0 +blk.35.ffn_up_exps.weight q4_0 [TQ2_0] tq2_0 output.weight q8_0 -token_embd.weight q4_K +token_embd.weight q5_0 +blk.0.attn_k.weight q4_0 +blk.0.attn_q.weight q4_0 +blk.0.attn_v.weight q4_0 +blk.0.ffn_down_exps.weight q4_0 +blk.0.ffn_gate_exps.weight q4_0 +blk.0.ffn_up_exps.weight q4_0 +blk.1.attn_k.weight q4_0 +blk.1.attn_q.weight q4_0 +blk.1.attn_v.weight q4_0 +blk.1.ffn_down_exps.weight q4_0 +blk.1.ffn_gate_exps.weight q4_0 +blk.1.ffn_up_exps.weight q4_0 +blk.2.attn_k.weight q4_0 +blk.2.attn_q.weight q4_0 +blk.2.attn_v.weight q4_0 +blk.2.ffn_down_exps.weight q4_0 +blk.2.ffn_gate_exps.weight q4_0 +blk.2.ffn_up_exps.weight q4_0 +blk.3.attn_k.weight q4_0 +blk.3.attn_q.weight q4_0 +blk.3.attn_v.weight q4_0 +blk.3.ffn_down_exps.weight q4_0 +blk.3.ffn_gate_exps.weight q4_0 +blk.3.ffn_up_exps.weight q4_0 +blk.4.attn_k.weight q4_0 +blk.4.attn_q.weight q4_0 +blk.4.attn_v.weight q4_0 +blk.4.ffn_down_exps.weight q4_0 +blk.4.ffn_gate_exps.weight q4_0 +blk.4.ffn_up_exps.weight q4_0 +blk.5.attn_k.weight q4_0 +blk.5.attn_q.weight q4_0 +blk.5.attn_v.weight q4_0 +blk.5.ffn_down_exps.weight q4_0 +blk.5.ffn_gate_exps.weight q4_0 +blk.5.ffn_up_exps.weight q4_0 +blk.6.attn_k.weight q4_0 +blk.6.attn_q.weight q4_0 +blk.6.attn_v.weight q4_0 +blk.6.ffn_down_exps.weight q4_0 +blk.6.ffn_gate_exps.weight q4_0 +blk.6.ffn_up_exps.weight q4_0 +blk.7.attn_k.weight q4_0 +blk.7.attn_q.weight q4_0 +blk.7.attn_v.weight q4_0 +blk.7.ffn_down_exps.weight q4_0 +blk.7.ffn_gate_exps.weight q4_0 +blk.7.ffn_up_exps.weight q4_0 +blk.8.attn_k.weight q4_0 +blk.8.attn_q.weight q4_0 +blk.8.attn_v.weight q4_0 +blk.8.ffn_down_exps.weight q4_0 +blk.8.ffn_gate_exps.weight q4_0 +blk.8.ffn_up_exps.weight q4_0 +blk.9.attn_k.weight q4_0 +blk.9.attn_q.weight q4_0 +blk.9.attn_v.weight q4_0 +blk.9.ffn_down_exps.weight q4_0 +blk.9.ffn_gate_exps.weight q4_0 +blk.9.ffn_up_exps.weight q4_0 +blk.10.attn_k.weight q4_0 +blk.10.attn_q.weight q4_0 +blk.10.attn_v.weight q4_0 +blk.10.ffn_down_exps.weight q4_0 +blk.10.ffn_gate_exps.weight q4_0 +blk.10.ffn_up_exps.weight q4_0 +blk.11.attn_k.weight q4_0 +blk.11.attn_q.weight q4_0 +blk.11.attn_v.weight q4_0 +blk.11.ffn_down_exps.weight q4_0 +blk.11.ffn_gate_exps.weight q4_0 +blk.11.ffn_up_exps.weight q4_0 +blk.12.attn_k.weight q4_0 +blk.12.attn_q.weight q4_0 +blk.12.attn_v.weight q4_0 +blk.12.ffn_down_exps.weight q4_0 +blk.12.ffn_gate_exps.weight q4_0 +blk.12.ffn_up_exps.weight q4_0 +blk.13.attn_k.weight q4_0 +blk.13.attn_q.weight q4_0 +blk.13.attn_v.weight q4_0 +blk.13.ffn_down_exps.weight q4_0 +blk.13.ffn_gate_exps.weight q4_0 +blk.13.ffn_up_exps.weight q4_0 +blk.14.attn_k.weight q4_0 +blk.14.attn_q.weight q4_0 +blk.14.attn_v.weight q4_0 +blk.14.ffn_down_exps.weight q4_0 +blk.14.ffn_gate_exps.weight q4_0 +blk.14.ffn_up_exps.weight q4_0 +blk.15.attn_k.weight q4_0 +blk.15.attn_q.weight q4_0 +blk.15.attn_v.weight q4_0 +blk.15.ffn_down_exps.weight q4_0 +blk.15.ffn_gate_exps.weight q4_0 +blk.15.ffn_up_exps.weight q4_0 +blk.16.attn_k.weight q4_0 +blk.16.attn_q.weight q4_0 +blk.16.attn_v.weight q4_0 +blk.16.ffn_down_exps.weight q4_0 +blk.16.ffn_gate_exps.weight q4_0 +blk.16.ffn_up_exps.weight q4_0 +blk.17.attn_k.weight q4_0 +blk.17.attn_q.weight q4_0 +blk.17.attn_v.weight q4_0 +blk.17.ffn_down_exps.weight q4_0 +blk.17.ffn_gate_exps.weight q4_0 +blk.17.ffn_up_exps.weight q4_0 +blk.18.attn_k.weight q4_0 +blk.18.attn_q.weight q4_0 +blk.18.attn_v.weight q4_0 +blk.18.ffn_down_exps.weight q4_0 +blk.18.ffn_gate_exps.weight q4_0 +blk.18.ffn_up_exps.weight q4_0 +blk.19.attn_k.weight q4_0 +blk.19.attn_q.weight q4_0 +blk.19.attn_v.weight q4_0 +blk.19.ffn_down_exps.weight q4_0 +blk.19.ffn_gate_exps.weight q4_0 +blk.19.ffn_up_exps.weight q4_0 +blk.20.attn_k.weight q4_0 +blk.20.attn_q.weight q4_0 +blk.20.attn_v.weight q4_0 +blk.20.ffn_down_exps.weight q4_0 +blk.20.ffn_gate_exps.weight q4_0 +blk.20.ffn_up_exps.weight q4_0 +blk.21.attn_k.weight q4_0 +blk.21.attn_q.weight q4_0 +blk.21.attn_v.weight q4_0 +blk.21.ffn_down_exps.weight q4_0 +blk.21.ffn_gate_exps.weight q4_0 +blk.21.ffn_up_exps.weight q4_0 +blk.22.attn_k.weight q4_0 +blk.22.attn_q.weight q4_0 +blk.22.attn_v.weight q4_0 +blk.22.ffn_down_exps.weight q4_0 +blk.22.ffn_gate_exps.weight q4_0 +blk.22.ffn_up_exps.weight q4_0 +blk.23.attn_k.weight q4_0 +blk.23.attn_q.weight q4_0 +blk.23.attn_v.weight q4_0 +blk.23.ffn_down_exps.weight q4_0 +blk.23.ffn_gate_exps.weight q4_0 +blk.23.ffn_up_exps.weight q4_0 +blk.24.attn_k.weight q4_0 +blk.24.attn_q.weight q4_0 +blk.24.attn_v.weight q4_0 +blk.24.ffn_down_exps.weight q4_0 +blk.24.ffn_gate_exps.weight q4_0 +blk.24.ffn_up_exps.weight q4_0 +blk.25.attn_k.weight q4_0 +blk.25.attn_q.weight q4_0 +blk.25.attn_v.weight q4_0 +blk.25.ffn_down_exps.weight q4_0 +blk.25.ffn_gate_exps.weight q4_0 +blk.25.ffn_up_exps.weight q4_0 +blk.26.attn_k.weight q4_0 +blk.26.attn_q.weight q4_0 +blk.26.attn_v.weight q4_0 +blk.26.ffn_down_exps.weight q4_0 +blk.26.ffn_gate_exps.weight q4_0 +blk.26.ffn_up_exps.weight q4_0 +blk.27.attn_k.weight q4_0 +blk.27.attn_q.weight q4_0 +blk.27.attn_v.weight q4_0 +blk.27.ffn_down_exps.weight q4_0 +blk.27.ffn_gate_exps.weight q4_0 +blk.27.ffn_up_exps.weight q4_0 +blk.28.attn_k.weight q4_0 +blk.28.attn_q.weight q4_0 +blk.28.attn_v.weight q4_0 +blk.28.ffn_down_exps.weight q4_0 +blk.28.ffn_gate_exps.weight q4_0 +blk.28.ffn_up_exps.weight q4_0 +blk.29.attn_k.weight q4_0 +blk.29.attn_q.weight q4_0 +blk.29.attn_v.weight q4_0 +blk.29.ffn_down_exps.weight q4_0 +blk.29.ffn_gate_exps.weight q4_0 +blk.29.ffn_up_exps.weight q4_0 +blk.30.attn_k.weight q4_0 +blk.30.attn_q.weight q4_0 +blk.30.attn_v.weight q4_0 +blk.30.ffn_down_exps.weight q4_0 +blk.30.ffn_gate_exps.weight q4_0 +blk.30.ffn_up_exps.weight q4_0 +blk.31.attn_k.weight q4_0 +blk.31.attn_q.weight q4_0 +blk.31.attn_v.weight q4_0 +blk.31.ffn_down_exps.weight q4_0 +blk.31.ffn_gate_exps.weight q4_0 +blk.31.ffn_up_exps.weight q4_0 +blk.32.attn_k.weight q4_0 +blk.32.attn_q.weight q4_0 +blk.32.attn_v.weight q4_0 +blk.32.ffn_down_exps.weight q4_0 +blk.32.ffn_gate_exps.weight q4_0 +blk.32.ffn_up_exps.weight q4_0 +blk.33.attn_k.weight q4_0 +blk.33.attn_q.weight q4_0 +blk.33.attn_v.weight q4_0 +blk.33.ffn_down_exps.weight q4_0 +blk.33.ffn_gate_exps.weight q4_0 +blk.33.ffn_up_exps.weight q4_0 +blk.34.attn_k.weight q4_0 +blk.34.attn_q.weight q4_0 +blk.34.attn_v.weight q4_0 +blk.34.ffn_down_exps.weight q4_0 +blk.34.ffn_gate_exps.weight q4_0 +blk.34.ffn_up_exps.weight q4_0 +blk.35.attn_k.weight q4_0 +blk.35.attn_q.weight q4_0 +blk.35.attn_v.weight q4_0 +blk.35.ffn_down_exps.weight q4_0 +blk.35.ffn_gate_exps.weight q4_0 +blk.35.ffn_up_exps.weight q4_0 [MXFP4_MOE] mxfp4 output.weight q8_0 diff --git a/tests/snapshots/meta-llama-3.1-70b-instruct.schema b/tests/snapshots/meta-llama-3.1-70b-instruct.schema index b26755d6f7..95e0fdf148 100644 --- a/tests/snapshots/meta-llama-3.1-70b-instruct.schema +++ b/tests/snapshots/meta-llama-3.1-70b-instruct.schema @@ -2,10 +2,8 @@ # n_embd=8192, n_ff=28672, n_vocab=128256, n_layer=80, n_head=64, n_head_kv=8 [F32] f32 -output.weight q6_K [F16] f16 -output.weight q6_K [Q4_0] q4_0 output.weight q6_K @@ -3324,7 +3322,6 @@ blk.79.attn_v.weight q4_K output.weight q5_K [BF16] bf16 -output.weight q6_K [TQ1_0] tq1_0 token_embd.weight q4_K diff --git a/tests/snapshots/nemotron-nano-3-30b-a3b.schema b/tests/snapshots/nemotron-nano-3-30b-a3b.schema index c68a6e0892..cf46f9de4b 100644 --- a/tests/snapshots/nemotron-nano-3-30b-a3b.schema +++ b/tests/snapshots/nemotron-nano-3-30b-a3b.schema @@ -2,578 +2,3236 @@ # n_embd=2688, n_ff=0, n_vocab=131072, n_layer=52, n_head=32, n_head_kv=0, n_expert=128 [F32] f32 -output.weight q6_K [F16] f16 -output.weight q6_K [Q4_0] q4_0 -output.weight q6_K +output.weight q8_0 [Q4_1] q4_1 -output.weight q6_K +output.weight q8_0 [Q8_0] q8_0 [Q5_0] q5_0 -output.weight q6_K +output.weight q8_0 [Q5_1] q5_1 -output.weight q6_K +output.weight q8_0 [Q2_K] q2_K output.weight q8_0 -blk.1.ffn_down_exps.weight q3_K -blk.1.ffn_down_shexp.weight q3_K -blk.3.ffn_down_exps.weight q3_K -blk.3.ffn_down_shexp.weight q3_K +token_embd.weight q4_0 +blk.0.ssm_in.weight q4_0 +blk.1.ffn_down_exps.weight q4_0 +blk.1.ffn_down_shexp.weight q4_0 +blk.1.ffn_up_exps.weight q4_0 +blk.1.ffn_up_shexp.weight q4_0 +blk.2.ssm_in.weight q4_0 +blk.3.ffn_down_exps.weight q4_0 +blk.3.ffn_down_shexp.weight q4_0 +blk.3.ffn_up_exps.weight q4_0 +blk.3.ffn_up_shexp.weight q4_0 +blk.4.ssm_in.weight q4_0 +blk.5.attn_k.weight q4_0 blk.5.attn_output.weight q3_K -blk.5.attn_v.weight q3_K -blk.6.ffn_down_exps.weight q3_K -blk.6.ffn_down_shexp.weight q3_K -blk.8.ffn_down_exps.weight q3_K -blk.8.ffn_down_shexp.weight q3_K -blk.10.ffn_down_exps.weight q3_K -blk.10.ffn_down_shexp.weight q3_K +blk.5.attn_q.weight q4_0 +blk.5.attn_v.weight q4_0 +blk.6.ffn_down_exps.weight q4_0 +blk.6.ffn_down_shexp.weight q4_0 +blk.6.ffn_up_exps.weight q4_0 +blk.6.ffn_up_shexp.weight q4_0 +blk.7.ssm_in.weight q4_0 +blk.8.ffn_down_exps.weight q4_0 +blk.8.ffn_down_shexp.weight q4_0 +blk.8.ffn_up_exps.weight q4_0 +blk.8.ffn_up_shexp.weight q4_0 +blk.9.ssm_in.weight q4_0 +blk.10.ffn_down_exps.weight q4_0 +blk.10.ffn_down_shexp.weight q4_0 +blk.10.ffn_up_exps.weight q4_0 +blk.10.ffn_up_shexp.weight q4_0 +blk.11.ssm_in.weight q4_0 +blk.12.attn_k.weight q4_0 blk.12.attn_output.weight q3_K -blk.12.attn_v.weight q3_K -blk.13.ffn_down_exps.weight q3_K -blk.13.ffn_down_shexp.weight q3_K -blk.15.ffn_down_exps.weight q3_K -blk.15.ffn_down_shexp.weight q3_K -blk.17.ffn_down_exps.weight q3_K -blk.17.ffn_down_shexp.weight q3_K +blk.12.attn_q.weight q4_0 +blk.12.attn_v.weight q4_0 +blk.13.ffn_down_exps.weight q4_0 +blk.13.ffn_down_shexp.weight q4_0 +blk.13.ffn_up_exps.weight q4_0 +blk.13.ffn_up_shexp.weight q4_0 +blk.14.ssm_in.weight q4_0 +blk.15.ffn_down_exps.weight q4_0 +blk.15.ffn_down_shexp.weight q4_0 +blk.15.ffn_up_exps.weight q4_0 +blk.15.ffn_up_shexp.weight q4_0 +blk.16.ssm_in.weight q4_0 +blk.17.ffn_down_exps.weight q4_0 +blk.17.ffn_down_shexp.weight q4_0 +blk.17.ffn_up_exps.weight q4_0 +blk.17.ffn_up_shexp.weight q4_0 +blk.18.ssm_in.weight q4_0 +blk.19.attn_k.weight q4_0 blk.19.attn_output.weight q3_K -blk.19.attn_v.weight q3_K -blk.20.ffn_down_exps.weight q3_K -blk.20.ffn_down_shexp.weight q3_K -blk.22.ffn_down_exps.weight q3_K -blk.22.ffn_down_shexp.weight q3_K -blk.24.ffn_down_exps.weight q3_K -blk.24.ffn_down_shexp.weight q3_K +blk.19.attn_q.weight q4_0 +blk.19.attn_v.weight q4_0 +blk.20.ffn_down_exps.weight q4_0 +blk.20.ffn_down_shexp.weight q4_0 +blk.20.ffn_up_exps.weight q4_0 +blk.20.ffn_up_shexp.weight q4_0 +blk.21.ssm_in.weight q4_0 +blk.22.ffn_down_exps.weight q4_0 +blk.22.ffn_down_shexp.weight q4_0 +blk.22.ffn_up_exps.weight q4_0 +blk.22.ffn_up_shexp.weight q4_0 +blk.23.ssm_in.weight q4_0 +blk.24.ffn_down_exps.weight q4_0 +blk.24.ffn_down_shexp.weight q4_0 +blk.24.ffn_up_exps.weight q4_0 +blk.24.ffn_up_shexp.weight q4_0 +blk.25.ssm_in.weight q4_0 +blk.26.attn_k.weight q4_0 blk.26.attn_output.weight q3_K -blk.26.attn_v.weight q3_K -blk.27.ffn_down_exps.weight q3_K -blk.27.ffn_down_shexp.weight q3_K -blk.29.ffn_down_exps.weight q3_K -blk.29.ffn_down_shexp.weight q3_K -blk.31.ffn_down_exps.weight q3_K -blk.31.ffn_down_shexp.weight q3_K +blk.26.attn_q.weight q4_0 +blk.26.attn_v.weight q4_0 +blk.27.ffn_down_exps.weight q4_0 +blk.27.ffn_down_shexp.weight q4_0 +blk.27.ffn_up_exps.weight q4_0 +blk.27.ffn_up_shexp.weight q4_0 +blk.28.ssm_in.weight q4_0 +blk.29.ffn_down_exps.weight q4_0 +blk.29.ffn_down_shexp.weight q4_0 +blk.29.ffn_up_exps.weight q4_0 +blk.29.ffn_up_shexp.weight q4_0 +blk.30.ssm_in.weight q4_0 +blk.31.ffn_down_exps.weight q4_0 +blk.31.ffn_down_shexp.weight q4_0 +blk.31.ffn_up_exps.weight q4_0 +blk.31.ffn_up_shexp.weight q4_0 +blk.32.ssm_in.weight q4_0 +blk.33.attn_k.weight q4_0 blk.33.attn_output.weight q3_K -blk.33.attn_v.weight q3_K -blk.34.ffn_down_exps.weight q3_K -blk.34.ffn_down_shexp.weight q3_K -blk.36.ffn_down_exps.weight q3_K -blk.36.ffn_down_shexp.weight q3_K -blk.38.ffn_down_exps.weight q3_K -blk.38.ffn_down_shexp.weight q3_K -blk.40.ffn_down_exps.weight q3_K -blk.40.ffn_down_shexp.weight q3_K +blk.33.attn_q.weight q4_0 +blk.33.attn_v.weight q4_0 +blk.34.ffn_down_exps.weight q4_0 +blk.34.ffn_down_shexp.weight q4_0 +blk.34.ffn_up_exps.weight q4_0 +blk.34.ffn_up_shexp.weight q4_0 +blk.35.ssm_in.weight q4_0 +blk.36.ffn_down_exps.weight q4_0 +blk.36.ffn_down_shexp.weight q4_0 +blk.36.ffn_up_exps.weight q4_0 +blk.36.ffn_up_shexp.weight q4_0 +blk.37.ssm_in.weight q4_0 +blk.38.ffn_down_exps.weight q4_0 +blk.38.ffn_down_shexp.weight q4_0 +blk.38.ffn_up_exps.weight q4_0 +blk.38.ffn_up_shexp.weight q4_0 +blk.39.ssm_in.weight q4_0 +blk.40.ffn_down_exps.weight q4_0 +blk.40.ffn_down_shexp.weight q4_0 +blk.40.ffn_up_exps.weight q4_0 +blk.40.ffn_up_shexp.weight q4_0 +blk.41.ssm_in.weight q4_0 +blk.42.attn_k.weight q4_0 blk.42.attn_output.weight q3_K -blk.42.attn_v.weight q3_K -blk.43.ffn_down_exps.weight q3_K -blk.43.ffn_down_shexp.weight q3_K -blk.45.ffn_down_exps.weight q3_K -blk.45.ffn_down_shexp.weight q3_K -blk.47.ffn_down_exps.weight q3_K -blk.47.ffn_down_shexp.weight q3_K -blk.49.ffn_down_exps.weight q3_K -blk.49.ffn_down_shexp.weight q3_K -blk.51.ffn_down_exps.weight q3_K -blk.51.ffn_down_shexp.weight q3_K +blk.42.attn_q.weight q4_0 +blk.42.attn_v.weight q4_0 +blk.43.ffn_down_exps.weight q4_0 +blk.43.ffn_down_shexp.weight q4_0 +blk.43.ffn_up_exps.weight q4_0 +blk.43.ffn_up_shexp.weight q4_0 +blk.44.ssm_in.weight q4_0 +blk.45.ffn_down_exps.weight q4_0 +blk.45.ffn_down_shexp.weight q4_0 +blk.45.ffn_up_exps.weight q4_0 +blk.45.ffn_up_shexp.weight q4_0 +blk.46.ssm_in.weight q4_0 +blk.47.ffn_down_exps.weight q4_0 +blk.47.ffn_down_shexp.weight q4_0 +blk.47.ffn_up_exps.weight q4_0 +blk.47.ffn_up_shexp.weight q4_0 +blk.48.ssm_in.weight q4_0 +blk.49.ffn_down_exps.weight q4_0 +blk.49.ffn_down_shexp.weight q4_0 +blk.49.ffn_up_exps.weight q4_0 +blk.49.ffn_up_shexp.weight q4_0 +blk.50.ssm_in.weight q4_0 +blk.51.ffn_down_exps.weight q4_0 +blk.51.ffn_down_shexp.weight q4_0 +blk.51.ffn_up_exps.weight q4_0 +blk.51.ffn_up_shexp.weight q4_0 [Q3_K_S] q3_K output.weight q8_0 +token_embd.weight q4_0 +blk.0.ssm_in.weight q4_0 +blk.1.ffn_down_exps.weight q4_0 +blk.1.ffn_down_shexp.weight q4_0 +blk.1.ffn_up_exps.weight q4_0 +blk.1.ffn_up_shexp.weight q4_0 +blk.2.ssm_in.weight q4_0 +blk.3.ffn_down_exps.weight q4_0 +blk.3.ffn_down_shexp.weight q4_0 +blk.3.ffn_up_exps.weight q4_0 +blk.3.ffn_up_shexp.weight q4_0 +blk.4.ssm_in.weight q4_0 +blk.5.attn_k.weight q4_0 +blk.5.attn_q.weight q4_0 +blk.5.attn_v.weight q4_0 +blk.6.ffn_down_exps.weight q4_0 +blk.6.ffn_down_shexp.weight q4_0 +blk.6.ffn_up_exps.weight q4_0 +blk.6.ffn_up_shexp.weight q4_0 +blk.7.ssm_in.weight q4_0 +blk.8.ffn_down_exps.weight q4_0 +blk.8.ffn_down_shexp.weight q4_0 +blk.8.ffn_up_exps.weight q4_0 +blk.8.ffn_up_shexp.weight q4_0 +blk.9.ssm_in.weight q4_0 +blk.10.ffn_down_exps.weight q4_0 +blk.10.ffn_down_shexp.weight q4_0 +blk.10.ffn_up_exps.weight q4_0 +blk.10.ffn_up_shexp.weight q4_0 +blk.11.ssm_in.weight q4_0 +blk.12.attn_k.weight q4_0 +blk.12.attn_q.weight q4_0 +blk.12.attn_v.weight q4_0 +blk.13.ffn_down_exps.weight q4_0 +blk.13.ffn_down_shexp.weight q4_0 +blk.13.ffn_up_exps.weight q4_0 +blk.13.ffn_up_shexp.weight q4_0 +blk.14.ssm_in.weight q4_0 +blk.15.ffn_down_exps.weight q4_0 +blk.15.ffn_down_shexp.weight q4_0 +blk.15.ffn_up_exps.weight q4_0 +blk.15.ffn_up_shexp.weight q4_0 +blk.16.ssm_in.weight q4_0 +blk.17.ffn_down_exps.weight q4_0 +blk.17.ffn_down_shexp.weight q4_0 +blk.17.ffn_up_exps.weight q4_0 +blk.17.ffn_up_shexp.weight q4_0 +blk.18.ssm_in.weight q4_0 +blk.19.attn_k.weight q4_0 +blk.19.attn_q.weight q4_0 +blk.19.attn_v.weight q4_0 +blk.20.ffn_down_exps.weight q4_0 +blk.20.ffn_down_shexp.weight q4_0 +blk.20.ffn_up_exps.weight q4_0 +blk.20.ffn_up_shexp.weight q4_0 +blk.21.ssm_in.weight q4_0 +blk.22.ffn_down_exps.weight q4_0 +blk.22.ffn_down_shexp.weight q4_0 +blk.22.ffn_up_exps.weight q4_0 +blk.22.ffn_up_shexp.weight q4_0 +blk.23.ssm_in.weight q4_0 +blk.24.ffn_down_exps.weight q4_0 +blk.24.ffn_down_shexp.weight q4_0 +blk.24.ffn_up_exps.weight q4_0 +blk.24.ffn_up_shexp.weight q4_0 +blk.25.ssm_in.weight q4_0 +blk.26.attn_k.weight q4_0 +blk.26.attn_q.weight q4_0 +blk.26.attn_v.weight q4_0 +blk.27.ffn_down_exps.weight q4_0 +blk.27.ffn_down_shexp.weight q4_0 +blk.27.ffn_up_exps.weight q4_0 +blk.27.ffn_up_shexp.weight q4_0 +blk.28.ssm_in.weight q4_0 +blk.29.ffn_down_exps.weight q4_0 +blk.29.ffn_down_shexp.weight q4_0 +blk.29.ffn_up_exps.weight q4_0 +blk.29.ffn_up_shexp.weight q4_0 +blk.30.ssm_in.weight q4_0 +blk.31.ffn_down_exps.weight q4_0 +blk.31.ffn_down_shexp.weight q4_0 +blk.31.ffn_up_exps.weight q4_0 +blk.31.ffn_up_shexp.weight q4_0 +blk.32.ssm_in.weight q4_0 +blk.33.attn_k.weight q4_0 +blk.33.attn_q.weight q4_0 +blk.33.attn_v.weight q4_0 +blk.34.ffn_down_exps.weight q4_0 +blk.34.ffn_down_shexp.weight q4_0 +blk.34.ffn_up_exps.weight q4_0 +blk.34.ffn_up_shexp.weight q4_0 +blk.35.ssm_in.weight q4_0 +blk.36.ffn_down_exps.weight q4_0 +blk.36.ffn_down_shexp.weight q4_0 +blk.36.ffn_up_exps.weight q4_0 +blk.36.ffn_up_shexp.weight q4_0 +blk.37.ssm_in.weight q4_0 +blk.38.ffn_down_exps.weight q4_0 +blk.38.ffn_down_shexp.weight q4_0 +blk.38.ffn_up_exps.weight q4_0 +blk.38.ffn_up_shexp.weight q4_0 +blk.39.ssm_in.weight q4_0 +blk.40.ffn_down_exps.weight q4_0 +blk.40.ffn_down_shexp.weight q4_0 +blk.40.ffn_up_exps.weight q4_0 +blk.40.ffn_up_shexp.weight q4_0 +blk.41.ssm_in.weight q4_0 +blk.42.attn_k.weight q4_0 +blk.42.attn_q.weight q4_0 +blk.42.attn_v.weight q4_0 +blk.43.ffn_down_exps.weight q4_0 +blk.43.ffn_down_shexp.weight q4_0 +blk.43.ffn_up_exps.weight q4_0 +blk.43.ffn_up_shexp.weight q4_0 +blk.44.ssm_in.weight q4_0 +blk.45.ffn_down_exps.weight q4_0 +blk.45.ffn_down_shexp.weight q4_0 +blk.45.ffn_up_exps.weight q4_0 +blk.45.ffn_up_shexp.weight q4_0 +blk.46.ssm_in.weight q4_0 +blk.47.ffn_down_exps.weight q4_0 +blk.47.ffn_down_shexp.weight q4_0 +blk.47.ffn_up_exps.weight q4_0 +blk.47.ffn_up_shexp.weight q4_0 +blk.48.ssm_in.weight q4_0 +blk.49.ffn_down_exps.weight q4_0 +blk.49.ffn_down_shexp.weight q4_0 +blk.49.ffn_up_exps.weight q4_0 +blk.49.ffn_up_shexp.weight q4_0 +blk.50.ssm_in.weight q4_0 +blk.51.ffn_down_exps.weight q4_0 +blk.51.ffn_down_shexp.weight q4_0 +blk.51.ffn_up_exps.weight q4_0 +blk.51.ffn_up_shexp.weight q4_0 [Q3_K_M] q3_K output.weight q8_0 -blk.1.ffn_down_exps.weight q5_K -blk.1.ffn_down_shexp.weight q5_K -blk.3.ffn_down_exps.weight q4_K -blk.3.ffn_down_shexp.weight q4_K +token_embd.weight q4_0 +blk.0.ssm_in.weight q4_0 +blk.1.ffn_down_exps.weight q5_1 +blk.1.ffn_down_shexp.weight q5_1 +blk.1.ffn_up_exps.weight q4_0 +blk.1.ffn_up_shexp.weight q4_0 +blk.2.ssm_in.weight q4_0 +blk.3.ffn_down_exps.weight q5_0 +blk.3.ffn_down_shexp.weight q5_0 +blk.3.ffn_up_exps.weight q4_0 +blk.3.ffn_up_shexp.weight q4_0 +blk.4.ssm_in.weight q4_0 +blk.5.attn_k.weight q4_0 blk.5.attn_output.weight q4_K -blk.5.attn_v.weight q5_K -blk.6.ffn_down_exps.weight q4_K -blk.6.ffn_down_shexp.weight q4_K -blk.8.ffn_down_exps.weight q4_K -blk.8.ffn_down_shexp.weight q4_K -blk.10.ffn_down_exps.weight q4_K -blk.10.ffn_down_shexp.weight q4_K +blk.5.attn_q.weight q4_0 +blk.5.attn_v.weight q5_1 +blk.6.ffn_down_exps.weight q5_0 +blk.6.ffn_down_shexp.weight q5_0 +blk.6.ffn_up_exps.weight q4_0 +blk.6.ffn_up_shexp.weight q4_0 +blk.7.ssm_in.weight q4_0 +blk.8.ffn_down_exps.weight q5_0 +blk.8.ffn_down_shexp.weight q5_0 +blk.8.ffn_up_exps.weight q4_0 +blk.8.ffn_up_shexp.weight q4_0 +blk.9.ssm_in.weight q4_0 +blk.10.ffn_down_exps.weight q5_0 +blk.10.ffn_down_shexp.weight q5_0 +blk.10.ffn_up_exps.weight q4_0 +blk.10.ffn_up_shexp.weight q4_0 +blk.11.ssm_in.weight q4_0 +blk.12.attn_k.weight q4_0 blk.12.attn_output.weight q4_K -blk.12.attn_v.weight q5_K -blk.13.ffn_down_exps.weight q4_K -blk.13.ffn_down_shexp.weight q4_K -blk.15.ffn_down_exps.weight q4_K -blk.15.ffn_down_shexp.weight q4_K -blk.17.ffn_down_exps.weight q4_K -blk.17.ffn_down_shexp.weight q4_K +blk.12.attn_q.weight q4_0 +blk.12.attn_v.weight q5_1 +blk.13.ffn_down_exps.weight q5_0 +blk.13.ffn_down_shexp.weight q5_0 +blk.13.ffn_up_exps.weight q4_0 +blk.13.ffn_up_shexp.weight q4_0 +blk.14.ssm_in.weight q4_0 +blk.15.ffn_down_exps.weight q5_0 +blk.15.ffn_down_shexp.weight q5_0 +blk.15.ffn_up_exps.weight q4_0 +blk.15.ffn_up_shexp.weight q4_0 +blk.16.ssm_in.weight q4_0 +blk.17.ffn_down_exps.weight q5_0 +blk.17.ffn_down_shexp.weight q5_0 +blk.17.ffn_up_exps.weight q4_0 +blk.17.ffn_up_shexp.weight q4_0 +blk.18.ssm_in.weight q4_0 +blk.19.attn_k.weight q4_0 blk.19.attn_output.weight q4_K -blk.19.attn_v.weight q4_K -blk.20.ffn_down_exps.weight q4_K -blk.20.ffn_down_shexp.weight q4_K -blk.22.ffn_down_exps.weight q4_K -blk.22.ffn_down_shexp.weight q4_K -blk.24.ffn_down_exps.weight q4_K -blk.24.ffn_down_shexp.weight q4_K +blk.19.attn_q.weight q4_0 +blk.19.attn_v.weight q5_0 +blk.20.ffn_down_exps.weight q5_0 +blk.20.ffn_down_shexp.weight q5_0 +blk.20.ffn_up_exps.weight q4_0 +blk.20.ffn_up_shexp.weight q4_0 +blk.21.ssm_in.weight q4_0 +blk.22.ffn_down_exps.weight q5_0 +blk.22.ffn_down_shexp.weight q5_0 +blk.22.ffn_up_exps.weight q4_0 +blk.22.ffn_up_shexp.weight q4_0 +blk.23.ssm_in.weight q4_0 +blk.24.ffn_down_exps.weight q5_0 +blk.24.ffn_down_shexp.weight q5_0 +blk.24.ffn_up_exps.weight q4_0 +blk.24.ffn_up_shexp.weight q4_0 +blk.25.ssm_in.weight q4_0 +blk.26.attn_k.weight q4_0 blk.26.attn_output.weight q4_K -blk.26.attn_v.weight q4_K -blk.27.ffn_down_exps.weight q4_K -blk.27.ffn_down_shexp.weight q4_K -blk.29.ffn_down_exps.weight q4_K -blk.29.ffn_down_shexp.weight q4_K -blk.31.ffn_down_exps.weight q4_K -blk.31.ffn_down_shexp.weight q4_K +blk.26.attn_q.weight q4_0 +blk.26.attn_v.weight q5_0 +blk.27.ffn_down_exps.weight q5_0 +blk.27.ffn_down_shexp.weight q5_0 +blk.27.ffn_up_exps.weight q4_0 +blk.27.ffn_up_shexp.weight q4_0 +blk.28.ssm_in.weight q4_0 +blk.29.ffn_down_exps.weight q5_0 +blk.29.ffn_down_shexp.weight q5_0 +blk.29.ffn_up_exps.weight q4_0 +blk.29.ffn_up_shexp.weight q4_0 +blk.30.ssm_in.weight q4_0 +blk.31.ffn_down_exps.weight q5_0 +blk.31.ffn_down_shexp.weight q5_0 +blk.31.ffn_up_exps.weight q4_0 +blk.31.ffn_up_shexp.weight q4_0 +blk.32.ssm_in.weight q4_0 +blk.33.attn_k.weight q4_0 blk.33.attn_output.weight q4_K -blk.33.attn_v.weight q4_K -blk.34.ffn_down_exps.weight q4_K -blk.34.ffn_down_shexp.weight q4_K -blk.36.ffn_down_exps.weight q4_K -blk.36.ffn_down_shexp.weight q4_K -blk.38.ffn_down_exps.weight q4_K -blk.38.ffn_down_shexp.weight q4_K -blk.40.ffn_down_exps.weight q4_K -blk.40.ffn_down_shexp.weight q4_K +blk.33.attn_q.weight q4_0 +blk.33.attn_v.weight q5_0 +blk.34.ffn_down_exps.weight q5_0 +blk.34.ffn_down_shexp.weight q5_0 +blk.34.ffn_up_exps.weight q4_0 +blk.34.ffn_up_shexp.weight q4_0 +blk.35.ssm_in.weight q4_0 +blk.36.ffn_down_exps.weight q5_0 +blk.36.ffn_down_shexp.weight q5_0 +blk.36.ffn_up_exps.weight q4_0 +blk.36.ffn_up_shexp.weight q4_0 +blk.37.ssm_in.weight q4_0 +blk.38.ffn_down_exps.weight q5_0 +blk.38.ffn_down_shexp.weight q5_0 +blk.38.ffn_up_exps.weight q4_0 +blk.38.ffn_up_shexp.weight q4_0 +blk.39.ssm_in.weight q4_0 +blk.40.ffn_down_exps.weight q5_0 +blk.40.ffn_down_shexp.weight q5_0 +blk.40.ffn_up_exps.weight q4_0 +blk.40.ffn_up_shexp.weight q4_0 +blk.41.ssm_in.weight q4_0 +blk.42.attn_k.weight q4_0 blk.42.attn_output.weight q4_K -blk.42.attn_v.weight q4_K -blk.43.ffn_down_exps.weight q4_K -blk.43.ffn_down_shexp.weight q4_K -blk.45.ffn_down_exps.weight q4_K -blk.45.ffn_down_shexp.weight q4_K -blk.47.ffn_down_exps.weight q4_K -blk.47.ffn_down_shexp.weight q4_K -blk.49.ffn_down_exps.weight q4_K -blk.49.ffn_down_shexp.weight q4_K -blk.51.ffn_down_exps.weight q4_K -blk.51.ffn_down_shexp.weight q4_K +blk.42.attn_q.weight q4_0 +blk.42.attn_v.weight q5_0 +blk.43.ffn_down_exps.weight q5_0 +blk.43.ffn_down_shexp.weight q5_0 +blk.43.ffn_up_exps.weight q4_0 +blk.43.ffn_up_shexp.weight q4_0 +blk.44.ssm_in.weight q4_0 +blk.45.ffn_down_exps.weight q5_0 +blk.45.ffn_down_shexp.weight q5_0 +blk.45.ffn_up_exps.weight q4_0 +blk.45.ffn_up_shexp.weight q4_0 +blk.46.ssm_in.weight q4_0 +blk.47.ffn_down_exps.weight q5_0 +blk.47.ffn_down_shexp.weight q5_0 +blk.47.ffn_up_exps.weight q4_0 +blk.47.ffn_up_shexp.weight q4_0 +blk.48.ssm_in.weight q4_0 +blk.49.ffn_down_exps.weight q5_0 +blk.49.ffn_down_shexp.weight q5_0 +blk.49.ffn_up_exps.weight q4_0 +blk.49.ffn_up_shexp.weight q4_0 +blk.50.ssm_in.weight q4_0 +blk.51.ffn_down_exps.weight q5_0 +blk.51.ffn_down_shexp.weight q5_0 +blk.51.ffn_up_exps.weight q4_0 +blk.51.ffn_up_shexp.weight q4_0 [Q3_K_L] q3_K output.weight q8_0 -blk.1.ffn_down_exps.weight q5_K -blk.1.ffn_down_shexp.weight q5_K -blk.3.ffn_down_exps.weight q5_K -blk.3.ffn_down_shexp.weight q5_K +token_embd.weight q4_0 +blk.0.ssm_in.weight q4_0 +blk.1.ffn_down_exps.weight q5_1 +blk.1.ffn_down_shexp.weight q5_1 +blk.1.ffn_up_exps.weight q4_0 +blk.1.ffn_up_shexp.weight q4_0 +blk.2.ssm_in.weight q4_0 +blk.3.ffn_down_exps.weight q5_1 +blk.3.ffn_down_shexp.weight q5_1 +blk.3.ffn_up_exps.weight q4_0 +blk.3.ffn_up_shexp.weight q4_0 +blk.4.ssm_in.weight q4_0 +blk.5.attn_k.weight q4_0 blk.5.attn_output.weight q5_K -blk.5.attn_v.weight q5_K -blk.6.ffn_down_exps.weight q5_K -blk.6.ffn_down_shexp.weight q5_K -blk.8.ffn_down_exps.weight q5_K -blk.8.ffn_down_shexp.weight q5_K -blk.10.ffn_down_exps.weight q5_K -blk.10.ffn_down_shexp.weight q5_K +blk.5.attn_q.weight q4_0 +blk.5.attn_v.weight q5_1 +blk.6.ffn_down_exps.weight q5_1 +blk.6.ffn_down_shexp.weight q5_1 +blk.6.ffn_up_exps.weight q4_0 +blk.6.ffn_up_shexp.weight q4_0 +blk.7.ssm_in.weight q4_0 +blk.8.ffn_down_exps.weight q5_1 +blk.8.ffn_down_shexp.weight q5_1 +blk.8.ffn_up_exps.weight q4_0 +blk.8.ffn_up_shexp.weight q4_0 +blk.9.ssm_in.weight q4_0 +blk.10.ffn_down_exps.weight q5_1 +blk.10.ffn_down_shexp.weight q5_1 +blk.10.ffn_up_exps.weight q4_0 +blk.10.ffn_up_shexp.weight q4_0 +blk.11.ssm_in.weight q4_0 +blk.12.attn_k.weight q4_0 blk.12.attn_output.weight q5_K -blk.12.attn_v.weight q5_K -blk.13.ffn_down_exps.weight q5_K -blk.13.ffn_down_shexp.weight q5_K -blk.15.ffn_down_exps.weight q5_K -blk.15.ffn_down_shexp.weight q5_K -blk.17.ffn_down_exps.weight q5_K -blk.17.ffn_down_shexp.weight q5_K +blk.12.attn_q.weight q4_0 +blk.12.attn_v.weight q5_1 +blk.13.ffn_down_exps.weight q5_1 +blk.13.ffn_down_shexp.weight q5_1 +blk.13.ffn_up_exps.weight q4_0 +blk.13.ffn_up_shexp.weight q4_0 +blk.14.ssm_in.weight q4_0 +blk.15.ffn_down_exps.weight q5_1 +blk.15.ffn_down_shexp.weight q5_1 +blk.15.ffn_up_exps.weight q4_0 +blk.15.ffn_up_shexp.weight q4_0 +blk.16.ssm_in.weight q4_0 +blk.17.ffn_down_exps.weight q5_1 +blk.17.ffn_down_shexp.weight q5_1 +blk.17.ffn_up_exps.weight q4_0 +blk.17.ffn_up_shexp.weight q4_0 +blk.18.ssm_in.weight q4_0 +blk.19.attn_k.weight q4_0 blk.19.attn_output.weight q5_K -blk.19.attn_v.weight q5_K -blk.20.ffn_down_exps.weight q5_K -blk.20.ffn_down_shexp.weight q5_K -blk.22.ffn_down_exps.weight q5_K -blk.22.ffn_down_shexp.weight q5_K -blk.24.ffn_down_exps.weight q5_K -blk.24.ffn_down_shexp.weight q5_K +blk.19.attn_q.weight q4_0 +blk.19.attn_v.weight q5_1 +blk.20.ffn_down_exps.weight q5_1 +blk.20.ffn_down_shexp.weight q5_1 +blk.20.ffn_up_exps.weight q4_0 +blk.20.ffn_up_shexp.weight q4_0 +blk.21.ssm_in.weight q4_0 +blk.22.ffn_down_exps.weight q5_1 +blk.22.ffn_down_shexp.weight q5_1 +blk.22.ffn_up_exps.weight q4_0 +blk.22.ffn_up_shexp.weight q4_0 +blk.23.ssm_in.weight q4_0 +blk.24.ffn_down_exps.weight q5_1 +blk.24.ffn_down_shexp.weight q5_1 +blk.24.ffn_up_exps.weight q4_0 +blk.24.ffn_up_shexp.weight q4_0 +blk.25.ssm_in.weight q4_0 +blk.26.attn_k.weight q4_0 blk.26.attn_output.weight q5_K -blk.26.attn_v.weight q5_K -blk.27.ffn_down_exps.weight q5_K -blk.27.ffn_down_shexp.weight q5_K -blk.29.ffn_down_exps.weight q5_K -blk.29.ffn_down_shexp.weight q5_K -blk.31.ffn_down_exps.weight q5_K -blk.31.ffn_down_shexp.weight q5_K +blk.26.attn_q.weight q4_0 +blk.26.attn_v.weight q5_1 +blk.27.ffn_down_exps.weight q5_1 +blk.27.ffn_down_shexp.weight q5_1 +blk.27.ffn_up_exps.weight q4_0 +blk.27.ffn_up_shexp.weight q4_0 +blk.28.ssm_in.weight q4_0 +blk.29.ffn_down_exps.weight q5_1 +blk.29.ffn_down_shexp.weight q5_1 +blk.29.ffn_up_exps.weight q4_0 +blk.29.ffn_up_shexp.weight q4_0 +blk.30.ssm_in.weight q4_0 +blk.31.ffn_down_exps.weight q5_1 +blk.31.ffn_down_shexp.weight q5_1 +blk.31.ffn_up_exps.weight q4_0 +blk.31.ffn_up_shexp.weight q4_0 +blk.32.ssm_in.weight q4_0 +blk.33.attn_k.weight q4_0 blk.33.attn_output.weight q5_K -blk.33.attn_v.weight q5_K -blk.34.ffn_down_exps.weight q5_K -blk.34.ffn_down_shexp.weight q5_K -blk.36.ffn_down_exps.weight q5_K -blk.36.ffn_down_shexp.weight q5_K -blk.38.ffn_down_exps.weight q5_K -blk.38.ffn_down_shexp.weight q5_K -blk.40.ffn_down_exps.weight q5_K -blk.40.ffn_down_shexp.weight q5_K +blk.33.attn_q.weight q4_0 +blk.33.attn_v.weight q5_1 +blk.34.ffn_down_exps.weight q5_1 +blk.34.ffn_down_shexp.weight q5_1 +blk.34.ffn_up_exps.weight q4_0 +blk.34.ffn_up_shexp.weight q4_0 +blk.35.ssm_in.weight q4_0 +blk.36.ffn_down_exps.weight q5_1 +blk.36.ffn_down_shexp.weight q5_1 +blk.36.ffn_up_exps.weight q4_0 +blk.36.ffn_up_shexp.weight q4_0 +blk.37.ssm_in.weight q4_0 +blk.38.ffn_down_exps.weight q5_1 +blk.38.ffn_down_shexp.weight q5_1 +blk.38.ffn_up_exps.weight q4_0 +blk.38.ffn_up_shexp.weight q4_0 +blk.39.ssm_in.weight q4_0 +blk.40.ffn_down_exps.weight q5_1 +blk.40.ffn_down_shexp.weight q5_1 +blk.40.ffn_up_exps.weight q4_0 +blk.40.ffn_up_shexp.weight q4_0 +blk.41.ssm_in.weight q4_0 +blk.42.attn_k.weight q4_0 blk.42.attn_output.weight q5_K -blk.42.attn_v.weight q5_K -blk.43.ffn_down_exps.weight q5_K -blk.43.ffn_down_shexp.weight q5_K -blk.45.ffn_down_exps.weight q5_K -blk.45.ffn_down_shexp.weight q5_K -blk.47.ffn_down_exps.weight q5_K -blk.47.ffn_down_shexp.weight q5_K -blk.49.ffn_down_exps.weight q5_K -blk.49.ffn_down_shexp.weight q5_K -blk.51.ffn_down_exps.weight q5_K -blk.51.ffn_down_shexp.weight q5_K +blk.42.attn_q.weight q4_0 +blk.42.attn_v.weight q5_1 +blk.43.ffn_down_exps.weight q5_1 +blk.43.ffn_down_shexp.weight q5_1 +blk.43.ffn_up_exps.weight q4_0 +blk.43.ffn_up_shexp.weight q4_0 +blk.44.ssm_in.weight q4_0 +blk.45.ffn_down_exps.weight q5_1 +blk.45.ffn_down_shexp.weight q5_1 +blk.45.ffn_up_exps.weight q4_0 +blk.45.ffn_up_shexp.weight q4_0 +blk.46.ssm_in.weight q4_0 +blk.47.ffn_down_exps.weight q5_1 +blk.47.ffn_down_shexp.weight q5_1 +blk.47.ffn_up_exps.weight q4_0 +blk.47.ffn_up_shexp.weight q4_0 +blk.48.ssm_in.weight q4_0 +blk.49.ffn_down_exps.weight q5_1 +blk.49.ffn_down_shexp.weight q5_1 +blk.49.ffn_up_exps.weight q4_0 +blk.49.ffn_up_shexp.weight q4_0 +blk.50.ssm_in.weight q4_0 +blk.51.ffn_down_exps.weight q5_1 +blk.51.ffn_down_shexp.weight q5_1 +blk.51.ffn_up_exps.weight q4_0 +blk.51.ffn_up_shexp.weight q4_0 [Q4_K_S] q4_K output.weight q8_0 -blk.1.ffn_down_exps.weight q5_K -blk.1.ffn_down_shexp.weight q5_K -blk.3.ffn_down_exps.weight q5_K -blk.3.ffn_down_shexp.weight q5_K -blk.5.attn_v.weight q5_K -blk.12.attn_v.weight q5_K -blk.19.attn_v.weight q5_K -blk.26.attn_v.weight q5_K +token_embd.weight q5_0 +blk.0.ssm_in.weight q5_0 +blk.1.ffn_down_exps.weight q5_1 +blk.1.ffn_down_shexp.weight q5_1 +blk.1.ffn_up_exps.weight q5_0 +blk.1.ffn_up_shexp.weight q5_0 +blk.2.ssm_in.weight q5_0 +blk.3.ffn_down_exps.weight q5_1 +blk.3.ffn_down_shexp.weight q5_1 +blk.3.ffn_up_exps.weight q5_0 +blk.3.ffn_up_shexp.weight q5_0 +blk.4.ssm_in.weight q5_0 +blk.5.attn_k.weight q5_0 +blk.5.attn_q.weight q5_0 +blk.5.attn_v.weight q5_1 +blk.6.ffn_down_exps.weight q5_0 +blk.6.ffn_down_shexp.weight q5_0 +blk.6.ffn_up_exps.weight q5_0 +blk.6.ffn_up_shexp.weight q5_0 +blk.7.ssm_in.weight q5_0 +blk.8.ffn_down_exps.weight q5_0 +blk.8.ffn_down_shexp.weight q5_0 +blk.8.ffn_up_exps.weight q5_0 +blk.8.ffn_up_shexp.weight q5_0 +blk.9.ssm_in.weight q5_0 +blk.10.ffn_down_exps.weight q5_0 +blk.10.ffn_down_shexp.weight q5_0 +blk.10.ffn_up_exps.weight q5_0 +blk.10.ffn_up_shexp.weight q5_0 +blk.11.ssm_in.weight q5_0 +blk.12.attn_k.weight q5_0 +blk.12.attn_q.weight q5_0 +blk.12.attn_v.weight q5_1 +blk.13.ffn_down_exps.weight q5_0 +blk.13.ffn_down_shexp.weight q5_0 +blk.13.ffn_up_exps.weight q5_0 +blk.13.ffn_up_shexp.weight q5_0 +blk.14.ssm_in.weight q5_0 +blk.15.ffn_down_exps.weight q5_0 +blk.15.ffn_down_shexp.weight q5_0 +blk.15.ffn_up_exps.weight q5_0 +blk.15.ffn_up_shexp.weight q5_0 +blk.16.ssm_in.weight q5_0 +blk.17.ffn_down_exps.weight q5_0 +blk.17.ffn_down_shexp.weight q5_0 +blk.17.ffn_up_exps.weight q5_0 +blk.17.ffn_up_shexp.weight q5_0 +blk.18.ssm_in.weight q5_0 +blk.19.attn_k.weight q5_0 +blk.19.attn_q.weight q5_0 +blk.19.attn_v.weight q5_1 +blk.20.ffn_down_exps.weight q5_0 +blk.20.ffn_down_shexp.weight q5_0 +blk.20.ffn_up_exps.weight q5_0 +blk.20.ffn_up_shexp.weight q5_0 +blk.21.ssm_in.weight q5_0 +blk.22.ffn_down_exps.weight q5_0 +blk.22.ffn_down_shexp.weight q5_0 +blk.22.ffn_up_exps.weight q5_0 +blk.22.ffn_up_shexp.weight q5_0 +blk.23.ssm_in.weight q5_0 +blk.24.ffn_down_exps.weight q5_0 +blk.24.ffn_down_shexp.weight q5_0 +blk.24.ffn_up_exps.weight q5_0 +blk.24.ffn_up_shexp.weight q5_0 +blk.25.ssm_in.weight q5_0 +blk.26.attn_k.weight q5_0 +blk.26.attn_q.weight q5_0 +blk.26.attn_v.weight q5_1 +blk.27.ffn_down_exps.weight q5_0 +blk.27.ffn_down_shexp.weight q5_0 +blk.27.ffn_up_exps.weight q5_0 +blk.27.ffn_up_shexp.weight q5_0 +blk.28.ssm_in.weight q5_0 +blk.29.ffn_down_exps.weight q5_0 +blk.29.ffn_down_shexp.weight q5_0 +blk.29.ffn_up_exps.weight q5_0 +blk.29.ffn_up_shexp.weight q5_0 +blk.30.ssm_in.weight q5_0 +blk.31.ffn_down_exps.weight q5_0 +blk.31.ffn_down_shexp.weight q5_0 +blk.31.ffn_up_exps.weight q5_0 +blk.31.ffn_up_shexp.weight q5_0 +blk.32.ssm_in.weight q5_0 +blk.33.attn_k.weight q5_0 +blk.33.attn_q.weight q5_0 +blk.33.attn_v.weight q5_0 +blk.34.ffn_down_exps.weight q5_0 +blk.34.ffn_down_shexp.weight q5_0 +blk.34.ffn_up_exps.weight q5_0 +blk.34.ffn_up_shexp.weight q5_0 +blk.35.ssm_in.weight q5_0 +blk.36.ffn_down_exps.weight q5_0 +blk.36.ffn_down_shexp.weight q5_0 +blk.36.ffn_up_exps.weight q5_0 +blk.36.ffn_up_shexp.weight q5_0 +blk.37.ssm_in.weight q5_0 +blk.38.ffn_down_exps.weight q5_0 +blk.38.ffn_down_shexp.weight q5_0 +blk.38.ffn_up_exps.weight q5_0 +blk.38.ffn_up_shexp.weight q5_0 +blk.39.ssm_in.weight q5_0 +blk.40.ffn_down_exps.weight q5_0 +blk.40.ffn_down_shexp.weight q5_0 +blk.40.ffn_up_exps.weight q5_0 +blk.40.ffn_up_shexp.weight q5_0 +blk.41.ssm_in.weight q5_0 +blk.42.attn_k.weight q5_0 +blk.42.attn_q.weight q5_0 +blk.42.attn_v.weight q5_0 +blk.43.ffn_down_exps.weight q5_0 +blk.43.ffn_down_shexp.weight q5_0 +blk.43.ffn_up_exps.weight q5_0 +blk.43.ffn_up_shexp.weight q5_0 +blk.44.ssm_in.weight q5_0 +blk.45.ffn_down_exps.weight q5_0 +blk.45.ffn_down_shexp.weight q5_0 +blk.45.ffn_up_exps.weight q5_0 +blk.45.ffn_up_shexp.weight q5_0 +blk.46.ssm_in.weight q5_0 +blk.47.ffn_down_exps.weight q5_0 +blk.47.ffn_down_shexp.weight q5_0 +blk.47.ffn_up_exps.weight q5_0 +blk.47.ffn_up_shexp.weight q5_0 +blk.48.ssm_in.weight q5_0 +blk.49.ffn_down_exps.weight q5_0 +blk.49.ffn_down_shexp.weight q5_0 +blk.49.ffn_up_exps.weight q5_0 +blk.49.ffn_up_shexp.weight q5_0 +blk.50.ssm_in.weight q5_0 +blk.51.ffn_down_exps.weight q5_0 +blk.51.ffn_down_shexp.weight q5_0 +blk.51.ffn_up_exps.weight q5_0 +blk.51.ffn_up_shexp.weight q5_0 [Q4_K_M] q4_K output.weight q8_0 -blk.1.ffn_down_exps.weight q6_K -blk.1.ffn_down_shexp.weight q6_K -blk.3.ffn_down_exps.weight q6_K -blk.3.ffn_down_shexp.weight q6_K -blk.8.ffn_down_exps.weight q6_K -blk.8.ffn_down_shexp.weight q6_K -blk.17.ffn_down_exps.weight q6_K -blk.17.ffn_down_shexp.weight q6_K -blk.19.attn_v.weight q6_K -blk.20.ffn_down_exps.weight q6_K -blk.20.ffn_down_shexp.weight q6_K -blk.29.ffn_down_exps.weight q6_K -blk.29.ffn_down_shexp.weight q6_K -blk.38.ffn_down_exps.weight q6_K -blk.38.ffn_down_shexp.weight q6_K -blk.42.attn_v.weight q6_K -blk.45.ffn_down_exps.weight q6_K -blk.45.ffn_down_shexp.weight q6_K -blk.47.ffn_down_exps.weight q6_K -blk.47.ffn_down_shexp.weight q6_K -blk.49.ffn_down_exps.weight q6_K -blk.49.ffn_down_shexp.weight q6_K -blk.51.ffn_down_exps.weight q6_K -blk.51.ffn_down_shexp.weight q6_K +token_embd.weight q5_0 +blk.0.ssm_in.weight q5_0 +blk.1.ffn_down_exps.weight q8_0 +blk.1.ffn_down_shexp.weight q8_0 +blk.1.ffn_up_exps.weight q5_0 +blk.1.ffn_up_shexp.weight q5_0 +blk.2.ssm_in.weight q5_0 +blk.3.ffn_down_exps.weight q8_0 +blk.3.ffn_down_shexp.weight q8_0 +blk.3.ffn_up_exps.weight q5_0 +blk.3.ffn_up_shexp.weight q5_0 +blk.4.ssm_in.weight q5_0 +blk.5.attn_k.weight q5_0 +blk.5.attn_q.weight q5_0 +blk.5.attn_v.weight q5_0 +blk.6.ffn_down_exps.weight q5_0 +blk.6.ffn_down_shexp.weight q5_0 +blk.6.ffn_up_exps.weight q5_0 +blk.6.ffn_up_shexp.weight q5_0 +blk.7.ssm_in.weight q5_0 +blk.8.ffn_down_exps.weight q8_0 +blk.8.ffn_down_shexp.weight q8_0 +blk.8.ffn_up_exps.weight q5_0 +blk.8.ffn_up_shexp.weight q5_0 +blk.9.ssm_in.weight q5_0 +blk.10.ffn_down_exps.weight q5_0 +blk.10.ffn_down_shexp.weight q5_0 +blk.10.ffn_up_exps.weight q5_0 +blk.10.ffn_up_shexp.weight q5_0 +blk.11.ssm_in.weight q5_0 +blk.12.attn_k.weight q5_0 +blk.12.attn_q.weight q5_0 +blk.12.attn_v.weight q5_0 +blk.13.ffn_down_exps.weight q5_0 +blk.13.ffn_down_shexp.weight q5_0 +blk.13.ffn_up_exps.weight q5_0 +blk.13.ffn_up_shexp.weight q5_0 +blk.14.ssm_in.weight q5_0 +blk.15.ffn_down_exps.weight q5_0 +blk.15.ffn_down_shexp.weight q5_0 +blk.15.ffn_up_exps.weight q5_0 +blk.15.ffn_up_shexp.weight q5_0 +blk.16.ssm_in.weight q5_0 +blk.17.ffn_down_exps.weight q8_0 +blk.17.ffn_down_shexp.weight q8_0 +blk.17.ffn_up_exps.weight q5_0 +blk.17.ffn_up_shexp.weight q5_0 +blk.18.ssm_in.weight q5_0 +blk.19.attn_k.weight q5_0 +blk.19.attn_q.weight q5_0 +blk.19.attn_v.weight q8_0 +blk.20.ffn_down_exps.weight q8_0 +blk.20.ffn_down_shexp.weight q8_0 +blk.20.ffn_up_exps.weight q5_0 +blk.20.ffn_up_shexp.weight q5_0 +blk.21.ssm_in.weight q5_0 +blk.22.ffn_down_exps.weight q5_0 +blk.22.ffn_down_shexp.weight q5_0 +blk.22.ffn_up_exps.weight q5_0 +blk.22.ffn_up_shexp.weight q5_0 +blk.23.ssm_in.weight q5_0 +blk.24.ffn_down_exps.weight q5_0 +blk.24.ffn_down_shexp.weight q5_0 +blk.24.ffn_up_exps.weight q5_0 +blk.24.ffn_up_shexp.weight q5_0 +blk.25.ssm_in.weight q5_0 +blk.26.attn_k.weight q5_0 +blk.26.attn_q.weight q5_0 +blk.26.attn_v.weight q5_0 +blk.27.ffn_down_exps.weight q5_0 +blk.27.ffn_down_shexp.weight q5_0 +blk.27.ffn_up_exps.weight q5_0 +blk.27.ffn_up_shexp.weight q5_0 +blk.28.ssm_in.weight q5_0 +blk.29.ffn_down_exps.weight q8_0 +blk.29.ffn_down_shexp.weight q8_0 +blk.29.ffn_up_exps.weight q5_0 +blk.29.ffn_up_shexp.weight q5_0 +blk.30.ssm_in.weight q5_0 +blk.31.ffn_down_exps.weight q5_0 +blk.31.ffn_down_shexp.weight q5_0 +blk.31.ffn_up_exps.weight q5_0 +blk.31.ffn_up_shexp.weight q5_0 +blk.32.ssm_in.weight q5_0 +blk.33.attn_k.weight q5_0 +blk.33.attn_q.weight q5_0 +blk.33.attn_v.weight q5_0 +blk.34.ffn_down_exps.weight q5_0 +blk.34.ffn_down_shexp.weight q5_0 +blk.34.ffn_up_exps.weight q5_0 +blk.34.ffn_up_shexp.weight q5_0 +blk.35.ssm_in.weight q5_0 +blk.36.ffn_down_exps.weight q5_0 +blk.36.ffn_down_shexp.weight q5_0 +blk.36.ffn_up_exps.weight q5_0 +blk.36.ffn_up_shexp.weight q5_0 +blk.37.ssm_in.weight q5_0 +blk.38.ffn_down_exps.weight q8_0 +blk.38.ffn_down_shexp.weight q8_0 +blk.38.ffn_up_exps.weight q5_0 +blk.38.ffn_up_shexp.weight q5_0 +blk.39.ssm_in.weight q5_0 +blk.40.ffn_down_exps.weight q5_0 +blk.40.ffn_down_shexp.weight q5_0 +blk.40.ffn_up_exps.weight q5_0 +blk.40.ffn_up_shexp.weight q5_0 +blk.41.ssm_in.weight q5_0 +blk.42.attn_k.weight q5_0 +blk.42.attn_q.weight q5_0 +blk.42.attn_v.weight q8_0 +blk.43.ffn_down_exps.weight q5_0 +blk.43.ffn_down_shexp.weight q5_0 +blk.43.ffn_up_exps.weight q5_0 +blk.43.ffn_up_shexp.weight q5_0 +blk.44.ssm_in.weight q5_0 +blk.45.ffn_down_exps.weight q8_0 +blk.45.ffn_down_shexp.weight q8_0 +blk.45.ffn_up_exps.weight q5_0 +blk.45.ffn_up_shexp.weight q5_0 +blk.46.ssm_in.weight q5_0 +blk.47.ffn_down_exps.weight q8_0 +blk.47.ffn_down_shexp.weight q8_0 +blk.47.ffn_up_exps.weight q5_0 +blk.47.ffn_up_shexp.weight q5_0 +blk.48.ssm_in.weight q5_0 +blk.49.ffn_down_exps.weight q8_0 +blk.49.ffn_down_shexp.weight q8_0 +blk.49.ffn_up_exps.weight q5_0 +blk.49.ffn_up_shexp.weight q5_0 +blk.50.ssm_in.weight q5_0 +blk.51.ffn_down_exps.weight q8_0 +blk.51.ffn_down_shexp.weight q8_0 +blk.51.ffn_up_exps.weight q5_0 +blk.51.ffn_up_shexp.weight q5_0 [Q5_K_S] q5_K output.weight q8_0 +token_embd.weight q5_1 +blk.0.ssm_in.weight q5_1 +blk.1.ffn_down_exps.weight q5_1 +blk.1.ffn_down_shexp.weight q5_1 +blk.1.ffn_up_exps.weight q5_1 +blk.1.ffn_up_shexp.weight q5_1 +blk.2.ssm_in.weight q5_1 +blk.3.ffn_down_exps.weight q5_1 +blk.3.ffn_down_shexp.weight q5_1 +blk.3.ffn_up_exps.weight q5_1 +blk.3.ffn_up_shexp.weight q5_1 +blk.4.ssm_in.weight q5_1 +blk.5.attn_k.weight q5_1 +blk.5.attn_q.weight q5_1 +blk.5.attn_v.weight q5_1 +blk.6.ffn_down_exps.weight q5_1 +blk.6.ffn_down_shexp.weight q5_1 +blk.6.ffn_up_exps.weight q5_1 +blk.6.ffn_up_shexp.weight q5_1 +blk.7.ssm_in.weight q5_1 +blk.8.ffn_down_exps.weight q5_1 +blk.8.ffn_down_shexp.weight q5_1 +blk.8.ffn_up_exps.weight q5_1 +blk.8.ffn_up_shexp.weight q5_1 +blk.9.ssm_in.weight q5_1 +blk.10.ffn_down_exps.weight q5_1 +blk.10.ffn_down_shexp.weight q5_1 +blk.10.ffn_up_exps.weight q5_1 +blk.10.ffn_up_shexp.weight q5_1 +blk.11.ssm_in.weight q5_1 +blk.12.attn_k.weight q5_1 +blk.12.attn_q.weight q5_1 +blk.12.attn_v.weight q5_1 +blk.13.ffn_down_exps.weight q5_1 +blk.13.ffn_down_shexp.weight q5_1 +blk.13.ffn_up_exps.weight q5_1 +blk.13.ffn_up_shexp.weight q5_1 +blk.14.ssm_in.weight q5_1 +blk.15.ffn_down_exps.weight q5_1 +blk.15.ffn_down_shexp.weight q5_1 +blk.15.ffn_up_exps.weight q5_1 +blk.15.ffn_up_shexp.weight q5_1 +blk.16.ssm_in.weight q5_1 +blk.17.ffn_down_exps.weight q5_1 +blk.17.ffn_down_shexp.weight q5_1 +blk.17.ffn_up_exps.weight q5_1 +blk.17.ffn_up_shexp.weight q5_1 +blk.18.ssm_in.weight q5_1 +blk.19.attn_k.weight q5_1 +blk.19.attn_q.weight q5_1 +blk.19.attn_v.weight q5_1 +blk.20.ffn_down_exps.weight q5_1 +blk.20.ffn_down_shexp.weight q5_1 +blk.20.ffn_up_exps.weight q5_1 +blk.20.ffn_up_shexp.weight q5_1 +blk.21.ssm_in.weight q5_1 +blk.22.ffn_down_exps.weight q5_1 +blk.22.ffn_down_shexp.weight q5_1 +blk.22.ffn_up_exps.weight q5_1 +blk.22.ffn_up_shexp.weight q5_1 +blk.23.ssm_in.weight q5_1 +blk.24.ffn_down_exps.weight q5_1 +blk.24.ffn_down_shexp.weight q5_1 +blk.24.ffn_up_exps.weight q5_1 +blk.24.ffn_up_shexp.weight q5_1 +blk.25.ssm_in.weight q5_1 +blk.26.attn_k.weight q5_1 +blk.26.attn_q.weight q5_1 +blk.26.attn_v.weight q5_1 +blk.27.ffn_down_exps.weight q5_1 +blk.27.ffn_down_shexp.weight q5_1 +blk.27.ffn_up_exps.weight q5_1 +blk.27.ffn_up_shexp.weight q5_1 +blk.28.ssm_in.weight q5_1 +blk.29.ffn_down_exps.weight q5_1 +blk.29.ffn_down_shexp.weight q5_1 +blk.29.ffn_up_exps.weight q5_1 +blk.29.ffn_up_shexp.weight q5_1 +blk.30.ssm_in.weight q5_1 +blk.31.ffn_down_exps.weight q5_1 +blk.31.ffn_down_shexp.weight q5_1 +blk.31.ffn_up_exps.weight q5_1 +blk.31.ffn_up_shexp.weight q5_1 +blk.32.ssm_in.weight q5_1 +blk.33.attn_k.weight q5_1 +blk.33.attn_q.weight q5_1 +blk.33.attn_v.weight q5_1 +blk.34.ffn_down_exps.weight q5_1 +blk.34.ffn_down_shexp.weight q5_1 +blk.34.ffn_up_exps.weight q5_1 +blk.34.ffn_up_shexp.weight q5_1 +blk.35.ssm_in.weight q5_1 +blk.36.ffn_down_exps.weight q5_1 +blk.36.ffn_down_shexp.weight q5_1 +blk.36.ffn_up_exps.weight q5_1 +blk.36.ffn_up_shexp.weight q5_1 +blk.37.ssm_in.weight q5_1 +blk.38.ffn_down_exps.weight q5_1 +blk.38.ffn_down_shexp.weight q5_1 +blk.38.ffn_up_exps.weight q5_1 +blk.38.ffn_up_shexp.weight q5_1 +blk.39.ssm_in.weight q5_1 +blk.40.ffn_down_exps.weight q5_1 +blk.40.ffn_down_shexp.weight q5_1 +blk.40.ffn_up_exps.weight q5_1 +blk.40.ffn_up_shexp.weight q5_1 +blk.41.ssm_in.weight q5_1 +blk.42.attn_k.weight q5_1 +blk.42.attn_q.weight q5_1 +blk.42.attn_v.weight q5_1 +blk.43.ffn_down_exps.weight q5_1 +blk.43.ffn_down_shexp.weight q5_1 +blk.43.ffn_up_exps.weight q5_1 +blk.43.ffn_up_shexp.weight q5_1 +blk.44.ssm_in.weight q5_1 +blk.45.ffn_down_exps.weight q5_1 +blk.45.ffn_down_shexp.weight q5_1 +blk.45.ffn_up_exps.weight q5_1 +blk.45.ffn_up_shexp.weight q5_1 +blk.46.ssm_in.weight q5_1 +blk.47.ffn_down_exps.weight q5_1 +blk.47.ffn_down_shexp.weight q5_1 +blk.47.ffn_up_exps.weight q5_1 +blk.47.ffn_up_shexp.weight q5_1 +blk.48.ssm_in.weight q5_1 +blk.49.ffn_down_exps.weight q5_1 +blk.49.ffn_down_shexp.weight q5_1 +blk.49.ffn_up_exps.weight q5_1 +blk.49.ffn_up_shexp.weight q5_1 +blk.50.ssm_in.weight q5_1 +blk.51.ffn_down_exps.weight q5_1 +blk.51.ffn_down_shexp.weight q5_1 +blk.51.ffn_up_exps.weight q5_1 +blk.51.ffn_up_shexp.weight q5_1 [Q5_K_M] q5_K output.weight q8_0 -blk.1.ffn_down_exps.weight q6_K -blk.1.ffn_down_shexp.weight q6_K -blk.3.ffn_down_exps.weight q6_K -blk.3.ffn_down_shexp.weight q6_K -blk.8.ffn_down_exps.weight q6_K -blk.8.ffn_down_shexp.weight q6_K -blk.17.ffn_down_exps.weight q6_K -blk.17.ffn_down_shexp.weight q6_K -blk.19.attn_v.weight q6_K -blk.20.ffn_down_exps.weight q6_K -blk.20.ffn_down_shexp.weight q6_K -blk.29.ffn_down_exps.weight q6_K -blk.29.ffn_down_shexp.weight q6_K -blk.38.ffn_down_exps.weight q6_K -blk.38.ffn_down_shexp.weight q6_K -blk.42.attn_v.weight q6_K -blk.45.ffn_down_exps.weight q6_K -blk.45.ffn_down_shexp.weight q6_K -blk.47.ffn_down_exps.weight q6_K -blk.47.ffn_down_shexp.weight q6_K -blk.49.ffn_down_exps.weight q6_K -blk.49.ffn_down_shexp.weight q6_K -blk.51.ffn_down_exps.weight q6_K -blk.51.ffn_down_shexp.weight q6_K +token_embd.weight q5_1 +blk.0.ssm_in.weight q5_1 +blk.1.ffn_down_exps.weight q8_0 +blk.1.ffn_down_shexp.weight q8_0 +blk.1.ffn_up_exps.weight q5_1 +blk.1.ffn_up_shexp.weight q5_1 +blk.2.ssm_in.weight q5_1 +blk.3.ffn_down_exps.weight q8_0 +blk.3.ffn_down_shexp.weight q8_0 +blk.3.ffn_up_exps.weight q5_1 +blk.3.ffn_up_shexp.weight q5_1 +blk.4.ssm_in.weight q5_1 +blk.5.attn_k.weight q5_1 +blk.5.attn_q.weight q5_1 +blk.5.attn_v.weight q5_1 +blk.6.ffn_down_exps.weight q5_1 +blk.6.ffn_down_shexp.weight q5_1 +blk.6.ffn_up_exps.weight q5_1 +blk.6.ffn_up_shexp.weight q5_1 +blk.7.ssm_in.weight q5_1 +blk.8.ffn_down_exps.weight q8_0 +blk.8.ffn_down_shexp.weight q8_0 +blk.8.ffn_up_exps.weight q5_1 +blk.8.ffn_up_shexp.weight q5_1 +blk.9.ssm_in.weight q5_1 +blk.10.ffn_down_exps.weight q5_1 +blk.10.ffn_down_shexp.weight q5_1 +blk.10.ffn_up_exps.weight q5_1 +blk.10.ffn_up_shexp.weight q5_1 +blk.11.ssm_in.weight q5_1 +blk.12.attn_k.weight q5_1 +blk.12.attn_q.weight q5_1 +blk.12.attn_v.weight q5_1 +blk.13.ffn_down_exps.weight q5_1 +blk.13.ffn_down_shexp.weight q5_1 +blk.13.ffn_up_exps.weight q5_1 +blk.13.ffn_up_shexp.weight q5_1 +blk.14.ssm_in.weight q5_1 +blk.15.ffn_down_exps.weight q5_1 +blk.15.ffn_down_shexp.weight q5_1 +blk.15.ffn_up_exps.weight q5_1 +blk.15.ffn_up_shexp.weight q5_1 +blk.16.ssm_in.weight q5_1 +blk.17.ffn_down_exps.weight q8_0 +blk.17.ffn_down_shexp.weight q8_0 +blk.17.ffn_up_exps.weight q5_1 +blk.17.ffn_up_shexp.weight q5_1 +blk.18.ssm_in.weight q5_1 +blk.19.attn_k.weight q5_1 +blk.19.attn_q.weight q5_1 +blk.19.attn_v.weight q8_0 +blk.20.ffn_down_exps.weight q8_0 +blk.20.ffn_down_shexp.weight q8_0 +blk.20.ffn_up_exps.weight q5_1 +blk.20.ffn_up_shexp.weight q5_1 +blk.21.ssm_in.weight q5_1 +blk.22.ffn_down_exps.weight q5_1 +blk.22.ffn_down_shexp.weight q5_1 +blk.22.ffn_up_exps.weight q5_1 +blk.22.ffn_up_shexp.weight q5_1 +blk.23.ssm_in.weight q5_1 +blk.24.ffn_down_exps.weight q5_1 +blk.24.ffn_down_shexp.weight q5_1 +blk.24.ffn_up_exps.weight q5_1 +blk.24.ffn_up_shexp.weight q5_1 +blk.25.ssm_in.weight q5_1 +blk.26.attn_k.weight q5_1 +blk.26.attn_q.weight q5_1 +blk.26.attn_v.weight q5_1 +blk.27.ffn_down_exps.weight q5_1 +blk.27.ffn_down_shexp.weight q5_1 +blk.27.ffn_up_exps.weight q5_1 +blk.27.ffn_up_shexp.weight q5_1 +blk.28.ssm_in.weight q5_1 +blk.29.ffn_down_exps.weight q8_0 +blk.29.ffn_down_shexp.weight q8_0 +blk.29.ffn_up_exps.weight q5_1 +blk.29.ffn_up_shexp.weight q5_1 +blk.30.ssm_in.weight q5_1 +blk.31.ffn_down_exps.weight q5_1 +blk.31.ffn_down_shexp.weight q5_1 +blk.31.ffn_up_exps.weight q5_1 +blk.31.ffn_up_shexp.weight q5_1 +blk.32.ssm_in.weight q5_1 +blk.33.attn_k.weight q5_1 +blk.33.attn_q.weight q5_1 +blk.33.attn_v.weight q5_1 +blk.34.ffn_down_exps.weight q5_1 +blk.34.ffn_down_shexp.weight q5_1 +blk.34.ffn_up_exps.weight q5_1 +blk.34.ffn_up_shexp.weight q5_1 +blk.35.ssm_in.weight q5_1 +blk.36.ffn_down_exps.weight q5_1 +blk.36.ffn_down_shexp.weight q5_1 +blk.36.ffn_up_exps.weight q5_1 +blk.36.ffn_up_shexp.weight q5_1 +blk.37.ssm_in.weight q5_1 +blk.38.ffn_down_exps.weight q8_0 +blk.38.ffn_down_shexp.weight q8_0 +blk.38.ffn_up_exps.weight q5_1 +blk.38.ffn_up_shexp.weight q5_1 +blk.39.ssm_in.weight q5_1 +blk.40.ffn_down_exps.weight q5_1 +blk.40.ffn_down_shexp.weight q5_1 +blk.40.ffn_up_exps.weight q5_1 +blk.40.ffn_up_shexp.weight q5_1 +blk.41.ssm_in.weight q5_1 +blk.42.attn_k.weight q5_1 +blk.42.attn_q.weight q5_1 +blk.42.attn_v.weight q8_0 +blk.43.ffn_down_exps.weight q5_1 +blk.43.ffn_down_shexp.weight q5_1 +blk.43.ffn_up_exps.weight q5_1 +blk.43.ffn_up_shexp.weight q5_1 +blk.44.ssm_in.weight q5_1 +blk.45.ffn_down_exps.weight q8_0 +blk.45.ffn_down_shexp.weight q8_0 +blk.45.ffn_up_exps.weight q5_1 +blk.45.ffn_up_shexp.weight q5_1 +blk.46.ssm_in.weight q5_1 +blk.47.ffn_down_exps.weight q8_0 +blk.47.ffn_down_shexp.weight q8_0 +blk.47.ffn_up_exps.weight q5_1 +blk.47.ffn_up_shexp.weight q5_1 +blk.48.ssm_in.weight q5_1 +blk.49.ffn_down_exps.weight q8_0 +blk.49.ffn_down_shexp.weight q8_0 +blk.49.ffn_up_exps.weight q5_1 +blk.49.ffn_up_shexp.weight q5_1 +blk.50.ssm_in.weight q5_1 +blk.51.ffn_down_exps.weight q8_0 +blk.51.ffn_down_shexp.weight q8_0 +blk.51.ffn_up_exps.weight q5_1 +blk.51.ffn_up_shexp.weight q5_1 [Q6_K] q6_K output.weight q8_0 +token_embd.weight q8_0 +blk.0.ssm_in.weight q8_0 +blk.1.ffn_down_exps.weight q8_0 +blk.1.ffn_down_shexp.weight q8_0 +blk.1.ffn_up_exps.weight q8_0 +blk.1.ffn_up_shexp.weight q8_0 +blk.2.ssm_in.weight q8_0 +blk.3.ffn_down_exps.weight q8_0 +blk.3.ffn_down_shexp.weight q8_0 +blk.3.ffn_up_exps.weight q8_0 +blk.3.ffn_up_shexp.weight q8_0 +blk.4.ssm_in.weight q8_0 +blk.5.attn_k.weight q8_0 +blk.5.attn_q.weight q8_0 +blk.5.attn_v.weight q8_0 +blk.6.ffn_down_exps.weight q8_0 +blk.6.ffn_down_shexp.weight q8_0 +blk.6.ffn_up_exps.weight q8_0 +blk.6.ffn_up_shexp.weight q8_0 +blk.7.ssm_in.weight q8_0 +blk.8.ffn_down_exps.weight q8_0 +blk.8.ffn_down_shexp.weight q8_0 +blk.8.ffn_up_exps.weight q8_0 +blk.8.ffn_up_shexp.weight q8_0 +blk.9.ssm_in.weight q8_0 +blk.10.ffn_down_exps.weight q8_0 +blk.10.ffn_down_shexp.weight q8_0 +blk.10.ffn_up_exps.weight q8_0 +blk.10.ffn_up_shexp.weight q8_0 +blk.11.ssm_in.weight q8_0 +blk.12.attn_k.weight q8_0 +blk.12.attn_q.weight q8_0 +blk.12.attn_v.weight q8_0 +blk.13.ffn_down_exps.weight q8_0 +blk.13.ffn_down_shexp.weight q8_0 +blk.13.ffn_up_exps.weight q8_0 +blk.13.ffn_up_shexp.weight q8_0 +blk.14.ssm_in.weight q8_0 +blk.15.ffn_down_exps.weight q8_0 +blk.15.ffn_down_shexp.weight q8_0 +blk.15.ffn_up_exps.weight q8_0 +blk.15.ffn_up_shexp.weight q8_0 +blk.16.ssm_in.weight q8_0 +blk.17.ffn_down_exps.weight q8_0 +blk.17.ffn_down_shexp.weight q8_0 +blk.17.ffn_up_exps.weight q8_0 +blk.17.ffn_up_shexp.weight q8_0 +blk.18.ssm_in.weight q8_0 +blk.19.attn_k.weight q8_0 +blk.19.attn_q.weight q8_0 +blk.19.attn_v.weight q8_0 +blk.20.ffn_down_exps.weight q8_0 +blk.20.ffn_down_shexp.weight q8_0 +blk.20.ffn_up_exps.weight q8_0 +blk.20.ffn_up_shexp.weight q8_0 +blk.21.ssm_in.weight q8_0 +blk.22.ffn_down_exps.weight q8_0 +blk.22.ffn_down_shexp.weight q8_0 +blk.22.ffn_up_exps.weight q8_0 +blk.22.ffn_up_shexp.weight q8_0 +blk.23.ssm_in.weight q8_0 +blk.24.ffn_down_exps.weight q8_0 +blk.24.ffn_down_shexp.weight q8_0 +blk.24.ffn_up_exps.weight q8_0 +blk.24.ffn_up_shexp.weight q8_0 +blk.25.ssm_in.weight q8_0 +blk.26.attn_k.weight q8_0 +blk.26.attn_q.weight q8_0 +blk.26.attn_v.weight q8_0 +blk.27.ffn_down_exps.weight q8_0 +blk.27.ffn_down_shexp.weight q8_0 +blk.27.ffn_up_exps.weight q8_0 +blk.27.ffn_up_shexp.weight q8_0 +blk.28.ssm_in.weight q8_0 +blk.29.ffn_down_exps.weight q8_0 +blk.29.ffn_down_shexp.weight q8_0 +blk.29.ffn_up_exps.weight q8_0 +blk.29.ffn_up_shexp.weight q8_0 +blk.30.ssm_in.weight q8_0 +blk.31.ffn_down_exps.weight q8_0 +blk.31.ffn_down_shexp.weight q8_0 +blk.31.ffn_up_exps.weight q8_0 +blk.31.ffn_up_shexp.weight q8_0 +blk.32.ssm_in.weight q8_0 +blk.33.attn_k.weight q8_0 +blk.33.attn_q.weight q8_0 +blk.33.attn_v.weight q8_0 +blk.34.ffn_down_exps.weight q8_0 +blk.34.ffn_down_shexp.weight q8_0 +blk.34.ffn_up_exps.weight q8_0 +blk.34.ffn_up_shexp.weight q8_0 +blk.35.ssm_in.weight q8_0 +blk.36.ffn_down_exps.weight q8_0 +blk.36.ffn_down_shexp.weight q8_0 +blk.36.ffn_up_exps.weight q8_0 +blk.36.ffn_up_shexp.weight q8_0 +blk.37.ssm_in.weight q8_0 +blk.38.ffn_down_exps.weight q8_0 +blk.38.ffn_down_shexp.weight q8_0 +blk.38.ffn_up_exps.weight q8_0 +blk.38.ffn_up_shexp.weight q8_0 +blk.39.ssm_in.weight q8_0 +blk.40.ffn_down_exps.weight q8_0 +blk.40.ffn_down_shexp.weight q8_0 +blk.40.ffn_up_exps.weight q8_0 +blk.40.ffn_up_shexp.weight q8_0 +blk.41.ssm_in.weight q8_0 +blk.42.attn_k.weight q8_0 +blk.42.attn_q.weight q8_0 +blk.42.attn_v.weight q8_0 +blk.43.ffn_down_exps.weight q8_0 +blk.43.ffn_down_shexp.weight q8_0 +blk.43.ffn_up_exps.weight q8_0 +blk.43.ffn_up_shexp.weight q8_0 +blk.44.ssm_in.weight q8_0 +blk.45.ffn_down_exps.weight q8_0 +blk.45.ffn_down_shexp.weight q8_0 +blk.45.ffn_up_exps.weight q8_0 +blk.45.ffn_up_shexp.weight q8_0 +blk.46.ssm_in.weight q8_0 +blk.47.ffn_down_exps.weight q8_0 +blk.47.ffn_down_shexp.weight q8_0 +blk.47.ffn_up_exps.weight q8_0 +blk.47.ffn_up_shexp.weight q8_0 +blk.48.ssm_in.weight q8_0 +blk.49.ffn_down_exps.weight q8_0 +blk.49.ffn_down_shexp.weight q8_0 +blk.49.ffn_up_exps.weight q8_0 +blk.49.ffn_up_shexp.weight q8_0 +blk.50.ssm_in.weight q8_0 +blk.51.ffn_down_exps.weight q8_0 +blk.51.ffn_down_shexp.weight q8_0 +blk.51.ffn_up_exps.weight q8_0 +blk.51.ffn_up_shexp.weight q8_0 [IQ2_XXS] iq2_xxs output.weight q8_0 -token_embd.weight q2_K -blk.1.ffn_down_exps.weight q2_K -blk.1.ffn_down_shexp.weight q2_K -blk.3.ffn_down_exps.weight q2_K -blk.3.ffn_down_shexp.weight q2_K -blk.5.attn_v.weight q4_K -blk.6.ffn_down_exps.weight q2_K -blk.6.ffn_down_shexp.weight q2_K -blk.12.attn_v.weight q4_K -blk.19.attn_v.weight q4_K -blk.26.attn_v.weight q4_K -blk.33.attn_v.weight q4_K -blk.42.attn_v.weight q4_K +token_embd.weight q4_0 +blk.0.ssm_in.weight iq4_nl +blk.1.ffn_down_exps.weight q4_0 +blk.1.ffn_down_shexp.weight q4_0 +blk.1.ffn_up_exps.weight iq4_nl +blk.1.ffn_up_shexp.weight iq4_nl +blk.2.ssm_in.weight iq4_nl +blk.3.ffn_down_exps.weight q4_0 +blk.3.ffn_down_shexp.weight q4_0 +blk.3.ffn_up_exps.weight iq4_nl +blk.3.ffn_up_shexp.weight iq4_nl +blk.4.ssm_in.weight iq4_nl +blk.5.attn_k.weight iq4_nl +blk.5.attn_q.weight iq4_nl +blk.5.attn_v.weight q5_0 +blk.6.ffn_down_exps.weight q4_0 +blk.6.ffn_down_shexp.weight q4_0 +blk.6.ffn_up_exps.weight iq4_nl +blk.6.ffn_up_shexp.weight iq4_nl +blk.7.ssm_in.weight iq4_nl +blk.8.ffn_down_exps.weight iq4_nl +blk.8.ffn_down_shexp.weight iq4_nl +blk.8.ffn_up_exps.weight iq4_nl +blk.8.ffn_up_shexp.weight iq4_nl +blk.9.ssm_in.weight iq4_nl +blk.10.ffn_down_exps.weight iq4_nl +blk.10.ffn_down_shexp.weight iq4_nl +blk.10.ffn_up_exps.weight iq4_nl +blk.10.ffn_up_shexp.weight iq4_nl +blk.11.ssm_in.weight iq4_nl +blk.12.attn_k.weight iq4_nl +blk.12.attn_q.weight iq4_nl +blk.12.attn_v.weight q5_0 +blk.13.ffn_down_exps.weight iq4_nl +blk.13.ffn_down_shexp.weight iq4_nl +blk.13.ffn_up_exps.weight iq4_nl +blk.13.ffn_up_shexp.weight iq4_nl +blk.14.ssm_in.weight iq4_nl +blk.15.ffn_down_exps.weight iq4_nl +blk.15.ffn_down_shexp.weight iq4_nl +blk.15.ffn_up_exps.weight iq4_nl +blk.15.ffn_up_shexp.weight iq4_nl +blk.16.ssm_in.weight iq4_nl +blk.17.ffn_down_exps.weight iq4_nl +blk.17.ffn_down_shexp.weight iq4_nl +blk.17.ffn_up_exps.weight iq4_nl +blk.17.ffn_up_shexp.weight iq4_nl +blk.18.ssm_in.weight iq4_nl +blk.19.attn_k.weight iq4_nl +blk.19.attn_q.weight iq4_nl +blk.19.attn_v.weight q5_0 +blk.20.ffn_down_exps.weight iq4_nl +blk.20.ffn_down_shexp.weight iq4_nl +blk.20.ffn_up_exps.weight iq4_nl +blk.20.ffn_up_shexp.weight iq4_nl +blk.21.ssm_in.weight iq4_nl +blk.22.ffn_down_exps.weight iq4_nl +blk.22.ffn_down_shexp.weight iq4_nl +blk.22.ffn_up_exps.weight iq4_nl +blk.22.ffn_up_shexp.weight iq4_nl +blk.23.ssm_in.weight iq4_nl +blk.24.ffn_down_exps.weight iq4_nl +blk.24.ffn_down_shexp.weight iq4_nl +blk.24.ffn_up_exps.weight iq4_nl +blk.24.ffn_up_shexp.weight iq4_nl +blk.25.ssm_in.weight iq4_nl +blk.26.attn_k.weight iq4_nl +blk.26.attn_q.weight iq4_nl +blk.26.attn_v.weight q5_0 +blk.27.ffn_down_exps.weight iq4_nl +blk.27.ffn_down_shexp.weight iq4_nl +blk.27.ffn_up_exps.weight iq4_nl +blk.27.ffn_up_shexp.weight iq4_nl +blk.28.ssm_in.weight iq4_nl +blk.29.ffn_down_exps.weight iq4_nl +blk.29.ffn_down_shexp.weight iq4_nl +blk.29.ffn_up_exps.weight iq4_nl +blk.29.ffn_up_shexp.weight iq4_nl +blk.30.ssm_in.weight iq4_nl +blk.31.ffn_down_exps.weight iq4_nl +blk.31.ffn_down_shexp.weight iq4_nl +blk.31.ffn_up_exps.weight iq4_nl +blk.31.ffn_up_shexp.weight iq4_nl +blk.32.ssm_in.weight iq4_nl +blk.33.attn_k.weight iq4_nl +blk.33.attn_q.weight iq4_nl +blk.33.attn_v.weight q5_0 +blk.34.ffn_down_exps.weight iq4_nl +blk.34.ffn_down_shexp.weight iq4_nl +blk.34.ffn_up_exps.weight iq4_nl +blk.34.ffn_up_shexp.weight iq4_nl +blk.35.ssm_in.weight iq4_nl +blk.36.ffn_down_exps.weight iq4_nl +blk.36.ffn_down_shexp.weight iq4_nl +blk.36.ffn_up_exps.weight iq4_nl +blk.36.ffn_up_shexp.weight iq4_nl +blk.37.ssm_in.weight iq4_nl +blk.38.ffn_down_exps.weight iq4_nl +blk.38.ffn_down_shexp.weight iq4_nl +blk.38.ffn_up_exps.weight iq4_nl +blk.38.ffn_up_shexp.weight iq4_nl +blk.39.ssm_in.weight iq4_nl +blk.40.ffn_down_exps.weight iq4_nl +blk.40.ffn_down_shexp.weight iq4_nl +blk.40.ffn_up_exps.weight iq4_nl +blk.40.ffn_up_shexp.weight iq4_nl +blk.41.ssm_in.weight iq4_nl +blk.42.attn_k.weight iq4_nl +blk.42.attn_q.weight iq4_nl +blk.42.attn_v.weight q5_0 +blk.43.ffn_down_exps.weight iq4_nl +blk.43.ffn_down_shexp.weight iq4_nl +blk.43.ffn_up_exps.weight iq4_nl +blk.43.ffn_up_shexp.weight iq4_nl +blk.44.ssm_in.weight iq4_nl +blk.45.ffn_down_exps.weight iq4_nl +blk.45.ffn_down_shexp.weight iq4_nl +blk.45.ffn_up_exps.weight iq4_nl +blk.45.ffn_up_shexp.weight iq4_nl +blk.46.ssm_in.weight iq4_nl +blk.47.ffn_down_exps.weight iq4_nl +blk.47.ffn_down_shexp.weight iq4_nl +blk.47.ffn_up_exps.weight iq4_nl +blk.47.ffn_up_shexp.weight iq4_nl +blk.48.ssm_in.weight iq4_nl +blk.49.ffn_down_exps.weight iq4_nl +blk.49.ffn_down_shexp.weight iq4_nl +blk.49.ffn_up_exps.weight iq4_nl +blk.49.ffn_up_shexp.weight iq4_nl +blk.50.ssm_in.weight iq4_nl +blk.51.ffn_down_exps.weight iq4_nl +blk.51.ffn_down_shexp.weight iq4_nl +blk.51.ffn_up_exps.weight iq4_nl +blk.51.ffn_up_shexp.weight iq4_nl [IQ2_XS] iq2_xs output.weight q8_0 -token_embd.weight q2_K -blk.1.ffn_down_exps.weight q2_K -blk.1.ffn_down_shexp.weight q2_K -blk.3.ffn_down_exps.weight q2_K -blk.3.ffn_down_shexp.weight q2_K -blk.5.attn_v.weight q4_K -blk.6.ffn_down_exps.weight q2_K -blk.6.ffn_down_shexp.weight q2_K -blk.12.attn_v.weight q4_K -blk.19.attn_v.weight q4_K -blk.26.attn_v.weight q4_K -blk.33.attn_v.weight q4_K -blk.42.attn_v.weight q4_K +token_embd.weight q4_0 +blk.0.ssm_in.weight iq4_nl +blk.1.ffn_down_exps.weight q4_0 +blk.1.ffn_down_shexp.weight q4_0 +blk.1.ffn_up_exps.weight iq4_nl +blk.1.ffn_up_shexp.weight iq4_nl +blk.2.ssm_in.weight iq4_nl +blk.3.ffn_down_exps.weight q4_0 +blk.3.ffn_down_shexp.weight q4_0 +blk.3.ffn_up_exps.weight iq4_nl +blk.3.ffn_up_shexp.weight iq4_nl +blk.4.ssm_in.weight iq4_nl +blk.5.attn_k.weight iq4_nl +blk.5.attn_q.weight iq4_nl +blk.5.attn_v.weight q5_0 +blk.6.ffn_down_exps.weight q4_0 +blk.6.ffn_down_shexp.weight q4_0 +blk.6.ffn_up_exps.weight iq4_nl +blk.6.ffn_up_shexp.weight iq4_nl +blk.7.ssm_in.weight iq4_nl +blk.8.ffn_down_exps.weight iq4_nl +blk.8.ffn_down_shexp.weight iq4_nl +blk.8.ffn_up_exps.weight iq4_nl +blk.8.ffn_up_shexp.weight iq4_nl +blk.9.ssm_in.weight iq4_nl +blk.10.ffn_down_exps.weight iq4_nl +blk.10.ffn_down_shexp.weight iq4_nl +blk.10.ffn_up_exps.weight iq4_nl +blk.10.ffn_up_shexp.weight iq4_nl +blk.11.ssm_in.weight iq4_nl +blk.12.attn_k.weight iq4_nl +blk.12.attn_q.weight iq4_nl +blk.12.attn_v.weight q5_0 +blk.13.ffn_down_exps.weight iq4_nl +blk.13.ffn_down_shexp.weight iq4_nl +blk.13.ffn_up_exps.weight iq4_nl +blk.13.ffn_up_shexp.weight iq4_nl +blk.14.ssm_in.weight iq4_nl +blk.15.ffn_down_exps.weight iq4_nl +blk.15.ffn_down_shexp.weight iq4_nl +blk.15.ffn_up_exps.weight iq4_nl +blk.15.ffn_up_shexp.weight iq4_nl +blk.16.ssm_in.weight iq4_nl +blk.17.ffn_down_exps.weight iq4_nl +blk.17.ffn_down_shexp.weight iq4_nl +blk.17.ffn_up_exps.weight iq4_nl +blk.17.ffn_up_shexp.weight iq4_nl +blk.18.ssm_in.weight iq4_nl +blk.19.attn_k.weight iq4_nl +blk.19.attn_q.weight iq4_nl +blk.19.attn_v.weight q5_0 +blk.20.ffn_down_exps.weight iq4_nl +blk.20.ffn_down_shexp.weight iq4_nl +blk.20.ffn_up_exps.weight iq4_nl +blk.20.ffn_up_shexp.weight iq4_nl +blk.21.ssm_in.weight iq4_nl +blk.22.ffn_down_exps.weight iq4_nl +blk.22.ffn_down_shexp.weight iq4_nl +blk.22.ffn_up_exps.weight iq4_nl +blk.22.ffn_up_shexp.weight iq4_nl +blk.23.ssm_in.weight iq4_nl +blk.24.ffn_down_exps.weight iq4_nl +blk.24.ffn_down_shexp.weight iq4_nl +blk.24.ffn_up_exps.weight iq4_nl +blk.24.ffn_up_shexp.weight iq4_nl +blk.25.ssm_in.weight iq4_nl +blk.26.attn_k.weight iq4_nl +blk.26.attn_q.weight iq4_nl +blk.26.attn_v.weight q5_0 +blk.27.ffn_down_exps.weight iq4_nl +blk.27.ffn_down_shexp.weight iq4_nl +blk.27.ffn_up_exps.weight iq4_nl +blk.27.ffn_up_shexp.weight iq4_nl +blk.28.ssm_in.weight iq4_nl +blk.29.ffn_down_exps.weight iq4_nl +blk.29.ffn_down_shexp.weight iq4_nl +blk.29.ffn_up_exps.weight iq4_nl +blk.29.ffn_up_shexp.weight iq4_nl +blk.30.ssm_in.weight iq4_nl +blk.31.ffn_down_exps.weight iq4_nl +blk.31.ffn_down_shexp.weight iq4_nl +blk.31.ffn_up_exps.weight iq4_nl +blk.31.ffn_up_shexp.weight iq4_nl +blk.32.ssm_in.weight iq4_nl +blk.33.attn_k.weight iq4_nl +blk.33.attn_q.weight iq4_nl +blk.33.attn_v.weight q5_0 +blk.34.ffn_down_exps.weight iq4_nl +blk.34.ffn_down_shexp.weight iq4_nl +blk.34.ffn_up_exps.weight iq4_nl +blk.34.ffn_up_shexp.weight iq4_nl +blk.35.ssm_in.weight iq4_nl +blk.36.ffn_down_exps.weight iq4_nl +blk.36.ffn_down_shexp.weight iq4_nl +blk.36.ffn_up_exps.weight iq4_nl +blk.36.ffn_up_shexp.weight iq4_nl +blk.37.ssm_in.weight iq4_nl +blk.38.ffn_down_exps.weight iq4_nl +blk.38.ffn_down_shexp.weight iq4_nl +blk.38.ffn_up_exps.weight iq4_nl +blk.38.ffn_up_shexp.weight iq4_nl +blk.39.ssm_in.weight iq4_nl +blk.40.ffn_down_exps.weight iq4_nl +blk.40.ffn_down_shexp.weight iq4_nl +blk.40.ffn_up_exps.weight iq4_nl +blk.40.ffn_up_shexp.weight iq4_nl +blk.41.ssm_in.weight iq4_nl +blk.42.attn_k.weight iq4_nl +blk.42.attn_q.weight iq4_nl +blk.42.attn_v.weight q5_0 +blk.43.ffn_down_exps.weight iq4_nl +blk.43.ffn_down_shexp.weight iq4_nl +blk.43.ffn_up_exps.weight iq4_nl +blk.43.ffn_up_shexp.weight iq4_nl +blk.44.ssm_in.weight iq4_nl +blk.45.ffn_down_exps.weight iq4_nl +blk.45.ffn_down_shexp.weight iq4_nl +blk.45.ffn_up_exps.weight iq4_nl +blk.45.ffn_up_shexp.weight iq4_nl +blk.46.ssm_in.weight iq4_nl +blk.47.ffn_down_exps.weight iq4_nl +blk.47.ffn_down_shexp.weight iq4_nl +blk.47.ffn_up_exps.weight iq4_nl +blk.47.ffn_up_shexp.weight iq4_nl +blk.48.ssm_in.weight iq4_nl +blk.49.ffn_down_exps.weight iq4_nl +blk.49.ffn_down_shexp.weight iq4_nl +blk.49.ffn_up_exps.weight iq4_nl +blk.49.ffn_up_shexp.weight iq4_nl +blk.50.ssm_in.weight iq4_nl +blk.51.ffn_down_exps.weight iq4_nl +blk.51.ffn_down_shexp.weight iq4_nl +blk.51.ffn_up_exps.weight iq4_nl +blk.51.ffn_up_shexp.weight iq4_nl [Q2_K_S] q2_K output.weight q8_0 -blk.1.ffn_down_exps.weight q4_K -blk.1.ffn_down_shexp.weight q4_K -blk.3.ffn_down_exps.weight q4_K -blk.3.ffn_down_shexp.weight q4_K +token_embd.weight q4_0 +blk.0.ssm_in.weight q4_0 +blk.1.ffn_down_exps.weight q5_0 +blk.1.ffn_down_shexp.weight q5_0 +blk.1.ffn_up_exps.weight q4_0 +blk.1.ffn_up_shexp.weight q4_0 +blk.2.ssm_in.weight q4_0 +blk.3.ffn_down_exps.weight q5_0 +blk.3.ffn_down_shexp.weight q5_0 +blk.3.ffn_up_exps.weight q4_0 +blk.3.ffn_up_shexp.weight q4_0 +blk.4.ssm_in.weight q4_0 +blk.5.attn_k.weight q4_0 +blk.5.attn_q.weight q4_0 +blk.5.attn_v.weight q4_0 +blk.6.ffn_down_exps.weight q4_0 +blk.6.ffn_down_shexp.weight q4_0 +blk.6.ffn_up_exps.weight q4_0 +blk.6.ffn_up_shexp.weight q4_0 +blk.7.ssm_in.weight q4_0 +blk.8.ffn_down_exps.weight q4_0 +blk.8.ffn_down_shexp.weight q4_0 +blk.8.ffn_up_exps.weight q4_0 +blk.8.ffn_up_shexp.weight q4_0 +blk.9.ssm_in.weight q4_0 +blk.10.ffn_down_exps.weight q4_0 +blk.10.ffn_down_shexp.weight q4_0 +blk.10.ffn_up_exps.weight q4_0 +blk.10.ffn_up_shexp.weight q4_0 +blk.11.ssm_in.weight q4_0 +blk.12.attn_k.weight q4_0 +blk.12.attn_q.weight q4_0 +blk.12.attn_v.weight q4_0 +blk.13.ffn_down_exps.weight q4_0 +blk.13.ffn_down_shexp.weight q4_0 +blk.13.ffn_up_exps.weight q4_0 +blk.13.ffn_up_shexp.weight q4_0 +blk.14.ssm_in.weight q4_0 +blk.15.ffn_down_exps.weight q4_0 +blk.15.ffn_down_shexp.weight q4_0 +blk.15.ffn_up_exps.weight q4_0 +blk.15.ffn_up_shexp.weight q4_0 +blk.16.ssm_in.weight q4_0 +blk.17.ffn_down_exps.weight q4_0 +blk.17.ffn_down_shexp.weight q4_0 +blk.17.ffn_up_exps.weight q4_0 +blk.17.ffn_up_shexp.weight q4_0 +blk.18.ssm_in.weight q4_0 +blk.19.attn_k.weight q4_0 +blk.19.attn_q.weight q4_0 +blk.19.attn_v.weight q4_0 +blk.20.ffn_down_exps.weight q4_0 +blk.20.ffn_down_shexp.weight q4_0 +blk.20.ffn_up_exps.weight q4_0 +blk.20.ffn_up_shexp.weight q4_0 +blk.21.ssm_in.weight q4_0 +blk.22.ffn_down_exps.weight q4_0 +blk.22.ffn_down_shexp.weight q4_0 +blk.22.ffn_up_exps.weight q4_0 +blk.22.ffn_up_shexp.weight q4_0 +blk.23.ssm_in.weight q4_0 +blk.24.ffn_down_exps.weight q4_0 +blk.24.ffn_down_shexp.weight q4_0 +blk.24.ffn_up_exps.weight q4_0 +blk.24.ffn_up_shexp.weight q4_0 +blk.25.ssm_in.weight q4_0 +blk.26.attn_k.weight q4_0 +blk.26.attn_q.weight q4_0 +blk.26.attn_v.weight q4_0 +blk.27.ffn_down_exps.weight q4_0 +blk.27.ffn_down_shexp.weight q4_0 +blk.27.ffn_up_exps.weight q4_0 +blk.27.ffn_up_shexp.weight q4_0 +blk.28.ssm_in.weight q4_0 +blk.29.ffn_down_exps.weight q4_0 +blk.29.ffn_down_shexp.weight q4_0 +blk.29.ffn_up_exps.weight q4_0 +blk.29.ffn_up_shexp.weight q4_0 +blk.30.ssm_in.weight q4_0 +blk.31.ffn_down_exps.weight q4_0 +blk.31.ffn_down_shexp.weight q4_0 +blk.31.ffn_up_exps.weight q4_0 +blk.31.ffn_up_shexp.weight q4_0 +blk.32.ssm_in.weight q4_0 +blk.33.attn_k.weight q4_0 +blk.33.attn_q.weight q4_0 +blk.33.attn_v.weight q4_0 +blk.34.ffn_down_exps.weight q4_0 +blk.34.ffn_down_shexp.weight q4_0 +blk.34.ffn_up_exps.weight q4_0 +blk.34.ffn_up_shexp.weight q4_0 +blk.35.ssm_in.weight q4_0 +blk.36.ffn_down_exps.weight q4_0 +blk.36.ffn_down_shexp.weight q4_0 +blk.36.ffn_up_exps.weight q4_0 +blk.36.ffn_up_shexp.weight q4_0 +blk.37.ssm_in.weight q4_0 +blk.38.ffn_down_exps.weight q4_0 +blk.38.ffn_down_shexp.weight q4_0 +blk.38.ffn_up_exps.weight q4_0 +blk.38.ffn_up_shexp.weight q4_0 +blk.39.ssm_in.weight q4_0 +blk.40.ffn_down_exps.weight q4_0 +blk.40.ffn_down_shexp.weight q4_0 +blk.40.ffn_up_exps.weight q4_0 +blk.40.ffn_up_shexp.weight q4_0 +blk.41.ssm_in.weight q4_0 +blk.42.attn_k.weight q4_0 +blk.42.attn_q.weight q4_0 +blk.42.attn_v.weight q4_0 +blk.43.ffn_down_exps.weight q4_0 +blk.43.ffn_down_shexp.weight q4_0 +blk.43.ffn_up_exps.weight q4_0 +blk.43.ffn_up_shexp.weight q4_0 +blk.44.ssm_in.weight q4_0 +blk.45.ffn_down_exps.weight q4_0 +blk.45.ffn_down_shexp.weight q4_0 +blk.45.ffn_up_exps.weight q4_0 +blk.45.ffn_up_shexp.weight q4_0 +blk.46.ssm_in.weight q4_0 +blk.47.ffn_down_exps.weight q4_0 +blk.47.ffn_down_shexp.weight q4_0 +blk.47.ffn_up_exps.weight q4_0 +blk.47.ffn_up_shexp.weight q4_0 +blk.48.ssm_in.weight q4_0 +blk.49.ffn_down_exps.weight q4_0 +blk.49.ffn_down_shexp.weight q4_0 +blk.49.ffn_up_exps.weight q4_0 +blk.49.ffn_up_shexp.weight q4_0 +blk.50.ssm_in.weight q4_0 +blk.51.ffn_down_exps.weight q4_0 +blk.51.ffn_down_shexp.weight q4_0 +blk.51.ffn_up_exps.weight q4_0 +blk.51.ffn_up_shexp.weight q4_0 [IQ3_XS] iq3_s output.weight q8_0 -blk.5.attn_k.weight iq3_xxs -blk.5.attn_q.weight iq3_xxs -blk.6.ffn_up_exps.weight iq3_xxs -blk.6.ffn_up_shexp.weight iq3_xxs -blk.8.ffn_up_exps.weight iq3_xxs -blk.8.ffn_up_shexp.weight iq3_xxs -blk.10.ffn_up_exps.weight iq3_xxs -blk.10.ffn_up_shexp.weight iq3_xxs -blk.12.attn_k.weight iq3_xxs -blk.12.attn_q.weight iq3_xxs -blk.13.ffn_up_exps.weight iq3_xxs -blk.13.ffn_up_shexp.weight iq3_xxs -blk.15.ffn_up_exps.weight iq3_xxs -blk.15.ffn_up_shexp.weight iq3_xxs -blk.17.ffn_up_exps.weight iq3_xxs -blk.17.ffn_up_shexp.weight iq3_xxs -blk.19.attn_k.weight iq3_xxs -blk.19.attn_q.weight iq3_xxs -blk.20.ffn_up_exps.weight iq3_xxs -blk.20.ffn_up_shexp.weight iq3_xxs -blk.22.ffn_up_exps.weight iq3_xxs -blk.22.ffn_up_shexp.weight iq3_xxs -blk.24.ffn_up_exps.weight iq3_xxs -blk.24.ffn_up_shexp.weight iq3_xxs -blk.26.attn_k.weight iq3_xxs -blk.26.attn_q.weight iq3_xxs -blk.27.ffn_up_exps.weight iq3_xxs -blk.27.ffn_up_shexp.weight iq3_xxs -blk.29.ffn_up_exps.weight iq3_xxs -blk.29.ffn_up_shexp.weight iq3_xxs -blk.31.ffn_up_exps.weight iq3_xxs -blk.31.ffn_up_shexp.weight iq3_xxs -blk.33.attn_k.weight iq3_xxs -blk.33.attn_q.weight iq3_xxs -blk.34.ffn_up_exps.weight iq3_xxs -blk.34.ffn_up_shexp.weight iq3_xxs -blk.36.ffn_up_exps.weight iq3_xxs -blk.36.ffn_up_shexp.weight iq3_xxs -blk.38.ffn_up_exps.weight iq3_xxs -blk.38.ffn_up_shexp.weight iq3_xxs -blk.40.ffn_up_exps.weight iq3_xxs -blk.40.ffn_up_shexp.weight iq3_xxs -blk.42.attn_k.weight iq3_xxs -blk.42.attn_q.weight iq3_xxs -blk.43.ffn_up_exps.weight iq3_xxs -blk.43.ffn_up_shexp.weight iq3_xxs +token_embd.weight iq4_nl +blk.0.ssm_in.weight iq4_nl +blk.1.ffn_down_exps.weight iq4_nl +blk.1.ffn_down_shexp.weight iq4_nl +blk.1.ffn_up_exps.weight iq4_nl +blk.1.ffn_up_shexp.weight iq4_nl +blk.2.ssm_in.weight iq4_nl +blk.3.ffn_down_exps.weight iq4_nl +blk.3.ffn_down_shexp.weight iq4_nl +blk.3.ffn_up_exps.weight iq4_nl +blk.3.ffn_up_shexp.weight iq4_nl +blk.4.ssm_in.weight iq4_nl +blk.5.attn_k.weight iq4_nl +blk.5.attn_q.weight iq4_nl +blk.5.attn_v.weight iq4_nl +blk.6.ffn_down_exps.weight iq4_nl +blk.6.ffn_down_shexp.weight iq4_nl +blk.6.ffn_up_exps.weight iq4_nl +blk.6.ffn_up_shexp.weight iq4_nl +blk.7.ssm_in.weight iq4_nl +blk.8.ffn_down_exps.weight iq4_nl +blk.8.ffn_down_shexp.weight iq4_nl +blk.8.ffn_up_exps.weight iq4_nl +blk.8.ffn_up_shexp.weight iq4_nl +blk.9.ssm_in.weight iq4_nl +blk.10.ffn_down_exps.weight iq4_nl +blk.10.ffn_down_shexp.weight iq4_nl +blk.10.ffn_up_exps.weight iq4_nl +blk.10.ffn_up_shexp.weight iq4_nl +blk.11.ssm_in.weight iq4_nl +blk.12.attn_k.weight iq4_nl +blk.12.attn_q.weight iq4_nl +blk.12.attn_v.weight iq4_nl +blk.13.ffn_down_exps.weight iq4_nl +blk.13.ffn_down_shexp.weight iq4_nl +blk.13.ffn_up_exps.weight iq4_nl +blk.13.ffn_up_shexp.weight iq4_nl +blk.14.ssm_in.weight iq4_nl +blk.15.ffn_down_exps.weight iq4_nl +blk.15.ffn_down_shexp.weight iq4_nl +blk.15.ffn_up_exps.weight iq4_nl +blk.15.ffn_up_shexp.weight iq4_nl +blk.16.ssm_in.weight iq4_nl +blk.17.ffn_down_exps.weight iq4_nl +blk.17.ffn_down_shexp.weight iq4_nl +blk.17.ffn_up_exps.weight iq4_nl +blk.17.ffn_up_shexp.weight iq4_nl +blk.18.ssm_in.weight iq4_nl +blk.19.attn_k.weight iq4_nl +blk.19.attn_q.weight iq4_nl +blk.19.attn_v.weight iq4_nl +blk.20.ffn_down_exps.weight iq4_nl +blk.20.ffn_down_shexp.weight iq4_nl +blk.20.ffn_up_exps.weight iq4_nl +blk.20.ffn_up_shexp.weight iq4_nl +blk.21.ssm_in.weight iq4_nl +blk.22.ffn_down_exps.weight iq4_nl +blk.22.ffn_down_shexp.weight iq4_nl +blk.22.ffn_up_exps.weight iq4_nl +blk.22.ffn_up_shexp.weight iq4_nl +blk.23.ssm_in.weight iq4_nl +blk.24.ffn_down_exps.weight iq4_nl +blk.24.ffn_down_shexp.weight iq4_nl +blk.24.ffn_up_exps.weight iq4_nl +blk.24.ffn_up_shexp.weight iq4_nl +blk.25.ssm_in.weight iq4_nl +blk.26.attn_k.weight iq4_nl +blk.26.attn_q.weight iq4_nl +blk.26.attn_v.weight iq4_nl +blk.27.ffn_down_exps.weight iq4_nl +blk.27.ffn_down_shexp.weight iq4_nl +blk.27.ffn_up_exps.weight iq4_nl +blk.27.ffn_up_shexp.weight iq4_nl +blk.28.ssm_in.weight iq4_nl +blk.29.ffn_down_exps.weight iq4_nl +blk.29.ffn_down_shexp.weight iq4_nl +blk.29.ffn_up_exps.weight iq4_nl +blk.29.ffn_up_shexp.weight iq4_nl +blk.30.ssm_in.weight iq4_nl +blk.31.ffn_down_exps.weight iq4_nl +blk.31.ffn_down_shexp.weight iq4_nl +blk.31.ffn_up_exps.weight iq4_nl +blk.31.ffn_up_shexp.weight iq4_nl +blk.32.ssm_in.weight iq4_nl +blk.33.attn_k.weight iq4_nl +blk.33.attn_q.weight iq4_nl +blk.33.attn_v.weight iq4_nl +blk.34.ffn_down_exps.weight iq4_nl +blk.34.ffn_down_shexp.weight iq4_nl +blk.34.ffn_up_exps.weight iq4_nl +blk.34.ffn_up_shexp.weight iq4_nl +blk.35.ssm_in.weight iq4_nl +blk.36.ffn_down_exps.weight iq4_nl +blk.36.ffn_down_shexp.weight iq4_nl +blk.36.ffn_up_exps.weight iq4_nl +blk.36.ffn_up_shexp.weight iq4_nl +blk.37.ssm_in.weight iq4_nl +blk.38.ffn_down_exps.weight iq4_nl +blk.38.ffn_down_shexp.weight iq4_nl +blk.38.ffn_up_exps.weight iq4_nl +blk.38.ffn_up_shexp.weight iq4_nl +blk.39.ssm_in.weight iq4_nl +blk.40.ffn_down_exps.weight iq4_nl +blk.40.ffn_down_shexp.weight iq4_nl +blk.40.ffn_up_exps.weight iq4_nl +blk.40.ffn_up_shexp.weight iq4_nl +blk.41.ssm_in.weight iq4_nl +blk.42.attn_k.weight iq4_nl +blk.42.attn_q.weight iq4_nl +blk.42.attn_v.weight iq4_nl +blk.43.ffn_down_exps.weight iq4_nl +blk.43.ffn_down_shexp.weight iq4_nl +blk.43.ffn_up_exps.weight iq4_nl +blk.43.ffn_up_shexp.weight iq4_nl +blk.44.ssm_in.weight iq4_nl +blk.45.ffn_down_exps.weight iq4_nl +blk.45.ffn_down_shexp.weight iq4_nl +blk.45.ffn_up_exps.weight iq4_nl +blk.45.ffn_up_shexp.weight iq4_nl +blk.46.ssm_in.weight iq4_nl +blk.47.ffn_down_exps.weight iq4_nl +blk.47.ffn_down_shexp.weight iq4_nl +blk.47.ffn_up_exps.weight iq4_nl +blk.47.ffn_up_shexp.weight iq4_nl +blk.48.ssm_in.weight iq4_nl +blk.49.ffn_down_exps.weight iq4_nl +blk.49.ffn_down_shexp.weight iq4_nl +blk.49.ffn_up_exps.weight iq4_nl +blk.49.ffn_up_shexp.weight iq4_nl +blk.50.ssm_in.weight iq4_nl +blk.51.ffn_down_exps.weight iq4_nl +blk.51.ffn_down_shexp.weight iq4_nl +blk.51.ffn_up_exps.weight iq4_nl +blk.51.ffn_up_shexp.weight iq4_nl [IQ3_XXS] iq3_xxs output.weight q8_0 -token_embd.weight iq3_s -blk.1.ffn_down_exps.weight q4_K -blk.1.ffn_down_shexp.weight q4_K -blk.3.ffn_down_exps.weight q4_K -blk.3.ffn_down_shexp.weight q4_K -blk.5.attn_k.weight iq2_s +token_embd.weight iq4_nl +blk.0.ssm_in.weight iq4_nl +blk.1.ffn_down_exps.weight q5_0 +blk.1.ffn_down_shexp.weight q5_0 +blk.1.ffn_up_exps.weight iq4_nl +blk.1.ffn_up_shexp.weight iq4_nl +blk.2.ssm_in.weight iq4_nl +blk.3.ffn_down_exps.weight q5_0 +blk.3.ffn_down_shexp.weight q5_0 +blk.3.ffn_up_exps.weight iq4_nl +blk.3.ffn_up_shexp.weight iq4_nl +blk.4.ssm_in.weight iq4_nl +blk.5.attn_k.weight iq4_nl blk.5.attn_output.weight iq3_s -blk.5.attn_q.weight iq2_s -blk.5.attn_v.weight iq3_s -blk.6.ffn_down_exps.weight q3_K -blk.6.ffn_down_shexp.weight q3_K -blk.8.ffn_down_exps.weight q3_K -blk.8.ffn_down_shexp.weight q3_K -blk.10.ffn_down_exps.weight q3_K -blk.10.ffn_down_shexp.weight q3_K -blk.12.attn_k.weight iq2_s +blk.5.attn_q.weight iq4_nl +blk.5.attn_v.weight iq4_nl +blk.6.ffn_down_exps.weight q4_0 +blk.6.ffn_down_shexp.weight q4_0 +blk.6.ffn_up_exps.weight iq4_nl +blk.6.ffn_up_shexp.weight iq4_nl +blk.7.ssm_in.weight iq4_nl +blk.8.ffn_down_exps.weight q4_0 +blk.8.ffn_down_shexp.weight q4_0 +blk.8.ffn_up_exps.weight iq4_nl +blk.8.ffn_up_shexp.weight iq4_nl +blk.9.ssm_in.weight iq4_nl +blk.10.ffn_down_exps.weight q4_0 +blk.10.ffn_down_shexp.weight q4_0 +blk.10.ffn_up_exps.weight iq4_nl +blk.10.ffn_up_shexp.weight iq4_nl +blk.11.ssm_in.weight iq4_nl +blk.12.attn_k.weight iq4_nl blk.12.attn_output.weight iq3_s -blk.12.attn_q.weight iq2_s -blk.12.attn_v.weight iq3_s -blk.13.ffn_down_exps.weight q3_K -blk.13.ffn_down_shexp.weight q3_K -blk.15.ffn_down_exps.weight q3_K -blk.15.ffn_down_shexp.weight q3_K -blk.17.ffn_down_exps.weight q3_K -blk.17.ffn_down_shexp.weight q3_K -blk.19.attn_k.weight iq2_s +blk.12.attn_q.weight iq4_nl +blk.12.attn_v.weight iq4_nl +blk.13.ffn_down_exps.weight q4_0 +blk.13.ffn_down_shexp.weight q4_0 +blk.13.ffn_up_exps.weight iq4_nl +blk.13.ffn_up_shexp.weight iq4_nl +blk.14.ssm_in.weight iq4_nl +blk.15.ffn_down_exps.weight q4_0 +blk.15.ffn_down_shexp.weight q4_0 +blk.15.ffn_up_exps.weight iq4_nl +blk.15.ffn_up_shexp.weight iq4_nl +blk.16.ssm_in.weight iq4_nl +blk.17.ffn_down_exps.weight q4_0 +blk.17.ffn_down_shexp.weight q4_0 +blk.17.ffn_up_exps.weight iq4_nl +blk.17.ffn_up_shexp.weight iq4_nl +blk.18.ssm_in.weight iq4_nl +blk.19.attn_k.weight iq4_nl blk.19.attn_output.weight iq3_s -blk.19.attn_q.weight iq2_s -blk.19.attn_v.weight iq3_s -blk.20.ffn_down_exps.weight q3_K -blk.20.ffn_down_shexp.weight q3_K -blk.22.ffn_down_exps.weight q3_K -blk.22.ffn_down_shexp.weight q3_K -blk.24.ffn_down_exps.weight q3_K -blk.24.ffn_down_shexp.weight q3_K -blk.26.attn_k.weight iq2_s +blk.19.attn_q.weight iq4_nl +blk.19.attn_v.weight iq4_nl +blk.20.ffn_down_exps.weight q4_0 +blk.20.ffn_down_shexp.weight q4_0 +blk.20.ffn_up_exps.weight iq4_nl +blk.20.ffn_up_shexp.weight iq4_nl +blk.21.ssm_in.weight iq4_nl +blk.22.ffn_down_exps.weight q4_0 +blk.22.ffn_down_shexp.weight q4_0 +blk.22.ffn_up_exps.weight iq4_nl +blk.22.ffn_up_shexp.weight iq4_nl +blk.23.ssm_in.weight iq4_nl +blk.24.ffn_down_exps.weight q4_0 +blk.24.ffn_down_shexp.weight q4_0 +blk.24.ffn_up_exps.weight iq4_nl +blk.24.ffn_up_shexp.weight iq4_nl +blk.25.ssm_in.weight iq4_nl +blk.26.attn_k.weight iq4_nl blk.26.attn_output.weight iq3_s -blk.26.attn_q.weight iq2_s -blk.26.attn_v.weight iq3_s -blk.27.ffn_down_exps.weight q3_K -blk.27.ffn_down_shexp.weight q3_K -blk.29.ffn_down_exps.weight q3_K -blk.29.ffn_down_shexp.weight q3_K -blk.31.ffn_down_exps.weight q3_K -blk.31.ffn_down_shexp.weight q3_K -blk.33.attn_k.weight iq2_s +blk.26.attn_q.weight iq4_nl +blk.26.attn_v.weight iq4_nl +blk.27.ffn_down_exps.weight q4_0 +blk.27.ffn_down_shexp.weight q4_0 +blk.27.ffn_up_exps.weight iq4_nl +blk.27.ffn_up_shexp.weight iq4_nl +blk.28.ssm_in.weight iq4_nl +blk.29.ffn_down_exps.weight q4_0 +blk.29.ffn_down_shexp.weight q4_0 +blk.29.ffn_up_exps.weight iq4_nl +blk.29.ffn_up_shexp.weight iq4_nl +blk.30.ssm_in.weight iq4_nl +blk.31.ffn_down_exps.weight q4_0 +blk.31.ffn_down_shexp.weight q4_0 +blk.31.ffn_up_exps.weight iq4_nl +blk.31.ffn_up_shexp.weight iq4_nl +blk.32.ssm_in.weight iq4_nl +blk.33.attn_k.weight iq4_nl blk.33.attn_output.weight iq3_s -blk.33.attn_q.weight iq2_s -blk.33.attn_v.weight iq3_s -blk.34.ffn_down_exps.weight q3_K -blk.34.ffn_down_shexp.weight q3_K -blk.36.ffn_down_exps.weight q3_K -blk.36.ffn_down_shexp.weight q3_K -blk.38.ffn_down_exps.weight q3_K -blk.38.ffn_down_shexp.weight q3_K -blk.40.ffn_down_exps.weight q3_K -blk.40.ffn_down_shexp.weight q3_K -blk.42.attn_k.weight iq2_s +blk.33.attn_q.weight iq4_nl +blk.33.attn_v.weight iq4_nl +blk.34.ffn_down_exps.weight q4_0 +blk.34.ffn_down_shexp.weight q4_0 +blk.34.ffn_up_exps.weight iq4_nl +blk.34.ffn_up_shexp.weight iq4_nl +blk.35.ssm_in.weight iq4_nl +blk.36.ffn_down_exps.weight q4_0 +blk.36.ffn_down_shexp.weight q4_0 +blk.36.ffn_up_exps.weight iq4_nl +blk.36.ffn_up_shexp.weight iq4_nl +blk.37.ssm_in.weight iq4_nl +blk.38.ffn_down_exps.weight q4_0 +blk.38.ffn_down_shexp.weight q4_0 +blk.38.ffn_up_exps.weight iq4_nl +blk.38.ffn_up_shexp.weight iq4_nl +blk.39.ssm_in.weight iq4_nl +blk.40.ffn_down_exps.weight q4_0 +blk.40.ffn_down_shexp.weight q4_0 +blk.40.ffn_up_exps.weight iq4_nl +blk.40.ffn_up_shexp.weight iq4_nl +blk.41.ssm_in.weight iq4_nl +blk.42.attn_k.weight iq4_nl blk.42.attn_output.weight iq3_s -blk.42.attn_q.weight iq2_s -blk.42.attn_v.weight iq3_s -blk.43.ffn_down_exps.weight q3_K -blk.43.ffn_down_shexp.weight q3_K -blk.45.ffn_down_exps.weight q3_K -blk.45.ffn_down_shexp.weight q3_K -blk.47.ffn_down_exps.weight q3_K -blk.47.ffn_down_shexp.weight q3_K -blk.49.ffn_down_exps.weight q3_K -blk.49.ffn_down_shexp.weight q3_K -blk.51.ffn_down_exps.weight q3_K -blk.51.ffn_down_shexp.weight q3_K +blk.42.attn_q.weight iq4_nl +blk.42.attn_v.weight iq4_nl +blk.43.ffn_down_exps.weight q4_0 +blk.43.ffn_down_shexp.weight q4_0 +blk.43.ffn_up_exps.weight iq4_nl +blk.43.ffn_up_shexp.weight iq4_nl +blk.44.ssm_in.weight iq4_nl +blk.45.ffn_down_exps.weight q4_0 +blk.45.ffn_down_shexp.weight q4_0 +blk.45.ffn_up_exps.weight iq4_nl +blk.45.ffn_up_shexp.weight iq4_nl +blk.46.ssm_in.weight iq4_nl +blk.47.ffn_down_exps.weight q4_0 +blk.47.ffn_down_shexp.weight q4_0 +blk.47.ffn_up_exps.weight iq4_nl +blk.47.ffn_up_shexp.weight iq4_nl +blk.48.ssm_in.weight iq4_nl +blk.49.ffn_down_exps.weight q4_0 +blk.49.ffn_down_shexp.weight q4_0 +blk.49.ffn_up_exps.weight iq4_nl +blk.49.ffn_up_shexp.weight iq4_nl +blk.50.ssm_in.weight iq4_nl +blk.51.ffn_down_exps.weight q4_0 +blk.51.ffn_down_shexp.weight q4_0 +blk.51.ffn_up_exps.weight iq4_nl +blk.51.ffn_up_shexp.weight iq4_nl [IQ1_S] iq1_s output.weight q8_0 -token_embd.weight q2_K -blk.1.ffn_down_exps.weight q2_K -blk.1.ffn_down_shexp.weight q2_K -blk.3.ffn_down_exps.weight q2_K -blk.3.ffn_down_shexp.weight q2_K +token_embd.weight q4_0 +blk.0.ssm_in.weight iq4_nl +blk.1.ffn_down_exps.weight q4_0 +blk.1.ffn_down_shexp.weight q4_0 +blk.1.ffn_up_exps.weight iq4_nl +blk.1.ffn_up_shexp.weight iq4_nl +blk.2.ssm_in.weight iq4_nl +blk.3.ffn_down_exps.weight q4_0 +blk.3.ffn_down_shexp.weight q4_0 +blk.3.ffn_up_exps.weight iq4_nl +blk.3.ffn_up_shexp.weight iq4_nl +blk.4.ssm_in.weight iq4_nl +blk.5.attn_k.weight iq4_nl blk.5.attn_output.weight iq2_xxs -blk.5.attn_v.weight q4_K -blk.6.ffn_down_exps.weight q2_K -blk.6.ffn_down_shexp.weight q2_K +blk.5.attn_q.weight iq4_nl +blk.5.attn_v.weight q5_0 +blk.6.ffn_down_exps.weight q4_0 +blk.6.ffn_down_shexp.weight q4_0 +blk.6.ffn_up_exps.weight iq4_nl +blk.6.ffn_up_shexp.weight iq4_nl +blk.7.ssm_in.weight iq4_nl +blk.8.ffn_down_exps.weight iq4_nl +blk.8.ffn_down_shexp.weight iq4_nl +blk.8.ffn_up_exps.weight iq4_nl +blk.8.ffn_up_shexp.weight iq4_nl +blk.9.ssm_in.weight iq4_nl +blk.10.ffn_down_exps.weight iq4_nl +blk.10.ffn_down_shexp.weight iq4_nl +blk.10.ffn_up_exps.weight iq4_nl +blk.10.ffn_up_shexp.weight iq4_nl +blk.11.ssm_in.weight iq4_nl +blk.12.attn_k.weight iq4_nl blk.12.attn_output.weight iq2_xxs -blk.12.attn_v.weight q4_K +blk.12.attn_q.weight iq4_nl +blk.12.attn_v.weight q5_0 +blk.13.ffn_down_exps.weight iq4_nl +blk.13.ffn_down_shexp.weight iq4_nl +blk.13.ffn_up_exps.weight iq4_nl +blk.13.ffn_up_shexp.weight iq4_nl +blk.14.ssm_in.weight iq4_nl +blk.15.ffn_down_exps.weight iq4_nl +blk.15.ffn_down_shexp.weight iq4_nl +blk.15.ffn_up_exps.weight iq4_nl +blk.15.ffn_up_shexp.weight iq4_nl +blk.16.ssm_in.weight iq4_nl +blk.17.ffn_down_exps.weight iq4_nl +blk.17.ffn_down_shexp.weight iq4_nl +blk.17.ffn_up_exps.weight iq4_nl +blk.17.ffn_up_shexp.weight iq4_nl +blk.18.ssm_in.weight iq4_nl +blk.19.attn_k.weight iq4_nl blk.19.attn_output.weight iq2_xxs -blk.19.attn_v.weight q4_K +blk.19.attn_q.weight iq4_nl +blk.19.attn_v.weight q5_0 +blk.20.ffn_down_exps.weight iq4_nl +blk.20.ffn_down_shexp.weight iq4_nl +blk.20.ffn_up_exps.weight iq4_nl +blk.20.ffn_up_shexp.weight iq4_nl +blk.21.ssm_in.weight iq4_nl +blk.22.ffn_down_exps.weight iq4_nl +blk.22.ffn_down_shexp.weight iq4_nl +blk.22.ffn_up_exps.weight iq4_nl +blk.22.ffn_up_shexp.weight iq4_nl +blk.23.ssm_in.weight iq4_nl +blk.24.ffn_down_exps.weight iq4_nl +blk.24.ffn_down_shexp.weight iq4_nl +blk.24.ffn_up_exps.weight iq4_nl +blk.24.ffn_up_shexp.weight iq4_nl +blk.25.ssm_in.weight iq4_nl +blk.26.attn_k.weight iq4_nl blk.26.attn_output.weight iq2_xxs -blk.26.attn_v.weight q4_K +blk.26.attn_q.weight iq4_nl +blk.26.attn_v.weight q5_0 +blk.27.ffn_down_exps.weight iq4_nl +blk.27.ffn_down_shexp.weight iq4_nl +blk.27.ffn_up_exps.weight iq4_nl +blk.27.ffn_up_shexp.weight iq4_nl +blk.28.ssm_in.weight iq4_nl +blk.29.ffn_down_exps.weight iq4_nl +blk.29.ffn_down_shexp.weight iq4_nl +blk.29.ffn_up_exps.weight iq4_nl +blk.29.ffn_up_shexp.weight iq4_nl +blk.30.ssm_in.weight iq4_nl +blk.31.ffn_down_exps.weight iq4_nl +blk.31.ffn_down_shexp.weight iq4_nl +blk.31.ffn_up_exps.weight iq4_nl +blk.31.ffn_up_shexp.weight iq4_nl +blk.32.ssm_in.weight iq4_nl +blk.33.attn_k.weight iq4_nl blk.33.attn_output.weight iq2_xxs -blk.33.attn_v.weight q4_K +blk.33.attn_q.weight iq4_nl +blk.33.attn_v.weight q5_0 +blk.34.ffn_down_exps.weight iq4_nl +blk.34.ffn_down_shexp.weight iq4_nl +blk.34.ffn_up_exps.weight iq4_nl +blk.34.ffn_up_shexp.weight iq4_nl +blk.35.ssm_in.weight iq4_nl +blk.36.ffn_down_exps.weight iq4_nl +blk.36.ffn_down_shexp.weight iq4_nl +blk.36.ffn_up_exps.weight iq4_nl +blk.36.ffn_up_shexp.weight iq4_nl +blk.37.ssm_in.weight iq4_nl +blk.38.ffn_down_exps.weight iq4_nl +blk.38.ffn_down_shexp.weight iq4_nl +blk.38.ffn_up_exps.weight iq4_nl +blk.38.ffn_up_shexp.weight iq4_nl +blk.39.ssm_in.weight iq4_nl +blk.40.ffn_down_exps.weight iq4_nl +blk.40.ffn_down_shexp.weight iq4_nl +blk.40.ffn_up_exps.weight iq4_nl +blk.40.ffn_up_shexp.weight iq4_nl +blk.41.ssm_in.weight iq4_nl +blk.42.attn_k.weight iq4_nl blk.42.attn_output.weight iq2_xxs -blk.42.attn_v.weight q4_K +blk.42.attn_q.weight iq4_nl +blk.42.attn_v.weight q5_0 +blk.43.ffn_down_exps.weight iq4_nl +blk.43.ffn_down_shexp.weight iq4_nl +blk.43.ffn_up_exps.weight iq4_nl +blk.43.ffn_up_shexp.weight iq4_nl +blk.44.ssm_in.weight iq4_nl +blk.45.ffn_down_exps.weight iq4_nl +blk.45.ffn_down_shexp.weight iq4_nl +blk.45.ffn_up_exps.weight iq4_nl +blk.45.ffn_up_shexp.weight iq4_nl +blk.46.ssm_in.weight iq4_nl +blk.47.ffn_down_exps.weight iq4_nl +blk.47.ffn_down_shexp.weight iq4_nl +blk.47.ffn_up_exps.weight iq4_nl +blk.47.ffn_up_shexp.weight iq4_nl +blk.48.ssm_in.weight iq4_nl +blk.49.ffn_down_exps.weight iq4_nl +blk.49.ffn_down_shexp.weight iq4_nl +blk.49.ffn_up_exps.weight iq4_nl +blk.49.ffn_up_shexp.weight iq4_nl +blk.50.ssm_in.weight iq4_nl +blk.51.ffn_down_exps.weight iq4_nl +blk.51.ffn_down_shexp.weight iq4_nl +blk.51.ffn_up_exps.weight iq4_nl +blk.51.ffn_up_shexp.weight iq4_nl [IQ4_NL] iq4_nl -output.weight q6_K -blk.1.ffn_down_exps.weight q5_K -blk.1.ffn_down_shexp.weight q5_K -blk.3.ffn_down_exps.weight q5_K -blk.3.ffn_down_shexp.weight q5_K +output.weight q8_0 +blk.1.ffn_down_exps.weight q5_1 +blk.1.ffn_down_shexp.weight q5_1 +blk.3.ffn_down_exps.weight q5_1 +blk.3.ffn_down_shexp.weight q5_1 [IQ3_S] iq3_s output.weight q8_0 +token_embd.weight iq4_nl +blk.0.ssm_in.weight iq4_nl +blk.1.ffn_down_exps.weight iq4_nl +blk.1.ffn_down_shexp.weight iq4_nl +blk.1.ffn_up_exps.weight iq4_nl +blk.1.ffn_up_shexp.weight iq4_nl +blk.2.ssm_in.weight iq4_nl +blk.3.ffn_down_exps.weight iq4_nl +blk.3.ffn_down_shexp.weight iq4_nl +blk.3.ffn_up_exps.weight iq4_nl +blk.3.ffn_up_shexp.weight iq4_nl +blk.4.ssm_in.weight iq4_nl +blk.5.attn_k.weight iq4_nl +blk.5.attn_q.weight iq4_nl +blk.5.attn_v.weight iq4_nl +blk.6.ffn_down_exps.weight iq4_nl +blk.6.ffn_down_shexp.weight iq4_nl +blk.6.ffn_up_exps.weight iq4_nl +blk.6.ffn_up_shexp.weight iq4_nl +blk.7.ssm_in.weight iq4_nl +blk.8.ffn_down_exps.weight iq4_nl +blk.8.ffn_down_shexp.weight iq4_nl +blk.8.ffn_up_exps.weight iq4_nl +blk.8.ffn_up_shexp.weight iq4_nl +blk.9.ssm_in.weight iq4_nl +blk.10.ffn_down_exps.weight iq4_nl +blk.10.ffn_down_shexp.weight iq4_nl +blk.10.ffn_up_exps.weight iq4_nl +blk.10.ffn_up_shexp.weight iq4_nl +blk.11.ssm_in.weight iq4_nl +blk.12.attn_k.weight iq4_nl +blk.12.attn_q.weight iq4_nl +blk.12.attn_v.weight iq4_nl +blk.13.ffn_down_exps.weight iq4_nl +blk.13.ffn_down_shexp.weight iq4_nl +blk.13.ffn_up_exps.weight iq4_nl +blk.13.ffn_up_shexp.weight iq4_nl +blk.14.ssm_in.weight iq4_nl +blk.15.ffn_down_exps.weight iq4_nl +blk.15.ffn_down_shexp.weight iq4_nl +blk.15.ffn_up_exps.weight iq4_nl +blk.15.ffn_up_shexp.weight iq4_nl +blk.16.ssm_in.weight iq4_nl +blk.17.ffn_down_exps.weight iq4_nl +blk.17.ffn_down_shexp.weight iq4_nl +blk.17.ffn_up_exps.weight iq4_nl +blk.17.ffn_up_shexp.weight iq4_nl +blk.18.ssm_in.weight iq4_nl +blk.19.attn_k.weight iq4_nl +blk.19.attn_q.weight iq4_nl +blk.19.attn_v.weight iq4_nl +blk.20.ffn_down_exps.weight iq4_nl +blk.20.ffn_down_shexp.weight iq4_nl +blk.20.ffn_up_exps.weight iq4_nl +blk.20.ffn_up_shexp.weight iq4_nl +blk.21.ssm_in.weight iq4_nl +blk.22.ffn_down_exps.weight iq4_nl +blk.22.ffn_down_shexp.weight iq4_nl +blk.22.ffn_up_exps.weight iq4_nl +blk.22.ffn_up_shexp.weight iq4_nl +blk.23.ssm_in.weight iq4_nl +blk.24.ffn_down_exps.weight iq4_nl +blk.24.ffn_down_shexp.weight iq4_nl +blk.24.ffn_up_exps.weight iq4_nl +blk.24.ffn_up_shexp.weight iq4_nl +blk.25.ssm_in.weight iq4_nl +blk.26.attn_k.weight iq4_nl +blk.26.attn_q.weight iq4_nl +blk.26.attn_v.weight iq4_nl +blk.27.ffn_down_exps.weight iq4_nl +blk.27.ffn_down_shexp.weight iq4_nl +blk.27.ffn_up_exps.weight iq4_nl +blk.27.ffn_up_shexp.weight iq4_nl +blk.28.ssm_in.weight iq4_nl +blk.29.ffn_down_exps.weight iq4_nl +blk.29.ffn_down_shexp.weight iq4_nl +blk.29.ffn_up_exps.weight iq4_nl +blk.29.ffn_up_shexp.weight iq4_nl +blk.30.ssm_in.weight iq4_nl +blk.31.ffn_down_exps.weight iq4_nl +blk.31.ffn_down_shexp.weight iq4_nl +blk.31.ffn_up_exps.weight iq4_nl +blk.31.ffn_up_shexp.weight iq4_nl +blk.32.ssm_in.weight iq4_nl +blk.33.attn_k.weight iq4_nl +blk.33.attn_q.weight iq4_nl +blk.33.attn_v.weight iq4_nl +blk.34.ffn_down_exps.weight iq4_nl +blk.34.ffn_down_shexp.weight iq4_nl +blk.34.ffn_up_exps.weight iq4_nl +blk.34.ffn_up_shexp.weight iq4_nl +blk.35.ssm_in.weight iq4_nl +blk.36.ffn_down_exps.weight iq4_nl +blk.36.ffn_down_shexp.weight iq4_nl +blk.36.ffn_up_exps.weight iq4_nl +blk.36.ffn_up_shexp.weight iq4_nl +blk.37.ssm_in.weight iq4_nl +blk.38.ffn_down_exps.weight iq4_nl +blk.38.ffn_down_shexp.weight iq4_nl +blk.38.ffn_up_exps.weight iq4_nl +blk.38.ffn_up_shexp.weight iq4_nl +blk.39.ssm_in.weight iq4_nl +blk.40.ffn_down_exps.weight iq4_nl +blk.40.ffn_down_shexp.weight iq4_nl +blk.40.ffn_up_exps.weight iq4_nl +blk.40.ffn_up_shexp.weight iq4_nl +blk.41.ssm_in.weight iq4_nl +blk.42.attn_k.weight iq4_nl +blk.42.attn_q.weight iq4_nl +blk.42.attn_v.weight iq4_nl +blk.43.ffn_down_exps.weight iq4_nl +blk.43.ffn_down_shexp.weight iq4_nl +blk.43.ffn_up_exps.weight iq4_nl +blk.43.ffn_up_shexp.weight iq4_nl +blk.44.ssm_in.weight iq4_nl +blk.45.ffn_down_exps.weight iq4_nl +blk.45.ffn_down_shexp.weight iq4_nl +blk.45.ffn_up_exps.weight iq4_nl +blk.45.ffn_up_shexp.weight iq4_nl +blk.46.ssm_in.weight iq4_nl +blk.47.ffn_down_exps.weight iq4_nl +blk.47.ffn_down_shexp.weight iq4_nl +blk.47.ffn_up_exps.weight iq4_nl +blk.47.ffn_up_shexp.weight iq4_nl +blk.48.ssm_in.weight iq4_nl +blk.49.ffn_down_exps.weight iq4_nl +blk.49.ffn_down_shexp.weight iq4_nl +blk.49.ffn_up_exps.weight iq4_nl +blk.49.ffn_up_shexp.weight iq4_nl +blk.50.ssm_in.weight iq4_nl +blk.51.ffn_down_exps.weight iq4_nl +blk.51.ffn_down_shexp.weight iq4_nl +blk.51.ffn_up_exps.weight iq4_nl +blk.51.ffn_up_shexp.weight iq4_nl [IQ3_M] iq3_s output.weight q8_0 -blk.1.ffn_down_exps.weight q4_K -blk.1.ffn_down_shexp.weight q4_K -blk.3.ffn_down_exps.weight q4_K -blk.3.ffn_down_shexp.weight q4_K +token_embd.weight iq4_nl +blk.0.ssm_in.weight iq4_nl +blk.1.ffn_down_exps.weight q5_0 +blk.1.ffn_down_shexp.weight q5_0 +blk.1.ffn_up_exps.weight iq4_nl +blk.1.ffn_up_shexp.weight iq4_nl +blk.2.ssm_in.weight iq4_nl +blk.3.ffn_down_exps.weight q5_0 +blk.3.ffn_down_shexp.weight q5_0 +blk.3.ffn_up_exps.weight iq4_nl +blk.3.ffn_up_shexp.weight iq4_nl +blk.4.ssm_in.weight iq4_nl +blk.5.attn_k.weight iq4_nl blk.5.attn_output.weight q4_K -blk.5.attn_v.weight q4_K +blk.5.attn_q.weight iq4_nl +blk.5.attn_v.weight q5_0 +blk.6.ffn_down_exps.weight iq4_nl +blk.6.ffn_down_shexp.weight iq4_nl +blk.6.ffn_up_exps.weight iq4_nl +blk.6.ffn_up_shexp.weight iq4_nl +blk.7.ssm_in.weight iq4_nl +blk.8.ffn_down_exps.weight iq4_nl +blk.8.ffn_down_shexp.weight iq4_nl +blk.8.ffn_up_exps.weight iq4_nl +blk.8.ffn_up_shexp.weight iq4_nl +blk.9.ssm_in.weight iq4_nl +blk.10.ffn_down_exps.weight iq4_nl +blk.10.ffn_down_shexp.weight iq4_nl +blk.10.ffn_up_exps.weight iq4_nl +blk.10.ffn_up_shexp.weight iq4_nl +blk.11.ssm_in.weight iq4_nl +blk.12.attn_k.weight iq4_nl blk.12.attn_output.weight q4_K -blk.12.attn_v.weight q4_K +blk.12.attn_q.weight iq4_nl +blk.12.attn_v.weight q5_0 +blk.13.ffn_down_exps.weight iq4_nl +blk.13.ffn_down_shexp.weight iq4_nl +blk.13.ffn_up_exps.weight iq4_nl +blk.13.ffn_up_shexp.weight iq4_nl +blk.14.ssm_in.weight iq4_nl +blk.15.ffn_down_exps.weight iq4_nl +blk.15.ffn_down_shexp.weight iq4_nl +blk.15.ffn_up_exps.weight iq4_nl +blk.15.ffn_up_shexp.weight iq4_nl +blk.16.ssm_in.weight iq4_nl +blk.17.ffn_down_exps.weight iq4_nl +blk.17.ffn_down_shexp.weight iq4_nl +blk.17.ffn_up_exps.weight iq4_nl +blk.17.ffn_up_shexp.weight iq4_nl +blk.18.ssm_in.weight iq4_nl +blk.19.attn_k.weight iq4_nl blk.19.attn_output.weight q4_K -blk.19.attn_v.weight q4_K +blk.19.attn_q.weight iq4_nl +blk.19.attn_v.weight q5_0 +blk.20.ffn_down_exps.weight iq4_nl +blk.20.ffn_down_shexp.weight iq4_nl +blk.20.ffn_up_exps.weight iq4_nl +blk.20.ffn_up_shexp.weight iq4_nl +blk.21.ssm_in.weight iq4_nl +blk.22.ffn_down_exps.weight iq4_nl +blk.22.ffn_down_shexp.weight iq4_nl +blk.22.ffn_up_exps.weight iq4_nl +blk.22.ffn_up_shexp.weight iq4_nl +blk.23.ssm_in.weight iq4_nl +blk.24.ffn_down_exps.weight iq4_nl +blk.24.ffn_down_shexp.weight iq4_nl +blk.24.ffn_up_exps.weight iq4_nl +blk.24.ffn_up_shexp.weight iq4_nl +blk.25.ssm_in.weight iq4_nl +blk.26.attn_k.weight iq4_nl blk.26.attn_output.weight q4_K -blk.26.attn_v.weight q4_K +blk.26.attn_q.weight iq4_nl +blk.26.attn_v.weight q5_0 +blk.27.ffn_down_exps.weight iq4_nl +blk.27.ffn_down_shexp.weight iq4_nl +blk.27.ffn_up_exps.weight iq4_nl +blk.27.ffn_up_shexp.weight iq4_nl +blk.28.ssm_in.weight iq4_nl +blk.29.ffn_down_exps.weight iq4_nl +blk.29.ffn_down_shexp.weight iq4_nl +blk.29.ffn_up_exps.weight iq4_nl +blk.29.ffn_up_shexp.weight iq4_nl +blk.30.ssm_in.weight iq4_nl +blk.31.ffn_down_exps.weight iq4_nl +blk.31.ffn_down_shexp.weight iq4_nl +blk.31.ffn_up_exps.weight iq4_nl +blk.31.ffn_up_shexp.weight iq4_nl +blk.32.ssm_in.weight iq4_nl +blk.33.attn_k.weight iq4_nl blk.33.attn_output.weight q4_K -blk.33.attn_v.weight q4_K +blk.33.attn_q.weight iq4_nl +blk.33.attn_v.weight q5_0 +blk.34.ffn_down_exps.weight iq4_nl +blk.34.ffn_down_shexp.weight iq4_nl +blk.34.ffn_up_exps.weight iq4_nl +blk.34.ffn_up_shexp.weight iq4_nl +blk.35.ssm_in.weight iq4_nl +blk.36.ffn_down_exps.weight iq4_nl +blk.36.ffn_down_shexp.weight iq4_nl +blk.36.ffn_up_exps.weight iq4_nl +blk.36.ffn_up_shexp.weight iq4_nl +blk.37.ssm_in.weight iq4_nl +blk.38.ffn_down_exps.weight iq4_nl +blk.38.ffn_down_shexp.weight iq4_nl +blk.38.ffn_up_exps.weight iq4_nl +blk.38.ffn_up_shexp.weight iq4_nl +blk.39.ssm_in.weight iq4_nl +blk.40.ffn_down_exps.weight iq4_nl +blk.40.ffn_down_shexp.weight iq4_nl +blk.40.ffn_up_exps.weight iq4_nl +blk.40.ffn_up_shexp.weight iq4_nl +blk.41.ssm_in.weight iq4_nl +blk.42.attn_k.weight iq4_nl blk.42.attn_output.weight q4_K -blk.42.attn_v.weight q4_K +blk.42.attn_q.weight iq4_nl +blk.42.attn_v.weight q5_0 +blk.43.ffn_down_exps.weight iq4_nl +blk.43.ffn_down_shexp.weight iq4_nl +blk.43.ffn_up_exps.weight iq4_nl +blk.43.ffn_up_shexp.weight iq4_nl +blk.44.ssm_in.weight iq4_nl +blk.45.ffn_down_exps.weight iq4_nl +blk.45.ffn_down_shexp.weight iq4_nl +blk.45.ffn_up_exps.weight iq4_nl +blk.45.ffn_up_shexp.weight iq4_nl +blk.46.ssm_in.weight iq4_nl +blk.47.ffn_down_exps.weight iq4_nl +blk.47.ffn_down_shexp.weight iq4_nl +blk.47.ffn_up_exps.weight iq4_nl +blk.47.ffn_up_shexp.weight iq4_nl +blk.48.ssm_in.weight iq4_nl +blk.49.ffn_down_exps.weight iq4_nl +blk.49.ffn_down_shexp.weight iq4_nl +blk.49.ffn_up_exps.weight iq4_nl +blk.49.ffn_up_shexp.weight iq4_nl +blk.50.ssm_in.weight iq4_nl +blk.51.ffn_down_exps.weight iq4_nl +blk.51.ffn_down_shexp.weight iq4_nl +blk.51.ffn_up_exps.weight iq4_nl +blk.51.ffn_up_shexp.weight iq4_nl [IQ2_S] iq2_xs output.weight q8_0 -token_embd.weight iq3_s -blk.1.ffn_down_exps.weight iq3_s -blk.1.ffn_down_shexp.weight iq3_s -blk.3.ffn_down_exps.weight iq3_s -blk.3.ffn_down_shexp.weight iq3_s +token_embd.weight iq4_nl +blk.0.ssm_in.weight iq4_nl +blk.1.ffn_down_exps.weight iq4_nl +blk.1.ffn_down_shexp.weight iq4_nl +blk.1.ffn_up_exps.weight iq4_nl +blk.1.ffn_up_shexp.weight iq4_nl +blk.2.ssm_in.weight iq4_nl +blk.3.ffn_down_exps.weight iq4_nl +blk.3.ffn_down_shexp.weight iq4_nl +blk.3.ffn_up_exps.weight iq4_nl +blk.3.ffn_up_shexp.weight iq4_nl +blk.4.ssm_in.weight iq4_nl +blk.5.attn_k.weight iq4_nl blk.5.attn_output.weight iq3_s -blk.5.attn_v.weight q4_K -blk.6.ffn_down_exps.weight iq3_s -blk.6.ffn_down_shexp.weight iq3_s +blk.5.attn_q.weight iq4_nl +blk.5.attn_v.weight q5_0 +blk.6.ffn_down_exps.weight iq4_nl +blk.6.ffn_down_shexp.weight iq4_nl +blk.6.ffn_up_exps.weight iq4_nl +blk.6.ffn_up_shexp.weight iq4_nl +blk.7.ssm_in.weight iq4_nl +blk.8.ffn_down_exps.weight iq4_nl +blk.8.ffn_down_shexp.weight iq4_nl +blk.8.ffn_up_exps.weight iq4_nl +blk.8.ffn_up_shexp.weight iq4_nl +blk.9.ssm_in.weight iq4_nl +blk.10.ffn_down_exps.weight iq4_nl +blk.10.ffn_down_shexp.weight iq4_nl +blk.10.ffn_up_exps.weight iq4_nl +blk.10.ffn_up_shexp.weight iq4_nl +blk.11.ssm_in.weight iq4_nl +blk.12.attn_k.weight iq4_nl blk.12.attn_output.weight iq3_s -blk.12.attn_v.weight q4_K +blk.12.attn_q.weight iq4_nl +blk.12.attn_v.weight q5_0 +blk.13.ffn_down_exps.weight iq4_nl +blk.13.ffn_down_shexp.weight iq4_nl +blk.13.ffn_up_exps.weight iq4_nl +blk.13.ffn_up_shexp.weight iq4_nl +blk.14.ssm_in.weight iq4_nl +blk.15.ffn_down_exps.weight iq4_nl +blk.15.ffn_down_shexp.weight iq4_nl +blk.15.ffn_up_exps.weight iq4_nl +blk.15.ffn_up_shexp.weight iq4_nl +blk.16.ssm_in.weight iq4_nl +blk.17.ffn_down_exps.weight iq4_nl +blk.17.ffn_down_shexp.weight iq4_nl +blk.17.ffn_up_exps.weight iq4_nl +blk.17.ffn_up_shexp.weight iq4_nl +blk.18.ssm_in.weight iq4_nl +blk.19.attn_k.weight iq4_nl blk.19.attn_output.weight iq3_s -blk.19.attn_v.weight q4_K +blk.19.attn_q.weight iq4_nl +blk.19.attn_v.weight q5_0 +blk.20.ffn_down_exps.weight iq4_nl +blk.20.ffn_down_shexp.weight iq4_nl +blk.20.ffn_up_exps.weight iq4_nl +blk.20.ffn_up_shexp.weight iq4_nl +blk.21.ssm_in.weight iq4_nl +blk.22.ffn_down_exps.weight iq4_nl +blk.22.ffn_down_shexp.weight iq4_nl +blk.22.ffn_up_exps.weight iq4_nl +blk.22.ffn_up_shexp.weight iq4_nl +blk.23.ssm_in.weight iq4_nl +blk.24.ffn_down_exps.weight iq4_nl +blk.24.ffn_down_shexp.weight iq4_nl +blk.24.ffn_up_exps.weight iq4_nl +blk.24.ffn_up_shexp.weight iq4_nl +blk.25.ssm_in.weight iq4_nl +blk.26.attn_k.weight iq4_nl blk.26.attn_output.weight iq3_s -blk.26.attn_v.weight q4_K +blk.26.attn_q.weight iq4_nl +blk.26.attn_v.weight q5_0 +blk.27.ffn_down_exps.weight iq4_nl +blk.27.ffn_down_shexp.weight iq4_nl +blk.27.ffn_up_exps.weight iq4_nl +blk.27.ffn_up_shexp.weight iq4_nl +blk.28.ssm_in.weight iq4_nl +blk.29.ffn_down_exps.weight iq4_nl +blk.29.ffn_down_shexp.weight iq4_nl +blk.29.ffn_up_exps.weight iq4_nl +blk.29.ffn_up_shexp.weight iq4_nl +blk.30.ssm_in.weight iq4_nl +blk.31.ffn_down_exps.weight iq4_nl +blk.31.ffn_down_shexp.weight iq4_nl +blk.31.ffn_up_exps.weight iq4_nl +blk.31.ffn_up_shexp.weight iq4_nl +blk.32.ssm_in.weight iq4_nl +blk.33.attn_k.weight iq4_nl blk.33.attn_output.weight iq3_s -blk.33.attn_v.weight q4_K +blk.33.attn_q.weight iq4_nl +blk.33.attn_v.weight q5_0 +blk.34.ffn_down_exps.weight iq4_nl +blk.34.ffn_down_shexp.weight iq4_nl +blk.34.ffn_up_exps.weight iq4_nl +blk.34.ffn_up_shexp.weight iq4_nl +blk.35.ssm_in.weight iq4_nl +blk.36.ffn_down_exps.weight iq4_nl +blk.36.ffn_down_shexp.weight iq4_nl +blk.36.ffn_up_exps.weight iq4_nl +blk.36.ffn_up_shexp.weight iq4_nl +blk.37.ssm_in.weight iq4_nl +blk.38.ffn_down_exps.weight iq4_nl +blk.38.ffn_down_shexp.weight iq4_nl +blk.38.ffn_up_exps.weight iq4_nl +blk.38.ffn_up_shexp.weight iq4_nl +blk.39.ssm_in.weight iq4_nl +blk.40.ffn_down_exps.weight iq4_nl +blk.40.ffn_down_shexp.weight iq4_nl +blk.40.ffn_up_exps.weight iq4_nl +blk.40.ffn_up_shexp.weight iq4_nl +blk.41.ssm_in.weight iq4_nl +blk.42.attn_k.weight iq4_nl blk.42.attn_output.weight iq3_s -blk.42.attn_v.weight q4_K +blk.42.attn_q.weight iq4_nl +blk.42.attn_v.weight q5_0 +blk.43.ffn_down_exps.weight iq4_nl +blk.43.ffn_down_shexp.weight iq4_nl +blk.43.ffn_up_exps.weight iq4_nl +blk.43.ffn_up_shexp.weight iq4_nl +blk.44.ssm_in.weight iq4_nl +blk.45.ffn_down_exps.weight iq4_nl +blk.45.ffn_down_shexp.weight iq4_nl +blk.45.ffn_up_exps.weight iq4_nl +blk.45.ffn_up_shexp.weight iq4_nl +blk.46.ssm_in.weight iq4_nl +blk.47.ffn_down_exps.weight iq4_nl +blk.47.ffn_down_shexp.weight iq4_nl +blk.47.ffn_up_exps.weight iq4_nl +blk.47.ffn_up_shexp.weight iq4_nl +blk.48.ssm_in.weight iq4_nl +blk.49.ffn_down_exps.weight iq4_nl +blk.49.ffn_down_shexp.weight iq4_nl +blk.49.ffn_up_exps.weight iq4_nl +blk.49.ffn_up_shexp.weight iq4_nl +blk.50.ssm_in.weight iq4_nl +blk.51.ffn_down_exps.weight iq4_nl +blk.51.ffn_down_shexp.weight iq4_nl +blk.51.ffn_up_exps.weight iq4_nl +blk.51.ffn_up_shexp.weight iq4_nl [IQ2_M] iq2_s output.weight q8_0 -token_embd.weight iq3_s -blk.1.ffn_down_exps.weight iq3_s -blk.1.ffn_down_shexp.weight iq3_s -blk.3.ffn_down_exps.weight iq3_s -blk.3.ffn_down_shexp.weight iq3_s +token_embd.weight iq4_nl +blk.0.ssm_in.weight iq4_nl +blk.1.ffn_down_exps.weight iq4_nl +blk.1.ffn_down_shexp.weight iq4_nl +blk.1.ffn_up_exps.weight iq4_nl +blk.1.ffn_up_shexp.weight iq4_nl +blk.2.ssm_in.weight iq4_nl +blk.3.ffn_down_exps.weight iq4_nl +blk.3.ffn_down_shexp.weight iq4_nl +blk.3.ffn_up_exps.weight iq4_nl +blk.3.ffn_up_shexp.weight iq4_nl +blk.4.ssm_in.weight iq4_nl +blk.5.attn_k.weight iq4_nl blk.5.attn_output.weight iq3_s -blk.5.attn_v.weight q4_K -blk.6.ffn_down_exps.weight iq3_s -blk.6.ffn_down_shexp.weight iq3_s +blk.5.attn_q.weight iq4_nl +blk.5.attn_v.weight q5_0 +blk.6.ffn_down_exps.weight iq4_nl +blk.6.ffn_down_shexp.weight iq4_nl +blk.6.ffn_up_exps.weight iq4_nl +blk.6.ffn_up_shexp.weight iq4_nl +blk.7.ssm_in.weight iq4_nl +blk.8.ffn_down_exps.weight iq4_nl +blk.8.ffn_down_shexp.weight iq4_nl +blk.8.ffn_up_exps.weight iq4_nl +blk.8.ffn_up_shexp.weight iq4_nl +blk.9.ssm_in.weight iq4_nl +blk.10.ffn_down_exps.weight iq4_nl +blk.10.ffn_down_shexp.weight iq4_nl +blk.10.ffn_up_exps.weight iq4_nl +blk.10.ffn_up_shexp.weight iq4_nl +blk.11.ssm_in.weight iq4_nl +blk.12.attn_k.weight iq4_nl blk.12.attn_output.weight iq3_s -blk.12.attn_v.weight q4_K +blk.12.attn_q.weight iq4_nl +blk.12.attn_v.weight q5_0 +blk.13.ffn_down_exps.weight iq4_nl +blk.13.ffn_down_shexp.weight iq4_nl +blk.13.ffn_up_exps.weight iq4_nl +blk.13.ffn_up_shexp.weight iq4_nl +blk.14.ssm_in.weight iq4_nl +blk.15.ffn_down_exps.weight iq4_nl +blk.15.ffn_down_shexp.weight iq4_nl +blk.15.ffn_up_exps.weight iq4_nl +blk.15.ffn_up_shexp.weight iq4_nl +blk.16.ssm_in.weight iq4_nl +blk.17.ffn_down_exps.weight iq4_nl +blk.17.ffn_down_shexp.weight iq4_nl +blk.17.ffn_up_exps.weight iq4_nl +blk.17.ffn_up_shexp.weight iq4_nl +blk.18.ssm_in.weight iq4_nl +blk.19.attn_k.weight iq4_nl blk.19.attn_output.weight iq3_s -blk.19.attn_v.weight q4_K +blk.19.attn_q.weight iq4_nl +blk.19.attn_v.weight q5_0 +blk.20.ffn_down_exps.weight iq4_nl +blk.20.ffn_down_shexp.weight iq4_nl +blk.20.ffn_up_exps.weight iq4_nl +blk.20.ffn_up_shexp.weight iq4_nl +blk.21.ssm_in.weight iq4_nl +blk.22.ffn_down_exps.weight iq4_nl +blk.22.ffn_down_shexp.weight iq4_nl +blk.22.ffn_up_exps.weight iq4_nl +blk.22.ffn_up_shexp.weight iq4_nl +blk.23.ssm_in.weight iq4_nl +blk.24.ffn_down_exps.weight iq4_nl +blk.24.ffn_down_shexp.weight iq4_nl +blk.24.ffn_up_exps.weight iq4_nl +blk.24.ffn_up_shexp.weight iq4_nl +blk.25.ssm_in.weight iq4_nl +blk.26.attn_k.weight iq4_nl blk.26.attn_output.weight iq3_s -blk.26.attn_v.weight q4_K +blk.26.attn_q.weight iq4_nl +blk.26.attn_v.weight q5_0 +blk.27.ffn_down_exps.weight iq4_nl +blk.27.ffn_down_shexp.weight iq4_nl +blk.27.ffn_up_exps.weight iq4_nl +blk.27.ffn_up_shexp.weight iq4_nl +blk.28.ssm_in.weight iq4_nl +blk.29.ffn_down_exps.weight iq4_nl +blk.29.ffn_down_shexp.weight iq4_nl +blk.29.ffn_up_exps.weight iq4_nl +blk.29.ffn_up_shexp.weight iq4_nl +blk.30.ssm_in.weight iq4_nl +blk.31.ffn_down_exps.weight iq4_nl +blk.31.ffn_down_shexp.weight iq4_nl +blk.31.ffn_up_exps.weight iq4_nl +blk.31.ffn_up_shexp.weight iq4_nl +blk.32.ssm_in.weight iq4_nl +blk.33.attn_k.weight iq4_nl blk.33.attn_output.weight iq3_s -blk.33.attn_v.weight q4_K +blk.33.attn_q.weight iq4_nl +blk.33.attn_v.weight q5_0 +blk.34.ffn_down_exps.weight iq4_nl +blk.34.ffn_down_shexp.weight iq4_nl +blk.34.ffn_up_exps.weight iq4_nl +blk.34.ffn_up_shexp.weight iq4_nl +blk.35.ssm_in.weight iq4_nl +blk.36.ffn_down_exps.weight iq4_nl +blk.36.ffn_down_shexp.weight iq4_nl +blk.36.ffn_up_exps.weight iq4_nl +blk.36.ffn_up_shexp.weight iq4_nl +blk.37.ssm_in.weight iq4_nl +blk.38.ffn_down_exps.weight iq4_nl +blk.38.ffn_down_shexp.weight iq4_nl +blk.38.ffn_up_exps.weight iq4_nl +blk.38.ffn_up_shexp.weight iq4_nl +blk.39.ssm_in.weight iq4_nl +blk.40.ffn_down_exps.weight iq4_nl +blk.40.ffn_down_shexp.weight iq4_nl +blk.40.ffn_up_exps.weight iq4_nl +blk.40.ffn_up_shexp.weight iq4_nl +blk.41.ssm_in.weight iq4_nl +blk.42.attn_k.weight iq4_nl blk.42.attn_output.weight iq3_s -blk.42.attn_v.weight q4_K +blk.42.attn_q.weight iq4_nl +blk.42.attn_v.weight q5_0 +blk.43.ffn_down_exps.weight iq4_nl +blk.43.ffn_down_shexp.weight iq4_nl +blk.43.ffn_up_exps.weight iq4_nl +blk.43.ffn_up_shexp.weight iq4_nl +blk.44.ssm_in.weight iq4_nl +blk.45.ffn_down_exps.weight iq4_nl +blk.45.ffn_down_shexp.weight iq4_nl +blk.45.ffn_up_exps.weight iq4_nl +blk.45.ffn_up_shexp.weight iq4_nl +blk.46.ssm_in.weight iq4_nl +blk.47.ffn_down_exps.weight iq4_nl +blk.47.ffn_down_shexp.weight iq4_nl +blk.47.ffn_up_exps.weight iq4_nl +blk.47.ffn_up_shexp.weight iq4_nl +blk.48.ssm_in.weight iq4_nl +blk.49.ffn_down_exps.weight iq4_nl +blk.49.ffn_down_shexp.weight iq4_nl +blk.49.ffn_up_exps.weight iq4_nl +blk.49.ffn_up_shexp.weight iq4_nl +blk.50.ssm_in.weight iq4_nl +blk.51.ffn_down_exps.weight iq4_nl +blk.51.ffn_down_shexp.weight iq4_nl +blk.51.ffn_up_exps.weight iq4_nl +blk.51.ffn_up_shexp.weight iq4_nl [IQ4_XS] iq4_xs output.weight q8_0 -blk.1.ffn_down_exps.weight q5_K -blk.1.ffn_down_shexp.weight q5_K -blk.3.ffn_down_exps.weight q5_K -blk.3.ffn_down_shexp.weight q5_K +token_embd.weight iq4_nl +blk.0.ssm_in.weight iq4_nl +blk.1.ffn_down_exps.weight q5_1 +blk.1.ffn_down_shexp.weight q5_1 +blk.1.ffn_up_exps.weight iq4_nl +blk.1.ffn_up_shexp.weight iq4_nl +blk.2.ssm_in.weight iq4_nl +blk.3.ffn_down_exps.weight q5_1 +blk.3.ffn_down_shexp.weight q5_1 +blk.3.ffn_up_exps.weight iq4_nl +blk.3.ffn_up_shexp.weight iq4_nl +blk.4.ssm_in.weight iq4_nl +blk.5.attn_k.weight iq4_nl +blk.5.attn_q.weight iq4_nl +blk.5.attn_v.weight iq4_nl +blk.6.ffn_down_exps.weight iq4_nl +blk.6.ffn_down_shexp.weight iq4_nl +blk.6.ffn_up_exps.weight iq4_nl +blk.6.ffn_up_shexp.weight iq4_nl +blk.7.ssm_in.weight iq4_nl +blk.8.ffn_down_exps.weight iq4_nl +blk.8.ffn_down_shexp.weight iq4_nl +blk.8.ffn_up_exps.weight iq4_nl +blk.8.ffn_up_shexp.weight iq4_nl +blk.9.ssm_in.weight iq4_nl +blk.10.ffn_down_exps.weight iq4_nl +blk.10.ffn_down_shexp.weight iq4_nl +blk.10.ffn_up_exps.weight iq4_nl +blk.10.ffn_up_shexp.weight iq4_nl +blk.11.ssm_in.weight iq4_nl +blk.12.attn_k.weight iq4_nl +blk.12.attn_q.weight iq4_nl +blk.12.attn_v.weight iq4_nl +blk.13.ffn_down_exps.weight iq4_nl +blk.13.ffn_down_shexp.weight iq4_nl +blk.13.ffn_up_exps.weight iq4_nl +blk.13.ffn_up_shexp.weight iq4_nl +blk.14.ssm_in.weight iq4_nl +blk.15.ffn_down_exps.weight iq4_nl +blk.15.ffn_down_shexp.weight iq4_nl +blk.15.ffn_up_exps.weight iq4_nl +blk.15.ffn_up_shexp.weight iq4_nl +blk.16.ssm_in.weight iq4_nl +blk.17.ffn_down_exps.weight iq4_nl +blk.17.ffn_down_shexp.weight iq4_nl +blk.17.ffn_up_exps.weight iq4_nl +blk.17.ffn_up_shexp.weight iq4_nl +blk.18.ssm_in.weight iq4_nl +blk.19.attn_k.weight iq4_nl +blk.19.attn_q.weight iq4_nl +blk.19.attn_v.weight iq4_nl +blk.20.ffn_down_exps.weight iq4_nl +blk.20.ffn_down_shexp.weight iq4_nl +blk.20.ffn_up_exps.weight iq4_nl +blk.20.ffn_up_shexp.weight iq4_nl +blk.21.ssm_in.weight iq4_nl +blk.22.ffn_down_exps.weight iq4_nl +blk.22.ffn_down_shexp.weight iq4_nl +blk.22.ffn_up_exps.weight iq4_nl +blk.22.ffn_up_shexp.weight iq4_nl +blk.23.ssm_in.weight iq4_nl +blk.24.ffn_down_exps.weight iq4_nl +blk.24.ffn_down_shexp.weight iq4_nl +blk.24.ffn_up_exps.weight iq4_nl +blk.24.ffn_up_shexp.weight iq4_nl +blk.25.ssm_in.weight iq4_nl +blk.26.attn_k.weight iq4_nl +blk.26.attn_q.weight iq4_nl +blk.26.attn_v.weight iq4_nl +blk.27.ffn_down_exps.weight iq4_nl +blk.27.ffn_down_shexp.weight iq4_nl +blk.27.ffn_up_exps.weight iq4_nl +blk.27.ffn_up_shexp.weight iq4_nl +blk.28.ssm_in.weight iq4_nl +blk.29.ffn_down_exps.weight iq4_nl +blk.29.ffn_down_shexp.weight iq4_nl +blk.29.ffn_up_exps.weight iq4_nl +blk.29.ffn_up_shexp.weight iq4_nl +blk.30.ssm_in.weight iq4_nl +blk.31.ffn_down_exps.weight iq4_nl +blk.31.ffn_down_shexp.weight iq4_nl +blk.31.ffn_up_exps.weight iq4_nl +blk.31.ffn_up_shexp.weight iq4_nl +blk.32.ssm_in.weight iq4_nl +blk.33.attn_k.weight iq4_nl +blk.33.attn_q.weight iq4_nl +blk.33.attn_v.weight iq4_nl +blk.34.ffn_down_exps.weight iq4_nl +blk.34.ffn_down_shexp.weight iq4_nl +blk.34.ffn_up_exps.weight iq4_nl +blk.34.ffn_up_shexp.weight iq4_nl +blk.35.ssm_in.weight iq4_nl +blk.36.ffn_down_exps.weight iq4_nl +blk.36.ffn_down_shexp.weight iq4_nl +blk.36.ffn_up_exps.weight iq4_nl +blk.36.ffn_up_shexp.weight iq4_nl +blk.37.ssm_in.weight iq4_nl +blk.38.ffn_down_exps.weight iq4_nl +blk.38.ffn_down_shexp.weight iq4_nl +blk.38.ffn_up_exps.weight iq4_nl +blk.38.ffn_up_shexp.weight iq4_nl +blk.39.ssm_in.weight iq4_nl +blk.40.ffn_down_exps.weight iq4_nl +blk.40.ffn_down_shexp.weight iq4_nl +blk.40.ffn_up_exps.weight iq4_nl +blk.40.ffn_up_shexp.weight iq4_nl +blk.41.ssm_in.weight iq4_nl +blk.42.attn_k.weight iq4_nl +blk.42.attn_q.weight iq4_nl +blk.42.attn_v.weight iq4_nl +blk.43.ffn_down_exps.weight iq4_nl +blk.43.ffn_down_shexp.weight iq4_nl +blk.43.ffn_up_exps.weight iq4_nl +blk.43.ffn_up_shexp.weight iq4_nl +blk.44.ssm_in.weight iq4_nl +blk.45.ffn_down_exps.weight iq4_nl +blk.45.ffn_down_shexp.weight iq4_nl +blk.45.ffn_up_exps.weight iq4_nl +blk.45.ffn_up_shexp.weight iq4_nl +blk.46.ssm_in.weight iq4_nl +blk.47.ffn_down_exps.weight iq4_nl +blk.47.ffn_down_shexp.weight iq4_nl +blk.47.ffn_up_exps.weight iq4_nl +blk.47.ffn_up_shexp.weight iq4_nl +blk.48.ssm_in.weight iq4_nl +blk.49.ffn_down_exps.weight iq4_nl +blk.49.ffn_down_shexp.weight iq4_nl +blk.49.ffn_up_exps.weight iq4_nl +blk.49.ffn_up_shexp.weight iq4_nl +blk.50.ssm_in.weight iq4_nl +blk.51.ffn_down_exps.weight iq4_nl +blk.51.ffn_down_shexp.weight iq4_nl +blk.51.ffn_up_exps.weight iq4_nl +blk.51.ffn_up_shexp.weight iq4_nl [IQ1_M] iq1_m output.weight q8_0 -token_embd.weight q2_K -blk.1.ffn_down_exps.weight q2_K -blk.1.ffn_down_shexp.weight q2_K -blk.3.ffn_down_exps.weight q2_K -blk.3.ffn_down_shexp.weight q2_K +token_embd.weight q4_0 +blk.0.ssm_in.weight iq4_nl +blk.1.ffn_down_exps.weight q4_0 +blk.1.ffn_down_shexp.weight q4_0 +blk.1.ffn_up_exps.weight iq4_nl +blk.1.ffn_up_shexp.weight iq4_nl +blk.2.ssm_in.weight iq4_nl +blk.3.ffn_down_exps.weight q4_0 +blk.3.ffn_down_shexp.weight q4_0 +blk.3.ffn_up_exps.weight iq4_nl +blk.3.ffn_up_shexp.weight iq4_nl +blk.4.ssm_in.weight iq4_nl +blk.5.attn_k.weight iq4_nl blk.5.attn_output.weight iq2_xxs -blk.5.attn_v.weight q4_K -blk.6.ffn_down_exps.weight q2_K -blk.6.ffn_down_shexp.weight q2_K +blk.5.attn_q.weight iq4_nl +blk.5.attn_v.weight q5_0 +blk.6.ffn_down_exps.weight q4_0 +blk.6.ffn_down_shexp.weight q4_0 +blk.6.ffn_up_exps.weight iq4_nl +blk.6.ffn_up_shexp.weight iq4_nl +blk.7.ssm_in.weight iq4_nl +blk.8.ffn_down_exps.weight iq4_nl +blk.8.ffn_down_shexp.weight iq4_nl +blk.8.ffn_up_exps.weight iq4_nl +blk.8.ffn_up_shexp.weight iq4_nl +blk.9.ssm_in.weight iq4_nl +blk.10.ffn_down_exps.weight iq4_nl +blk.10.ffn_down_shexp.weight iq4_nl +blk.10.ffn_up_exps.weight iq4_nl +blk.10.ffn_up_shexp.weight iq4_nl +blk.11.ssm_in.weight iq4_nl +blk.12.attn_k.weight iq4_nl blk.12.attn_output.weight iq2_xxs -blk.12.attn_v.weight q4_K +blk.12.attn_q.weight iq4_nl +blk.12.attn_v.weight q5_0 +blk.13.ffn_down_exps.weight iq4_nl +blk.13.ffn_down_shexp.weight iq4_nl +blk.13.ffn_up_exps.weight iq4_nl +blk.13.ffn_up_shexp.weight iq4_nl +blk.14.ssm_in.weight iq4_nl +blk.15.ffn_down_exps.weight iq4_nl +blk.15.ffn_down_shexp.weight iq4_nl +blk.15.ffn_up_exps.weight iq4_nl +blk.15.ffn_up_shexp.weight iq4_nl +blk.16.ssm_in.weight iq4_nl +blk.17.ffn_down_exps.weight iq4_nl +blk.17.ffn_down_shexp.weight iq4_nl +blk.17.ffn_up_exps.weight iq4_nl +blk.17.ffn_up_shexp.weight iq4_nl +blk.18.ssm_in.weight iq4_nl +blk.19.attn_k.weight iq4_nl blk.19.attn_output.weight iq2_xxs -blk.19.attn_v.weight q4_K +blk.19.attn_q.weight iq4_nl +blk.19.attn_v.weight q5_0 +blk.20.ffn_down_exps.weight iq4_nl +blk.20.ffn_down_shexp.weight iq4_nl +blk.20.ffn_up_exps.weight iq4_nl +blk.20.ffn_up_shexp.weight iq4_nl +blk.21.ssm_in.weight iq4_nl +blk.22.ffn_down_exps.weight iq4_nl +blk.22.ffn_down_shexp.weight iq4_nl +blk.22.ffn_up_exps.weight iq4_nl +blk.22.ffn_up_shexp.weight iq4_nl +blk.23.ssm_in.weight iq4_nl +blk.24.ffn_down_exps.weight iq4_nl +blk.24.ffn_down_shexp.weight iq4_nl +blk.24.ffn_up_exps.weight iq4_nl +blk.24.ffn_up_shexp.weight iq4_nl +blk.25.ssm_in.weight iq4_nl +blk.26.attn_k.weight iq4_nl blk.26.attn_output.weight iq2_xxs -blk.26.attn_v.weight q4_K +blk.26.attn_q.weight iq4_nl +blk.26.attn_v.weight q5_0 +blk.27.ffn_down_exps.weight iq4_nl +blk.27.ffn_down_shexp.weight iq4_nl +blk.27.ffn_up_exps.weight iq4_nl +blk.27.ffn_up_shexp.weight iq4_nl +blk.28.ssm_in.weight iq4_nl +blk.29.ffn_down_exps.weight iq4_nl +blk.29.ffn_down_shexp.weight iq4_nl +blk.29.ffn_up_exps.weight iq4_nl +blk.29.ffn_up_shexp.weight iq4_nl +blk.30.ssm_in.weight iq4_nl +blk.31.ffn_down_exps.weight iq4_nl +blk.31.ffn_down_shexp.weight iq4_nl +blk.31.ffn_up_exps.weight iq4_nl +blk.31.ffn_up_shexp.weight iq4_nl +blk.32.ssm_in.weight iq4_nl +blk.33.attn_k.weight iq4_nl blk.33.attn_output.weight iq2_xxs -blk.33.attn_v.weight q4_K +blk.33.attn_q.weight iq4_nl +blk.33.attn_v.weight q5_0 +blk.34.ffn_down_exps.weight iq4_nl +blk.34.ffn_down_shexp.weight iq4_nl +blk.34.ffn_up_exps.weight iq4_nl +blk.34.ffn_up_shexp.weight iq4_nl +blk.35.ssm_in.weight iq4_nl +blk.36.ffn_down_exps.weight iq4_nl +blk.36.ffn_down_shexp.weight iq4_nl +blk.36.ffn_up_exps.weight iq4_nl +blk.36.ffn_up_shexp.weight iq4_nl +blk.37.ssm_in.weight iq4_nl +blk.38.ffn_down_exps.weight iq4_nl +blk.38.ffn_down_shexp.weight iq4_nl +blk.38.ffn_up_exps.weight iq4_nl +blk.38.ffn_up_shexp.weight iq4_nl +blk.39.ssm_in.weight iq4_nl +blk.40.ffn_down_exps.weight iq4_nl +blk.40.ffn_down_shexp.weight iq4_nl +blk.40.ffn_up_exps.weight iq4_nl +blk.40.ffn_up_shexp.weight iq4_nl +blk.41.ssm_in.weight iq4_nl +blk.42.attn_k.weight iq4_nl blk.42.attn_output.weight iq2_xxs -blk.42.attn_v.weight q4_K +blk.42.attn_q.weight iq4_nl +blk.42.attn_v.weight q5_0 +blk.43.ffn_down_exps.weight iq4_nl +blk.43.ffn_down_shexp.weight iq4_nl +blk.43.ffn_up_exps.weight iq4_nl +blk.43.ffn_up_shexp.weight iq4_nl +blk.44.ssm_in.weight iq4_nl +blk.45.ffn_down_exps.weight iq4_nl +blk.45.ffn_down_shexp.weight iq4_nl +blk.45.ffn_up_exps.weight iq4_nl +blk.45.ffn_up_shexp.weight iq4_nl +blk.46.ssm_in.weight iq4_nl +blk.47.ffn_down_exps.weight iq4_nl +blk.47.ffn_down_shexp.weight iq4_nl +blk.47.ffn_up_exps.weight iq4_nl +blk.47.ffn_up_shexp.weight iq4_nl +blk.48.ssm_in.weight iq4_nl +blk.49.ffn_down_exps.weight iq4_nl +blk.49.ffn_down_shexp.weight iq4_nl +blk.49.ffn_up_exps.weight iq4_nl +blk.49.ffn_up_shexp.weight iq4_nl +blk.50.ssm_in.weight iq4_nl +blk.51.ffn_down_exps.weight iq4_nl +blk.51.ffn_down_shexp.weight iq4_nl +blk.51.ffn_up_exps.weight iq4_nl +blk.51.ffn_up_shexp.weight iq4_nl [BF16] bf16 -output.weight q6_K [TQ1_0] tq1_0 output.weight q8_0 -token_embd.weight q4_K +token_embd.weight q5_0 +blk.0.ssm_in.weight q4_0 +blk.1.ffn_down_exps.weight q4_0 +blk.1.ffn_down_shexp.weight q4_0 +blk.1.ffn_up_exps.weight q4_0 +blk.1.ffn_up_shexp.weight q4_0 +blk.2.ssm_in.weight q4_0 +blk.3.ffn_down_exps.weight q4_0 +blk.3.ffn_down_shexp.weight q4_0 +blk.3.ffn_up_exps.weight q4_0 +blk.3.ffn_up_shexp.weight q4_0 +blk.4.ssm_in.weight q4_0 +blk.5.attn_k.weight q4_0 +blk.5.attn_q.weight q4_0 +blk.5.attn_v.weight q4_0 +blk.6.ffn_down_exps.weight q4_0 +blk.6.ffn_down_shexp.weight q4_0 +blk.6.ffn_up_exps.weight q4_0 +blk.6.ffn_up_shexp.weight q4_0 +blk.7.ssm_in.weight q4_0 +blk.8.ffn_down_exps.weight q4_0 +blk.8.ffn_down_shexp.weight q4_0 +blk.8.ffn_up_exps.weight q4_0 +blk.8.ffn_up_shexp.weight q4_0 +blk.9.ssm_in.weight q4_0 +blk.10.ffn_down_exps.weight q4_0 +blk.10.ffn_down_shexp.weight q4_0 +blk.10.ffn_up_exps.weight q4_0 +blk.10.ffn_up_shexp.weight q4_0 +blk.11.ssm_in.weight q4_0 +blk.12.attn_k.weight q4_0 +blk.12.attn_q.weight q4_0 +blk.12.attn_v.weight q4_0 +blk.13.ffn_down_exps.weight q4_0 +blk.13.ffn_down_shexp.weight q4_0 +blk.13.ffn_up_exps.weight q4_0 +blk.13.ffn_up_shexp.weight q4_0 +blk.14.ssm_in.weight q4_0 +blk.15.ffn_down_exps.weight q4_0 +blk.15.ffn_down_shexp.weight q4_0 +blk.15.ffn_up_exps.weight q4_0 +blk.15.ffn_up_shexp.weight q4_0 +blk.16.ssm_in.weight q4_0 +blk.17.ffn_down_exps.weight q4_0 +blk.17.ffn_down_shexp.weight q4_0 +blk.17.ffn_up_exps.weight q4_0 +blk.17.ffn_up_shexp.weight q4_0 +blk.18.ssm_in.weight q4_0 +blk.19.attn_k.weight q4_0 +blk.19.attn_q.weight q4_0 +blk.19.attn_v.weight q4_0 +blk.20.ffn_down_exps.weight q4_0 +blk.20.ffn_down_shexp.weight q4_0 +blk.20.ffn_up_exps.weight q4_0 +blk.20.ffn_up_shexp.weight q4_0 +blk.21.ssm_in.weight q4_0 +blk.22.ffn_down_exps.weight q4_0 +blk.22.ffn_down_shexp.weight q4_0 +blk.22.ffn_up_exps.weight q4_0 +blk.22.ffn_up_shexp.weight q4_0 +blk.23.ssm_in.weight q4_0 +blk.24.ffn_down_exps.weight q4_0 +blk.24.ffn_down_shexp.weight q4_0 +blk.24.ffn_up_exps.weight q4_0 +blk.24.ffn_up_shexp.weight q4_0 +blk.25.ssm_in.weight q4_0 +blk.26.attn_k.weight q4_0 +blk.26.attn_q.weight q4_0 +blk.26.attn_v.weight q4_0 +blk.27.ffn_down_exps.weight q4_0 +blk.27.ffn_down_shexp.weight q4_0 +blk.27.ffn_up_exps.weight q4_0 +blk.27.ffn_up_shexp.weight q4_0 +blk.28.ssm_in.weight q4_0 +blk.29.ffn_down_exps.weight q4_0 +blk.29.ffn_down_shexp.weight q4_0 +blk.29.ffn_up_exps.weight q4_0 +blk.29.ffn_up_shexp.weight q4_0 +blk.30.ssm_in.weight q4_0 +blk.31.ffn_down_exps.weight q4_0 +blk.31.ffn_down_shexp.weight q4_0 +blk.31.ffn_up_exps.weight q4_0 +blk.31.ffn_up_shexp.weight q4_0 +blk.32.ssm_in.weight q4_0 +blk.33.attn_k.weight q4_0 +blk.33.attn_q.weight q4_0 +blk.33.attn_v.weight q4_0 +blk.34.ffn_down_exps.weight q4_0 +blk.34.ffn_down_shexp.weight q4_0 +blk.34.ffn_up_exps.weight q4_0 +blk.34.ffn_up_shexp.weight q4_0 +blk.35.ssm_in.weight q4_0 +blk.36.ffn_down_exps.weight q4_0 +blk.36.ffn_down_shexp.weight q4_0 +blk.36.ffn_up_exps.weight q4_0 +blk.36.ffn_up_shexp.weight q4_0 +blk.37.ssm_in.weight q4_0 +blk.38.ffn_down_exps.weight q4_0 +blk.38.ffn_down_shexp.weight q4_0 +blk.38.ffn_up_exps.weight q4_0 +blk.38.ffn_up_shexp.weight q4_0 +blk.39.ssm_in.weight q4_0 +blk.40.ffn_down_exps.weight q4_0 +blk.40.ffn_down_shexp.weight q4_0 +blk.40.ffn_up_exps.weight q4_0 +blk.40.ffn_up_shexp.weight q4_0 +blk.41.ssm_in.weight q4_0 +blk.42.attn_k.weight q4_0 +blk.42.attn_q.weight q4_0 +blk.42.attn_v.weight q4_0 +blk.43.ffn_down_exps.weight q4_0 +blk.43.ffn_down_shexp.weight q4_0 +blk.43.ffn_up_exps.weight q4_0 +blk.43.ffn_up_shexp.weight q4_0 +blk.44.ssm_in.weight q4_0 +blk.45.ffn_down_exps.weight q4_0 +blk.45.ffn_down_shexp.weight q4_0 +blk.45.ffn_up_exps.weight q4_0 +blk.45.ffn_up_shexp.weight q4_0 +blk.46.ssm_in.weight q4_0 +blk.47.ffn_down_exps.weight q4_0 +blk.47.ffn_down_shexp.weight q4_0 +blk.47.ffn_up_exps.weight q4_0 +blk.47.ffn_up_shexp.weight q4_0 +blk.48.ssm_in.weight q4_0 +blk.49.ffn_down_exps.weight q4_0 +blk.49.ffn_down_shexp.weight q4_0 +blk.49.ffn_up_exps.weight q4_0 +blk.49.ffn_up_shexp.weight q4_0 +blk.50.ssm_in.weight q4_0 +blk.51.ffn_down_exps.weight q4_0 +blk.51.ffn_down_shexp.weight q4_0 +blk.51.ffn_up_exps.weight q4_0 +blk.51.ffn_up_shexp.weight q4_0 [TQ2_0] tq2_0 output.weight q8_0 -token_embd.weight q4_K +token_embd.weight q5_0 +blk.0.ssm_in.weight q4_0 +blk.1.ffn_down_exps.weight q4_0 +blk.1.ffn_down_shexp.weight q4_0 +blk.1.ffn_up_exps.weight q4_0 +blk.1.ffn_up_shexp.weight q4_0 +blk.2.ssm_in.weight q4_0 +blk.3.ffn_down_exps.weight q4_0 +blk.3.ffn_down_shexp.weight q4_0 +blk.3.ffn_up_exps.weight q4_0 +blk.3.ffn_up_shexp.weight q4_0 +blk.4.ssm_in.weight q4_0 +blk.5.attn_k.weight q4_0 +blk.5.attn_q.weight q4_0 +blk.5.attn_v.weight q4_0 +blk.6.ffn_down_exps.weight q4_0 +blk.6.ffn_down_shexp.weight q4_0 +blk.6.ffn_up_exps.weight q4_0 +blk.6.ffn_up_shexp.weight q4_0 +blk.7.ssm_in.weight q4_0 +blk.8.ffn_down_exps.weight q4_0 +blk.8.ffn_down_shexp.weight q4_0 +blk.8.ffn_up_exps.weight q4_0 +blk.8.ffn_up_shexp.weight q4_0 +blk.9.ssm_in.weight q4_0 +blk.10.ffn_down_exps.weight q4_0 +blk.10.ffn_down_shexp.weight q4_0 +blk.10.ffn_up_exps.weight q4_0 +blk.10.ffn_up_shexp.weight q4_0 +blk.11.ssm_in.weight q4_0 +blk.12.attn_k.weight q4_0 +blk.12.attn_q.weight q4_0 +blk.12.attn_v.weight q4_0 +blk.13.ffn_down_exps.weight q4_0 +blk.13.ffn_down_shexp.weight q4_0 +blk.13.ffn_up_exps.weight q4_0 +blk.13.ffn_up_shexp.weight q4_0 +blk.14.ssm_in.weight q4_0 +blk.15.ffn_down_exps.weight q4_0 +blk.15.ffn_down_shexp.weight q4_0 +blk.15.ffn_up_exps.weight q4_0 +blk.15.ffn_up_shexp.weight q4_0 +blk.16.ssm_in.weight q4_0 +blk.17.ffn_down_exps.weight q4_0 +blk.17.ffn_down_shexp.weight q4_0 +blk.17.ffn_up_exps.weight q4_0 +blk.17.ffn_up_shexp.weight q4_0 +blk.18.ssm_in.weight q4_0 +blk.19.attn_k.weight q4_0 +blk.19.attn_q.weight q4_0 +blk.19.attn_v.weight q4_0 +blk.20.ffn_down_exps.weight q4_0 +blk.20.ffn_down_shexp.weight q4_0 +blk.20.ffn_up_exps.weight q4_0 +blk.20.ffn_up_shexp.weight q4_0 +blk.21.ssm_in.weight q4_0 +blk.22.ffn_down_exps.weight q4_0 +blk.22.ffn_down_shexp.weight q4_0 +blk.22.ffn_up_exps.weight q4_0 +blk.22.ffn_up_shexp.weight q4_0 +blk.23.ssm_in.weight q4_0 +blk.24.ffn_down_exps.weight q4_0 +blk.24.ffn_down_shexp.weight q4_0 +blk.24.ffn_up_exps.weight q4_0 +blk.24.ffn_up_shexp.weight q4_0 +blk.25.ssm_in.weight q4_0 +blk.26.attn_k.weight q4_0 +blk.26.attn_q.weight q4_0 +blk.26.attn_v.weight q4_0 +blk.27.ffn_down_exps.weight q4_0 +blk.27.ffn_down_shexp.weight q4_0 +blk.27.ffn_up_exps.weight q4_0 +blk.27.ffn_up_shexp.weight q4_0 +blk.28.ssm_in.weight q4_0 +blk.29.ffn_down_exps.weight q4_0 +blk.29.ffn_down_shexp.weight q4_0 +blk.29.ffn_up_exps.weight q4_0 +blk.29.ffn_up_shexp.weight q4_0 +blk.30.ssm_in.weight q4_0 +blk.31.ffn_down_exps.weight q4_0 +blk.31.ffn_down_shexp.weight q4_0 +blk.31.ffn_up_exps.weight q4_0 +blk.31.ffn_up_shexp.weight q4_0 +blk.32.ssm_in.weight q4_0 +blk.33.attn_k.weight q4_0 +blk.33.attn_q.weight q4_0 +blk.33.attn_v.weight q4_0 +blk.34.ffn_down_exps.weight q4_0 +blk.34.ffn_down_shexp.weight q4_0 +blk.34.ffn_up_exps.weight q4_0 +blk.34.ffn_up_shexp.weight q4_0 +blk.35.ssm_in.weight q4_0 +blk.36.ffn_down_exps.weight q4_0 +blk.36.ffn_down_shexp.weight q4_0 +blk.36.ffn_up_exps.weight q4_0 +blk.36.ffn_up_shexp.weight q4_0 +blk.37.ssm_in.weight q4_0 +blk.38.ffn_down_exps.weight q4_0 +blk.38.ffn_down_shexp.weight q4_0 +blk.38.ffn_up_exps.weight q4_0 +blk.38.ffn_up_shexp.weight q4_0 +blk.39.ssm_in.weight q4_0 +blk.40.ffn_down_exps.weight q4_0 +blk.40.ffn_down_shexp.weight q4_0 +blk.40.ffn_up_exps.weight q4_0 +blk.40.ffn_up_shexp.weight q4_0 +blk.41.ssm_in.weight q4_0 +blk.42.attn_k.weight q4_0 +blk.42.attn_q.weight q4_0 +blk.42.attn_v.weight q4_0 +blk.43.ffn_down_exps.weight q4_0 +blk.43.ffn_down_shexp.weight q4_0 +blk.43.ffn_up_exps.weight q4_0 +blk.43.ffn_up_shexp.weight q4_0 +blk.44.ssm_in.weight q4_0 +blk.45.ffn_down_exps.weight q4_0 +blk.45.ffn_down_shexp.weight q4_0 +blk.45.ffn_up_exps.weight q4_0 +blk.45.ffn_up_shexp.weight q4_0 +blk.46.ssm_in.weight q4_0 +blk.47.ffn_down_exps.weight q4_0 +blk.47.ffn_down_shexp.weight q4_0 +blk.47.ffn_up_exps.weight q4_0 +blk.47.ffn_up_shexp.weight q4_0 +blk.48.ssm_in.weight q4_0 +blk.49.ffn_down_exps.weight q4_0 +blk.49.ffn_down_shexp.weight q4_0 +blk.49.ffn_up_exps.weight q4_0 +blk.49.ffn_up_shexp.weight q4_0 +blk.50.ssm_in.weight q4_0 +blk.51.ffn_down_exps.weight q4_0 +blk.51.ffn_down_shexp.weight q4_0 +blk.51.ffn_up_exps.weight q4_0 +blk.51.ffn_up_shexp.weight q4_0 [MXFP4_MOE] mxfp4 output.weight q8_0 diff --git a/tests/snapshots/qwen3-0.6b.schema b/tests/snapshots/qwen3-0.6b.schema index 5ada58c60e..fd994f2c1e 100644 --- a/tests/snapshots/qwen3-0.6b.schema +++ b/tests/snapshots/qwen3-0.6b.schema @@ -2,10 +2,8 @@ # n_embd=1024, n_ff=3072, n_vocab=151936, n_layer=28, n_head=16, n_head_kv=8 [F32] f32 -output.weight q6_K [F16] f16 -output.weight q6_K [Q4_0] q4_0 output.weight q6_K @@ -1013,7 +1011,6 @@ blk.27.attn_output.weight iq2_xxs blk.27.attn_v.weight q2_K [BF16] bf16 -output.weight q6_K [TQ1_0] tq1_0 output.weight q6_K diff --git a/tests/snapshots/qwen3-14b.schema b/tests/snapshots/qwen3-14b.schema index 010849a57b..2fdd908bd0 100644 --- a/tests/snapshots/qwen3-14b.schema +++ b/tests/snapshots/qwen3-14b.schema @@ -2,10 +2,8 @@ # n_embd=5120, n_ff=17408, n_vocab=151936, n_layer=40, n_head=40, n_head_kv=8 [F32] f32 -output.weight q6_K [F16] f16 -output.weight q6_K [Q4_0] q4_0 output.weight q6_K @@ -1613,7 +1611,6 @@ blk.39.attn_output.weight iq2_xxs blk.39.attn_v.weight q4_K [BF16] bf16 -output.weight q6_K [TQ1_0] tq1_0 output.weight q6_K diff --git a/tests/snapshots/qwen3-coder-next.schema b/tests/snapshots/qwen3-coder-next.schema index 4f12721f16..9bf16fca4b 100644 --- a/tests/snapshots/qwen3-coder-next.schema +++ b/tests/snapshots/qwen3-coder-next.schema @@ -2,10 +2,8 @@ # n_embd=2048, n_ff=5120, n_vocab=151936, n_layer=48, n_head=16, n_head_kv=2, n_expert=512 [F32] f32 -output.weight q6_K [F16] f16 -output.weight q6_K [Q4_0] q4_0 output.weight q6_K @@ -1790,7 +1788,6 @@ blk.47.attn_v.weight q4_K output.weight q5_K [BF16] bf16 -output.weight q6_K [TQ1_0] tq1_0 token_embd.weight q4_K diff --git a/tests/snapshots/qwen3.5-27b.schema b/tests/snapshots/qwen3.5-27b.schema index b5e07f01fc..4080205336 100644 --- a/tests/snapshots/qwen3.5-27b.schema +++ b/tests/snapshots/qwen3.5-27b.schema @@ -2,10 +2,8 @@ # n_embd=5120, n_ff=17408, n_vocab=248320, n_layer=64, n_head=24, n_head_kv=4 [F32] f32 -output.weight q6_K [F16] f16 -output.weight q6_K [Q4_0] q4_0 output.weight q6_K @@ -1898,7 +1896,6 @@ blk.63.attn_output.weight iq2_xxs blk.63.attn_v.weight q4_K [BF16] bf16 -output.weight q6_K [TQ1_0] tq1_0 output.weight q6_K diff --git a/tests/snapshots/qwen3.5-397b-a17b.schema b/tests/snapshots/qwen3.5-397b-a17b.schema index e62948dac8..d5056b9835 100644 --- a/tests/snapshots/qwen3.5-397b-a17b.schema +++ b/tests/snapshots/qwen3.5-397b-a17b.schema @@ -2,10 +2,8 @@ # n_embd=4096, n_ff=0, n_vocab=248320, n_layer=60, n_head=32, n_head_kv=2, n_expert=512 [F32] f32 -output.weight q6_K [F16] f16 -output.weight q6_K [Q4_0] q4_0 output.weight q6_K @@ -2205,7 +2203,6 @@ blk.59.attn_output.weight iq2_xxs blk.59.attn_v.weight q4_K [BF16] bf16 -output.weight q6_K [TQ1_0] tq1_0 output.weight q6_K diff --git a/tests/snapshots/step-3.5-flash.schema b/tests/snapshots/step-3.5-flash.schema index c9d1be0082..36a13b3de2 100644 --- a/tests/snapshots/step-3.5-flash.schema +++ b/tests/snapshots/step-3.5-flash.schema @@ -2,10 +2,8 @@ # n_embd=4096, n_ff=11264, n_vocab=128896, n_layer=45, n_head=64, n_head_kv=8, n_expert=288 [F32] f32 -output.weight q6_K [F16] f16 -output.weight q6_K [Q4_0] q4_0 output.weight q6_K @@ -2078,7 +2076,6 @@ blk.44.attn_output.weight iq2_xxs blk.44.attn_v.weight q4_K [BF16] bf16 -output.weight q6_K [TQ1_0] tq1_0 output.weight q6_K diff --git a/tests/test-quant-type-selection.cpp b/tests/test-quant-type-selection.cpp index d5e06173f6..fd4b81ac97 100644 --- a/tests/test-quant-type-selection.cpp +++ b/tests/test-quant-type-selection.cpp @@ -268,21 +268,20 @@ static std::vector> compute_quant_types(llama_ quantize_state_impl qs(mdl, &qparams); - std::vector names; - names.reserve(tensors.size()); - for (const auto & mt : tensors) { - names.push_back(mt.tensor->name); + std::vector metadata(tensors.size()); + for (size_t i = 0; i < tensors.size(); ++i) { + metadata[i].name = tensors[i].tensor->name; } - init_quantize_state_counters(qs, names); + init_quantize_state_counters(qs, metadata); ggml_type default_type = llama_ftype_get_default_type(ftype); std::vector> result; result.reserve(tensors.size()); - for (const auto & mt : tensors) { - ggml_type got = llama_tensor_get_type(qs, default_type, mt.tensor, ftype); - result.push_back({ mt.tensor->name, got }); + for (size_t i = 0; i < tensors.size(); ++i) { + ggml_type got = llama_tensor_get_type(qs, &qparams, tensors[i].tensor, default_type, metadata[i]); + result.push_back({ metadata[i].name, got }); } return result; @@ -408,7 +407,7 @@ static bool run_test_section(llama_model & mdl, } if (got != expected) { - printf(" FAIL %-50s expected %s, got %s\n", name.c_str(), ggml_type_name(expected), ggml_type_name(got)); + printf(" FAIL %-50s %-10s expected %s, got %s\n", name.c_str(), llama_ftype_to_name(section.ftype), ggml_type_name(expected), ggml_type_name(got)); all_pass = false; } } @@ -432,8 +431,7 @@ static int run_remote_tests(const std::string & snapshot_dir, const char * argv0 std::string name = model_name_from_repo(spec.repo); printf("=== %s ===\n", name.c_str()); - fprintf(stderr, "Fetching model metadata for %s from %s...\n", name.c_str(), spec.repo); - auto result = gguf_fetch_model_meta(spec.repo, spec.quant); + auto result = gguf_fetch_model_meta(spec.repo, spec.quant, "", false); if (!result.has_value()) { printf(" SKIP (could not fetch model metadata)\n\n"); total_skip++; @@ -506,5 +504,8 @@ int main(int argc, char ** argv) { return run_generate(snapshot_dir); } + // suppress llama log warnings during test (e.g. tensor type fallback messages) + llama_log_set([](enum ggml_log_level, const char *, void *) {}, nullptr); + return run_remote_tests(snapshot_dir, argv[0]); } From aa8d567e958706b81b272760284ba57104499786 Mon Sep 17 00:00:00 2001 From: Colin Kealty <3266127+bartowski1182@users.noreply.github.com> Date: Wed, 11 Mar 2026 13:27:09 -0400 Subject: [PATCH 13/21] Revert accidental personal CLAUDE.md changes --- CLAUDE.md | 35 ----------------------------------- 1 file changed, 35 deletions(-) diff --git a/CLAUDE.md b/CLAUDE.md index f2bd317322..6758dbd145 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -1,36 +1 @@ IMPORTANT: Ensure you've thoroughly reviewed the [AGENTS.md](AGENTS.md) file before beginning any work. - -# AI Policy -- AI is assistive only; AI-generated PRs are restricted per AGENTS.md -- Contributor reviews and writes code themselves - -# Code Style & Conventions -- snake_case naming; optimize for longest common prefix -- 4 spaces indentation, brackets on same line -- `void * ptr`, `int & a` (space around pointer/reference) -- Avoid templates and fancy STL -- Use sized integer types (`int32_t`) in public API -- See [CONTRIBUTING.md](CONTRIBUTING.md) for full guidelines, naming, and PR process - -# ggml Tensor Conventions -- Data stored in row-major order -- Dimension 0 = columns, dimension 1 = rows, dimension 2 = matrices -- **Matrix multiply is unconventional**: `C = ggml_mul_mat(ctx, A, B)` means `C^T = A * B^T` - -# Quantization -- See [docs/quantization/](docs/quantization/) for comprehensive documentation -- See [docs/quantization/09-adding-new-types.md](docs/quantization/09-adding-new-types.md) for adding new types - -## Key Files -- `ggml/include/ggml.h`: type enums (`ggml_type`) -- `ggml/src/ggml-common.h`: block structures -- `ggml/src/ggml-quants.c`: reference quantize/dequantize implementations -- `tools/quantize/quantize.cpp`: CLI tool -- `src/llama-quant.cpp`: core quantization engine - -## Quantization Families -- **Q**: simple uniform quantization -- **K**: super-block quantization (multiple sub-blocks per super-block) -- **IQ**: importance-weighted quantization -- **T**: ternary quantization -- **MXFP**: Microsoft floating-point quantization From d2586d50e6fbb49a50888c56f5cfb0580180938f Mon Sep 17 00:00:00 2001 From: Colin Kealty <3266127+bartowski1182@users.noreply.github.com> Date: Wed, 11 Mar 2026 13:27:54 -0400 Subject: [PATCH 14/21] Change quotation mark --- CLAUDE.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CLAUDE.md b/CLAUDE.md index 6758dbd145..302cdeab99 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -1 +1 @@ -IMPORTANT: Ensure you've thoroughly reviewed the [AGENTS.md](AGENTS.md) file before beginning any work. +IMPORTANT: Ensure you’ve thoroughly reviewed the [AGENTS.md](AGENTS.md) file before beginning any work. From 3fe55f1035c49aeb90abaabd8115fdbd198ee9a7 Mon Sep 17 00:00:00 2001 From: Colin Kealty <3266127+bartowski1182@users.noreply.github.com> Date: Wed, 11 Mar 2026 13:41:27 -0400 Subject: [PATCH 15/21] Reuse metadata.name since we have it --- src/llama-quant.cpp | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/src/llama-quant.cpp b/src/llama-quant.cpp index e5890936c4..6a1e46122a 100644 --- a/src/llama-quant.cpp +++ b/src/llama-quant.cpp @@ -997,7 +997,6 @@ static void llama_model_quantize_impl(const std::string & fname_inp, const std:: for (size_t i = 0; i < tensors.size(); ++i) { const auto * it = tensors[i]; const struct ggml_tensor * tensor = it->tensor; - const std::string name = ggml_get_name(tensor); metadata[i].category = tensor_get_category(name); @@ -1028,7 +1027,7 @@ static void llama_model_quantize_impl(const std::string & fname_inp, const std:: " - offending tensor: %s\n" " - target type: %s\n" "============================================================================\n\n", - name.c_str(), ggml_type_name(metadata[i].target_type)); + metadata[i].name.c_str(), ggml_type_name(metadata[i].target_type)); throw std::runtime_error("this quantization requires an imatrix!"); } } @@ -1101,7 +1100,6 @@ static void llama_model_quantize_impl(const std::string & fname_inp, const std:: new_ofstream(weight.idx); } - const std::string name = ggml_get_name(tensor); const size_t tensor_size = ggml_nbytes(tensor); if (!params->dry_run) { @@ -1232,9 +1230,9 @@ static void llama_model_quantize_impl(const std::string & fname_inp, const std:: total_size_new += new_size; // update the gguf meta data as we go - gguf_set_tensor_type(ctx_outs[cur_split].get(), name.c_str(), new_type); - GGML_ASSERT(gguf_get_tensor_size(ctx_outs[cur_split].get(), gguf_find_tensor(ctx_outs[cur_split].get(), name.c_str())) == new_size); - gguf_set_tensor_data(ctx_outs[cur_split].get(), name.c_str(), new_data); + gguf_set_tensor_type(ctx_outs[cur_split].get(), metadata[i].name.c_str(), new_type); + GGML_ASSERT(gguf_get_tensor_size(ctx_outs[cur_split].get(), gguf_find_tensor(ctx_outs[cur_split].get(), metadata[i].name.c_str())) == new_size); + gguf_set_tensor_data(ctx_outs[cur_split].get(), metadata[i].name.c_str(), new_data); // write tensor data + padding fout.write((const char *) new_data, new_size); From 8ebfe03f95fc4cab0d89242fbaced5faf7f730c8 Mon Sep 17 00:00:00 2001 From: Colin Kealty <3266127+bartowski1182@users.noreply.github.com> Date: Thu, 12 Mar 2026 14:06:23 -0400 Subject: [PATCH 16/21] Move test-only stuff out of llama-quant.cpp --- src/llama-quant.cpp | 58 -------------------------- src/llama-quant.h | 5 --- tests/test-quant-type-selection.cpp | 63 +++++++++++++++++++++++++++++ 3 files changed, 63 insertions(+), 63 deletions(-) diff --git a/src/llama-quant.cpp b/src/llama-quant.cpp index 6a1e46122a..e0aa84379f 100644 --- a/src/llama-quant.cpp +++ b/src/llama-quant.cpp @@ -755,64 +755,6 @@ ggml_type llama_ftype_get_default_type(llama_ftype ftype) { } } -struct ftype_name_entry { - const char * name; - llama_ftype ftype; -}; - -static const ftype_name_entry ftype_name_table[] = { - { "F32", LLAMA_FTYPE_ALL_F32 }, - { "F16", LLAMA_FTYPE_MOSTLY_F16 }, - { "BF16", LLAMA_FTYPE_MOSTLY_BF16 }, - { "Q4_0", LLAMA_FTYPE_MOSTLY_Q4_0 }, - { "Q4_1", LLAMA_FTYPE_MOSTLY_Q4_1 }, - { "Q5_0", LLAMA_FTYPE_MOSTLY_Q5_0 }, - { "Q5_1", LLAMA_FTYPE_MOSTLY_Q5_1 }, - { "Q8_0", LLAMA_FTYPE_MOSTLY_Q8_0 }, - { "Q2_K", LLAMA_FTYPE_MOSTLY_Q2_K }, - { "Q2_K_S", LLAMA_FTYPE_MOSTLY_Q2_K_S }, - { "Q3_K_S", LLAMA_FTYPE_MOSTLY_Q3_K_S }, - { "Q3_K_M", LLAMA_FTYPE_MOSTLY_Q3_K_M }, - { "Q3_K_L", LLAMA_FTYPE_MOSTLY_Q3_K_L }, - { "Q4_K_S", LLAMA_FTYPE_MOSTLY_Q4_K_S }, - { "Q4_K_M", LLAMA_FTYPE_MOSTLY_Q4_K_M }, - { "Q5_K_S", LLAMA_FTYPE_MOSTLY_Q5_K_S }, - { "Q5_K_M", LLAMA_FTYPE_MOSTLY_Q5_K_M }, - { "Q6_K", LLAMA_FTYPE_MOSTLY_Q6_K }, - { "IQ1_S", LLAMA_FTYPE_MOSTLY_IQ1_S }, - { "IQ1_M", LLAMA_FTYPE_MOSTLY_IQ1_M }, - { "IQ2_XXS", LLAMA_FTYPE_MOSTLY_IQ2_XXS }, - { "IQ2_XS", LLAMA_FTYPE_MOSTLY_IQ2_XS }, - { "IQ2_S", LLAMA_FTYPE_MOSTLY_IQ2_S }, - { "IQ2_M", LLAMA_FTYPE_MOSTLY_IQ2_M }, - { "IQ3_XXS", LLAMA_FTYPE_MOSTLY_IQ3_XXS }, - { "IQ3_XS", LLAMA_FTYPE_MOSTLY_IQ3_XS }, - { "IQ3_S", LLAMA_FTYPE_MOSTLY_IQ3_S }, - { "IQ3_M", LLAMA_FTYPE_MOSTLY_IQ3_M }, - { "IQ4_NL", LLAMA_FTYPE_MOSTLY_IQ4_NL }, - { "IQ4_XS", LLAMA_FTYPE_MOSTLY_IQ4_XS }, - { "TQ1_0", LLAMA_FTYPE_MOSTLY_TQ1_0 }, - { "TQ2_0", LLAMA_FTYPE_MOSTLY_TQ2_0 }, - { "MXFP4_MOE", LLAMA_FTYPE_MOSTLY_MXFP4_MOE }, -}; - -llama_ftype llama_ftype_from_name(const char * name) { - for (const auto & e : ftype_name_table) { - if (strcmp(name, e.name) == 0) { - return e.ftype; - } - } - return (llama_ftype)-1; -} - -const char * llama_ftype_to_name(llama_ftype ftype) { - for (const auto & e : ftype_name_table) { - if (e.ftype == ftype) { - return e.name; - } - } - return nullptr; -} void init_quantize_state_counters(quantize_state_impl & qs, std::vector & metadata) { for (auto & tm : metadata) { diff --git a/src/llama-quant.h b/src/llama-quant.h index d8fe643557..1614aa4013 100644 --- a/src/llama-quant.h +++ b/src/llama-quant.h @@ -86,11 +86,6 @@ struct quantize_state_impl { ggml_type llama_tensor_get_type(quantize_state_impl & qs, const llama_model_quantize_params * params, const ggml_tensor * tensor, ggml_type default_type, const tensor_metadata & tm); ggml_type llama_ftype_get_default_type(llama_ftype ftype); -// Ftype name <-> enum conversions. -// Returns (llama_ftype)-1 on failure. -llama_ftype llama_ftype_from_name(const char * name); -const char * llama_ftype_to_name(llama_ftype ftype); - // Initialize quantize_state_impl counters and populate tensor_metadata categories. // metadata: vector with name fields already set, will have category field populated. void init_quantize_state_counters(quantize_state_impl & qs, std::vector & metadata); diff --git a/tests/test-quant-type-selection.cpp b/tests/test-quant-type-selection.cpp index fd4b81ac97..7dac3b6659 100644 --- a/tests/test-quant-type-selection.cpp +++ b/tests/test-quant-type-selection.cpp @@ -16,6 +16,69 @@ #include #include +// --------------------------------------------------------------------------- +// ftype name <-> enum mapping +// --------------------------------------------------------------------------- + +struct ftype_name_entry { + const char * name; + llama_ftype ftype; +}; + +static const ftype_name_entry ftype_name_table[] = { + { "F32", LLAMA_FTYPE_ALL_F32 }, + { "F16", LLAMA_FTYPE_MOSTLY_F16 }, + { "BF16", LLAMA_FTYPE_MOSTLY_BF16 }, + { "Q4_0", LLAMA_FTYPE_MOSTLY_Q4_0 }, + { "Q4_1", LLAMA_FTYPE_MOSTLY_Q4_1 }, + { "Q5_0", LLAMA_FTYPE_MOSTLY_Q5_0 }, + { "Q5_1", LLAMA_FTYPE_MOSTLY_Q5_1 }, + { "Q8_0", LLAMA_FTYPE_MOSTLY_Q8_0 }, + { "Q2_K", LLAMA_FTYPE_MOSTLY_Q2_K }, + { "Q2_K_S", LLAMA_FTYPE_MOSTLY_Q2_K_S }, + { "Q3_K_S", LLAMA_FTYPE_MOSTLY_Q3_K_S }, + { "Q3_K_M", LLAMA_FTYPE_MOSTLY_Q3_K_M }, + { "Q3_K_L", LLAMA_FTYPE_MOSTLY_Q3_K_L }, + { "Q4_K_S", LLAMA_FTYPE_MOSTLY_Q4_K_S }, + { "Q4_K_M", LLAMA_FTYPE_MOSTLY_Q4_K_M }, + { "Q5_K_S", LLAMA_FTYPE_MOSTLY_Q5_K_S }, + { "Q5_K_M", LLAMA_FTYPE_MOSTLY_Q5_K_M }, + { "Q6_K", LLAMA_FTYPE_MOSTLY_Q6_K }, + { "IQ1_S", LLAMA_FTYPE_MOSTLY_IQ1_S }, + { "IQ1_M", LLAMA_FTYPE_MOSTLY_IQ1_M }, + { "IQ2_XXS", LLAMA_FTYPE_MOSTLY_IQ2_XXS }, + { "IQ2_XS", LLAMA_FTYPE_MOSTLY_IQ2_XS }, + { "IQ2_S", LLAMA_FTYPE_MOSTLY_IQ2_S }, + { "IQ2_M", LLAMA_FTYPE_MOSTLY_IQ2_M }, + { "IQ3_XXS", LLAMA_FTYPE_MOSTLY_IQ3_XXS }, + { "IQ3_XS", LLAMA_FTYPE_MOSTLY_IQ3_XS }, + { "IQ3_S", LLAMA_FTYPE_MOSTLY_IQ3_S }, + { "IQ3_M", LLAMA_FTYPE_MOSTLY_IQ3_M }, + { "IQ4_NL", LLAMA_FTYPE_MOSTLY_IQ4_NL }, + { "IQ4_XS", LLAMA_FTYPE_MOSTLY_IQ4_XS }, + { "TQ1_0", LLAMA_FTYPE_MOSTLY_TQ1_0 }, + { "TQ2_0", LLAMA_FTYPE_MOSTLY_TQ2_0 }, + { "MXFP4_MOE", LLAMA_FTYPE_MOSTLY_MXFP4_MOE }, +}; + +static llama_ftype llama_ftype_from_name(const char * name) { + for (const auto & e : ftype_name_table) { + if (strcmp(name, e.name) == 0) { + return e.ftype; + } + } + return (llama_ftype)-1; +} + +static const char * llama_ftype_to_name(llama_ftype ftype) { + for (const auto & e : ftype_name_table) { + if (e.ftype == ftype) { + return e.name; + } + } + return nullptr; +} + // --------------------------------------------------------------------------- // Mock tensor construction - may be better to extract this in the future // --------------------------------------------------------------------------- From 4a2f648db2c804c27964471957655a83dba50b23 Mon Sep 17 00:00:00 2001 From: Colin Kealty <3266127+bartowski1182@users.noreply.github.com> Date: Thu, 12 Mar 2026 17:57:10 -0400 Subject: [PATCH 17/21] Hide the regex functionality back in llama-quant.cpp, use a unique pointer to a new struct 'compiled_tensor_type_patterns' which contains the patterns --- src/llama-quant.cpp | 26 ++++++++++++++++++++++---- src/llama-quant.h | 21 +++++++-------------- 2 files changed, 29 insertions(+), 18 deletions(-) diff --git a/src/llama-quant.cpp b/src/llama-quant.cpp index e0aa84379f..a103b15a4c 100644 --- a/src/llama-quant.cpp +++ b/src/llama-quant.cpp @@ -134,6 +134,26 @@ static bool category_is_attn_v(tensor_category cat) { cat == tensor_category::ATTENTION_KV_B; } +struct compiled_tensor_type_patterns { + std::vector> patterns; +}; + +quantize_state_impl::quantize_state_impl(const llama_model & model, const llama_model_quantize_params * params) + : model(model), params(params) +{ + if (params->tensor_types) { + const auto & tensor_types = *static_cast *>(params->tensor_types); + if (!tensor_types.empty()) { + tensor_type_patterns = std::make_unique(); + for (const auto & [tname, qtype] : tensor_types) { + tensor_type_patterns->patterns.emplace_back(std::regex(tname), qtype); + } + } + } +} + +quantize_state_impl::~quantize_state_impl() = default; + // // dequantization // @@ -598,9 +618,9 @@ ggml_type llama_tensor_get_type(quantize_state_impl & qs, const llama_model_quan if (!params->pure && ggml_is_quantized(default_type)) { // if the user provided tensor types - use those bool manual = false; - if (!qs.tensor_type_patterns.empty()) { + if (qs.tensor_type_patterns) { const std::string tensor_name(tensor->name); - for (const auto & [pattern, qtype] : qs.tensor_type_patterns) { + for (const auto & [pattern, qtype] : qs.tensor_type_patterns->patterns) { if (std::regex_search(tensor_name, pattern)) { if (qtype != new_type) { LLAMA_LOG_WARN("%s: %-36s - applying manual override: %s -> %s\n", @@ -940,8 +960,6 @@ static void llama_model_quantize_impl(const std::string & fname_inp, const std:: const auto * it = tensors[i]; const struct ggml_tensor * tensor = it->tensor; - metadata[i].category = tensor_get_category(name); - uint16_t i_split = params->keep_split ? it->idx : 0; if (!ctx_outs[i_split]) { ctx_outs[i_split].reset(gguf_init_empty()); diff --git a/src/llama-quant.h b/src/llama-quant.h index 1614aa4013..b32ce2d92d 100644 --- a/src/llama-quant.h +++ b/src/llama-quant.h @@ -6,7 +6,7 @@ #include "llama-arch.h" -#include +#include #include #include @@ -46,6 +46,8 @@ struct tensor_type_option { ggml_type type = GGML_TYPE_COUNT; }; +struct compiled_tensor_type_patterns; + struct quantize_state_impl { const llama_model & model; const llama_model_quantize_params * params; @@ -67,20 +69,11 @@ struct quantize_state_impl { // used to figure out if a model has tied embeddings (tok_embd shares weights with output) bool has_tied_embeddings = true; // assume tied until we see output.weight - // tensor type override patterns (compiled once, used twice) - std::vector> tensor_type_patterns; + // tensor type override patterns (compiled once, used in llama_tensor_get_type) + std::unique_ptr tensor_type_patterns; - quantize_state_impl(const llama_model & model, const llama_model_quantize_params * params): - model(model), params(params) - { - // compile regex patterns once - they are expensive - if (params->tensor_types) { - const auto & tensor_types = *static_cast *>(params->tensor_types); - for (const auto & [tname, qtype] : tensor_types) { - tensor_type_patterns.emplace_back(std::regex(tname), qtype); - } - } - } + quantize_state_impl(const llama_model & model, const llama_model_quantize_params * params); + ~quantize_state_impl(); }; ggml_type llama_tensor_get_type(quantize_state_impl & qs, const llama_model_quantize_params * params, const ggml_tensor * tensor, ggml_type default_type, const tensor_metadata & tm); From d576ae32904120e024697e998030b7889ae6920a Mon Sep 17 00:00:00 2001 From: Georgi Gerganov Date: Sun, 15 Mar 2026 10:27:24 +0200 Subject: [PATCH 18/21] cont : inital deslop guidelines --- src/llama-ext.h | 38 ++++++++++++++++++++++++++--- src/llama-quant.cpp | 15 ++++++------ src/llama-quant.h | 24 +++++------------- tests/test-quant-type-selection.cpp | 14 +++++------ 4 files changed, 56 insertions(+), 35 deletions(-) diff --git a/src/llama-ext.h b/src/llama-ext.h index 13ced783b4..a29dcf1536 100644 --- a/src/llama-ext.h +++ b/src/llama-ext.h @@ -1,8 +1,14 @@ #pragma once -#include "llama-context.h" -#include "ggml.h" -#include "stdint.h" +#include "llama.h" + +// TODO: try to remove this headers +#include "llama-arch.h" +#include "llama-model.h" +#include "llama-quant.h" + +#include +#include // Reserve a new compute graph. It is valid until the next call to llama_graph_reserve. LLAMA_API struct ggml_cgraph * llama_graph_reserve( @@ -10,3 +16,29 @@ LLAMA_API struct ggml_cgraph * llama_graph_reserve( uint32_t n_tokens, uint32_t n_seqs, uint32_t n_outputs); + +LLAMA_API ggml_type llama_ftype_get_default_type(llama_ftype ftype); + +// TODO: use llama_quant_ prefix to name these consistently: + +// Returns true if this tensor should be quantized (based on name, dims, params). +LLAMA_API bool tensor_allows_quantization(const llama_model_quantize_params * params, llm_arch arch, const ggml_tensor * tensor); + +// TODO: add: +// LLAMA_API llama_quant * llama_quant_init(...); +// LLAMA_API void llama_quant_free(llama_quant * qnt); + +// TODO: become member function of llama_quant +LLAMA_API ggml_type llama_tensor_get_type( + llama_quant & qs, + const llama_model_quantize_params * params, + const ggml_tensor * tensor, + ggml_type default_type, + const tensor_metadata & tm); + +// Initialize llama_quant counters and populate tensor_metadata categories. +// metadata: vector with name fields already set, will have category field populated. +// TODO: become member function of llama_quant +LLAMA_API void init_quantize_state_counters( + llama_quant & qs, + std::vector & metadata); diff --git a/src/llama-quant.cpp b/src/llama-quant.cpp index a103b15a4c..d1a9d6d15c 100644 --- a/src/llama-quant.cpp +++ b/src/llama-quant.cpp @@ -2,6 +2,7 @@ #include "llama-impl.h" #include "llama-model.h" #include "llama-model-loader.h" +#include "llama-ext.h" #include #include @@ -138,7 +139,7 @@ struct compiled_tensor_type_patterns { std::vector> patterns; }; -quantize_state_impl::quantize_state_impl(const llama_model & model, const llama_model_quantize_params * params) +llama_quant::llama_quant(const llama_model & model, const llama_model_quantize_params * params) : model(model), params(params) { if (params->tensor_types) { @@ -152,7 +153,7 @@ quantize_state_impl::quantize_state_impl(const llama_model & model, const llama_ } } -quantize_state_impl::~quantize_state_impl() = default; +llama_quant::~llama_quant() = default; // // dequantization @@ -302,7 +303,7 @@ bool tensor_allows_quantization(const llama_model_quantize_params * params, llm_ // // incompatible tensor shapes are handled here - fallback to a compatible type -static ggml_type tensor_type_fallback(quantize_state_impl & qs, const ggml_tensor * t, const ggml_type target_type) { +static ggml_type tensor_type_fallback(llama_quant & qs, const ggml_tensor * t, const ggml_type target_type) { ggml_type return_type = target_type; const int64_t ncols = t->ne[0]; @@ -351,7 +352,7 @@ static ggml_type tensor_type_fallback(quantize_state_impl & qs, const ggml_tenso } // internal standard logic for selecting the target tensor type based on tensor category, ftype, and model arch -static ggml_type llama_tensor_get_type_impl(quantize_state_impl & qs, ggml_type new_type, const ggml_tensor * tensor, llama_ftype ftype, tensor_category category) { +static ggml_type llama_tensor_get_type_impl(llama_quant & qs, ggml_type new_type, const ggml_tensor * tensor, llama_ftype ftype, tensor_category category) { const std::string name = ggml_get_name(tensor); // TODO: avoid hardcoded tensor names - use the TN_* constants @@ -601,7 +602,7 @@ static ggml_type llama_tensor_get_type_impl(quantize_state_impl & qs, ggml_type } // outer wrapper: determine the ggml_type that this tensor should be quantized to -ggml_type llama_tensor_get_type(quantize_state_impl & qs, const llama_model_quantize_params * params, const ggml_tensor * tensor, ggml_type default_type, const tensor_metadata & tm) { +ggml_type llama_tensor_get_type(llama_quant & qs, const llama_model_quantize_params * params, const ggml_tensor * tensor, ggml_type default_type, const tensor_metadata & tm) { if (!tensor_allows_quantization(params, qs.model.arch, tensor)) { return tensor->type; } @@ -776,7 +777,7 @@ ggml_type llama_ftype_get_default_type(llama_ftype ftype) { } -void init_quantize_state_counters(quantize_state_impl & qs, std::vector & metadata) { +void init_quantize_state_counters(llama_quant & qs, std::vector & metadata) { for (auto & tm : metadata) { tensor_category cat = tensor_get_category(tm.name); tm.category = cat; @@ -835,7 +836,7 @@ static void llama_model_quantize_impl(const std::string & fname_inp, const std:: model.load_hparams(ml); model.load_stats (ml); - quantize_state_impl qs(model, params); + llama_quant qs(model, params); if (params->only_copy) { ftype = ml.ftype; diff --git a/src/llama-quant.h b/src/llama-quant.h index b32ce2d92d..4a7800f98d 100644 --- a/src/llama-quant.h +++ b/src/llama-quant.h @@ -2,16 +2,13 @@ #include "llama.h" -#include "ggml.h" - -#include "llama-arch.h" - #include #include -#include struct llama_model; +// TODO: use llama_quant_ prefix to name these consistently: + // tensor categorization - used to avoid repeated string matching in quantization logic. // this is different from LLM_TN - we want broad categories, not specific tensor names per arch. enum class tensor_category { @@ -30,6 +27,7 @@ enum class tensor_category { }; // per-tensor metadata, computed in the preliminary loop and used in the main loop +// TODO: probably should belong to llama_quant struct tensor_metadata { std::string name; ggml_type target_type; @@ -48,7 +46,7 @@ struct tensor_type_option { struct compiled_tensor_type_patterns; -struct quantize_state_impl { +struct llama_quant { const llama_model & model; const llama_model_quantize_params * params; @@ -72,16 +70,6 @@ struct quantize_state_impl { // tensor type override patterns (compiled once, used in llama_tensor_get_type) std::unique_ptr tensor_type_patterns; - quantize_state_impl(const llama_model & model, const llama_model_quantize_params * params); - ~quantize_state_impl(); + llama_quant(const llama_model & model, const llama_model_quantize_params * params); + ~llama_quant(); }; - -ggml_type llama_tensor_get_type(quantize_state_impl & qs, const llama_model_quantize_params * params, const ggml_tensor * tensor, ggml_type default_type, const tensor_metadata & tm); -ggml_type llama_ftype_get_default_type(llama_ftype ftype); - -// Initialize quantize_state_impl counters and populate tensor_metadata categories. -// metadata: vector with name fields already set, will have category field populated. -void init_quantize_state_counters(quantize_state_impl & qs, std::vector & metadata); - -// Returns true if this tensor should be quantized (based on name, dims, params). -bool tensor_allows_quantization(const llama_model_quantize_params * params, llm_arch arch, const ggml_tensor * tensor); diff --git a/tests/test-quant-type-selection.cpp b/tests/test-quant-type-selection.cpp index 7dac3b6659..7fbc6ce611 100644 --- a/tests/test-quant-type-selection.cpp +++ b/tests/test-quant-type-selection.cpp @@ -1,11 +1,9 @@ -#include "../src/llama-arch.h" -#include "../src/llama-model.h" -#include "../src/llama-quant.h" -#include "ggml-cpp.h" -#include "ggml.h" -#include "gguf-model-data.h" #include "llama.h" +#include "../src/llama-ext.h" + +#include "gguf-model-data.h" + #include #include #include @@ -323,13 +321,15 @@ static std::string read_file_contents(const std::string & path) { // --------------------------------------------------------------------------- // Returns {tensor_name, assigned_type} for each tensor, in order. +// TODO: should likely be moved as a member function of llama_quant and expose through the `llama-ext.h` interface static std::vector> compute_quant_types(llama_model & mdl, const std::vector & tensors, llama_ftype ftype) { llama_model_quantize_params qparams = llama_model_quantize_default_params(); qparams.ftype = ftype; - quantize_state_impl qs(mdl, &qparams); + // TODO: call llama_quant_init(...) + llama_quant qs(mdl, &qparams); std::vector metadata(tensors.size()); for (size_t i = 0; i < tensors.size(); ++i) { From 0b3cc323d97bd2b240203952629eba3d522cde30 Mon Sep 17 00:00:00 2001 From: Colin Kealty <3266127+bartowski1182@users.noreply.github.com> Date: Mon, 16 Mar 2026 15:06:46 -0400 Subject: [PATCH 19/21] Cleanup based on review comments --- src/llama-ext.h | 64 +++++---- src/llama-quant.cpp | 126 ++++++++++++++++- src/llama-quant.h | 39 ------ tests/test-quant-type-selection.cpp | 204 ++++++++++------------------ 4 files changed, 232 insertions(+), 201 deletions(-) diff --git a/src/llama-ext.h b/src/llama-ext.h index a29dcf1536..7f632ab211 100644 --- a/src/llama-ext.h +++ b/src/llama-ext.h @@ -2,13 +2,7 @@ #include "llama.h" -// TODO: try to remove this headers -#include "llama-arch.h" -#include "llama-model.h" -#include "llama-quant.h" - #include -#include // Reserve a new compute graph. It is valid until the next call to llama_graph_reserve. LLAMA_API struct ggml_cgraph * llama_graph_reserve( @@ -17,28 +11,46 @@ LLAMA_API struct ggml_cgraph * llama_graph_reserve( uint32_t n_seqs, uint32_t n_outputs); +// Get the default ggml_type for a given ftype. LLAMA_API ggml_type llama_ftype_get_default_type(llama_ftype ftype); -// TODO: use llama_quant_ prefix to name these consistently: +// Quantization state. +struct llama_quant; + +LLAMA_API llama_quant * llama_quant_init( + const llama_model * model, + const llama_model_quantize_params * params); + +LLAMA_API void llama_quant_free(llama_quant * qnt); + +// Descriptor for constructing a mock model for quantization testing. +struct llama_quant_model_desc { + const char * architecture; + uint32_t n_embd; + uint32_t n_ff; + uint32_t n_layer; + uint32_t n_head; + uint32_t n_head_kv; + uint32_t n_expert; + uint32_t n_embd_head_k; + uint32_t n_embd_head_v; +}; + +// Create a mock model from a metadata descriptor (for testing). +// The returned model must be freed with llama_model_free(). +LLAMA_API llama_model * llama_quant_model_from_metadata(const llama_quant_model_desc * desc); // Returns true if this tensor should be quantized (based on name, dims, params). -LLAMA_API bool tensor_allows_quantization(const llama_model_quantize_params * params, llm_arch arch, const ggml_tensor * tensor); +LLAMA_API bool llama_quant_tensor_allows_quantization( + const llama_quant * qnt, + const ggml_tensor * tensor); -// TODO: add: -// LLAMA_API llama_quant * llama_quant_init(...); -// LLAMA_API void llama_quant_free(llama_quant * qnt); - -// TODO: become member function of llama_quant -LLAMA_API ggml_type llama_tensor_get_type( - llama_quant & qs, - const llama_model_quantize_params * params, - const ggml_tensor * tensor, - ggml_type default_type, - const tensor_metadata & tm); - -// Initialize llama_quant counters and populate tensor_metadata categories. -// metadata: vector with name fields already set, will have category field populated. -// TODO: become member function of llama_quant -LLAMA_API void init_quantize_state_counters( - llama_quant & qs, - std::vector & metadata); +// Compute quantization type assignments for a list of tensors. +// All tensors should be quantizable (use llama_quant_tensor_allows_quantization to filter). +// result_types: caller-allocated array of n_tensors elements, filled with assigned types. +LLAMA_API void llama_quant_compute_types( + llama_quant * qnt, + llama_ftype ftype, + ggml_tensor ** tensors, + ggml_type * result_types, + size_t n_tensors); diff --git a/src/llama-quant.cpp b/src/llama-quant.cpp index d1a9d6d15c..8cb1298167 100644 --- a/src/llama-quant.cpp +++ b/src/llama-quant.cpp @@ -14,6 +14,39 @@ #include #include +// tensor categorization - used to avoid repeated string matching in quantization logic. +// this is different from LLM_TN - we want broad categories, not specific tensor names per arch. +enum class tensor_category { + TOKEN_EMBD, + ATTENTION_Q, + ATTENTION_V, + ATTENTION_K, + ATTENTION_QKV, + ATTENTION_KV_B, + ATTENTION_OUTPUT, + FFN_UP, + FFN_GATE, + FFN_DOWN, + OUTPUT, + OTHER +}; + +// per-tensor metadata, computed in the preliminary loop and used in the main loop +struct tensor_metadata { + std::string name; + ggml_type target_type; + tensor_category category; + std::string remapped_imatrix_name; + bool allows_quantization; + bool requires_imatrix; +}; + +// result of parsing --tensor-type option +// (changes to this struct must be reflected in tools/quantize/quantize.cpp) +struct tensor_type_option { + std::string name; + ggml_type type = GGML_TYPE_COUNT; +}; static void zeros(std::ofstream & file, size_t n) { char zero = 0; @@ -235,7 +268,7 @@ static void llama_tensor_dequantize_impl( // do we allow this tensor to be quantized? // -bool tensor_allows_quantization(const llama_model_quantize_params * params, llm_arch arch, const ggml_tensor * tensor) { +static bool tensor_allows_quantization(const llama_model_quantize_params * params, llm_arch arch, const ggml_tensor * tensor) { // trivial checks first -- no string ops needed if (params->only_copy) return false; @@ -602,7 +635,7 @@ static ggml_type llama_tensor_get_type_impl(llama_quant & qs, ggml_type new_type } // outer wrapper: determine the ggml_type that this tensor should be quantized to -ggml_type llama_tensor_get_type(llama_quant & qs, const llama_model_quantize_params * params, const ggml_tensor * tensor, ggml_type default_type, const tensor_metadata & tm) { +static ggml_type llama_tensor_get_type(llama_quant & qs, const llama_model_quantize_params * params, const ggml_tensor * tensor, ggml_type default_type, const tensor_metadata & tm) { if (!tensor_allows_quantization(params, qs.model.arch, tensor)) { return tensor->type; } @@ -777,7 +810,7 @@ ggml_type llama_ftype_get_default_type(llama_ftype ftype) { } -void init_quantize_state_counters(llama_quant & qs, std::vector & metadata) { +static void init_quantize_state_counters(llama_quant & qs, std::vector & metadata) { for (auto & tm : metadata) { tensor_category cat = tensor_get_category(tm.name); tm.category = cat; @@ -1258,3 +1291,90 @@ uint32_t llama_model_quantize( return 0; } + +// +// Helper functions for external tools exposed in llama-ext.h +// + +llama_quant * llama_quant_init( + const llama_model * model, + const llama_model_quantize_params * params) { + return new llama_quant(*model, params); +} + +void llama_quant_free(llama_quant * qnt) { + delete qnt; +} + +llama_model * llama_quant_model_from_metadata(const llama_quant_model_desc * desc) { + struct llama_model_params mparams = llama_model_default_params(); + auto * model = new llama_model(mparams); + + model->arch = llm_arch_from_string(desc->architecture); + + // infer llm_type: only LLM_TYPE_70B matters for quantization logic + if (model->arch == LLM_ARCH_LLAMA && desc->n_layer == 80 && desc->n_head != desc->n_head_kv) { + model->type = LLM_TYPE_70B; + } + + model->hparams.n_embd = desc->n_embd; + model->hparams.n_embd_head_k_full = desc->n_embd_head_k; + model->hparams.n_embd_head_v_full = desc->n_embd_head_v; + model->hparams.n_layer = desc->n_layer; + model->hparams.n_expert = desc->n_expert; + + for (uint32_t i = 0; i < desc->n_layer; i++) { + model->hparams.n_head_arr[i] = desc->n_head; + model->hparams.n_head_kv_arr[i] = desc->n_head_kv; + model->hparams.n_ff_arr[i] = desc->n_ff; + } + + return model; +} + +bool llama_quant_tensor_allows_quantization( + const llama_quant * qnt, + const ggml_tensor * tensor) { + return tensor_allows_quantization(qnt->params, qnt->model.arch, tensor); +} + +void llama_quant_compute_types( + llama_quant * qnt, + llama_ftype ftype, + ggml_tensor ** tensors, + ggml_type * result_types, + size_t n_tensors) { + // reset per-computation state + qnt->n_attention_wv = 0; + qnt->n_ffn_down = 0; + qnt->n_ffn_gate = 0; + qnt->n_ffn_up = 0; + qnt->i_attention_wv = 0; + qnt->i_ffn_down = 0; + qnt->i_ffn_gate = 0; + qnt->i_ffn_up = 0; + qnt->n_k_quantized = 0; + qnt->n_fallback = 0; + qnt->has_imatrix = false; + qnt->has_tied_embeddings = true; + + // build metadata from tensor names + std::vector metadata(n_tensors); + for (size_t i = 0; i < n_tensors; i++) { + metadata[i].name = ggml_get_name(tensors[i]); + } + + // initialize counters and categories + init_quantize_state_counters(*qnt, metadata); + + // use a local copy of params with the requested ftype + llama_model_quantize_params local_params = *qnt->params; + local_params.ftype = ftype; + + ggml_type default_type = llama_ftype_get_default_type(ftype); + + // compute types + for (size_t i = 0; i < n_tensors; i++) { + result_types[i] = llama_tensor_get_type(*qnt, &local_params, tensors[i], default_type, metadata[i]); + } +} diff --git a/src/llama-quant.h b/src/llama-quant.h index 4a7800f98d..8aff106ddf 100644 --- a/src/llama-quant.h +++ b/src/llama-quant.h @@ -3,47 +3,8 @@ #include "llama.h" #include -#include struct llama_model; - -// TODO: use llama_quant_ prefix to name these consistently: - -// tensor categorization - used to avoid repeated string matching in quantization logic. -// this is different from LLM_TN - we want broad categories, not specific tensor names per arch. -enum class tensor_category { - TOKEN_EMBD, - ATTENTION_Q, - ATTENTION_V, - ATTENTION_K, - ATTENTION_QKV, - ATTENTION_KV_B, - ATTENTION_OUTPUT, - FFN_UP, - FFN_GATE, - FFN_DOWN, - OUTPUT, - OTHER -}; - -// per-tensor metadata, computed in the preliminary loop and used in the main loop -// TODO: probably should belong to llama_quant -struct tensor_metadata { - std::string name; - ggml_type target_type; - tensor_category category; - std::string remapped_imatrix_name; - bool allows_quantization; - bool requires_imatrix; -}; - -// result of parsing --tensor-type option -// (changes to this struct must be reflected in tools/quantize/quantize.cpp) -struct tensor_type_option { - std::string name; - ggml_type type = GGML_TYPE_COUNT; -}; - struct compiled_tensor_type_patterns; struct llama_quant { diff --git a/tests/test-quant-type-selection.cpp b/tests/test-quant-type-selection.cpp index 7fbc6ce611..aa0f14bf9f 100644 --- a/tests/test-quant-type-selection.cpp +++ b/tests/test-quant-type-selection.cpp @@ -2,13 +2,13 @@ #include "../src/llama-ext.h" +#include "ggml-cpp.h" #include "gguf-model-data.h" #include #include #include #include -#include #include #include #include @@ -78,37 +78,9 @@ static const char * llama_ftype_to_name(llama_ftype ftype) { } // --------------------------------------------------------------------------- -// Mock tensor construction - may be better to extract this in the future +// ggml_type name lookup // --------------------------------------------------------------------------- -struct mock_tensor { - ggml_context_ptr ctx; - ggml_tensor * tensor; -}; - -static mock_tensor make_mock_tensor(const std::string & name, - int64_t ne0, - int64_t ne1, - int64_t ne2 = 1, - int64_t ne3 = 1) { - struct ggml_init_params params = { - /*.mem_size =*/ 2 * ggml_tensor_overhead(), - /*.mem_buffer =*/ nullptr, - /*.no_alloc =*/ true, - }; - ggml_context_ptr ctx(ggml_init(params)); - ggml_tensor * t; - if (ne3 > 1) { - t = ggml_new_tensor_4d(ctx.get(), GGML_TYPE_F32, ne0, ne1, ne2, ne3); - } else if (ne2 > 1) { - t = ggml_new_tensor_3d(ctx.get(), GGML_TYPE_F32, ne0, ne1, ne2); - } else { - t = ggml_new_tensor_2d(ctx.get(), GGML_TYPE_F32, ne0, ne1); - } - ggml_set_name(t, name.c_str()); - return { std::move(ctx), t }; -} - static ggml_type ggml_type_from_name(const std::string & name) { for (int i = 0; i < GGML_TYPE_COUNT; i++) { const char * tname = ggml_type_name((ggml_type) i); @@ -260,94 +232,44 @@ static const remote_model_spec model_specs[] = { static const int n_model_specs = (int) (sizeof(model_specs) / sizeof(model_specs[0])); -// Determine llm_type from metadata. -// Only LLM_TYPE_70B matters -> probably can/should be dropped in the future -static llm_type infer_llm_type(llm_arch arch, const gguf_remote_model & remote) { - if (arch == LLM_ARCH_LLAMA && remote.n_layer == 80 && remote.n_head != remote.n_head_kv) { - return LLM_TYPE_70B; - } - return LLM_TYPE_UNKNOWN; +static llama_model * build_mock_model_from_remote(const gguf_remote_model & remote) { + llama_quant_model_desc desc = {}; + desc.architecture = remote.architecture.c_str(); + desc.n_embd = remote.n_embd; + desc.n_ff = remote.n_ff; + desc.n_layer = remote.n_layer; + desc.n_head = remote.n_head; + desc.n_head_kv = remote.n_head_kv; + desc.n_expert = remote.n_expert; + desc.n_embd_head_k = remote.n_embd_head_k; + desc.n_embd_head_v = remote.n_embd_head_v; + return llama_quant_model_from_metadata(&desc); } -static std::unique_ptr build_mock_model_from_remote(const gguf_remote_model & remote) { - struct llama_model_params mparams = llama_model_default_params(); - auto model = std::make_unique(mparams); +// Single ggml context holding all quantizable tensors for a model. +struct mock_tensors { + ggml_context_ptr ctx; + std::vector tensors; +}; - model->arch = llm_arch_from_string(remote.architecture); - model->type = infer_llm_type(model->arch, remote); +static mock_tensors build_mock_tensors(const llama_quant * qnt, + const gguf_remote_model & remote) { + const size_t ctx_size = remote.tensors.size() * ggml_tensor_overhead(); + struct ggml_init_params params = { ctx_size, nullptr, true }; + ggml_context_ptr ctx(ggml_init(params)); - model->hparams.n_embd = remote.n_embd; - model->hparams.n_embd_head_k_full = remote.n_embd_head_k; - model->hparams.n_embd_head_v_full = remote.n_embd_head_v; - model->hparams.n_layer = remote.n_layer; - model->hparams.n_expert = remote.n_expert; - - for (uint32_t i = 0; i < remote.n_layer; i++) { - model->hparams.n_head_arr[i] = remote.n_head; - model->hparams.n_head_kv_arr[i] = remote.n_head_kv; - model->hparams.n_ff_arr[i] = remote.n_ff; - } - - return model; -} - -static std::vector build_mock_tensors(const gguf_remote_model & remote, - llm_arch arch, - const llama_model_quantize_params & qparams) { - std::vector result; + std::vector result; for (const auto & t : remote.tensors) { - auto mt = make_mock_tensor(t.name, t.ne[0], t.ne[1], t.ne[2], t.ne[3]); - if (tensor_allows_quantization(&qparams, arch, mt.tensor)) { - result.push_back(std::move(mt)); + ggml_tensor * gt = ggml_new_tensor_4d(ctx.get(), GGML_TYPE_F32, + t.ne[0], t.ne[1], t.ne[2], t.ne[3]); + ggml_set_name(gt, t.name.c_str()); + if (llama_quant_tensor_allows_quantization(qnt, gt)) { + result.push_back(gt); } } - return result; -} - -static std::string read_file_contents(const std::string & path) { - std::ifstream f(path); - if (!f.good()) { - return ""; - } - std::ostringstream ss; - ss << f.rdbuf(); - return ss.str(); -} - -// --------------------------------------------------------------------------- -// Compute quantization type assignments per target ftype -// --------------------------------------------------------------------------- - -// Returns {tensor_name, assigned_type} for each tensor, in order. -// TODO: should likely be moved as a member function of llama_quant and expose through the `llama-ext.h` interface -static std::vector> compute_quant_types(llama_model & mdl, - const std::vector & tensors, - llama_ftype ftype) { - llama_model_quantize_params qparams = llama_model_quantize_default_params(); - qparams.ftype = ftype; - - // TODO: call llama_quant_init(...) - llama_quant qs(mdl, &qparams); - - std::vector metadata(tensors.size()); - for (size_t i = 0; i < tensors.size(); ++i) { - metadata[i].name = tensors[i].tensor->name; - } - init_quantize_state_counters(qs, metadata); - - ggml_type default_type = llama_ftype_get_default_type(ftype); - - std::vector> result; - result.reserve(tensors.size()); - - for (size_t i = 0; i < tensors.size(); ++i) { - ggml_type got = llama_tensor_get_type(qs, &qparams, tensors[i].tensor, default_type, metadata[i]); - result.push_back({ metadata[i].name, got }); - } - - return result; + return { std::move(ctx), std::move(result) }; } // --------------------------------------------------------------------------- @@ -355,10 +277,10 @@ static std::vector> compute_quant_types(llama_ // Use this when either adding new models or modifying quants // --------------------------------------------------------------------------- -static std::string generate_snapshot(const std::string & name, - const gguf_remote_model & remote, - llama_model & mdl, - const std::vector & tensors) { +static std::string generate_snapshot(const std::string & name, + const gguf_remote_model & remote, + llama_quant * qnt, + mock_tensors & mt) { std::ostringstream out; out << "# Model: " << name << "\n"; @@ -380,12 +302,13 @@ static std::string generate_snapshot(const std::string & name, continue; } - auto types = compute_quant_types(mdl, tensors, ft); + std::vector result_types(mt.tensors.size()); + llama_quant_compute_types(qnt, ft, mt.tensors.data(), result_types.data(), mt.tensors.size()); out << "\n[" << fname << "] " << ggml_type_name(default_type) << "\n"; - for (const auto & [name, type] : types) { - if (type != default_type) { - out << name << " " << ggml_type_name(type) << "\n"; + for (size_t j = 0; j < mt.tensors.size(); j++) { + if (result_types[j] != default_type) { + out << ggml_get_name(mt.tensors[j]) << " " << ggml_type_name(result_types[j]) << "\n"; } } } @@ -418,21 +341,26 @@ static int run_generate(const std::string & snapshot_dir) { } const auto & remote = result.value(); - auto model = build_mock_model_from_remote(remote); + llama_model * model = build_mock_model_from_remote(remote); llama_model_quantize_params qparams = llama_model_quantize_default_params(); - auto tensors = build_mock_tensors(remote, model->arch, qparams); + llama_quant * qnt = llama_quant_init(model, &qparams); + auto mt = build_mock_tensors(qnt, remote); - std::string content = generate_snapshot(name, remote, *model, tensors); + std::string content = generate_snapshot(name, remote, qnt, mt); std::string path = snapshot_dir + "/" + snapshot_file_from_name(name) + ".schema"; std::ofstream f(path); if (!f.good()) { fprintf(stderr, "ERROR: could not write %s\n", path.c_str()); + llama_quant_free(qnt); + llama_model_free(model); return 1; } f << content; n_written++; fprintf(stderr, " wrote %s\n", path.c_str()); + llama_quant_free(qnt); + llama_model_free(model); } fprintf(stderr, "%d files written\n", n_written); @@ -443,9 +371,9 @@ static int run_generate(const std::string & snapshot_dir) { // Test mode: compare against snapshot files // --------------------------------------------------------------------------- -static bool run_test_section(llama_model & mdl, - const std::vector & tensors, - const snapshot_section & section) { +static bool run_test_section(llama_quant * qnt, + mock_tensors & mt, + const snapshot_section & section) { // verify default_type matches what llama_ftype_get_default_type returns ggml_type computed_default = llama_ftype_get_default_type(section.ftype); if (computed_default != section.default_type) { @@ -454,14 +382,18 @@ static bool run_test_section(llama_model & mdl, return false; } - auto types = compute_quant_types(mdl, tensors, section.ftype); + std::vector result_types(mt.tensors.size()); + llama_quant_compute_types(qnt, section.ftype, mt.tensors.data(), result_types.data(), mt.tensors.size()); std::map override_map(section.overrides.begin(), section.overrides.end()); bool all_pass = true; int n_override_found = 0; - for (const auto & [name, got] : types) { + for (size_t i = 0; i < mt.tensors.size(); i++) { + const char * name = ggml_get_name(mt.tensors[i]); + ggml_type got = result_types[i]; + ggml_type expected = section.default_type; auto it = override_map.find(name); if (it != override_map.end()) { @@ -470,7 +402,7 @@ static bool run_test_section(llama_model & mdl, } if (got != expected) { - printf(" FAIL %-50s %-10s expected %s, got %s\n", name.c_str(), llama_ftype_to_name(section.ftype), ggml_type_name(expected), ggml_type_name(got)); + printf(" FAIL %-50s %-10s expected %s, got %s\n", name, llama_ftype_to_name(section.ftype), ggml_type_name(expected), ggml_type_name(got)); all_pass = false; } } @@ -502,14 +434,17 @@ static int run_remote_tests(const std::string & snapshot_dir, const char * argv0 } const auto & remote = result.value(); - auto model = build_mock_model_from_remote(remote); + llama_model * model = build_mock_model_from_remote(remote); llama_model_quantize_params qparams = llama_model_quantize_default_params(); - auto tensors = build_mock_tensors(remote, model->arch, qparams); + llama_quant * qnt = llama_quant_init(model, &qparams); + auto mt = build_mock_tensors(qnt, remote); std::string snapshot_path = snapshot_dir + "/" + snapshot_file_from_name(name) + ".schema"; std::vector sections; if (!parse_snapshot_file(snapshot_path, sections)) { printf(" SKIP (could not read snapshot file: %s)\n\n", snapshot_path.c_str()); + llama_quant_free(qnt); + llama_model_free(model); total_skip++; continue; } @@ -518,7 +453,7 @@ static int run_remote_tests(const std::string & snapshot_dir, const char * argv0 int model_fail = 0; for (const auto & section : sections) { - bool pass = run_test_section(*model, tensors, section); + bool pass = run_test_section(qnt, mt, section); if (pass) { model_pass++; } else { @@ -527,7 +462,7 @@ static int run_remote_tests(const std::string & snapshot_dir, const char * argv0 } printf(" %s %s: %d/%d ftype sections passed (%d tensors)\n", model_fail == 0 ? "PASS" : "FAIL", name.c_str(), - model_pass, model_pass + model_fail, (int) tensors.size()); + model_pass, model_pass + model_fail, (int) mt.tensors.size()); printf("\n"); if (model_fail == 0) { @@ -535,6 +470,9 @@ static int run_remote_tests(const std::string & snapshot_dir, const char * argv0 } else { total_fail++; } + + llama_quant_free(qnt); + llama_model_free(model); } printf("%d/%d models passed", total_pass, total_pass + total_fail); @@ -556,10 +494,10 @@ int main(int argc, char ** argv) { bool generate = false; for (int i = 1; i < argc; i++) { - if (strcmp(argv[i], "--snapshot-dir") == 0 && i + 1 < argc) { - snapshot_dir = argv[++i]; - } else if (strcmp(argv[i], "--generate") == 0) { + if (strcmp(argv[i], "--generate") == 0) { generate = true; + } else if (strcmp(argv[i], "--snapshot-dir") == 0 && i + 1 < argc) { + snapshot_dir = argv[++i]; } } From b85a7c8c6c5365fe869da4bbdb07e06106579b21 Mon Sep 17 00:00:00 2001 From: Colin Kealty <3266127+bartowski1182@users.noreply.github.com> Date: Tue, 17 Mar 2026 12:47:12 -0400 Subject: [PATCH 20/21] Continue cleanup --- src/llama-ext.h | 10 +- src/llama-quant.cpp | 138 ++++++++++++++++------------ src/llama-quant.h | 35 ------- tests/test-quant-type-selection.cpp | 34 +++---- 4 files changed, 101 insertions(+), 116 deletions(-) diff --git a/src/llama-ext.h b/src/llama-ext.h index 7f632ab211..2ffb77934e 100644 --- a/src/llama-ext.h +++ b/src/llama-ext.h @@ -15,13 +15,13 @@ LLAMA_API struct ggml_cgraph * llama_graph_reserve( LLAMA_API ggml_type llama_ftype_get_default_type(llama_ftype ftype); // Quantization state. -struct llama_quant; +struct quantize_state_impl; -LLAMA_API llama_quant * llama_quant_init( +LLAMA_API quantize_state_impl * llama_quant_init( const llama_model * model, const llama_model_quantize_params * params); -LLAMA_API void llama_quant_free(llama_quant * qnt); +LLAMA_API void llama_quant_free(quantize_state_impl * qs); // Descriptor for constructing a mock model for quantization testing. struct llama_quant_model_desc { @@ -42,14 +42,14 @@ LLAMA_API llama_model * llama_quant_model_from_metadata(const llama_quant_model_ // Returns true if this tensor should be quantized (based on name, dims, params). LLAMA_API bool llama_quant_tensor_allows_quantization( - const llama_quant * qnt, + const quantize_state_impl * qs, const ggml_tensor * tensor); // Compute quantization type assignments for a list of tensors. // All tensors should be quantizable (use llama_quant_tensor_allows_quantization to filter). // result_types: caller-allocated array of n_tensors elements, filled with assigned types. LLAMA_API void llama_quant_compute_types( - llama_quant * qnt, + quantize_state_impl * qs, llama_ftype ftype, ggml_tensor ** tensors, ggml_type * result_types, diff --git a/src/llama-quant.cpp b/src/llama-quant.cpp index 8cb1298167..e98d1b91e5 100644 --- a/src/llama-quant.cpp +++ b/src/llama-quant.cpp @@ -14,6 +14,13 @@ #include #include +// result of parsing --tensor-type option +// (changes to this struct must be reflected in tools/quantize/quantize.cpp) +struct tensor_type_option { + std::string name; + ggml_type type = GGML_TYPE_COUNT; +}; + // tensor categorization - used to avoid repeated string matching in quantization logic. // this is different from LLM_TN - we want broad categories, not specific tensor names per arch. enum class tensor_category { @@ -31,23 +38,6 @@ enum class tensor_category { OTHER }; -// per-tensor metadata, computed in the preliminary loop and used in the main loop -struct tensor_metadata { - std::string name; - ggml_type target_type; - tensor_category category; - std::string remapped_imatrix_name; - bool allows_quantization; - bool requires_imatrix; -}; - -// result of parsing --tensor-type option -// (changes to this struct must be reflected in tools/quantize/quantize.cpp) -struct tensor_type_option { - std::string name; - ggml_type type = GGML_TYPE_COUNT; -}; - static void zeros(std::ofstream & file, size_t n) { char zero = 0; for (size_t i = 0; i < n; ++i) { @@ -168,25 +158,56 @@ static bool category_is_attn_v(tensor_category cat) { cat == tensor_category::ATTENTION_KV_B; } -struct compiled_tensor_type_patterns { - std::vector> patterns; -}; +// +// quantization state +// -llama_quant::llama_quant(const llama_model & model, const llama_model_quantize_params * params) - : model(model), params(params) -{ - if (params->tensor_types) { - const auto & tensor_types = *static_cast *>(params->tensor_types); - if (!tensor_types.empty()) { - tensor_type_patterns = std::make_unique(); +struct quantize_state_impl { + const llama_model & model; + const llama_model_quantize_params * params; + + int n_attention_wv = 0; + int n_ffn_down = 0; + int n_ffn_gate = 0; + int n_ffn_up = 0; + int i_attention_wv = 0; + int i_ffn_down = 0; + int i_ffn_gate = 0; + int i_ffn_up = 0; + + int n_fallback = 0; + + bool has_imatrix = false; + + // used to figure out if a model has tied embeddings (tok_embd shares weights with output) + bool has_tied_embeddings = true; // assume tied until we see output.weight + + // tensor type override patterns (compiled once, used twice) + std::vector> tensor_type_patterns; + + quantize_state_impl(const llama_model & model, const llama_model_quantize_params * params): + model(model), params(params) + { + // compile regex patterns once - they are expensive + if (params->tensor_types) { + const auto & tensor_types = *static_cast *>(params->tensor_types); for (const auto & [tname, qtype] : tensor_types) { - tensor_type_patterns->patterns.emplace_back(std::regex(tname), qtype); + tensor_type_patterns.emplace_back(std::regex(tname), qtype); } } } -} +}; -llama_quant::~llama_quant() = default; + +// per-tensor metadata, computed in the preliminary loop and used in the main loop +struct tensor_metadata { + std::string name; + ggml_type target_type; + tensor_category category; + std::string remapped_imatrix_name; + bool allows_quantization; + bool requires_imatrix; +}; // // dequantization @@ -336,7 +357,7 @@ static bool tensor_allows_quantization(const llama_model_quantize_params * param // // incompatible tensor shapes are handled here - fallback to a compatible type -static ggml_type tensor_type_fallback(llama_quant & qs, const ggml_tensor * t, const ggml_type target_type) { +static ggml_type tensor_type_fallback(quantize_state_impl & qs, const ggml_tensor * t, const ggml_type target_type) { ggml_type return_type = target_type; const int64_t ncols = t->ne[0]; @@ -385,7 +406,7 @@ static ggml_type tensor_type_fallback(llama_quant & qs, const ggml_tensor * t, c } // internal standard logic for selecting the target tensor type based on tensor category, ftype, and model arch -static ggml_type llama_tensor_get_type_impl(llama_quant & qs, ggml_type new_type, const ggml_tensor * tensor, llama_ftype ftype, tensor_category category) { +static ggml_type llama_tensor_get_type_impl(quantize_state_impl & qs, ggml_type new_type, const ggml_tensor * tensor, llama_ftype ftype, tensor_category category) { const std::string name = ggml_get_name(tensor); // TODO: avoid hardcoded tensor names - use the TN_* constants @@ -635,7 +656,7 @@ static ggml_type llama_tensor_get_type_impl(llama_quant & qs, ggml_type new_type } // outer wrapper: determine the ggml_type that this tensor should be quantized to -static ggml_type llama_tensor_get_type(llama_quant & qs, const llama_model_quantize_params * params, const ggml_tensor * tensor, ggml_type default_type, const tensor_metadata & tm) { +static ggml_type llama_tensor_get_type(quantize_state_impl & qs, const llama_model_quantize_params * params, const ggml_tensor * tensor, ggml_type default_type, const tensor_metadata & tm) { if (!tensor_allows_quantization(params, qs.model.arch, tensor)) { return tensor->type; } @@ -652,9 +673,9 @@ static ggml_type llama_tensor_get_type(llama_quant & qs, const llama_model_quant if (!params->pure && ggml_is_quantized(default_type)) { // if the user provided tensor types - use those bool manual = false; - if (qs.tensor_type_patterns) { + if (!qs.tensor_type_patterns.empty()) { const std::string tensor_name(tensor->name); - for (const auto & [pattern, qtype] : qs.tensor_type_patterns->patterns) { + for (const auto & [pattern, qtype] : qs.tensor_type_patterns) { if (std::regex_search(tensor_name, pattern)) { if (qtype != new_type) { LLAMA_LOG_WARN("%s: %-36s - applying manual override: %s -> %s\n", @@ -810,7 +831,7 @@ ggml_type llama_ftype_get_default_type(llama_ftype ftype) { } -static void init_quantize_state_counters(llama_quant & qs, std::vector & metadata) { +static void init_quantize_state_counters(quantize_state_impl & qs, std::vector & metadata) { for (auto & tm : metadata) { tensor_category cat = tensor_get_category(tm.name); tm.category = cat; @@ -869,7 +890,7 @@ static void llama_model_quantize_impl(const std::string & fname_inp, const std:: model.load_hparams(ml); model.load_stats (ml); - llama_quant qs(model, params); + quantize_state_impl qs(model, params); if (params->only_copy) { ftype = ml.ftype; @@ -1296,14 +1317,14 @@ uint32_t llama_model_quantize( // Helper functions for external tools exposed in llama-ext.h // -llama_quant * llama_quant_init( +quantize_state_impl * llama_quant_init( const llama_model * model, const llama_model_quantize_params * params) { - return new llama_quant(*model, params); + return new quantize_state_impl(*model, params); } -void llama_quant_free(llama_quant * qnt) { - delete qnt; +void llama_quant_free(quantize_state_impl * qs) { + delete qs; } llama_model * llama_quant_model_from_metadata(const llama_quant_model_desc * desc) { @@ -1333,30 +1354,29 @@ llama_model * llama_quant_model_from_metadata(const llama_quant_model_desc * des } bool llama_quant_tensor_allows_quantization( - const llama_quant * qnt, + const quantize_state_impl * qs, const ggml_tensor * tensor) { - return tensor_allows_quantization(qnt->params, qnt->model.arch, tensor); + return tensor_allows_quantization(qs->params, qs->model.arch, tensor); } void llama_quant_compute_types( - llama_quant * qnt, + quantize_state_impl * qs, llama_ftype ftype, ggml_tensor ** tensors, ggml_type * result_types, size_t n_tensors) { // reset per-computation state - qnt->n_attention_wv = 0; - qnt->n_ffn_down = 0; - qnt->n_ffn_gate = 0; - qnt->n_ffn_up = 0; - qnt->i_attention_wv = 0; - qnt->i_ffn_down = 0; - qnt->i_ffn_gate = 0; - qnt->i_ffn_up = 0; - qnt->n_k_quantized = 0; - qnt->n_fallback = 0; - qnt->has_imatrix = false; - qnt->has_tied_embeddings = true; + qs->n_attention_wv = 0; + qs->n_ffn_down = 0; + qs->n_ffn_gate = 0; + qs->n_ffn_up = 0; + qs->i_attention_wv = 0; + qs->i_ffn_down = 0; + qs->i_ffn_gate = 0; + qs->i_ffn_up = 0; + qs->n_fallback = 0; + qs->has_imatrix = false; + qs->has_tied_embeddings = true; // build metadata from tensor names std::vector metadata(n_tensors); @@ -1365,16 +1385,16 @@ void llama_quant_compute_types( } // initialize counters and categories - init_quantize_state_counters(*qnt, metadata); + init_quantize_state_counters(*qs, metadata); // use a local copy of params with the requested ftype - llama_model_quantize_params local_params = *qnt->params; + llama_model_quantize_params local_params = *qs->params; local_params.ftype = ftype; ggml_type default_type = llama_ftype_get_default_type(ftype); // compute types for (size_t i = 0; i < n_tensors; i++) { - result_types[i] = llama_tensor_get_type(*qnt, &local_params, tensors[i], default_type, metadata[i]); + result_types[i] = llama_tensor_get_type(*qs, &local_params, tensors[i], default_type, metadata[i]); } } diff --git a/src/llama-quant.h b/src/llama-quant.h index 8aff106ddf..6f70f09bee 100644 --- a/src/llama-quant.h +++ b/src/llama-quant.h @@ -1,36 +1 @@ #pragma once - -#include "llama.h" - -#include - -struct llama_model; -struct compiled_tensor_type_patterns; - -struct llama_quant { - const llama_model & model; - const llama_model_quantize_params * params; - - int n_attention_wv = 0; - int n_ffn_down = 0; - int n_ffn_gate = 0; - int n_ffn_up = 0; - int i_attention_wv = 0; - int i_ffn_down = 0; - int i_ffn_gate = 0; - int i_ffn_up = 0; - - int n_k_quantized = 0; - int n_fallback = 0; - - bool has_imatrix = false; - - // used to figure out if a model has tied embeddings (tok_embd shares weights with output) - bool has_tied_embeddings = true; // assume tied until we see output.weight - - // tensor type override patterns (compiled once, used in llama_tensor_get_type) - std::unique_ptr tensor_type_patterns; - - llama_quant(const llama_model & model, const llama_model_quantize_params * params); - ~llama_quant(); -}; diff --git a/tests/test-quant-type-selection.cpp b/tests/test-quant-type-selection.cpp index aa0f14bf9f..ccecbed5c6 100644 --- a/tests/test-quant-type-selection.cpp +++ b/tests/test-quant-type-selection.cpp @@ -252,8 +252,8 @@ struct mock_tensors { std::vector tensors; }; -static mock_tensors build_mock_tensors(const llama_quant * qnt, - const gguf_remote_model & remote) { +static mock_tensors build_mock_tensors(const quantize_state_impl * qs, + const gguf_remote_model & remote) { const size_t ctx_size = remote.tensors.size() * ggml_tensor_overhead(); struct ggml_init_params params = { ctx_size, nullptr, true }; ggml_context_ptr ctx(ggml_init(params)); @@ -264,7 +264,7 @@ static mock_tensors build_mock_tensors(const llama_quant * qnt, ggml_tensor * gt = ggml_new_tensor_4d(ctx.get(), GGML_TYPE_F32, t.ne[0], t.ne[1], t.ne[2], t.ne[3]); ggml_set_name(gt, t.name.c_str()); - if (llama_quant_tensor_allows_quantization(qnt, gt)) { + if (llama_quant_tensor_allows_quantization(qs, gt)) { result.push_back(gt); } } @@ -279,7 +279,7 @@ static mock_tensors build_mock_tensors(const llama_quant * qnt, static std::string generate_snapshot(const std::string & name, const gguf_remote_model & remote, - llama_quant * qnt, + quantize_state_impl * qs, mock_tensors & mt) { std::ostringstream out; @@ -303,7 +303,7 @@ static std::string generate_snapshot(const std::string & name, } std::vector result_types(mt.tensors.size()); - llama_quant_compute_types(qnt, ft, mt.tensors.data(), result_types.data(), mt.tensors.size()); + llama_quant_compute_types(qs, ft, mt.tensors.data(), result_types.data(), mt.tensors.size()); out << "\n[" << fname << "] " << ggml_type_name(default_type) << "\n"; for (size_t j = 0; j < mt.tensors.size(); j++) { @@ -343,23 +343,23 @@ static int run_generate(const std::string & snapshot_dir) { const auto & remote = result.value(); llama_model * model = build_mock_model_from_remote(remote); llama_model_quantize_params qparams = llama_model_quantize_default_params(); - llama_quant * qnt = llama_quant_init(model, &qparams); - auto mt = build_mock_tensors(qnt, remote); + quantize_state_impl * qs = llama_quant_init(model, &qparams); + auto mt = build_mock_tensors(qs, remote); - std::string content = generate_snapshot(name, remote, qnt, mt); + std::string content = generate_snapshot(name, remote, qs, mt); std::string path = snapshot_dir + "/" + snapshot_file_from_name(name) + ".schema"; std::ofstream f(path); if (!f.good()) { fprintf(stderr, "ERROR: could not write %s\n", path.c_str()); - llama_quant_free(qnt); + llama_quant_free(qs); llama_model_free(model); return 1; } f << content; n_written++; fprintf(stderr, " wrote %s\n", path.c_str()); - llama_quant_free(qnt); + llama_quant_free(qs); llama_model_free(model); } @@ -371,7 +371,7 @@ static int run_generate(const std::string & snapshot_dir) { // Test mode: compare against snapshot files // --------------------------------------------------------------------------- -static bool run_test_section(llama_quant * qnt, +static bool run_test_section(quantize_state_impl * qs, mock_tensors & mt, const snapshot_section & section) { // verify default_type matches what llama_ftype_get_default_type returns @@ -383,7 +383,7 @@ static bool run_test_section(llama_quant * qnt, } std::vector result_types(mt.tensors.size()); - llama_quant_compute_types(qnt, section.ftype, mt.tensors.data(), result_types.data(), mt.tensors.size()); + llama_quant_compute_types(qs, section.ftype, mt.tensors.data(), result_types.data(), mt.tensors.size()); std::map override_map(section.overrides.begin(), section.overrides.end()); @@ -436,14 +436,14 @@ static int run_remote_tests(const std::string & snapshot_dir, const char * argv0 const auto & remote = result.value(); llama_model * model = build_mock_model_from_remote(remote); llama_model_quantize_params qparams = llama_model_quantize_default_params(); - llama_quant * qnt = llama_quant_init(model, &qparams); - auto mt = build_mock_tensors(qnt, remote); + quantize_state_impl * qs = llama_quant_init(model, &qparams); + auto mt = build_mock_tensors(qs, remote); std::string snapshot_path = snapshot_dir + "/" + snapshot_file_from_name(name) + ".schema"; std::vector sections; if (!parse_snapshot_file(snapshot_path, sections)) { printf(" SKIP (could not read snapshot file: %s)\n\n", snapshot_path.c_str()); - llama_quant_free(qnt); + llama_quant_free(qs); llama_model_free(model); total_skip++; continue; @@ -453,7 +453,7 @@ static int run_remote_tests(const std::string & snapshot_dir, const char * argv0 int model_fail = 0; for (const auto & section : sections) { - bool pass = run_test_section(qnt, mt, section); + bool pass = run_test_section(qs, mt, section); if (pass) { model_pass++; } else { @@ -471,7 +471,7 @@ static int run_remote_tests(const std::string & snapshot_dir, const char * argv0 total_fail++; } - llama_quant_free(qnt); + llama_quant_free(qs); llama_model_free(model); } From c7aa761b2d3474221222887b5387302f7803044f Mon Sep 17 00:00:00 2001 From: Colin Kealty <3266127+bartowski1182@users.noreply.github.com> Date: Tue, 17 Mar 2026 13:03:44 -0400 Subject: [PATCH 21/21] Small cleanup --- src/llama-quant.cpp | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/llama-quant.cpp b/src/llama-quant.cpp index e98d1b91e5..34f1bece09 100644 --- a/src/llama-quant.cpp +++ b/src/llama-quant.cpp @@ -1,4 +1,3 @@ -#include "llama-quant.h" #include "llama-impl.h" #include "llama-model.h" #include "llama-model-loader.h" @@ -18,7 +17,7 @@ // (changes to this struct must be reflected in tools/quantize/quantize.cpp) struct tensor_type_option { std::string name; - ggml_type type = GGML_TYPE_COUNT; + ggml_type type = GGML_TYPE_COUNT; }; // tensor categorization - used to avoid repeated string matching in quantization logic. @@ -198,7 +197,6 @@ struct quantize_state_impl { } }; - // per-tensor metadata, computed in the preliminary loop and used in the main loop struct tensor_metadata { std::string name;