From c93131cef6dbb4e415fd2b3625f644c6714e7465 Mon Sep 17 00:00:00 2001
From: Ed Addario
Date: Fri, 10 Oct 2025 13:26:51 +0100
Subject: [PATCH] Remove --no-bias option

---
 include/llama.h             | 1 -
 src/llama-quant.cpp         | 3 +--
 tools/quantize/quantize.cpp | 6 +-----
 3 files changed, 2 insertions(+), 8 deletions(-)

diff --git a/include/llama.h b/include/llama.h
index 16f6124727..1df8f96920 100644
--- a/include/llama.h
+++ b/include/llama.h
@@ -365,7 +365,6 @@ extern "C" {
         void * tensor_types; // pointer to vector containing tensor types
         void * prune_layers; // pointer to vector containing layer indices to prune
         float target_bpw;    // target bits per weight (bpw)
-        bool no_bias;        // use mean square error estimation only (no aligment bias)
     } llama_model_quantize_params;
 
     typedef struct llama_logit_bias {
diff --git a/src/llama-quant.cpp b/src/llama-quant.cpp
index 7b3e956193..4ad5124d1a 100644
--- a/src/llama-quant.cpp
+++ b/src/llama-quant.cpp
@@ -2180,8 +2180,7 @@ llama_model_quantize_params llama_model_quantize_default_params() {
         /*.kv_overrides =*/ nullptr,
         /*.tensor_type  =*/ nullptr,
         /*.prune_layers =*/ nullptr,
-        /*.target_bpw   =*/ -1.0f,
-        /*.no_bias      =*/ false
+        /*.target_bpw   =*/ -1.0f
     };
 
     return result;
diff --git a/tools/quantize/quantize.cpp b/tools/quantize/quantize.cpp
index d355f97274..c254c3f6b2 100644
--- a/tools/quantize/quantize.cpp
+++ b/tools/quantize/quantize.cpp
@@ -118,7 +118,7 @@ static bool try_parse_ftype(const std::string & ftype_str_in, llama_ftype & ftyp
 [[noreturn]]
 static void usage(const char * executable) {
     printf("usage: %s [--help] [--allow-requantize] [--leave-output-tensor] [--pure] [--imatrix] [--include-weights] [--exclude-weights]\n", executable);
-    printf("       [--target-bpw n] [--no-bias] [--output-tensor-type] [--token-embedding-type] [--tensor-type] [--prune-layers] [--keep-split] [--override-kv]\n");
+    printf("       [--target-bpw n] [--output-tensor-type] [--token-embedding-type] [--tensor-type] [--prune-layers] [--keep-split] [--override-kv]\n");
     printf("       model-f32.gguf [model-quant.gguf] type [nthreads]\n\n");
     printf("  --allow-requantize: allows requantizing tensors that have already been quantized. Warning: This can severely reduce quality compared to quantizing from 16bit or 32bit\n");
     printf("  --leave-output-tensor: will leave output.weight un(re)quantized. Increases model size but may also increase quality, especially when requantizing\n");
@@ -134,8 +134,6 @@ static void usage(const char * executable) {
     printf("                        Advanced option to remove all tensors from the given layers\n");
     printf("  --target-bpw: target bits per weight (bpw). Must be a positive number between 0.0 and 8.0\n");
     printf("                Advanced option to automatically select quantization types to achieve a total bits per weight (bpw) target\n");
-    printf("  --no-bias: use mean square error estimation only (no aligment bias)\n");
-    printf("             Advanced option use MSE only and disable aligment bias error estimation\n");
     printf("  --keep-split: will generate quantized model in the same shards as input\n");
     printf("  --override-kv KEY=TYPE:VALUE\n");
     printf("                        Advanced option to override model metadata by key in the quantized model. May be specified multiple times.\n");
@@ -559,8 +557,6 @@ int main(int argc, char ** argv) {
             if (arg_idx == argc-1 || !parse_target_bpw(argv[++arg_idx], target_bpw)) {
                 usage(argv[0]);
             }
-        } else if (strcmp(argv[arg_idx], "--no-bias") == 0) {
-            params.no_bias = true;
         } else if (strcmp(argv[arg_idx], "--prune-layers") == 0) {
            if (arg_idx == argc-1 || !parse_layer_prune(argv[++arg_idx], prune_layers)) {
                usage(argv[0]);
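
Note for API consumers (not part of the patch itself): out-of-tree code that assigned the
removed field, e.g. "params.no_bias = true;", will no longer compile against the updated
header. A minimal sketch of the calling pattern after this change, assuming the existing
llama_model_quantize() entry point; the file names and parameter values are illustrative
placeholders, not mandated by the patch:

#include "llama.h"
#include <stdio.h>

int main(void) {
    // Start from the library defaults so fields added to (or removed from)
    // llama_model_quantize_params never have to be spelled out by hand.
    llama_model_quantize_params params = llama_model_quantize_default_params();
    params.nthread    = 8;                          // worker threads for quantization
    params.ftype      = LLAMA_FTYPE_MOSTLY_Q4_K_M;  // base quantization type
    params.target_bpw = 5.5f;                       // illustrative bpw target (this branch)
    // params.no_bias = true;  // removed by this patch: the field no longer exists

    // llama_model_quantize() returns 0 on success
    if (llama_model_quantize("model-f32.gguf", "model-quant.gguf", &params) != 0) {
        fprintf(stderr, "quantization failed\n");
        return 1;
    }
    return 0;
}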