# Defines the default ftype (the quantization mix code
# that you pass to quantize when you are not using a custom mix).
# Tensors that are not overridden below will be quantized
# according to this mix.
#
# Must be one of
#    Q4_0, Q4_1, Q5_0, Q5_1, IQ2_XXS, IQ2_XS, IQ2_S, IQ2_M,
#    IQ1_S, IQ1_M, Q2_K, Q2_K_S, IQ3_XXS, IQ3_S, IQ3_M, Q3_K,
#    IQ3_XS, Q3_K_S, Q3_K_M, Q3_K_L, IQ4_NL, IQ4_XS, Q4_K,
#    Q4_K_S, Q4_K_M, Q5_K, Q5_K_S, Q5_K_M, Q6_K, Q8_0, F16
ftype=Q6_K

# Defines overrides for tensors whose names match a given
# string. Filters are processed in the order given; the first
# matching filter is used, so more specific filters must be
# listed before more general ones.
#
# Wildcards are allowed:
#    ? single character
#    * multiple characters
#
# Type must be one of
#    F16, Q4_0, Q4_1, Q5_0, Q5_1, Q8_0, Q8_1, Q2_K, Q3_K,
#    Q4_K, Q5_K, Q6_K, Q8_K, IQ2_XXS, IQ2_XS, IQ3_XXS,
#    IQ1_S, IQ4_NL, IQ3_S, IQ2_S, IQ4_XS, IQ1_M

# blk.10 gets its own ffn_up type. This filter must precede
# the blk.1? filter below, which would otherwise match it first.
blk.10.ffn_up.weight=Q5_K
blk.1?.ffn_up.weight=Q4_K

# Every tensor in blk.23, blk.24 and blk.25 uses Q2_K. These
# filters precede blk.2? so that ffn_up in those blocks is
# also Q2_K rather than Q4_K.
blk.23.*=Q2_K
blk.24.*=Q2_K
blk.25.*=Q2_K
blk.2?.ffn_up.weight=Q4_K

# Name-pattern filters for tensors not matched above. Any tensor
# that matches none of the filters falls back to ftype.
*_gate*=Q4_K
*.attn*=IQ4_XS
*_down*=IQ3_S
output.weight=Q5_K