# This defines the default ftype (the quantization mix code,
# that you pass to quantize if you're not using custom mix).
# Tensors that are not overridden below will be quantized
# according to this scheme.

ftype=7

# allowed values:
# LLAMA_FTYPE_ALL_F32              = 0,
# LLAMA_FTYPE_MOSTLY_F16           = 1,  // except 1d tensors
# LLAMA_FTYPE_MOSTLY_Q4_0          = 2,  // except 1d tensors
# LLAMA_FTYPE_MOSTLY_Q4_1          = 3,  // except 1d tensors
# LLAMA_FTYPE_MOSTLY_Q4_1_SOME_F16 = 4,  // tok_embeddings.weight and output.weight are F16
# // LLAMA_FTYPE_MOSTLY_Q4_2      = 5,  // support has been removed
# // LLAMA_FTYPE_MOSTLY_Q4_3      = 6,  // support has been removed
# LLAMA_FTYPE_MOSTLY_Q8_0          = 7,  // except 1d tensors
# LLAMA_FTYPE_MOSTLY_Q5_0          = 8,  // except 1d tensors
# LLAMA_FTYPE_MOSTLY_Q5_1          = 9,  // except 1d tensors
# LLAMA_FTYPE_MOSTLY_Q2_K          = 10, // except 1d tensors
# LLAMA_FTYPE_MOSTLY_Q3_K_S        = 11, // except 1d tensors
# LLAMA_FTYPE_MOSTLY_Q3_K_M        = 12, // except 1d tensors
# LLAMA_FTYPE_MOSTLY_Q3_K_L        = 13, // except 1d tensors
# LLAMA_FTYPE_MOSTLY_Q4_K_S        = 14, // except 1d tensors
# LLAMA_FTYPE_MOSTLY_Q4_K_M        = 15, // except 1d tensors
# LLAMA_FTYPE_MOSTLY_Q5_K_S        = 16, // except 1d tensors
# LLAMA_FTYPE_MOSTLY_Q5_K_M        = 17, // except 1d tensors
# LLAMA_FTYPE_MOSTLY_Q6_K          = 18, // except 1d tensors
# LLAMA_FTYPE_MOSTLY_IQ2_XXS       = 19, // except 1d tensors
# LLAMA_FTYPE_MOSTLY_IQ2_XS        = 20, // except 1d tensors
# LLAMA_FTYPE_MOSTLY_Q2_K_S        = 21, // except 1d tensors
# LLAMA_FTYPE_MOSTLY_IQ3_XS        = 22, // except 1d tensors
# LLAMA_FTYPE_MOSTLY_IQ3_XXS       = 23, // except 1d tensors
# LLAMA_FTYPE_MOSTLY_IQ1_S         = 24, // except 1d tensors
# LLAMA_FTYPE_MOSTLY_IQ4_NL        = 25, // except 1d tensors
# LLAMA_FTYPE_MOSTLY_IQ3_S         = 26, // except 1d tensors
# LLAMA_FTYPE_MOSTLY_IQ3_M         = 27, // except 1d tensors
# LLAMA_FTYPE_MOSTLY_IQ2_S         = 28, // except 1d tensors
# LLAMA_FTYPE_MOSTLY_IQ2_M         = 29, // except 1d tensors
# LLAMA_FTYPE_MOSTLY_IQ4_XS        = 30, // except 1d tensors
# LLAMA_FTYPE_MOSTLY_IQ1_M         = 31, // except 1d tensors

# This defines an override for tensors with names matching
# a given string. Filters are processed in order given, and the
# first matching will be used.
# Wildcards are allowed:
#   ?   single character
#   *   multiple characters

blk.10.ffn_up.weight=7
blk.1?.ffn_up.weight=10
blk.2?.ffn_up.weight=10
blk.1?.attn*=23
blk.2?.attn*=23
*down*=14
*gate*=12

# allowed values:
# LLAMA_FTYPE_ALL_F32              = 0,
# LLAMA_FTYPE_MOSTLY_F16           = 1,  // except 1d tensors
# LLAMA_FTYPE_MOSTLY_Q4_0          = 2,  // except 1d tensors
# LLAMA_FTYPE_MOSTLY_Q4_1          = 3,  // except 1d tensors
# LLAMA_FTYPE_MOSTLY_Q4_1_SOME_F16 = 4,  // tok_embeddings.weight and output.weight are F16
# // LLAMA_FTYPE_MOSTLY_Q4_2      = 5,  // support has been removed
# // LLAMA_FTYPE_MOSTLY_Q4_3      = 6,  // support has been removed
# LLAMA_FTYPE_MOSTLY_Q8_0          = 7,  // except 1d tensors
# LLAMA_FTYPE_MOSTLY_Q5_0          = 8,  // except 1d tensors
# LLAMA_FTYPE_MOSTLY_Q5_1          = 9,  // except 1d tensors
# LLAMA_FTYPE_MOSTLY_Q2_K          = 10, // except 1d tensors
# LLAMA_FTYPE_MOSTLY_Q3_K_S        = 11, // except 1d tensors
# LLAMA_FTYPE_MOSTLY_Q3_K_M        = 12, // except 1d tensors
# LLAMA_FTYPE_MOSTLY_Q3_K_L        = 13, // except 1d tensors
# LLAMA_FTYPE_MOSTLY_Q4_K_S        = 14, // except 1d tensors
# LLAMA_FTYPE_MOSTLY_Q4_K_M        = 15, // except 1d tensors
# LLAMA_FTYPE_MOSTLY_Q5_K_S        = 16, // except 1d tensors
# LLAMA_FTYPE_MOSTLY_Q5_K_M        = 17, // except 1d tensors
# LLAMA_FTYPE_MOSTLY_Q6_K          = 18, // except 1d tensors
# LLAMA_FTYPE_MOSTLY_IQ2_XXS       = 19, // except 1d tensors
# LLAMA_FTYPE_MOSTLY_IQ2_XS        = 20, // except 1d tensors
# LLAMA_FTYPE_MOSTLY_Q2_K_S        = 21, // except 1d tensors
# LLAMA_FTYPE_MOSTLY_IQ3_XS        = 22, // except 1d tensors
# LLAMA_FTYPE_MOSTLY_IQ3_XXS       = 23, // except 1d tensors
# LLAMA_FTYPE_MOSTLY_IQ1_S         = 24, // except 1d tensors
# LLAMA_FTYPE_MOSTLY_IQ4_NL        = 25, // except 1d tensors
# LLAMA_FTYPE_MOSTLY_IQ3_S         = 26, // except 1d tensors
# LLAMA_FTYPE_MOSTLY_IQ3_M         = 27, // except 1d tensors
# LLAMA_FTYPE_MOSTLY_IQ2_S         = 28, // except 1d tensors
# LLAMA_FTYPE_MOSTLY_IQ2_M         = 29, // except 1d tensors
# LLAMA_FTYPE_MOSTLY_IQ4_XS        = 30, // except 1d tensors
# LLAMA_FTYPE_MOSTLY_IQ1_M         = 31, // except 1d tensors

# GGML_TYPE_F32     = 0,
# GGML_TYPE_F16     = 1,
# GGML_TYPE_Q4_0    = 2,
# GGML_TYPE_Q4_1    = 3,
# // GGML_TYPE_Q4_2 = 4, support has been removed
# // GGML_TYPE_Q4_3 = 5, support has been removed
# GGML_TYPE_Q5_0    = 6,
# GGML_TYPE_Q5_1    = 7,
# GGML_TYPE_Q8_0    = 8,
# GGML_TYPE_Q8_1    = 9,
# GGML_TYPE_Q2_K    = 10,
# GGML_TYPE_Q3_K    = 11,
# GGML_TYPE_Q4_K    = 12,
# GGML_TYPE_Q5_K    = 13,
# GGML_TYPE_Q6_K    = 14,
# GGML_TYPE_Q8_K    = 15,
# GGML_TYPE_IQ2_XXS = 16,
# GGML_TYPE_IQ2_XS  = 17,
# GGML_TYPE_IQ3_XXS = 18,
# GGML_TYPE_IQ1_S   = 19,
# GGML_TYPE_IQ4_NL  = 20,
# GGML_TYPE_IQ3_S   = 21,
# GGML_TYPE_IQ2_S   = 22,
# GGML_TYPE_IQ4_XS  = 23,
# GGML_TYPE_I8      = 24,
# GGML_TYPE_I16     = 25,
# GGML_TYPE_I32     = 26,
# GGML_TYPE_I64     = 27,
# GGML_TYPE_F64     = 28,
# GGML_TYPE_IQ1_M   = 29,