Fix merge conflicts, add more schemas
This commit is contained in:
parent
99119ceaf4
commit
a3ff1940e9
|
|
@ -274,6 +274,12 @@ if (TARGET cpp-httplib)
|
|||
add_executable(test-gguf-model-data test-gguf-model-data.cpp)
|
||||
target_link_libraries(test-gguf-model-data PRIVATE gguf-model-data common)
|
||||
llama_test(test-gguf-model-data LABEL "model")
|
||||
|
||||
# test-quant-type-selection requires gguf-model-data for remote model metadata
|
||||
llama_build_and_test(test-quant-type-selection.cpp LABEL "model")
|
||||
target_link_libraries(test-quant-type-selection PRIVATE gguf-model-data)
|
||||
target_compile_definitions(test-quant-type-selection PRIVATE
|
||||
SNAPSHOT_DIR="${CMAKE_CURRENT_SOURCE_DIR}/snapshots")
|
||||
endif()
|
||||
endif()
|
||||
|
||||
|
|
|
|||
|
|
@ -151,6 +151,7 @@ static bool gguf_read_uint32_val(gguf_buf_reader & r, int32_t vtype, uint32_t &
|
|||
return false;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
if (vtype == GGUF_TYPE_UINT8) {
|
||||
uint8_t v;
|
||||
|
|
|
|||
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
|
|
@ -0,0 +1,696 @@
|
|||
# Model: Nemotron-Nano-3-30B-A3B
|
||||
# n_embd=2688, n_ff=0, n_vocab=131072, n_layer=52, n_head=32, n_head_kv=0, n_expert=128
|
||||
|
||||
[F32] f32
|
||||
output.weight q6_K
|
||||
|
||||
[F16] f16
|
||||
output.weight q6_K
|
||||
|
||||
[Q4_0] q4_0
|
||||
output.weight q6_K
|
||||
|
||||
[Q4_1] q4_1
|
||||
output.weight q6_K
|
||||
|
||||
[Q8_0] q8_0
|
||||
|
||||
[Q5_0] q5_0
|
||||
output.weight q6_K
|
||||
|
||||
[Q5_1] q5_1
|
||||
output.weight q6_K
|
||||
|
||||
[Q2_K] q2_K
|
||||
output.weight q8_0
|
||||
blk.1.ffn_down_exps.weight q3_K
|
||||
blk.1.ffn_down_shexp.weight q3_K
|
||||
blk.3.ffn_down_exps.weight q3_K
|
||||
blk.3.ffn_down_shexp.weight q3_K
|
||||
blk.5.attn_output.weight q3_K
|
||||
blk.5.attn_v.weight q3_K
|
||||
blk.6.ffn_down_exps.weight q3_K
|
||||
blk.6.ffn_down_shexp.weight q3_K
|
||||
blk.8.ffn_down_exps.weight q3_K
|
||||
blk.8.ffn_down_shexp.weight q3_K
|
||||
blk.10.ffn_down_exps.weight q3_K
|
||||
blk.10.ffn_down_shexp.weight q3_K
|
||||
blk.12.attn_output.weight q3_K
|
||||
blk.12.attn_v.weight q3_K
|
||||
blk.13.ffn_down_exps.weight q3_K
|
||||
blk.13.ffn_down_shexp.weight q3_K
|
||||
blk.15.ffn_down_exps.weight q3_K
|
||||
blk.15.ffn_down_shexp.weight q3_K
|
||||
blk.17.ffn_down_exps.weight q3_K
|
||||
blk.17.ffn_down_shexp.weight q3_K
|
||||
blk.19.attn_output.weight q3_K
|
||||
blk.19.attn_v.weight q3_K
|
||||
blk.20.ffn_down_exps.weight q3_K
|
||||
blk.20.ffn_down_shexp.weight q3_K
|
||||
blk.22.ffn_down_exps.weight q3_K
|
||||
blk.22.ffn_down_shexp.weight q3_K
|
||||
blk.24.ffn_down_exps.weight q3_K
|
||||
blk.24.ffn_down_shexp.weight q3_K
|
||||
blk.26.attn_output.weight q3_K
|
||||
blk.26.attn_v.weight q3_K
|
||||
blk.27.ffn_down_exps.weight q3_K
|
||||
blk.27.ffn_down_shexp.weight q3_K
|
||||
blk.29.ffn_down_exps.weight q3_K
|
||||
blk.29.ffn_down_shexp.weight q3_K
|
||||
blk.31.ffn_down_exps.weight q3_K
|
||||
blk.31.ffn_down_shexp.weight q3_K
|
||||
blk.33.attn_output.weight q3_K
|
||||
blk.33.attn_v.weight q3_K
|
||||
blk.34.ffn_down_exps.weight q3_K
|
||||
blk.34.ffn_down_shexp.weight q3_K
|
||||
blk.36.ffn_down_exps.weight q3_K
|
||||
blk.36.ffn_down_shexp.weight q3_K
|
||||
blk.38.ffn_down_exps.weight q3_K
|
||||
blk.38.ffn_down_shexp.weight q3_K
|
||||
blk.40.ffn_down_exps.weight q3_K
|
||||
blk.40.ffn_down_shexp.weight q3_K
|
||||
blk.42.attn_output.weight q3_K
|
||||
blk.42.attn_v.weight q3_K
|
||||
blk.43.ffn_down_exps.weight q3_K
|
||||
blk.43.ffn_down_shexp.weight q3_K
|
||||
blk.45.ffn_down_exps.weight q3_K
|
||||
blk.45.ffn_down_shexp.weight q3_K
|
||||
blk.47.ffn_down_exps.weight q3_K
|
||||
blk.47.ffn_down_shexp.weight q3_K
|
||||
blk.49.ffn_down_exps.weight q3_K
|
||||
blk.49.ffn_down_shexp.weight q3_K
|
||||
blk.51.ffn_down_exps.weight q3_K
|
||||
blk.51.ffn_down_shexp.weight q3_K
|
||||
|
||||
[Q3_K_S] q3_K
|
||||
output.weight q8_0
|
||||
|
||||
[Q3_K_M] q3_K
|
||||
output.weight q8_0
|
||||
blk.1.ffn_down_exps.weight q5_K
|
||||
blk.1.ffn_down_shexp.weight q5_K
|
||||
blk.3.ffn_down_exps.weight q4_K
|
||||
blk.3.ffn_down_shexp.weight q4_K
|
||||
blk.5.attn_output.weight q4_K
|
||||
blk.5.attn_v.weight q5_K
|
||||
blk.6.ffn_down_exps.weight q4_K
|
||||
blk.6.ffn_down_shexp.weight q4_K
|
||||
blk.8.ffn_down_exps.weight q4_K
|
||||
blk.8.ffn_down_shexp.weight q4_K
|
||||
blk.10.ffn_down_exps.weight q4_K
|
||||
blk.10.ffn_down_shexp.weight q4_K
|
||||
blk.12.attn_output.weight q4_K
|
||||
blk.12.attn_v.weight q5_K
|
||||
blk.13.ffn_down_exps.weight q4_K
|
||||
blk.13.ffn_down_shexp.weight q4_K
|
||||
blk.15.ffn_down_exps.weight q4_K
|
||||
blk.15.ffn_down_shexp.weight q4_K
|
||||
blk.17.ffn_down_exps.weight q4_K
|
||||
blk.17.ffn_down_shexp.weight q4_K
|
||||
blk.19.attn_output.weight q4_K
|
||||
blk.19.attn_v.weight q4_K
|
||||
blk.20.ffn_down_exps.weight q4_K
|
||||
blk.20.ffn_down_shexp.weight q4_K
|
||||
blk.22.ffn_down_exps.weight q4_K
|
||||
blk.22.ffn_down_shexp.weight q4_K
|
||||
blk.24.ffn_down_exps.weight q4_K
|
||||
blk.24.ffn_down_shexp.weight q4_K
|
||||
blk.26.attn_output.weight q4_K
|
||||
blk.26.attn_v.weight q4_K
|
||||
blk.27.ffn_down_exps.weight q4_K
|
||||
blk.27.ffn_down_shexp.weight q4_K
|
||||
blk.29.ffn_down_exps.weight q4_K
|
||||
blk.29.ffn_down_shexp.weight q4_K
|
||||
blk.31.ffn_down_exps.weight q4_K
|
||||
blk.31.ffn_down_shexp.weight q4_K
|
||||
blk.33.attn_output.weight q4_K
|
||||
blk.33.attn_v.weight q4_K
|
||||
blk.34.ffn_down_exps.weight q4_K
|
||||
blk.34.ffn_down_shexp.weight q4_K
|
||||
blk.36.ffn_down_exps.weight q4_K
|
||||
blk.36.ffn_down_shexp.weight q4_K
|
||||
blk.38.ffn_down_exps.weight q4_K
|
||||
blk.38.ffn_down_shexp.weight q4_K
|
||||
blk.40.ffn_down_exps.weight q4_K
|
||||
blk.40.ffn_down_shexp.weight q4_K
|
||||
blk.42.attn_output.weight q4_K
|
||||
blk.42.attn_v.weight q4_K
|
||||
blk.43.ffn_down_exps.weight q4_K
|
||||
blk.43.ffn_down_shexp.weight q4_K
|
||||
blk.45.ffn_down_exps.weight q4_K
|
||||
blk.45.ffn_down_shexp.weight q4_K
|
||||
blk.47.ffn_down_exps.weight q4_K
|
||||
blk.47.ffn_down_shexp.weight q4_K
|
||||
blk.49.ffn_down_exps.weight q4_K
|
||||
blk.49.ffn_down_shexp.weight q4_K
|
||||
blk.51.ffn_down_exps.weight q4_K
|
||||
blk.51.ffn_down_shexp.weight q4_K
|
||||
|
||||
[Q3_K_L] q3_K
|
||||
output.weight q8_0
|
||||
blk.1.ffn_down_exps.weight q5_K
|
||||
blk.1.ffn_down_shexp.weight q5_K
|
||||
blk.3.ffn_down_exps.weight q5_K
|
||||
blk.3.ffn_down_shexp.weight q5_K
|
||||
blk.5.attn_output.weight q5_K
|
||||
blk.5.attn_v.weight q5_K
|
||||
blk.6.ffn_down_exps.weight q5_K
|
||||
blk.6.ffn_down_shexp.weight q5_K
|
||||
blk.8.ffn_down_exps.weight q5_K
|
||||
blk.8.ffn_down_shexp.weight q5_K
|
||||
blk.10.ffn_down_exps.weight q5_K
|
||||
blk.10.ffn_down_shexp.weight q5_K
|
||||
blk.12.attn_output.weight q5_K
|
||||
blk.12.attn_v.weight q5_K
|
||||
blk.13.ffn_down_exps.weight q5_K
|
||||
blk.13.ffn_down_shexp.weight q5_K
|
||||
blk.15.ffn_down_exps.weight q5_K
|
||||
blk.15.ffn_down_shexp.weight q5_K
|
||||
blk.17.ffn_down_exps.weight q5_K
|
||||
blk.17.ffn_down_shexp.weight q5_K
|
||||
blk.19.attn_output.weight q5_K
|
||||
blk.19.attn_v.weight q5_K
|
||||
blk.20.ffn_down_exps.weight q5_K
|
||||
blk.20.ffn_down_shexp.weight q5_K
|
||||
blk.22.ffn_down_exps.weight q5_K
|
||||
blk.22.ffn_down_shexp.weight q5_K
|
||||
blk.24.ffn_down_exps.weight q5_K
|
||||
blk.24.ffn_down_shexp.weight q5_K
|
||||
blk.26.attn_output.weight q5_K
|
||||
blk.26.attn_v.weight q5_K
|
||||
blk.27.ffn_down_exps.weight q5_K
|
||||
blk.27.ffn_down_shexp.weight q5_K
|
||||
blk.29.ffn_down_exps.weight q5_K
|
||||
blk.29.ffn_down_shexp.weight q5_K
|
||||
blk.31.ffn_down_exps.weight q5_K
|
||||
blk.31.ffn_down_shexp.weight q5_K
|
||||
blk.33.attn_output.weight q5_K
|
||||
blk.33.attn_v.weight q5_K
|
||||
blk.34.ffn_down_exps.weight q5_K
|
||||
blk.34.ffn_down_shexp.weight q5_K
|
||||
blk.36.ffn_down_exps.weight q5_K
|
||||
blk.36.ffn_down_shexp.weight q5_K
|
||||
blk.38.ffn_down_exps.weight q5_K
|
||||
blk.38.ffn_down_shexp.weight q5_K
|
||||
blk.40.ffn_down_exps.weight q5_K
|
||||
blk.40.ffn_down_shexp.weight q5_K
|
||||
blk.42.attn_output.weight q5_K
|
||||
blk.42.attn_v.weight q5_K
|
||||
blk.43.ffn_down_exps.weight q5_K
|
||||
blk.43.ffn_down_shexp.weight q5_K
|
||||
blk.45.ffn_down_exps.weight q5_K
|
||||
blk.45.ffn_down_shexp.weight q5_K
|
||||
blk.47.ffn_down_exps.weight q5_K
|
||||
blk.47.ffn_down_shexp.weight q5_K
|
||||
blk.49.ffn_down_exps.weight q5_K
|
||||
blk.49.ffn_down_shexp.weight q5_K
|
||||
blk.51.ffn_down_exps.weight q5_K
|
||||
blk.51.ffn_down_shexp.weight q5_K
|
||||
|
||||
[Q4_K_S] q4_K
|
||||
output.weight q8_0
|
||||
blk.1.ffn_down_exps.weight q5_K
|
||||
blk.1.ffn_down_shexp.weight q5_K
|
||||
blk.3.ffn_down_exps.weight q5_K
|
||||
blk.3.ffn_down_shexp.weight q5_K
|
||||
blk.5.attn_v.weight q5_K
|
||||
blk.12.attn_v.weight q5_K
|
||||
blk.19.attn_v.weight q5_K
|
||||
blk.26.attn_v.weight q5_K
|
||||
|
||||
[Q4_K_M] q4_K
|
||||
output.weight q8_0
|
||||
blk.1.ffn_down_exps.weight q6_K
|
||||
blk.1.ffn_down_shexp.weight q6_K
|
||||
blk.3.ffn_down_exps.weight q6_K
|
||||
blk.3.ffn_down_shexp.weight q6_K
|
||||
blk.8.ffn_down_exps.weight q6_K
|
||||
blk.8.ffn_down_shexp.weight q6_K
|
||||
blk.17.ffn_down_exps.weight q6_K
|
||||
blk.17.ffn_down_shexp.weight q6_K
|
||||
blk.19.attn_v.weight q6_K
|
||||
blk.20.ffn_down_exps.weight q6_K
|
||||
blk.20.ffn_down_shexp.weight q6_K
|
||||
blk.29.ffn_down_exps.weight q6_K
|
||||
blk.29.ffn_down_shexp.weight q6_K
|
||||
blk.38.ffn_down_exps.weight q6_K
|
||||
blk.38.ffn_down_shexp.weight q6_K
|
||||
blk.42.attn_v.weight q6_K
|
||||
blk.45.ffn_down_exps.weight q6_K
|
||||
blk.45.ffn_down_shexp.weight q6_K
|
||||
blk.47.ffn_down_exps.weight q6_K
|
||||
blk.47.ffn_down_shexp.weight q6_K
|
||||
blk.49.ffn_down_exps.weight q6_K
|
||||
blk.49.ffn_down_shexp.weight q6_K
|
||||
blk.51.ffn_down_exps.weight q6_K
|
||||
blk.51.ffn_down_shexp.weight q6_K
|
||||
|
||||
[Q5_K_S] q5_K
|
||||
output.weight q8_0
|
||||
|
||||
[Q5_K_M] q5_K
|
||||
output.weight q8_0
|
||||
blk.1.ffn_down_exps.weight q6_K
|
||||
blk.1.ffn_down_shexp.weight q6_K
|
||||
blk.3.ffn_down_exps.weight q6_K
|
||||
blk.3.ffn_down_shexp.weight q6_K
|
||||
blk.8.ffn_down_exps.weight q6_K
|
||||
blk.8.ffn_down_shexp.weight q6_K
|
||||
blk.17.ffn_down_exps.weight q6_K
|
||||
blk.17.ffn_down_shexp.weight q6_K
|
||||
blk.19.attn_v.weight q6_K
|
||||
blk.20.ffn_down_exps.weight q6_K
|
||||
blk.20.ffn_down_shexp.weight q6_K
|
||||
blk.29.ffn_down_exps.weight q6_K
|
||||
blk.29.ffn_down_shexp.weight q6_K
|
||||
blk.38.ffn_down_exps.weight q6_K
|
||||
blk.38.ffn_down_shexp.weight q6_K
|
||||
blk.42.attn_v.weight q6_K
|
||||
blk.45.ffn_down_exps.weight q6_K
|
||||
blk.45.ffn_down_shexp.weight q6_K
|
||||
blk.47.ffn_down_exps.weight q6_K
|
||||
blk.47.ffn_down_shexp.weight q6_K
|
||||
blk.49.ffn_down_exps.weight q6_K
|
||||
blk.49.ffn_down_shexp.weight q6_K
|
||||
blk.51.ffn_down_exps.weight q6_K
|
||||
blk.51.ffn_down_shexp.weight q6_K
|
||||
|
||||
[Q6_K] q6_K
|
||||
output.weight q8_0
|
||||
|
||||
[IQ2_XXS] iq2_xxs
|
||||
output.weight q8_0
|
||||
token_embd.weight q2_K
|
||||
blk.1.ffn_down_exps.weight q2_K
|
||||
blk.1.ffn_down_shexp.weight q2_K
|
||||
blk.3.ffn_down_exps.weight q2_K
|
||||
blk.3.ffn_down_shexp.weight q2_K
|
||||
blk.5.attn_v.weight q4_K
|
||||
blk.6.ffn_down_exps.weight q2_K
|
||||
blk.6.ffn_down_shexp.weight q2_K
|
||||
blk.12.attn_v.weight q4_K
|
||||
blk.19.attn_v.weight q4_K
|
||||
blk.26.attn_v.weight q4_K
|
||||
blk.33.attn_v.weight q4_K
|
||||
blk.42.attn_v.weight q4_K
|
||||
|
||||
[IQ2_XS] iq2_xs
|
||||
output.weight q8_0
|
||||
token_embd.weight q2_K
|
||||
blk.1.ffn_down_exps.weight q2_K
|
||||
blk.1.ffn_down_shexp.weight q2_K
|
||||
blk.3.ffn_down_exps.weight q2_K
|
||||
blk.3.ffn_down_shexp.weight q2_K
|
||||
blk.5.attn_v.weight q4_K
|
||||
blk.6.ffn_down_exps.weight q2_K
|
||||
blk.6.ffn_down_shexp.weight q2_K
|
||||
blk.12.attn_v.weight q4_K
|
||||
blk.19.attn_v.weight q4_K
|
||||
blk.26.attn_v.weight q4_K
|
||||
blk.33.attn_v.weight q4_K
|
||||
blk.42.attn_v.weight q4_K
|
||||
|
||||
[Q2_K_S] q2_K
|
||||
output.weight q8_0
|
||||
blk.1.ffn_down_exps.weight q4_K
|
||||
blk.1.ffn_down_shexp.weight q4_K
|
||||
blk.3.ffn_down_exps.weight q4_K
|
||||
blk.3.ffn_down_shexp.weight q4_K
|
||||
|
||||
[IQ3_XS] iq3_s
|
||||
output.weight q8_0
|
||||
blk.5.attn_k.weight iq3_xxs
|
||||
blk.5.attn_q.weight iq3_xxs
|
||||
blk.6.ffn_up_exps.weight iq3_xxs
|
||||
blk.6.ffn_up_shexp.weight iq3_xxs
|
||||
blk.8.ffn_up_exps.weight iq3_xxs
|
||||
blk.8.ffn_up_shexp.weight iq3_xxs
|
||||
blk.10.ffn_up_exps.weight iq3_xxs
|
||||
blk.10.ffn_up_shexp.weight iq3_xxs
|
||||
blk.12.attn_k.weight iq3_xxs
|
||||
blk.12.attn_q.weight iq3_xxs
|
||||
blk.13.ffn_up_exps.weight iq3_xxs
|
||||
blk.13.ffn_up_shexp.weight iq3_xxs
|
||||
blk.15.ffn_up_exps.weight iq3_xxs
|
||||
blk.15.ffn_up_shexp.weight iq3_xxs
|
||||
blk.17.ffn_up_exps.weight iq3_xxs
|
||||
blk.17.ffn_up_shexp.weight iq3_xxs
|
||||
blk.19.attn_k.weight iq3_xxs
|
||||
blk.19.attn_q.weight iq3_xxs
|
||||
blk.20.ffn_up_exps.weight iq3_xxs
|
||||
blk.20.ffn_up_shexp.weight iq3_xxs
|
||||
blk.22.ffn_up_exps.weight iq3_xxs
|
||||
blk.22.ffn_up_shexp.weight iq3_xxs
|
||||
blk.24.ffn_up_exps.weight iq3_xxs
|
||||
blk.24.ffn_up_shexp.weight iq3_xxs
|
||||
blk.26.attn_k.weight iq3_xxs
|
||||
blk.26.attn_q.weight iq3_xxs
|
||||
blk.27.ffn_up_exps.weight iq3_xxs
|
||||
blk.27.ffn_up_shexp.weight iq3_xxs
|
||||
blk.29.ffn_up_exps.weight iq3_xxs
|
||||
blk.29.ffn_up_shexp.weight iq3_xxs
|
||||
blk.31.ffn_up_exps.weight iq3_xxs
|
||||
blk.31.ffn_up_shexp.weight iq3_xxs
|
||||
blk.33.attn_k.weight iq3_xxs
|
||||
blk.33.attn_q.weight iq3_xxs
|
||||
blk.34.ffn_up_exps.weight iq3_xxs
|
||||
blk.34.ffn_up_shexp.weight iq3_xxs
|
||||
blk.36.ffn_up_exps.weight iq3_xxs
|
||||
blk.36.ffn_up_shexp.weight iq3_xxs
|
||||
blk.38.ffn_up_exps.weight iq3_xxs
|
||||
blk.38.ffn_up_shexp.weight iq3_xxs
|
||||
blk.40.ffn_up_exps.weight iq3_xxs
|
||||
blk.40.ffn_up_shexp.weight iq3_xxs
|
||||
blk.42.attn_k.weight iq3_xxs
|
||||
blk.42.attn_q.weight iq3_xxs
|
||||
blk.43.ffn_up_exps.weight iq3_xxs
|
||||
blk.43.ffn_up_shexp.weight iq3_xxs
|
||||
|
||||
[IQ3_XXS] iq3_xxs
|
||||
output.weight q8_0
|
||||
token_embd.weight iq3_s
|
||||
blk.1.ffn_down_exps.weight q4_K
|
||||
blk.1.ffn_down_shexp.weight q4_K
|
||||
blk.3.ffn_down_exps.weight q4_K
|
||||
blk.3.ffn_down_shexp.weight q4_K
|
||||
blk.5.attn_k.weight iq2_s
|
||||
blk.5.attn_output.weight iq3_s
|
||||
blk.5.attn_q.weight iq2_s
|
||||
blk.5.attn_v.weight iq3_s
|
||||
blk.6.ffn_down_exps.weight q3_K
|
||||
blk.6.ffn_down_shexp.weight q3_K
|
||||
blk.8.ffn_down_exps.weight q3_K
|
||||
blk.8.ffn_down_shexp.weight q3_K
|
||||
blk.10.ffn_down_exps.weight q3_K
|
||||
blk.10.ffn_down_shexp.weight q3_K
|
||||
blk.12.attn_k.weight iq2_s
|
||||
blk.12.attn_output.weight iq3_s
|
||||
blk.12.attn_q.weight iq2_s
|
||||
blk.12.attn_v.weight iq3_s
|
||||
blk.13.ffn_down_exps.weight q3_K
|
||||
blk.13.ffn_down_shexp.weight q3_K
|
||||
blk.15.ffn_down_exps.weight q3_K
|
||||
blk.15.ffn_down_shexp.weight q3_K
|
||||
blk.17.ffn_down_exps.weight q3_K
|
||||
blk.17.ffn_down_shexp.weight q3_K
|
||||
blk.19.attn_k.weight iq2_s
|
||||
blk.19.attn_output.weight iq3_s
|
||||
blk.19.attn_q.weight iq2_s
|
||||
blk.19.attn_v.weight iq3_s
|
||||
blk.20.ffn_down_exps.weight q3_K
|
||||
blk.20.ffn_down_shexp.weight q3_K
|
||||
blk.22.ffn_down_exps.weight q3_K
|
||||
blk.22.ffn_down_shexp.weight q3_K
|
||||
blk.24.ffn_down_exps.weight q3_K
|
||||
blk.24.ffn_down_shexp.weight q3_K
|
||||
blk.26.attn_k.weight iq2_s
|
||||
blk.26.attn_output.weight iq3_s
|
||||
blk.26.attn_q.weight iq2_s
|
||||
blk.26.attn_v.weight iq3_s
|
||||
blk.27.ffn_down_exps.weight q3_K
|
||||
blk.27.ffn_down_shexp.weight q3_K
|
||||
blk.29.ffn_down_exps.weight q3_K
|
||||
blk.29.ffn_down_shexp.weight q3_K
|
||||
blk.31.ffn_down_exps.weight q3_K
|
||||
blk.31.ffn_down_shexp.weight q3_K
|
||||
blk.33.attn_k.weight iq2_s
|
||||
blk.33.attn_output.weight iq3_s
|
||||
blk.33.attn_q.weight iq2_s
|
||||
blk.33.attn_v.weight iq3_s
|
||||
blk.34.ffn_down_exps.weight q3_K
|
||||
blk.34.ffn_down_shexp.weight q3_K
|
||||
blk.36.ffn_down_exps.weight q3_K
|
||||
blk.36.ffn_down_shexp.weight q3_K
|
||||
blk.38.ffn_down_exps.weight q3_K
|
||||
blk.38.ffn_down_shexp.weight q3_K
|
||||
blk.40.ffn_down_exps.weight q3_K
|
||||
blk.40.ffn_down_shexp.weight q3_K
|
||||
blk.42.attn_k.weight iq2_s
|
||||
blk.42.attn_output.weight iq3_s
|
||||
blk.42.attn_q.weight iq2_s
|
||||
blk.42.attn_v.weight iq3_s
|
||||
blk.43.ffn_down_exps.weight q3_K
|
||||
blk.43.ffn_down_shexp.weight q3_K
|
||||
blk.45.ffn_down_exps.weight q3_K
|
||||
blk.45.ffn_down_shexp.weight q3_K
|
||||
blk.47.ffn_down_exps.weight q3_K
|
||||
blk.47.ffn_down_shexp.weight q3_K
|
||||
blk.49.ffn_down_exps.weight q3_K
|
||||
blk.49.ffn_down_shexp.weight q3_K
|
||||
blk.51.ffn_down_exps.weight q3_K
|
||||
blk.51.ffn_down_shexp.weight q3_K
|
||||
|
||||
[IQ1_S] iq1_s
|
||||
output.weight q8_0
|
||||
token_embd.weight q2_K
|
||||
blk.1.ffn_down_exps.weight q2_K
|
||||
blk.1.ffn_down_shexp.weight q2_K
|
||||
blk.3.ffn_down_exps.weight q2_K
|
||||
blk.3.ffn_down_shexp.weight q2_K
|
||||
blk.5.attn_output.weight iq2_xxs
|
||||
blk.5.attn_v.weight q4_K
|
||||
blk.6.ffn_down_exps.weight q2_K
|
||||
blk.6.ffn_down_shexp.weight q2_K
|
||||
blk.12.attn_output.weight iq2_xxs
|
||||
blk.12.attn_v.weight q4_K
|
||||
blk.19.attn_output.weight iq2_xxs
|
||||
blk.19.attn_v.weight q4_K
|
||||
blk.26.attn_output.weight iq2_xxs
|
||||
blk.26.attn_v.weight q4_K
|
||||
blk.33.attn_output.weight iq2_xxs
|
||||
blk.33.attn_v.weight q4_K
|
||||
blk.42.attn_output.weight iq2_xxs
|
||||
blk.42.attn_v.weight q4_K
|
||||
|
||||
[IQ4_NL] iq4_nl
|
||||
output.weight q6_K
|
||||
blk.1.ffn_down_exps.weight q5_K
|
||||
blk.1.ffn_down_shexp.weight q5_K
|
||||
blk.3.ffn_down_exps.weight q5_K
|
||||
blk.3.ffn_down_shexp.weight q5_K
|
||||
|
||||
[IQ3_S] iq3_s
|
||||
output.weight q8_0
|
||||
|
||||
[IQ3_M] iq3_s
|
||||
output.weight q8_0
|
||||
blk.1.ffn_down_exps.weight q4_K
|
||||
blk.1.ffn_down_shexp.weight q4_K
|
||||
blk.3.ffn_down_exps.weight q4_K
|
||||
blk.3.ffn_down_shexp.weight q4_K
|
||||
blk.5.attn_output.weight q4_K
|
||||
blk.5.attn_v.weight q4_K
|
||||
blk.12.attn_output.weight q4_K
|
||||
blk.12.attn_v.weight q4_K
|
||||
blk.19.attn_output.weight q4_K
|
||||
blk.19.attn_v.weight q4_K
|
||||
blk.26.attn_output.weight q4_K
|
||||
blk.26.attn_v.weight q4_K
|
||||
blk.33.attn_output.weight q4_K
|
||||
blk.33.attn_v.weight q4_K
|
||||
blk.42.attn_output.weight q4_K
|
||||
blk.42.attn_v.weight q4_K
|
||||
|
||||
[IQ2_S] iq2_xs
|
||||
output.weight q8_0
|
||||
token_embd.weight iq3_s
|
||||
blk.1.ffn_down_exps.weight iq3_s
|
||||
blk.1.ffn_down_shexp.weight iq3_s
|
||||
blk.3.ffn_down_exps.weight iq3_s
|
||||
blk.3.ffn_down_shexp.weight iq3_s
|
||||
blk.5.attn_output.weight iq3_s
|
||||
blk.5.attn_v.weight q4_K
|
||||
blk.6.ffn_down_exps.weight iq3_s
|
||||
blk.6.ffn_down_shexp.weight iq3_s
|
||||
blk.12.attn_output.weight iq3_s
|
||||
blk.12.attn_v.weight q4_K
|
||||
blk.19.attn_output.weight iq3_s
|
||||
blk.19.attn_v.weight q4_K
|
||||
blk.26.attn_output.weight iq3_s
|
||||
blk.26.attn_v.weight q4_K
|
||||
blk.33.attn_output.weight iq3_s
|
||||
blk.33.attn_v.weight q4_K
|
||||
blk.42.attn_output.weight iq3_s
|
||||
blk.42.attn_v.weight q4_K
|
||||
|
||||
[IQ2_M] iq2_s
|
||||
output.weight q8_0
|
||||
token_embd.weight iq3_s
|
||||
blk.1.ffn_down_exps.weight iq3_s
|
||||
blk.1.ffn_down_shexp.weight iq3_s
|
||||
blk.3.ffn_down_exps.weight iq3_s
|
||||
blk.3.ffn_down_shexp.weight iq3_s
|
||||
blk.5.attn_output.weight iq3_s
|
||||
blk.5.attn_v.weight q4_K
|
||||
blk.6.ffn_down_exps.weight iq3_s
|
||||
blk.6.ffn_down_shexp.weight iq3_s
|
||||
blk.12.attn_output.weight iq3_s
|
||||
blk.12.attn_v.weight q4_K
|
||||
blk.19.attn_output.weight iq3_s
|
||||
blk.19.attn_v.weight q4_K
|
||||
blk.26.attn_output.weight iq3_s
|
||||
blk.26.attn_v.weight q4_K
|
||||
blk.33.attn_output.weight iq3_s
|
||||
blk.33.attn_v.weight q4_K
|
||||
blk.42.attn_output.weight iq3_s
|
||||
blk.42.attn_v.weight q4_K
|
||||
|
||||
[IQ4_XS] iq4_xs
|
||||
output.weight q8_0
|
||||
blk.1.ffn_down_exps.weight q5_K
|
||||
blk.1.ffn_down_shexp.weight q5_K
|
||||
blk.3.ffn_down_exps.weight q5_K
|
||||
blk.3.ffn_down_shexp.weight q5_K
|
||||
|
||||
[IQ1_M] iq1_m
|
||||
output.weight q8_0
|
||||
token_embd.weight q2_K
|
||||
blk.1.ffn_down_exps.weight q2_K
|
||||
blk.1.ffn_down_shexp.weight q2_K
|
||||
blk.3.ffn_down_exps.weight q2_K
|
||||
blk.3.ffn_down_shexp.weight q2_K
|
||||
blk.5.attn_output.weight iq2_xxs
|
||||
blk.5.attn_v.weight q4_K
|
||||
blk.6.ffn_down_exps.weight q2_K
|
||||
blk.6.ffn_down_shexp.weight q2_K
|
||||
blk.12.attn_output.weight iq2_xxs
|
||||
blk.12.attn_v.weight q4_K
|
||||
blk.19.attn_output.weight iq2_xxs
|
||||
blk.19.attn_v.weight q4_K
|
||||
blk.26.attn_output.weight iq2_xxs
|
||||
blk.26.attn_v.weight q4_K
|
||||
blk.33.attn_output.weight iq2_xxs
|
||||
blk.33.attn_v.weight q4_K
|
||||
blk.42.attn_output.weight iq2_xxs
|
||||
blk.42.attn_v.weight q4_K
|
||||
|
||||
[BF16] bf16
|
||||
output.weight q6_K
|
||||
|
||||
[TQ1_0] tq1_0
|
||||
output.weight q8_0
|
||||
token_embd.weight q4_K
|
||||
|
||||
[TQ2_0] tq2_0
|
||||
output.weight q8_0
|
||||
token_embd.weight q4_K
|
||||
|
||||
[MXFP4_MOE] mxfp4
|
||||
output.weight q8_0
|
||||
token_embd.weight q8_0
|
||||
blk.0.ssm_in.weight q8_0
|
||||
blk.0.ssm_out.weight q8_0
|
||||
blk.1.ffn_down_shexp.weight q8_0
|
||||
blk.1.ffn_up_shexp.weight q8_0
|
||||
blk.2.ssm_in.weight q8_0
|
||||
blk.2.ssm_out.weight q8_0
|
||||
blk.3.ffn_down_shexp.weight q8_0
|
||||
blk.3.ffn_up_shexp.weight q8_0
|
||||
blk.4.ssm_in.weight q8_0
|
||||
blk.4.ssm_out.weight q8_0
|
||||
blk.5.attn_k.weight q8_0
|
||||
blk.5.attn_output.weight q8_0
|
||||
blk.5.attn_q.weight q8_0
|
||||
blk.5.attn_v.weight q8_0
|
||||
blk.6.ffn_down_shexp.weight q8_0
|
||||
blk.6.ffn_up_shexp.weight q8_0
|
||||
blk.7.ssm_in.weight q8_0
|
||||
blk.7.ssm_out.weight q8_0
|
||||
blk.8.ffn_down_shexp.weight q8_0
|
||||
blk.8.ffn_up_shexp.weight q8_0
|
||||
blk.9.ssm_in.weight q8_0
|
||||
blk.9.ssm_out.weight q8_0
|
||||
blk.10.ffn_down_shexp.weight q8_0
|
||||
blk.10.ffn_up_shexp.weight q8_0
|
||||
blk.11.ssm_in.weight q8_0
|
||||
blk.11.ssm_out.weight q8_0
|
||||
blk.12.attn_k.weight q8_0
|
||||
blk.12.attn_output.weight q8_0
|
||||
blk.12.attn_q.weight q8_0
|
||||
blk.12.attn_v.weight q8_0
|
||||
blk.13.ffn_down_shexp.weight q8_0
|
||||
blk.13.ffn_up_shexp.weight q8_0
|
||||
blk.14.ssm_in.weight q8_0
|
||||
blk.14.ssm_out.weight q8_0
|
||||
blk.15.ffn_down_shexp.weight q8_0
|
||||
blk.15.ffn_up_shexp.weight q8_0
|
||||
blk.16.ssm_in.weight q8_0
|
||||
blk.16.ssm_out.weight q8_0
|
||||
blk.17.ffn_down_shexp.weight q8_0
|
||||
blk.17.ffn_up_shexp.weight q8_0
|
||||
blk.18.ssm_in.weight q8_0
|
||||
blk.18.ssm_out.weight q8_0
|
||||
blk.19.attn_k.weight q8_0
|
||||
blk.19.attn_output.weight q8_0
|
||||
blk.19.attn_q.weight q8_0
|
||||
blk.19.attn_v.weight q8_0
|
||||
blk.20.ffn_down_shexp.weight q8_0
|
||||
blk.20.ffn_up_shexp.weight q8_0
|
||||
blk.21.ssm_in.weight q8_0
|
||||
blk.21.ssm_out.weight q8_0
|
||||
blk.22.ffn_down_shexp.weight q8_0
|
||||
blk.22.ffn_up_shexp.weight q8_0
|
||||
blk.23.ssm_in.weight q8_0
|
||||
blk.23.ssm_out.weight q8_0
|
||||
blk.24.ffn_down_shexp.weight q8_0
|
||||
blk.24.ffn_up_shexp.weight q8_0
|
||||
blk.25.ssm_in.weight q8_0
|
||||
blk.25.ssm_out.weight q8_0
|
||||
blk.26.attn_k.weight q8_0
|
||||
blk.26.attn_output.weight q8_0
|
||||
blk.26.attn_q.weight q8_0
|
||||
blk.26.attn_v.weight q8_0
|
||||
blk.27.ffn_down_shexp.weight q8_0
|
||||
blk.27.ffn_up_shexp.weight q8_0
|
||||
blk.28.ssm_in.weight q8_0
|
||||
blk.28.ssm_out.weight q8_0
|
||||
blk.29.ffn_down_shexp.weight q8_0
|
||||
blk.29.ffn_up_shexp.weight q8_0
|
||||
blk.30.ssm_in.weight q8_0
|
||||
blk.30.ssm_out.weight q8_0
|
||||
blk.31.ffn_down_shexp.weight q8_0
|
||||
blk.31.ffn_up_shexp.weight q8_0
|
||||
blk.32.ssm_in.weight q8_0
|
||||
blk.32.ssm_out.weight q8_0
|
||||
blk.33.attn_k.weight q8_0
|
||||
blk.33.attn_output.weight q8_0
|
||||
blk.33.attn_q.weight q8_0
|
||||
blk.33.attn_v.weight q8_0
|
||||
blk.34.ffn_down_shexp.weight q8_0
|
||||
blk.34.ffn_up_shexp.weight q8_0
|
||||
blk.35.ssm_in.weight q8_0
|
||||
blk.35.ssm_out.weight q8_0
|
||||
blk.36.ffn_down_shexp.weight q8_0
|
||||
blk.36.ffn_up_shexp.weight q8_0
|
||||
blk.37.ssm_in.weight q8_0
|
||||
blk.37.ssm_out.weight q8_0
|
||||
blk.38.ffn_down_shexp.weight q8_0
|
||||
blk.38.ffn_up_shexp.weight q8_0
|
||||
blk.39.ssm_in.weight q8_0
|
||||
blk.39.ssm_out.weight q8_0
|
||||
blk.40.ffn_down_shexp.weight q8_0
|
||||
blk.40.ffn_up_shexp.weight q8_0
|
||||
blk.41.ssm_in.weight q8_0
|
||||
blk.41.ssm_out.weight q8_0
|
||||
blk.42.attn_k.weight q8_0
|
||||
blk.42.attn_output.weight q8_0
|
||||
blk.42.attn_q.weight q8_0
|
||||
blk.42.attn_v.weight q8_0
|
||||
blk.43.ffn_down_shexp.weight q8_0
|
||||
blk.43.ffn_up_shexp.weight q8_0
|
||||
blk.44.ssm_in.weight q8_0
|
||||
blk.44.ssm_out.weight q8_0
|
||||
blk.45.ffn_down_shexp.weight q8_0
|
||||
blk.45.ffn_up_shexp.weight q8_0
|
||||
blk.46.ssm_in.weight q8_0
|
||||
blk.46.ssm_out.weight q8_0
|
||||
blk.47.ffn_down_shexp.weight q8_0
|
||||
blk.47.ffn_up_shexp.weight q8_0
|
||||
blk.48.ssm_in.weight q8_0
|
||||
blk.48.ssm_out.weight q8_0
|
||||
blk.49.ffn_down_shexp.weight q8_0
|
||||
blk.49.ffn_up_shexp.weight q8_0
|
||||
blk.50.ssm_in.weight q8_0
|
||||
blk.50.ssm_out.weight q8_0
|
||||
blk.51.ffn_down_shexp.weight q8_0
|
||||
blk.51.ffn_up_shexp.weight q8_0
|
||||
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
|
|
@ -190,7 +190,7 @@ static const remote_model_spec model_specs[] = {
|
|||
{ "ggml-org/gpt-oss-120b-GGUF", "mxfp4" },
|
||||
{ "ggml-org/gemma-3-4b-it-GGUF", "Q8_0" },
|
||||
{ "bartowski/Meta-Llama-3.1-70B-Instruct-GGUF", "Q4_K_M" },
|
||||
{ "bartowski/deepseek-ai_DeepSeek-V3.1-GGUF", "IQ1_S" },
|
||||
{ "bartowski/deepseek-ai_DeepSeek-V3.1-GGUF", "IQ1_M" },
|
||||
{ "bartowski/Qwen_Qwen3.5-397B-A17B-GGUF", "IQ1_S" }, // TODO: swap with ggml-org if/when it's released
|
||||
{ "bartowski/Qwen_Qwen3.5-27B-GGUF", "Q8_0" }, // TODO: swap with ggml-org if/when it's released
|
||||
};
|
||||
|
|
|
|||
Loading…
Reference in New Issue