Fix merge conflicts, add more schemas

This commit is contained in:
Colin Kealty 2026-03-04 13:42:59 -05:00
parent 99119ceaf4
commit a3ff1940e9
13 changed files with 19689 additions and 1 deletions

View File

@ -274,6 +274,12 @@ if (TARGET cpp-httplib)
add_executable(test-gguf-model-data test-gguf-model-data.cpp)
target_link_libraries(test-gguf-model-data PRIVATE gguf-model-data common)
llama_test(test-gguf-model-data LABEL "model")
# test-quant-type-selection requires gguf-model-data for remote model metadata
llama_build_and_test(test-quant-type-selection.cpp LABEL "model")
target_link_libraries(test-quant-type-selection PRIVATE gguf-model-data)
target_compile_definitions(test-quant-type-selection PRIVATE
SNAPSHOT_DIR="${CMAKE_CURRENT_SOURCE_DIR}/snapshots")
endif()
endif()

View File

@ -151,6 +151,7 @@ static bool gguf_read_uint32_val(gguf_buf_reader & r, int32_t vtype, uint32_t &
return false;
}
}
return true;
}
if (vtype == GGUF_TYPE_UINT8) {
uint8_t v;

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,696 @@
# Model: Nemotron-Nano-3-30B-A3B
# n_embd=2688, n_ff=0, n_vocab=131072, n_layer=52, n_head=32, n_head_kv=0, n_expert=128
[F32] f32
output.weight q6_K
[F16] f16
output.weight q6_K
[Q4_0] q4_0
output.weight q6_K
[Q4_1] q4_1
output.weight q6_K
[Q8_0] q8_0
[Q5_0] q5_0
output.weight q6_K
[Q5_1] q5_1
output.weight q6_K
[Q2_K] q2_K
output.weight q8_0
blk.1.ffn_down_exps.weight q3_K
blk.1.ffn_down_shexp.weight q3_K
blk.3.ffn_down_exps.weight q3_K
blk.3.ffn_down_shexp.weight q3_K
blk.5.attn_output.weight q3_K
blk.5.attn_v.weight q3_K
blk.6.ffn_down_exps.weight q3_K
blk.6.ffn_down_shexp.weight q3_K
blk.8.ffn_down_exps.weight q3_K
blk.8.ffn_down_shexp.weight q3_K
blk.10.ffn_down_exps.weight q3_K
blk.10.ffn_down_shexp.weight q3_K
blk.12.attn_output.weight q3_K
blk.12.attn_v.weight q3_K
blk.13.ffn_down_exps.weight q3_K
blk.13.ffn_down_shexp.weight q3_K
blk.15.ffn_down_exps.weight q3_K
blk.15.ffn_down_shexp.weight q3_K
blk.17.ffn_down_exps.weight q3_K
blk.17.ffn_down_shexp.weight q3_K
blk.19.attn_output.weight q3_K
blk.19.attn_v.weight q3_K
blk.20.ffn_down_exps.weight q3_K
blk.20.ffn_down_shexp.weight q3_K
blk.22.ffn_down_exps.weight q3_K
blk.22.ffn_down_shexp.weight q3_K
blk.24.ffn_down_exps.weight q3_K
blk.24.ffn_down_shexp.weight q3_K
blk.26.attn_output.weight q3_K
blk.26.attn_v.weight q3_K
blk.27.ffn_down_exps.weight q3_K
blk.27.ffn_down_shexp.weight q3_K
blk.29.ffn_down_exps.weight q3_K
blk.29.ffn_down_shexp.weight q3_K
blk.31.ffn_down_exps.weight q3_K
blk.31.ffn_down_shexp.weight q3_K
blk.33.attn_output.weight q3_K
blk.33.attn_v.weight q3_K
blk.34.ffn_down_exps.weight q3_K
blk.34.ffn_down_shexp.weight q3_K
blk.36.ffn_down_exps.weight q3_K
blk.36.ffn_down_shexp.weight q3_K
blk.38.ffn_down_exps.weight q3_K
blk.38.ffn_down_shexp.weight q3_K
blk.40.ffn_down_exps.weight q3_K
blk.40.ffn_down_shexp.weight q3_K
blk.42.attn_output.weight q3_K
blk.42.attn_v.weight q3_K
blk.43.ffn_down_exps.weight q3_K
blk.43.ffn_down_shexp.weight q3_K
blk.45.ffn_down_exps.weight q3_K
blk.45.ffn_down_shexp.weight q3_K
blk.47.ffn_down_exps.weight q3_K
blk.47.ffn_down_shexp.weight q3_K
blk.49.ffn_down_exps.weight q3_K
blk.49.ffn_down_shexp.weight q3_K
blk.51.ffn_down_exps.weight q3_K
blk.51.ffn_down_shexp.weight q3_K
[Q3_K_S] q3_K
output.weight q8_0
[Q3_K_M] q3_K
output.weight q8_0
blk.1.ffn_down_exps.weight q5_K
blk.1.ffn_down_shexp.weight q5_K
blk.3.ffn_down_exps.weight q4_K
blk.3.ffn_down_shexp.weight q4_K
blk.5.attn_output.weight q4_K
blk.5.attn_v.weight q5_K
blk.6.ffn_down_exps.weight q4_K
blk.6.ffn_down_shexp.weight q4_K
blk.8.ffn_down_exps.weight q4_K
blk.8.ffn_down_shexp.weight q4_K
blk.10.ffn_down_exps.weight q4_K
blk.10.ffn_down_shexp.weight q4_K
blk.12.attn_output.weight q4_K
blk.12.attn_v.weight q5_K
blk.13.ffn_down_exps.weight q4_K
blk.13.ffn_down_shexp.weight q4_K
blk.15.ffn_down_exps.weight q4_K
blk.15.ffn_down_shexp.weight q4_K
blk.17.ffn_down_exps.weight q4_K
blk.17.ffn_down_shexp.weight q4_K
blk.19.attn_output.weight q4_K
blk.19.attn_v.weight q4_K
blk.20.ffn_down_exps.weight q4_K
blk.20.ffn_down_shexp.weight q4_K
blk.22.ffn_down_exps.weight q4_K
blk.22.ffn_down_shexp.weight q4_K
blk.24.ffn_down_exps.weight q4_K
blk.24.ffn_down_shexp.weight q4_K
blk.26.attn_output.weight q4_K
blk.26.attn_v.weight q4_K
blk.27.ffn_down_exps.weight q4_K
blk.27.ffn_down_shexp.weight q4_K
blk.29.ffn_down_exps.weight q4_K
blk.29.ffn_down_shexp.weight q4_K
blk.31.ffn_down_exps.weight q4_K
blk.31.ffn_down_shexp.weight q4_K
blk.33.attn_output.weight q4_K
blk.33.attn_v.weight q4_K
blk.34.ffn_down_exps.weight q4_K
blk.34.ffn_down_shexp.weight q4_K
blk.36.ffn_down_exps.weight q4_K
blk.36.ffn_down_shexp.weight q4_K
blk.38.ffn_down_exps.weight q4_K
blk.38.ffn_down_shexp.weight q4_K
blk.40.ffn_down_exps.weight q4_K
blk.40.ffn_down_shexp.weight q4_K
blk.42.attn_output.weight q4_K
blk.42.attn_v.weight q4_K
blk.43.ffn_down_exps.weight q4_K
blk.43.ffn_down_shexp.weight q4_K
blk.45.ffn_down_exps.weight q4_K
blk.45.ffn_down_shexp.weight q4_K
blk.47.ffn_down_exps.weight q4_K
blk.47.ffn_down_shexp.weight q4_K
blk.49.ffn_down_exps.weight q4_K
blk.49.ffn_down_shexp.weight q4_K
blk.51.ffn_down_exps.weight q4_K
blk.51.ffn_down_shexp.weight q4_K
[Q3_K_L] q3_K
output.weight q8_0
blk.1.ffn_down_exps.weight q5_K
blk.1.ffn_down_shexp.weight q5_K
blk.3.ffn_down_exps.weight q5_K
blk.3.ffn_down_shexp.weight q5_K
blk.5.attn_output.weight q5_K
blk.5.attn_v.weight q5_K
blk.6.ffn_down_exps.weight q5_K
blk.6.ffn_down_shexp.weight q5_K
blk.8.ffn_down_exps.weight q5_K
blk.8.ffn_down_shexp.weight q5_K
blk.10.ffn_down_exps.weight q5_K
blk.10.ffn_down_shexp.weight q5_K
blk.12.attn_output.weight q5_K
blk.12.attn_v.weight q5_K
blk.13.ffn_down_exps.weight q5_K
blk.13.ffn_down_shexp.weight q5_K
blk.15.ffn_down_exps.weight q5_K
blk.15.ffn_down_shexp.weight q5_K
blk.17.ffn_down_exps.weight q5_K
blk.17.ffn_down_shexp.weight q5_K
blk.19.attn_output.weight q5_K
blk.19.attn_v.weight q5_K
blk.20.ffn_down_exps.weight q5_K
blk.20.ffn_down_shexp.weight q5_K
blk.22.ffn_down_exps.weight q5_K
blk.22.ffn_down_shexp.weight q5_K
blk.24.ffn_down_exps.weight q5_K
blk.24.ffn_down_shexp.weight q5_K
blk.26.attn_output.weight q5_K
blk.26.attn_v.weight q5_K
blk.27.ffn_down_exps.weight q5_K
blk.27.ffn_down_shexp.weight q5_K
blk.29.ffn_down_exps.weight q5_K
blk.29.ffn_down_shexp.weight q5_K
blk.31.ffn_down_exps.weight q5_K
blk.31.ffn_down_shexp.weight q5_K
blk.33.attn_output.weight q5_K
blk.33.attn_v.weight q5_K
blk.34.ffn_down_exps.weight q5_K
blk.34.ffn_down_shexp.weight q5_K
blk.36.ffn_down_exps.weight q5_K
blk.36.ffn_down_shexp.weight q5_K
blk.38.ffn_down_exps.weight q5_K
blk.38.ffn_down_shexp.weight q5_K
blk.40.ffn_down_exps.weight q5_K
blk.40.ffn_down_shexp.weight q5_K
blk.42.attn_output.weight q5_K
blk.42.attn_v.weight q5_K
blk.43.ffn_down_exps.weight q5_K
blk.43.ffn_down_shexp.weight q5_K
blk.45.ffn_down_exps.weight q5_K
blk.45.ffn_down_shexp.weight q5_K
blk.47.ffn_down_exps.weight q5_K
blk.47.ffn_down_shexp.weight q5_K
blk.49.ffn_down_exps.weight q5_K
blk.49.ffn_down_shexp.weight q5_K
blk.51.ffn_down_exps.weight q5_K
blk.51.ffn_down_shexp.weight q5_K
[Q4_K_S] q4_K
output.weight q8_0
blk.1.ffn_down_exps.weight q5_K
blk.1.ffn_down_shexp.weight q5_K
blk.3.ffn_down_exps.weight q5_K
blk.3.ffn_down_shexp.weight q5_K
blk.5.attn_v.weight q5_K
blk.12.attn_v.weight q5_K
blk.19.attn_v.weight q5_K
blk.26.attn_v.weight q5_K
[Q4_K_M] q4_K
output.weight q8_0
blk.1.ffn_down_exps.weight q6_K
blk.1.ffn_down_shexp.weight q6_K
blk.3.ffn_down_exps.weight q6_K
blk.3.ffn_down_shexp.weight q6_K
blk.8.ffn_down_exps.weight q6_K
blk.8.ffn_down_shexp.weight q6_K
blk.17.ffn_down_exps.weight q6_K
blk.17.ffn_down_shexp.weight q6_K
blk.19.attn_v.weight q6_K
blk.20.ffn_down_exps.weight q6_K
blk.20.ffn_down_shexp.weight q6_K
blk.29.ffn_down_exps.weight q6_K
blk.29.ffn_down_shexp.weight q6_K
blk.38.ffn_down_exps.weight q6_K
blk.38.ffn_down_shexp.weight q6_K
blk.42.attn_v.weight q6_K
blk.45.ffn_down_exps.weight q6_K
blk.45.ffn_down_shexp.weight q6_K
blk.47.ffn_down_exps.weight q6_K
blk.47.ffn_down_shexp.weight q6_K
blk.49.ffn_down_exps.weight q6_K
blk.49.ffn_down_shexp.weight q6_K
blk.51.ffn_down_exps.weight q6_K
blk.51.ffn_down_shexp.weight q6_K
[Q5_K_S] q5_K
output.weight q8_0
[Q5_K_M] q5_K
output.weight q8_0
blk.1.ffn_down_exps.weight q6_K
blk.1.ffn_down_shexp.weight q6_K
blk.3.ffn_down_exps.weight q6_K
blk.3.ffn_down_shexp.weight q6_K
blk.8.ffn_down_exps.weight q6_K
blk.8.ffn_down_shexp.weight q6_K
blk.17.ffn_down_exps.weight q6_K
blk.17.ffn_down_shexp.weight q6_K
blk.19.attn_v.weight q6_K
blk.20.ffn_down_exps.weight q6_K
blk.20.ffn_down_shexp.weight q6_K
blk.29.ffn_down_exps.weight q6_K
blk.29.ffn_down_shexp.weight q6_K
blk.38.ffn_down_exps.weight q6_K
blk.38.ffn_down_shexp.weight q6_K
blk.42.attn_v.weight q6_K
blk.45.ffn_down_exps.weight q6_K
blk.45.ffn_down_shexp.weight q6_K
blk.47.ffn_down_exps.weight q6_K
blk.47.ffn_down_shexp.weight q6_K
blk.49.ffn_down_exps.weight q6_K
blk.49.ffn_down_shexp.weight q6_K
blk.51.ffn_down_exps.weight q6_K
blk.51.ffn_down_shexp.weight q6_K
[Q6_K] q6_K
output.weight q8_0
[IQ2_XXS] iq2_xxs
output.weight q8_0
token_embd.weight q2_K
blk.1.ffn_down_exps.weight q2_K
blk.1.ffn_down_shexp.weight q2_K
blk.3.ffn_down_exps.weight q2_K
blk.3.ffn_down_shexp.weight q2_K
blk.5.attn_v.weight q4_K
blk.6.ffn_down_exps.weight q2_K
blk.6.ffn_down_shexp.weight q2_K
blk.12.attn_v.weight q4_K
blk.19.attn_v.weight q4_K
blk.26.attn_v.weight q4_K
blk.33.attn_v.weight q4_K
blk.42.attn_v.weight q4_K
[IQ2_XS] iq2_xs
output.weight q8_0
token_embd.weight q2_K
blk.1.ffn_down_exps.weight q2_K
blk.1.ffn_down_shexp.weight q2_K
blk.3.ffn_down_exps.weight q2_K
blk.3.ffn_down_shexp.weight q2_K
blk.5.attn_v.weight q4_K
blk.6.ffn_down_exps.weight q2_K
blk.6.ffn_down_shexp.weight q2_K
blk.12.attn_v.weight q4_K
blk.19.attn_v.weight q4_K
blk.26.attn_v.weight q4_K
blk.33.attn_v.weight q4_K
blk.42.attn_v.weight q4_K
[Q2_K_S] q2_K
output.weight q8_0
blk.1.ffn_down_exps.weight q4_K
blk.1.ffn_down_shexp.weight q4_K
blk.3.ffn_down_exps.weight q4_K
blk.3.ffn_down_shexp.weight q4_K
[IQ3_XS] iq3_s
output.weight q8_0
blk.5.attn_k.weight iq3_xxs
blk.5.attn_q.weight iq3_xxs
blk.6.ffn_up_exps.weight iq3_xxs
blk.6.ffn_up_shexp.weight iq3_xxs
blk.8.ffn_up_exps.weight iq3_xxs
blk.8.ffn_up_shexp.weight iq3_xxs
blk.10.ffn_up_exps.weight iq3_xxs
blk.10.ffn_up_shexp.weight iq3_xxs
blk.12.attn_k.weight iq3_xxs
blk.12.attn_q.weight iq3_xxs
blk.13.ffn_up_exps.weight iq3_xxs
blk.13.ffn_up_shexp.weight iq3_xxs
blk.15.ffn_up_exps.weight iq3_xxs
blk.15.ffn_up_shexp.weight iq3_xxs
blk.17.ffn_up_exps.weight iq3_xxs
blk.17.ffn_up_shexp.weight iq3_xxs
blk.19.attn_k.weight iq3_xxs
blk.19.attn_q.weight iq3_xxs
blk.20.ffn_up_exps.weight iq3_xxs
blk.20.ffn_up_shexp.weight iq3_xxs
blk.22.ffn_up_exps.weight iq3_xxs
blk.22.ffn_up_shexp.weight iq3_xxs
blk.24.ffn_up_exps.weight iq3_xxs
blk.24.ffn_up_shexp.weight iq3_xxs
blk.26.attn_k.weight iq3_xxs
blk.26.attn_q.weight iq3_xxs
blk.27.ffn_up_exps.weight iq3_xxs
blk.27.ffn_up_shexp.weight iq3_xxs
blk.29.ffn_up_exps.weight iq3_xxs
blk.29.ffn_up_shexp.weight iq3_xxs
blk.31.ffn_up_exps.weight iq3_xxs
blk.31.ffn_up_shexp.weight iq3_xxs
blk.33.attn_k.weight iq3_xxs
blk.33.attn_q.weight iq3_xxs
blk.34.ffn_up_exps.weight iq3_xxs
blk.34.ffn_up_shexp.weight iq3_xxs
blk.36.ffn_up_exps.weight iq3_xxs
blk.36.ffn_up_shexp.weight iq3_xxs
blk.38.ffn_up_exps.weight iq3_xxs
blk.38.ffn_up_shexp.weight iq3_xxs
blk.40.ffn_up_exps.weight iq3_xxs
blk.40.ffn_up_shexp.weight iq3_xxs
blk.42.attn_k.weight iq3_xxs
blk.42.attn_q.weight iq3_xxs
blk.43.ffn_up_exps.weight iq3_xxs
blk.43.ffn_up_shexp.weight iq3_xxs
[IQ3_XXS] iq3_xxs
output.weight q8_0
token_embd.weight iq3_s
blk.1.ffn_down_exps.weight q4_K
blk.1.ffn_down_shexp.weight q4_K
blk.3.ffn_down_exps.weight q4_K
blk.3.ffn_down_shexp.weight q4_K
blk.5.attn_k.weight iq2_s
blk.5.attn_output.weight iq3_s
blk.5.attn_q.weight iq2_s
blk.5.attn_v.weight iq3_s
blk.6.ffn_down_exps.weight q3_K
blk.6.ffn_down_shexp.weight q3_K
blk.8.ffn_down_exps.weight q3_K
blk.8.ffn_down_shexp.weight q3_K
blk.10.ffn_down_exps.weight q3_K
blk.10.ffn_down_shexp.weight q3_K
blk.12.attn_k.weight iq2_s
blk.12.attn_output.weight iq3_s
blk.12.attn_q.weight iq2_s
blk.12.attn_v.weight iq3_s
blk.13.ffn_down_exps.weight q3_K
blk.13.ffn_down_shexp.weight q3_K
blk.15.ffn_down_exps.weight q3_K
blk.15.ffn_down_shexp.weight q3_K
blk.17.ffn_down_exps.weight q3_K
blk.17.ffn_down_shexp.weight q3_K
blk.19.attn_k.weight iq2_s
blk.19.attn_output.weight iq3_s
blk.19.attn_q.weight iq2_s
blk.19.attn_v.weight iq3_s
blk.20.ffn_down_exps.weight q3_K
blk.20.ffn_down_shexp.weight q3_K
blk.22.ffn_down_exps.weight q3_K
blk.22.ffn_down_shexp.weight q3_K
blk.24.ffn_down_exps.weight q3_K
blk.24.ffn_down_shexp.weight q3_K
blk.26.attn_k.weight iq2_s
blk.26.attn_output.weight iq3_s
blk.26.attn_q.weight iq2_s
blk.26.attn_v.weight iq3_s
blk.27.ffn_down_exps.weight q3_K
blk.27.ffn_down_shexp.weight q3_K
blk.29.ffn_down_exps.weight q3_K
blk.29.ffn_down_shexp.weight q3_K
blk.31.ffn_down_exps.weight q3_K
blk.31.ffn_down_shexp.weight q3_K
blk.33.attn_k.weight iq2_s
blk.33.attn_output.weight iq3_s
blk.33.attn_q.weight iq2_s
blk.33.attn_v.weight iq3_s
blk.34.ffn_down_exps.weight q3_K
blk.34.ffn_down_shexp.weight q3_K
blk.36.ffn_down_exps.weight q3_K
blk.36.ffn_down_shexp.weight q3_K
blk.38.ffn_down_exps.weight q3_K
blk.38.ffn_down_shexp.weight q3_K
blk.40.ffn_down_exps.weight q3_K
blk.40.ffn_down_shexp.weight q3_K
blk.42.attn_k.weight iq2_s
blk.42.attn_output.weight iq3_s
blk.42.attn_q.weight iq2_s
blk.42.attn_v.weight iq3_s
blk.43.ffn_down_exps.weight q3_K
blk.43.ffn_down_shexp.weight q3_K
blk.45.ffn_down_exps.weight q3_K
blk.45.ffn_down_shexp.weight q3_K
blk.47.ffn_down_exps.weight q3_K
blk.47.ffn_down_shexp.weight q3_K
blk.49.ffn_down_exps.weight q3_K
blk.49.ffn_down_shexp.weight q3_K
blk.51.ffn_down_exps.weight q3_K
blk.51.ffn_down_shexp.weight q3_K
[IQ1_S] iq1_s
output.weight q8_0
token_embd.weight q2_K
blk.1.ffn_down_exps.weight q2_K
blk.1.ffn_down_shexp.weight q2_K
blk.3.ffn_down_exps.weight q2_K
blk.3.ffn_down_shexp.weight q2_K
blk.5.attn_output.weight iq2_xxs
blk.5.attn_v.weight q4_K
blk.6.ffn_down_exps.weight q2_K
blk.6.ffn_down_shexp.weight q2_K
blk.12.attn_output.weight iq2_xxs
blk.12.attn_v.weight q4_K
blk.19.attn_output.weight iq2_xxs
blk.19.attn_v.weight q4_K
blk.26.attn_output.weight iq2_xxs
blk.26.attn_v.weight q4_K
blk.33.attn_output.weight iq2_xxs
blk.33.attn_v.weight q4_K
blk.42.attn_output.weight iq2_xxs
blk.42.attn_v.weight q4_K
[IQ4_NL] iq4_nl
output.weight q6_K
blk.1.ffn_down_exps.weight q5_K
blk.1.ffn_down_shexp.weight q5_K
blk.3.ffn_down_exps.weight q5_K
blk.3.ffn_down_shexp.weight q5_K
[IQ3_S] iq3_s
output.weight q8_0
[IQ3_M] iq3_s
output.weight q8_0
blk.1.ffn_down_exps.weight q4_K
blk.1.ffn_down_shexp.weight q4_K
blk.3.ffn_down_exps.weight q4_K
blk.3.ffn_down_shexp.weight q4_K
blk.5.attn_output.weight q4_K
blk.5.attn_v.weight q4_K
blk.12.attn_output.weight q4_K
blk.12.attn_v.weight q4_K
blk.19.attn_output.weight q4_K
blk.19.attn_v.weight q4_K
blk.26.attn_output.weight q4_K
blk.26.attn_v.weight q4_K
blk.33.attn_output.weight q4_K
blk.33.attn_v.weight q4_K
blk.42.attn_output.weight q4_K
blk.42.attn_v.weight q4_K
[IQ2_S] iq2_xs
output.weight q8_0
token_embd.weight iq3_s
blk.1.ffn_down_exps.weight iq3_s
blk.1.ffn_down_shexp.weight iq3_s
blk.3.ffn_down_exps.weight iq3_s
blk.3.ffn_down_shexp.weight iq3_s
blk.5.attn_output.weight iq3_s
blk.5.attn_v.weight q4_K
blk.6.ffn_down_exps.weight iq3_s
blk.6.ffn_down_shexp.weight iq3_s
blk.12.attn_output.weight iq3_s
blk.12.attn_v.weight q4_K
blk.19.attn_output.weight iq3_s
blk.19.attn_v.weight q4_K
blk.26.attn_output.weight iq3_s
blk.26.attn_v.weight q4_K
blk.33.attn_output.weight iq3_s
blk.33.attn_v.weight q4_K
blk.42.attn_output.weight iq3_s
blk.42.attn_v.weight q4_K
[IQ2_M] iq2_s
output.weight q8_0
token_embd.weight iq3_s
blk.1.ffn_down_exps.weight iq3_s
blk.1.ffn_down_shexp.weight iq3_s
blk.3.ffn_down_exps.weight iq3_s
blk.3.ffn_down_shexp.weight iq3_s
blk.5.attn_output.weight iq3_s
blk.5.attn_v.weight q4_K
blk.6.ffn_down_exps.weight iq3_s
blk.6.ffn_down_shexp.weight iq3_s
blk.12.attn_output.weight iq3_s
blk.12.attn_v.weight q4_K
blk.19.attn_output.weight iq3_s
blk.19.attn_v.weight q4_K
blk.26.attn_output.weight iq3_s
blk.26.attn_v.weight q4_K
blk.33.attn_output.weight iq3_s
blk.33.attn_v.weight q4_K
blk.42.attn_output.weight iq3_s
blk.42.attn_v.weight q4_K
[IQ4_XS] iq4_xs
output.weight q8_0
blk.1.ffn_down_exps.weight q5_K
blk.1.ffn_down_shexp.weight q5_K
blk.3.ffn_down_exps.weight q5_K
blk.3.ffn_down_shexp.weight q5_K
[IQ1_M] iq1_m
output.weight q8_0
token_embd.weight q2_K
blk.1.ffn_down_exps.weight q2_K
blk.1.ffn_down_shexp.weight q2_K
blk.3.ffn_down_exps.weight q2_K
blk.3.ffn_down_shexp.weight q2_K
blk.5.attn_output.weight iq2_xxs
blk.5.attn_v.weight q4_K
blk.6.ffn_down_exps.weight q2_K
blk.6.ffn_down_shexp.weight q2_K
blk.12.attn_output.weight iq2_xxs
blk.12.attn_v.weight q4_K
blk.19.attn_output.weight iq2_xxs
blk.19.attn_v.weight q4_K
blk.26.attn_output.weight iq2_xxs
blk.26.attn_v.weight q4_K
blk.33.attn_output.weight iq2_xxs
blk.33.attn_v.weight q4_K
blk.42.attn_output.weight iq2_xxs
blk.42.attn_v.weight q4_K
[BF16] bf16
output.weight q6_K
[TQ1_0] tq1_0
output.weight q8_0
token_embd.weight q4_K
[TQ2_0] tq2_0
output.weight q8_0
token_embd.weight q4_K
[MXFP4_MOE] mxfp4
output.weight q8_0
token_embd.weight q8_0
blk.0.ssm_in.weight q8_0
blk.0.ssm_out.weight q8_0
blk.1.ffn_down_shexp.weight q8_0
blk.1.ffn_up_shexp.weight q8_0
blk.2.ssm_in.weight q8_0
blk.2.ssm_out.weight q8_0
blk.3.ffn_down_shexp.weight q8_0
blk.3.ffn_up_shexp.weight q8_0
blk.4.ssm_in.weight q8_0
blk.4.ssm_out.weight q8_0
blk.5.attn_k.weight q8_0
blk.5.attn_output.weight q8_0
blk.5.attn_q.weight q8_0
blk.5.attn_v.weight q8_0
blk.6.ffn_down_shexp.weight q8_0
blk.6.ffn_up_shexp.weight q8_0
blk.7.ssm_in.weight q8_0
blk.7.ssm_out.weight q8_0
blk.8.ffn_down_shexp.weight q8_0
blk.8.ffn_up_shexp.weight q8_0
blk.9.ssm_in.weight q8_0
blk.9.ssm_out.weight q8_0
blk.10.ffn_down_shexp.weight q8_0
blk.10.ffn_up_shexp.weight q8_0
blk.11.ssm_in.weight q8_0
blk.11.ssm_out.weight q8_0
blk.12.attn_k.weight q8_0
blk.12.attn_output.weight q8_0
blk.12.attn_q.weight q8_0
blk.12.attn_v.weight q8_0
blk.13.ffn_down_shexp.weight q8_0
blk.13.ffn_up_shexp.weight q8_0
blk.14.ssm_in.weight q8_0
blk.14.ssm_out.weight q8_0
blk.15.ffn_down_shexp.weight q8_0
blk.15.ffn_up_shexp.weight q8_0
blk.16.ssm_in.weight q8_0
blk.16.ssm_out.weight q8_0
blk.17.ffn_down_shexp.weight q8_0
blk.17.ffn_up_shexp.weight q8_0
blk.18.ssm_in.weight q8_0
blk.18.ssm_out.weight q8_0
blk.19.attn_k.weight q8_0
blk.19.attn_output.weight q8_0
blk.19.attn_q.weight q8_0
blk.19.attn_v.weight q8_0
blk.20.ffn_down_shexp.weight q8_0
blk.20.ffn_up_shexp.weight q8_0
blk.21.ssm_in.weight q8_0
blk.21.ssm_out.weight q8_0
blk.22.ffn_down_shexp.weight q8_0
blk.22.ffn_up_shexp.weight q8_0
blk.23.ssm_in.weight q8_0
blk.23.ssm_out.weight q8_0
blk.24.ffn_down_shexp.weight q8_0
blk.24.ffn_up_shexp.weight q8_0
blk.25.ssm_in.weight q8_0
blk.25.ssm_out.weight q8_0
blk.26.attn_k.weight q8_0
blk.26.attn_output.weight q8_0
blk.26.attn_q.weight q8_0
blk.26.attn_v.weight q8_0
blk.27.ffn_down_shexp.weight q8_0
blk.27.ffn_up_shexp.weight q8_0
blk.28.ssm_in.weight q8_0
blk.28.ssm_out.weight q8_0
blk.29.ffn_down_shexp.weight q8_0
blk.29.ffn_up_shexp.weight q8_0
blk.30.ssm_in.weight q8_0
blk.30.ssm_out.weight q8_0
blk.31.ffn_down_shexp.weight q8_0
blk.31.ffn_up_shexp.weight q8_0
blk.32.ssm_in.weight q8_0
blk.32.ssm_out.weight q8_0
blk.33.attn_k.weight q8_0
blk.33.attn_output.weight q8_0
blk.33.attn_q.weight q8_0
blk.33.attn_v.weight q8_0
blk.34.ffn_down_shexp.weight q8_0
blk.34.ffn_up_shexp.weight q8_0
blk.35.ssm_in.weight q8_0
blk.35.ssm_out.weight q8_0
blk.36.ffn_down_shexp.weight q8_0
blk.36.ffn_up_shexp.weight q8_0
blk.37.ssm_in.weight q8_0
blk.37.ssm_out.weight q8_0
blk.38.ffn_down_shexp.weight q8_0
blk.38.ffn_up_shexp.weight q8_0
blk.39.ssm_in.weight q8_0
blk.39.ssm_out.weight q8_0
blk.40.ffn_down_shexp.weight q8_0
blk.40.ffn_up_shexp.weight q8_0
blk.41.ssm_in.weight q8_0
blk.41.ssm_out.weight q8_0
blk.42.attn_k.weight q8_0
blk.42.attn_output.weight q8_0
blk.42.attn_q.weight q8_0
blk.42.attn_v.weight q8_0
blk.43.ffn_down_shexp.weight q8_0
blk.43.ffn_up_shexp.weight q8_0
blk.44.ssm_in.weight q8_0
blk.44.ssm_out.weight q8_0
blk.45.ffn_down_shexp.weight q8_0
blk.45.ffn_up_shexp.weight q8_0
blk.46.ssm_in.weight q8_0
blk.46.ssm_out.weight q8_0
blk.47.ffn_down_shexp.weight q8_0
blk.47.ffn_up_shexp.weight q8_0
blk.48.ssm_in.weight q8_0
blk.48.ssm_out.weight q8_0
blk.49.ffn_down_shexp.weight q8_0
blk.49.ffn_up_shexp.weight q8_0
blk.50.ssm_in.weight q8_0
blk.50.ssm_out.weight q8_0
blk.51.ffn_down_shexp.weight q8_0
blk.51.ffn_up_shexp.weight q8_0

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -190,7 +190,7 @@ static const remote_model_spec model_specs[] = {
{ "ggml-org/gpt-oss-120b-GGUF", "mxfp4" },
{ "ggml-org/gemma-3-4b-it-GGUF", "Q8_0" },
{ "bartowski/Meta-Llama-3.1-70B-Instruct-GGUF", "Q4_K_M" },
{ "bartowski/deepseek-ai_DeepSeek-V3.1-GGUF", "IQ1_S" },
{ "bartowski/deepseek-ai_DeepSeek-V3.1-GGUF", "IQ1_M" },
{ "bartowski/Qwen_Qwen3.5-397B-A17B-GGUF", "IQ1_S" }, // TODO: swap with ggml-org if/when it's released
{ "bartowski/Qwen_Qwen3.5-27B-GGUF", "Q8_0" }, // TODO: swap with ggml-org if/when it's released
};