Update attn_qkv schema; make llama_ftype_get_default_type return GGML_TYPE_COUNT on an invalid ftype instead of throwing, and move the throw into the caller (llama_model_quantize_impl)

This commit is contained in:
Colin Kealty 2026-03-10 11:24:24 -04:00
parent 2015dea820
commit 544745c034
4 changed files with 1907 additions and 367 deletions

View File

@ -355,7 +355,7 @@ static ggml_type tensor_type_fallback(quantize_state_impl & qs, const ggml_tenso
return return_type;
}
ggml_type llama_tensor_get_type_impl(quantize_state_impl & qs, ggml_type new_type, const ggml_tensor * tensor, llama_ftype ftype, tensor_category category) {
static ggml_type llama_tensor_get_type_impl(quantize_state_impl & qs, ggml_type new_type, const ggml_tensor * tensor, llama_ftype ftype, tensor_category category) {
const std::string name = ggml_get_name(tensor);
// TODO: avoid hardcoded tensor names - use the TN_* constants
@ -782,7 +782,7 @@ ggml_type llama_ftype_get_default_type(llama_ftype ftype) {
case LLAMA_FTYPE_MOSTLY_IQ3_S:
case LLAMA_FTYPE_MOSTLY_IQ3_M: return GGML_TYPE_IQ3_S;
default: throw std::runtime_error(format("invalid output file type %d\n", ftype));
default: return GGML_TYPE_COUNT;
}
}
@ -874,6 +874,9 @@ static void llama_model_quantize_impl(const std::string & fname_inp, const std::
}
ggml_type default_type = llama_ftype_get_default_type(ftype);
if (default_type == GGML_TYPE_COUNT) {
throw std::runtime_error(format("invalid output file type %d\n", ftype));
}
// mmap consistently increases speed on Linux, and also increases speed on Windows with
// hot cache. It may cause a slowdown on macOS, possibly related to free memory.

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff