context : refactor model_supports_compute_type to lambda

keep the

//
// llama_context
//

comment banner looking nice.
shaobo.xie 2026-02-12 14:06:38 +08:00
parent d38923a1ec
commit 233f5ab82d
1 changed file with 26 additions and 34 deletions


@@ -14,32 +14,6 @@
 #include <limits>
 #include <stdexcept>
 
-static bool model_supports_compute_type(enum llm_arch arch, ggml_type compute_type) {
-    // F32 is always supported - it's the default/safe precision
-    if (compute_type == GGML_TYPE_F32) {
-        return true;
-    }
-
-    // Nowadays FP16 and BF16 support is model-specific.
-    // Add models here as their required ops are implemented and validated for 'compute_type'.
-    // Example (uncomment when ready):
-    // if (compute_type == GGML_TYPE_F16 || compute_type == GGML_TYPE_BF16) {
-    //     switch (arch) {
-    //         case LLM_ARCH_QWEN2:
-    //         case LLM_ARCH_QWEN2MOE:
-    //         case LLM_ARCH_QWEN3:
-    //             // ... other validated models
-    //             return true;
-    //         default:
-    //             return false;
-    //     }
-    // }
-
-    // No models enabled yet for non-F32 compute types
-    (void)arch;
-
-    return false;
-}
-
 //
 // llama_context
 //
@@ -192,15 +166,33 @@ llama_context::llama_context(
             break;
     }
 
-    // check if the model supports the requested compute type
-    if (cparams.compute_type != GGML_TYPE_F32) {
-        if (!model_supports_compute_type(model.arch, cparams.compute_type)) {
-            LLAMA_LOG_WARN("%s: model arch '%s' does not yet support compute_type %s, "
-                    "falling back to F32. To enable, the required ops must be implemented first.\n",
-                    __func__, llm_arch_name(model.arch),
-                    ggml_type_name(cparams.compute_type));
-            cparams.compute_type = GGML_TYPE_F32;
-        }
-    }
+    // Nowadays FP16 and BF16 support is model-specific.
+    // Add models here as their required ops are implemented and validated for 'compute_type'.
+    auto model_supports_compute_type = [&](ggml_type ct) -> bool {
+        if (ct == GGML_TYPE_F32) {
+            return true; // F32 is always supported
+        }
+
+        // Example (uncomment when ready):
+        // if (ct == GGML_TYPE_F16 || ct == GGML_TYPE_BF16) {
+        //     switch (model.arch) {
+        //         case LLM_ARCH_QWEN2:
+        //         case LLM_ARCH_QWEN2MOE:
+        //         case LLM_ARCH_QWEN3:
+        //             return true;
+        //         default:
+        //             return false;
+        //     }
+        // }
+
+        (void)model.arch; // no models enabled yet for non-F32 compute types
+
+        return false;
+    };
+
+    if (!model_supports_compute_type(cparams.compute_type)) {
+        LLAMA_LOG_WARN("%s: model arch '%s' does not yet support compute_type %s, "
+                "falling back to F32. To enable, the required ops must be implemented first.\n",
+                __func__, llm_arch_name(model.arch),
+                ggml_type_name(cparams.compute_type));
+        cparams.compute_type = GGML_TYPE_F32;
+    }
 
     // with causal attention, the batch size is limited by the context size
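
For illustration only (not part of this commit): a minimal, self-contained C++ sketch of the same pattern, using simplified stand-in enums in place of the real llm_arch, ggml_type, and cparams state. It shows how the lambda captures the surrounding context by reference and how a validated architecture would be enabled for F16/BF16 by filling in the switch.

// Illustrative sketch only -- NOT part of the commit. The enums and values
// below are hypothetical stand-ins for the real llm_arch / ggml_type / cparams.
#include <cstdio>

enum llm_arch  { LLM_ARCH_QWEN2, LLM_ARCH_QWEN3, LLM_ARCH_OTHER };
enum ggml_type { GGML_TYPE_F32, GGML_TYPE_F16, GGML_TYPE_BF16 };

int main() {
    const llm_arch arch    = LLM_ARCH_QWEN3; // pretend this came from the loaded model
    ggml_type compute_type = GGML_TYPE_F16;  // pretend this came from the context params

    // Same shape as the lambda in the diff: capture surrounding state by
    // reference and switch on the arch once a model has been validated.
    auto model_supports_compute_type = [&](ggml_type ct) -> bool {
        if (ct == GGML_TYPE_F32) {
            return true; // F32 is always supported
        }
        if (ct == GGML_TYPE_F16 || ct == GGML_TYPE_BF16) {
            switch (arch) {
                case LLM_ARCH_QWEN2:
                case LLM_ARCH_QWEN3:
                    return true; // hypothetical: required ops validated for these archs
                default:
                    return false;
            }
        }
        return false;
    };

    if (!model_supports_compute_type(compute_type)) {
        printf("compute type not supported by this arch, falling back to F32\n");
        compute_type = GGML_TYPE_F32;
    }

    printf("final compute type: %d\n", (int) compute_type);
    return 0;
}

One practical upside of the lambda form over the file-scope static: it can read model.arch (and anything else in scope) directly, so the "which models are validated" policy stays next to the only place it is consulted instead of threading arguments through a helper.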