Support BF16 model

Authored by Yu, Zijun on 2025-09-11 14:34:17 +08:00; committed by Mustafa Cavus
parent dc77cbb3f6
commit bcc343af00
2 changed files with 9 additions and 2 deletions

View File

@@ -419,8 +419,14 @@ std::map<std::string, std::shared_ptr<ov::Node>> GgmlOvDecoder::create_weight_no
std::shared_ptr<ov::Node> GgmlOvDecoder::create_weight_node(ggml_tensor* tensor,
std::optional<ExtraQuantType> requant_type) {
-    std::set<ggml_type> weight_types = {
-        GGML_TYPE_F32, GGML_TYPE_F16, GGML_TYPE_Q8_0, GGML_TYPE_Q4_0, GGML_TYPE_Q4_1, GGML_TYPE_Q4_K, GGML_TYPE_Q6_K};
+    std::set<ggml_type> weight_types = {GGML_TYPE_F32,
+                                        GGML_TYPE_F16,
+                                        GGML_TYPE_BF16,
+                                        GGML_TYPE_Q8_0,
+                                        GGML_TYPE_Q4_0,
+                                        GGML_TYPE_Q4_1,
+                                        GGML_TYPE_Q4_K,
+                                        GGML_TYPE_Q6_K};
if (weight_types.find(tensor->type) == weight_types.end()) {
throw std::runtime_error("Unexpected weight tensor type: " + std::string(tensor->name) + " with type " +
ggml_type_name(tensor->type));

View File

@@ -276,6 +276,7 @@ std::map<ggml_type, ExtraQuantType> get_types_to_requant(const std::string& devi
{GGML_TYPE_Q6_K, ExtraQuantType::Q8_1_C},
};
}
+    return {};
}
ov::AnyMap get_npu_generate_config() {