Support BF16 model
This commit is contained in:
parent
dc77cbb3f6
commit
bcc343af00
|
|
@@ -419,8 +419,14 @@ std::map<std::string, std::shared_ptr<ov::Node>> GgmlOvDecoder::create_weight_no
|
|||
|
||||
std::shared_ptr<ov::Node> GgmlOvDecoder::create_weight_node(ggml_tensor* tensor,
|
||||
std::optional<ExtraQuantType> requant_type) {
|
||||
std::set<ggml_type> weight_types = {
|
||||
GGML_TYPE_F32, GGML_TYPE_F16, GGML_TYPE_Q8_0, GGML_TYPE_Q4_0, GGML_TYPE_Q4_1, GGML_TYPE_Q4_K, GGML_TYPE_Q6_K};
|
||||
std::set<ggml_type> weight_types = {GGML_TYPE_F32,
|
||||
GGML_TYPE_F16,
|
||||
GGML_TYPE_BF16,
|
||||
GGML_TYPE_Q8_0,
|
||||
GGML_TYPE_Q4_0,
|
||||
GGML_TYPE_Q4_1,
|
||||
GGML_TYPE_Q4_K,
|
||||
GGML_TYPE_Q6_K};
|
||||
if (weight_types.find(tensor->type) == weight_types.end()) {
|
||||
throw std::runtime_error("Unexpected weight tensor type: " + std::string(tensor->name) + " with type " +
|
||||
ggml_type_name(tensor->type));
|
||||
|
|
|
|||
|
|
@@ -276,6 +276,7 @@ std::map<ggml_type, ExtraQuantType> get_types_to_requant(const std::string& devi
|
|||
{GGML_TYPE_Q6_K, ExtraQuantType::Q8_1_C},
|
||||
};
|
||||
}
|
||||
return {};
|
||||
}
|
||||
|
||||
ov::AnyMap get_npu_generate_config() {
|
||||
|
|
|
|||
Loading…
Reference in New Issue