Support BF16 model
This commit is contained in:
parent
dc77cbb3f6
commit
bcc343af00
|
|
@@ -419,8 +419,14 @@ std::map<std::string, std::shared_ptr<ov::Node>> GgmlOvDecoder::create_weight_no
|
||||||
|
|
||||||
std::shared_ptr<ov::Node> GgmlOvDecoder::create_weight_node(ggml_tensor* tensor,
|
std::shared_ptr<ov::Node> GgmlOvDecoder::create_weight_node(ggml_tensor* tensor,
|
||||||
std::optional<ExtraQuantType> requant_type) {
|
std::optional<ExtraQuantType> requant_type) {
|
||||||
std::set<ggml_type> weight_types = {
|
std::set<ggml_type> weight_types = {GGML_TYPE_F32,
|
||||||
GGML_TYPE_F32, GGML_TYPE_F16, GGML_TYPE_Q8_0, GGML_TYPE_Q4_0, GGML_TYPE_Q4_1, GGML_TYPE_Q4_K, GGML_TYPE_Q6_K};
|
GGML_TYPE_F16,
|
||||||
|
GGML_TYPE_BF16,
|
||||||
|
GGML_TYPE_Q8_0,
|
||||||
|
GGML_TYPE_Q4_0,
|
||||||
|
GGML_TYPE_Q4_1,
|
||||||
|
GGML_TYPE_Q4_K,
|
||||||
|
GGML_TYPE_Q6_K};
|
||||||
if (weight_types.find(tensor->type) == weight_types.end()) {
|
if (weight_types.find(tensor->type) == weight_types.end()) {
|
||||||
throw std::runtime_error("Unexpected weight tensor type: " + std::string(tensor->name) + " with type " +
|
throw std::runtime_error("Unexpected weight tensor type: " + std::string(tensor->name) + " with type " +
|
||||||
ggml_type_name(tensor->type));
|
ggml_type_name(tensor->type));
|
||||||
|
|
|
||||||
|
|
@@ -276,6 +276,7 @@ std::map<ggml_type, ExtraQuantType> get_types_to_requant(const std::string& devi
|
||||||
{GGML_TYPE_Q6_K, ExtraQuantType::Q8_1_C},
|
{GGML_TYPE_Q6_K, ExtraQuantType::Q8_1_C},
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
return {};
|
||||||
}
|
}
|
||||||
|
|
||||||
ov::AnyMap get_npu_generate_config() {
|
ov::AnyMap get_npu_generate_config() {
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue