STYLE: make get_types_to_requant a function
This commit is contained in:
parent
2ad1147b9b
commit
dc77cbb3f6
|
|
@ -132,21 +132,7 @@ enum ggml_status openvino_frontend_compute(ggml_backend_t backend, struct ggml_c
|
||||||
compile_end_time = conversion_end_time;
|
compile_end_time = conversion_end_time;
|
||||||
} else {
|
} else {
|
||||||
std::shared_ptr<ov::Model> model;
|
std::shared_ptr<ov::Model> model;
|
||||||
std::map<ggml_type, ExtraQuantType> types_to_requantize;
|
auto model_weights = GgmlOvDecoder::create_weight_nodes(cgraph, get_types_to_requant(device));
|
||||||
if (is_static) {
|
|
||||||
types_to_requantize = {
|
|
||||||
{GGML_TYPE_Q4_0, ExtraQuantType::Q4_0_128},
|
|
||||||
{GGML_TYPE_Q4_1, ExtraQuantType::Q4_0_128},
|
|
||||||
{GGML_TYPE_Q4_K, ExtraQuantType::Q4_0_128},
|
|
||||||
{GGML_TYPE_Q6_K, ExtraQuantType::Q8_1_C },
|
|
||||||
};
|
|
||||||
} else if (device == "GPU") {
|
|
||||||
types_to_requantize = {
|
|
||||||
// CVS-166739
|
|
||||||
{GGML_TYPE_Q6_K, ExtraQuantType::Q8_1_C},
|
|
||||||
};
|
|
||||||
}
|
|
||||||
auto model_weights = GgmlOvDecoder::create_weight_nodes(cgraph, types_to_requantize);
|
|
||||||
|
|
||||||
if (is_static) {
|
if (is_static) {
|
||||||
ggml_decoder = std::make_shared<GgmlOvDecoder>(cgraph, model_weights, is_static, true);
|
ggml_decoder = std::make_shared<GgmlOvDecoder>(cgraph, model_weights, is_static, true);
|
||||||
|
|
@ -275,6 +261,23 @@ ov::AnyMap get_npu_prefill_config() {
|
||||||
return config;
|
return config;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
std::map<ggml_type, ExtraQuantType> get_types_to_requant(const std::string& device) {
|
||||||
|
if (device == "NPU") {
|
||||||
|
return {
|
||||||
|
{GGML_TYPE_Q4_0, ExtraQuantType::Q4_0_128},
|
||||||
|
{GGML_TYPE_Q4_1, ExtraQuantType::Q4_0_128},
|
||||||
|
{GGML_TYPE_Q4_K, ExtraQuantType::Q4_0_128},
|
||||||
|
{GGML_TYPE_Q6_K, ExtraQuantType::Q8_1_C },
|
||||||
|
};
|
||||||
|
}
|
||||||
|
if (device == "GPU") {
|
||||||
|
return {
|
||||||
|
// CVS-166739
|
||||||
|
{GGML_TYPE_Q6_K, ExtraQuantType::Q8_1_C},
|
||||||
|
};
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
ov::AnyMap get_npu_generate_config() {
|
ov::AnyMap get_npu_generate_config() {
|
||||||
ov::AnyMap config = get_npu_prefill_config();
|
ov::AnyMap config = get_npu_prefill_config();
|
||||||
config.emplace("NPUW_UNFOLD_IREQS", "YES");
|
config.emplace("NPUW_UNFOLD_IREQS", "YES");
|
||||||
|
|
|
||||||
|
|
@ -43,6 +43,8 @@ bool is_prefill(struct ggml_cgraph * cgraph);
|
||||||
ov::AnyMap get_npu_prefill_config();
|
ov::AnyMap get_npu_prefill_config();
|
||||||
ov::AnyMap get_npu_generate_config();
|
ov::AnyMap get_npu_generate_config();
|
||||||
|
|
||||||
|
// Returns the ggml types to requantize for `device` ("NPU" or "GPU"), each mapped to its target ExtraQuantType.
std::map<ggml_type, ExtraQuantType> get_types_to_requant(const std::string& device);
|
||||||
|
|
||||||
ov::Tensor get_ov_input_tensor(std::shared_ptr<GgmlOvDecoder> ggml_decoder, const std::string& param_name);
|
ov::Tensor get_ov_input_tensor(std::shared_ptr<GgmlOvDecoder> ggml_decoder, const std::string& param_name);
|
||||||
|
|
||||||
bool is_naive(struct ggml_cgraph* cgraph);
|
bool is_naive(struct ggml_cgraph* cgraph);
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue