STYLE: make get_types_to_requant a function

This commit is contained in:
Yu, Zijun 2025-09-10 16:54:57 +08:00 committed by Mustafa Cavus
parent 2ad1147b9b
commit dc77cbb3f6
2 changed files with 20 additions and 15 deletions

View File

@ -132,21 +132,7 @@ enum ggml_status openvino_frontend_compute(ggml_backend_t backend, struct ggml_c
compile_end_time = conversion_end_time;
} else {
std::shared_ptr<ov::Model> model;
std::map<ggml_type, ExtraQuantType> types_to_requantize;
if (is_static) {
types_to_requantize = {
{GGML_TYPE_Q4_0, ExtraQuantType::Q4_0_128},
{GGML_TYPE_Q4_1, ExtraQuantType::Q4_0_128},
{GGML_TYPE_Q4_K, ExtraQuantType::Q4_0_128},
{GGML_TYPE_Q6_K, ExtraQuantType::Q8_1_C },
};
} else if (device == "GPU") {
types_to_requantize = {
// CVS-166739
{GGML_TYPE_Q6_K, ExtraQuantType::Q8_1_C},
};
}
auto model_weights = GgmlOvDecoder::create_weight_nodes(cgraph, types_to_requantize);
auto model_weights = GgmlOvDecoder::create_weight_nodes(cgraph, get_types_to_requant(device));
if (is_static) {
ggml_decoder = std::make_shared<GgmlOvDecoder>(cgraph, model_weights, is_static, true);
@ -275,6 +261,23 @@ ov::AnyMap get_npu_prefill_config() {
return config;
}
// Returns the set of ggml weight types that must be requantized for the given
// device, mapped to the ExtraQuantType they are converted to.
//
// @param device  Device name; "NPU" and "GPU" have dedicated mappings.
// @return        The per-device requantization map. For any other device the
//                map is empty, i.e. no weight type is requantized.
std::map<ggml_type, ExtraQuantType> get_types_to_requant(const std::string& device) {
    if (device == "NPU") {
        return {
            {GGML_TYPE_Q4_0, ExtraQuantType::Q4_0_128},
            {GGML_TYPE_Q4_1, ExtraQuantType::Q4_0_128},
            {GGML_TYPE_Q4_K, ExtraQuantType::Q4_0_128},
            {GGML_TYPE_Q6_K, ExtraQuantType::Q8_1_C  },
        };
    }
    if (device == "GPU") {
        return {
            // CVS-166739
            {GGML_TYPE_Q6_K, ExtraQuantType::Q8_1_C},
        };
    }
    // Every other device: nothing to requantize. The explicit return is required;
    // flowing off the end of a value-returning function is undefined behaviour.
    return {};
}
ov::AnyMap get_npu_generate_config() {
ov::AnyMap config = get_npu_prefill_config();
config.emplace("NPUW_UNFOLD_IREQS", "YES");

View File

@ -43,6 +43,8 @@ bool is_prefill(struct ggml_cgraph * cgraph);
ov::AnyMap get_npu_prefill_config();
ov::AnyMap get_npu_generate_config();
// Returns the ggml weight type -> ExtraQuantType requantization map to use for `device`.
// The implementation defines dedicated mappings for the "NPU" and "GPU" devices.
std::map<ggml_type, ExtraQuantType> get_types_to_requant(const std::string& device);
ov::Tensor get_ov_input_tensor(std::shared_ptr<GgmlOvDecoder> ggml_decoder, const std::string& param_name);
bool is_naive(struct ggml_cgraph* cgraph);