From dc77cbb3f68a951df3b4cbdc5d38090d65dd2aaf Mon Sep 17 00:00:00 2001
From: "Yu, Zijun"
Date: Wed, 10 Sep 2025 16:54:57 +0800
Subject: [PATCH] STYLE: make get_types_to_requant a function

---
Review notes (this section is ignored by git am):
 * get_types_to_requant() now ends with `return {};`. Without it, any device
   other than "NPU"/"GPU" flowed off the end of a value-returning function,
   which is undefined behaviour. The removed inline code left the map empty in
   that case, so an empty map keeps the previous behaviour.
 * The NPU table used to be selected by `is_static`; it is now selected by
   device == "NPU". This assumes is_static is true exactly for the NPU device
   -- TODO confirm at the call site.
 * This copy of the patch had lost its line breaks, indentation and everything
   inside angle brackets. The layout below is reconstructed, and the
   std::map<ggml_type, ExtraQuantType> arguments are restored from the
   initializer contents. The template arguments of std::shared_ptr /
   std::make_shared in context lines could not be recovered from this copy,
   so regenerate the patch from the tree (or apply by hand) rather than
   feeding this text to git apply. The blob ids on the index lines are stale.

 ggml/src/ggml-openvino/utils.cpp | 39 ++++++++++++++++++++++++---------------
 ggml/src/ggml-openvino/utils.h   |  3 +++
 2 files changed, 27 insertions(+), 15 deletions(-)

diff --git a/ggml/src/ggml-openvino/utils.cpp b/ggml/src/ggml-openvino/utils.cpp
index 588404df19..2438f2dd11 100644
--- a/ggml/src/ggml-openvino/utils.cpp
+++ b/ggml/src/ggml-openvino/utils.cpp
@@ -132,21 +132,7 @@ enum ggml_status openvino_frontend_compute(ggml_backend_t backend, struct ggml_c
         compile_end_time = conversion_end_time;
     } else {
         std::shared_ptr model;
-        std::map<ggml_type, ExtraQuantType> types_to_requantize;
-        if (is_static) {
-            types_to_requantize = {
-                {GGML_TYPE_Q4_0, ExtraQuantType::Q4_0_128},
-                {GGML_TYPE_Q4_1, ExtraQuantType::Q4_0_128},
-                {GGML_TYPE_Q4_K, ExtraQuantType::Q4_0_128},
-                {GGML_TYPE_Q6_K, ExtraQuantType::Q8_1_C  },
-            };
-        } else if (device == "GPU") {
-            types_to_requantize = {
-                // CVS-166739
-                {GGML_TYPE_Q6_K, ExtraQuantType::Q8_1_C},
-            };
-        }
-        auto model_weights = GgmlOvDecoder::create_weight_nodes(cgraph, types_to_requantize);
+        auto model_weights = GgmlOvDecoder::create_weight_nodes(cgraph, get_types_to_requant(device));
 
         if (is_static) {
             ggml_decoder = std::make_shared(cgraph, model_weights, is_static, true);
@@ -275,6 +261,29 @@ ov::AnyMap get_npu_prefill_config() {
     return config;
 }
 
+// Builds the per-device requantization table that is handed to
+// GgmlOvDecoder::create_weight_nodes(): ggml weight type -> ExtraQuantType.
+// Devices without a table below get an empty map.
+std::map<ggml_type, ExtraQuantType> get_types_to_requant(const std::string& device) {
+    if (device == "NPU") {
+        return {
+            {GGML_TYPE_Q4_0, ExtraQuantType::Q4_0_128},
+            {GGML_TYPE_Q4_1, ExtraQuantType::Q4_0_128},
+            {GGML_TYPE_Q4_K, ExtraQuantType::Q4_0_128},
+            {GGML_TYPE_Q6_K, ExtraQuantType::Q8_1_C  },
+        };
+    }
+    if (device == "GPU") {
+        return {
+            // CVS-166739
+            {GGML_TYPE_Q6_K, ExtraQuantType::Q8_1_C},
+        };
+    }
+    // Every other device still needs an explicit result: flowing off the end
+    // of a value-returning function is undefined behaviour.
+    return {};
+}
+
 ov::AnyMap get_npu_generate_config() {
     ov::AnyMap config = get_npu_prefill_config();
     config.emplace("NPUW_UNFOLD_IREQS", "YES");
diff --git a/ggml/src/ggml-openvino/utils.h b/ggml/src/ggml-openvino/utils.h
index f377fe9d27..42686c593b 100644
--- a/ggml/src/ggml-openvino/utils.h
+++ b/ggml/src/ggml-openvino/utils.h
@@ -43,6 +43,9 @@ bool is_prefill(struct ggml_cgraph * cgraph);
 ov::AnyMap get_npu_prefill_config();
 ov::AnyMap get_npu_generate_config();
 
+// Requantization table for `device`; empty when the device has no entry.
+std::map<ggml_type, ExtraQuantType> get_types_to_requant(const std::string& device);
+
 ov::Tensor get_ov_input_tensor(std::shared_ptr ggml_decoder, const std::string& param_name);
 
 bool is_naive(struct ggml_cgraph* cgraph);