STYLE: make get_types_to_requant a function
This commit is contained in:
parent
2ad1147b9b
commit
dc77cbb3f6
|
|
@ -132,21 +132,7 @@ enum ggml_status openvino_frontend_compute(ggml_backend_t backend, struct ggml_c
|
|||
compile_end_time = conversion_end_time;
|
||||
} else {
|
||||
std::shared_ptr<ov::Model> model;
|
||||
std::map<ggml_type, ExtraQuantType> types_to_requantize;
|
||||
if (is_static) {
|
||||
types_to_requantize = {
|
||||
{GGML_TYPE_Q4_0, ExtraQuantType::Q4_0_128},
|
||||
{GGML_TYPE_Q4_1, ExtraQuantType::Q4_0_128},
|
||||
{GGML_TYPE_Q4_K, ExtraQuantType::Q4_0_128},
|
||||
{GGML_TYPE_Q6_K, ExtraQuantType::Q8_1_C },
|
||||
};
|
||||
} else if (device == "GPU") {
|
||||
types_to_requantize = {
|
||||
// CVS-166739
|
||||
{GGML_TYPE_Q6_K, ExtraQuantType::Q8_1_C},
|
||||
};
|
||||
}
|
||||
auto model_weights = GgmlOvDecoder::create_weight_nodes(cgraph, types_to_requantize);
|
||||
auto model_weights = GgmlOvDecoder::create_weight_nodes(cgraph, get_types_to_requant(device));
|
||||
|
||||
if (is_static) {
|
||||
ggml_decoder = std::make_shared<GgmlOvDecoder>(cgraph, model_weights, is_static, true);
|
||||
|
|
@ -275,6 +261,23 @@ ov::AnyMap get_npu_prefill_config() {
|
|||
return config;
|
||||
}
|
||||
|
||||
std::map<ggml_type, ExtraQuantType> get_types_to_requant(const std::string& device) {
|
||||
if (device == "NPU") {
|
||||
return {
|
||||
{GGML_TYPE_Q4_0, ExtraQuantType::Q4_0_128},
|
||||
{GGML_TYPE_Q4_1, ExtraQuantType::Q4_0_128},
|
||||
{GGML_TYPE_Q4_K, ExtraQuantType::Q4_0_128},
|
||||
{GGML_TYPE_Q6_K, ExtraQuantType::Q8_1_C },
|
||||
};
|
||||
}
|
||||
if (device == "GPU") {
|
||||
return {
|
||||
// CVS-166739
|
||||
{GGML_TYPE_Q6_K, ExtraQuantType::Q8_1_C},
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
ov::AnyMap get_npu_generate_config() {
|
||||
ov::AnyMap config = get_npu_prefill_config();
|
||||
config.emplace("NPUW_UNFOLD_IREQS", "YES");
|
||||
|
|
|
|||
|
|
@ -43,6 +43,8 @@ bool is_prefill(struct ggml_cgraph * cgraph);
|
|||
ov::AnyMap get_npu_prefill_config();
|
||||
ov::AnyMap get_npu_generate_config();
|
||||
|
||||
// Returns the map of ggml weight types to the ExtraQuantType each should be
// requantized to for `device`. The definition provides dedicated tables for
// the device names "NPU" and "GPU".
std::map<ggml_type, ExtraQuantType> get_types_to_requant(const std::string& device);
|
||||
|
||||
ov::Tensor get_ov_input_tensor(std::shared_ptr<GgmlOvDecoder> ggml_decoder, const std::string& param_name);
|
||||
|
||||
bool is_naive(struct ggml_cgraph* cgraph);
|
||||
|
|
|
|||
Loading…
Reference in New Issue