Apply EliminateZP only for NPU

This commit is contained in:
Yu, Zijun 2025-09-12 16:51:46 +08:00 committed by Mustafa Cavus
parent da2cc993bc
commit be07073e0e
2 changed files with 4 additions and 3 deletions

View File

@@ -19,6 +19,7 @@ namespace ggml {
namespace pass {
FuseToSDPA::FuseToSDPA() {
// Not maintained since FLASH_ATTN_EXT has replaced this pattern
const auto m_k = ov::pass::pattern::any_input();
const auto m_q = ov::pass::pattern::any_input();
const auto m_qk = ov::pass::pattern::wrap_type<ov::op::v0::MatMul>({m_q, m_k});

View File

@@ -27,7 +27,6 @@
#include "ggml-openvino/openvino/utils.hpp"
#include "input_model.hpp"
#include "pass/eliminate_zp.hpp"
#include "pass/fuse_to_sdpa.hpp"
#include "pass/mark_decompression_convert_constant_folding.hpp"
namespace ov {
@@ -220,8 +219,9 @@ std::shared_ptr<Model> TranslateSession::apply_transformations(std::shared_ptr<M
manager.register_pass<ov::pass::MakeStateful>(kv_param_res_pairs);
}
manager.register_pass<pass::EliminateZeroPoints>();
manager.register_pass<pass::FuseToSDPA>();
if (ggml_model_decoder->is_static()) {
manager.register_pass<pass::EliminateZeroPoints>();
}
manager.run_passes(model);
}
return model;