Apply the EliminateZeroPoints pass only for NPU (registered only when the decoder is_static())
This commit is contained in:
parent
da2cc993bc
commit
be07073e0e
|
|
@@ -19,6 +19,7 @@ namespace ggml {
|
|||
namespace pass {
|
||||
|
||||
FuseToSDPA::FuseToSDPA() {
|
||||
// Not maintained since FLASH_ATTN_EXT has replaced this pattern
|
||||
const auto m_k = ov::pass::pattern::any_input();
|
||||
const auto m_q = ov::pass::pattern::any_input();
|
||||
const auto m_qk = ov::pass::pattern::wrap_type<ov::op::v0::MatMul>({m_q, m_k});
|
||||
|
|
|
|||
|
|
@@ -27,7 +27,6 @@
|
|||
#include "ggml-openvino/openvino/utils.hpp"
|
||||
#include "input_model.hpp"
|
||||
#include "pass/eliminate_zp.hpp"
|
||||
#include "pass/fuse_to_sdpa.hpp"
|
||||
#include "pass/mark_decompression_convert_constant_folding.hpp"
|
||||
|
||||
namespace ov {
|
||||
|
|
@@ -220,8 +219,9 @@ std::shared_ptr<Model> TranslateSession::apply_transformations(std::shared_ptr<M
|
|||
manager.register_pass<ov::pass::MakeStateful>(kv_param_res_pairs);
|
||||
}
|
||||
|
||||
manager.register_pass<pass::EliminateZeroPoints>();
|
||||
manager.register_pass<pass::FuseToSDPA>();
|
||||
if (ggml_model_decoder->is_static()) {
|
||||
manager.register_pass<pass::EliminateZeroPoints>();
|
||||
}
|
||||
manager.run_passes(model);
|
||||
}
|
||||
return model;
|
||||
|
|
|
|||
Loading…
Reference in New Issue