From be07073e0e10b23a1d1825c59860e846fe7d2293 Mon Sep 17 00:00:00 2001
From: "Yu, Zijun"
Date: Fri, 12 Sep 2025 16:51:46 +0800
Subject: [PATCH] Apply EliminateZP only for npu

---
 ggml/src/ggml-openvino/openvino/pass/fuse_to_sdpa.cpp | 1 +
 ggml/src/ggml-openvino/openvino/translate_session.cpp | 6 +++---
 2 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/ggml/src/ggml-openvino/openvino/pass/fuse_to_sdpa.cpp b/ggml/src/ggml-openvino/openvino/pass/fuse_to_sdpa.cpp
index c36579910d..f38c0837d1 100644
--- a/ggml/src/ggml-openvino/openvino/pass/fuse_to_sdpa.cpp
+++ b/ggml/src/ggml-openvino/openvino/pass/fuse_to_sdpa.cpp
@@ -19,6 +19,7 @@ namespace ggml {
 namespace pass {
 
 FuseToSDPA::FuseToSDPA() {
+    // Not maintained since FLASH_ATTN_EXT has replaced this pattern
     const auto m_k = ov::pass::pattern::any_input();
     const auto m_q = ov::pass::pattern::any_input();
     const auto m_qk = ov::pass::pattern::wrap_type({m_q, m_k});
diff --git a/ggml/src/ggml-openvino/openvino/translate_session.cpp b/ggml/src/ggml-openvino/openvino/translate_session.cpp
index 634fea40e9..3b8c30361a 100644
--- a/ggml/src/ggml-openvino/openvino/translate_session.cpp
+++ b/ggml/src/ggml-openvino/openvino/translate_session.cpp
@@ -27,7 +27,6 @@
 #include "ggml-openvino/openvino/utils.hpp"
 #include "input_model.hpp"
 #include "pass/eliminate_zp.hpp"
-#include "pass/fuse_to_sdpa.hpp"
 #include "pass/mark_decompression_convert_constant_folding.hpp"
 
 namespace ov {
@@ -220,8 +219,9 @@ std::shared_ptr TranslateSession::apply_transformations(std::shared_ptr(kv_param_res_pairs);
         }
 
-        manager.register_pass();
-        manager.register_pass();
+        if (ggml_model_decoder->is_static()) {
+            manager.register_pass();
+        }
         manager.run_passes(model);
     }
     return model;