From cdf5370cb5e6d5c86628e8bcd862f78d4b8771ff Mon Sep 17 00:00:00 2001 From: "Yu, Zijun" Date: Tue, 13 May 2025 08:42:54 +0800 Subject: [PATCH] PERF: favor low precision matmul --- .../ggml-openvino/openvino/node_context.hpp | 2 +- ggml/src/ggml-openvino/openvino/op/mulmat.cpp | 35 ++++++++++--------- .../ggml-openvino/openvino/op/soft_max.cpp | 4 +-- 3 files changed, 21 insertions(+), 20 deletions(-) diff --git a/ggml/src/ggml-openvino/openvino/node_context.hpp b/ggml/src/ggml-openvino/openvino/node_context.hpp index bac135270d..e934e2ac36 100644 --- a/ggml/src/ggml-openvino/openvino/node_context.hpp +++ b/ggml/src/ggml-openvino/openvino/node_context.hpp @@ -33,7 +33,7 @@ public: return m_decoder->get_input_size(); } - Any get_input_type(size_t index) const { + ov::element::Type get_input_type(size_t index) const { return m_decoder->get_input_type(m_input_names[index]); } diff --git a/ggml/src/ggml-openvino/openvino/op/mulmat.cpp b/ggml/src/ggml-openvino/openvino/op/mulmat.cpp index e00435ef81..3e9c5c5083 100644 --- a/ggml/src/ggml-openvino/openvino/op/mulmat.cpp +++ b/ggml/src/ggml-openvino/openvino/op/mulmat.cpp @@ -1,19 +1,18 @@ -#include #include #include +#include +#include +#include +#include +#include +#include +#include +#include +#include #include #include "../node_context.hpp" #include "../utils.hpp" -#include "openvino/core/node.hpp" -#include "openvino/core/node_output.hpp" -#include "openvino/op/concat.hpp" -#include "openvino/op/constant.hpp" -#include "openvino/op/convert_like.hpp" -#include "openvino/op/matmul.hpp" -#include "openvino/op/reshape.hpp" -#include "openvino/op/slice.hpp" -#include "openvino/op/transpose.hpp" namespace ov { namespace frontend { @@ -25,9 +24,10 @@ OutputVector translate_mulmat(const NodeContext& context) { bool continuous = context.check_if_continuous(); if (continuous) { - auto src1 = context.get_input(1); - auto src0_converted = std::make_shared(context.get_input(0), src1); - auto result = std::make_shared(src1, src0_converted, false, true); + auto src0 = context.get_input(0); + auto src1 = std::make_shared(context.get_input(1), context.get_input_type(0)); + auto result_lp = std::make_shared(src1, src0, false, true); + auto result = std::make_shared(result_lp, context.get_output_type(0)); return {result}; } else { /* @@ -94,8 +94,7 @@ OutputVector translate_mulmat(const NodeContext& context) { B = src0_slice; } - A = context.get_input(1); - B = std::make_shared(B, A); + A = std::make_shared(context.get_input(1), context.get_input_type(0)); int64_t num_heads = context.get_input_shape(1).to_shape()[0]; int64_t num_heads_kv = src0_shape[0]; @@ -116,10 +115,12 @@ OutputVector translate_mulmat(const NodeContext& context) { B = std::make_shared(B, new_B_shape, false); } - auto result = std::make_shared(A, B, false, true); + auto result_lp = std::make_shared(A, B, false, true); + auto result = std::make_shared(result_lp, context.get_output_type(0)); + return {result}; } -}; +} } // namespace op } // namespace ggml diff --git a/ggml/src/ggml-openvino/openvino/op/soft_max.cpp b/ggml/src/ggml-openvino/openvino/op/soft_max.cpp index 27c7cefef0..cdb59f47d9 100644 --- a/ggml/src/ggml-openvino/openvino/op/soft_max.cpp +++ b/ggml/src/ggml-openvino/openvino/op/soft_max.cpp @@ -49,7 +49,7 @@ OutputVector translate_soft_max(const NodeContext& context) { if (context.get_input_size() == 2) { // Calculate mask then softmax auto mask_node = context.get_input(1); - ov::element::Type mask_type = (context.get_input_type(1)).as(); + ov::element::Type mask_type = context.get_input_type(1); if (mask_type == ov::element::f16) { // Convert f16 to f32 mask_node = std::make_shared(mask_node, ov::element::f32); @@ -80,7 +80,7 @@ OutputVector translate_soft_max(const NodeContext& context) { auto res = std::make_shared(input_node, 0); return {res}; } -}; +} } // namespace op } // namespace ggml