diff --git a/ggml/src/ggml-openvino/openvino/op/glu_swiglu.cpp b/ggml/src/ggml-openvino/openvino/op/glu_swiglu.cpp index 28013fbaa0..138ef65090 100644 --- a/ggml/src/ggml-openvino/openvino/op/glu_swiglu.cpp +++ b/ggml/src/ggml-openvino/openvino/op/glu_swiglu.cpp @@ -1,6 +1,11 @@ +#include +#include #include +#include #include #include +#include +#include #include "../node_context.hpp" #include "../op_table.hpp" @@ -12,13 +17,23 @@ namespace ggml { namespace op { OutputVector translate_glu_swiglu(const NodeContext& context) { - num_inputs_check(context, 2, 2); + num_inputs_check(context, 1, 2); - auto src1 = context.get_input(0); - auto src2 = context.get_input(1); - auto sigmoid = std::make_shared(src1); - auto silu = std::make_shared(src1, sigmoid); - auto res = std::make_shared(silu, src2); + ov::Output src0; + ov::Output src1; + if (context.get_input_size() == 2) { + src0 = context.get_input(0); + src1 = context.get_input(1); + } else { + auto combined = context.get_input(0); + auto split_axis = ov::op::v0::Constant::create(ov::element::i64, {}, {2}); + auto split = std::make_shared(combined, split_axis, 2); + src0 = split->output(0); + src1 = split->output(1); + } + auto sigmoid = std::make_shared(src0); + auto silu = std::make_shared(src0, sigmoid); + auto res = std::make_shared(silu, src1); return rename_outputs_with_suffix({res}, context.get_name()); } diff --git a/ggml/src/ggml-openvino/openvino/op/soft_max.cpp b/ggml/src/ggml-openvino/openvino/op/soft_max.cpp index 81d43c37fe..d59f4499a3 100644 --- a/ggml/src/ggml-openvino/openvino/op/soft_max.cpp +++ b/ggml/src/ggml-openvino/openvino/op/soft_max.cpp @@ -43,12 +43,8 @@ OutputVector translate_soft_max(const NodeContext& context) { const float slope = (max_bias > 0.0f) ? h < n_head_log2 ? powf(m0, h + 1) : powf(m1, 2 * (h - n_head_log2) + 1) : 1.0f; - std::shared_ptr scaled_input; - if (scale != 1.0f) { - auto scale_node = - std::make_shared(ov::element::f32, ov::Shape{}, std::vector{scale}); - scaled_input = std::make_shared(input_node, scale_node); - } + auto scale_node = std::make_shared(ov::element::f32, ov::Shape{}, std::vector{scale}); + auto scaled_input = std::make_shared(input_node, scale_node); auto mask_node = context.get_input(1);