From 05d7abae8cf0c3cae615c93feb9ce78e67b7a967 Mon Sep 17 00:00:00 2001 From: cavusmustafa Date: Thu, 2 Oct 2025 11:24:40 -0700 Subject: [PATCH] Fix for Phi3 --- .../ggml-openvino/openvino/op/flash_attn_ext.cpp | 8 ++++---- ggml/src/ggml-openvino/openvino/op/permute.cpp | 12 ++++++++++-- ggml/src/ggml-openvino/openvino/op/set_rows.cpp | 16 +++++++--------- ggml/src/ggml-openvino/openvino/op/softmax.cpp | 8 ++++---- .../ggml-openvino/openvino/translate_session.cpp | 8 ++++---- 5 files changed, 29 insertions(+), 23 deletions(-) diff --git a/ggml/src/ggml-openvino/openvino/op/flash_attn_ext.cpp b/ggml/src/ggml-openvino/openvino/op/flash_attn_ext.cpp index 36d0f8844a..ec9bb0aac5 100644 --- a/ggml/src/ggml-openvino/openvino/op/flash_attn_ext.cpp +++ b/ggml/src/ggml-openvino/openvino/op/flash_attn_ext.cpp @@ -47,10 +47,10 @@ OutputVector translate_flash_attn_ext(const NodeContext& context) { auto zero_1d = ov::op::v0::Constant::create(ov::element::i64, {1}, {0}); auto two_1d = ov::op::v0::Constant::create(ov::element::i64, {1}, {2}); auto axes = ov::op::v0::Constant::create(ov::element::i64, {2}, {1,2}); - auto leaf_8 = context.get_input("leaf_8"); - auto shape_of_leaf_8 = std::make_shared(leaf_8); - auto gather_leaf_8 = std::make_shared(shape_of_leaf_8, two_1d, zero_1d); - auto stop = std::make_shared(ov::OutputVector{token_len, gather_leaf_8}, 0); + auto inp_pos = context.get_input("inp_pos"); + auto shape_of_inp_pos = std::make_shared(inp_pos); + auto gather_inp_pos = std::make_shared(shape_of_inp_pos, two_1d, zero_1d); + auto stop = std::make_shared(ov::OutputVector{token_len, gather_inp_pos}, 0); mask_sliced = std::make_shared(mask, zero_2d, stop, one_2d, axes); mask_sliced = std::make_shared(mask_sliced, zero_1d); diff --git a/ggml/src/ggml-openvino/openvino/op/permute.cpp b/ggml/src/ggml-openvino/openvino/op/permute.cpp index cd0d073ab3..ea5e417965 100644 --- a/ggml/src/ggml-openvino/openvino/op/permute.cpp +++ b/ggml/src/ggml-openvino/openvino/op/permute.cpp @@ -7,6 +7,7 @@ #include #include #include +#include #include "../node_context.hpp" #include "../op_table.hpp" @@ -23,13 +24,18 @@ OutputVector translate_permute(const NodeContext& context) { int op_case = context.get_op_case(); FRONT_END_CHECK_IMPLEMENTED(op_case == 1 || op_case == 2 || op_case == 3, "Unsupported PERMUTE case"); ov::Output res; + auto zero = ov::op::v0::Constant::create(ov::element::i64, {1}, {0}); if (op_case == 1) { if (context.is_static()) { res = std::make_shared(context.get_input(0), ov::op::v0::Constant::create(ov::element::i64, {3}, {1, 0, 2})); } else { - res = std::make_shared(context.get_input(0), + auto src = context.get_input(0); + if (src.get_partial_shape().rank() == 3) { + src = std::make_shared(src, zero); + } + res = std::make_shared(src, ov::op::v0::Constant::create(ov::element::i64, {4}, {0, 2, 1, 3})); } } else { @@ -43,7 +49,6 @@ OutputVector translate_permute(const NodeContext& context) { attention_size = context.get_input("attention_size_swa"); } - auto zero = ov::op::v0::Constant::create(ov::element::i64, {1}, {0}); auto one = ov::op::v0::Constant::create(ov::element::i64, {1}, {1}); if (context.is_static()) { @@ -57,6 +62,9 @@ OutputVector translate_permute(const NodeContext& context) { res = std::make_shared(src_slice, ov::op::v0::Constant::create(ov::element::i64, {3}, {1, 0, 2})); } else { + if (src.get_partial_shape().rank() == 3) { + src = std::make_shared(src, zero); + } res = std::make_shared(src, ov::op::v0::Constant::create(ov::element::i64, {4}, {0, 2, 1, 3})); } diff --git a/ggml/src/ggml-openvino/openvino/op/set_rows.cpp b/ggml/src/ggml-openvino/openvino/op/set_rows.cpp index a3285d41ce..0b2f29441a 100644 --- a/ggml/src/ggml-openvino/openvino/op/set_rows.cpp +++ b/ggml/src/ggml-openvino/openvino/op/set_rows.cpp @@ -1,3 +1,4 @@ +#include #include #include #include @@ -8,7 +9,6 @@ #include #include #include -#include #include #include #include @@ -55,14 +55,12 @@ OutputVector translate_set_rows(const NodeContext& context) { auto updated = std::make_shared(dst_reshaped, indices_reshaped, data_reshaped, zero); res = std::make_shared(updated, std::make_shared(dst), false); } else { - // TODO: Better solution would be to reshape the data into 4D at first place (for stateful model) - if (data.get_partial_shape().rank() + 1 == dst.get_partial_shape().rank()) { - data = std::make_shared(data, zero); - } - int concat_axis = 1; - if (context.is_static()) - concat_axis = 0; - res = std::make_shared(OutputVector{dst, data}, concat_axis); + assert(dst.get_partial_shape().rank() == 4 && dst.get_partial_shape()[2].is_static() && dst.get_partial_shape()[3].is_static()); + int64_t dim2 = dst.get_partial_shape()[2].get_length(); + int64_t dim3 = dst.get_partial_shape()[3].get_length(); + data = std::make_shared( + data, ov::op::v0::Constant::create(ov::element::i64, {4}, {(int64_t) 1, (int64_t) -1, dim2, dim3}), false); + res = std::make_shared(OutputVector{dst, data}, 1); } return rename_outputs_with_suffix({res}, context.get_name()); } diff --git a/ggml/src/ggml-openvino/openvino/op/softmax.cpp b/ggml/src/ggml-openvino/openvino/op/softmax.cpp index 8f134626c8..12db9e82a0 100644 --- a/ggml/src/ggml-openvino/openvino/op/softmax.cpp +++ b/ggml/src/ggml-openvino/openvino/op/softmax.cpp @@ -64,10 +64,10 @@ OutputVector translate_soft_max(const NodeContext& context) { auto zero_1d = ov::op::v0::Constant::create(ov::element::i64, {1}, {0}); auto two_1d = ov::op::v0::Constant::create(ov::element::i64, {1}, {2}); auto axes = ov::op::v0::Constant::create(ov::element::i64, {2}, {1,2}); - auto leaf_8 = context.get_input("leaf_8"); - auto shape_of_leaf_8 = std::make_shared(leaf_8); - auto gather_leaf_8 = std::make_shared(shape_of_leaf_8, two_1d, zero_1d); - auto stop = std::make_shared(ov::OutputVector{token_len, gather_leaf_8}, 0); + auto inp_pos = context.get_input("inp_pos"); + auto shape_of_inp_pos = std::make_shared(inp_pos); + auto gather_inp_pos = std::make_shared(shape_of_inp_pos, two_1d, zero_1d); + auto stop = std::make_shared(ov::OutputVector{token_len, gather_inp_pos}, 0); mask_node_sliced = std::make_shared(mask_node, zero_2d, stop, one_2d, axes); if (!(context.is_static())) { diff --git a/ggml/src/ggml-openvino/openvino/translate_session.cpp b/ggml/src/ggml-openvino/openvino/translate_session.cpp index 58a94d6149..830344020c 100644 --- a/ggml/src/ggml-openvino/openvino/translate_session.cpp +++ b/ggml/src/ggml-openvino/openvino/translate_session.cpp @@ -93,10 +93,10 @@ void add_sliced_mask(TensorMap& tensor_map, GgmlDecoder& ggml_model_decoder) { auto zero_1d = ov::op::v0::Constant::create(ov::element::i64, {1}, {0}); auto two_1d = ov::op::v0::Constant::create(ov::element::i64, {1}, {2}); auto axes = ov::op::v0::Constant::create(ov::element::i64, {2}, {1,2}); - auto leaf_8 = tensor_map.at("leaf_8").get_node_shared_ptr(); - auto shape_of_leaf_8 = std::make_shared(leaf_8); - auto gather_leaf_8 = std::make_shared(shape_of_leaf_8, two_1d, zero_1d); - auto stop = std::make_shared(ov::OutputVector{token_len, gather_leaf_8}, 0); + auto inp_pos = tensor_map.at("inp_pos").get_node_shared_ptr(); + auto shape_of_inp_pos = std::make_shared(inp_pos); + auto gather_inp_pos = std::make_shared(shape_of_inp_pos, two_1d, zero_1d); + auto stop = std::make_shared(ov::OutputVector{token_len, gather_inp_pos}, 0); mask_sliced = std::make_shared(mask, zero_2d, stop, one_2d, axes); mask_sliced = std::make_shared(mask_sliced, zero_1d);