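Fix for Phi3: take the mask slice bound from the shape of inp_pos instead of leaf_8, unsqueeze rank-3 PERMUTE sources to 4D in the non-static path, and reshape SET_ROWS data to 4D before concatenating along axis 1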

This commit is contained in:
cavusmustafa 2025-10-02 11:24:40 -07:00 committed by Mustafa Cavus
parent e7252920e1
commit 05d7abae8c
5 changed files with 29 additions and 23 deletions

View File

@ -47,10 +47,10 @@ OutputVector translate_flash_attn_ext(const NodeContext& context) {
auto zero_1d = ov::op::v0::Constant::create(ov::element::i64, {1}, {0});
auto two_1d = ov::op::v0::Constant::create(ov::element::i64, {1}, {2});
auto axes = ov::op::v0::Constant::create(ov::element::i64, {2}, {1,2});
auto leaf_8 = context.get_input("leaf_8");
auto shape_of_leaf_8 = std::make_shared<ov::op::v3::ShapeOf>(leaf_8);
auto gather_leaf_8 = std::make_shared<ov::op::v8::Gather>(shape_of_leaf_8, two_1d, zero_1d);
auto stop = std::make_shared<ov::op::v0::Concat>(ov::OutputVector{token_len, gather_leaf_8}, 0);
auto inp_pos = context.get_input("inp_pos");
auto shape_of_inp_pos = std::make_shared<ov::op::v3::ShapeOf>(inp_pos);
auto gather_inp_pos = std::make_shared<ov::op::v8::Gather>(shape_of_inp_pos, two_1d, zero_1d);
auto stop = std::make_shared<ov::op::v0::Concat>(ov::OutputVector{token_len, gather_inp_pos}, 0);
mask_sliced =
std::make_shared<ov::op::v8::Slice>(mask, zero_2d, stop, one_2d, axes);
mask_sliced = std::make_shared<ov::op::v0::Unsqueeze>(mask_sliced, zero_1d);

View File

@ -7,6 +7,7 @@
#include <openvino/op/reshape.hpp>
#include <openvino/op/slice.hpp>
#include <openvino/op/transpose.hpp>
#include <openvino/op/unsqueeze.hpp>
#include "../node_context.hpp"
#include "../op_table.hpp"
@ -23,13 +24,18 @@ OutputVector translate_permute(const NodeContext& context) {
int op_case = context.get_op_case();
FRONT_END_CHECK_IMPLEMENTED(op_case == 1 || op_case == 2 || op_case == 3, "Unsupported PERMUTE case");
ov::Output<Node> res;
auto zero = ov::op::v0::Constant::create(ov::element::i64, {1}, {0});
if (op_case == 1) {
if (context.is_static()) {
res = std::make_shared<ov::op::v1::Transpose>(context.get_input(0),
ov::op::v0::Constant::create(ov::element::i64, {3}, {1, 0, 2}));
} else {
res = std::make_shared<ov::op::v1::Transpose>(context.get_input(0),
auto src = context.get_input(0);
if (src.get_partial_shape().rank() == 3) {
src = std::make_shared<ov::op::v0::Unsqueeze>(src, zero);
}
res = std::make_shared<ov::op::v1::Transpose>(src,
ov::op::v0::Constant::create(ov::element::i64, {4}, {0, 2, 1, 3}));
}
} else {
@ -43,7 +49,6 @@ OutputVector translate_permute(const NodeContext& context) {
attention_size = context.get_input("attention_size_swa");
}
auto zero = ov::op::v0::Constant::create(ov::element::i64, {1}, {0});
auto one = ov::op::v0::Constant::create(ov::element::i64, {1}, {1});
if (context.is_static()) {
@ -57,6 +62,9 @@ OutputVector translate_permute(const NodeContext& context) {
res = std::make_shared<ov::op::v1::Transpose>(src_slice,
ov::op::v0::Constant::create(ov::element::i64, {3}, {1, 0, 2}));
} else {
if (src.get_partial_shape().rank() == 3) {
src = std::make_shared<ov::op::v0::Unsqueeze>(src, zero);
}
res = std::make_shared<ov::op::v1::Transpose>(src,
ov::op::v0::Constant::create(ov::element::i64, {4}, {0, 2, 1, 3}));
}

View File

@ -1,3 +1,4 @@
#include <cassert>
#include <cstdint>
#include <memory>
#include <openvino/core/node.hpp>
@ -8,7 +9,6 @@
#include <openvino/op/convert.hpp>
#include <openvino/op/gather.hpp>
#include <openvino/op/reshape.hpp>
#include <openvino/op/unsqueeze.hpp>
#include <openvino/op/scatter_update.hpp>
#include <openvino/op/shape_of.hpp>
#include <openvino/op/slice.hpp>
@ -55,14 +55,12 @@ OutputVector translate_set_rows(const NodeContext& context) {
auto updated = std::make_shared<ov::op::v3::ScatterUpdate>(dst_reshaped, indices_reshaped, data_reshaped, zero);
res = std::make_shared<ov::op::v1::Reshape>(updated, std::make_shared<ov::op::v0::ShapeOf>(dst), false);
} else {
// TODO: Better solution would be to reshape the data into 4D at first place (for stateful model)
if (data.get_partial_shape().rank() + 1 == dst.get_partial_shape().rank()) {
data = std::make_shared<ov::op::v0::Unsqueeze>(data, zero);
}
int concat_axis = 1;
if (context.is_static())
concat_axis = 0;
res = std::make_shared<ov::op::v0::Concat>(OutputVector{dst, data}, concat_axis);
assert(dst.get_partial_shape().rank() == 4 && dst.get_partial_shape()[2].is_static() && dst.get_partial_shape()[3].is_static());
int64_t dim2 = dst.get_partial_shape()[2].get_length();
int64_t dim3 = dst.get_partial_shape()[3].get_length();
data = std::make_shared<ov::op::v1::Reshape>(
data, ov::op::v0::Constant::create(ov::element::i64, {4}, {(int64_t) 1, (int64_t) -1, dim2, dim3}), false);
res = std::make_shared<ov::op::v0::Concat>(OutputVector{dst, data}, 1);
}
return rename_outputs_with_suffix({res}, context.get_name());
}

View File

@ -64,10 +64,10 @@ OutputVector translate_soft_max(const NodeContext& context) {
auto zero_1d = ov::op::v0::Constant::create(ov::element::i64, {1}, {0});
auto two_1d = ov::op::v0::Constant::create(ov::element::i64, {1}, {2});
auto axes = ov::op::v0::Constant::create(ov::element::i64, {2}, {1,2});
auto leaf_8 = context.get_input("leaf_8");
auto shape_of_leaf_8 = std::make_shared<ov::op::v3::ShapeOf>(leaf_8);
auto gather_leaf_8 = std::make_shared<ov::op::v8::Gather>(shape_of_leaf_8, two_1d, zero_1d);
auto stop = std::make_shared<ov::op::v0::Concat>(ov::OutputVector{token_len, gather_leaf_8}, 0);
auto inp_pos = context.get_input("inp_pos");
auto shape_of_inp_pos = std::make_shared<ov::op::v3::ShapeOf>(inp_pos);
auto gather_inp_pos = std::make_shared<ov::op::v8::Gather>(shape_of_inp_pos, two_1d, zero_1d);
auto stop = std::make_shared<ov::op::v0::Concat>(ov::OutputVector{token_len, gather_inp_pos}, 0);
mask_node_sliced =
std::make_shared<ov::op::v8::Slice>(mask_node, zero_2d, stop, one_2d, axes);
if (!(context.is_static())) {

View File

@ -93,10 +93,10 @@ void add_sliced_mask(TensorMap& tensor_map, GgmlDecoder& ggml_model_decoder) {
auto zero_1d = ov::op::v0::Constant::create(ov::element::i64, {1}, {0});
auto two_1d = ov::op::v0::Constant::create(ov::element::i64, {1}, {2});
auto axes = ov::op::v0::Constant::create(ov::element::i64, {2}, {1,2});
auto leaf_8 = tensor_map.at("leaf_8").get_node_shared_ptr();
auto shape_of_leaf_8 = std::make_shared<ov::op::v3::ShapeOf>(leaf_8);
auto gather_leaf_8 = std::make_shared<ov::op::v8::Gather>(shape_of_leaf_8, two_1d, zero_1d);
auto stop = std::make_shared<ov::op::v0::Concat>(ov::OutputVector{token_len, gather_leaf_8}, 0);
auto inp_pos = tensor_map.at("inp_pos").get_node_shared_ptr();
auto shape_of_inp_pos = std::make_shared<ov::op::v3::ShapeOf>(inp_pos);
auto gather_inp_pos = std::make_shared<ov::op::v8::Gather>(shape_of_inp_pos, two_1d, zero_1d);
auto stop = std::make_shared<ov::op::v0::Concat>(ov::OutputVector{token_len, gather_inp_pos}, 0);
mask_sliced =
std::make_shared<ov::op::v8::Slice>(mask, zero_2d, stop, one_2d, axes);
mask_sliced = std::make_shared<ov::op::v0::Unsqueeze>(mask_sliced, zero_1d);