Fix for Phi3
This commit is contained in:
parent
e7252920e1
commit
05d7abae8c
|
|
@ -47,10 +47,10 @@ OutputVector translate_flash_attn_ext(const NodeContext& context) {
|
|||
auto zero_1d = ov::op::v0::Constant::create(ov::element::i64, {1}, {0});
|
||||
auto two_1d = ov::op::v0::Constant::create(ov::element::i64, {1}, {2});
|
||||
auto axes = ov::op::v0::Constant::create(ov::element::i64, {2}, {1,2});
|
||||
auto leaf_8 = context.get_input("leaf_8");
|
||||
auto shape_of_leaf_8 = std::make_shared<ov::op::v3::ShapeOf>(leaf_8);
|
||||
auto gather_leaf_8 = std::make_shared<ov::op::v8::Gather>(shape_of_leaf_8, two_1d, zero_1d);
|
||||
auto stop = std::make_shared<ov::op::v0::Concat>(ov::OutputVector{token_len, gather_leaf_8}, 0);
|
||||
auto inp_pos = context.get_input("inp_pos");
|
||||
auto shape_of_inp_pos = std::make_shared<ov::op::v3::ShapeOf>(inp_pos);
|
||||
auto gather_inp_pos = std::make_shared<ov::op::v8::Gather>(shape_of_inp_pos, two_1d, zero_1d);
|
||||
auto stop = std::make_shared<ov::op::v0::Concat>(ov::OutputVector{token_len, gather_inp_pos}, 0);
|
||||
mask_sliced =
|
||||
std::make_shared<ov::op::v8::Slice>(mask, zero_2d, stop, one_2d, axes);
|
||||
mask_sliced = std::make_shared<ov::op::v0::Unsqueeze>(mask_sliced, zero_1d);
|
||||
|
|
|
|||
|
|
@ -7,6 +7,7 @@
|
|||
#include <openvino/op/reshape.hpp>
|
||||
#include <openvino/op/slice.hpp>
|
||||
#include <openvino/op/transpose.hpp>
|
||||
#include <openvino/op/unsqueeze.hpp>
|
||||
|
||||
#include "../node_context.hpp"
|
||||
#include "../op_table.hpp"
|
||||
|
|
@ -23,13 +24,18 @@ OutputVector translate_permute(const NodeContext& context) {
|
|||
int op_case = context.get_op_case();
|
||||
FRONT_END_CHECK_IMPLEMENTED(op_case == 1 || op_case == 2 || op_case == 3, "Unsupported PERMUTE case");
|
||||
ov::Output<Node> res;
|
||||
auto zero = ov::op::v0::Constant::create(ov::element::i64, {1}, {0});
|
||||
|
||||
if (op_case == 1) {
|
||||
if (context.is_static()) {
|
||||
res = std::make_shared<ov::op::v1::Transpose>(context.get_input(0),
|
||||
ov::op::v0::Constant::create(ov::element::i64, {3}, {1, 0, 2}));
|
||||
} else {
|
||||
res = std::make_shared<ov::op::v1::Transpose>(context.get_input(0),
|
||||
auto src = context.get_input(0);
|
||||
if (src.get_partial_shape().rank() == 3) {
|
||||
src = std::make_shared<ov::op::v0::Unsqueeze>(src, zero);
|
||||
}
|
||||
res = std::make_shared<ov::op::v1::Transpose>(src,
|
||||
ov::op::v0::Constant::create(ov::element::i64, {4}, {0, 2, 1, 3}));
|
||||
}
|
||||
} else {
|
||||
|
|
@ -43,7 +49,6 @@ OutputVector translate_permute(const NodeContext& context) {
|
|||
attention_size = context.get_input("attention_size_swa");
|
||||
}
|
||||
|
||||
auto zero = ov::op::v0::Constant::create(ov::element::i64, {1}, {0});
|
||||
auto one = ov::op::v0::Constant::create(ov::element::i64, {1}, {1});
|
||||
|
||||
if (context.is_static()) {
|
||||
|
|
@ -57,6 +62,9 @@ OutputVector translate_permute(const NodeContext& context) {
|
|||
res = std::make_shared<ov::op::v1::Transpose>(src_slice,
|
||||
ov::op::v0::Constant::create(ov::element::i64, {3}, {1, 0, 2}));
|
||||
} else {
|
||||
if (src.get_partial_shape().rank() == 3) {
|
||||
src = std::make_shared<ov::op::v0::Unsqueeze>(src, zero);
|
||||
}
|
||||
res = std::make_shared<ov::op::v1::Transpose>(src,
|
||||
ov::op::v0::Constant::create(ov::element::i64, {4}, {0, 2, 1, 3}));
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,3 +1,4 @@
|
|||
#include <cassert>
|
||||
#include <cstdint>
|
||||
#include <memory>
|
||||
#include <openvino/core/node.hpp>
|
||||
|
|
@ -8,7 +9,6 @@
|
|||
#include <openvino/op/convert.hpp>
|
||||
#include <openvino/op/gather.hpp>
|
||||
#include <openvino/op/reshape.hpp>
|
||||
#include <openvino/op/unsqueeze.hpp>
|
||||
#include <openvino/op/scatter_update.hpp>
|
||||
#include <openvino/op/shape_of.hpp>
|
||||
#include <openvino/op/slice.hpp>
|
||||
|
|
@ -55,14 +55,12 @@ OutputVector translate_set_rows(const NodeContext& context) {
|
|||
auto updated = std::make_shared<ov::op::v3::ScatterUpdate>(dst_reshaped, indices_reshaped, data_reshaped, zero);
|
||||
res = std::make_shared<ov::op::v1::Reshape>(updated, std::make_shared<ov::op::v0::ShapeOf>(dst), false);
|
||||
} else {
|
||||
// TODO: Better solution would be to reshape the data into 4D at first place (for stateful model)
|
||||
if (data.get_partial_shape().rank() + 1 == dst.get_partial_shape().rank()) {
|
||||
data = std::make_shared<ov::op::v0::Unsqueeze>(data, zero);
|
||||
}
|
||||
int concat_axis = 1;
|
||||
if (context.is_static())
|
||||
concat_axis = 0;
|
||||
res = std::make_shared<ov::op::v0::Concat>(OutputVector{dst, data}, concat_axis);
|
||||
assert(dst.get_partial_shape().rank() == 4 && dst.get_partial_shape()[2].is_static() && dst.get_partial_shape()[3].is_static());
|
||||
int64_t dim2 = dst.get_partial_shape()[2].get_length();
|
||||
int64_t dim3 = dst.get_partial_shape()[3].get_length();
|
||||
data = std::make_shared<ov::op::v1::Reshape>(
|
||||
data, ov::op::v0::Constant::create(ov::element::i64, {4}, {(int64_t) 1, (int64_t) -1, dim2, dim3}), false);
|
||||
res = std::make_shared<ov::op::v0::Concat>(OutputVector{dst, data}, 1);
|
||||
}
|
||||
return rename_outputs_with_suffix({res}, context.get_name());
|
||||
}
|
||||
|
|
|
|||
|
|
@ -64,10 +64,10 @@ OutputVector translate_soft_max(const NodeContext& context) {
|
|||
auto zero_1d = ov::op::v0::Constant::create(ov::element::i64, {1}, {0});
|
||||
auto two_1d = ov::op::v0::Constant::create(ov::element::i64, {1}, {2});
|
||||
auto axes = ov::op::v0::Constant::create(ov::element::i64, {2}, {1,2});
|
||||
auto leaf_8 = context.get_input("leaf_8");
|
||||
auto shape_of_leaf_8 = std::make_shared<ov::op::v3::ShapeOf>(leaf_8);
|
||||
auto gather_leaf_8 = std::make_shared<ov::op::v8::Gather>(shape_of_leaf_8, two_1d, zero_1d);
|
||||
auto stop = std::make_shared<ov::op::v0::Concat>(ov::OutputVector{token_len, gather_leaf_8}, 0);
|
||||
auto inp_pos = context.get_input("inp_pos");
|
||||
auto shape_of_inp_pos = std::make_shared<ov::op::v3::ShapeOf>(inp_pos);
|
||||
auto gather_inp_pos = std::make_shared<ov::op::v8::Gather>(shape_of_inp_pos, two_1d, zero_1d);
|
||||
auto stop = std::make_shared<ov::op::v0::Concat>(ov::OutputVector{token_len, gather_inp_pos}, 0);
|
||||
mask_node_sliced =
|
||||
std::make_shared<ov::op::v8::Slice>(mask_node, zero_2d, stop, one_2d, axes);
|
||||
if (!(context.is_static())) {
|
||||
|
|
|
|||
|
|
@ -93,10 +93,10 @@ void add_sliced_mask(TensorMap& tensor_map, GgmlDecoder& ggml_model_decoder) {
|
|||
auto zero_1d = ov::op::v0::Constant::create(ov::element::i64, {1}, {0});
|
||||
auto two_1d = ov::op::v0::Constant::create(ov::element::i64, {1}, {2});
|
||||
auto axes = ov::op::v0::Constant::create(ov::element::i64, {2}, {1,2});
|
||||
auto leaf_8 = tensor_map.at("leaf_8").get_node_shared_ptr();
|
||||
auto shape_of_leaf_8 = std::make_shared<ov::op::v3::ShapeOf>(leaf_8);
|
||||
auto gather_leaf_8 = std::make_shared<ov::op::v8::Gather>(shape_of_leaf_8, two_1d, zero_1d);
|
||||
auto stop = std::make_shared<ov::op::v0::Concat>(ov::OutputVector{token_len, gather_leaf_8}, 0);
|
||||
auto inp_pos = tensor_map.at("inp_pos").get_node_shared_ptr();
|
||||
auto shape_of_inp_pos = std::make_shared<ov::op::v3::ShapeOf>(inp_pos);
|
||||
auto gather_inp_pos = std::make_shared<ov::op::v8::Gather>(shape_of_inp_pos, two_1d, zero_1d);
|
||||
auto stop = std::make_shared<ov::op::v0::Concat>(ov::OutputVector{token_len, gather_inp_pos}, 0);
|
||||
mask_sliced =
|
||||
std::make_shared<ov::op::v8::Slice>(mask, zero_2d, stop, one_2d, axes);
|
||||
mask_sliced = std::make_shared<ov::op::v0::Unsqueeze>(mask_sliced, zero_1d);
|
||||
|
|
|
|||
Loading…
Reference in New Issue