Add cgraph tensor output name to OV op name

This commit is contained in:
Yu, Zijun 2025-05-22 10:32:18 +08:00 committed by Mustafa Cavus
parent fd324366d0
commit 8ce5cc597a
17 changed files with 71 additions and 47 deletions

View File

@ -11,10 +11,9 @@ namespace op {
// Builds an ov::op::v1::Add from the two inputs of `context` and returns it as
// the single entry of an OutputVector.
// NOTE(review): this text looks like a rendered diff with the +/- markers
// stripped, so pre-change and post-change lines appear together below; as
// written, everything after the first `return` is unreachable. Confirm
// against the real source file before relying on either variant.
OutputVector translate_add(const NodeContext& context) {
// Presumably validates that the node has exactly two inputs (min 2, max 2) — TODO confirm.
num_inputs_check(context, 2, 2);
// Presumably the pre-change lines: build the Add and return it without renaming.
auto lhs = context.get_input(0);
auto rhs = context.get_input(1);
auto add = std::make_shared<ov::op::v1::Add>(lhs, rhs);
return {add};
// Presumably the post-change lines: build the same Add, then pass it through
// rename_outputs_with_suffix together with context.get_name().
auto res = std::make_shared<ov::op::v1::Add>(context.get_input(0), context.get_input(1));
return rename_outputs_with_suffix({res}, context.get_name());
}
} // namespace op

View File

@ -22,16 +22,15 @@ OutputVector translate_cont(const NodeContext& context) {
auto src_shape = context.get_input_shape(0).to_shape();
auto dst_shape = context.get_output_shape(0).to_shape();
ov::Output<Node> res;
if (op_case == 1) {
// The input comes from a PERMUTE
dst_shape[1] = -1;
auto result = std::make_shared<ov::op::v1::Reshape>(
res = std::make_shared<ov::op::v1::Reshape>(
context.get_input(0),
ov::op::v0::Constant::create(ov::element::i64, {dst_shape.size()}, dst_shape),
false);
return {result};
} else {
// The input comes from a VIEW
// Currently all cases are slicing at lowest dim
@ -43,13 +42,13 @@ OutputVector translate_cont(const NodeContext& context) {
std::vector<int64_t> end = {(int64_t)src_shape[0], INT_MAX, split_addr + (int64_t)src_shape[2]};
std::vector<int64_t> strides = {1, 1, 1};
auto begin_const = ov::op::v0::Constant::create(ov::element::i64, {begin.size()}, begin);
auto begin_const = ov::op::v0::Constant::create(element::i64, {begin.size()}, begin);
auto end_const = ov::op::v0::Constant::create(ov::element::i64, {end.size()}, end);
auto strides_const = ov::op::v0::Constant::create(ov::element::i64, {strides.size()}, strides);
auto slice = std::make_shared<ov::op::v8::Slice>(context.get_input(0), begin_const, end_const, strides_const);
return {slice};
res = std::make_shared<ov::op::v8::Slice>(context.get_input(0), begin_const, end_const, strides_const);
}
return rename_outputs_with_suffix({res}, context.get_name());
}
} // namespace op

View File

@ -33,6 +33,7 @@ OutputVector translate_cpy(const NodeContext& context) {
auto src0 = context.get_input(0);
auto src1 = context.get_input(1);
auto past_token_len = context.get_input("past_token_len");
ov::Output<Node> res;
auto src0_shape = context.get_input_shape(0).to_shape();
auto output_shape = context.get_output_shape(0).to_shape();
@ -63,8 +64,7 @@ OutputVector translate_cpy(const NodeContext& context) {
indices,
ov::op::v0::Constant::create(ov::element::i64, {1}, std::vector<int64_t>{1}));
auto res = std::make_shared<ov::op::v3::ScatterNDUpdate>(reshaped_src1, indices, src0);
return {res};
res = std::make_shared<ov::op::v3::ScatterNDUpdate>(reshaped_src1, indices, src0);
} else {
// Write V to cache_v
int64_t total_head_size = src0_shape[1];
@ -99,10 +99,10 @@ OutputVector translate_cpy(const NodeContext& context) {
ov::op::v0::Constant::create(ov::element::i64, {3}, std::vector<int64_t>{1, total_head_size, -1}),
false);
auto res = std::make_shared<ov::op::v0::Concat>(ov::OutputVector{src1_left, reshaped_src0, src1_right}, 2);
return {res};
res = std::make_shared<ov::op::v0::Concat>(ov::OutputVector{src1_left, reshaped_src0, src1_right}, 2);
}
return rename_outputs_with_suffix({res}, context.get_name());
}
} // namespace op

View File

@ -30,7 +30,7 @@ OutputVector translate_get_rows(const NodeContext& context) {
res = std::make_shared<ov::op::v0::Convert>(res, context.get_output_type(0));
}
return {res};
return rename_outputs_with_suffix({res}, context.get_name());
}
} // namespace op

View File

@ -12,7 +12,7 @@ OutputVector translate_mul(const NodeContext& context) {
num_inputs_check(context, 2, 2);
auto res = std::make_shared<ov::op::v1::Multiply>(context.get_input(0), context.get_input(1));
return {res};
return rename_outputs_with_suffix({res}, context.get_name());
}
} // namespace op

View File

@ -25,12 +25,13 @@ OutputVector translate_mulmat(const NodeContext& context) {
int op_case = context.get_op_case();
FRONT_END_CHECK_IMPLEMENTED(op_case == 1 || op_case == 2, "Unsupported MULMAT case");
ov::Output<Node> res;
if (op_case == 1) {
auto src0 = context.get_input(0);
auto src1 = std::make_shared<ov::op::v0::Convert>(context.get_input(1), context.get_input_type(0));
auto result_lp = std::make_shared<ov::op::v0::MatMul>(src1, src0, false, true);
auto result = std::make_shared<ov::op::v0::Convert>(result_lp, context.get_output_type(0));
return {result};
res = std::make_shared<ov::op::v0::Convert>(result_lp, context.get_output_type(0));
} else {
/*
Two cases here:
@ -118,10 +119,10 @@ OutputVector translate_mulmat(const NodeContext& context) {
}
auto result_lp = std::make_shared<ov::op::v0::MatMul>(A, B, false, true);
auto result = std::make_shared<ov::op::v0::Convert>(result_lp, context.get_output_type(0));
return {result};
res = std::make_shared<ov::op::v0::Convert>(result_lp, context.get_output_type(0));
}
return rename_outputs_with_suffix({res}, context.get_name());
}
} // namespace op

View File

@ -15,7 +15,7 @@ OutputVector translate_permute(const NodeContext& context) {
auto perm = argsort_descend(context.get_output_stride(0));
auto res = std::make_shared<ov::op::v1::Transpose>(context.get_input(0),
ov::op::v0::Constant::create(ov::element::i64, {3}, perm));
return {res};
return rename_outputs_with_suffix({res}, context.get_name());
}
} // namespace op

View File

@ -37,8 +37,8 @@ OutputVector translate_reshape(const NodeContext& context) {
{3},
std::vector<int64_t>{(int64_t)output_shape[0], -1, (int64_t)output_shape[2]});
}
Output<Node> res = std::make_shared<ov::op::v1::Reshape>(context.get_input(0), new_shape_node, false);
return {res};
auto res = std::make_shared<ov::op::v1::Reshape>(context.get_input(0), new_shape_node, false);
return rename_outputs_with_suffix({res}, context.get_name());
}
} // namespace op

View File

@ -39,7 +39,7 @@ OutputVector translate_rms_norm(const NodeContext& context) {
auto res = std::make_shared<ov::op::v1::Multiply>(input_node, scale);
return {res};
return rename_outputs_with_suffix({res}, context.get_name());
}
} // namespace op

View File

@ -52,6 +52,8 @@ void ggml_rope_yarn_corr_dims(int n_dims,
OutputVector translate_rope(const NodeContext& context) {
num_inputs_check(context, 2, 3);
ov::Output<Node> res;
auto data_node = context.get_input(0);
auto pos_node = context.get_input(1);
pos_node = std::make_shared<ov::op::v0::Convert>(pos_node, ov::element::f32);
@ -141,9 +143,7 @@ OutputVector translate_rope(const NodeContext& context) {
ov::element::i64,
Shape{3},
std::vector<int64_t>{-1, input_shape[1].get_length(), input_shape[2].get_length()});
auto reshaped = std::make_shared<ov::op::v1::Reshape>(stack, shape_const, false);
return {reshaped};
res = std::make_shared<ov::op::v1::Reshape>(stack, shape_const, false);
} else {
auto slice_node =
std::make_shared<ov::op::v1::Split>(data_node,
@ -160,9 +160,10 @@ OutputVector translate_rope(const NodeContext& context) {
std::make_shared<ov::op::v1::Multiply>(slice_data_node_0, sin_theta_node),
std::make_shared<ov::op::v1::Multiply>(slice_data_node_1, cos_theta_node));
auto res_node = std::make_shared<ov::op::v0::Concat>(ov::OutputVector{first_half_node, second_half_node}, 2);
return {res_node};
res = std::make_shared<ov::op::v0::Concat>(ov::OutputVector{first_half_node, second_half_node}, 2);
}
return rename_outputs_with_suffix({res}, context.get_name());
}
} // namespace op

View File

@ -19,7 +19,7 @@ OutputVector translate_scale(const NodeContext& context) {
auto res = std::make_shared<ov::op::v1::Multiply>(context.get_input(0), scale_node);
return {res};
return rename_outputs_with_suffix({res}, context.get_name());
}
} // namespace op

View File

@ -24,6 +24,7 @@ OutputVector translate_soft_max(const NodeContext& context) {
num_inputs_check(context, 1, 2);
auto input_node = context.get_input(0);
ov::Output<Node> res;
float scale = 1.0f;
float max_bias = 0.0f;
@ -56,13 +57,13 @@ OutputVector translate_soft_max(const NodeContext& context) {
}
// Stride slice mask node
Output<Node> mask_begin_node = ov::op::v0::Constant::create(ov::element::i64, Shape{3}, {0, 0, 0});
Output<Node> slice_start = ov::op::v0::Constant::create(ov::element::i64, Shape{3}, {0, 0, 0});
auto one = ov::op::v0::Constant::create(ov::element::i64, ov::Shape{1}, {1});
auto input_last_two_dim = get_dimensions(input_node.get_node_shared_ptr(), {1, 2});
auto mask_slice_shape = std::make_shared<ov::op::v0::Concat>(ov::NodeVector{one, input_last_two_dim}, 0);
Output<Node> mask_stride_node = ov::op::v0::Constant::create(ov::element::i64, Shape{3}, {1, 1, 1});
auto mask_node_sliced =
std::make_shared<ov::op::v8::Slice>(mask_node, mask_begin_node, mask_slice_shape, mask_stride_node);
auto token_len = get_dimensions(input_node.get_node_shared_ptr(), {1});
auto total_token_len = get_dimensions(mask_node.get_node_shared_ptr(), {2});
auto slice_end = std::make_shared<ov::op::v0::Concat>(ov::NodeVector{one, token_len, total_token_len}, 0);
Output<Node> slice_stride = ov::op::v0::Constant::create(ov::element::i64, Shape{3}, {1, 1, 1});
auto mask_node_sliced = std::make_shared<ov::op::v8::Slice>(mask_node, slice_start, slice_end, slice_stride);
// slope * mask
auto slope_node =
@ -73,13 +74,13 @@ OutputVector translate_soft_max(const NodeContext& context) {
auto input_slope_mask_node = std::make_shared<ov::op::v1::Add>(input_node, slope_mask_node);
// Calculate softmax
auto res = std::make_shared<ov::op::v8::Softmax>(input_slope_mask_node, 2);
return {res};
res = std::make_shared<ov::op::v8::Softmax>(input_slope_mask_node, 2);
} else {
// Directly softmax
auto res = std::make_shared<ov::op::v8::Softmax>(input_node, 0);
return {res};
res = std::make_shared<ov::op::v8::Softmax>(input_node, 0);
}
return rename_outputs_with_suffix({res}, context.get_name());
}
} // namespace op

View File

@ -14,7 +14,7 @@ OutputVector translate_transpose(const NodeContext& context) {
auto perm = argsort_descend(context.get_output_stride(0));
auto res = std::make_shared<ov::op::v1::Transpose>(context.get_input(0),
ov::op::v0::Constant::create(ov::element::i64, {3}, perm));
return {res};
return rename_outputs_with_suffix({res}, context.get_name());
}
} // namespace op

View File

@ -17,7 +17,7 @@ OutputVector translate_unary_silu(const NodeContext& context) {
auto sigmoid = std::make_shared<ov::op::v0::Sigmoid>(input);
auto res = std::make_shared<ov::op::v1::Multiply>(input, sigmoid);
return {res};
return rename_outputs_with_suffix({res}, context.get_name());
}
} // namespace op

View File

@ -1,5 +1,8 @@
#include "translate_session.hpp"
#include <cstdlib>
#include <openvino/pass/constant_folding.hpp>
#include "input_model.hpp"
namespace ov {
@ -91,11 +94,18 @@ std::shared_ptr<Model> TranslateSession::translate_graph(const frontend::InputMo
used_params.push_back(param);
}
}
if (auto diff = params.size() - used_params.size()) {
std::cout << diff << " parameters are not used in the model." << std::endl;
if (getenv("GGML_OPENVINO_PROFILING")) {
if (auto diff = params.size() - used_params.size()) {
std::cout << diff << " parameters are not used in the model." << std::endl;
}
}
resulting_model = std::make_shared<Model>(results, used_params);
ov::pass::Manager manager;
manager.set_per_pass_validation(true);
manager.register_pass<ov::pass::ConstantFolding>();
manager.run_passes(resulting_model);
return resulting_model;
}

View File

@ -47,6 +47,17 @@ std::shared_ptr<ov::Node> get_dimensions(const std::shared_ptr<ov::Node>& node,
return get_dimensions(std::make_shared<ov::op::v3::ShapeOf>(node), dims);
}
// Appends "_" + suffix to the friendly name of every node that produces one of
// `outputs`, then returns a copy of `outputs`.
//
// Several entries of `outputs` may belong to the same node (an op with more
// than one output port). Such a node is renamed only once per call, so its
// friendly name does not end up with the suffix repeated.
//
// @param outputs  outputs whose producing nodes should be renamed
// @param suffix   text appended (after an underscore) to each node's friendly name
// @return         the same outputs that were passed in
OutputVector rename_outputs_with_suffix(const OutputVector& outputs, const std::string& suffix) {
    for (size_t i = 0; i < outputs.size(); ++i) {
        const auto node = outputs[i].get_node_shared_ptr();

        // Skip a node that was already renamed through an earlier entry.
        // The vectors handled here are tiny, so a linear scan is sufficient.
        bool already_renamed = false;
        for (size_t j = 0; j < i; ++j) {
            if (outputs[j].get_node_shared_ptr() == node) {
                already_renamed = true;
                break;
            }
        }
        if (already_renamed) {
            continue;
        }

        std::string name = node->get_friendly_name();
        name += "_";
        name += suffix;
        node->set_friendly_name(name);
    }
    return outputs;
}
} // namespace ggml
} // namespace frontend
} // namespace ov

View File

@ -55,6 +55,8 @@ std::vector<T> permute(const std::vector<T>& x, const std::vector<int>& perm) {
std::shared_ptr<ov::Node> get_dimensions(const std::shared_ptr<op::v3::ShapeOf>& shape, const std::vector<int>& dims);
std::shared_ptr<ov::Node> get_dimensions(const std::shared_ptr<ov::Node>& node, const std::vector<int>& dims);
// Appends "_" + suffix to the friendly name of the node behind each entry of
// `outputs` and returns the outputs, so translated ops can carry the name
// supplied by the caller (the translators pass context.get_name()).
OutputVector rename_outputs_with_suffix(const OutputVector& outputs, const std::string& suffix);
namespace op {
template <typename T>
OutputVector translate_1to1_match_2_inputs(const NodeContext& context) {