Add cgraph tensor output name to OV op name

This commit is contained in:
Yu, Zijun 2025-05-22 10:32:18 +08:00 committed by Mustafa Cavus
parent fd324366d0
commit 8ce5cc597a
17 changed files with 71 additions and 47 deletions

View File

@ -11,10 +11,9 @@ namespace op {
OutputVector translate_add(const NodeContext& context) { OutputVector translate_add(const NodeContext& context) {
num_inputs_check(context, 2, 2); num_inputs_check(context, 2, 2);
auto lhs = context.get_input(0); auto res = std::make_shared<ov::op::v1::Add>(context.get_input(0), context.get_input(1));
auto rhs = context.get_input(1);
auto add = std::make_shared<ov::op::v1::Add>(lhs, rhs); return rename_outputs_with_suffix({res}, context.get_name());
return {add};
} }
} // namespace op } // namespace op

View File

@ -22,16 +22,15 @@ OutputVector translate_cont(const NodeContext& context) {
auto src_shape = context.get_input_shape(0).to_shape(); auto src_shape = context.get_input_shape(0).to_shape();
auto dst_shape = context.get_output_shape(0).to_shape(); auto dst_shape = context.get_output_shape(0).to_shape();
ov::Output<Node> res;
if (op_case == 1) { if (op_case == 1) {
// The input comes from a PERMUTE // The input comes from a PERMUTE
dst_shape[1] = -1; dst_shape[1] = -1;
auto result = std::make_shared<ov::op::v1::Reshape>( res = std::make_shared<ov::op::v1::Reshape>(
context.get_input(0), context.get_input(0),
ov::op::v0::Constant::create(ov::element::i64, {dst_shape.size()}, dst_shape), ov::op::v0::Constant::create(ov::element::i64, {dst_shape.size()}, dst_shape),
false); false);
return {result};
} else { } else {
// The input comes from a VIEW // The input comes from a VIEW
// Currently all cases are slicing at lowest dim // Currently all cases are slicing at lowest dim
@ -43,13 +42,13 @@ OutputVector translate_cont(const NodeContext& context) {
std::vector<int64_t> end = {(int64_t)src_shape[0], INT_MAX, split_addr + (int64_t)src_shape[2]}; std::vector<int64_t> end = {(int64_t)src_shape[0], INT_MAX, split_addr + (int64_t)src_shape[2]};
std::vector<int64_t> strides = {1, 1, 1}; std::vector<int64_t> strides = {1, 1, 1};
auto begin_const = ov::op::v0::Constant::create(ov::element::i64, {begin.size()}, begin); auto begin_const = ov::op::v0::Constant::create(element::i64, {begin.size()}, begin);
auto end_const = ov::op::v0::Constant::create(ov::element::i64, {end.size()}, end); auto end_const = ov::op::v0::Constant::create(ov::element::i64, {end.size()}, end);
auto strides_const = ov::op::v0::Constant::create(ov::element::i64, {strides.size()}, strides); auto strides_const = ov::op::v0::Constant::create(ov::element::i64, {strides.size()}, strides);
auto slice = std::make_shared<ov::op::v8::Slice>(context.get_input(0), begin_const, end_const, strides_const); res = std::make_shared<ov::op::v8::Slice>(context.get_input(0), begin_const, end_const, strides_const);
return {slice};
} }
return rename_outputs_with_suffix({res}, context.get_name());
} }
} // namespace op } // namespace op

View File

@ -33,6 +33,7 @@ OutputVector translate_cpy(const NodeContext& context) {
auto src0 = context.get_input(0); auto src0 = context.get_input(0);
auto src1 = context.get_input(1); auto src1 = context.get_input(1);
auto past_token_len = context.get_input("past_token_len"); auto past_token_len = context.get_input("past_token_len");
ov::Output<Node> res;
auto src0_shape = context.get_input_shape(0).to_shape(); auto src0_shape = context.get_input_shape(0).to_shape();
auto output_shape = context.get_output_shape(0).to_shape(); auto output_shape = context.get_output_shape(0).to_shape();
@ -63,8 +64,7 @@ OutputVector translate_cpy(const NodeContext& context) {
indices, indices,
ov::op::v0::Constant::create(ov::element::i64, {1}, std::vector<int64_t>{1})); ov::op::v0::Constant::create(ov::element::i64, {1}, std::vector<int64_t>{1}));
auto res = std::make_shared<ov::op::v3::ScatterNDUpdate>(reshaped_src1, indices, src0); res = std::make_shared<ov::op::v3::ScatterNDUpdate>(reshaped_src1, indices, src0);
return {res};
} else { } else {
// Write V to cache_v // Write V to cache_v
int64_t total_head_size = src0_shape[1]; int64_t total_head_size = src0_shape[1];
@ -99,10 +99,10 @@ OutputVector translate_cpy(const NodeContext& context) {
ov::op::v0::Constant::create(ov::element::i64, {3}, std::vector<int64_t>{1, total_head_size, -1}), ov::op::v0::Constant::create(ov::element::i64, {3}, std::vector<int64_t>{1, total_head_size, -1}),
false); false);
auto res = std::make_shared<ov::op::v0::Concat>(ov::OutputVector{src1_left, reshaped_src0, src1_right}, 2); res = std::make_shared<ov::op::v0::Concat>(ov::OutputVector{src1_left, reshaped_src0, src1_right}, 2);
return {res};
} }
return rename_outputs_with_suffix({res}, context.get_name());
} }
} // namespace op } // namespace op

View File

@ -30,7 +30,7 @@ OutputVector translate_get_rows(const NodeContext& context) {
res = std::make_shared<ov::op::v0::Convert>(res, context.get_output_type(0)); res = std::make_shared<ov::op::v0::Convert>(res, context.get_output_type(0));
} }
return {res}; return rename_outputs_with_suffix({res}, context.get_name());
} }
} // namespace op } // namespace op

View File

@ -12,7 +12,7 @@ OutputVector translate_mul(const NodeContext& context) {
num_inputs_check(context, 2, 2); num_inputs_check(context, 2, 2);
auto res = std::make_shared<ov::op::v1::Multiply>(context.get_input(0), context.get_input(1)); auto res = std::make_shared<ov::op::v1::Multiply>(context.get_input(0), context.get_input(1));
return {res}; return rename_outputs_with_suffix({res}, context.get_name());
} }
} // namespace op } // namespace op

View File

@ -25,12 +25,13 @@ OutputVector translate_mulmat(const NodeContext& context) {
int op_case = context.get_op_case(); int op_case = context.get_op_case();
FRONT_END_CHECK_IMPLEMENTED(op_case == 1 || op_case == 2, "Unsupported MULMAT case"); FRONT_END_CHECK_IMPLEMENTED(op_case == 1 || op_case == 2, "Unsupported MULMAT case");
ov::Output<Node> res;
if (op_case == 1) { if (op_case == 1) {
auto src0 = context.get_input(0); auto src0 = context.get_input(0);
auto src1 = std::make_shared<ov::op::v0::Convert>(context.get_input(1), context.get_input_type(0)); auto src1 = std::make_shared<ov::op::v0::Convert>(context.get_input(1), context.get_input_type(0));
auto result_lp = std::make_shared<ov::op::v0::MatMul>(src1, src0, false, true); auto result_lp = std::make_shared<ov::op::v0::MatMul>(src1, src0, false, true);
auto result = std::make_shared<ov::op::v0::Convert>(result_lp, context.get_output_type(0)); res = std::make_shared<ov::op::v0::Convert>(result_lp, context.get_output_type(0));
return {result};
} else { } else {
/* /*
Two cases here: Two cases here:
@ -118,10 +119,10 @@ OutputVector translate_mulmat(const NodeContext& context) {
} }
auto result_lp = std::make_shared<ov::op::v0::MatMul>(A, B, false, true); auto result_lp = std::make_shared<ov::op::v0::MatMul>(A, B, false, true);
auto result = std::make_shared<ov::op::v0::Convert>(result_lp, context.get_output_type(0)); res = std::make_shared<ov::op::v0::Convert>(result_lp, context.get_output_type(0));
return {result};
} }
return rename_outputs_with_suffix({res}, context.get_name());
} }
} // namespace op } // namespace op

View File

@ -15,7 +15,7 @@ OutputVector translate_permute(const NodeContext& context) {
auto perm = argsort_descend(context.get_output_stride(0)); auto perm = argsort_descend(context.get_output_stride(0));
auto res = std::make_shared<ov::op::v1::Transpose>(context.get_input(0), auto res = std::make_shared<ov::op::v1::Transpose>(context.get_input(0),
ov::op::v0::Constant::create(ov::element::i64, {3}, perm)); ov::op::v0::Constant::create(ov::element::i64, {3}, perm));
return {res}; return rename_outputs_with_suffix({res}, context.get_name());
} }
} // namespace op } // namespace op

View File

@ -37,8 +37,8 @@ OutputVector translate_reshape(const NodeContext& context) {
{3}, {3},
std::vector<int64_t>{(int64_t)output_shape[0], -1, (int64_t)output_shape[2]}); std::vector<int64_t>{(int64_t)output_shape[0], -1, (int64_t)output_shape[2]});
} }
Output<Node> res = std::make_shared<ov::op::v1::Reshape>(context.get_input(0), new_shape_node, false); auto res = std::make_shared<ov::op::v1::Reshape>(context.get_input(0), new_shape_node, false);
return {res}; return rename_outputs_with_suffix({res}, context.get_name());
} }
} // namespace op } // namespace op

View File

@ -39,7 +39,7 @@ OutputVector translate_rms_norm(const NodeContext& context) {
auto res = std::make_shared<ov::op::v1::Multiply>(input_node, scale); auto res = std::make_shared<ov::op::v1::Multiply>(input_node, scale);
return {res}; return rename_outputs_with_suffix({res}, context.get_name());
} }
} // namespace op } // namespace op

View File

@ -52,6 +52,8 @@ void ggml_rope_yarn_corr_dims(int n_dims,
OutputVector translate_rope(const NodeContext& context) { OutputVector translate_rope(const NodeContext& context) {
num_inputs_check(context, 2, 3); num_inputs_check(context, 2, 3);
ov::Output<Node> res;
auto data_node = context.get_input(0); auto data_node = context.get_input(0);
auto pos_node = context.get_input(1); auto pos_node = context.get_input(1);
pos_node = std::make_shared<ov::op::v0::Convert>(pos_node, ov::element::f32); pos_node = std::make_shared<ov::op::v0::Convert>(pos_node, ov::element::f32);
@ -141,9 +143,7 @@ OutputVector translate_rope(const NodeContext& context) {
ov::element::i64, ov::element::i64,
Shape{3}, Shape{3},
std::vector<int64_t>{-1, input_shape[1].get_length(), input_shape[2].get_length()}); std::vector<int64_t>{-1, input_shape[1].get_length(), input_shape[2].get_length()});
auto reshaped = std::make_shared<ov::op::v1::Reshape>(stack, shape_const, false); res = std::make_shared<ov::op::v1::Reshape>(stack, shape_const, false);
return {reshaped};
} else { } else {
auto slice_node = auto slice_node =
std::make_shared<ov::op::v1::Split>(data_node, std::make_shared<ov::op::v1::Split>(data_node,
@ -160,9 +160,10 @@ OutputVector translate_rope(const NodeContext& context) {
std::make_shared<ov::op::v1::Multiply>(slice_data_node_0, sin_theta_node), std::make_shared<ov::op::v1::Multiply>(slice_data_node_0, sin_theta_node),
std::make_shared<ov::op::v1::Multiply>(slice_data_node_1, cos_theta_node)); std::make_shared<ov::op::v1::Multiply>(slice_data_node_1, cos_theta_node));
auto res_node = std::make_shared<ov::op::v0::Concat>(ov::OutputVector{first_half_node, second_half_node}, 2); res = std::make_shared<ov::op::v0::Concat>(ov::OutputVector{first_half_node, second_half_node}, 2);
return {res_node};
} }
return rename_outputs_with_suffix({res}, context.get_name());
} }
} // namespace op } // namespace op

View File

@ -19,7 +19,7 @@ OutputVector translate_scale(const NodeContext& context) {
auto res = std::make_shared<ov::op::v1::Multiply>(context.get_input(0), scale_node); auto res = std::make_shared<ov::op::v1::Multiply>(context.get_input(0), scale_node);
return {res}; return rename_outputs_with_suffix({res}, context.get_name());
} }
} // namespace op } // namespace op

View File

@ -24,6 +24,7 @@ OutputVector translate_soft_max(const NodeContext& context) {
num_inputs_check(context, 1, 2); num_inputs_check(context, 1, 2);
auto input_node = context.get_input(0); auto input_node = context.get_input(0);
ov::Output<Node> res;
float scale = 1.0f; float scale = 1.0f;
float max_bias = 0.0f; float max_bias = 0.0f;
@ -56,13 +57,13 @@ OutputVector translate_soft_max(const NodeContext& context) {
} }
// Stride slice mask node // Stride slice mask node
Output<Node> mask_begin_node = ov::op::v0::Constant::create(ov::element::i64, Shape{3}, {0, 0, 0}); Output<Node> slice_start = ov::op::v0::Constant::create(ov::element::i64, Shape{3}, {0, 0, 0});
auto one = ov::op::v0::Constant::create(ov::element::i64, ov::Shape{1}, {1}); auto one = ov::op::v0::Constant::create(ov::element::i64, ov::Shape{1}, {1});
auto input_last_two_dim = get_dimensions(input_node.get_node_shared_ptr(), {1, 2}); auto token_len = get_dimensions(input_node.get_node_shared_ptr(), {1});
auto mask_slice_shape = std::make_shared<ov::op::v0::Concat>(ov::NodeVector{one, input_last_two_dim}, 0); auto total_token_len = get_dimensions(mask_node.get_node_shared_ptr(), {2});
Output<Node> mask_stride_node = ov::op::v0::Constant::create(ov::element::i64, Shape{3}, {1, 1, 1}); auto slice_end = std::make_shared<ov::op::v0::Concat>(ov::NodeVector{one, token_len, total_token_len}, 0);
auto mask_node_sliced = Output<Node> slice_stride = ov::op::v0::Constant::create(ov::element::i64, Shape{3}, {1, 1, 1});
std::make_shared<ov::op::v8::Slice>(mask_node, mask_begin_node, mask_slice_shape, mask_stride_node); auto mask_node_sliced = std::make_shared<ov::op::v8::Slice>(mask_node, slice_start, slice_end, slice_stride);
// slope * mask // slope * mask
auto slope_node = auto slope_node =
@ -73,13 +74,13 @@ OutputVector translate_soft_max(const NodeContext& context) {
auto input_slope_mask_node = std::make_shared<ov::op::v1::Add>(input_node, slope_mask_node); auto input_slope_mask_node = std::make_shared<ov::op::v1::Add>(input_node, slope_mask_node);
// Calculate softmax // Calculate softmax
auto res = std::make_shared<ov::op::v8::Softmax>(input_slope_mask_node, 2); res = std::make_shared<ov::op::v8::Softmax>(input_slope_mask_node, 2);
return {res};
} else { } else {
// Directly softmax // Directly softmax
auto res = std::make_shared<ov::op::v8::Softmax>(input_node, 0); res = std::make_shared<ov::op::v8::Softmax>(input_node, 0);
return {res};
} }
return rename_outputs_with_suffix({res}, context.get_name());
} }
} // namespace op } // namespace op

View File

@ -14,7 +14,7 @@ OutputVector translate_transpose(const NodeContext& context) {
auto perm = argsort_descend(context.get_output_stride(0)); auto perm = argsort_descend(context.get_output_stride(0));
auto res = std::make_shared<ov::op::v1::Transpose>(context.get_input(0), auto res = std::make_shared<ov::op::v1::Transpose>(context.get_input(0),
ov::op::v0::Constant::create(ov::element::i64, {3}, perm)); ov::op::v0::Constant::create(ov::element::i64, {3}, perm));
return {res}; return rename_outputs_with_suffix({res}, context.get_name());
} }
} // namespace op } // namespace op

View File

@ -17,7 +17,7 @@ OutputVector translate_unary_silu(const NodeContext& context) {
auto sigmoid = std::make_shared<ov::op::v0::Sigmoid>(input); auto sigmoid = std::make_shared<ov::op::v0::Sigmoid>(input);
auto res = std::make_shared<ov::op::v1::Multiply>(input, sigmoid); auto res = std::make_shared<ov::op::v1::Multiply>(input, sigmoid);
return {res}; return rename_outputs_with_suffix({res}, context.get_name());
} }
} // namespace op } // namespace op

View File

@ -1,5 +1,8 @@
#include "translate_session.hpp" #include "translate_session.hpp"
#include <cstdlib>
#include <openvino/pass/constant_folding.hpp>
#include "input_model.hpp" #include "input_model.hpp"
namespace ov { namespace ov {
@ -91,11 +94,18 @@ std::shared_ptr<Model> TranslateSession::translate_graph(const frontend::InputMo
used_params.push_back(param); used_params.push_back(param);
} }
} }
if (auto diff = params.size() - used_params.size()) { if (getenv("GGML_OPENVINO_PROFILING")) {
std::cout << diff << " parameters are not used in the model." << std::endl; if (auto diff = params.size() - used_params.size()) {
std::cout << diff << " parameters are not used in the model." << std::endl;
}
} }
resulting_model = std::make_shared<Model>(results, used_params); resulting_model = std::make_shared<Model>(results, used_params);
ov::pass::Manager manager;
manager.set_per_pass_validation(true);
manager.register_pass<ov::pass::ConstantFolding>();
manager.run_passes(resulting_model);
return resulting_model; return resulting_model;
} }

View File

@ -47,6 +47,17 @@ std::shared_ptr<ov::Node> get_dimensions(const std::shared_ptr<ov::Node>& node,
return get_dimensions(std::make_shared<ov::op::v3::ShapeOf>(node), dims); return get_dimensions(std::make_shared<ov::op::v3::ShapeOf>(node), dims);
} }
// Appends "_<suffix>" to the friendly name of the node behind every entry of
// `outputs`, then returns `outputs` unchanged.
//
// The rename is applied once per entry: if the same node is referenced by
// several entries, its name receives the suffix once for each of them.
OutputVector rename_outputs_with_suffix(const OutputVector& outputs, const std::string& suffix) {
    for (const auto& out : outputs) {
        const auto producer = out.get_node_shared_ptr();
        // Build "<current name>_<suffix>" in one expression and store it back.
        producer->set_friendly_name(producer->get_friendly_name() + "_" + suffix);
    }
    return outputs;
}
} // namespace ggml } // namespace ggml
} // namespace frontend } // namespace frontend
} // namespace ov } // namespace ov

View File

@ -55,6 +55,8 @@ std::vector<T> permute(const std::vector<T>& x, const std::vector<int>& perm) {
std::shared_ptr<ov::Node> get_dimensions(const std::shared_ptr<op::v3::ShapeOf>& shape, const std::vector<int>& dims); std::shared_ptr<ov::Node> get_dimensions(const std::shared_ptr<op::v3::ShapeOf>& shape, const std::vector<int>& dims);
std::shared_ptr<ov::Node> get_dimensions(const std::shared_ptr<ov::Node>& node, const std::vector<int>& dims); std::shared_ptr<ov::Node> get_dimensions(const std::shared_ptr<ov::Node>& node, const std::vector<int>& dims);
// Appends "_" followed by `suffix` to the friendly name of the node behind
// each entry of `outputs` and returns the same outputs.
OutputVector rename_outputs_with_suffix(const OutputVector& outputs, const std::string& suffix);
namespace op { namespace op {
template <typename T> template <typename T>
OutputVector translate_1to1_match_2_inputs(const NodeContext& context) { OutputVector translate_1to1_match_2_inputs(const NodeContext& context) {