OV Frontend supports GET_ROWS/RMS_NORM/MUL/MUL_MAT/ROPE/SCALE/SOFTMAX/ADD adjacent op graph conversion
This commit is contained in:
parent
95ae982d59
commit
9a7b7d8d6d
|
|
@ -1279,7 +1279,6 @@ static const std::set<std::string>& openvino_ops = []() -> const std::set<std::s
|
|||
case GGML_OP_ADD:
|
||||
return true;
|
||||
case GGML_OP_MUL:
|
||||
return true;
|
||||
case GGML_OP_MUL_MAT:
|
||||
return false;
|
||||
case GGML_OP_UNARY:
|
||||
|
|
|
|||
|
|
@ -38,6 +38,8 @@ public:
|
|||
|
||||
virtual const std::string& get_node_op_name(const std::string& name) const = 0;
|
||||
|
||||
// Returns a mutable reference to the op name recorded for an input entry.
// With the default index of -1 the entry is selected by matching `name`;
// otherwise the entry is selected by its position `index`.
// NOTE(review): contract inferred from the GgmlOvDecoder implementation — confirm for other decoders.
virtual std::string& get_op_node_name(const std::string& name, const int index = -1) = 0;
|
||||
|
||||
// virtual const struct tensor_info get_node_op_info(const std::string& name) const = 0;
|
||||
|
||||
virtual PartialShape get_output_shape(const std::string& name) const = 0;
|
||||
|
|
|
|||
|
|
@ -33,6 +33,7 @@ void GgmlOvDecoder::set_input_output(ggml_tensor* node, std::map<std::string, gg
|
|||
outputs[node_name] = node;
|
||||
m_input_names.push_back(src0_name);
|
||||
m_node_op_name[src0_name] = ggml_op_name(node->op);
|
||||
m_op_node_name.emplace_back(src0_name, ggml_op_name(node->op));
|
||||
m_output_names.push_back(node_name);
|
||||
break;
|
||||
}
|
||||
|
|
@ -43,6 +44,7 @@ void GgmlOvDecoder::set_input_output(ggml_tensor* node, std::map<std::string, gg
|
|||
outputs[node_name] = node;
|
||||
m_input_names.push_back(src0_name);
|
||||
m_node_op_name[src0_name] = ggml_op_name(node->op);
|
||||
m_op_node_name.emplace_back(src0_name, ggml_op_name(node->op));
|
||||
m_output_names.push_back(node_name);
|
||||
m_continuous = true;
|
||||
|
||||
|
|
@ -67,13 +69,15 @@ void GgmlOvDecoder::set_input_output(ggml_tensor* node, std::map<std::string, gg
|
|||
outputs[node_name] = node;
|
||||
m_input_names.push_back(src0_name);
|
||||
m_node_op_name[src0_name] = ggml_op_name(node->op);
|
||||
m_op_node_name.emplace_back(src0_name, ggml_op_name(node->op));
|
||||
m_output_names.push_back(node_name);
|
||||
|
||||
const size_t element_size = ggml_type_size(node->src[0]->type);
|
||||
size_t valid_elems = static_cast<size_t>(node->src[0]->ne[0]); // 3072
|
||||
size_t num_rows = static_cast<size_t>(node->src[0]->ne[1]); // 7
|
||||
size_t phys_stride = static_cast<size_t>(node->src[0]->nb[1]) / element_size; // 9216
|
||||
size_t total_phys = (num_rows - 1) * phys_stride + valid_elems; // 6*9216 + 3072 = 58368
|
||||
// size_t total_phys = (num_rows - 1) * phys_stride + valid_elems; // 6*9216 + 3072 = 58368
|
||||
size_t total_phys = num_rows * phys_stride; // 7 * 9216 = 64512
|
||||
ov::Shape flat_input_shape = { total_phys };
|
||||
auto flat_input_param = std::make_shared<ov::op::v0::Parameter>(ov::element::f32, flat_input_shape);
|
||||
m_params.push_back(flat_input_param);
|
||||
|
|
@ -87,6 +91,7 @@ void GgmlOvDecoder::set_input_output(ggml_tensor* node, std::map<std::string, gg
|
|||
outputs[node_name] = node;
|
||||
m_input_names.push_back(src0_name);
|
||||
m_node_op_name[src0_name] = ggml_op_name(node->op);
|
||||
m_op_node_name.emplace_back(src0_name, ggml_op_name(node->op));
|
||||
m_output_names.push_back(node_name);
|
||||
|
||||
size_t valid_i = static_cast<size_t>(node->src[0]->ne[0]); // 96
|
||||
|
|
@ -108,6 +113,7 @@ void GgmlOvDecoder::set_input_output(ggml_tensor* node, std::map<std::string, gg
|
|||
outputs[node_name] = node;
|
||||
m_input_names.push_back(src0_name);
|
||||
m_node_op_name[src0_name] = ggml_op_name(node->op);
|
||||
m_op_node_name.emplace_back(src0_name, ggml_op_name(node->op));
|
||||
m_output_names.push_back(node_name);
|
||||
m_continuous = true;
|
||||
|
||||
|
|
@ -130,6 +136,7 @@ void GgmlOvDecoder::set_input_output(ggml_tensor* node, std::map<std::string, gg
|
|||
outputs[node_name] = node;
|
||||
m_input_names.push_back(node_name);
|
||||
m_node_op_name[node_name] = ggml_op_name(node->op);
|
||||
m_op_node_name.emplace_back(src0_name, ggml_op_name(node->op));
|
||||
m_output_names.push_back(node_name);
|
||||
m_continuous = false;
|
||||
break;
|
||||
|
|
@ -161,10 +168,12 @@ void GgmlOvDecoder::set_input_output(ggml_tensor* node, std::map<std::string, gg
|
|||
case GGML_OP_VIEW:
|
||||
{
|
||||
// std::string node_name = std::string(node->name) + "_" + std::to_string(node->view_offs) + "_output_" + ggml_op_name(node->op);
|
||||
// std::string node_name = std::string(node->name) + "_" + std::to_string(node->view_offs);
|
||||
inputs[node_name] = node;
|
||||
outputs[node_name] = node;
|
||||
m_input_names.push_back(node_name);
|
||||
m_node_op_name[node_name] = ggml_op_name(node->op);
|
||||
m_op_node_name.emplace_back(node_name, ggml_op_name(node->op));
|
||||
m_output_names.push_back(node_name);
|
||||
break;
|
||||
}
|
||||
|
|
@ -175,6 +184,7 @@ void GgmlOvDecoder::set_input_output(ggml_tensor* node, std::map<std::string, gg
|
|||
outputs[node_name] = node;
|
||||
m_input_names.push_back(src0_name);
|
||||
m_node_op_name[src0_name] = ggml_op_name(node->op);
|
||||
m_op_node_name.emplace_back(src0_name, ggml_op_name(node->op));
|
||||
m_output_names.push_back(node_name);
|
||||
break;
|
||||
}
|
||||
|
|
@ -199,8 +209,10 @@ void GgmlOvDecoder::set_input_output(ggml_tensor* node, std::map<std::string, gg
|
|||
outputs[node_name] = node;
|
||||
m_input_names.push_back(src0_name);
|
||||
m_node_op_name[src0_name] = ggml_op_name(node->op);
|
||||
m_op_node_name.emplace_back(src0_name, ggml_op_name(node->op));
|
||||
m_input_names.push_back(src1_name);
|
||||
m_node_op_name[src1_name] = ggml_op_name(node->op);
|
||||
m_op_node_name.emplace_back(src1_name, ggml_op_name(node->op));
|
||||
m_output_names.push_back(node_name);
|
||||
break;
|
||||
}
|
||||
|
|
@ -216,6 +228,7 @@ void GgmlOvDecoder::set_input_output(ggml_tensor* node, std::map<std::string, gg
|
|||
outputs[node_name] = node;
|
||||
m_input_names.push_back(src0_name);
|
||||
m_node_op_name[src0_name] = ggml_op_name(node->op);
|
||||
m_op_node_name.emplace_back(src0_name, ggml_op_name(node->op));
|
||||
m_output_names.push_back(node_name);
|
||||
if (node->src[1]) {
|
||||
// std::string src1_name = std::string(node->src[1]->name) + "_" + std::to_string(node->src[1]->view_offs) + "_input_" + ggml_op_name(node->src[1]->op);
|
||||
|
|
@ -223,6 +236,7 @@ void GgmlOvDecoder::set_input_output(ggml_tensor* node, std::map<std::string, gg
|
|||
std::string src1_name = std::string(node->src[1]->name);
|
||||
inputs[src1_name] = node->src[1];
|
||||
m_node_op_name[src1_name] = ggml_op_name(node->op);
|
||||
m_op_node_name.emplace_back(src1_name, ggml_op_name(node->op));
|
||||
m_input_names.push_back(src1_name);
|
||||
}
|
||||
break;
|
||||
|
|
@ -237,8 +251,10 @@ void GgmlOvDecoder::set_input_output(ggml_tensor* node, std::map<std::string, gg
|
|||
inputs[src1_name] = node->src[1];
|
||||
m_input_names.push_back(src0_name);
|
||||
m_node_op_name[src0_name] = ggml_op_name(node->op);
|
||||
m_op_node_name.emplace_back(src0_name, ggml_op_name(node->op));
|
||||
m_input_names.push_back(src1_name);
|
||||
m_node_op_name[src1_name] = ggml_op_name(node->op);
|
||||
m_op_node_name.emplace_back(src1_name, ggml_op_name(node->op));
|
||||
outputs[node_name] = node;
|
||||
m_output_names.push_back(node_name);
|
||||
if (node->src[2]) {
|
||||
|
|
@ -248,6 +264,7 @@ void GgmlOvDecoder::set_input_output(ggml_tensor* node, std::map<std::string, gg
|
|||
inputs[src2_name] = node->src[2];
|
||||
m_input_names.push_back(src2_name);
|
||||
m_node_op_name[src2_name] = ggml_op_name(node->op);
|
||||
m_op_node_name.emplace_back(src2_name, ggml_op_name(node->op));
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
|
@ -359,8 +376,8 @@ GgmlOvDecoder::GgmlOvDecoder(struct ggml_tensor * node, struct ggml_cgraph * cgr
|
|||
if (m_node) {
|
||||
set_input_output(m_node, m_inputs, m_outputs);
|
||||
} else {
|
||||
// for (int node_n = 0; node_n < m_cgraph->n_nodes; node_n++) {
|
||||
for (int node_n = start_index; node_n <= end_index; node_n++) {
|
||||
for (int node_n = 0; node_n < m_cgraph->n_nodes; node_n++) {
|
||||
// for (int node_n = start_index; node_n <= end_index; node_n++) {
|
||||
auto cur_node = m_cgraph->nodes[node_n];
|
||||
m_nodes.push_back(cur_node);
|
||||
// Init model input and output
|
||||
|
|
@ -446,6 +463,21 @@ const std::string& GgmlOvDecoder::get_node_op_name(const std::string& name) cons
|
|||
return (it != m_node_op_name.end()) ? it->second : empty_str;
|
||||
}
|
||||
|
||||
std::string& GgmlOvDecoder::get_op_node_name(const std::string& key_name, const int index) {
|
||||
if (index == -1) {
|
||||
for (size_t i = 0; i < m_op_node_name.size(); ++i) {
|
||||
if (m_op_node_name[i].first == key_name) {
|
||||
return m_op_node_name[i].second;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
return m_op_node_name[index].second;
|
||||
}
|
||||
|
||||
static std::string empty_string = "";
|
||||
return empty_string; // empty string
|
||||
}
|
||||
|
||||
const std::vector<std::shared_ptr<ov::op::v0::Parameter>>& GgmlOvDecoder::get_params() const {
|
||||
return m_params;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -72,7 +72,7 @@ public:
|
|||
}
|
||||
|
||||
virtual const std::string& get_node_op_name(const std::string& name) const override;
|
||||
// virtual const std::string& get_node_op_info(const std::string& name) const override;
|
||||
// Returns the op name stored in m_op_node_name: matched by key_name when
// index is -1, otherwise taken from position index.
// NOTE(review): unlike the base-class declaration, this override declares no
// default for `index`, so calls made through a GgmlOvDecoder-typed handle must
// pass it explicitly — confirm this is intended.
std::string& get_op_node_name(const std::string& key_name, const int index) override;
|
||||
|
||||
virtual const std::vector<std::shared_ptr<ov::op::v0::Parameter>>& get_params() const override;
|
||||
|
||||
|
|
@ -92,5 +92,6 @@ private:
|
|||
bool m_continuous;
|
||||
std::map<std::string, std::string> m_node_op_name;
|
||||
std::vector<std::shared_ptr<ov::op::v0::Parameter>> m_params;
|
||||
std::vector<std::pair<std::string, std::string>> m_op_node_name;
|
||||
};
|
||||
|
||||
|
|
|
|||
|
|
@ -14,9 +14,11 @@ std::map<std::string, ov::Tensor> get_ggml_graph_input_tensors(std::shared_ptr<G
|
|||
std::map<std::string, ov::Tensor> input_tensors;
|
||||
auto input_names = ggml_decoder->get_input_names();
|
||||
// auto node_name = ggml_decoder->get_op_name();
|
||||
size_t iter = 0;
|
||||
for (size_t inp = 0; inp < input_names.size(); ++inp) {
|
||||
auto name = input_names[inp];
|
||||
auto node_op_name = ggml_decoder->get_node_op_name(name);
|
||||
std::string op_node_name = ggml_decoder->get_op_node_name(name, iter++);
|
||||
// auto node_op_name = ggml_decoder->get_node_op_name(name);
|
||||
auto input_data = ggml_decoder->get_input_ggml_tensor(name)->data;
|
||||
#ifdef GGML_OPENVINO_DEBUG
|
||||
printf("Subgraph input %d: %g\n", inp, *(double*)(input_data));
|
||||
|
|
@ -26,12 +28,12 @@ std::map<std::string, ov::Tensor> get_ggml_graph_input_tensors(std::shared_ptr<G
|
|||
// if (node_op_name == "CPY" && (input_shape[0] != 7)) {
|
||||
// input_tensor = ov::Tensor(ggml_decoder->get_input_type(name), {80000}, input_data);
|
||||
|
||||
if (node_op_name == "CONT" && ggml_decoder->check_if_continuous()) {
|
||||
if (op_node_name == "CONT" && ggml_decoder->check_if_continuous()) {
|
||||
ov::Shape flat_shape = { ggml_decoder->get_input_shape(name).to_shape()[0] *
|
||||
ggml_decoder->get_input_shape(name).to_shape()[1] *
|
||||
ggml_decoder->get_input_shape(name).to_shape()[2] };
|
||||
input_tensor = ov::Tensor(ggml_decoder->get_input_type(name), flat_shape, input_data);
|
||||
} else if ( node_op_name == "CONT" &&
|
||||
} else if ( op_node_name == "CONT" &&
|
||||
!ggml_decoder->check_if_continuous() &&
|
||||
input_shape[0] == 1) {
|
||||
size_t valid_elems = static_cast<size_t>(ggml_decoder->get_input_shape(name).to_shape()[2]); // 3072
|
||||
|
|
@ -40,17 +42,18 @@ std::map<std::string, ov::Tensor> get_ggml_graph_input_tensors(std::shared_ptr<G
|
|||
size_t element_size = input_type.size();
|
||||
std::vector<size_t> strides = ggml_decoder->get_input_stride(name);
|
||||
size_t phys_stride = static_cast<size_t>(strides[1]) / element_size;
|
||||
size_t total_phys = (num_rows - 1) * phys_stride + valid_elems;
|
||||
// size_t total_phys = (num_rows - 1) * phys_stride + valid_elems;
|
||||
size_t total_phys = num_rows* phys_stride;
|
||||
ov::Shape flat_input_shape = { total_phys };
|
||||
input_tensor = ov::Tensor(ggml_decoder->get_input_type(name), flat_input_shape, input_data);
|
||||
} else if (node_op_name == "CONT") {
|
||||
} else if (op_node_name == "CONT") {
|
||||
size_t valid_i = static_cast<size_t>(ggml_decoder->get_input_shape(name).to_shape()[2]); // 96
|
||||
size_t valid_j = static_cast<size_t>(ggml_decoder->get_input_shape(name).to_shape()[1]); // 32
|
||||
size_t valid_k = static_cast<size_t>(ggml_decoder->get_input_shape(name).to_shape()[0]); // 7
|
||||
size_t total_valid = valid_i * valid_j * valid_k; // 96 * 32 * 7 = 21504
|
||||
ov::Shape flat_input_shape = { total_valid };
|
||||
input_tensor = ov::Tensor(ggml_decoder->get_input_type(name), flat_input_shape, input_data);
|
||||
} else if (node_op_name == "MUL_MAT") {
|
||||
} else if (op_node_name == "MUL_MAT") {
|
||||
ov::Shape flat_shape = { ggml_decoder->get_input_shape(name).to_shape()[0] *
|
||||
ggml_decoder->get_input_shape(name).to_shape()[1] *
|
||||
ggml_decoder->get_input_shape(name).to_shape()[2] };
|
||||
|
|
@ -144,7 +147,7 @@ enum ggml_status openvino_frontend_compute(ggml_backend_t backend, struct ggml_c
|
|||
|
||||
// Set input tensor
|
||||
for (size_t i = 0; i < input_names.size(); i++) {
|
||||
infer_request.set_input_tensor(i, input_tensors[input_names[i]]);
|
||||
infer_request.set_input_tensor(i, input_tensors[input_names[i]]);
|
||||
|
||||
// auto input_tensor = infer_request.get_input_tensor(i);
|
||||
// auto input_shape = input_tensor.get_shape();
|
||||
|
|
|
|||
Loading…
Reference in New Issue