From 081b52667bd7de113b781c55165641c339239e29 Mon Sep 17 00:00:00 2001 From: zhanmyz Date: Tue, 25 Feb 2025 17:29:43 +0800 Subject: [PATCH] Execute singel CONT operator is OK --- ggml/src/ggml-openvino.cpp | 8 +- ggml/src/ggml-openvino/decoder.h | 2 + ggml/src/ggml-openvino/ggml-decoder.cpp | 129 +++++++++++++----------- ggml/src/ggml-openvino/ggml-decoder.h | 2 + 4 files changed, 78 insertions(+), 63 deletions(-) diff --git a/ggml/src/ggml-openvino.cpp b/ggml/src/ggml-openvino.cpp index 2e20e8e39b..e1c294a1d9 100644 --- a/ggml/src/ggml-openvino.cpp +++ b/ggml/src/ggml-openvino.cpp @@ -998,8 +998,8 @@ static enum ggml_status ggml_backend_openvino_graph_compute(ggml_backend_t backe // ggml_backend_openvino_dup_bytes(cgraph->nodes[i]); } else if (std::find(view_indices.begin(), view_indices.end(), i) != view_indices.end()) { ggml_backend_openvino_view(cgraph->nodes[i]); - } else if (std::find(cpy_indices.begin(), cpy_indices.end(), i) != cpy_indices.end()) { - ggml_backend_openvino_cpy(cgraph->nodes[i]); + // } else if (std::find(cpy_indices.begin(), cpy_indices.end(), i) != cpy_indices.end()) { + // ggml_backend_openvino_cpy(cgraph->nodes[i]); } else if (std::find(transpose_indices.begin(), transpose_indices.end(), i) != transpose_indices.end()) { ggml_backend_openvino_transpose(cgraph->nodes[i]); } else if (std::find(permute_indices.begin(), permute_indices.end(), i) != permute_indices.end()) { @@ -1010,8 +1010,8 @@ static enum ggml_status ggml_backend_openvino_graph_compute(ggml_backend_t backe // Process a range of nodes with openvino_frontend_compute int start_index = i; while (i < cgraph->n_nodes && - std::find(cpy_indices.begin(), cpy_indices.end(), i) == cpy_indices.end() && - //std::find(cont_indices.begin(), cont_indices.end(), i) == cont_indices.end() && + // std::find(cpy_indices.begin(), cpy_indices.end(), i) == cpy_indices.end() && + // std::find(cont_indices.begin(), cont_indices.end(), i) == cont_indices.end() && std::find(mul_mat_indices.begin(), mul_mat_indices.end(), i) == mul_mat_indices.end()) { i++; } diff --git a/ggml/src/ggml-openvino/decoder.h b/ggml/src/ggml-openvino/decoder.h index ef18c12144..9a884a3374 100644 --- a/ggml/src/ggml-openvino/decoder.h +++ b/ggml/src/ggml-openvino/decoder.h @@ -42,6 +42,8 @@ public: virtual PartialShape get_output_shape(const std::string& name) const = 0; + virtual std::vector get_output_stride(const std::string& name) const = 0; + virtual element::Type get_output_type(const std::string& name) const = 0; virtual int32_t* get_output_op_params(const std::string& name) const = 0; diff --git a/ggml/src/ggml-openvino/ggml-decoder.cpp b/ggml/src/ggml-openvino/ggml-decoder.cpp index a412f8b75a..6a249c103f 100644 --- a/ggml/src/ggml-openvino/ggml-decoder.cpp +++ b/ggml/src/ggml-openvino/ggml-decoder.cpp @@ -7,8 +7,11 @@ void GgmlOvDecoder::set_input_output(ggml_tensor* node, std::map& inputs, std::map& outputs) { m_node_op_name[node->name] = ggml_op_name(node->op); - std::string src0_name = std::string(node->src[0]->name) + "_" + std::to_string(node->src[0]->view_offs) + "_input_" + ggml_op_name(node->src[0]->op); - std::string node_name = std::string(node->name) + "_" + std::to_string(node->view_offs) + "_output_" + ggml_op_name(node->op); + // Execute singel CONT operator is OK + std::string src0_name = std::string(node->src[0]->name) + "_" + std::to_string(node->src[0]->view_offs) + "_" + ggml_op_name(node->src[0]->op); + std::string node_name = std::string(node->name) + "_" + std::to_string(node->view_offs) + "_" + ggml_op_name(node->op); + // std::string src0_name = std::string(node->src[0]->name) + "_" + std::to_string(node->src[0]->view_offs); + // std::string node_name = std::string(node->name) + "_" + std::to_string(node->view_offs); switch (node->op) { // Unary OPs case GGML_OP_UNARY: @@ -17,21 +20,21 @@ void GgmlOvDecoder::set_input_output(ggml_tensor* node, std::mapsrc[0]->name] = node->src[0]; - outputs[node->name] = node; - m_input_names.push_back(node->src[0]->name); - m_node_op_name[node->src[0]->name] = ggml_op_name(node->op); - m_output_names.push_back(node->name); + inputs[src0_name] = node->src[0]; + outputs[node_name] = node; + m_input_names.push_back(src0_name); + m_node_op_name[src0_name] = ggml_op_name(node->op); + m_output_names.push_back(node_name); break; } case GGML_OP_CONT: { if (ggml_is_contiguous(node->src[0]) && ggml_is_contiguous(node)) { - inputs[node->src[0]->name] = node->src[0]; - outputs[node->name] = node; - m_input_names.push_back(node->src[0]->name); - m_node_op_name[node->src[0]->name] = ggml_op_name(node->op); - m_output_names.push_back(node->name); + inputs[src0_name] = node->src[0]; + outputs[node_name] = node; + m_input_names.push_back(src0_name); + m_node_op_name[src0_name] = ggml_op_name(node->op); + m_output_names.push_back(node_name); m_continuous = true; ov::Shape flat_shape = { static_cast(ggml_nelements(node)) }; @@ -51,11 +54,11 @@ void GgmlOvDecoder::set_input_output(ggml_tensor* node, std::mapsrc[0]->ne[0] * ggml_type_size(node->src[0]->type)); // } - inputs[node->src[0]->name] = node->src[0]; - outputs[node->name] = node; - m_input_names.push_back(node->src[0]->name); - m_node_op_name[node->src[0]->name] = ggml_op_name(node->op); - m_output_names.push_back(node->name); + inputs[src0_name] = node->src[0]; + outputs[node_name] = node; + m_input_names.push_back(src0_name); + m_node_op_name[src0_name] = ggml_op_name(node->op); + m_output_names.push_back(node_name); const size_t element_size = ggml_type_size(node->src[0]->type); size_t valid_elems = static_cast(node->src[0]->ne[0]); // 3072 @@ -71,11 +74,11 @@ void GgmlOvDecoder::set_input_output(ggml_tensor* node, std::mapsrc[0]->name] = node->src[0]; - outputs[node->name] = node; - m_input_names.push_back(node->src[0]->name); - m_node_op_name[node->src[0]->name] = ggml_op_name(node->op); - m_output_names.push_back(node->name); + inputs[src0_name] = node->src[0]; + outputs[node_name] = node; + m_input_names.push_back(src0_name); + m_node_op_name[src0_name] = ggml_op_name(node->op); + m_output_names.push_back(node_name); size_t valid_i = static_cast(node->src[0]->ne[0]); // 96 size_t valid_j = static_cast(node->src[0]->ne[1]); // 32 @@ -98,6 +101,10 @@ void GgmlOvDecoder::set_input_output(ggml_tensor* node, std::mapop); m_output_names.push_back(node_name); m_continuous = true; + + ov::Shape src_shape(node->src[0]->ne, node->src[0]->ne + 4); + auto input_param = std::make_shared(ov::element::f32, src_shape); + m_params.push_back(input_param); break; } else { for (int64_t i1 = 0; i1 < node->ne[1]; ++i1) { // ne[1] = 3072 @@ -118,57 +125,52 @@ void GgmlOvDecoder::set_input_output(ggml_tensor* node, std::mapsrc[0]->name] = node->src[0]; - // std::string temp_name = node->src[0]->name + std::string("_cpy_tmp"); + // inputs[src0_name] = node->src[0]; + // std::string temp_name = src0_name + std::string("_cpy_tmp"); // inputs[temp_name] = node; - // outputs[node->name] = node; - // m_input_names.push_back(node->src[0]->name); + // outputs[node_name] = node; + // m_input_names.push_back(src0_name); // m_input_names.push_back(temp_name); - // m_node_op_name[node->src[0]->name] = ggml_op_name(node->op); + // m_node_op_name[src0_name] = ggml_op_name(node->op); // m_node_op_name[temp_name] = ggml_op_name(node->op); + // m_output_names.push_back(node_name); + // m_continuous = false; - // m_output_names.push_back(node->name); - - // ov::Shape flat_src0_shape = {80000}; + // ov::Shape flat_src0_shape = {node->src[0]->nb[2]}; // auto param_src0 = std::make_shared(ov::element::f32, flat_src0_shape); // m_params.push_back(param_src0); - // std::cout << "decoder ADDR-0: " << param_src0.get() << std::endl; - - // ov::Shape flat_dst_shape = {200000, 1}; + // ov::Shape flat_dst_shape = {node->nb[2], 1}; // auto param_dst_base = std::make_shared(ov::element::f16, flat_dst_shape); // m_params.push_back(param_dst_base); - // std::cout << "decoder ADDR-1: " << param_dst_base.get() << std::endl; - - // m_continuous = false; - // break; + break; } } // For view, input is node itself case GGML_OP_VIEW: { - inputs[node->name] = node; - outputs[node->name] = node; - m_input_names.push_back(node->name); - m_node_op_name[node->name] = ggml_op_name(node->op); - m_output_names.push_back(node->name); + inputs[node_name] = node; + outputs[node_name] = node; + m_input_names.push_back(node_name); + m_node_op_name[node_name] = ggml_op_name(node->op); + m_output_names.push_back(node_name); break; } // SCALE case GGML_OP_SCALE: { - inputs[src0_name] = node->src[0]; + inputs[node_name] = node->src[0]; outputs[node_name] = node; m_input_names.push_back(node_name); - // m_node_op_name[node_name] = ggml_op_name(node->op); + m_node_op_name[node_name] = ggml_op_name(node->op); m_output_names.push_back(node_name); break; } case GGML_OP_MUL_MAT: { - std::string src1_name = std::string(node->src[1]->name) + "_" + std::to_string(node->src[1]->view_offs) + "_input_" + ggml_op_name(node->src[1]->op); + std::string src1_name = std::string(node->src[1]->name) + "_" + std::to_string(node->src[1]->view_offs); if (!ggml_is_contiguous(node->src[1]) || node->src[1]->ne[0] * node->src[1]->nb[0] != node->src[1]->nb[1]) { m_continuous = false; } else { @@ -198,7 +200,7 @@ void GgmlOvDecoder::set_input_output(ggml_tensor* node, std::mapop); m_output_names.push_back(node_name); if (node->src[1]) { - std::string src1_name = std::string(node->src[1]->name) + "_" + std::to_string(node->src[1]->view_offs) + "_input_" + ggml_op_name(node->src[1]->op); + std::string src1_name = std::string(node->src[1]->name) + "_" + std::to_string(node->src[1]->view_offs); inputs[src1_name] = node->src[1]; m_node_op_name[src1_name] = ggml_op_name(node->op); m_input_names.push_back(src1_name); @@ -208,20 +210,20 @@ void GgmlOvDecoder::set_input_output(ggml_tensor* node, std::mapsrc[1]->name) + "_" + std::to_string(node->src[1]->view_offs) + "_input_" + ggml_op_name(node->src[1]->op); - inputs[node->src[0]->name] = node->src[0]; - inputs[node->src[1]->name] = node->src[1]; - m_input_names.push_back(node->src[0]->name); - m_node_op_name[node->src[0]->name] = ggml_op_name(node->op); - m_input_names.push_back(node->src[1]->name); - m_node_op_name[node->src[1]->name] = ggml_op_name(node->op); - outputs[node->name] = node; - m_output_names.push_back(node->name); + std::string src1_name = std::string(node->src[1]->name) + "_" + std::to_string(node->src[1]->view_offs); + inputs[src0_name] = node->src[0]; + inputs[src1_name] = node->src[1]; + m_input_names.push_back(src0_name); + m_node_op_name[src0_name] = ggml_op_name(node->op); + m_input_names.push_back(src1_name); + m_node_op_name[src1_name] = ggml_op_name(node->op); + outputs[node_name] = node; + m_output_names.push_back(node_name); if (node->src[2]) { - std::string src2_name = std::string(node->src[2]->name) + "_" + std::to_string(node->src[2]->view_offs) + "_input_" + ggml_op_name(node->src[2]->op); - inputs[node->src[2]->name] = node->src[2]; - m_input_names.push_back(node->src[2]->name); - m_node_op_name[node->src[2]->name] = ggml_op_name(node->op); + std::string src2_name = std::string(node->src[2]->name) + "_" + std::to_string(node->src[2]->view_offs); + inputs[src2_name] = node->src[2]; + m_input_names.push_back(src2_name); + m_node_op_name[src2_name] = ggml_op_name(node->op); } break; } @@ -358,6 +360,15 @@ std::vector GgmlOvDecoder::get_input_stride(const std::string& name) con return stride; } +std::vector GgmlOvDecoder::get_output_stride(const std::string& name) const { + std::vector stride; + ggml_tensor * node = m_outputs.at(name); + for (int i = GGML_MAX_DIMS - 2; i >= 0 ; --i) { + stride.push_back(static_cast(node->nb[i])); + } + return stride; +} + ov::element::Type GgmlOvDecoder::get_input_type(const std::string& name) const { ov::element::Type type = ov::element::dynamic; switch (m_inputs.at(name)->type) { diff --git a/ggml/src/ggml-openvino/ggml-decoder.h b/ggml/src/ggml-openvino/ggml-decoder.h index 0921fd8bb5..98c418dd6a 100644 --- a/ggml/src/ggml-openvino/ggml-decoder.h +++ b/ggml/src/ggml-openvino/ggml-decoder.h @@ -39,6 +39,8 @@ public: virtual ov::PartialShape get_output_shape(const std::string& name) const override; + virtual std::vector get_output_stride(const std::string& name) const override; + virtual ov::element::Type get_output_type(const std::string& name) const override; virtual int32_t* get_output_op_params(const std::string& name) const override;