From 901f7347ff3517e0436e815b6adf2cc271930369 Mon Sep 17 00:00:00 2001 From: zhanmyz Date: Sat, 1 Mar 2025 22:18:43 +0800 Subject: [PATCH] Execute CONT & VIEW operators in OV Frontend is OK --- ggml/src/ggml-openvino.cpp | 69 ++++++++++++++++++------- ggml/src/ggml-openvino/ggml-decoder.cpp | 53 +++++++++++-------- ggml/src/ggml-openvino/utils.cpp | 20 +++---- 3 files changed, 91 insertions(+), 51 deletions(-) diff --git a/ggml/src/ggml-openvino.cpp b/ggml/src/ggml-openvino.cpp index e1c294a1d9..35f04f32c3 100644 --- a/ggml/src/ggml-openvino.cpp +++ b/ggml/src/ggml-openvino.cpp @@ -482,6 +482,9 @@ void ggml_backend_openvino_mul_mat(struct ggml_tensor * dst) { // flat shapes: ov::Shape flat_shape_src0 = { total_src0 }; ov::Shape flat_shape_src1 = { total_src1 }; + // Same as above + // ov::Shape flat_shape_src0 = { ggml_nelements(src0) }; + // ov::Shape flat_shape_src1 = { ggml_nelements(src1) }; // Create a Parameter node for collecting non-continuous data auto param_src0 = std::make_shared(ov::element::f16, flat_shape_src0); @@ -526,9 +529,6 @@ void ggml_backend_openvino_mul_mat(struct ggml_tensor * dst) { auto batched_matmul = std::make_shared(B, A, false, false); // batched_matmul output: shape = [32,7,32] - std::vector full_dst_shape = { dst->ne[2], dst->ne[1], dst->ne[0]}; - auto final_shape_const = ov::op::v0::Constant::create(ov::element::i64, { full_dst_shape.size() }, full_dst_shape); - auto model = std::make_shared(ov::NodeVector{ batched_matmul }, ov::ParameterVector{param_src0, param_src1}); ov::Core core; @@ -541,7 +541,7 @@ void ggml_backend_openvino_mul_mat(struct ggml_tensor * dst) { infer_request.set_input_tensor(0, tensor_src0); infer_request.set_input_tensor(1, tensor_src1); - ov::Tensor tensor_dst(ov::element::f32, ov::Shape(full_dst_shape.begin(), full_dst_shape.end()), dst->data); + ov::Tensor tensor_dst(ov::element::f32, { dst->ne[0], dst->ne[1], dst->ne[2]}, dst->data); infer_request.set_output_tensor(0, tensor_dst); infer_request.infer(); 
@@ -564,6 +564,9 @@ void ggml_backend_openvino_mul_mat(struct ggml_tensor * dst) { ov::Shape flat_shape_src0 = { total_src0 }; ov::Shape flat_shape_src1 = { total_src1 }; + // Same as above + // ov::Shape flat_shape_src0 = { ggml_nelements(src0) }; + // ov::Shape flat_shape_src1 = { ggml_nelements(src1) }; auto param_flat_src0 = std::make_shared(ov::element::f16, flat_shape_src0); auto param_flat_src1 = std::make_shared(ov::element::f32, flat_shape_src1); @@ -602,6 +605,7 @@ void ggml_backend_openvino_mul_mat(struct ggml_tensor * dst) { std::shared_ptr matmul = std::make_shared(reshape_src1, A_for_mul, false, false); auto model = std::make_shared(ov::NodeVector{matmul}, ov::ParameterVector{param_flat_src0, param_flat_src1}); + // ov::save_model(model, "/home/user/zhan/merge_git_commits/llama.cpp-ov/002_backend_mulmat_model.xml"); auto compiled_model = core.compile_model(model, "CPU"); auto infer_request = compiled_model.create_infer_request(); @@ -618,8 +622,35 @@ void ggml_backend_openvino_reshape(ggml_tensor *dst) { } void ggml_backend_openvino_view(ggml_tensor *dst) { + ov::Core core; + ov::Shape tensor_shape{static_cast(dst->ne[3]), static_cast(dst->ne[2]), static_cast(dst->ne[1]), static_cast(dst->ne[0])}; - GGML_UNUSED(dst); + // auto param = std::make_shared(ov::element::f32, tensor_shape); + auto param = std::make_shared(ov::element::f16, tensor_shape); + + auto reshaped = std::make_shared(param, + ov::op::v0::Constant::create(ov::element::i64, { tensor_shape.size() }, tensor_shape), + false); + + auto model = std::make_shared(ov::NodeVector{reshaped}, ov::ParameterVector{param}); + // ov::save_model(model, "/home/user/zhan/merge_git_commits/llama.cpp-ov/003_backend_view_model.xml"); + + auto compiled_model = core.compile_model(model, "CPU"); + + ov::InferRequest infer_request = compiled_model.create_infer_request(); + + // ov::Tensor input_tensor(ov::element::f32, tensor_shape, dst->data); + ov::Tensor input_tensor(ov::element::f16, tensor_shape, 
dst->data); + // infer_request.set_tensor(param, input_tensor); + infer_request.set_input_tensor(0, input_tensor); + + // ov::Tensor output_tensor(ov::element::f32, tensor_shape, dst->data); + ov::Tensor output_tensor(ov::element::f16, tensor_shape, dst->data); + infer_request.set_output_tensor(0, output_tensor); + + infer_request.infer(); + // auto output_tensor = infer_request.get_output_tensor(0); + // dst->data = output_tensor.data(); } void ggml_backend_openvino_dup_bytes(struct ggml_tensor *dst) { @@ -992,31 +1023,33 @@ static enum ggml_status ggml_backend_openvino_graph_compute(ggml_backend_t backe // openvino_frontend_compute(backend, cgraph); // Process nodes in order for (int i = 0; i < cgraph->n_nodes; i++) { - if (std::find(reshape_indices.begin(), reshape_indices.end(), i) != reshape_indices.end()) { - ggml_backend_openvino_reshape(cgraph->nodes[i]); + if (std::find(permute_indices.begin(), permute_indices.end(), i) != permute_indices.end()) { + ggml_backend_openvino_permute(cgraph->nodes[i]); // } else if (std::find(cont_indices.begin(), cont_indices.end(), i) != cont_indices.end()) { // ggml_backend_openvino_dup_bytes(cgraph->nodes[i]); - } else if (std::find(view_indices.begin(), view_indices.end(), i) != view_indices.end()) { - ggml_backend_openvino_view(cgraph->nodes[i]); + // } else if (std::find(view_indices.begin(), view_indices.end(), i) != view_indices.end()) { + // ggml_backend_openvino_view(cgraph->nodes[i]); // } else if (std::find(cpy_indices.begin(), cpy_indices.end(), i) != cpy_indices.end()) { // ggml_backend_openvino_cpy(cgraph->nodes[i]); } else if (std::find(transpose_indices.begin(), transpose_indices.end(), i) != transpose_indices.end()) { ggml_backend_openvino_transpose(cgraph->nodes[i]); - } else if (std::find(permute_indices.begin(), permute_indices.end(), i) != permute_indices.end()) { - ggml_backend_openvino_permute(cgraph->nodes[i]); - } else if (std::find(mul_mat_indices.begin(), mul_mat_indices.end(), i) != 
mul_mat_indices.end()) { - ggml_backend_openvino_mul_mat(cgraph->nodes[i]); + } else if (std::find(reshape_indices.begin(), reshape_indices.end(), i) != reshape_indices.end()) { + ggml_backend_openvino_reshape(cgraph->nodes[i]); + // } else if (std::find(mul_mat_indices.begin(), mul_mat_indices.end(), i) != mul_mat_indices.end()) { + // ggml_backend_openvino_mul_mat(cgraph->nodes[i]); } else { // Process a range of nodes with openvino_frontend_compute int start_index = i; - while (i < cgraph->n_nodes && - // std::find(cpy_indices.begin(), cpy_indices.end(), i) == cpy_indices.end() && - // std::find(cont_indices.begin(), cont_indices.end(), i) == cont_indices.end() && - std::find(mul_mat_indices.begin(), mul_mat_indices.end(), i) == mul_mat_indices.end()) { + while (i < cgraph->n_nodes + // && std::find(view_indices.begin(), view_indices.end(), i) == view_indices.end() + // && std::find(cpy_indices.begin(), cpy_indices.end(), i) == cpy_indices.end() + // && std::find(cont_indices.begin(), cont_indices.end(), i) == cont_indices.end() + // && std::find(mul_mat_indices.begin(), mul_mat_indices.end(), i) == mul_mat_indices.end() + ) { i++; } if (start_index < i) { - openvino_frontend_compute(backend, cgraph, start_index, --i); + openvino_frontend_compute(backend, cgraph, start_index, --i); } } } diff --git a/ggml/src/ggml-openvino/ggml-decoder.cpp b/ggml/src/ggml-openvino/ggml-decoder.cpp index 6a249c103f..fab8d4aed6 100644 --- a/ggml/src/ggml-openvino/ggml-decoder.cpp +++ b/ggml/src/ggml-openvino/ggml-decoder.cpp @@ -6,12 +6,20 @@ #include void GgmlOvDecoder::set_input_output(ggml_tensor* node, std::map& inputs, std::map& outputs) { - m_node_op_name[node->name] = ggml_op_name(node->op); + // m_node_op_name[node->name] = ggml_op_name(node->op); + + // std::string src0_name = std::string(node->src[0]->name) + "_" + std::to_string(node->src[0]->view_offs) + "_input_" + ggml_op_name(node->src[0]->op); + // std::string node_name = std::string(node->name) + "_" + 
std::to_string(node->view_offs) + "_output_" + ggml_op_name(node->op); + // Execute single CONT operator is OK - std::string src0_name = std::string(node->src[0]->name) + "_" + std::to_string(node->src[0]->view_offs) + "_" + ggml_op_name(node->src[0]->op); - std::string node_name = std::string(node->name) + "_" + std::to_string(node->view_offs) + "_" + ggml_op_name(node->op); + // std::string src0_name = std::string(node->src[0]->name) + "_" + std::to_string(node->src[0]->view_offs) + "_" + ggml_op_name(node->src[0]->op); + // std::string node_name = std::string(node->name) + "_" + std::to_string(node->view_offs) + "_" + ggml_op_name(node->op); + // std::string src0_name = std::string(node->src[0]->name) + "_" + std::to_string(node->src[0]->view_offs); // std::string node_name = std::string(node->name) + "_" + std::to_string(node->view_offs); + + std::string src0_name = std::string(node->src[0]->name); + std::string node_name = std::string(node->name); switch (node->op) { // Unary OPs case GGML_OP_UNARY: @@ -151,6 +159,7 @@ void GgmlOvDecoder::set_input_output(ggml_tensor* node, std::mapname) + "_" + std::to_string(node->view_offs) + "_output_" + ggml_op_name(node->op); inputs[node_name] = node; outputs[node_name] = node; m_input_names.push_back(node_name); @@ -161,21 +170,29 @@ void GgmlOvDecoder::set_input_output(ggml_tensor* node, std::mapsrc[0]; + inputs[src0_name] = node->src[0]; outputs[node_name] = node; - m_input_names.push_back(node_name); - m_node_op_name[node_name] = ggml_op_name(node->op); + m_input_names.push_back(src0_name); + m_node_op_name[src0_name] = ggml_op_name(node->op); m_output_names.push_back(node_name); break; } case GGML_OP_MUL_MAT: { - std::string src1_name = std::string(node->src[1]->name) + "_" + std::to_string(node->src[1]->view_offs); + ov::Shape flat_shape_src0 = { node->src[0]->ne[0]*node->src[0]->ne[1]*node->src[0]->ne[2] }; + ov::Shape flat_shape_src1 = { node->src[1]->ne[0]*node->src[1]->ne[1]*node->src[1]->ne[2] }; + auto 
param_src0 = std::make_shared(ov::element::f16, flat_shape_src0); + auto param_src1 = std::make_shared(ov::element::f32, flat_shape_src1); + m_params.push_back(param_src0); + m_params.push_back(param_src1); if (!ggml_is_contiguous(node->src[1]) || node->src[1]->ne[0] * node->src[1]->nb[0] != node->src[1]->nb[1]) { m_continuous = false; } else { m_continuous = true; } + // std::string src1_name = std::string(node->src[1]->name) + "_" + std::to_string(node->src[1]->view_offs) + "_input_" + ggml_op_name(node->src[1]->op); + // std::string src1_name = std::string(node->src[1]->name) + "_" + std::to_string(node->src[1]->view_offs); + std::string src1_name = std::string(node->src[1]->name); inputs[src0_name] = node->src[0]; inputs[src1_name] = node->src[1]; outputs[node_name] = node; @@ -200,7 +217,8 @@ void GgmlOvDecoder::set_input_output(ggml_tensor* node, std::mapop); m_output_names.push_back(node_name); if (node->src[1]) { - std::string src1_name = std::string(node->src[1]->name) + "_" + std::to_string(node->src[1]->view_offs); + // std::string src1_name = std::string(node->src[1]->name) + "_" + std::to_string(node->src[1]->view_offs); + std::string src1_name = std::string(node->src[1]->name); inputs[src1_name] = node->src[1]; m_node_op_name[src1_name] = ggml_op_name(node->op); m_input_names.push_back(src1_name); @@ -210,7 +228,8 @@ void GgmlOvDecoder::set_input_output(ggml_tensor* node, std::mapsrc[1]->name) + "_" + std::to_string(node->src[1]->view_offs); + // std::string src1_name = std::string(node->src[1]->name) + "_" + std::to_string(node->src[1]->view_offs); + std::string src1_name = std::string(node->src[1]->name); inputs[src0_name] = node->src[0]; inputs[src1_name] = node->src[1]; m_input_names.push_back(src0_name); @@ -220,7 +239,8 @@ void GgmlOvDecoder::set_input_output(ggml_tensor* node, std::mapsrc[2]) { - std::string src2_name = std::string(node->src[2]->name) + "_" + std::to_string(node->src[2]->view_offs); + // std::string src2_name = 
std::string(node->src[2]->name) + "_" + std::to_string(node->src[2]->view_offs); + std::string src2_name = std::string(node->src[2]->name); inputs[src2_name] = node->src[2]; m_input_names.push_back(src2_name); m_node_op_name[src2_name] = ggml_op_name(node->op); @@ -334,13 +354,6 @@ ov::PartialShape GgmlOvDecoder::get_input_shape(const std::string& name) const { ggml_tensor * node = m_inputs.at(name); std::vector shape; - // [TODO], 在这里判断如果是MUL_MAT就设置shape为一维 - if(m_node_op_name.at(name) == "MUL_MAT") { - shape.push_back(static_cast(node->ne[0] * node->ne[1] * node->ne[2])); - input_shape = ov::PartialShape(shape); - return input_shape; - } - for (int i = GGML_MAX_DIMS - 2; i >= 0 ; --i) { if (node->ne[i] == 0) { return input_shape; @@ -405,10 +418,8 @@ std::vector GgmlOvDecoder::get_input_names() const { const std::string& GgmlOvDecoder::get_node_op_name(const std::string& name) const { auto it = m_node_op_name.find(name); - if (it != m_node_op_name.end()) { - return it->second; - } - return ""; + static const std::string empty_str; + return (it != m_node_op_name.end()) ? 
it->second : empty_str; } const std::vector>& GgmlOvDecoder::get_params() const { diff --git a/ggml/src/ggml-openvino/utils.cpp b/ggml/src/ggml-openvino/utils.cpp index 8fa1f99a01..21edad596b 100644 --- a/ggml/src/ggml-openvino/utils.cpp +++ b/ggml/src/ggml-openvino/utils.cpp @@ -26,18 +26,9 @@ std::map get_ggml_graph_input_tensors(std::shared_ptrget_input_type(name), {80000}, input_data); - // } else if (node_op_name == "CONT" || node_op_name == "MUL_MAT") { - // // auto input_shape = ggml_decoder->get_input_shape(name).to_shape(); - // // size_t total_size = 1; - // // for (auto dim : input_shape) { - // // total_size *= dim; - // // } - // // ov::Shape new_shape = {total_size}; - // input_tensor = ov::Tensor(ggml_decoder->get_input_type(name), {ggml_decoder->get_input_shape(name).to_shape()[0]}, input_data); - // } else { if (node_op_name == "CONT" && ggml_decoder->check_if_continuous()) { - ov::Shape flat_shape = { ggml_decoder->get_input_shape(name).to_shape()[0] * - ggml_decoder->get_input_shape(name).to_shape()[1] * + ov::Shape flat_shape = { ggml_decoder->get_input_shape(name).to_shape()[0] * + ggml_decoder->get_input_shape(name).to_shape()[1] * ggml_decoder->get_input_shape(name).to_shape()[2] }; input_tensor = ov::Tensor(ggml_decoder->get_input_type(name), flat_shape, input_data); } else if ( node_op_name == "CONT" && @@ -59,6 +50,11 @@ std::map get_ggml_graph_input_tensors(std::shared_ptrget_input_type(name), flat_input_shape, input_data); + } else if (node_op_name == "MUL_MAT") { + ov::Shape flat_shape = { ggml_decoder->get_input_shape(name).to_shape()[0] * + ggml_decoder->get_input_shape(name).to_shape()[1] * + ggml_decoder->get_input_shape(name).to_shape()[2] }; + input_tensor = ov::Tensor(ggml_decoder->get_input_type(name), flat_shape, input_data); } else { input_tensor = ov::Tensor(ggml_decoder->get_input_type(name), ggml_decoder->get_input_shape(name).to_shape(), input_data); } @@ -125,7 +121,7 @@ enum ggml_status 
openvino_frontend_compute(ggml_backend_t backend, struct ggml_c // Convert InputModel -> ov::Model std::shared_ptr model = front_end->convert(input_model); - ov::save_model(model, "/home/user/zhan/merge_git_commits/llama.cpp-ov/001_model.xml"); + // ov::save_model(model, "/home/user/zhan/merge_git_commits/llama.cpp-ov/001_model.xml"); if (!model) { GGML_LOG_ERROR("Model is not converted \n");