From 95ae982d590e8844517f1ffed910a7642150732f Mon Sep 17 00:00:00 2001 From: zhanmyz Date: Tue, 4 Mar 2025 00:05:00 +0800 Subject: [PATCH] OV Frontend supports GET_ROWS/RMS_NORM/MUL/MUL_MAT graph conversion of consecutive OPs --- ggml/src/ggml-openvino.cpp | 64 +++++++++++++------------ ggml/src/ggml-openvino/ggml-decoder.cpp | 46 +++++++++++++----- ggml/src/ggml-openvino/utils.cpp | 11 ++++- 3 files changed, 78 insertions(+), 43 deletions(-) diff --git a/ggml/src/ggml-openvino.cpp b/ggml/src/ggml-openvino.cpp index 35f04f32c3..883e43365f 100644 --- a/ggml/src/ggml-openvino.cpp +++ b/ggml/src/ggml-openvino.cpp @@ -1020,39 +1020,41 @@ static enum ggml_status ggml_backend_openvino_graph_compute(ggml_backend_t backe } } + int end_node = cgraph->n_nodes - 1; + openvino_frontend_compute(backend, cgraph, 0, end_node); // openvino_frontend_compute(backend, cgraph); // Process nodes in order - for (int i = 0; i < cgraph->n_nodes; i++) { - if (std::find(permute_indices.begin(), permute_indices.end(), i) != permute_indices.end()) { - ggml_backend_openvino_permute(cgraph->nodes[i]); - // } else if (std::find(cont_indices.begin(), cont_indices.end(), i) != cont_indices.end()) { - // ggml_backend_openvino_dup_bytes(cgraph->nodes[i]); - // } else if (std::find(view_indices.begin(), view_indices.end(), i) != view_indices.end()) { - // ggml_backend_openvino_view(cgraph->nodes[i]); - // } else if (std::find(cpy_indices.begin(), cpy_indices.end(), i) != cpy_indices.end()) { - // ggml_backend_openvino_cpy(cgraph->nodes[i]); - } else if (std::find(transpose_indices.begin(), transpose_indices.end(), i) != transpose_indices.end()) { - ggml_backend_openvino_transpose(cgraph->nodes[i]); - } else if (std::find(reshape_indices.begin(), reshape_indices.end(), i) != reshape_indices.end()) { - ggml_backend_openvino_reshape(cgraph->nodes[i]); - // } else if (std::find(mul_mat_indices.begin(), mul_mat_indices.end(), i) != mul_mat_indices.end()) { - // ggml_backend_openvino_mul_mat(cgraph->nodes[i]); - } else { - // Process a range of nodes with openvino_frontend_compute - int start_index = i; - while (i < cgraph->n_nodes - // && std::find(view_indices.begin(), view_indices.end(), i) == view_indices.end() - // && std::find(cpy_indices.begin(), cpy_indices.end(), i) == cpy_indices.end() - // && std::find(cont_indices.begin(), cont_indices.end(), i) == cont_indices.end() - // && std::find(mul_mat_indices.begin(), mul_mat_indices.end(), i) == mul_mat_indices.end() - ) { - i++; - } - if (start_index < i) { - openvino_frontend_compute(backend, cgraph, start_index, --i); - } - } - } + // for (int i = 0; i < cgraph->n_nodes; i++) { + // if (std::find(permute_indices.begin(), permute_indices.end(), i) != permute_indices.end()) { + // ggml_backend_openvino_permute(cgraph->nodes[i]); + // // } else if (std::find(cont_indices.begin(), cont_indices.end(), i) != cont_indices.end()) { + // // ggml_backend_openvino_dup_bytes(cgraph->nodes[i]); + // // } else if (std::find(view_indices.begin(), view_indices.end(), i) != view_indices.end()) { + // // ggml_backend_openvino_view(cgraph->nodes[i]); + // // } else if (std::find(cpy_indices.begin(), cpy_indices.end(), i) != cpy_indices.end()) { + // // ggml_backend_openvino_cpy(cgraph->nodes[i]); + // // } else if (std::find(transpose_indices.begin(), transpose_indices.end(), i) != transpose_indices.end()) { + // // ggml_backend_openvino_transpose(cgraph->nodes[i]); + // // } else if (std::find(reshape_indices.begin(), reshape_indices.end(), i) != reshape_indices.end()) { + // // ggml_backend_openvino_reshape(cgraph->nodes[i]); + // // } else if (std::find(mul_mat_indices.begin(), mul_mat_indices.end(), i) != mul_mat_indices.end()) { + // // ggml_backend_openvino_mul_mat(cgraph->nodes[i]); + // } else { + // // Process a range of nodes with openvino_frontend_compute + // int start_index = i; + // while (i < cgraph->n_nodes + // // && std::find(view_indices.begin(), view_indices.end(), i) == view_indices.end() + // // && std::find(cpy_indices.begin(), cpy_indices.end(), i) == cpy_indices.end() + // // && std::find(cont_indices.begin(), cont_indices.end(), i) == cont_indices.end() + // // && std::find(mul_mat_indices.begin(), mul_mat_indices.end(), i) == mul_mat_indices.end() + // ) { + // i++; + // } + // if (start_index < i) { + // openvino_frontend_compute(backend, cgraph, start_index, --i); + // } + // } + // } return GGML_STATUS_SUCCESS; diff --git a/ggml/src/ggml-openvino/ggml-decoder.cpp b/ggml/src/ggml-openvino/ggml-decoder.cpp index fab8d4aed6..90755ec9a6 100644 --- a/ggml/src/ggml-openvino/ggml-decoder.cpp +++ b/ggml/src/ggml-openvino/ggml-decoder.cpp @@ -20,6 +20,7 @@ void GgmlOvDecoder::set_input_output(ggml_tensor* node, std::mapsrc[0]->name); std::string node_name = std::string(node->name); + switch (node->op) { // Unary OPs case GGML_OP_UNARY: @@ -110,7 +111,7 @@ void GgmlOvDecoder::set_input_output(ggml_tensor* node, std::mapsrc[0]->ne, node->src[0]->ne + 4); + ov::Shape src_shape(node->src[0]->ne, node->src[0]->ne + 3); auto input_param = std::make_shared(ov::element::f32, src_shape); m_params.push_back(input_param); break; @@ -217,6 +218,7 @@ void GgmlOvDecoder::set_input_output(ggml_tensor* node, std::mapop); m_output_names.push_back(node_name); if (node->src[1]) { + // std::string src1_name = std::string(node->src[1]->name) + "_" + std::to_string(node->src[1]->view_offs) + "_input_" + ggml_op_name(node->src[1]->op); // std::string src1_name = std::string(node->src[1]->name) + "_" + std::to_string(node->src[1]->view_offs); std::string src1_name = std::string(node->src[1]->name); inputs[src1_name] = node->src[1]; @@ -228,6 +230,7 @@ void GgmlOvDecoder::set_input_output(ggml_tensor* node, std::mapsrc[1]->name) + "_" + std::to_string(node->src[1]->view_offs) + "_input_" + ggml_op_name(node->src[1]->op); // std::string src1_name = std::string(node->src[1]->name) + "_" + std::to_string(node->src[1]->view_offs); std::string src1_name = std::string(node->src[1]->name); inputs[src0_name] = node->src[0]; @@ -239,6 +242,7 @@ void GgmlOvDecoder::set_input_output(ggml_tensor* node, std::mapsrc[2]) { + // std::string src2_name = std::string(node->src[2]->name) + "_" + std::to_string(node->src[2]->view_offs) + "_input_" + ggml_op_name(node->src[2]->op); // std::string src2_name = std::string(node->src[2]->name) + "_" + std::to_string(node->src[2]->view_offs); std::string src2_name = std::string(node->src[2]->name); inputs[src2_name] = node->src[2]; @@ -253,7 +257,7 @@ void GgmlOvDecoder::set_input_output(ggml_tensor* node, std::mapn_nodes; i++) { struct ggml_tensor * node = cgraph->nodes[i]; @@ -269,9 +280,14 @@ void ggml_graph_op_print(const struct ggml_cgraph * cgraph) { << std::setw(5) << node->ne[0] << ", " << std::setw(5) << node->ne[1] << ", " << std::setw(5) << node->ne[2] << "] " - << std::left << std::setw(16) << ggml_op_name(node->op) << std::right << " " - << " " << node->name - << ((node->flags & GGML_TENSOR_FLAG_PARAM) ? "x" : node->grad ? "g" : " ") << "\n"; + << std::left << std::setw(20) << ggml_op_name(node->op) << std::right << " " + << std::left << std::setw(44) << node->name << std::right + << ((node->flags & GGML_TENSOR_FLAG_PARAM) ? "x" : node->grad ? "g" : " ") + << std::setw(2) << "[ " + << std::setw(0) << node->nb[0] << ", " + << std::setw(5) << node->nb[1] << ", " + << std::setw(5) << node->nb[2] << "] " + << "\n"; if (node->src[0]) { file << std::setw(10) << " [ " @@ -279,15 +295,19 @@ void ggml_graph_op_print(const struct ggml_cgraph * cgraph) { << std::setw(5) << node->src[0]->ne[1] << ", " << std::setw(5) << node->src[0]->ne[2] << "] " << std::setw(12) - << "0: " << ggml_op_name(node->src[0]->op) << " "; + << "0: " << std::left << std::setw(12) << ggml_op_name(node->src[0]->op) << std::right; // // Custom logic to handle '\000' // const char* name_ptr = node->src[0]->name; // while (*name_ptr != '\0' || *(name_ptr + 1) != '\0' || *(name_ptr + 2) != '\0') { // file << *name_ptr; // name_ptr++; // } - file << node->src[0]->name; - file << "\n"; + file << std::left << std::setw(30) << node->src[0]->name << std::right + << std::setw(16) << "[ " + << std::setw(0) << node->src[0]->nb[0] << ", " + << std::setw(5) << node->src[0]->nb[1] << ", " + << std::setw(5) << node->src[0]->nb[2] << "] " + << "\n"; } if (node->src[1]) { file << std::setw(10) << " [ " @@ -295,15 +315,19 @@ void ggml_graph_op_print(const struct ggml_cgraph * cgraph) { << std::setw(5) << node->src[1]->ne[1] << ", " << std::setw(5) << node->src[1]->ne[2] << "] " << std::setw(12) - << "1: " << ggml_op_name(node->src[1]->op) << " "; + << "1: " << std::left << std::setw(12) << ggml_op_name(node->src[1]->op) << std::right; // // Custom logic to handle '\000' // const char* name_ptr = node->src[1]->name; // while (*name_ptr != '\0' || *(name_ptr + 1) != '\0' || *(name_ptr + 2) != '\0') { // file << *name_ptr; // name_ptr++; // } - file << node->src[1]->name; - file << "\n"; + file << std::left << std::setw(30) << node->src[1]->name << std::right + << std::setw(16) << "[ " + << std::setw(0) << node->src[1]->nb[0] << ", " + << std::setw(5) << node->src[1]->nb[1] << ", " + << std::setw(5) << node->src[1]->nb[2] << "] " + << "\n"; } } diff --git a/ggml/src/ggml-openvino/utils.cpp b/ggml/src/ggml-openvino/utils.cpp index 21edad596b..4b25c13689 100644 --- a/ggml/src/ggml-openvino/utils.cpp +++ b/ggml/src/ggml-openvino/utils.cpp @@ -121,7 +121,7 @@ enum ggml_status openvino_frontend_compute(ggml_backend_t backend, struct ggml_c // Convert InputModel -> ov::Model std::shared_ptr model = front_end->convert(input_model); - // ov::save_model(model, "/home/user/zhan/merge_git_commits/llama.cpp-ov/001_model.xml"); + ov::save_model(model, "/home/user/zhan/merge_git_commits/llama.cpp-ov/001_model.xml"); if (!model) { GGML_LOG_ERROR("Model is not converted \n"); @@ -145,6 +145,14 @@ enum ggml_status openvino_frontend_compute(ggml_backend_t backend, struct ggml_c // Set input tensor for (size_t i = 0; i < input_names.size(); i++) { infer_request.set_input_tensor(i, input_tensors[input_names[i]]); + + // auto input_tensor = infer_request.get_input_tensor(i); + // auto input_shape = input_tensor.get_shape(); + // std::cout << "Input tensor " << i << " shape: "; + // for (const auto& dim : input_shape) { + // std::cout << dim << " "; + // } + // std::cout << std::endl; } infer_request.infer(); @@ -155,6 +163,7 @@ enum ggml_status openvino_frontend_compute(ggml_backend_t backend, struct ggml_c for (size_t i = 0; i < output_names.size(); i++) { // std::string op_name = ggml_decoder->get_node_op_name(output_names[i]); auto output_tensor = infer_request.get_output_tensor(i); + // output_tensor.get_shape(); std::memcpy(output_tensors[output_names[i]], output_tensor.data(), output_tensor.get_byte_size()); #ifdef GGML_OPENVINO_DEBUG printf("Output %s after: %g\n", output_names[i].c_str(), *(double*)(output_tensor.data()));