diff --git a/ggml/src/ggml-openvino.cpp b/ggml/src/ggml-openvino.cpp index 2279df1d6d..b9f1b89722 100644 --- a/ggml/src/ggml-openvino.cpp +++ b/ggml/src/ggml-openvino.cpp @@ -1045,25 +1045,12 @@ static enum ggml_status ggml_backend_openvino_graph_compute(ggml_backend_t backe ggml_backend_openvino_transpose(cgraph->nodes[i]); } else if (std::find(cpy_indices.begin(), cpy_indices.end(), i) != cpy_indices.end()) { ggml_backend_openvino_cpy(cgraph->nodes[i]); - // } else if (std::find(permute_indices.begin(), permute_indices.end(), i) != permute_indices.end()) { - // ggml_backend_openvino_permute(cgraph->nodes[i]); } else if (std::find(view_indices.begin(), view_indices.end(), i) != view_indices.end()) { ggml_backend_openvino_view(cgraph->nodes[i]); } else if (std::find(cont_indices.begin(), cont_indices.end(), i) != cont_indices.end()) { ggml_backend_openvino_dup_bytes(cgraph->nodes[i]); } else if (std::find(reshape_indices.begin(), reshape_indices.end(), i) != reshape_indices.end()) { ggml_backend_openvino_reshape(cgraph->nodes[i]); - - // } else if (std::find(mul_mat_indices.begin(), mul_mat_indices.end(), i) != mul_mat_indices.end()) { - // ggml_backend_openvino_mul_mat(cgraph->nodes[i]); - // } else if (std::find(view_indices_prompt.begin(), view_indices_prompt.end(), i) != view_indices_prompt.end()) { - // ggml_backend_openvino_view(cgraph->nodes[i]); - // }else if (std::find(view_split.begin(), view_split.end(), i) != view_split.end()) { - // ggml_backend_openvino_view(cgraph->nodes[i]); - // }else if (std::find(cpy_split_16.begin(), cpy_split_16.end(), i) != cpy_split_16.end()) { - // ggml_backend_openvino_cpy(cgraph->nodes[i]); - // }else if (std::find(cpy_split_19.begin(), cpy_split_19.end(), i) != cpy_split_19.end()) { - // ggml_backend_openvino_cpy(cgraph->nodes[i]); } else { // Process a range of nodes with openvino_frontend_compute int start_index = i; @@ -1071,16 +1058,9 @@ static enum ggml_status ggml_backend_openvino_graph_compute(ggml_backend_t backe && std::find(add_indices.begin(), add_indices.end(), i) == add_indices.end() && std::find(transpose_indices.begin(), transpose_indices.end(), i) == transpose_indices.end() && std::find(cpy_indices.begin(), cpy_indices.end(), i) == cpy_indices.end() - // && std::find(permute_indices.begin(), permute_indices.end(), i) == permute_indices.end() && std::find(view_indices.begin(), view_indices.end(), i) == view_indices.end() && std::find(cont_indices.begin(), cont_indices.end(), i) == cont_indices.end() && std::find(reshape_indices.begin(), reshape_indices.end(), i) == reshape_indices.end() - - // && std::find(mul_mat_indices.begin(), mul_mat_indices.end(), i) == mul_mat_indices.end() - // && (std::find(view_indices_prompt.begin(), view_indices_prompt.end(), i) == view_indices_prompt.end()) - // && std::find(view_split.begin(), view_split.end(), i) == view_split.end() - // && std::find(cpy_split_16.begin(), cpy_split_16.end(), i) == cpy_split_16.end() - // && std::find(cpy_split_19.begin(), cpy_split_19.end(), i) == cpy_split_19.end() ) { i++; } diff --git a/ggml/src/ggml-openvino/ggml-decoder.cpp b/ggml/src/ggml-openvino/ggml-decoder.cpp index ec827e8006..3b396c05f7 100644 --- a/ggml/src/ggml-openvino/ggml-decoder.cpp +++ b/ggml/src/ggml-openvino/ggml-decoder.cpp @@ -92,8 +92,6 @@ void GgmlOvDecoder::set_input_output(ggml_tensor* node, std::mapsrc[1]->name); inputs[src0_name] = node->src[0]; - // inputs[src1_name] = node->src[1]; - // outputs[node_name] = node; src1_name = std::string(node->src[1]->view_src->name); inputs[src1_name] = node->src[1]; node_name = std::string(node->view_src->name); @@ -110,9 +108,6 @@ void GgmlOvDecoder::set_input_output(ggml_tensor* node, std::map(node->src[0]->ne[0])}; auto input1_param = std::make_shared(ov::element::f32, input1_shape); m_params.push_back(input1_param); - // ov::Shape input2_shape = { static_cast(node->src[1]->ne[2]), - // static_cast(node->src[1]->ne[1]), - // static_cast(node->src[1]->ne[0])}; ov::Shape input2_shape = { static_cast(node->src[1]->ne[2]), static_cast(node->src[1]->ne[1]), static_cast(node->src[1]->view_src->ne[0])}; @@ -122,8 +117,6 @@ void GgmlOvDecoder::set_input_output(ggml_tensor* node, std::mapsrc[1]->name); inputs[src0_name] = node->src[0]; - // inputs[src1_name] = node->src[1]; - // outputs[node_name] = node; src1_name = std::string(node->src[1]->view_src->name); inputs[src1_name] = node->src[1]; node_name = std::string(node->view_src->name); @@ -152,44 +145,10 @@ void GgmlOvDecoder::set_input_output(ggml_tensor* node, std::mapsrc[0]; - // if (node->ne[0] == 21504 || node->ne[0] == 7 - // || node->ne[0] == 3072 && node->src[0]->ne[0] == 98304 - // || node->ne[0] == 1 && node->src[0]->ne[0] == 98304) { - // // if (node->ne[0] == 21504 || node->ne[0] == 7) { - // node_name = std::string(node->view_src->name); - // outputs[node_name] = node; - // } else { - // outputs[node_name] = node; - // } - // if (node->ne[0] == 3072 && node->ne[1] == 1 && node->ne[2] == 1) { - // outputs[src0_name] = node; - // m_output_names.push_back(src0_name); - // } else { - // outputs[node_name] = node; - // m_output_names.push_back(node_name); - // } outputs[node_name] = node; m_input_names.push_back(src0_name); m_op_node_name.emplace_back(src0_name, ggml_op_name(node->op)); m_output_names.push_back(node_name); - - // ov::Shape input_shape = { static_cast(node->src[0]->ne[2]), - // static_cast(node->src[0]->ne[1]), - // static_cast(node->src[0]->ne[0])}; - // auto type = get_input_type(src0_name); - // auto input_param = std::make_shared(type, input_shape); - // m_params.push_back(input_param); - - // if (node->ne[0] > node->ne[1] && (node->ne[0] * node->nb[0] != node->nb[1]) && node->ne[2] == 1) { - // m_continuous = false; - // } else { - // m_continuous = true; - - // } - // m_continuous = false; - - // [TODO]: multiple cases - break; } // SCALE @@ -211,11 +170,6 @@ void GgmlOvDecoder::set_input_output(ggml_tensor* node, std::mapsrc[1]->name); inputs[src0_name] = node->src[0]; - // if (node->ne[0] == 32 &&node->src[0]->type == GGML_TYPE_I32) { - // static_cast(inputs[src0_name]->data)[0] = 1; - // } else if (node->ne[0] == 32 && node->src[0]->type == GGML_TYPE_F16) { - // static_cast(inputs[src0_name]->data)[0] = static_cast(1); - // } inputs[src1_name] = node->src[1]; outputs[node_name] = node; m_input_names.push_back(src0_name); diff --git a/ggml/src/ggml-openvino/utils.cpp b/ggml/src/ggml-openvino/utils.cpp index 642f2b6662..736c7f690b 100644 --- a/ggml/src/ggml-openvino/utils.cpp +++ b/ggml/src/ggml-openvino/utils.cpp @@ -11,12 +11,9 @@ std::shared_ptr get_ggml_decoder(struct ggml_cgraph * cgraph, con return std::make_shared(nullptr, cgraph, start_index, end_index); } -// std::map get_ggml_graph_input_tensors(std::shared_ptr ggml_decoder) { std::vector> get_ggml_graph_input_tensors(std::shared_ptr ggml_decoder, bool flag) { - // std::map input_tensors; std::vector> input_tensors; auto input_names = ggml_decoder->get_input_names(); - // auto node_name = ggml_decoder->get_op_name(); size_t op_iter = 0; for (size_t inp = 0; inp < input_names.size(); ++inp) { auto name = input_names[inp]; @@ -40,48 +37,9 @@ std::vector> get_ggml_graph_input_tensors(std const size_t dim2 = static_cast(ggml_decoder->get_input_shape(name).to_shape()[0]); size_t phys_stride = static_cast(input_stride[1]) / element_size; ov::Shape input_shape = { dim2, num_rows, phys_stride }; // {1, 7, 9216 } - // if (!flag) { - // std::cout << "CONT input shape: " << input_shape << std::endl; - // } input_tensor = ov::Tensor(ggml_decoder->get_input_type(name), input_shape, input_data); - // if(!flag) { - // std::cout << std::left << "*[" << std::setw(2) << inp << "]*: " - // << "Input Name: " << std::setw(20) << name - // << "Type: " << std::setw(5) << ggml_decoder->get_input_type(name) - // << "OP: " << std::setw(10) << op_node_name - // << "CONT: " << check_if_contiguous - // << ", shape: " << std::setw(4) << input_tensor.get_shape()[0] << " " << std::setw(4) << input_tensor.get_shape()[1] << " " << input_tensor.get_shape()[2] - // << ", address: " - // << std::setw(15) << input_tensor.data() << " " - // << std::setw(15) << ((float*)input_tensor.data())[0] - // << std::setw(15) << ((float*)input_tensor.data())[1] - // << ", ne[0]: " - // << std::setw(15) << ((float*)input_tensor.data())[input_tensor.get_shape()[0]] << std::right - // << std::setw(15) << ((float*)input_tensor.data())[input_tensor.get_shape()[0] + 1] << std::right - // << std::right - // << std::endl; - // } } else { input_tensor = ov::Tensor(ggml_decoder->get_input_type(name), ggml_decoder->get_input_shape(name).to_shape(), input_data); - // if(!flag) { - // std::cout << std::left << "[ " << std::setw(2) << inp << " ]: " - // << "Input Name: " << std::setw(20) << name - // << "Type: " << std::setw(5) << ggml_decoder->get_input_type(name) - // << "OP: " << std::setw(10) << op_node_name - // << "CONT: " << check_if_contiguous - // << ", shape: " << std::setw(4) << input_tensor.get_shape()[0] << " " << std::setw(4) << input_tensor.get_shape()[1] << " " << input_tensor.get_shape()[2] - // << ", address: " - // << std::setw(15) << input_tensor.data() << " " - // << std::setw(15) << ((float*)input_tensor.data())[0] - // << std::setw(15) << ((float*)input_tensor.data())[1] - // << ", ne[0]-1: " - // << std::setw(15) << ((float*)input_tensor.data())[input_tensor.get_shape()[0]-1] - // << ", ne[0]: " - // << std::setw(15) << ((float*)input_tensor.data())[input_tensor.get_shape()[0]] << std::right - // << std::setw(15) << ((float*)input_tensor.data())[input_tensor.get_shape()[0] + 1] << std::right - // << std::right - // << std::endl; - // } } // input_tensors[name] = input_tensor; @@ -146,13 +104,6 @@ enum ggml_status openvino_frontend_compute(ggml_backend_t backend, struct ggml_c // Convert InputModel -> ov::Model std::shared_ptr model = front_end->convert(input_model); - // ov::save_model(model, "/home/user/zhan/merge_git_commits/llama.cpp-ov/001_model.xml"); - - // auto cloned_model = model->clone(); - // std::string model_dir = "/home/user/zhan/merge_git_commits/llama.cpp-ov"; - // auto path_base = model_dir + "/" + cloned_model->get_name(); - // // ov::pass::VisualizeTree(path_base + ".svg").run_on_model(cloned_model); - // ov::serialize(cloned_model, path_base + ".xml", path_base + ".bin"); if (!model) { GGML_LOG_ERROR("Model is not converted \n"); @@ -162,14 +113,7 @@ enum ggml_status openvino_frontend_compute(ggml_backend_t backend, struct ggml_c #endif } - // model = core.read_model("/home/user/zhan/merge_git_commits/llama.cpp-ov/replaceWithInputLayer_000_model.xml"); - // Loading a model to the device - // std::cout << "Compile ..." << std::endl; ov::CompiledModel compiled_model = core.compile_model(model); - // ov::save_model(compiled_model.get_runtime_model(), "/home/user/zhan/merge_git_commits/llama.cpp-ov/001_compile_model.xml"); - // std::ofstream output_file("/home/user/zhan/merge_git_commits/llama.cpp-ov/000_compile_model.xml"); - // compiled_model.export_model(output_file); - // output_file.close(); // Create infer request ov::InferRequest infer_request = compiled_model.create_infer_request(); @@ -180,19 +124,9 @@ enum ggml_status openvino_frontend_compute(ggml_backend_t backend, struct ggml_c // Set input tensor for (size_t i = 0; i < input_names.size(); i++) { - // infer_request.set_input_tensor(i, input_tensors[input_names[i]]); infer_request.set_input_tensor(i, input_tensors.at(i).second); - - // auto input_tensor = infer_request.get_input_tensor(i); - // auto input_shape = input_tensor.get_shape(); - // std::cout << "Input tensor " << i << " shape: "; - // for (const auto& dim : input_shape) { - // std::cout << dim << " "; - // } - // std::cout << std::endl; } - // std::cout << "Infer ..." << std::endl; infer_request.infer(); // Set dst data for outputs @@ -201,130 +135,6 @@ enum ggml_status openvino_frontend_compute(ggml_backend_t backend, struct ggml_c for (size_t i = 0; i < output_names.size(); i++) { auto output_tensor = infer_request.get_output_tensor(i); std::memcpy(output_tensors[output_names[i]], output_tensor.data(), output_tensor.get_byte_size()); - // if(!flag) { - // auto tensor = ggml_decoder->get_output_ggml_tensor(output_names[i]); - // std::cout << std::left << "[ " << std::setw(2) << i << " ]: " - // << "output_names: " << std::setw(20) << output_names[i] - // << ", shape: " << std::setw(4) << tensor->ne[0] << " " << std::setw(4) << tensor->ne[1] << " " << tensor->ne[2] - // << ", address: " - // << std::setw(15) << tensor->data << " " - // << std::setw(15) << static_cast(((float*)output_tensor.data())[0]) - // << std::setw(15) << static_cast(((float*)output_tensor.data())[1]) - // << ", ne[0]-1: " - // << std::setw(15) << static_cast(((float*)output_tensor.data())[tensor->ne[0] - 1]) - // << ", ne[0]: " - // << std::setw(15) << static_cast(((float*)output_tensor.data())[tensor->ne[0]]) << std::right - // << std::setw(15) << static_cast(((float*)output_tensor.data())[tensor->ne[0] + 1]) << std::right - // << std::right - // << std::endl; - // if (i == 19) { - // auto output_tensor_18 = infer_request.get_output_tensor(18); - // auto tensor = ggml_decoder->get_output_ggml_tensor(output_names[18]); - // std::cout << std::left << " " << std::setw(2) << 18 << " : " - // << "output_names: " << std::setw(20) << output_names[18] - // << ", shape: " << std::setw(4) << tensor->ne[0] << " " << std::setw(4) << tensor->ne[1] << " " << tensor->ne[2] - // << ", address: " - // << std::setw(15) << tensor->data << " " - // << std::setw(15) << static_cast(((float*)output_tensor_18.data())[0]) - // << std::setw(15) << static_cast(((float*)output_tensor_18.data())[1]) - // << ", ne[0]-1: " - // << std::setw(15) << static_cast(((float*)output_tensor_18.data())[tensor->ne[0] - 1]) - // << ", ne[0]: " - // << std::setw(15) << static_cast(((float*)output_tensor_18.data())[tensor->ne[0]]) << std::right - // << std::setw(15) << static_cast(((float*)output_tensor_18.data())[tensor->ne[0] + 1]) << std::right - // << std::right - // << std::endl; - // } - // if(i == 23) { - // auto output_tensor_15 = infer_request.get_output_tensor(15); - // auto tensor = ggml_decoder->get_output_ggml_tensor(output_names[15]); - // std::cout << std::left << " " << std::setw(2) << 15 << " : " - // << "output_names: " << std::setw(20) << output_names[15] - // << ", shape: " << std::setw(4) << tensor->ne[0] << " " << std::setw(4) << tensor->ne[1] << " " << tensor->ne[2] - // << ", address: " - // << std::setw(15) << tensor->data << " " - // << std::setw(15) << static_cast(((float*)output_tensor_15.data())[0]) - // << std::setw(15) << static_cast(((float*)output_tensor_15.data())[1]) - // << ", ne[0]-1: " - // << std::setw(15) << static_cast(((float*)output_tensor_15.data())[tensor->ne[0] - 1]) - // << ", ne[0]: " - // << std::setw(15) << static_cast(((float*)output_tensor_15.data())[tensor->ne[0]]) << std::right - // << std::setw(15) << static_cast(((float*)output_tensor_15.data())[tensor->ne[0] + 1]) << std::right - // << std::right - // << std::endl; - // auto cache_k_l0_20 = ggml_decoder->get_input_names()[20]; - // // auto input_data = ggml_decoder->get_input_ggml_tensor(cache_k_l0_20)->data; - // auto input_tensor = input_tensors.at(20).second; - // std::cout << std::left << " " << std::setw(2) << 20 << " : " - // << "Input Name: " << std::setw(20) << cache_k_l0_20 - // << ", shape: " << std::setw(4) << input_tensor.get_shape()[0] << " " << std::setw(4) << input_tensor.get_shape()[1] << " " << input_tensor.get_shape()[2] - // << ", address: " - // << std::setw(15) << input_tensor.data() << " " - // << std::setw(15) << ((float*)input_tensor.data())[0] - // << std::setw(15) << ((float*)input_tensor.data())[1] - // << ", ne[0]-1: " - // << std::setw(15) << ((float*)input_tensor.data())[input_tensor.get_shape()[0] - 1] - // << ", ne[0]: " - // << std::setw(15) << ((float*)input_tensor.data())[input_tensor.get_shape()[0]] << std::right - // << std::setw(15) << ((float*)input_tensor.data())[input_tensor.get_shape()[0] + 1] << std::right - // << std::right - // << std::endl; - - // auto cache_k_l0_27 = ggml_decoder->get_input_names()[27]; - // // auto input_data = ggml_decoder->get_input_ggml_tensor(cache_k_l0_20)->data; - // auto input_tensor_27 = input_tensors.at(27).second; - // std::cout << std::left << " " << std::setw(2) << 27 << " : " - // << "Input Name: " << std::setw(20) << cache_k_l0_27 - // << ", shape: " << std::setw(4) << input_tensor_27.get_shape()[0] << " " << std::setw(4) << input_tensor_27.get_shape()[1] << " " << input_tensor_27.get_shape()[2] - // << ", address: " - // << std::setw(15) << input_tensor_27.data() << " " - // << std::setw(15) << ((float*)input_tensor_27.data())[0] - // << std::setw(15) << ((float*)input_tensor_27.data())[1] - // << ", ne[0]-1: " - // << std::setw(15) << ((float*)input_tensor_27.data())[input_tensor_27.get_shape()[0] - 1] - // << ", ne[0]: " - // << std::setw(15) << ((float*)input_tensor_27.data())[input_tensor_27.get_shape()[0]] << std::right - // << std::setw(15) << ((float*)input_tensor_27.data())[input_tensor_27.get_shape()[0] + 1] << std::right - // << std::right - // << std::endl; - - // auto cache_k_l0_29 = ggml_decoder->get_input_names()[29]; - // // auto input_data = ggml_decoder->get_input_ggml_tensor(cache_k_l0_20)->data; - // auto input_tensor_29 = input_tensors.at(29).second; - // std::cout << std::left << " " << std::setw(2) << 29 << " : " - // << "Input Name: " << std::setw(20) << cache_k_l0_29 - // << ", shape: " << std::setw(4) << input_tensor_29.get_shape()[0] << " " << std::setw(4) << input_tensor_29.get_shape()[1] << " " << input_tensor_29.get_shape()[2] - // << ", address: " - // << std::setw(15) << input_tensor_29.data() << " " - // << std::setw(15) << ((float*)input_tensor_29.data())[0] - // << std::setw(15) << ((float*)input_tensor_29.data())[1] - // << ", ne[0]-1: " - // << std::setw(15) << ((float*)input_tensor_29.data())[input_tensor_29.get_shape()[0] - 1] - // << ", ne[0]: " - // << std::setw(15) << ((float*)input_tensor_29.data())[input_tensor_29.get_shape()[0]] << std::right - // << std::setw(15) << ((float*)input_tensor_29.data())[input_tensor_29.get_shape()[0] + 1] << std::right - // << std::right - // << std::endl; - - // auto cache_k_l0_30 = ggml_decoder->get_input_names()[30]; - // // auto input_data = ggml_decoder->get_input_ggml_tensor(cache_k_l0_20)->data; - // auto input_tensor_30 = input_tensors.at(30).second; - // std::cout << std::left << " " << std::setw(2) << 30 << " : " - // << "Input Name: " << std::setw(20) << cache_k_l0_30 - // << ", shape: " << std::setw(4) << input_tensor_30.get_shape()[0] << " " << std::setw(4) << input_tensor_30.get_shape()[1] << " " << input_tensor_30.get_shape()[2] - // << ", address: " - // << std::setw(15) << input_tensor_30.data() << " " - // << std::setw(15) << ((float*)input_tensor_30.data())[0] - // << std::setw(15) << ((float*)input_tensor_30.data())[1] - // << ", ne[0]-1: " - // << std::setw(15) << ((float*)input_tensor_30.data())[input_tensor_30.get_shape()[0] - 1] - // << ", ne[0]: " - // << std::setw(15) << ((float*)input_tensor_30.data())[input_tensor_30.get_shape()[0]] << std::right - // << std::setw(15) << ((float*)input_tensor_30.data())[input_tensor_30.get_shape()[0] + 1] << std::right - // << std::right - // << std::endl; - // } - // } #ifdef GGML_OPENVINO_DEBUG printf("Output %s after: %g\n", output_names[i].c_str(), *(double*)(output_tensor.data())); #endif