From e95f29cbc02f01be2654517784db2799e16f6bea Mon Sep 17 00:00:00 2001 From: yumengbo Date: Thu, 12 Dec 2024 13:13:31 +0800 Subject: [PATCH] Fix issue for output memory copy of infer request --- .../src/ggml-openvino/ggml-graph-iterator.cpp | 16 +++++------- ggml/src/ggml-openvino/utils.cpp | 26 +++++++++---------- 2 files changed, 19 insertions(+), 23 deletions(-) diff --git a/ggml/src/ggml-openvino/ggml-graph-iterator.cpp b/ggml/src/ggml-openvino/ggml-graph-iterator.cpp index 44e119a1ac..5c06179023 100644 --- a/ggml/src/ggml-openvino/ggml-graph-iterator.cpp +++ b/ggml/src/ggml-openvino/ggml-graph-iterator.cpp @@ -21,22 +21,20 @@ void GgmlOvGraphIterator::initialize_decoders() { // m_decoders.resize(static_cast(nodes_size)); for (int i = 0; i < nodes_size; ++i) { - // Skip View Op - // if (m_cgraph->nodes[i] ->op == GGML_OP_PERMUTE - // || m_cgraph->nodes[i] ->op == GGML_OP_CPY ) { - // continue; - // } auto decoder = std::make_shared(m_cgraph->nodes[i], m_cgraph); m_decoders.push_back(decoder); for (size_t inp = 0; inp < decoder->get_input_size(); ++inp) { - // if (i == 0 || decoder->is_graph_input(inp)) { + // Skip duplicate input name + if (std::find(m_input_names.begin(), m_input_names.end(), decoder->get_input_name(inp)) == m_input_names.end()) { m_input_names.push_back(decoder->get_input_name(inp)); - // } + } } for (size_t inp = 0; inp < decoder->get_output_size(); ++inp) { - // if (i == nodes_size - 1 || decoder->is_graph_output(inp)) { + // Skip duplicate output name + auto output_name = decoder->get_output_name(inp); + if (std::find(m_output_names.begin(), m_output_names.end(), output_name) == m_output_names.end()) { m_output_names.push_back(decoder->get_output_name(inp)); - // } + } } } diff --git a/ggml/src/ggml-openvino/utils.cpp b/ggml/src/ggml-openvino/utils.cpp index db52b1f81d..2dfe837cbd 100644 --- a/ggml/src/ggml-openvino/utils.cpp +++ b/ggml/src/ggml-openvino/utils.cpp @@ -29,8 +29,8 @@ std::map get_ggml_graph_input_tensors(std::shared_ptr get_ggml_graph_output_tensors(std::shared_ptr ggml_graph_iterator) { - std::map output_tensors; +std::map get_ggml_graph_output_dst(std::shared_ptr ggml_graph_iterator) { + std::map output_tensors; auto output_names = ggml_graph_iterator->get_output_names(); ggml_graph_iterator->reset(); for (; !ggml_graph_iterator->is_end(); ggml_graph_iterator->next()) { @@ -41,8 +41,7 @@ std::map get_ggml_graph_output_tensors(std::shared_ptr< #ifdef GGML_OPENVINO_DEBUG printf("Output %d: %g\n", inp, *(double*)(output_data)); #endif - ov::Tensor output_tensor = ov::Tensor(decoder->get_output_type(inp), decoder->get_output_shape(inp).to_shape(), output_data); - output_tensors[decoder->get_output_name(inp)] = output_tensor; + output_tensors[decoder->get_output_name(inp)] = output_data; } } } @@ -100,7 +99,6 @@ enum ggml_status openvino_frontend_compute(ggml_backend_t backend, struct ggml_c #endif } - // Loading a model to the device ov::CompiledModel compiled_model = core.compile_model(model); @@ -113,18 +111,18 @@ enum ggml_status openvino_frontend_compute(ggml_backend_t backend, struct ggml_c // Set input tensor for (size_t i = 0; i < input_names.size(); i++) { - infer_request.set_input_tensor(i, input_tensors[input_names[i]]); - } - - // Set output tensor - - auto output_names = ggml_graph_iterator->get_output_names(); - auto output_tensors = get_ggml_graph_output_tensors(ggml_graph_iterator); - for (size_t i = 0; i < output_names.size(); i++) { - infer_request.set_output_tensor(i, output_tensors[output_names[i]]); + infer_request.set_input_tensor(i, input_tensors[input_names[i]]); } infer_request.infer(); + + // Set dst data for outputs + auto output_names = ggml_graph_iterator->get_output_names(); + auto output_tensors = get_ggml_graph_output_dst(ggml_graph_iterator); + for (size_t i = 0; i < output_names.size(); i++) { + auto output_tensor = infer_request.get_output_tensor(i); + std::memcpy(output_tensors[output_names[i]], output_tensor.data(), output_tensor.get_byte_size()); + } return GGML_STATUS_SUCCESS; GGML_UNUSED(backend);