Fix issue with output memory copy of infer request

yumengbo authored 2024-12-12 13:13:31 +08:00, committed by Mustafa Cavus
parent 8c5a609f8d
commit e95f29cbc0
2 changed files with 19 additions and 23 deletions
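In short: the previous code wrapped each ggml destination buffer in an ov::Tensor and bound it with set_output_tensor() before running inference; this commit records the raw destination pointers instead and copies the results out of the infer request once infer() has completed. A minimal sketch of the two patterns, assuming a single output at index 0 and a pre-allocated destination buffer dst (names here are illustrative, not from this commit):

#include <cstring>
#include <openvino/openvino.hpp>

// Before: wrap the external buffer and bind it as the output ahead of time.
void bind_output_before_infer(ov::InferRequest& req, const ov::element::Type& type,
                              const ov::Shape& shape, void* dst) {
    ov::Tensor wrapped(type, shape, dst);  // ov::Tensor over pre-allocated memory
    req.set_output_tensor(0, wrapped);     // plugin writes its result into dst
    req.infer();
}

// After: let the plugin own the output tensor, then copy the bytes out.
void copy_output_after_infer(ov::InferRequest& req, void* dst) {
    req.infer();
    ov::Tensor out = req.get_output_tensor(0);
    std::memcpy(dst, out.data(), out.get_byte_size());
}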

@@ -21,22 +21,20 @@ void GgmlOvGraphIterator::initialize_decoders() {
    // m_decoders.resize(static_cast<size_t>(nodes_size));
    for (int i = 0; i < nodes_size; ++i) {
        // Skip View Op
        // if (m_cgraph->nodes[i]->op == GGML_OP_PERMUTE
        //     || m_cgraph->nodes[i]->op == GGML_OP_CPY) {
        //     continue;
        // }
        auto decoder = std::make_shared<GgmlOvDecoder>(m_cgraph->nodes[i], m_cgraph);
        m_decoders.push_back(decoder);
        for (size_t inp = 0; inp < decoder->get_input_size(); ++inp) {
            // if (i == 0 || decoder->is_graph_input(inp)) {
            // Skip duplicate input name
            if (std::find(m_input_names.begin(), m_input_names.end(), decoder->get_input_name(inp)) == m_input_names.end()) {
                m_input_names.push_back(decoder->get_input_name(inp));
            // }
            }
        }
        for (size_t inp = 0; inp < decoder->get_output_size(); ++inp) {
            // if (i == nodes_size - 1 || decoder->is_graph_output(inp)) {
            // Skip duplicate output name
            auto output_name = decoder->get_output_name(inp);
            if (std::find(m_output_names.begin(), m_output_names.end(), output_name) == m_output_names.end()) {
                m_output_names.push_back(decoder->get_output_name(inp));
            // }
            }
        }
    }
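The duplicate-name checks above rescan m_input_names and m_output_names with std::find, which is linear per lookup. Not part of this commit, but for reference, a seen-set kept next to each vector would make the check O(1) while preserving first-seen order; push_unique below is a hypothetical helper:

#include <string>
#include <unordered_set>
#include <vector>

// Push `name` only if it has not been seen before; insert().second is true
// exactly when the element was newly inserted into the set.
static void push_unique(std::vector<std::string>& names,
                        std::unordered_set<std::string>& seen,
                        const std::string& name) {
    if (seen.insert(name).second) {
        names.push_back(name);
    }
}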

@@ -29,8 +29,8 @@ std::map<std::string, ov::Tensor> get_ggml_graph_input_tensors(std::shared_ptr<G
    return input_tensors;
}
-std::map<std::string, ov::Tensor> get_ggml_graph_output_tensors(std::shared_ptr<GgmlOvGraphIterator> ggml_graph_iterator) {
-    std::map<std::string, ov::Tensor> output_tensors;
+std::map<std::string, void*> get_ggml_graph_output_dst(std::shared_ptr<GgmlOvGraphIterator> ggml_graph_iterator) {
+    std::map<std::string, void*> output_tensors;
    auto output_names = ggml_graph_iterator->get_output_names();
    ggml_graph_iterator->reset();
    for (; !ggml_graph_iterator->is_end(); ggml_graph_iterator->next()) {
@@ -41,8 +41,7 @@ std::map<std::string, ov::Tensor> get_ggml_graph_output_tensors(std::shared_ptr<
#ifdef GGML_OPENVINO_DEBUG
                printf("Output %d: %g\n", inp, *(double*)(output_data));
#endif
-                ov::Tensor output_tensor = ov::Tensor(decoder->get_output_type(inp), decoder->get_output_shape(inp).to_shape(), output_data);
-                output_tensors[decoder->get_output_name(inp)] = output_tensor;
+                output_tensors[decoder->get_output_name(inp)] = output_data;
            }
        }
    }
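The map returned by the renamed get_ggml_graph_output_dst() now holds raw void* destinations keyed by output name; no ov::Tensor is constructed over ggml memory anymore, and nothing is copied until inference has finished. A self-contained restatement of how the compute path consumes this map (assuming, as the hunk below also does, that output index i of the compiled model corresponds to output_names[i]):

#include <cstring>
#include <map>
#include <string>
#include <vector>
#include <openvino/openvino.hpp>

// Copy each inference result into the ggml buffer registered under its name.
void copy_results_to_dst(ov::InferRequest& req,
                         const std::vector<std::string>& output_names,
                         const std::map<std::string, void*>& output_dst) {
    for (size_t i = 0; i < output_names.size(); ++i) {
        ov::Tensor out = req.get_output_tensor(i);
        std::memcpy(output_dst.at(output_names[i]), out.data(), out.get_byte_size());
    }
}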
@@ -100,7 +99,6 @@ enum ggml_status openvino_frontend_compute(ggml_backend_t backend, struct ggml_c
#endif
    }
    // Loading a model to the device
    ov::CompiledModel compiled_model = core.compile_model(model);
@@ -113,18 +111,18 @@ enum ggml_status openvino_frontend_compute(ggml_backend_t backend, struct ggml_c
    // Set input tensor
    for (size_t i = 0; i < input_names.size(); i++) {
-        infer_request.set_input_tensor(i, input_tensors[input_names[i]]);
-    }
-    // Set output tensor
-    auto output_names = ggml_graph_iterator->get_output_names();
-    auto output_tensors = get_ggml_graph_output_tensors(ggml_graph_iterator);
-    for (size_t i = 0; i < output_names.size(); i++) {
-        infer_request.set_output_tensor(i, output_tensors[output_names[i]]);
+        infer_request.set_input_tensor(i, input_tensors[input_names[i]]);
    }
    infer_request.infer();
+    // Set dst data for outputs
+    auto output_names = ggml_graph_iterator->get_output_names();
+    auto output_tensors = get_ggml_graph_output_dst(ggml_graph_iterator);
+    for (size_t i = 0; i < output_names.size(); i++) {
+        auto output_tensor = infer_request.get_output_tensor(i);
+        std::memcpy(output_tensors[output_names[i]], output_tensor.data(), output_tensor.get_byte_size());
+    }
    return GGML_STATUS_SUCCESS;
    GGML_UNUSED(backend);
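A note on the trade-off: binding pre-wrapped output tensors requires the plugin to write directly into ggml memory with exactly the wrapped type and shape, while copying after infer() costs one std::memcpy per output but lets the compiled model produce results in whatever tensors it allocates, with the copy size taken from each output's actual get_byte_size() rather than from a shape computed up front.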