Fix output memory copy issue in the infer request
This commit is contained in:
parent
8c5a609f8d
commit
e95f29cbc0
|
|
@ -21,22 +21,20 @@ void GgmlOvGraphIterator::initialize_decoders() {
|
||||||
// m_decoders.resize(static_cast<size_t>(nodes_size));
|
// m_decoders.resize(static_cast<size_t>(nodes_size));
|
||||||
|
|
||||||
for (int i = 0; i < nodes_size; ++i) {
|
for (int i = 0; i < nodes_size; ++i) {
|
||||||
// Skip View Op
|
|
||||||
// if (m_cgraph->nodes[i] ->op == GGML_OP_PERMUTE
|
|
||||||
// || m_cgraph->nodes[i] ->op == GGML_OP_CPY ) {
|
|
||||||
// continue;
|
|
||||||
// }
|
|
||||||
auto decoder = std::make_shared<GgmlOvDecoder>(m_cgraph->nodes[i], m_cgraph);
|
auto decoder = std::make_shared<GgmlOvDecoder>(m_cgraph->nodes[i], m_cgraph);
|
||||||
m_decoders.push_back(decoder);
|
m_decoders.push_back(decoder);
|
||||||
for (size_t inp = 0; inp < decoder->get_input_size(); ++inp) {
|
for (size_t inp = 0; inp < decoder->get_input_size(); ++inp) {
|
||||||
// if (i == 0 || decoder->is_graph_input(inp)) {
|
// Skip duplicate input name
|
||||||
|
if (std::find(m_input_names.begin(), m_input_names.end(), decoder->get_input_name(inp)) == m_input_names.end()) {
|
||||||
m_input_names.push_back(decoder->get_input_name(inp));
|
m_input_names.push_back(decoder->get_input_name(inp));
|
||||||
// }
|
}
|
||||||
}
|
}
|
||||||
for (size_t inp = 0; inp < decoder->get_output_size(); ++inp) {
|
for (size_t inp = 0; inp < decoder->get_output_size(); ++inp) {
|
||||||
// if (i == nodes_size - 1 || decoder->is_graph_output(inp)) {
|
// Skip duplicate output name
|
||||||
|
auto output_name = decoder->get_output_name(inp);
|
||||||
|
if (std::find(m_output_names.begin(), m_output_names.end(), output_name) == m_output_names.end()) {
|
||||||
m_output_names.push_back(decoder->get_output_name(inp));
|
m_output_names.push_back(decoder->get_output_name(inp));
|
||||||
// }
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -29,8 +29,8 @@ std::map<std::string, ov::Tensor> get_ggml_graph_input_tensors(std::shared_ptr<G
|
||||||
return input_tensors;
|
return input_tensors;
|
||||||
}
|
}
|
||||||
|
|
||||||
std::map<std::string, ov::Tensor> get_ggml_graph_output_tensors(std::shared_ptr<GgmlOvGraphIterator> ggml_graph_iterator) {
|
std::map<std::string, void*> get_ggml_graph_output_dst(std::shared_ptr<GgmlOvGraphIterator> ggml_graph_iterator) {
|
||||||
std::map<std::string, ov::Tensor> output_tensors;
|
std::map<std::string, void*> output_tensors;
|
||||||
auto output_names = ggml_graph_iterator->get_output_names();
|
auto output_names = ggml_graph_iterator->get_output_names();
|
||||||
ggml_graph_iterator->reset();
|
ggml_graph_iterator->reset();
|
||||||
for (; !ggml_graph_iterator->is_end(); ggml_graph_iterator->next()) {
|
for (; !ggml_graph_iterator->is_end(); ggml_graph_iterator->next()) {
|
||||||
|
|
@ -41,8 +41,7 @@ std::map<std::string, ov::Tensor> get_ggml_graph_output_tensors(std::shared_ptr<
|
||||||
#ifdef GGML_OPENVINO_DEBUG
|
#ifdef GGML_OPENVINO_DEBUG
|
||||||
printf("Output %d: %g\n", inp, *(double*)(output_data));
|
printf("Output %d: %g\n", inp, *(double*)(output_data));
|
||||||
#endif
|
#endif
|
||||||
ov::Tensor output_tensor = ov::Tensor(decoder->get_output_type(inp), decoder->get_output_shape(inp).to_shape(), output_data);
|
output_tensors[decoder->get_output_name(inp)] = output_data;
|
||||||
output_tensors[decoder->get_output_name(inp)] = output_tensor;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -100,7 +99,6 @@ enum ggml_status openvino_frontend_compute(ggml_backend_t backend, struct ggml_c
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
// Loading a model to the device
|
// Loading a model to the device
|
||||||
ov::CompiledModel compiled_model = core.compile_model(model);
|
ov::CompiledModel compiled_model = core.compile_model(model);
|
||||||
|
|
||||||
|
|
@ -113,18 +111,18 @@ enum ggml_status openvino_frontend_compute(ggml_backend_t backend, struct ggml_c
|
||||||
|
|
||||||
// Set input tensor
|
// Set input tensor
|
||||||
for (size_t i = 0; i < input_names.size(); i++) {
|
for (size_t i = 0; i < input_names.size(); i++) {
|
||||||
infer_request.set_input_tensor(i, input_tensors[input_names[i]]);
|
infer_request.set_input_tensor(i, input_tensors[input_names[i]]);
|
||||||
}
|
|
||||||
|
|
||||||
// Set output tensor
|
|
||||||
|
|
||||||
auto output_names = ggml_graph_iterator->get_output_names();
|
|
||||||
auto output_tensors = get_ggml_graph_output_tensors(ggml_graph_iterator);
|
|
||||||
for (size_t i = 0; i < output_names.size(); i++) {
|
|
||||||
infer_request.set_output_tensor(i, output_tensors[output_names[i]]);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
infer_request.infer();
|
infer_request.infer();
|
||||||
|
|
||||||
|
// Set dst data for outputs
|
||||||
|
auto output_names = ggml_graph_iterator->get_output_names();
|
||||||
|
auto output_tensors = get_ggml_graph_output_dst(ggml_graph_iterator);
|
||||||
|
for (size_t i = 0; i < output_names.size(); i++) {
|
||||||
|
auto output_tensor = infer_request.get_output_tensor(i);
|
||||||
|
std::memcpy(output_tensors[output_names[i]], output_tensor.data(), output_tensor.get_byte_size());
|
||||||
|
}
|
||||||
|
|
||||||
return GGML_STATUS_SUCCESS;
|
return GGML_STATUS_SUCCESS;
|
||||||
GGML_UNUSED(backend);
|
GGML_UNUSED(backend);
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue