diff --git a/ggml/src/ggml-openvino/ggml-decoder.cpp b/ggml/src/ggml-openvino/ggml-decoder.cpp index 4d82c756cd..b367987372 100644 --- a/ggml/src/ggml-openvino/ggml-decoder.cpp +++ b/ggml/src/ggml-openvino/ggml-decoder.cpp @@ -10,13 +10,21 @@ GgmlOvDecoder::GgmlOvDecoder(struct ggml_tensor * node, struct ggml_cgraph * cgr // Unary OPs case GGML_OP_UNARY: case GGML_OP_RESHAPE: - case GGML_OP_VIEW: + case GGML_OP_TRANSPOSE: + case GGML_OP_PERMUTE: + case GGML_OP_CONT: + case GGML_OP_CPY: + case GGML_OP_RMS_NORM: { m_inputs.push_back(m_node->src[0]); m_outputs.push_back(m_node); - #ifdef GGML_OPENVINO_DEBUG - GGML_LOG_INFO("Decoder input 0: %f \n", *(float*)(m_node->src[0]->data)); - #endif + break; + } + // For view, input is m_node itself + case GGML_OP_VIEW: + { + m_inputs.push_back(m_node); + m_outputs.push_back(m_node); break; } // SCALE @@ -24,12 +32,6 @@ GgmlOvDecoder::GgmlOvDecoder(struct ggml_tensor * node, struct ggml_cgraph * cgr { m_inputs.push_back(m_node->src[0]); m_outputs.push_back(m_node); - #ifdef GGML_OPENVINO_DEBUG - float v; - memcpy(&v, m_node->op_params, sizeof(float)); - GGML_LOG_INFO("Decoder input 0: %f \n", *(float*)(m_node->src[0]->data)); - GGML_LOG_INFO("Scale: %f \n", v); - #endif break; } // OPs with 2 inputs @@ -39,14 +41,20 @@ GgmlOvDecoder::GgmlOvDecoder(struct ggml_tensor * node, struct ggml_cgraph * cgr case GGML_OP_MUL_MAT: case GGML_OP_SUB: case GGML_OP_GET_ROWS: + case GGML_OP_SOFT_MAX: { m_inputs.push_back(m_node->src[0]); m_inputs.push_back(m_node->src[1]); m_outputs.push_back(m_node); - #ifdef GGML_OPENVINO_DEBUG - GGML_LOG_INFO("Decoder input 0: %f \n", *(float*)(m_node->src[0]->data)); - GGML_LOG_INFO("Decoder input 1: %f \n", *(float*)(m_node->src[1]->data)); - #endif + break; + } + // OPs with 3 inputs: + case GGML_OP_ROPE: + { + m_inputs.push_back(m_node->src[0]); + m_inputs.push_back(m_node->src[1]); + m_inputs.push_back(m_node->src[2]); // ??? + m_outputs.push_back(m_node); break; } default: @@ -130,7 +138,6 @@ ov::PartialShape GgmlOvDecoder::get_output_shape(size_t index) const { ov::element::Type GgmlOvDecoder::get_output_type(size_t index) const { // TODO: Change to Output ov::element::Type type = ov::element::dynamic; - // GGML_LOG_DEBUG("%d\n", m_outputs[index]->type); switch (m_outputs[index]->type) { case GGML_TYPE_F32: type = ov::element::f32; @@ -179,6 +186,8 @@ const std::string& GgmlOvDecoder::get_op_type() const { {GGML_OP_ACC, "GGML_OP_ACC"}, {GGML_OP_ADD, "GGML_OP_ADD"}, {GGML_OP_ADD1, "GGML_OP_ADD1"}, + {GGML_OP_CONT, "GGML_OP_CONT"}, + {GGML_OP_CPY, "GGML_OP_CPY"}, {GGML_OP_DIV, "GGML_OP_DIV"}, {GGML_OP_DUP, "GGML_OP_DUP"}, {GGML_OP_GET_ROWS, "GGML_OP_GET_ROWS"}, @@ -186,8 +195,12 @@ const std::string& GgmlOvDecoder::get_op_type() const { {GGML_OP_MUL_MAT, "GGML_OP_MUL_MAT"}, {GGML_OP_PERMUTE, "GGML_OP_PERMUTE"}, {GGML_OP_RESHAPE, "GGML_OP_RESHAPE"}, + {GGML_OP_RMS_NORM, "GGML_OP_RMS_NORM"}, + {GGML_OP_ROPE, "GGML_OP_ROPE"}, {GGML_OP_SCALE, "GGML_OP_SCALE"}, + {GGML_OP_SOFT_MAX, "GGML_OP_SOFT_MAX"}, {GGML_OP_SUB, "GGML_OP_SUB"}, + {GGML_OP_TRANSPOSE, "GGML_OP_TRANSPOSE"}, {GGML_OP_UNARY, "GGML_OP_UNARY"}, {GGML_OP_VIEW, "GGML_OP_VIEW"} }; diff --git a/ggml/src/ggml-openvino/ggml-decoder.h b/ggml/src/ggml-openvino/ggml-decoder.h index 1eaba59426..ceae589ed4 100644 --- a/ggml/src/ggml-openvino/ggml-decoder.h +++ b/ggml/src/ggml-openvino/ggml-decoder.h @@ -59,6 +59,10 @@ public: return m_inputs[index]; } + const ggml_tensor* get_output_ggml_tensor(size_t index) const { + return m_outputs[index]; + } + // virtual const std::vector& outputs() const override; // virtual size_t output(size_t index) const override; diff --git a/ggml/src/ggml-openvino/ggml-graph-iterator.cpp b/ggml/src/ggml-openvino/ggml-graph-iterator.cpp index 17a9b7ecfe..44e119a1ac 100644 --- a/ggml/src/ggml-openvino/ggml-graph-iterator.cpp +++ b/ggml/src/ggml-openvino/ggml-graph-iterator.cpp @@ -15,16 +15,17 @@ GgmlOvGraphIterator::GgmlOvGraphIterator(struct ggml_cgraph * cgraph) #endif } - void GgmlOvGraphIterator::initialize_decoders() { +void GgmlOvGraphIterator::initialize_decoders() { auto nodes_size = m_cgraph->n_nodes; // Initialize decoder for each node // m_decoders.resize(static_cast(nodes_size)); for (int i = 0; i < nodes_size; ++i) { // Skip View Op - if (m_cgraph->nodes[i] ->op == GGML_OP_VIEW || m_cgraph->nodes[i] ->op == GGML_OP_PERMUTE) { - continue; - } + // if (m_cgraph->nodes[i] ->op == GGML_OP_PERMUTE + // || m_cgraph->nodes[i] ->op == GGML_OP_CPY ) { + // continue; + // } auto decoder = std::make_shared(m_cgraph->nodes[i], m_cgraph); m_decoders.push_back(decoder); for (size_t inp = 0; inp < decoder->get_input_size(); ++inp) { @@ -33,9 +34,9 @@ GgmlOvGraphIterator::GgmlOvGraphIterator(struct ggml_cgraph * cgraph) // } } for (size_t inp = 0; inp < decoder->get_output_size(); ++inp) { - if (i == nodes_size - 1 || decoder->is_graph_output(inp)) { + // if (i == nodes_size - 1 || decoder->is_graph_output(inp)) { m_output_names.push_back(decoder->get_output_name(inp)); - } + // } } } @@ -71,20 +72,20 @@ std::vector GgmlOvGraphIterator::get_output_names() const { void GgmlOvGraphIterator::dump_graph_iterator() const { for (size_t i = 0; i < m_decoders.size(); ++i) { - GGML_LOG_INFO("OP %zu: %s\n", i, m_decoders[i]->get_op_name().c_str()); + GGML_LOG_INFO("\nOP %zu: %s\n", i, m_decoders[i]->get_op_name().c_str()); for (size_t inp = 0; inp < m_decoders[i]->get_input_size(); ++inp) { ov::PartialShape pshape = std::dynamic_pointer_cast(m_decoders[i])->get_input_shape(inp); ov::element::Type ptype = std::dynamic_pointer_cast(m_decoders[i])->get_input_type(inp); - GGML_LOG_INFO("Input name: %s\n", std::dynamic_pointer_cast(m_decoders[i])->get_input_name(inp).c_str()); - GGML_LOG_INFO("Input shape: %s\n", pshape.to_string().c_str()); - GGML_LOG_INFO("Input type: %s\n", ptype.to_string().c_str()); + GGML_LOG_INFO("- Input name: %s\n", std::dynamic_pointer_cast(m_decoders[i])->get_input_name(inp).c_str()); + GGML_LOG_INFO(" Input shape: %s\n", pshape.to_string().c_str()); + GGML_LOG_INFO(" Input type: %s\n", ptype.to_string().c_str()); } for (size_t outp = 0; outp < std::dynamic_pointer_cast(m_decoders[i])->get_output_size(); ++outp) { ov::PartialShape pshape = std::dynamic_pointer_cast(m_decoders[i])->get_output_shape(outp); ov::element::Type ptype = std::dynamic_pointer_cast(m_decoders[i])->get_output_type(outp); - GGML_LOG_INFO("Output name: %s\n", std::dynamic_pointer_cast(m_decoders[i])->get_output_name(outp).c_str()); - GGML_LOG_INFO("Output shape: %s\n", pshape.to_string().c_str()); - GGML_LOG_INFO("Output type: %s\n", ptype.to_string().c_str()); + GGML_LOG_INFO("- Output name: %s\n", std::dynamic_pointer_cast(m_decoders[i])->get_output_name(outp).c_str()); + GGML_LOG_INFO(" Output shape: %s\n", pshape.to_string().c_str()); + GGML_LOG_INFO(" Output type: %s\n", ptype.to_string().c_str()); } } diff --git a/ggml/src/ggml-openvino/utils.cpp b/ggml/src/ggml-openvino/utils.cpp index 905e2f4197..db52b1f81d 100644 --- a/ggml/src/ggml-openvino/utils.cpp +++ b/ggml/src/ggml-openvino/utils.cpp @@ -18,6 +18,9 @@ std::map get_ggml_graph_input_tensors(std::shared_ptrget_input_size(); ++inp) { if (std::find(input_names.begin(), input_names.end(), decoder->get_input_name(inp)) != input_names.end()) { auto input_data = decoder->get_input_ggml_tensor(inp)->data; + #ifdef GGML_OPENVINO_DEBUG + printf("Subgraph input %d: %g\n", inp, *(double*)(input_data)); + #endif ov::Tensor input_tensor = ov::Tensor(decoder->get_input_type(inp), decoder->get_input_shape(inp).to_shape(), input_data); input_tensors[decoder->get_input_name(inp)] = input_tensor; } @@ -26,6 +29,27 @@ std::map get_ggml_graph_input_tensors(std::shared_ptr get_ggml_graph_output_tensors(std::shared_ptr ggml_graph_iterator) { + std::map output_tensors; + auto output_names = ggml_graph_iterator->get_output_names(); + ggml_graph_iterator->reset(); + for (; !ggml_graph_iterator->is_end(); ggml_graph_iterator->next()) { + auto decoder = std::dynamic_pointer_cast(ggml_graph_iterator->get_decoder()); + for (size_t inp = 0; inp < decoder->get_output_size(); ++inp) { + if (std::find(output_names.begin(), output_names.end(), decoder->get_output_name(inp)) != output_names.end()) { + auto output_data = decoder->get_output_ggml_tensor(inp)->data; + #ifdef GGML_OPENVINO_DEBUG + printf("Output %d: %g\n", inp, *(double*)(output_data)); + #endif + ov::Tensor output_tensor = ov::Tensor(decoder->get_output_type(inp), decoder->get_output_shape(inp).to_shape(), output_data); + output_tensors[decoder->get_output_name(inp)] = output_tensor; + } + } + } + return output_tensors; +} + + static ov::frontend::FrontEnd::Ptr get_ggml_frontend() { ov::frontend::FrontEnd::Ptr front_end = nullptr; auto fem = ov::frontend::FrontEndManager(); @@ -92,16 +116,15 @@ enum ggml_status openvino_frontend_compute(ggml_backend_t backend, struct ggml_c infer_request.set_input_tensor(i, input_tensors[input_names[i]]); } - infer_request.infer(); + // Set output tensor - ov::Tensor output_tensor = infer_request.get_output_tensor(); - // Put data in output tensor to the last node -> data in cgraph - // Get output type - ggml_tensor* dst = cgraph->nodes[cgraph->n_nodes - 1]; - std::memcpy(dst->data, output_tensor.data(), output_tensor.get_byte_size()); - #ifdef GGML_OPENVINO_DEBUG - GGML_LOG_INFO("Output: %f\n", *output_tensor.data()); - #endif + auto output_names = ggml_graph_iterator->get_output_names(); + auto output_tensors = get_ggml_graph_output_tensors(ggml_graph_iterator); + for (size_t i = 0; i < output_names.size(); i++) { + infer_request.set_output_tensor(i, output_tensors[output_names[i]]); + } + + infer_request.infer(); return GGML_STATUS_SUCCESS; GGML_UNUSED(backend);