diff --git a/ggml/src/ggml-openvino.cpp b/ggml/src/ggml-openvino.cpp index 370c0c5d98..34d692a8cf 100644 --- a/ggml/src/ggml-openvino.cpp +++ b/ggml/src/ggml-openvino.cpp @@ -1,6 +1,7 @@ #include "ggml-openvino.h" #include "ggml-backend-impl.h" #include "ggml-impl.h" +#include "ggml-openvino/utils.h" #include #include @@ -234,33 +235,35 @@ static void ggml_backend_openvino_mul(ggml_tensor * dst) { } static enum ggml_status ggml_backend_openvino_graph_compute(ggml_backend_t backend, struct ggml_cgraph * cgraph) { - for (int i = 0; i < cgraph->n_nodes; i++) { - struct ggml_tensor * node = cgraph->nodes[i]; + // for (int i = 0; i < cgraph->n_nodes; i++) { + // struct ggml_tensor * node = cgraph->nodes[i]; - if (node->op == GGML_OP_NONE || ggml_is_empty(node)) { - return GGML_STATUS_SUCCESS; - } + // if (node->op == GGML_OP_NONE || ggml_is_empty(node)) { + // return GGML_STATUS_SUCCESS; + // } - switch (node->op) { - case GGML_OP_PERMUTE: - case GGML_OP_RESHAPE: - case GGML_OP_TRANSPOSE: - case GGML_OP_VIEW: - break; - case GGML_OP_ADD: - { - ggml_backend_openvino_add(node); - } break; - case GGML_OP_MUL: - { - ggml_backend_openvino_mul(node); - } break; - case GGML_OP_MUL_MAT: - break; - default: - GGML_ABORT("%s: unsupported op %s\n", __func__, ggml_op_desc(node)); - } - } + // switch (node->op) { + // case GGML_OP_PERMUTE: + // case GGML_OP_RESHAPE: + // case GGML_OP_TRANSPOSE: + // case GGML_OP_VIEW: + // break; + // case GGML_OP_ADD: + // { + // ggml_backend_openvino_add(node); + // } break; + // case GGML_OP_MUL: + // { + // ggml_backend_openvino_mul(node); + // } break; + // case GGML_OP_MUL_MAT: + // break; + // default: + // GGML_ABORT("%s: unsupported op %s\n", __func__, ggml_op_desc(node)); + // } + // } + + openvino_frontend_compute(backend, cgraph); return GGML_STATUS_SUCCESS; diff --git a/ggml/src/ggml-openvino/README.md b/ggml/src/ggml-openvino/README.md new file mode 100644 index 0000000000..46c2adb438 --- /dev/null +++ 
b/ggml/src/ggml-openvino/README.md @@ -0,0 +1,30 @@ +# Instructions to Modify and Build ggml with OpenVINO + +## Step 1: Modify the Source Code + +To point the build at the GGML frontend shared library, pass the path to the `.so` file as a CMake configure option: +1. Open a terminal and navigate to the root directory of this repo. +2. Run the following commands to configure: + ```sh + mkdir build + cmake -B build -DGGML_OV_FRONTEND="${openvino_repo_dir}/bin/intel64/Release/libopenvino_ggml_frontend.so" + ``` +Here `GGML_OV_FRONTEND` must point to the `libopenvino_ggml_frontend.so` file. + +## Step 2: Build the Project + +After modifying the source code, you need to build the project using CMake. Follow these steps: + +1. (Optional) Enable the debug option for ggml-openvino; this will dump the subgraph sent to OpenVINO, information from converting the ggml_cgraph to a GraphIterator, and the input/output values computed for each OP: + ```sh + cmake -B build -DGGML_OPENVINO_DEBUG=ON + ``` + +2. Run the following commands to configure and build the project: + ```sh + cmake -B build -DGGML_OPENVINO=ON + cmake --build build -j + ``` + +This will configure the project with OpenVINO support and build it using multiple cores for faster compilation. 
+ diff --git a/ggml/src/ggml-openvino/decoder.h b/ggml/src/ggml-openvino/decoder.h new file mode 100644 index 0000000000..d2ef7587b8 --- /dev/null +++ b/ggml/src/ggml-openvino/decoder.h @@ -0,0 +1,54 @@ +#pragma once + +#include "openvino/core/node.hpp" +#include "openvino/frontend/decoder.hpp" + +namespace ov { +namespace frontend { +namespace ggml { + +// TODO: Directly include from openvino +class GgmlDecoder : public DecoderBase { +public: + virtual ov::Any get_attribute(const std::string& name) const = 0; + + virtual PartialShape get_input_shape(size_t index) const = 0; + + virtual element::Type get_input_type(size_t index) const = 0; + + virtual size_t get_input_size() const = 0; + + virtual void get_input_node(size_t input_port_idx, + std::string& producer_name, + std::string& producer_output_port_name, + size_t& producer_output_port_index) const = 0; + + virtual bool is_graph_input(size_t index) const = 0; + + virtual std::string& get_input_name(size_t index) const = 0; + + virtual PartialShape get_output_shape(size_t index) const = 0; + + virtual element::Type get_output_type(size_t index) const = 0; + + virtual size_t get_output_size() const = 0; + + virtual bool is_graph_output(size_t index) const = 0; + + virtual int32_t* get_output_op_params(size_t index) const = 0; + + virtual std::string& get_output_name(size_t index) const = 0; + + virtual const std::string& get_op_type() const = 0; + + virtual const std::string& get_op_name() const = 0; + + // virtual const std::vector& outputs() const = 0; + + // virtual size_t output(size_t index) const = 0; + +}; + +} // namespace ggml +} // namespace frontend +} // namespace ov diff --git a/ggml/src/ggml-openvino/ggml-decoder.cpp b/ggml/src/ggml-openvino/ggml-decoder.cpp new file mode 100644 index 0000000000..4d82c756cd --- /dev/null +++ b/ggml/src/ggml-openvino/ggml-decoder.cpp @@ -0,0 +1,203 @@ +#include "ggml-decoder.h" +#include +#include + +GgmlOvDecoder::GgmlOvDecoder(struct ggml_tensor * node, struct 
ggml_cgraph * cgraph) + :m_cgraph(cgraph), + m_node(node), + m_op_name(std::string(m_node->name)) { + switch (m_node->op) { + // Unary OPs + case GGML_OP_UNARY: + case GGML_OP_RESHAPE: + case GGML_OP_VIEW: + { + m_inputs.push_back(m_node->src[0]); + m_outputs.push_back(m_node); + #ifdef GGML_OPENVINO_DEBUG + GGML_LOG_INFO("Decoder input 0: %f \n", *(float*)(m_node->src[0]->data)); + #endif + break; + } + // SCALE + case GGML_OP_SCALE: + { + m_inputs.push_back(m_node->src[0]); + m_outputs.push_back(m_node); + #ifdef GGML_OPENVINO_DEBUG + float v; + memcpy(&v, m_node->op_params, sizeof(float)); + GGML_LOG_INFO("Decoder input 0: %f \n", *(float*)(m_node->src[0]->data)); + GGML_LOG_INFO("Scale: %f \n", v); + #endif + break; + } + // OPs with 2 inputs + case GGML_OP_ADD: + case GGML_OP_DIV: + case GGML_OP_MUL: + case GGML_OP_MUL_MAT: + case GGML_OP_SUB: + case GGML_OP_GET_ROWS: + { + m_inputs.push_back(m_node->src[0]); + m_inputs.push_back(m_node->src[1]); + m_outputs.push_back(m_node); + #ifdef GGML_OPENVINO_DEBUG + GGML_LOG_INFO("Decoder input 0: %f \n", *(float*)(m_node->src[0]->data)); + GGML_LOG_INFO("Decoder input 1: %f \n", *(float*)(m_node->src[1]->data)); + #endif + break; + } + default: + break; + } +} + +ov::PartialShape GgmlOvDecoder::get_input_shape(size_t index) const { + ov::PartialShape input_shape; + // Use input_node->ne + ggml_tensor * node = m_inputs[index]; + std::vector shape; + // GGML_MAX_DIMS + // for (int i = 0; i < GGML_MAX_DIMS; ++i) { + for (int i = GGML_MAX_DIMS - 2; i >= 0 ; --i) { + if (node->ne[i] == 0) { + return input_shape; + } + shape.push_back(static_cast(node->ne[i])); + } + input_shape = ov::PartialShape(shape); + return input_shape; +} + +ov::element::Type GgmlOvDecoder::get_input_type(size_t index) const { + ov::element::Type type = ov::element::dynamic; + // GGML_LOG_DEBUG("%d\n", m_inputs[index]->type); + switch (m_inputs[index]->type) { + case GGML_TYPE_F32: + type = ov::element::f32; + break; + case GGML_TYPE_F16: + type = 
ov::element::f16; + break; + case GGML_TYPE_I64: + type = ov::element::i64; + break; + case GGML_TYPE_I32: + type = ov::element::i32; + break; + default: + break; + } + return type; +} + +size_t GgmlOvDecoder::get_input_size() const { + return m_inputs.size(); +} + +bool GgmlOvDecoder::is_graph_input(size_t index) const { + if (m_inputs[index]->flags & GGML_TENSOR_FLAG_INPUT ) { + return true; + } + return false; +} + +std::string& GgmlOvDecoder::get_input_name(size_t index) const { + m_name = std::string(m_inputs[index]->name); + return m_name; +} + +ov::PartialShape GgmlOvDecoder::get_output_shape(size_t index) const { + ov::PartialShape output_shape; + // Use input_node->ne + ggml_tensor * node = m_outputs[index]; + std::vector shape; + // GGML_MAX_DIMS + // for (int i = 0; i < GGML_MAX_DIMS; ++i) { + for (int i = GGML_MAX_DIMS - 2; i >= 0 ; --i) { + if (node->ne[i] == 0 ) { + // empty if any dimension has no elements + return output_shape; + } + shape.push_back(static_cast(node->ne[i])); + } + output_shape = ov::PartialShape(shape); + return output_shape; +} + +ov::element::Type GgmlOvDecoder::get_output_type(size_t index) const { + // TODO: Change to Output + ov::element::Type type = ov::element::dynamic; + // GGML_LOG_DEBUG("%d\n", m_outputs[index]->type); + switch (m_outputs[index]->type) { + case GGML_TYPE_F32: + type = ov::element::f32; + break; + case GGML_TYPE_F16: + type = ov::element::f16; + break; + case GGML_TYPE_I64: + type = ov::element::i64; + break; + case GGML_TYPE_I32: + type = ov::element::i32; + break; + default: + break; + } + return type; +} + +bool GgmlOvDecoder::is_graph_output(size_t index) const { + if (m_outputs[index]->flags & GGML_TENSOR_FLAG_OUTPUT) { + return true; + } + return false; +} + +int32_t* GgmlOvDecoder::get_output_op_params(size_t index) const{ + return m_outputs[index]->op_params; +} + +size_t GgmlOvDecoder::get_output_size() const { + return m_outputs.size(); +} + +std::string& GgmlOvDecoder::get_output_name(size_t 
index) const { + m_name = std::string(m_outputs[index]->name); + return m_name; +} + +const std::string& GgmlOvDecoder::get_op_name() const { + return m_op_name; +} + +const std::string& GgmlOvDecoder::get_op_type() const { + static const std::map opTypeMap = { + {GGML_OP_ACC, "GGML_OP_ACC"}, + {GGML_OP_ADD, "GGML_OP_ADD"}, + {GGML_OP_ADD1, "GGML_OP_ADD1"}, + {GGML_OP_DIV, "GGML_OP_DIV"}, + {GGML_OP_DUP, "GGML_OP_DUP"}, + {GGML_OP_GET_ROWS, "GGML_OP_GET_ROWS"}, + {GGML_OP_MUL, "GGML_OP_MUL"}, + {GGML_OP_MUL_MAT, "GGML_OP_MUL_MAT"}, + {GGML_OP_PERMUTE, "GGML_OP_PERMUTE"}, + {GGML_OP_RESHAPE, "GGML_OP_RESHAPE"}, + {GGML_OP_SCALE, "GGML_OP_SCALE"}, + {GGML_OP_SUB, "GGML_OP_SUB"}, + {GGML_OP_UNARY, "GGML_OP_UNARY"}, + {GGML_OP_VIEW, "GGML_OP_VIEW"} + }; + auto it = opTypeMap.find(m_node->op); + if (it != opTypeMap.end()) { + return it->second; + } else { + static const std::string unknown_op = "UNKNOWN_OP"; + return unknown_op; + } + // static std::string op_type = ggml_op_name(m_node->op); + // return op_type; +} diff --git a/ggml/src/ggml-openvino/ggml-decoder.h b/ggml/src/ggml-openvino/ggml-decoder.h new file mode 100644 index 0000000000..3048e2e7e9 --- /dev/null +++ b/ggml/src/ggml-openvino/ggml-decoder.h @@ -0,0 +1,69 @@ +#pragma once + +#include "decoder.h" +#include "ggml.h" + +class GgmlOvDecoder : public ov::frontend::ggml::GgmlDecoder { +public: + using ov::frontend::ggml::GgmlDecoder::GgmlDecoder; + GgmlOvDecoder(struct ggml_tensor * node, struct ggml_cgraph * cgraph); + + virtual ov::Any get_attribute(const std::string& name) const override { + return nullptr; + GGML_UNUSED(name); + } + + virtual ov::PartialShape get_input_shape(size_t index) const override; + + virtual ov::element::Type get_input_type(size_t index) const override; + + virtual size_t get_input_size() const override; + + virtual void get_input_node(size_t input_port_idx, + std::string& producer_name, + std::string& producer_output_port_name, + size_t& producer_output_port_index) const 
override { + GGML_UNUSED(input_port_idx); + GGML_UNUSED(producer_name); + GGML_UNUSED(producer_output_port_name); + GGML_UNUSED(producer_output_port_index); + } + + virtual bool is_graph_input(size_t index) const override; + + virtual std::string& get_input_name(size_t index) const override; + + virtual ov::PartialShape get_output_shape(size_t index) const override; + + virtual ov::element::Type get_output_type(size_t index) const override; + + virtual size_t get_output_size() const override; + + virtual bool is_graph_output(size_t index) const override; + + virtual int32_t* get_output_op_params(size_t index) const override; + + virtual std::string& get_output_name(size_t index) const override; + + virtual const std::string& get_op_type() const override; + + virtual const std::string& get_op_name() const override; + + const ggml_tensor* get_input_ggml_tensor(size_t index) const { + return m_inputs[index]; + } + + // virtual const std::vector& outputs() const override; + + // virtual size_t output(size_t index) const override; + +private: + size_t m_index; + struct ggml_cgraph * m_cgraph; + std::vector m_inputs; + std::vector m_outputs; + ggml_tensor * m_node; + const std::string m_op_name; + mutable std::string m_name; +}; + diff --git a/ggml/src/ggml-openvino/ggml-graph-iterator.cpp b/ggml/src/ggml-openvino/ggml-graph-iterator.cpp new file mode 100644 index 0000000000..17a9b7ecfe --- /dev/null +++ b/ggml/src/ggml-openvino/ggml-graph-iterator.cpp @@ -0,0 +1,96 @@ +#include "ggml-graph-iterator.h" +#include +#include + +namespace ov { +namespace frontend { +namespace tensorflow { +namespace ggml { + +GgmlOvGraphIterator::GgmlOvGraphIterator(struct ggml_cgraph * cgraph) + :m_cgraph(cgraph) { + initialize_decoders(); + #ifdef GGML_OPENVINO_DEBUG + dump_graph_iterator(); + #endif +} + + void GgmlOvGraphIterator::initialize_decoders() { + auto nodes_size = m_cgraph->n_nodes; + // Initialize decoder for each node + // m_decoders.resize(static_cast(nodes_size)); + + for 
(int i = 0; i < nodes_size; ++i) { + // Skip View Op + if (m_cgraph->nodes[i] ->op == GGML_OP_VIEW || m_cgraph->nodes[i] ->op == GGML_OP_PERMUTE) { + continue; + } + auto decoder = std::make_shared(m_cgraph->nodes[i], m_cgraph); + m_decoders.push_back(decoder); + for (size_t inp = 0; inp < decoder->get_input_size(); ++inp) { + // if (i == 0 || decoder->is_graph_input(inp)) { + m_input_names.push_back(decoder->get_input_name(inp)); + // } + } + for (size_t inp = 0; inp < decoder->get_output_size(); ++inp) { + if (i == nodes_size - 1 || decoder->is_graph_output(inp)) { + m_output_names.push_back(decoder->get_output_name(inp)); + } + } + } + +} + +void GgmlOvGraphIterator::reset() { + node_index = 0; + } + +size_t GgmlOvGraphIterator::size() const { + return m_decoders.size(); +} + +void GgmlOvGraphIterator::next() { + node_index++; +} + +bool GgmlOvGraphIterator::is_end() const { + return node_index >= m_decoders.size(); +} + +std::shared_ptr GgmlOvGraphIterator::get_decoder() const { + return m_decoders[node_index]; +} + +std::vector GgmlOvGraphIterator::get_input_names() const { + return m_input_names; +} + +std::vector GgmlOvGraphIterator::get_output_names() const { + return m_output_names; +} + +void GgmlOvGraphIterator::dump_graph_iterator() const { + for (size_t i = 0; i < m_decoders.size(); ++i) { + GGML_LOG_INFO("OP %zu: %s\n", i, m_decoders[i]->get_op_name().c_str()); + for (size_t inp = 0; inp < m_decoders[i]->get_input_size(); ++inp) { + ov::PartialShape pshape = std::dynamic_pointer_cast(m_decoders[i])->get_input_shape(inp); + ov::element::Type ptype = std::dynamic_pointer_cast(m_decoders[i])->get_input_type(inp); + GGML_LOG_INFO("Input name: %s\n", std::dynamic_pointer_cast(m_decoders[i])->get_input_name(inp).c_str()); + GGML_LOG_INFO("Input shape: %s\n", pshape.to_string().c_str()); + GGML_LOG_INFO("Input type: %s\n", ptype.to_string().c_str()); + } + for (size_t outp = 0; outp < std::dynamic_pointer_cast(m_decoders[i])->get_output_size(); ++outp) { + 
ov::PartialShape pshape = std::dynamic_pointer_cast(m_decoders[i])->get_output_shape(outp); + ov::element::Type ptype = std::dynamic_pointer_cast(m_decoders[i])->get_output_type(outp); + GGML_LOG_INFO("Output name: %s\n", std::dynamic_pointer_cast(m_decoders[i])->get_output_name(outp).c_str()); + GGML_LOG_INFO("Output shape: %s\n", pshape.to_string().c_str()); + GGML_LOG_INFO("Output type: %s\n", ptype.to_string().c_str()); + + } + } +} + +} +} // namespace ggml +} // namespace frontend +} // namespace ov diff --git a/ggml/src/ggml-openvino/ggml-graph-iterator.h b/ggml/src/ggml-openvino/ggml-graph-iterator.h new file mode 100644 index 0000000000..305afb5c98 --- /dev/null +++ b/ggml/src/ggml-openvino/ggml-graph-iterator.h @@ -0,0 +1,61 @@ +#pragma once + +#include "graph_iterator.h" +#include "ggml-decoder.h" +#include + +// To remove tensorflow +namespace ov { +namespace frontend { +namespace tensorflow { +namespace ggml { + +class GgmlOvGraphIterator : public GgmlGraphIterator { + +protected: + void initialize_decoders(); + +public: + using Ptr = std::shared_ptr; + GgmlOvGraphIterator(struct ggml_cgraph * cgraph); + + /// \brief Get a number of operation nodes in the sgraph + virtual size_t size() const override; + + /// \brief Set iterator to the start position + virtual void reset() override; + + /// \brief Move to the next node in the graph + virtual void next() override; + + /// \brief Returns true if iterator goes out of the range of available nodes + virtual bool is_end() const override; + + /// \brief Return a pointer to a decoder of the current node + virtual std::shared_ptr get_decoder() const override; + + virtual std::shared_ptr get_body_graph_iterator(const std::string& func_name) const override { + return nullptr; + GGML_UNUSED(func_name); + } + + /// \brief Returns a vector of input names in the original order + virtual std::vector get_input_names() const override; + + /// \brief Returns a vector of output names in the original order + virtual 
std::vector get_output_names() const override; + + virtual void dump_graph_iterator() const; + +private: + struct ggml_cgraph * m_cgraph; + size_t node_index = 0; + std::vector> m_decoders; + std::vector m_input_names; + std::vector m_output_names; +}; + +} +} // namespace ggml +} // namespace frontend +} // namespace ov diff --git a/ggml/src/ggml-openvino/graph_iterator.h b/ggml/src/ggml-openvino/graph_iterator.h new file mode 100644 index 0000000000..e0b475e445 --- /dev/null +++ b/ggml/src/ggml-openvino/graph_iterator.h @@ -0,0 +1,43 @@ +#pragma once + +#include "openvino/frontend/graph_iterator.hpp" + +namespace ov { +namespace frontend { +namespace tensorflow { // To be Removed +namespace ggml { + +// TODO: Directly include from openvino +class GgmlGraphIterator : public GraphIterator { +public: + + virtual size_t size() const = 0; + + virtual void reset() = 0; + + virtual void next() = 0; + + virtual bool is_end() const = 0; + + virtual std::shared_ptr get_decoder() const = 0; + + virtual std::vector get_input_names() const = 0; + + virtual std::vector get_output_names() const = 0; + + virtual std::shared_ptr get_body_graph_iterator(const std::string& func_name) const = 0; + + virtual std::map get_input_names_map() const { + return {}; + } + + virtual std::map get_output_names_map() const { + return {}; + } + +}; + +} +} // namespace ggml +} // namespace frontend +} // namespace ov diff --git a/ggml/src/ggml-openvino/utils.cpp b/ggml/src/ggml-openvino/utils.cpp new file mode 100644 index 0000000000..905e2f4197 --- /dev/null +++ b/ggml/src/ggml-openvino/utils.cpp @@ -0,0 +1,108 @@ +#include "utils.h" +#include "ggml-backend-impl.h" +#include +#include + +using ov::frontend::tensorflow::ggml::GgmlOvGraphIterator; + +std::shared_ptr get_ggml_graph_iterator(struct ggml_cgraph * cgraph) { + return std::make_shared(cgraph); +} + +std::map get_ggml_graph_input_tensors(std::shared_ptr ggml_graph_iterator) { + std::map input_tensors; + auto input_names = 
ggml_graph_iterator->get_input_names(); + ggml_graph_iterator->reset(); + for (; !ggml_graph_iterator->is_end(); ggml_graph_iterator->next()) { + auto decoder = std::dynamic_pointer_cast(ggml_graph_iterator->get_decoder()); + for (size_t inp = 0; inp < decoder->get_input_size(); ++inp) { + if (std::find(input_names.begin(), input_names.end(), decoder->get_input_name(inp)) != input_names.end()) { + auto input_data = decoder->get_input_ggml_tensor(inp)->data; + ov::Tensor input_tensor = ov::Tensor(decoder->get_input_type(inp), decoder->get_input_shape(inp).to_shape(), input_data); + input_tensors[decoder->get_input_name(inp)] = input_tensor; + } + } + } + return input_tensors; +} + +static ov::frontend::FrontEnd::Ptr get_ggml_frontend() { + ov::frontend::FrontEnd::Ptr front_end = nullptr; + auto fem = ov::frontend::FrontEndManager(); + std::string fe_so_path; +#ifdef GGML_OV_FRONTEND + fe_so_path = GGML_OV_FRONTEND; +#endif + fem.register_front_end("ggml", fe_so_path); + front_end = fem.load_by_framework("ggml"); + return front_end; +} + +enum ggml_status openvino_frontend_compute(ggml_backend_t backend, struct ggml_cgraph * cgraph) { + ov::Core core; + auto devices = core.get_available_devices(); + // Get GGML Frontend + auto front_end = get_ggml_frontend(); + if (!front_end) { + GGML_LOG_ERROR("GGML FrontEnd is not initialized \n"); + return GGML_STATUS_FAILED; + } else { + #ifdef GGML_OPENVINO_DEBUG + GGML_LOG_INFO("GGML FrontEnd is initialized \n"); + #endif + } + + auto ggml_graph_iterator = get_ggml_graph_iterator(cgraph); + std::shared_ptr graph_iterator = ggml_graph_iterator; + + // Load GraphIterator -> InputModel + ov::frontend::InputModel::Ptr input_model = front_end->load(graph_iterator); + if (!input_model) { + GGML_LOG_ERROR("Input Model is not loaded \n"); + return GGML_STATUS_FAILED; + } else { + #ifdef GGML_OPENVINO_DEBUG + GGML_LOG_INFO("Input Model loaded \n"); + #endif + } + + // Convert InputModel -> ov::Model + std::shared_ptr model = 
front_end->convert(input_model); + if (!model) { + GGML_LOG_ERROR("Model is not converted \n"); + } else { + #ifdef GGML_OPENVINO_DEBUG + GGML_LOG_INFO("Model converted \n"); + #endif + } + + + // Loading a model to the device + ov::CompiledModel compiled_model = core.compile_model(model); + + // Create infer request + ov::InferRequest infer_request = compiled_model.create_infer_request(); + + // Get input tensor + auto input_names = ggml_graph_iterator->get_input_names(); + auto input_tensors = get_ggml_graph_input_tensors(ggml_graph_iterator); + + // Set input tensor + for (size_t i = 0; i < input_names.size(); i++) { + infer_request.set_input_tensor(i, input_tensors[input_names[i]]); + } + + infer_request.infer(); + + ov::Tensor output_tensor = infer_request.get_output_tensor(); + // Put data in output tensor to the last node -> data in cgraph + // Get output type + ggml_tensor* dst = cgraph->nodes[cgraph->n_nodes - 1]; + std::memcpy(dst->data, output_tensor.data(), output_tensor.get_byte_size()); + #ifdef GGML_OPENVINO_DEBUG + GGML_LOG_INFO("Output: %f\n", *output_tensor.data()); + #endif + + return GGML_STATUS_SUCCESS; + GGML_UNUSED(backend); +} diff --git a/ggml/src/ggml-openvino/utils.h b/ggml/src/ggml-openvino/utils.h new file mode 100644 index 0000000000..15dd46ed4e --- /dev/null +++ b/ggml/src/ggml-openvino/utils.h @@ -0,0 +1,6 @@ +#include "ggml-graph-iterator.h" +#include "ggml-backend-impl.h" + +std::shared_ptr get_ggml_graph_iterator(struct ggml_cgraph * cgraph); + +enum ggml_status openvino_frontend_compute (ggml_backend_t backend, struct ggml_cgraph * cgraph);