add OpenVINO frontend convert process steps

zhanmyz 2024-12-04 14:09:13 +08:00 committed by Mustafa Cavus
parent 0a81aa19f7
commit 77d68146a8
10 changed files with 698 additions and 25 deletions


@@ -1,6 +1,7 @@
#include "ggml-openvino.h"
#include "ggml-backend-impl.h"
#include "ggml-impl.h"
#include "ggml-openvino/utils.h"
#include <string>
#include <mutex>
@@ -234,33 +235,35 @@ static void ggml_backend_openvino_mul(ggml_tensor * dst) {
}
static enum ggml_status ggml_backend_openvino_graph_compute(ggml_backend_t backend, struct ggml_cgraph * cgraph) {
for (int i = 0; i < cgraph->n_nodes; i++) {
struct ggml_tensor * node = cgraph->nodes[i];
// for (int i = 0; i < cgraph->n_nodes; i++) {
// struct ggml_tensor * node = cgraph->nodes[i];
if (node->op == GGML_OP_NONE || ggml_is_empty(node)) {
return GGML_STATUS_SUCCESS;
}
// if (node->op == GGML_OP_NONE || ggml_is_empty(node)) {
// return GGML_STATUS_SUCCESS;
// }
switch (node->op) {
case GGML_OP_PERMUTE:
case GGML_OP_RESHAPE:
case GGML_OP_TRANSPOSE:
case GGML_OP_VIEW:
break;
case GGML_OP_ADD:
{
ggml_backend_openvino_add(node);
} break;
case GGML_OP_MUL:
{
ggml_backend_openvino_mul(node);
} break;
case GGML_OP_MUL_MAT:
break;
default:
GGML_ABORT("%s: unsupported op %s\n", __func__, ggml_op_desc(node));
}
}
// switch (node->op) {
// case GGML_OP_PERMUTE:
// case GGML_OP_RESHAPE:
// case GGML_OP_TRANSPOSE:
// case GGML_OP_VIEW:
// break;
// case GGML_OP_ADD:
// {
// ggml_backend_openvino_add(node);
// } break;
// case GGML_OP_MUL:
// {
// ggml_backend_openvino_mul(node);
// } break;
// case GGML_OP_MUL_MAT:
// break;
// default:
// GGML_ABORT("%s: unsupported op %s\n", __func__, ggml_op_desc(node));
// }
// }
openvino_frontend_compute(backend, cgraph);
return GGML_STATUS_SUCCESS;


@@ -0,0 +1,30 @@
# Instructions to Configure and Build ggml with OpenVINO
## Step 1: Configure the Frontend Library Path
To point the build at the OpenVINO GGML frontend, pass the path of the frontend `.so` file as a CMake option:
1. Open a terminal and navigate to the root directory of this repo.
2. Run the following commands to configure:
```sh
mkdir build
cmake -B build -DGGML_OV_FRONTEND="${openvino_repo_dir}/bin/intel64/Release/libopenvino_ggml_frontend.so"
```
Here `GGML_OV_FRONTEND` must point to the built `libopenvino_ggml_frontend.so` file.
## Step 2: Build the Project
After configuring the frontend path, build the project with CMake. Follow these steps:
1. (Optional) Enable the debug option for ggml-openvino. This dumps the subgraph sent to OpenVINO, the graph produced by converting the `ggml_cgraph` to a `GraphIterator`, and the computed input/output values of each op:
```sh
cmake -B build -DGGML_OPENVINO_DEBUG=ON
```
2. Run the following commands to configure and build the project:
```sh
cmake -B build -DGGML_OPENVINO=ON
cmake --build build -j
```
This will configure the project with OpenVINO support and build it using multiple cores for faster compilation.
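For reference, the options from both steps can be combined into a single configure-and-build sequence; `${openvino_repo_dir}` is a placeholder for your local OpenVINO checkout:
```sh
cmake -B build \
  -DGGML_OPENVINO=ON \
  -DGGML_OPENVINO_DEBUG=ON \
  -DGGML_OV_FRONTEND="${openvino_repo_dir}/bin/intel64/Release/libopenvino_ggml_frontend.so"
cmake --build build -j
```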


@@ -0,0 +1,54 @@
#pragma once
#include "openvino/core/node.hpp"
#include "openvino/frontend/decoder.hpp"
namespace ov {
namespace frontend {
namespace ggml {
// TODO: Directly include from openvino
class GgmlDecoder : public DecoderBase {
public:
virtual ov::Any get_attribute(const std::string& name) const = 0;
virtual PartialShape get_input_shape(size_t index) const = 0;
virtual element::Type get_input_type(size_t index) const = 0;
virtual size_t get_input_size() const = 0;
virtual void get_input_node(size_t input_port_idx,
std::string& producer_name,
std::string& producer_output_port_name,
size_t& producer_output_port_index) const = 0;
virtual bool is_graph_input(size_t index) const = 0;
virtual std::string& get_input_name(size_t index) const = 0;
virtual PartialShape get_output_shape(size_t index) const = 0;
virtual element::Type get_output_type(size_t index) const = 0;
virtual size_t get_output_size() const = 0;
virtual bool is_graph_output(size_t index) const = 0;
virtual int32_t* get_output_op_params(size_t index) const = 0;
virtual std::string& get_output_name(size_t index) const = 0;
virtual const std::string& get_op_type() const = 0;
virtual const std::string& get_op_name() const = 0;
// virtual const std::vector<size_t>& outputs() const = 0;
// virtual size_t output(size_t index) const = 0;
};
} // namespace ggml
} // namespace frontend
} // namespace ov
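
For orientation (not part of this commit), a minimal sketch of how conversion code might consume this interface; the helper name `print_decoder` and the stream output are illustrative assumptions:

```cpp
#include <iostream>
#include "decoder.h"

// Hypothetical helper: list everything a GgmlDecoder exposes, using only
// the pure-virtual API declared above. OpenVINO provides operator<< for
// PartialShape and element::Type.
void print_decoder(const ov::frontend::ggml::GgmlDecoder& decoder) {
    std::cout << decoder.get_op_name() << " (" << decoder.get_op_type() << ")\n";
    for (size_t i = 0; i < decoder.get_input_size(); ++i) {
        std::cout << "  in  " << decoder.get_input_name(i) << " "
                  << decoder.get_input_shape(i) << " "
                  << decoder.get_input_type(i) << "\n";
    }
    for (size_t i = 0; i < decoder.get_output_size(); ++i) {
        std::cout << "  out " << decoder.get_output_name(i) << " "
                  << decoder.get_output_shape(i) << " "
                  << decoder.get_output_type(i) << "\n";
    }
}
```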


@@ -0,0 +1,203 @@
#include "ggml-decoder.h"
#include <ggml.h>
#include <ggml-impl.h>
GgmlOvDecoder::GgmlOvDecoder(struct ggml_tensor * node, struct ggml_cgraph * cgraph)
:m_cgraph(cgraph),
m_node(node),
m_op_name(std::string(m_node->name)) {
switch (m_node->op) {
// OPs with a single input
case GGML_OP_UNARY:
case GGML_OP_RESHAPE:
case GGML_OP_VIEW:
{
m_inputs.push_back(m_node->src[0]);
m_outputs.push_back(m_node);
#ifdef GGML_OPENVINO_DEBUG
GGML_LOG_INFO("Decoder input 0: %f \n", *(float*)(m_node->src[0]->data));
#endif
break;
}
// SCALE
case GGML_OP_SCALE:
{
m_inputs.push_back(m_node->src[0]);
m_outputs.push_back(m_node);
#ifdef GGML_OPENVINO_DEBUG
float v;
memcpy(&v, m_node->op_params, sizeof(float));
GGML_LOG_INFO("Decoder input 0: %f \n", *(float*)(m_node->src[0]->data));
GGML_LOG_INFO("Scale: %f \n", v);
#endif
break;
}
// OPs with 2 inputs
case GGML_OP_ADD:
case GGML_OP_DIV:
case GGML_OP_MUL:
case GGML_OP_MUL_MAT:
case GGML_OP_SUB:
case GGML_OP_GET_ROWS:
{
m_inputs.push_back(m_node->src[0]);
m_inputs.push_back(m_node->src[1]);
m_outputs.push_back(m_node);
#ifdef GGML_OPENVINO_DEBUG
GGML_LOG_INFO("Decoder input 0: %f \n", *(float*)(m_node->src[0]->data));
GGML_LOG_INFO("Decoder input 1: %f \n", *(float*)(m_node->src[1]->data));
#endif
break;
}
default:
break;
}
}
ov::PartialShape GgmlOvDecoder::get_input_shape(size_t index) const {
ov::PartialShape input_shape;
ggml_tensor * node = m_inputs[index];
std::vector<size_t> shape;
// ggml's ne is ordered innermost dimension first; push ne[2], ne[1], ne[0]
// so the OpenVINO shape is outermost-first (ne[3] is not mapped here)
for (int i = GGML_MAX_DIMS - 2; i >= 0; --i) {
if (node->ne[i] == 0) {
// Empty shape if any dimension has no elements
return input_shape;
}
shape.push_back(static_cast<size_t>(node->ne[i]));
}
input_shape = ov::PartialShape(shape);
return input_shape;
}
ov::element::Type GgmlOvDecoder::get_input_type(size_t index) const {
ov::element::Type type = ov::element::dynamic;
// GGML_LOG_DEBUG("%d\n", m_inputs[index]->type);
switch (m_inputs[index]->type) {
case GGML_TYPE_F32:
type = ov::element::f32;
break;
case GGML_TYPE_F16:
type = ov::element::f16;
break;
case GGML_TYPE_I64:
type = ov::element::i64;
break;
case GGML_TYPE_I32:
type = ov::element::i32;
break;
default:
break;
}
return type;
}
size_t GgmlOvDecoder::get_input_size() const {
return m_inputs.size();
}
bool GgmlOvDecoder::is_graph_input(size_t index) const {
return m_inputs[index]->flags & GGML_TENSOR_FLAG_INPUT;
}
std::string& GgmlOvDecoder::get_input_name(size_t index) const {
m_name = std::string(m_inputs[index]->name);
return m_name;
}
ov::PartialShape GgmlOvDecoder::get_output_shape(size_t index) const {
ov::PartialShape output_shape;
ggml_tensor * node = m_outputs[index];
std::vector<size_t> shape;
// ggml's ne is ordered innermost dimension first; push ne[2], ne[1], ne[0]
// so the OpenVINO shape is outermost-first (ne[3] is not mapped here)
for (int i = GGML_MAX_DIMS - 2; i >= 0; --i) {
if (node->ne[i] == 0) {
// Empty shape if any dimension has no elements
return output_shape;
}
shape.push_back(static_cast<size_t>(node->ne[i]));
}
output_shape = ov::PartialShape(shape);
return output_shape;
}
ov::element::Type GgmlOvDecoder::get_output_type(size_t index) const {
// TODO: Change to Output
ov::element::Type type = ov::element::dynamic;
// GGML_LOG_DEBUG("%d\n", m_outputs[index]->type);
switch (m_outputs[index]->type) {
case GGML_TYPE_F32:
type = ov::element::f32;
break;
case GGML_TYPE_F16:
type = ov::element::f16;
break;
case GGML_TYPE_I64:
type = ov::element::i64;
break;
case GGML_TYPE_I32:
type = ov::element::i32;
break;
default:
break;
}
return type;
}
bool GgmlOvDecoder::is_graph_output(size_t index) const {
return m_outputs[index]->flags & GGML_TENSOR_FLAG_OUTPUT;
}
int32_t* GgmlOvDecoder::get_output_op_params(size_t index) const {
return m_outputs[index]->op_params;
}
size_t GgmlOvDecoder::get_output_size() const {
return m_outputs.size();
}
std::string& GgmlOvDecoder::get_output_name(size_t index) const {
m_name = std::string(m_outputs[index]->name);
return m_name;
}
const std::string& GgmlOvDecoder::get_op_name() const {
return m_op_name;
}
const std::string& GgmlOvDecoder::get_op_type() const {
static const std::map<ggml_op, std::string> opTypeMap = {
{GGML_OP_ACC, "GGML_OP_ACC"},
{GGML_OP_ADD, "GGML_OP_ADD"},
{GGML_OP_ADD1, "GGML_OP_ADD1"},
{GGML_OP_DIV, "GGML_OP_DIV"},
{GGML_OP_DUP, "GGML_OP_DUP"},
{GGML_OP_GET_ROWS, "GGML_OP_GET_ROWS"},
{GGML_OP_MUL, "GGML_OP_MUL"},
{GGML_OP_MUL_MAT, "GGML_OP_MUL_MAT"},
{GGML_OP_PERMUTE, "GGML_OP_PERMUTE"},
{GGML_OP_RESHAPE, "GGML_OP_RESHAPE"},
{GGML_OP_SCALE, "GGML_OP_SCALE"},
{GGML_OP_SUB, "GGML_OP_SUB"},
{GGML_OP_UNARY, "GGML_OP_UNARY"},
{GGML_OP_VIEW, "GGML_OP_VIEW"}
};
auto it = opTypeMap.find(m_node->op);
if (it != opTypeMap.end()) {
return it->second;
} else {
static const std::string unknown_op = "UNKNOWN_OP";
return unknown_op;
}
}
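
A note on `get_input_shape`/`get_output_shape` above: since ggml's `ne` array is ordered innermost-first and the decoder pushes `ne[2]`, `ne[1]`, `ne[0]` in that order, a ggml tensor with `ne = {64, 32, 4, 1}` maps to the OpenVINO shape `{4, 32, 64}`, with `ne[3]` not mapped. A standalone sketch of the same mapping, for illustration only:

```cpp
#include <cstdint>
#include <vector>

// Mirror the decoder's ne -> shape mapping in isolation.
// ne is innermost-first, as in ggml; the result is outermost-first.
std::vector<size_t> ggml_ne_to_ov_shape(const int64_t ne[4]) {
    std::vector<size_t> shape;
    for (int i = 2; i >= 0; --i) {   // GGML_MAX_DIMS - 2 == 2
        shape.push_back(static_cast<size_t>(ne[i]));
    }
    return shape;                    // {ne[2], ne[1], ne[0]}
}
```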


@@ -0,0 +1,69 @@
#pragma once
#include "decoder.h"
#include "ggml.h"
class GgmlOvDecoder : public ov::frontend::ggml::GgmlDecoder {
public:
using ov::frontend::ggml::GgmlDecoder::GgmlDecoder;
GgmlOvDecoder(struct ggml_tensor * node, struct ggml_cgraph * cgraph);
virtual ov::Any get_attribute(const std::string& name) const override {
return nullptr;
GGML_UNUSED(name);
}
virtual ov::PartialShape get_input_shape(size_t index) const override;
virtual ov::element::Type get_input_type(size_t index) const override;
virtual size_t get_input_size() const override;
virtual void get_input_node(size_t input_port_idx,
std::string& producer_name,
std::string& producer_output_port_name,
size_t& producer_output_port_index) const override {
GGML_UNUSED(input_port_idx);
GGML_UNUSED(producer_name);
GGML_UNUSED(producer_output_port_name);
GGML_UNUSED(producer_output_port_index);
}
virtual bool is_graph_input(size_t index) const override;
virtual std::string& get_input_name(size_t index) const override;
virtual ov::PartialShape get_output_shape(size_t index) const override;
virtual ov::element::Type get_output_type(size_t index) const override;
virtual size_t get_output_size() const override;
virtual bool is_graph_output(size_t index) const override;
virtual int32_t* get_output_op_params(size_t index) const override;
virtual std::string& get_output_name(size_t index) const override;
virtual const std::string& get_op_type() const override;
virtual const std::string& get_op_name() const override;
const ggml_tensor* get_input_ggml_tensor(size_t index) const {
return m_inputs[index];
}
// virtual const std::vector<size_t>& outputs() const override;
// virtual size_t output(size_t index) const override;
private:
size_t m_index;
struct ggml_cgraph * m_cgraph;
std::vector<ggml_tensor *> m_inputs;
std::vector<ggml_tensor *> m_outputs;
ggml_tensor * m_node;
const std::string m_op_name;
mutable std::string m_name;
};


@@ -0,0 +1,96 @@
#include "ggml-graph-iterator.h"
#include <ggml.h>
#include <ggml-impl.h>
namespace ov {
namespace frontend {
namespace tensorflow {
namespace ggml {
GgmlOvGraphIterator::GgmlOvGraphIterator(struct ggml_cgraph * cgraph)
:m_cgraph(cgraph) {
initialize_decoders();
#ifdef GGML_OPENVINO_DEBUG
dump_graph_iterator();
#endif
}
void GgmlOvGraphIterator::initialize_decoders() {
auto nodes_size = m_cgraph->n_nodes;
// Initialize decoder for each node
// m_decoders.resize(static_cast<size_t>(nodes_size));
for (int i = 0; i < nodes_size; ++i) {
// Skip VIEW and PERMUTE ops
if (m_cgraph->nodes[i]->op == GGML_OP_VIEW || m_cgraph->nodes[i]->op == GGML_OP_PERMUTE) {
continue;
}
auto decoder = std::make_shared<GgmlOvDecoder>(m_cgraph->nodes[i], m_cgraph);
m_decoders.push_back(decoder);
for (size_t inp = 0; inp < decoder->get_input_size(); ++inp) {
// if (i == 0 || decoder->is_graph_input(inp)) {
m_input_names.push_back(decoder->get_input_name(inp));
// }
}
for (size_t inp = 0; inp < decoder->get_output_size(); ++inp) {
if (i == nodes_size - 1 || decoder->is_graph_output(inp)) {
m_output_names.push_back(decoder->get_output_name(inp));
}
}
}
}
void GgmlOvGraphIterator::reset() {
node_index = 0;
}
size_t GgmlOvGraphIterator::size() const {
return m_decoders.size();
}
void GgmlOvGraphIterator::next() {
node_index++;
}
bool GgmlOvGraphIterator::is_end() const {
return node_index >= m_decoders.size();
}
std::shared_ptr<DecoderBase> GgmlOvGraphIterator::get_decoder() const {
return m_decoders[node_index];
}
std::vector<std::string> GgmlOvGraphIterator::get_input_names() const {
return m_input_names;
}
std::vector<std::string> GgmlOvGraphIterator::get_output_names() const {
return m_output_names;
}
void GgmlOvGraphIterator::dump_graph_iterator() const {
for (size_t i = 0; i < m_decoders.size(); ++i) {
// Cast once per node instead of once per query
auto decoder = std::dynamic_pointer_cast<GgmlOvDecoder>(m_decoders[i]);
GGML_LOG_INFO("OP %zu: %s\n", i, decoder->get_op_name().c_str());
for (size_t inp = 0; inp < decoder->get_input_size(); ++inp) {
ov::PartialShape pshape = decoder->get_input_shape(inp);
ov::element::Type ptype = decoder->get_input_type(inp);
GGML_LOG_INFO("Input name: %s\n", decoder->get_input_name(inp).c_str());
GGML_LOG_INFO("Input shape: %s\n", pshape.to_string().c_str());
GGML_LOG_INFO("Input type: %s\n", ptype.to_string().c_str());
}
for (size_t outp = 0; outp < decoder->get_output_size(); ++outp) {
ov::PartialShape pshape = decoder->get_output_shape(outp);
ov::element::Type ptype = decoder->get_output_type(outp);
GGML_LOG_INFO("Output name: %s\n", decoder->get_output_name(outp).c_str());
GGML_LOG_INFO("Output shape: %s\n", pshape.to_string().c_str());
GGML_LOG_INFO("Output type: %s\n", ptype.to_string().c_str());
}
}
}
} // namespace ggml
} // namespace tensorflow
} // namespace frontend
} // namespace ov
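
As a usage sketch (not part of the commit), a consumer drives the iterator through the reset/is_end/next protocol, the same pattern `get_ggml_graph_input_tensors` uses in `utils.cpp` below; the helper name `visit_all` is a hypothetical assumption:

```cpp
#include <memory>
#include "ggml-graph-iterator.h"

using ov::frontend::tensorflow::ggml::GgmlOvGraphIterator;

// Hypothetical consumer: visit every decoder the iterator exposes.
void visit_all(const std::shared_ptr<GgmlOvGraphIterator>& it) {
    it->reset();                          // rewind to the first node
    for (; !it->is_end(); it->next()) {
        auto decoder = it->get_decoder(); // DecoderBase for the current node
        (void) decoder;                   // per-node conversion would go here
    }
}
```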


@@ -0,0 +1,61 @@
#pragma once
#include "graph_iterator.h"
#include "ggml-decoder.h"
#include <ggml-impl.h>
// TODO: remove the tensorflow namespace
namespace ov {
namespace frontend {
namespace tensorflow {
namespace ggml {
class GgmlOvGraphIterator : public GgmlGraphIterator {
protected:
void initialize_decoders();
public:
using Ptr = std::shared_ptr<GgmlOvGraphIterator>;
GgmlOvGraphIterator(struct ggml_cgraph * cgraph);
/// \brief Get the number of operation nodes in the graph
virtual size_t size() const override;
/// \brief Set iterator to the start position
virtual void reset() override;
/// \brief Move to the next node in the graph
virtual void next() override;
/// \brief Returns true if iterator goes out of the range of available nodes
virtual bool is_end() const override;
/// \brief Return a pointer to a decoder of the current node
virtual std::shared_ptr<DecoderBase> get_decoder() const override;
virtual std::shared_ptr<GraphIterator> get_body_graph_iterator(const std::string& func_name) const override {
return nullptr;
GGML_UNUSED(func_name);
}
/// \brief Returns a vector of input names in the original order
virtual std::vector<std::string> get_input_names() const override;
/// \brief Returns a vector of output names in the original order
virtual std::vector<std::string> get_output_names() const override;
virtual void dump_graph_iterator() const;
private:
struct ggml_cgraph * m_cgraph;
size_t node_index = 0;
std::vector<std::shared_ptr<DecoderBase>> m_decoders;
std::vector<std::string> m_input_names;
std::vector<std::string> m_output_names;
};
} // namespace ggml
} // namespace tensorflow
} // namespace frontend
} // namespace ov


@@ -0,0 +1,43 @@
#pragma once
#include "openvino/frontend/graph_iterator.hpp"
namespace ov {
namespace frontend {
namespace tensorflow { // To be Removed
namespace ggml {
// TODO: Directly include from openvino
class GgmlGraphIterator : public GraphIterator {
public:
virtual size_t size() const = 0;
virtual void reset() = 0;
virtual void next() = 0;
virtual bool is_end() const = 0;
virtual std::shared_ptr<DecoderBase> get_decoder() const = 0;
virtual std::vector<std::string> get_input_names() const = 0;
virtual std::vector<std::string> get_output_names() const = 0;
virtual std::shared_ptr<GraphIterator> get_body_graph_iterator(const std::string& func_name) const = 0;
virtual std::map<std::string, std::string> get_input_names_map() const {
return {};
}
virtual std::map<std::string, std::string> get_output_names_map() const {
return {};
}
};
} // namespace ggml
} // namespace tensorflow
} // namespace frontend
} // namespace ov


@@ -0,0 +1,108 @@
#include "utils.h"
#include "ggml-backend-impl.h"
#include <openvino/frontend/manager.hpp>
#include <openvino/openvino.hpp>
using ov::frontend::tensorflow::ggml::GgmlOvGraphIterator;
std::shared_ptr<GgmlOvGraphIterator> get_ggml_graph_iterator(struct ggml_cgraph * cgraph) {
return std::make_shared<GgmlOvGraphIterator>(cgraph);
}
std::map<std::string, ov::Tensor> get_ggml_graph_input_tensors(std::shared_ptr<GgmlOvGraphIterator> ggml_graph_iterator) {
std::map<std::string, ov::Tensor> input_tensors;
auto input_names = ggml_graph_iterator->get_input_names();
ggml_graph_iterator->reset();
for (; !ggml_graph_iterator->is_end(); ggml_graph_iterator->next()) {
auto decoder = std::dynamic_pointer_cast<GgmlOvDecoder>(ggml_graph_iterator->get_decoder());
for (size_t inp = 0; inp < decoder->get_input_size(); ++inp) {
if (std::find(input_names.begin(), input_names.end(), decoder->get_input_name(inp)) != input_names.end()) {
auto input_data = decoder->get_input_ggml_tensor(inp)->data;
ov::Tensor input_tensor = ov::Tensor(decoder->get_input_type(inp), decoder->get_input_shape(inp).to_shape(), input_data);
input_tensors[decoder->get_input_name(inp)] = input_tensor;
}
}
}
return input_tensors;
}
static ov::frontend::FrontEnd::Ptr get_ggml_frontend() {
ov::frontend::FrontEnd::Ptr front_end = nullptr;
auto fem = ov::frontend::FrontEndManager();
std::string fe_so_path;
#ifdef GGML_OV_FRONTEND
fe_so_path = GGML_OV_FRONTEND;
#endif
fem.register_front_end("ggml", fe_so_path);
front_end = fem.load_by_framework("ggml");
return front_end;
}
enum ggml_status openvino_frontend_compute(ggml_backend_t backend, struct ggml_cgraph * cgraph) {
ov::Core core;
auto devices = core.get_available_devices();
// Get GGML Frontend
auto front_end = get_ggml_frontend();
if (!front_end) {
GGML_LOG_ERROR("GGML FrontEnd is not initialized \n");
return GGML_STATUS_FAILED;
} else {
#ifdef GGML_OPENVINO_DEBUG
GGML_LOG_INFO("GGML FrontEnd is initialized \n");
#endif
}
auto ggml_graph_iterator = get_ggml_graph_iterator(cgraph);
std::shared_ptr<ov::frontend::tensorflow::GraphIterator> graph_iterator = ggml_graph_iterator;
// Load GraphIterator -> InputModel
ov::frontend::InputModel::Ptr input_model = front_end->load(graph_iterator);
if (!input_model) {
GGML_LOG_ERROR("Input Model is not loaded \n");
return GGML_STATUS_FAILED;
} else {
#ifdef GGML_OPENVINO_DEBUG
GGML_LOG_INFO("Input Model loaded \n");
#endif
}
// Convert InputModel -> ov::Model
std::shared_ptr<ov::Model> model = front_end->convert(input_model);
if (!model) {
GGML_LOG_ERROR("Model is not converted \n");
return GGML_STATUS_FAILED;
} else {
#ifdef GGML_OPENVINO_DEBUG
GGML_LOG_INFO("Model converted \n");
#endif
}
// Loading a model to the device
ov::CompiledModel compiled_model = core.compile_model(model);
// Create infer request
ov::InferRequest infer_request = compiled_model.create_infer_request();
// Get input tensor
auto input_names = ggml_graph_iterator->get_input_names();
auto input_tensors = get_ggml_graph_input_tensors(ggml_graph_iterator);
// Set input tensor
for (size_t i = 0; i < input_names.size(); i++) {
infer_request.set_input_tensor(i, input_tensors[input_names[i]]);
}
infer_request.infer();
ov::Tensor output_tensor = infer_request.get_output_tensor();
// Copy the OpenVINO output back into the data of the cgraph's last node
ggml_tensor* dst = cgraph->nodes[cgraph->n_nodes - 1];
std::memcpy(dst->data, output_tensor.data(), output_tensor.get_byte_size());
#ifdef GGML_OPENVINO_DEBUG
GGML_LOG_INFO("Output: %f\n", *output_tensor.data<float>());
#endif
return GGML_STATUS_SUCCESS;
GGML_UNUSED(backend);
}


@@ -0,0 +1,6 @@
#include "ggml-graph-iterator.h"
#include "ggml-backend-impl.h"
std::shared_ptr<ov::frontend::tensorflow::ggml::GgmlOvGraphIterator> get_ggml_graph_iterator(struct ggml_cgraph * cgraph);
enum ggml_status openvino_frontend_compute(ggml_backend_t backend, struct ggml_cgraph * cgraph);
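
Taken together, a minimal smoke-test sketch of this entry point, assuming an already-created OpenVINO backend handle and the frontend `.so` configured as in the README; the helper name and buffer sizes are illustrative:

```cpp
#include <ggml.h>
#include "ggml-openvino/utils.h"

// Hypothetical smoke test: build a single-ADD ggml graph and run it
// through the OpenVINO frontend path declared above.
static void run_add_once(ggml_backend_t backend) {
    struct ggml_init_params params = {
        /*.mem_size   =*/ 16 * 1024 * 1024,
        /*.mem_buffer =*/ nullptr,
        /*.no_alloc   =*/ false,
    };
    struct ggml_context * ctx = ggml_init(params);

    struct ggml_tensor * a = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 4);
    struct ggml_tensor * b = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 4);
    ggml_set_input(a);   // sets GGML_TENSOR_FLAG_INPUT,
    ggml_set_input(b);   // which is what is_graph_input() checks
    for (int i = 0; i < 4; ++i) {
        ((float *) a->data)[i] = 1.0f;
        ((float *) b->data)[i] = 2.0f;
    }

    struct ggml_tensor * c = ggml_add(ctx, a, b);
    ggml_set_output(c);  // GGML_TENSOR_FLAG_OUTPUT, checked by is_graph_output()

    struct ggml_cgraph * graph = ggml_new_graph(ctx);
    ggml_build_forward_expand(graph, c);

    openvino_frontend_compute(backend, graph);  // c->data should now hold 3.0f x4
    ggml_free(ctx);
}
```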