add OpenVINO frontend convert process steps
This commit is contained in:
parent
0a81aa19f7
commit
77d68146a8
|
|
@ -1,6 +1,7 @@
|
|||
#include "ggml-openvino.h"
|
||||
#include "ggml-backend-impl.h"
|
||||
#include "ggml-impl.h"
|
||||
#include "ggml-openvino/utils.h"
|
||||
|
||||
#include <string>
|
||||
#include <mutex>
|
||||
|
|
@ -234,33 +235,35 @@ static void ggml_backend_openvino_mul(ggml_tensor * dst) {
|
|||
}
|
||||
|
||||
static enum ggml_status ggml_backend_openvino_graph_compute(ggml_backend_t backend, struct ggml_cgraph * cgraph) {
|
||||
for (int i = 0; i < cgraph->n_nodes; i++) {
|
||||
struct ggml_tensor * node = cgraph->nodes[i];
|
||||
// for (int i = 0; i < cgraph->n_nodes; i++) {
|
||||
// struct ggml_tensor * node = cgraph->nodes[i];
|
||||
|
||||
if (node->op == GGML_OP_NONE || ggml_is_empty(node)) {
|
||||
return GGML_STATUS_SUCCESS;
|
||||
}
|
||||
// if (node->op == GGML_OP_NONE || ggml_is_empty(node)) {
|
||||
// return GGML_STATUS_SUCCESS;
|
||||
// }
|
||||
|
||||
switch (node->op) {
|
||||
case GGML_OP_PERMUTE:
|
||||
case GGML_OP_RESHAPE:
|
||||
case GGML_OP_TRANSPOSE:
|
||||
case GGML_OP_VIEW:
|
||||
break;
|
||||
case GGML_OP_ADD:
|
||||
{
|
||||
ggml_backend_openvino_add(node);
|
||||
} break;
|
||||
case GGML_OP_MUL:
|
||||
{
|
||||
ggml_backend_openvino_mul(node);
|
||||
} break;
|
||||
case GGML_OP_MUL_MAT:
|
||||
break;
|
||||
default:
|
||||
GGML_ABORT("%s: unsupported op %s\n", __func__, ggml_op_desc(node));
|
||||
}
|
||||
}
|
||||
// switch (node->op) {
|
||||
// case GGML_OP_PERMUTE:
|
||||
// case GGML_OP_RESHAPE:
|
||||
// case GGML_OP_TRANSPOSE:
|
||||
// case GGML_OP_VIEW:
|
||||
// break;
|
||||
// case GGML_OP_ADD:
|
||||
// {
|
||||
// ggml_backend_openvino_add(node);
|
||||
// } break;
|
||||
// case GGML_OP_MUL:
|
||||
// {
|
||||
// ggml_backend_openvino_mul(node);
|
||||
// } break;
|
||||
// case GGML_OP_MUL_MAT:
|
||||
// break;
|
||||
// default:
|
||||
// GGML_ABORT("%s: unsupported op %s\n", __func__, ggml_op_desc(node));
|
||||
// }
|
||||
// }
|
||||
|
||||
openvino_frontend_compute(backend, cgraph);
|
||||
|
||||
return GGML_STATUS_SUCCESS;
|
||||
|
||||
|
|
|
|||
|
|
@ -0,0 +1,30 @@
|
|||
# Instructions to Modify and Build ggml with OpenVINO
|
||||
|
||||
## Step 1: Modify the Source Code
|
||||
|
||||
To point the build at the OpenVINO ggml frontend library, pass the path to its `.so` file as a CMake option:
|
||||
1. Open a terminal and navigate to the root directory of this repo.
|
||||
2. Run the following commands to configure:
|
||||
```sh
|
||||
mkdir build
|
||||
cmake -B build -DGGML_OV_FRONTEND="${openvino_repo_dir}/bin/intel64/Release/libopenvino_ggml_frontend.so"
|
||||
```
|
||||
Where GGML_OV_FRONTEND should point to the path to `libopenvino_ggml_frontend.so` file.
|
||||
|
||||
## Step 2: Build the Project
|
||||
|
||||
After modifying the source code, you need to build the project using CMake. Follow these steps:
|
||||
|
||||
1. (Optional) Enable the debug option for ggml-openvino. This prints a dump of the subgraph sent to OpenVINO, the information produced when converting the `ggml_cgraph` to a GraphIterator, and the input/output values computed for each OP:
|
||||
```sh
|
||||
cmake -B build -DGGML_OPENVINO_DEBUG=ON
|
||||
```
|
||||
|
||||
2. Run the following commands to configure and build the project:
|
||||
```sh
|
||||
cmake -B build -DGGML_OPENVINO=ON
|
||||
cmake --build build -j
|
||||
```
|
||||
|
||||
This will configure the project with OpenVINO support and build it using multiple cores for faster compilation.
|
||||
|
||||
|
|
@ -0,0 +1,54 @@
|
|||
#pragma once
|
||||
|
||||
#include "openvino/core/node.hpp"
|
||||
#include "openvino/frontend/decoder.hpp"
|
||||
|
||||
namespace ov {
|
||||
namespace frontend {
|
||||
namespace ggml {
|
||||
|
||||
// TODO: Directly include from openvino
|
||||
class GgmlDecoder : public DecoderBase {
|
||||
public:
|
||||
virtual ov::Any get_attribute(const std::string& name) const = 0;
|
||||
|
||||
virtual PartialShape get_input_shape(size_t index) const = 0;
|
||||
|
||||
virtual element::Type get_input_type(size_t index) const = 0;
|
||||
|
||||
virtual size_t get_input_size() const = 0;
|
||||
|
||||
virtual void get_input_node(size_t input_port_idx,
|
||||
std::string& producer_name,
|
||||
std::string& producer_output_port_name,
|
||||
size_t& producer_output_port_index) const = 0;
|
||||
|
||||
virtual bool is_graph_input(size_t index) const = 0;
|
||||
|
||||
virtual std::string& get_input_name(size_t index) const = 0;
|
||||
|
||||
virtual PartialShape get_output_shape(size_t index) const = 0;
|
||||
|
||||
virtual element::Type get_output_type(size_t index) const = 0;
|
||||
|
||||
virtual size_t get_output_size() const = 0;
|
||||
|
||||
virtual bool is_graph_output(size_t index) const = 0;
|
||||
|
||||
virtual int32_t* get_output_op_params(size_t index) const = 0;
|
||||
|
||||
virtual std::string& get_output_name(size_t index) const = 0;
|
||||
|
||||
virtual const std::string& get_op_type() const = 0;
|
||||
|
||||
virtual const std::string& get_op_name() const = 0;
|
||||
|
||||
// virtual const std::vector<size_t>& outputs() const = 0;
|
||||
|
||||
// virtual size_t output(size_t index) const = 0;
|
||||
|
||||
};
|
||||
|
||||
} // namespace ggml
|
||||
} // namespace frontend
|
||||
} // namespace ov
|
||||
|
|
@ -0,0 +1,203 @@
|
|||
#include "ggml-decoder.h"
|
||||
#include <ggml.h>
|
||||
#include <ggml-impl.h>
|
||||
|
||||
/// Builds a decoder for one ggml graph node.
///
/// Depending on the node's op, the relevant source tensors are recorded as
/// inputs and the node itself is recorded as the single output. Ops that are
/// not listed in the switch leave both lists empty.
///
/// @param node   Node to decode. It is dereferenced immediately (its name is
///               copied), so it must not be null.
/// @param cgraph Graph the node belongs to; only stored.
GgmlOvDecoder::GgmlOvDecoder(struct ggml_tensor * node, struct ggml_cgraph * cgraph)
    :m_cgraph(cgraph),
     m_node(node),
     m_op_name(std::string(m_node->name)) {
#ifdef GGML_OPENVINO_DEBUG
    // Debug helper: print the first element of an input tensor. Tensors
    // without a data buffer, or whose type is not F32, are skipped because
    // reading their storage as a float would be invalid.
    const auto log_first_value = [](int idx, const ggml_tensor * t) {
        if (t == nullptr || t->data == nullptr || t->type != GGML_TYPE_F32) {
            return;
        }
        float first;
        memcpy(&first, t->data, sizeof(float));
        GGML_LOG_INFO("Decoder input %d: %f \n", idx, first);
    };
#endif

    switch (m_node->op) {
        // Unary OPs
        case GGML_OP_UNARY:
        case GGML_OP_RESHAPE:
        case GGML_OP_VIEW:
            {
                m_inputs.push_back(m_node->src[0]);
                m_outputs.push_back(m_node);
#ifdef GGML_OPENVINO_DEBUG
                log_first_value(0, m_node->src[0]);
#endif
                break;
            }
        // SCALE
        case GGML_OP_SCALE:
            {
                m_inputs.push_back(m_node->src[0]);
                m_outputs.push_back(m_node);
#ifdef GGML_OPENVINO_DEBUG
                // The scale factor is read from the start of op_params as a float.
                float scale;
                memcpy(&scale, m_node->op_params, sizeof(float));
                log_first_value(0, m_node->src[0]);
                GGML_LOG_INFO("Scale: %f \n", scale);
#endif
                break;
            }
        // OPs with 2 inputs
        case GGML_OP_ADD:
        case GGML_OP_DIV:
        case GGML_OP_MUL:
        case GGML_OP_MUL_MAT:
        case GGML_OP_SUB:
        case GGML_OP_GET_ROWS:
            {
                m_inputs.push_back(m_node->src[0]);
                m_inputs.push_back(m_node->src[1]);
                m_outputs.push_back(m_node);
#ifdef GGML_OPENVINO_DEBUG
                log_first_value(0, m_node->src[0]);
                log_first_value(1, m_node->src[1]);
#endif
                break;
            }
        default:
            break;
    }
}
|
||||
|
||||
/// Shape of the input tensor at `index`.
///
/// Walks ne[GGML_MAX_DIMS - 2] down to ne[0], i.e. the extents are emitted in
/// reversed order and the last ggml dimension is not included. If any visited
/// extent is zero, a default-constructed PartialShape is returned instead.
ov::PartialShape GgmlOvDecoder::get_input_shape(size_t index) const {
    const ggml_tensor * tensor = m_inputs[index];

    std::vector<size_t> dims;
    for (int axis = GGML_MAX_DIMS - 2; axis >= 0; --axis) {
        const auto extent = tensor->ne[axis];
        if (extent == 0) {
            return ov::PartialShape();
        }
        dims.push_back(static_cast<size_t>(extent));
    }

    return ov::PartialShape(dims);
}
|
||||
|
||||
/// Element type of the input tensor at `index`.
///
/// F32, F16, I64 and I32 map to their OpenVINO counterparts; every other ggml
/// type yields ov::element::dynamic.
ov::element::Type GgmlOvDecoder::get_input_type(size_t index) const {
    switch (m_inputs[index]->type) {
        case GGML_TYPE_F32: return ov::element::f32;
        case GGML_TYPE_F16: return ov::element::f16;
        case GGML_TYPE_I64: return ov::element::i64;
        case GGML_TYPE_I32: return ov::element::i32;
        default:            return ov::element::dynamic;
    }
}
|
||||
|
||||
size_t GgmlOvDecoder::get_input_size() const {
|
||||
return m_inputs.size();
|
||||
}
|
||||
|
||||
bool GgmlOvDecoder::is_graph_input(size_t index) const {
|
||||
if (m_inputs[index]->flags & GGML_TENSOR_FLAG_INPUT ) {
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
std::string& GgmlOvDecoder::get_input_name(size_t index) const {
|
||||
m_name = std::string(m_inputs[index]->name);
|
||||
return m_name;
|
||||
}
|
||||
|
||||
/// Shape of the output tensor at `index`.
///
/// Walks ne[GGML_MAX_DIMS - 2] down to ne[0], i.e. the extents are emitted in
/// reversed order and the last ggml dimension is not included. A tensor with
/// a zero extent in any visited dimension is treated as empty and yields a
/// default-constructed PartialShape.
ov::PartialShape GgmlOvDecoder::get_output_shape(size_t index) const {
    const ggml_tensor * tensor = m_outputs[index];

    std::vector<size_t> dims;
    for (int axis = GGML_MAX_DIMS - 2; axis >= 0; --axis) {
        const auto extent = tensor->ne[axis];
        if (extent == 0) {
            // empty if any dimension has no elements
            return ov::PartialShape();
        }
        dims.push_back(static_cast<size_t>(extent));
    }

    return ov::PartialShape(dims);
}
|
||||
|
||||
/// Element type of the output tensor at `index`.
///
/// F32, F16, I64 and I32 map to their OpenVINO counterparts; every other ggml
/// type yields ov::element::dynamic.
ov::element::Type GgmlOvDecoder::get_output_type(size_t index) const {
    // TODO: Change to Output
    switch (m_outputs[index]->type) {
        case GGML_TYPE_F32: return ov::element::f32;
        case GGML_TYPE_F16: return ov::element::f16;
        case GGML_TYPE_I64: return ov::element::i64;
        case GGML_TYPE_I32: return ov::element::i32;
        default:            return ov::element::dynamic;
    }
}
|
||||
|
||||
bool GgmlOvDecoder::is_graph_output(size_t index) const {
|
||||
if (m_outputs[index]->flags & GGML_TENSOR_FLAG_OUTPUT) {
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
int32_t* GgmlOvDecoder::get_output_op_params(size_t index) const{
|
||||
return m_outputs[index]->op_params;
|
||||
}
|
||||
|
||||
size_t GgmlOvDecoder::get_output_size() const {
|
||||
return m_outputs.size();
|
||||
}
|
||||
|
||||
std::string& GgmlOvDecoder::get_output_name(size_t index) const {
|
||||
m_name = std::string(m_outputs[index]->name);
|
||||
return m_name;
|
||||
}
|
||||
|
||||
const std::string& GgmlOvDecoder::get_op_name() const {
|
||||
return m_op_name;
|
||||
}
|
||||
|
||||
const std::string& GgmlOvDecoder::get_op_type() const {
|
||||
static const std::map<ggml_op, std::string> opTypeMap = {
|
||||
{GGML_OP_ACC, "GGML_OP_ACC"},
|
||||
{GGML_OP_ADD, "GGML_OP_ADD"},
|
||||
{GGML_OP_ADD1, "GGML_OP_ADD1"},
|
||||
{GGML_OP_DIV, "GGML_OP_DIV"},
|
||||
{GGML_OP_DUP, "GGML_OP_DUP"},
|
||||
{GGML_OP_GET_ROWS, "GGML_OP_GET_ROWS"},
|
||||
{GGML_OP_MUL, "GGML_OP_MUL"},
|
||||
{GGML_OP_MUL_MAT, "GGML_OP_MUL_MAT"},
|
||||
{GGML_OP_PERMUTE, "GGML_OP_PERMUTE"},
|
||||
{GGML_OP_RESHAPE, "GGML_OP_RESHAPE"},
|
||||
{GGML_OP_SCALE, "GGML_OP_SCALE"},
|
||||
{GGML_OP_SUB, "GGML_OP_SUB"},
|
||||
{GGML_OP_UNARY, "GGML_OP_UNARY"},
|
||||
{GGML_OP_VIEW, "GGML_OP_VIEW"}
|
||||
};
|
||||
auto it = opTypeMap.find(m_node->op);
|
||||
if (it != opTypeMap.end()) {
|
||||
return it->second;
|
||||
} else {
|
||||
static const std::string unknown_op = "UNKNOWN_OP";
|
||||
return unknown_op;
|
||||
}
|
||||
// static std::string op_type = ggml_op_name(m_node->op);
|
||||
// return op_type;
|
||||
}
|
||||
|
|
@ -0,0 +1,69 @@
|
|||
#pragma once
|
||||
|
||||
#include "decoder.h"
|
||||
#include "ggml.h"
|
||||
|
||||
class GgmlOvDecoder : public ov::frontend::ggml::GgmlDecoder {
|
||||
public:
|
||||
using ov::frontend::ggml::GgmlDecoder::GgmlDecoder;
|
||||
GgmlOvDecoder(struct ggml_tensor * node, struct ggml_cgraph * cgraph);
|
||||
|
||||
virtual ov::Any get_attribute(const std::string& name) const override {
|
||||
return nullptr;
|
||||
GGML_UNUSED(name);
|
||||
}
|
||||
|
||||
virtual ov::PartialShape get_input_shape(size_t index) const override;
|
||||
|
||||
virtual ov::element::Type get_input_type(size_t index) const override;
|
||||
|
||||
virtual size_t get_input_size() const override;
|
||||
|
||||
virtual void get_input_node(size_t input_port_idx,
|
||||
std::string& producer_name,
|
||||
std::string& producer_output_port_name,
|
||||
size_t& producer_output_port_index) const override {
|
||||
GGML_UNUSED(input_port_idx);
|
||||
GGML_UNUSED(producer_name);
|
||||
GGML_UNUSED(producer_output_port_name);
|
||||
GGML_UNUSED(producer_output_port_index);
|
||||
}
|
||||
|
||||
virtual bool is_graph_input(size_t index) const override;
|
||||
|
||||
virtual std::string& get_input_name(size_t index) const override;
|
||||
|
||||
virtual ov::PartialShape get_output_shape(size_t index) const override;
|
||||
|
||||
virtual ov::element::Type get_output_type(size_t index) const override;
|
||||
|
||||
virtual size_t get_output_size() const override;
|
||||
|
||||
virtual bool is_graph_output(size_t index) const override;
|
||||
|
||||
virtual int32_t* get_output_op_params(size_t index) const override;
|
||||
|
||||
virtual std::string& get_output_name(size_t index) const override;
|
||||
|
||||
virtual const std::string& get_op_type() const override;
|
||||
|
||||
virtual const std::string& get_op_name() const override;
|
||||
|
||||
const ggml_tensor* get_input_ggml_tensor(size_t index) const {
|
||||
return m_inputs[index];
|
||||
}
|
||||
|
||||
// virtual const std::vector<size_t>& outputs() const override;
|
||||
|
||||
// virtual size_t output(size_t index) const override;
|
||||
|
||||
private:
|
||||
size_t m_index;
|
||||
struct ggml_cgraph * m_cgraph;
|
||||
std::vector<ggml_tensor *> m_inputs;
|
||||
std::vector<ggml_tensor *> m_outputs;
|
||||
ggml_tensor * m_node;
|
||||
const std::string m_op_name;
|
||||
mutable std::string m_name;
|
||||
};
|
||||
|
||||
|
|
@ -0,0 +1,96 @@
|
|||
#include "ggml-graph-iterator.h"
|
||||
#include <ggml.h>
|
||||
#include <ggml-impl.h>
|
||||
|
||||
namespace ov {
|
||||
namespace frontend {
|
||||
namespace tensorflow {
|
||||
namespace ggml {
|
||||
|
||||
/// Stores the graph pointer and immediately builds the decoder list for it.
/// In GGML_OPENVINO_DEBUG builds the resulting decoders are dumped to the log.
GgmlOvGraphIterator::GgmlOvGraphIterator(struct ggml_cgraph * cgraph)
    : m_cgraph(cgraph) {
    initialize_decoders();

#ifdef GGML_OPENVINO_DEBUG
    dump_graph_iterator();
#endif
}
|
||||
|
||||
void GgmlOvGraphIterator::initialize_decoders() {
|
||||
auto nodes_size = m_cgraph->n_nodes;
|
||||
// Initialize decoder for each node
|
||||
// m_decoders.resize(static_cast<size_t>(nodes_size));
|
||||
|
||||
for (int i = 0; i < nodes_size; ++i) {
|
||||
// Skip View Op
|
||||
if (m_cgraph->nodes[i] ->op == GGML_OP_VIEW || m_cgraph->nodes[i] ->op == GGML_OP_PERMUTE) {
|
||||
continue;
|
||||
}
|
||||
auto decoder = std::make_shared<GgmlOvDecoder>(m_cgraph->nodes[i], m_cgraph);
|
||||
m_decoders.push_back(decoder);
|
||||
for (size_t inp = 0; inp < decoder->get_input_size(); ++inp) {
|
||||
// if (i == 0 || decoder->is_graph_input(inp)) {
|
||||
m_input_names.push_back(decoder->get_input_name(inp));
|
||||
// }
|
||||
}
|
||||
for (size_t inp = 0; inp < decoder->get_output_size(); ++inp) {
|
||||
if (i == nodes_size - 1 || decoder->is_graph_output(inp)) {
|
||||
m_output_names.push_back(decoder->get_output_name(inp));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
void GgmlOvGraphIterator::reset() {
|
||||
node_index = 0;
|
||||
}
|
||||
|
||||
size_t GgmlOvGraphIterator::size() const {
|
||||
return m_decoders.size();
|
||||
}
|
||||
|
||||
/// Advances the cursor by one decoder; no range check is performed.
void GgmlOvGraphIterator::next() {
    ++node_index;
}
|
||||
|
||||
bool GgmlOvGraphIterator::is_end() const {
|
||||
return node_index >= m_decoders.size();
|
||||
}
|
||||
|
||||
std::shared_ptr<DecoderBase> GgmlOvGraphIterator::get_decoder() const {
|
||||
return m_decoders[node_index];
|
||||
}
|
||||
|
||||
std::vector<std::string> GgmlOvGraphIterator::get_input_names() const {
|
||||
return m_input_names;
|
||||
}
|
||||
|
||||
std::vector<std::string> GgmlOvGraphIterator::get_output_names() const {
|
||||
return m_output_names;
|
||||
}
|
||||
|
||||
void GgmlOvGraphIterator::dump_graph_iterator() const {
|
||||
for (size_t i = 0; i < m_decoders.size(); ++i) {
|
||||
GGML_LOG_INFO("OP %zu: %s\n", i, m_decoders[i]->get_op_name().c_str());
|
||||
for (size_t inp = 0; inp < m_decoders[i]->get_input_size(); ++inp) {
|
||||
ov::PartialShape pshape = std::dynamic_pointer_cast<GgmlOvDecoder>(m_decoders[i])->get_input_shape(inp);
|
||||
ov::element::Type ptype = std::dynamic_pointer_cast<GgmlOvDecoder>(m_decoders[i])->get_input_type(inp);
|
||||
GGML_LOG_INFO("Input name: %s\n", std::dynamic_pointer_cast<GgmlOvDecoder>(m_decoders[i])->get_input_name(inp).c_str());
|
||||
GGML_LOG_INFO("Input shape: %s\n", pshape.to_string().c_str());
|
||||
GGML_LOG_INFO("Input type: %s\n", ptype.to_string().c_str());
|
||||
}
|
||||
for (size_t outp = 0; outp < std::dynamic_pointer_cast<GgmlOvDecoder>(m_decoders[i])->get_output_size(); ++outp) {
|
||||
ov::PartialShape pshape = std::dynamic_pointer_cast<GgmlOvDecoder>(m_decoders[i])->get_output_shape(outp);
|
||||
ov::element::Type ptype = std::dynamic_pointer_cast<GgmlOvDecoder>(m_decoders[i])->get_output_type(outp);
|
||||
GGML_LOG_INFO("Output name: %s\n", std::dynamic_pointer_cast<GgmlOvDecoder>(m_decoders[i])->get_output_name(outp).c_str());
|
||||
GGML_LOG_INFO("Output shape: %s\n", pshape.to_string().c_str());
|
||||
GGML_LOG_INFO("Output type: %s\n", ptype.to_string().c_str());
|
||||
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
} // namespace ggml
|
||||
} // namespace frontend
|
||||
} // namespace ov
|
||||
|
|
@ -0,0 +1,61 @@
|
|||
#pragma once
|
||||
|
||||
#include "graph_iterator.h"
|
||||
#include "ggml-decoder.h"
|
||||
#include <ggml-impl.h>
|
||||
|
||||
// To remove tensorflow
|
||||
namespace ov {
|
||||
namespace frontend {
|
||||
namespace tensorflow {
|
||||
namespace ggml {
|
||||
|
||||
class GgmlOvGraphIterator : public GgmlGraphIterator {
|
||||
|
||||
protected:
|
||||
void initialize_decoders();
|
||||
|
||||
public:
|
||||
using Ptr = std::shared_ptr<GgmlOvGraphIterator>;
|
||||
GgmlOvGraphIterator(struct ggml_cgraph * cgraph);
|
||||
|
||||
/// \brief Get a number of operation nodes in the sgraph
|
||||
virtual size_t size() const override;
|
||||
|
||||
/// \brief Set iterator to the start position
|
||||
virtual void reset() override;
|
||||
|
||||
/// \brief Move to the next node in the graph
|
||||
virtual void next() override;
|
||||
|
||||
/// \brief Returns true if iterator goes out of the range of available nodes
|
||||
virtual bool is_end() const override;
|
||||
|
||||
/// \brief Return a pointer to a decoder of the current node
|
||||
virtual std::shared_ptr<DecoderBase> get_decoder() const override;
|
||||
|
||||
virtual std::shared_ptr<GraphIterator> get_body_graph_iterator(const std::string& func_name) const override {
|
||||
return nullptr;
|
||||
GGML_UNUSED(func_name);
|
||||
}
|
||||
|
||||
/// \brief Returns a vector of input names in the original order
|
||||
virtual std::vector<std::string> get_input_names() const override;
|
||||
|
||||
/// \brief Returns a vector of output names in the original order
|
||||
virtual std::vector<std::string> get_output_names() const override;
|
||||
|
||||
virtual void dump_graph_iterator() const;
|
||||
|
||||
private:
|
||||
struct ggml_cgraph * m_cgraph;
|
||||
size_t node_index = 0;
|
||||
std::vector<std::shared_ptr<DecoderBase>> m_decoders;
|
||||
std::vector<std::string> m_input_names;
|
||||
std::vector<std::string> m_output_names;
|
||||
};
|
||||
|
||||
}
|
||||
} // namespace ggml
|
||||
} // namespace frontend
|
||||
} // namespace ov
|
||||
|
|
@ -0,0 +1,43 @@
|
|||
#pragma once
|
||||
|
||||
#include "openvino/frontend/graph_iterator.hpp"
|
||||
|
||||
namespace ov {
|
||||
namespace frontend {
|
||||
namespace tensorflow { // To be Removed
|
||||
namespace ggml {
|
||||
|
||||
// TODO: Directly include from openvino
|
||||
class GgmlGraphIterator : public GraphIterator {
|
||||
public:
|
||||
|
||||
virtual size_t size() const = 0;
|
||||
|
||||
virtual void reset() = 0;
|
||||
|
||||
virtual void next() = 0;
|
||||
|
||||
virtual bool is_end() const = 0;
|
||||
|
||||
virtual std::shared_ptr<DecoderBase> get_decoder() const = 0;
|
||||
|
||||
virtual std::vector<std::string> get_input_names() const = 0;
|
||||
|
||||
virtual std::vector<std::string> get_output_names() const = 0;
|
||||
|
||||
virtual std::shared_ptr<GraphIterator> get_body_graph_iterator(const std::string& func_name) const = 0;
|
||||
|
||||
virtual std::map<std::string, std::string> get_input_names_map() const {
|
||||
return {};
|
||||
}
|
||||
|
||||
virtual std::map<std::string, std::string> get_output_names_map() const {
|
||||
return {};
|
||||
}
|
||||
|
||||
};
|
||||
|
||||
}
|
||||
} // namespace ggml
|
||||
} // namespace frontend
|
||||
} // namespace ov
|
||||
|
|
@ -0,0 +1,108 @@
|
|||
#include "utils.h"
|
||||
#include "ggml-backend-impl.h"
|
||||
#include <openvino/frontend/manager.hpp>
|
||||
#include <openvino/openvino.hpp>
|
||||
|
||||
using ov::frontend::tensorflow::ggml::GgmlOvGraphIterator;
|
||||
|
||||
std::shared_ptr<GgmlOvGraphIterator> get_ggml_graph_iterator(struct ggml_cgraph * cgraph) {
|
||||
return std::make_shared<GgmlOvGraphIterator>(cgraph);
|
||||
}
|
||||
|
||||
std::map<std::string, ov::Tensor> get_ggml_graph_input_tensors(std::shared_ptr<GgmlOvGraphIterator> ggml_graph_iterator) {
|
||||
std::map<std::string, ov::Tensor> input_tensors;
|
||||
auto input_names = ggml_graph_iterator->get_input_names();
|
||||
ggml_graph_iterator->reset();
|
||||
for (; !ggml_graph_iterator->is_end(); ggml_graph_iterator->next()) {
|
||||
auto decoder = std::dynamic_pointer_cast<GgmlOvDecoder>(ggml_graph_iterator->get_decoder());
|
||||
for (size_t inp = 0; inp < decoder->get_input_size(); ++inp) {
|
||||
if (std::find(input_names.begin(), input_names.end(), decoder->get_input_name(inp)) != input_names.end()) {
|
||||
auto input_data = decoder->get_input_ggml_tensor(inp)->data;
|
||||
ov::Tensor input_tensor = ov::Tensor(decoder->get_input_type(inp), decoder->get_input_shape(inp).to_shape(), input_data);
|
||||
input_tensors[decoder->get_input_name(inp)] = input_tensor;
|
||||
}
|
||||
}
|
||||
}
|
||||
return input_tensors;
|
||||
}
|
||||
|
||||
/// Loads the OpenVINO "ggml" frontend from the shared library whose path is
/// given by the GGML_OV_FRONTEND compile definition.
///
/// @return The loaded frontend, or nullptr when no library path was
///         configured at build time (the caller treats nullptr as failure).
static ov::frontend::FrontEnd::Ptr get_ggml_frontend() {
    std::string fe_so_path;
#ifdef GGML_OV_FRONTEND
    fe_so_path = GGML_OV_FRONTEND;
#endif

    // Without a configured path there is no library to register; report it
    // instead of handing an empty path to the frontend manager.
    if (fe_so_path.empty()) {
        GGML_LOG_ERROR("%s: GGML_OV_FRONTEND is not set, cannot locate the ggml frontend library\n", __func__);
        return nullptr;
    }

    auto fem = ov::frontend::FrontEndManager();
    fem.register_front_end("ggml", fe_so_path);
    return fem.load_by_framework("ggml");
}
|
||||
|
||||
/// Runs a ggml graph through OpenVINO.
///
/// Steps: load the ggml frontend, wrap the graph in a graph iterator, load
/// and convert it to an ov::Model, compile it, feed the graph's input
/// tensors, run inference, and copy the (first) output tensor into the data
/// buffer of the last node of the graph.
///
/// @param backend Unused.
/// @param cgraph  Graph to compute.
/// @return GGML_STATUS_SUCCESS on success (including an empty graph);
///         GGML_STATUS_FAILED if any stage fails or OpenVINO throws.
enum ggml_status openvino_frontend_compute(ggml_backend_t backend, struct ggml_cgraph * cgraph) {
    GGML_UNUSED(backend);

    // Nothing to compute; also avoids indexing nodes[-1] further down.
    if (cgraph->n_nodes <= 0) {
        return GGML_STATUS_SUCCESS;
    }

    try {
        ov::Core core;

        // Get GGML Frontend
        auto front_end = get_ggml_frontend();
        if (!front_end) {
            GGML_LOG_ERROR("GGML FrontEnd is not initialized \n");
            return GGML_STATUS_FAILED;
        }
#ifdef GGML_OPENVINO_DEBUG
        GGML_LOG_INFO("GGML FrontEnd is initialized \n");
#endif

        auto ggml_graph_iterator = get_ggml_graph_iterator(cgraph);
        std::shared_ptr<ov::frontend::tensorflow::GraphIterator> graph_iterator = ggml_graph_iterator;

        // Load GraphIterator -> InputModel
        ov::frontend::InputModel::Ptr input_model = front_end->load(graph_iterator);
        if (!input_model) {
            GGML_LOG_ERROR("Input Model is not loaded \n");
            return GGML_STATUS_FAILED;
        }
#ifdef GGML_OPENVINO_DEBUG
        GGML_LOG_INFO("Input Model loaded \n");
#endif

        // Convert InputModel -> ov::Model
        std::shared_ptr<ov::Model> model = front_end->convert(input_model);
        if (!model) {
            // A null model must not reach compile_model().
            GGML_LOG_ERROR("Model is not converted \n");
            return GGML_STATUS_FAILED;
        }
#ifdef GGML_OPENVINO_DEBUG
        GGML_LOG_INFO("Model converted \n");
#endif

        // Loading a model to the device
        ov::CompiledModel compiled_model = core.compile_model(model);

        // Create infer request
        ov::InferRequest infer_request = compiled_model.create_infer_request();

        // Get input tensor
        auto input_names = ggml_graph_iterator->get_input_names();
        auto input_tensors = get_ggml_graph_input_tensors(ggml_graph_iterator);

        // Set input tensor
        // NOTE(review): this assumes the i-th collected input name corresponds
        // to the i-th model input - confirm against the frontend's ordering.
        for (size_t i = 0; i < input_names.size(); i++) {
            infer_request.set_input_tensor(i, input_tensors[input_names[i]]);
        }

        infer_request.infer();

        ov::Tensor output_tensor = infer_request.get_output_tensor();

        // Put data in output tensor to the last node -> data in cgraph
        ggml_tensor* dst = cgraph->nodes[cgraph->n_nodes - 1];
        const size_t output_bytes = output_tensor.get_byte_size();
        if (dst->data == nullptr || output_bytes > ggml_nbytes(dst)) {
            // Copying would write through a null pointer or past the end of
            // the destination tensor.
            GGML_LOG_ERROR("%s: output of %zu bytes does not fit destination tensor %s\n",
                           __func__, output_bytes, dst->name);
            return GGML_STATUS_FAILED;
        }
        std::memcpy(dst->data, output_tensor.data(), output_bytes);

#ifdef GGML_OPENVINO_DEBUG
        // data<float>() is only valid for f32 tensors.
        if (output_tensor.get_element_type() == ov::element::f32) {
            GGML_LOG_INFO("Output: %f\n", *output_tensor.data<float>());
        }
#endif
    } catch (const std::exception & e) {
        // OpenVINO reports errors via exceptions; do not let them escape the
        // backend entry point.
        GGML_LOG_ERROR("%s: OpenVINO error: %s\n", __func__, e.what());
        return GGML_STATUS_FAILED;
    }

    return GGML_STATUS_SUCCESS;
}
|
||||
|
|
@ -0,0 +1,6 @@
|
|||
#include "ggml-graph-iterator.h"
|
||||
#include "ggml-backend-impl.h"
|
||||
|
||||
std::shared_ptr<ov::frontend::tensorflow::ggml::GgmlOvGraphIterator> get_ggml_graph_iterator(struct ggml_cgraph * cgraph);
|
||||
|
||||
enum ggml_status openvino_frontend_compute (ggml_backend_t backend, struct ggml_cgraph * cgraph);
|
||||
Loading…
Reference in New Issue