FEAT: Add all conversion code from ov side

Yu, Zijun 2025-05-09 13:04:20 +08:00 committed by Mustafa Cavus
parent f15a2cc057
commit 0d009fe61a
31 changed files with 1465 additions and 15 deletions

View File

@@ -692,7 +692,11 @@ To read documentation for how to build on IBM Z & LinuxONE, [click here](./build
git submodule update --init --recursive
export OPENVINO_LLAMA_PATH=$(pwd)
```
Before building, set `ENABLE_OV_GGML_FRONTEND` to `false` in `CMakePresets.json`, since this branch of llama.cpp (`full_backend`) already contains the conversion code from the OpenVINO side. Alternatively, you can build the master branch of OpenVINO instead.
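The relevant entry in `CMakePresets.json` would then look something like this (the exact preset name and surrounding structure are illustrative):
```json
"cacheVariables": {
    "ENABLE_OV_GGML_FRONTEND": false
}
```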
```
cmake --preset Release
cmake --build build/Release
```
@@ -700,7 +704,7 @@ To read documentation for how to build on IBM Z & LinuxONE, [click here](./build
### Build llama.cpp-ov
```bash
git clone https://github.com/intel-sandbox/llama.cpp-ov.git -b dev_backend_openvino
git clone https://github.com/intel-sandbox/llama.cpp-ov.git -b full_backend
cd llama.cpp-ov
cmake --preset ReleaseOV

View File

@@ -5,8 +5,8 @@
#include <memory>
#include <vector>
#include "decoder.h"
#include "ggml.h"
#include "openvino/decoder.hpp"
class GgmlOvDecoder : public ov::frontend::ggml::GgmlDecoder {
public:

View File

@@ -8,7 +8,6 @@ namespace ov {
namespace frontend {
namespace ggml {
// TODO: Directly include from openvino
class GgmlDecoder : public DecoderBase {
public:
virtual ov::Any get_attribute(const std::string& name) const = 0;

View File

@@ -0,0 +1,27 @@
#include "frontend.hpp"
#include "input_model.hpp"
#include "op_table.hpp"
#include "translate_session.hpp"
namespace ov {
namespace frontend {
namespace ggml {
FrontEnd::FrontEnd() {}
std::shared_ptr<Model> FrontEnd::convert(const InputModel::Ptr& model) {
auto ggml_model = std::dynamic_pointer_cast<ggml::InputModel>(model);
FRONT_END_GENERAL_CHECK(ggml_model, "Invalid input model");
std::shared_ptr<Model> converted_model;
const auto& supported_ops = get_supported_ops();
{
TranslateSession translate_session(model, supported_ops);
converted_model = translate_session.get_converted_model();
}
return converted_model;
}
} // namespace ggml
} // namespace frontend
} // namespace ov

View File

@@ -0,0 +1,23 @@
// Copyright (C) 2018-2024 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#pragma once
#include <openvino/frontend/frontend.hpp>
namespace ov {
namespace frontend {
namespace ggml {
class FrontEnd {
public:
using Ptr = std::shared_ptr<FrontEnd>;
FrontEnd();
static std::shared_ptr<Model> convert(const InputModel::Ptr& model);
};
} // namespace ggml
} // namespace frontend
} // namespace ov

View File

@@ -0,0 +1,17 @@
#include "input_model.hpp"
#include "decoder.hpp"
namespace ov {
namespace frontend {
namespace ggml {
InputModel::InputModel(const std::shared_ptr<GgmlDecoder>& gdecoder) : m_decoder(gdecoder) {}
const std::shared_ptr<GgmlDecoder>& InputModel::get_model_decoder() const {
return m_decoder;
}
} // namespace ggml
} // namespace frontend
} // namespace ov

View File

@@ -0,0 +1,29 @@
#pragma once
#include <openvino/frontend/input_model.hpp>
#include "decoder.hpp"
namespace ov {
namespace frontend {
namespace ggml {
class FrontEnd;
class InputModel : public ov::frontend::InputModel {
friend class ::ov::frontend::ggml::FrontEnd;
public:
explicit InputModel(const std::shared_ptr<GgmlDecoder>& gdecoder);
const std::shared_ptr<GgmlDecoder>& get_model_decoder() const;
private:
std::shared_ptr<GgmlDecoder> m_decoder;
};
} // namespace ggml
} // namespace frontend
} // namespace ov

View File

@@ -0,0 +1,100 @@
#pragma once
#include <openvino/frontend/node_context.hpp>
#include "decoder.hpp"
namespace ov {
namespace frontend {
namespace ggml {
class TranslateSession;
typedef std::map<std::string, Output<Node>> TensorMap;
class NodeContext : public frontend::NodeContext {
public:
NodeContext(const std::shared_ptr<GgmlDecoder>& decoder,
std::shared_ptr<TensorMap>& tensor_map,
TranslateSession* translate_session = nullptr)
: ov::frontend::NodeContext(decoder->get_op_type()),
m_decoder(decoder),
m_tensor_map(tensor_map),
m_translate_session(translate_session) {
m_input_names = decoder->get_input_names();
m_output_names = decoder->get_output_names();
}
TranslateSession* get_translate_session() const {
return m_translate_session;
}
size_t get_input_size() const override {
return m_decoder->get_input_size();
}
Any get_input_type(size_t index) const {
return m_decoder->get_input_type(m_input_names[index]);
}
PartialShape get_input_shape(size_t index) const {
return m_decoder->get_input_shape(m_input_names[index]);
}
std::vector<size_t> get_input_stride(size_t index) const {
return m_decoder->get_input_stride(m_input_names[index]);
}
PartialShape get_output_shape(size_t index) const {
return m_decoder->get_output_shape(m_output_names[index]);
}
std::vector<size_t> get_output_stride(size_t index) const {
return m_decoder->get_output_stride(m_output_names[index]);
}
int32_t* get_input_op_params(size_t index) const {
return m_decoder->get_input_op_params(m_input_names[index]);
}
int32_t* get_output_op_params(size_t index) const {
return m_decoder->get_output_op_params(m_output_names[index]);
}
ov::element::Type get_output_type(size_t index) const {
return m_decoder->get_output_type(m_output_names[index]);
}
Output<Node> get_input(int idx) const override {
return m_tensor_map->at(m_decoder->get_input_name(idx));
}
Output<Node> get_input(const std::string& name) const override {
return m_tensor_map->at(name);
}
const std::string& get_name() const override {
return m_decoder->get_op_name();
}
ov::Any get_attribute_as_any(const std::string& name) const override {
return m_decoder->get_attribute(name);
}
bool check_if_continuous() const {
return m_decoder->check_if_continuous();
}
private:
std::shared_ptr<GgmlDecoder> m_decoder;
std::shared_ptr<TensorMap>& m_tensor_map;
TranslateSession* m_translate_session;
std::vector<std::string> m_input_names;
std::vector<std::string> m_output_names;
};
using CreatorFunction = std::function<ov::OutputVector(const ov::frontend::ggml::NodeContext&)>;
} // namespace ggml
} // namespace frontend
} // namespace ov

View File

@@ -0,0 +1,23 @@
#include "openvino/op/add.hpp"
#include "../node_context.hpp"
#include "../utils.hpp"
namespace ov {
namespace frontend {
namespace ggml {
namespace op {
OutputVector translate_add(const NodeContext& context) {
num_inputs_check(context, 2, 2);
auto lhs = context.get_input(0);
auto rhs = context.get_input(1);
auto add = std::make_shared<ov::op::v1::Add>(lhs, rhs);
return {add};
}
} // namespace op
} // namespace ggml
} // namespace frontend
} // namespace ov

View File

@@ -0,0 +1,56 @@
#include <climits>
#include <cstdint>
#include <memory>
#include <vector>
#include "../node_context.hpp"
#include "../utils.hpp"
#include "openvino/op/reshape.hpp"
#include "openvino/op/slice.hpp"
namespace ov {
namespace frontend {
namespace ggml {
namespace op {
OutputVector translate_cont(const NodeContext& context) {
num_inputs_check(context, 1, 1);
auto src_shape = context.get_input_shape(0).to_shape();
auto dst_shape = context.get_output_shape(0).to_shape();
bool continuous = context.check_if_continuous();
if (continuous) {
// The input comes from a PERMUTE
dst_shape[1] = -1;
auto result = std::make_shared<ov::op::v1::Reshape>(
context.get_input(0),
ov::op::v0::Constant::create(ov::element::i64, {dst_shape.size()}, dst_shape),
false);
return {result};
} else {
// The input comes from a VIEW
// Currently all cases are slicing at lowest dim
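// Assuming ggml's byte-stride (nb) convention, op_params[0] is the view's
// offset in bytes; dividing it by the innermost output stride converts it to
// the starting element index along dim 2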
int32_t* op_params = context.get_input_op_params(0);
auto output_stride = context.get_output_stride(0);
int64_t split_addr = op_params[0] / output_stride[2];
std::vector<int64_t> begin = {0, 0, split_addr};
std::vector<int64_t> end = {(int64_t)src_shape[0], INT_MAX, split_addr + (int64_t)src_shape[2]};
std::vector<int64_t> strides = {1, 1, 1};
auto begin_const = ov::op::v0::Constant::create(ov::element::i64, {begin.size()}, begin);
auto end_const = ov::op::v0::Constant::create(ov::element::i64, {end.size()}, end);
auto strides_const = ov::op::v0::Constant::create(ov::element::i64, {strides.size()}, strides);
auto slice = std::make_shared<ov::op::v8::Slice>(context.get_input(0), begin_const, end_const, strides_const);
return {slice};
}
}
} // namespace op
} // namespace ggml
} // namespace frontend
} // namespace ov

View File

@@ -0,0 +1,106 @@
#include <cstdint>
#include <memory>
#include <vector>
#include "../node_context.hpp"
#include "../utils.hpp"
#include "openvino/core/node.hpp"
#include "openvino/core/node_output.hpp"
#include "openvino/op/add.hpp"
#include "openvino/op/constant.hpp"
#include "openvino/op/convert_like.hpp"
#include "openvino/op/range.hpp"
#include "openvino/op/reshape.hpp"
#include "openvino/op/scatter_nd_update.hpp"
#include "openvino/op/transpose.hpp"
#include "openvino/op/unsqueeze.hpp"
namespace ov {
namespace frontend {
namespace ggml {
namespace op {
OutputVector translate_cpy(const NodeContext& context) {
num_inputs_check(context, 2, 2);
auto src0 = context.get_input(0);
auto src1 = context.get_input(1);
auto past_token_len = context.get_input("past_token_len");
auto src0_shape = context.get_input_shape(0).to_shape();
auto output_shape = context.get_output_shape(0).to_shape();
bool continuous = context.check_if_continuous();
std::vector<size_t> input0_strides = context.get_input_stride(0);
std::vector<size_t> output_strides = context.get_output_stride(0);
auto one = ov::op::v0::Constant::create(ov::element::i64, ov::Shape{}, {1});
src0 = std::make_shared<ov::op::v1::ConvertLike>(src0, src1);
if (continuous) {
// Write K to cache_k
int64_t head_size = src0_shape[2];
int64_t num_heads = src0_shape[1];
auto reshaped_src1_shape =
ov::op::v0::Constant::create(ov::element::i64, {3}, std::vector<int64_t>{-1, num_heads, head_size});
auto reshaped_src1 = std::make_shared<ov::op::v1::Reshape>(src1, reshaped_src1_shape, false);
auto token_len = get_dimensions(src0.get_node_shared_ptr(), {0});
token_len = std::make_shared<ov::op::v1::Reshape>(token_len,
ov::op::v0::Constant::create(ov::element::i64, {0}, {}),
false);
auto total_token_len = std::make_shared<ov::op::v1::Add>(past_token_len, token_len);
std::shared_ptr<ov::Node> indices =
std::make_shared<ov::op::v4::Range>(past_token_len, total_token_len, one, ov::element::i64);
indices = std::make_shared<ov::op::v0::Unsqueeze>(
indices,
ov::op::v0::Constant::create(ov::element::i64, {1}, std::vector<int64_t>{1}));
auto res = std::make_shared<ov::op::v3::ScatterNDUpdate>(reshaped_src1, indices, src0);
return {res};
} else {
// Write V to cache_v
int64_t total_head_size = src0_shape[1];
auto reshaped_src0 = std::make_shared<ov::op::v1::Reshape>(
src0,
ov::op::v0::Constant::create(ov::element::i64, {2}, std::vector<int64_t>{total_head_size, -1}),
false);
auto transposed_src0 =
std::make_shared<ov::op::v1::Transpose>(reshaped_src0,
ov::op::v0::Constant::create(ov::element::i64, {2}, {1, 0}));
auto reshaped_src1 = std::make_shared<ov::op::v1::Reshape>(
src1,
ov::op::v0::Constant::create(ov::element::i64, {2}, std::vector<int64_t>{total_head_size, -1}),
false);
auto transposed_src1 =
std::make_shared<ov::op::v1::Transpose>(reshaped_src1,
ov::op::v0::Constant::create(ov::element::i64, {2}, {1, 0}));
auto token_len = get_dimensions(src0.get_node_shared_ptr(), {2});
token_len = std::make_shared<ov::op::v1::Reshape>(token_len,
ov::op::v0::Constant::create(ov::element::i64, {0}, {}),
false);
auto total_token_len = std::make_shared<ov::op::v1::Add>(past_token_len, token_len);
std::shared_ptr<ov::Node> indices =
std::make_shared<ov::op::v4::Range>(past_token_len, total_token_len, one, ov::element::i64);
indices = std::make_shared<ov::op::v0::Unsqueeze>(
indices,
ov::op::v0::Constant::create(ov::element::i64, {1}, std::vector<int64_t>{1}));
auto res = std::make_shared<ov::op::v3::ScatterNDUpdate>(transposed_src1, indices, transposed_src0);
auto transposed_res =
std::make_shared<ov::op::v1::Transpose>(res, ov::op::v0::Constant::create(ov::element::i64, {2}, {1, 0}));
auto reshaped_res = std::make_shared<ov::op::v1::Reshape>(
transposed_res,
ov::op::v0::Constant::create(ov::element::i64, {3}, std::vector<int64_t>{1, total_head_size, -1}),
false);
return {reshaped_res};
}
}
} // namespace op
} // namespace ggml
} // namespace frontend
} // namespace ov
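Side note: the cache-append pattern above (a Range of row indices feeding ScatterNDUpdate) can be exercised in isolation. Below is a minimal standalone sketch, not part of this commit; the shapes and the fixed token count of 7 are illustrative assumptions:
```cpp
#include <memory>

#include "openvino/core/model.hpp"
#include "openvino/op/add.hpp"
#include "openvino/op/constant.hpp"
#include "openvino/op/parameter.hpp"
#include "openvino/op/range.hpp"
#include "openvino/op/scatter_nd_update.hpp"
#include "openvino/op/unsqueeze.hpp"

int main() {
    // cache: [max_tokens, num_heads, head_size]; new_k: [new_tokens, num_heads, head_size]
    auto cache = std::make_shared<ov::op::v0::Parameter>(ov::element::f32, ov::Shape{1024, 32, 96});
    auto new_k = std::make_shared<ov::op::v0::Parameter>(ov::element::f32, ov::Shape{7, 32, 96});
    auto past = std::make_shared<ov::op::v0::Parameter>(ov::element::i64, ov::Shape{});
    auto one = ov::op::v0::Constant::create(ov::element::i64, ov::Shape{}, {1});
    auto len = ov::op::v0::Constant::create(ov::element::i64, ov::Shape{}, {7});
    // indices = [[past], [past+1], ..., [past+6]], shape [7, 1]
    auto total = std::make_shared<ov::op::v1::Add>(past, len);
    auto range = std::make_shared<ov::op::v4::Range>(past, total, one, ov::element::i64);
    auto axis1 = ov::op::v0::Constant::create(ov::element::i64, ov::Shape{1}, {1});
    auto indices = std::make_shared<ov::op::v0::Unsqueeze>(range, axis1);
    // Rows [past, past+7) of the cache are replaced with the new K values
    auto updated = std::make_shared<ov::op::v3::ScatterNDUpdate>(cache, indices, new_k);
    auto model = std::make_shared<ov::Model>(ov::OutputVector{updated->output(0)},
                                             ov::ParameterVector{cache, new_k, past});
    return model ? 0 : 1;
}
```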

View File

@@ -0,0 +1,40 @@
#include <cstdint>
#include <vector>
#include "../node_context.hpp"
#include "../utils.hpp"
#include "openvino/core/node.hpp"
#include "openvino/core/node_output.hpp"
#include "openvino/op/constant.hpp"
#include "openvino/op/convert.hpp"
#include "openvino/op/gather.hpp"
#include "openvino/op/reshape.hpp"
namespace ov {
namespace frontend {
namespace ggml {
namespace op {
OutputVector translate_get_rows(const NodeContext& context) {
num_inputs_check(context, 2, 2);
auto data_node = context.get_input(0);
auto indices_node = context.get_input(1);
auto indices_shape = get_dimensions(indices_node.get_node_shared_ptr(), {2});
Output<Node> indice_reshaped = std::make_shared<ov::op::v1::Reshape>(indices_node, indices_shape, false);
auto axis_node = ov::op::v0::Constant::create(ov::element::i32, ov::Shape{}, {1});
Output<Node> res = std::make_shared<ov::op::v8::Gather>(data_node, indice_reshaped, axis_node);
if (res.get_element_type() != context.get_output_type(0)) {
res = std::make_shared<ov::op::v0::Convert>(res, context.get_output_type(0));
}
return {res};
}
} // namespace op
} // namespace ggml
} // namespace frontend
} // namespace ov

View File

@@ -0,0 +1,28 @@
#include <cstdint>
#include <vector>
#include "../node_context.hpp"
#include "../utils.hpp"
#include "openvino/core/node.hpp"
#include "openvino/core/node_output.hpp"
#include "openvino/op/broadcast.hpp"
#include "openvino/op/constant.hpp"
#include "openvino/op/multiply.hpp"
#include "openvino/op/reshape.hpp"
namespace ov {
namespace frontend {
namespace ggml {
namespace op {
OutputVector translate_mul(const NodeContext& context) {
num_inputs_check(context, 2, 2);
auto res = std::make_shared<ov::op::v1::Multiply>(context.get_input(0), context.get_input(1));
return {res};
}
} // namespace op
} // namespace ggml
} // namespace frontend
} // namespace ov

View File

@@ -0,0 +1,127 @@
#include <cstddef>
#include <cstdint>
#include <memory>
#include <vector>
#include "../node_context.hpp"
#include "../utils.hpp"
#include "openvino/core/node.hpp"
#include "openvino/core/node_output.hpp"
#include "openvino/op/concat.hpp"
#include "openvino/op/constant.hpp"
#include "openvino/op/convert_like.hpp"
#include "openvino/op/matmul.hpp"
#include "openvino/op/reshape.hpp"
#include "openvino/op/slice.hpp"
#include "openvino/op/transpose.hpp"
namespace ov {
namespace frontend {
namespace ggml {
namespace op {
OutputVector translate_mulmat(const NodeContext& context) {
num_inputs_check(context, 2, 2);
bool continuous = context.check_if_continuous();
if (continuous) {
auto src1 = context.get_input(1);
auto src0_converted = std::make_shared<ov::op::v1::ConvertLike>(context.get_input(0), src1);
auto result = std::make_shared<ov::op::v0::MatMul>(src1, src0_converted, false, true);
return {result};
} else {
/*
Two cases here:
- 21: [ 96, 32, 32, 1] VIEW k-0 [ 2, 6144, 192, 6144]
[ 196608, 1, 1, 1] 0: NONE cache_k_l0 [ 2, 393216, 393216, 393216]
- 22: [ 96, 7, 32, 1] PERMUTE q-0 [ 4, 12288, 384, 86016]
[ 96, 32, 7, 1] 0: SCALE Qcur-0 [ 4, 384, 12288, 86016]
- 23: [ 32, 7, 32, 1] MUL_MAT kq-0 [ 4, 128, 896, 28672]
[ 96, 32, 32, 1] 0: VIEW k-0 [ 2, 6144, 192, 6144]
[ 96, 7, 32, 1] 1: PERMUTE q-0 [ 4, 12288, 384, 86016]
- 20: [ 32, 96, 32, 1] VIEW v-0 [ 2, 128, 12288, 393216]
[ 196608, 1, 1, 1] 0: NONE cache_v_l0 [ 2, 393216, 393216, 393216]
- 25: [ 96, 7, 32, 1] MUL_MAT kqv-0 [ 4, 384, 2688, 86016]
[ 32, 96, 32, 1] 0: VIEW v-0 [ 2, 128, 12288, 393216]
[ 32, 7, 32, 1] 1: SOFT_MAX kq_soft_max_ext-0 [ 4, 128, 896, 28672]
Case 1 (src0 permuted, e.g. the k VIEW in kq above): src0 needs Reshape + Slice + Transpose
Case 2 (src0 not permuted, e.g. the v VIEW in kqv above): src0 needs Reshape + Slice
*/
ov::Output<ov::Node> A;
ov::Output<ov::Node> B;
auto attention_size = context.get_input("attention_size");
auto src0 = context.get_input(0);
auto src0_shape = context.get_input_shape(0).to_shape();
auto src0_stride = context.get_input_stride(0);
auto permuted = is_permuted(src0_stride);
auto token_dim = permuted ? 0 : 2;
auto src0_perm = argsort_descend(src0_stride);
auto src0_original_shape_ = permute(src0_shape, src0_perm);
std::vector<int64_t> src0_original_shape(src0_original_shape_.begin(), src0_original_shape_.end());
src0_original_shape[token_dim] = -1;
auto src0_slice_shape = src0_original_shape;
src0_slice_shape.erase(src0_slice_shape.begin() + token_dim);
auto src0_reshape_shape =
ov::op::v0::Constant::create(ov::element::i64, {src0_original_shape.size()}, src0_original_shape);
auto src0_reshape = std::make_shared<ov::op::v1::Reshape>(src0, src0_reshape_shape, false);
std::shared_ptr<ov::Node> slice_end;
if (permuted) {
slice_end = std::make_shared<ov::op::v0::Concat>(
ov::OutputVector{attention_size, ov::op::v0::Constant::create(ov::element::i64, {2}, src0_slice_shape)},
0);
} else {
slice_end = std::make_shared<ov::op::v0::Concat>(
ov::OutputVector{ov::op::v0::Constant::create(ov::element::i64, {2}, src0_slice_shape), attention_size},
0);
}
auto slice_start = ov::op::v0::Constant::create(ov::element::i64, {3}, std::vector<int64_t>(3, 0));
auto slice_step = ov::op::v0::Constant::create(ov::element::i64, {3}, std::vector<int64_t>(3, 1));
auto src0_slice = std::make_shared<ov::op::v8::Slice>(src0_reshape, slice_start, slice_end, slice_step);
if (permuted) {
B = std::make_shared<ov::op::v1::Transpose>(
src0_slice,
ov::op::v0::Constant::create(ov::element::i64, {src0_perm.size()}, src0_perm));
} else {
B = src0_slice;
}
A = context.get_input(1);
B = std::make_shared<ov::op::v1::ConvertLike>(B, A);
int64_t num_heads = context.get_input_shape(1).to_shape()[0];
int64_t num_heads_kv = src0_shape[0];
int64_t kv_num_heads_factor = num_heads / num_heads_kv;
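// Grouped-query attention: when Q has more heads than K/V, each KV head is
// repeated kv_num_heads_factor times (reshape + concat + reshape below) so
// that B's head count matches A's, mirroring ggml's implicit head broadcast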
if (kv_num_heads_factor > 1) {
auto num_heads_node = ov::op::v0::Constant::create(ov::element::i64, {1}, std::vector<int64_t>{num_heads});
auto num_heads_kv_node =
ov::op::v0::Constant::create(ov::element::i64, {1}, std::vector<int64_t>{num_heads_kv});
auto B_shape_last_two = get_dimensions(B.get_node_shared_ptr(), {1, 2});
auto one = ov::op::v0::Constant::create(ov::element::i64, {1}, {1});
std::shared_ptr<ov::Node> new_B_shape =
std::make_shared<ov::op::v0::Concat>(ov::OutputVector{num_heads_kv_node, one, B_shape_last_two}, 0);
B = std::make_shared<ov::op::v1::Reshape>(B, new_B_shape, false);
B = std::make_shared<ov::op::v0::Concat>(ov::OutputVector(kv_num_heads_factor, B), 1);
new_B_shape = std::make_shared<ov::op::v0::Concat>(ov::OutputVector{num_heads_node, B_shape_last_two}, 0);
B = std::make_shared<ov::op::v1::Reshape>(B, new_B_shape, false);
}
auto result = std::make_shared<ov::op::v0::MatMul>(A, B, false, true);
return {result};
}
}
} // namespace op
} // namespace ggml
} // namespace frontend
} // namespace ov

View File

@@ -0,0 +1,22 @@
#include "../node_context.hpp"
#include "../utils.hpp"
#include "openvino/op/constant.hpp"
#include "openvino/op/transpose.hpp"
namespace ov {
namespace frontend {
namespace ggml {
namespace op {
OutputVector translate_permute(const NodeContext& context) {
num_inputs_check(context, 1, 1);
// TODO: make this more general
auto res = std::make_shared<ov::op::v1::Transpose>(context.get_input(0),
ov::op::v0::Constant::create(ov::element::i64, {3}, {1, 0, 2}));
return {res};
}
} // namespace op
} // namespace ggml
} // namespace frontend
} // namespace ov

View File

@@ -0,0 +1,35 @@
#include "openvino/op/reshape.hpp"
#include <cstdint>
#include <vector>
#include "../node_context.hpp"
#include "../utils.hpp"
#include "openvino/core/node.hpp"
#include "openvino/core/node_output.hpp"
#include "openvino/op/constant.hpp"
namespace ov {
namespace frontend {
namespace ggml {
namespace op {
OutputVector translate_reshape(const NodeContext& context) {
num_inputs_check(context, 1, 1);
if (context.get_input_shape(0) == context.get_output_shape(0)) {
return {context.get_input(0)};
}
auto output_shape = context.get_output_shape(0).to_shape();
auto new_shape_node =
ov::op::v0::Constant::create(ov::element::i64,
{3},
std::vector<int64_t>{-1, (int64_t)output_shape[1], (int64_t)output_shape[2]});
Output<Node> res = std::make_shared<ov::op::v1::Reshape>(context.get_input(0), new_shape_node, false);
return {res};
}
} // namespace op
} // namespace ggml
} // namespace frontend
} // namespace ov

View File

@@ -0,0 +1,47 @@
#include "../node_context.hpp"
#include "../utils.hpp"
#include "openvino/op/add.hpp"
#include "openvino/op/constant.hpp"
#include "openvino/op/divide.hpp"
#include "openvino/op/multiply.hpp"
#include "openvino/op/reduce_sum.hpp"
#include "openvino/op/sqrt.hpp"
namespace ov {
namespace frontend {
namespace ggml {
namespace op {
OutputVector translate_rms_norm(const NodeContext& context) {
num_inputs_check(context, 1, 1);
ov::Shape input_shape = context.get_input_shape(0).to_shape();
auto input_node = context.get_input(0);
auto square = std::make_shared<ov::op::v1::Multiply>(input_node, input_node);
auto reduce_sum =
std::make_shared<ov::op::v1::ReduceSum>(square,
ov::op::v0::Constant::create(ov::element::i64, ov::Shape{1}, {2}),
true);
auto mean = std::make_shared<ov::op::v1::Divide>(
reduce_sum,
ov::op::v0::Constant::create(ov::element::f32, ov::Shape{}, {static_cast<float>(input_shape[2])}));
float eps;
memcpy(&eps, context.get_output_op_params(0), sizeof(float));
auto rms = std::make_shared<ov::op::v0::Sqrt>(
std::make_shared<ov::op::v1::Add>(mean, ov::op::v0::Constant::create(ov::element::f32, ov::Shape{}, {eps})));
auto scale =
std::make_shared<ov::op::v1::Divide>(ov::op::v0::Constant::create(ov::element::f32, ov::Shape{}, {1.0f}), rms);
auto res = std::make_shared<ov::op::v1::Multiply>(input_node, scale);
return {res};
}
} // namespace op
} // namespace ggml
} // namespace frontend
} // namespace ov

View File

@@ -0,0 +1,171 @@
#include <cmath>
#include <cstdint>
#include <cstring>
#include <memory>
#include <vector>
#include "../node_context.hpp"
#include "../utils.hpp"
#include "openvino/core/node.hpp"
#include "openvino/core/node_output.hpp"
#include "openvino/op/add.hpp"
#include "openvino/op/broadcast.hpp"
#include "openvino/op/concat.hpp"
#include "openvino/op/constant.hpp"
#include "openvino/op/convert.hpp"
#include "openvino/op/cos.hpp"
#include "openvino/op/divide.hpp"
#include "openvino/op/multiply.hpp"
#include "openvino/op/reshape.hpp"
#include "openvino/op/shape_of.hpp"
#include "openvino/op/sin.hpp"
#include "openvino/op/slice.hpp"
#include "openvino/op/split.hpp"
#include "openvino/op/subtract.hpp"
#include "openvino/op/transpose.hpp"
#define GGML_ROPE_TYPE_NEOX 2
#define MIN(a, b) ((a) < (b) ? (a) : (b))
#define MAX(a, b) ((a) > (b) ? (a) : (b))
namespace ov {
namespace frontend {
namespace ggml {
namespace op {
static float ggml_rope_yarn_corr_dim(int n_dims, int n_ctx_orig, float n_rot, float base) {
return n_dims * logf(n_ctx_orig / (n_rot * 2 * (float)M_PI)) / (2 * logf(base));
}
void ggml_rope_yarn_corr_dims(int n_dims,
int n_ctx_orig,
float freq_base,
float beta_fast,
float beta_slow,
float dims[2]) {
float start = floorf(ggml_rope_yarn_corr_dim(n_dims, n_ctx_orig, beta_fast, freq_base));
float end = ceilf(ggml_rope_yarn_corr_dim(n_dims, n_ctx_orig, beta_slow, freq_base));
dims[0] = MAX(0, start);
dims[1] = MIN(n_dims - 1, end);
}
OutputVector translate_rope(const NodeContext& context) {
num_inputs_check(context, 2, 3);
auto data_node = context.get_input(0);
auto pos_node = context.get_input(1);
pos_node = std::make_shared<ov::op::v0::Convert>(pos_node, ov::element::f32);
auto permutation_node =
std::make_shared<ov::op::v0::Constant>(ov::element::i64, ov::Shape{3}, std::vector<int64_t>{2, 1, 0});
Output<Node> pos_node_reshaped = std::make_shared<ov::op::v1::Transpose>(pos_node, permutation_node);
auto output_shape = context.get_output_shape(0);
float freq_base, freq_scale, ext_factor, attn_factor, beta_fast, beta_slow;
int32_t* op_params = context.get_output_op_params(0);
const int n_dims = op_params[1];
const int mode = op_params[2];
const int n_ctx_orig = op_params[4];
memcpy(&freq_base, op_params + 5, sizeof(float));
memcpy(&freq_scale, op_params + 6, sizeof(float));
memcpy(&ext_factor, op_params + 7, sizeof(float));
memcpy(&attn_factor, op_params + 8, sizeof(float));
memcpy(&beta_fast, op_params + 9, sizeof(float));
memcpy(&beta_slow, op_params + 10, sizeof(float));
const float theta_scale = powf(freq_base, -2.0f / n_dims);
// TODO: corr_dims is not used in the current implementation
float corr_dims[2];
ggml_rope_yarn_corr_dims(n_dims, n_ctx_orig, freq_base, beta_fast, beta_slow, corr_dims);
const bool is_neox = mode & GGML_ROPE_TYPE_NEOX;
// TODO: GGML_OP_ROPE_BACK -> false
bool forward = true;
const float sin_sign = forward ? 1.0f : -1.0f;
const int64_t ne0 = output_shape[2].get_length();
std::vector<float> factor(ne0 / 2);
factor[0] = freq_scale;
for (int64_t i = 1; i < ne0 / 2; i++) {
factor[i] = theta_scale * factor[i - 1];
}
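// factor[i] = freq_scale * freq_base^(-2i/n_dims); multiplied by the token
// position below, this yields the standard RoPE angle
// theta_i = pos * freq_scale * freq_base^(-2i/n_dims)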
Output<Node> factor_node =
std::make_shared<ov::op::v0::Constant>(ov::element::f32, ov::Shape{factor.size()}, factor);
if (context.get_input_size() == 3) {
auto freq_factors_node = context.get_input(2);
factor_node = std::make_shared<ov::op::v1::Divide>(factor_node, freq_factors_node);
}
auto half_last_dim = ov::op::v0::Constant::create(ov::element::i64, Shape{1}, {output_shape[2].get_length() / 2});
Output<Node> input_shape_node = std::make_shared<ov::op::v0::Concat>(
OutputVector{get_dimensions(data_node.get_node_shared_ptr(), {0, 1}), half_last_dim},
0);
Output<Node> factor_broadcasted_node = std::make_shared<ov::op::v3::Broadcast>(factor_node, input_shape_node);
Output<Node> cos_factor_broadcasted_node = std::make_shared<ov::op::v0::Cos>(
std::make_shared<ov::op::v1::Multiply>(factor_broadcasted_node, pos_node_reshaped));
Output<Node> sin_factor_broadcasted_node = std::make_shared<ov::op::v0::Sin>(
std::make_shared<ov::op::v1::Multiply>(factor_broadcasted_node, pos_node_reshaped));
float mscale = attn_factor;
Output<Node> mscale_node =
std::make_shared<ov::op::v0::Constant>(ov::element::f32, ov::Shape{}, std::vector<float>{mscale});
Output<Node> mscale_sin_sign_node =
std::make_shared<ov::op::v0::Constant>(ov::element::f32, ov::Shape{}, std::vector<float>{mscale * sin_sign});
Output<Node> cos_theta_node = std::make_shared<ov::op::v1::Multiply>(cos_factor_broadcasted_node, mscale_node);
Output<Node> sin_theta_node = std::make_shared<ov::op::v1::Multiply>(sin_factor_broadcasted_node, mscale_node);
if (!is_neox) {
auto input_shape = context.get_input_shape(0);
auto begin_even = ov::op::v0::Constant::create(ov::element::i64, Shape{3}, {0, 0, 0});
auto begin_odd = ov::op::v0::Constant::create(ov::element::i64, Shape{3}, {0, 0, 1});
auto end = std::make_shared<ov::op::v0::ShapeOf>(data_node);
auto stride = ov::op::v0::Constant::create(ov::element::i64, Shape{3}, {1, 1, 2});
auto even_slice = std::make_shared<ov::op::v8::Slice>(data_node, begin_even, end, stride);
auto odd_slice = std::make_shared<ov::op::v8::Slice>(data_node, begin_odd, end, stride);
auto first_half =
std::make_shared<ov::op::v1::Subtract>(std::make_shared<ov::op::v1::Multiply>(even_slice, cos_theta_node),
std::make_shared<ov::op::v1::Multiply>(odd_slice, sin_theta_node));
auto second_half =
std::make_shared<ov::op::v1::Add>(std::make_shared<ov::op::v1::Multiply>(even_slice, sin_theta_node),
std::make_shared<ov::op::v1::Multiply>(odd_slice, cos_theta_node));
auto stack = std::make_shared<ov::op::v0::Concat>(OutputVector{first_half, second_half}, 2);
auto shape_const = ov::op::v0::Constant::create(
ov::element::i64,
Shape{3},
std::vector<int64_t>{-1, input_shape[1].get_length(), input_shape[2].get_length()});
auto reshaped = std::make_shared<ov::op::v1::Reshape>(stack, shape_const, false);
return {reshaped};
} else {
auto slice_node =
std::make_shared<ov::op::v1::Split>(data_node,
ov::op::v0::Constant::create(ov::element::i64, ov::Shape{}, {2}),
2);
Output<Node> slice_data_node_0 = slice_node->outputs()[0];
Output<Node> slice_data_node_1 = slice_node->outputs()[1];
auto first_half_node = std::make_shared<ov::op::v1::Subtract>(
std::make_shared<ov::op::v1::Multiply>(slice_data_node_0, cos_theta_node),
std::make_shared<ov::op::v1::Multiply>(slice_data_node_1, sin_theta_node));
auto second_half_node = std::make_shared<ov::op::v1::Add>(
std::make_shared<ov::op::v1::Multiply>(slice_data_node_0, sin_theta_node),
std::make_shared<ov::op::v1::Multiply>(slice_data_node_1, cos_theta_node));
auto res_node = std::make_shared<ov::op::v0::Concat>(ov::OutputVector{first_half_node, second_half_node}, 2);
return {res_node};
}
}
} // namespace op
} // namespace ggml
} // namespace frontend
} // namespace ov

View File

@@ -0,0 +1,31 @@
#include <cstdint>
#include <cstring>
#include <vector>
#include "../node_context.hpp"
#include "../utils.hpp"
#include "openvino/core/node.hpp"
#include "openvino/core/node_output.hpp"
#include "openvino/op/constant.hpp"
#include "openvino/op/multiply.hpp"
namespace ov {
namespace frontend {
namespace ggml {
namespace op {
OutputVector translate_scale(const NodeContext& context) {
num_inputs_check(context, 1, 1);
float scale;
memcpy(&scale, context.get_output_op_params(0), sizeof(float));
auto scale_node = std::make_shared<ov::op::v0::Constant>(ov::element::f32, ov::Shape{}, std::vector<float>{scale});
auto res = std::make_shared<ov::op::v1::Multiply>(context.get_input(0), scale_node);
return {res};
}
} // namespace op
} // namespace ggml
} // namespace frontend
} // namespace ov

View File

@@ -0,0 +1,88 @@
#include <cmath>
#include <cstdint>
#include <cstring>
#include <memory>
#include <vector>
#include "../node_context.hpp"
#include "../utils.hpp"
#include "openvino/core/node.hpp"
#include "openvino/core/node_output.hpp"
#include "openvino/op/add.hpp"
#include "openvino/op/concat.hpp"
#include "openvino/op/constant.hpp"
#include "openvino/op/convert.hpp"
#include "openvino/op/multiply.hpp"
#include "openvino/op/slice.hpp"
#include "openvino/op/softmax.hpp"
namespace ov {
namespace frontend {
namespace ggml {
namespace op {
OutputVector translate_soft_max(const NodeContext& context) {
num_inputs_check(context, 1, 2);
auto input_node = context.get_input(0);
float scale = 1.0f;
float max_bias = 0.0f;
auto op_params = context.get_output_op_params(0);
memcpy(&scale, (float*)op_params + 0, sizeof(float));
memcpy(&max_bias, (float*)op_params + 1, sizeof(float));
const uint32_t n_head = context.get_input_shape(0)[0].get_length();
const uint32_t n_head_log2 = 1u << (uint32_t)floor(log2(n_head));
// TODO: ALiBi slopes are not implemented yet. With max_bias > 0, the per-head
// slope should be powf(m0, h + 1) for h < n_head_log2, else powf(m1, 2*(h - n_head_log2) + 1),
// where m0 = powf(2.0f, -max_bias / n_head_log2) and m1 = powf(2.0f, -(max_bias / 2.0f) / n_head_log2).
// A constant slope of 1.0f is only correct when max_bias == 0.
const float slope = 1.0f;
if (scale != 1.0f) {
auto scale_node =
std::make_shared<ov::op::v0::Constant>(ov::element::f32, ov::Shape{}, std::vector<float>{scale});
input_node = std::make_shared<ov::op::v1::Multiply>(input_node, scale_node);
}
if (context.get_input_size() == 2) {
// Calculate mask then softmax
auto mask_node = context.get_input(1);
ov::element::Type mask_type = (context.get_input_type(1)).as<ov::element::Type>();
if (mask_type == ov::element::f16) {
// Convert f16 to f32
mask_node = std::make_shared<ov::op::v0::Convert>(mask_node, ov::element::f32);
}
// Stride slice mask node
Output<Node> mask_begin_node = ov::op::v0::Constant::create(ov::element::i64, Shape{3}, {0, 0, 0});
auto one = ov::op::v0::Constant::create(ov::element::i64, ov::Shape{1}, {1});
auto input_last_two_dim = get_dimensions(input_node.get_node_shared_ptr(), {1, 2});
auto mask_slice_shape = std::make_shared<ov::op::v0::Concat>(ov::NodeVector{one, input_last_two_dim}, 0);
Output<Node> mask_stride_node = ov::op::v0::Constant::create(ov::element::i64, Shape{3}, {1, 1, 1});
auto mask_node_sliced =
std::make_shared<ov::op::v8::Slice>(mask_node, mask_begin_node, mask_slice_shape, mask_stride_node);
// slope * mask
auto slope_node =
std::make_shared<ov::op::v0::Constant>(ov::element::f32, ov::Shape{}, std::vector<float>{slope});
auto slope_mask_node = std::make_shared<ov::op::v1::Multiply>(mask_node_sliced, slope_node);
// input + slope * mask
auto input_slope_mask_node = std::make_shared<ov::op::v1::Add>(input_node, slope_mask_node);
// Calculate softmax
auto res = std::make_shared<ov::op::v8::Softmax>(input_slope_mask_node, 2);
return {res};
} else {
// No mask: softmax directly over the innermost axis, as in the masked branch above
auto res = std::make_shared<ov::op::v8::Softmax>(input_node, 2);
return {res};
}
}
} // namespace op
} // namespace ggml
} // namespace frontend
} // namespace ov

View File

@@ -0,0 +1,23 @@
#include "openvino/op/transpose.hpp"
#include "../node_context.hpp"
#include "../utils.hpp"
namespace ov {
namespace frontend {
namespace ggml {
namespace op {
OutputVector translate_transpose(const NodeContext& context) {
num_inputs_check(context, 1, 1);
auto perm = argsort_descend(context.get_output_stride(0));
auto res = std::make_shared<ov::op::v1::Transpose>(context.get_input(0),
ov::op::v0::Constant::create(ov::element::i64, {3}, perm));
return {res};
}
} // namespace op
} // namespace ggml
} // namespace frontend
} // namespace ov

View File

@@ -0,0 +1,24 @@
#include <cstdint>
#include <vector>
#include "../node_context.hpp"
#include "../utils.hpp"
#include "openvino/core/node.hpp"
#include "openvino/core/node_output.hpp"
namespace ov {
namespace frontend {
namespace ggml {
namespace op {
OutputVector translate_unary(const NodeContext& context) {
num_inputs_check(context, 1, 1);
// TODO: pass-through placeholder; translate_unary is declared in op_table.cpp
// but not yet registered for any op
return {context.get_input(0)};
}
} // namespace op
} // namespace ggml
} // namespace frontend
} // namespace ov

View File

@@ -0,0 +1,29 @@
#include <cstdint>
#include <vector>
#include "../node_context.hpp"
#include "../utils.hpp"
#include "openvino/core/node.hpp"
#include "openvino/core/node_output.hpp"
#include "openvino/op/multiply.hpp"
#include "openvino/op/sigmoid.hpp"
namespace ov {
namespace frontend {
namespace ggml {
namespace op {
OutputVector translate_unary_silu(const NodeContext& context) {
num_inputs_check(context, 1, 1);
auto input = context.get_input(0);
auto sigmoid = std::make_shared<ov::op::v0::Sigmoid>(input);
auto res = std::make_shared<ov::op::v1::Multiply>(input, sigmoid);
return {res};
}
} // namespace op
} // namespace ggml
} // namespace frontend
} // namespace ov

View File

@@ -0,0 +1,26 @@
#include <cstdint>
#include <vector>
#include "../utils.hpp"
#include "openvino/core/node.hpp"
#include "openvino/core/node_output.hpp"
#include "openvino/op/constant.hpp"
#include "openvino/op/convert.hpp"
#include "openvino/op/reshape.hpp"
#include "openvino/op/strided_slice.hpp"
namespace ov {
namespace frontend {
namespace ggml {
namespace op {
OutputVector translate_view(const NodeContext& context) {
num_inputs_check(context, 1, 1);
// VIEW is a no-op here; consumers such as CONT and MUL_MAT apply the actual
// slicing based on the view's op params and strides
return {context.get_input(0)};
}
} // namespace op
} // namespace ggml
} // namespace frontend
} // namespace ov

View File

@@ -0,0 +1,64 @@
#include "op_table.hpp"
#include <openvino/op/add.hpp>
#include <openvino/op/divide.hpp>
#include <openvino/op/gather.hpp>
#include <openvino/op/matmul.hpp>
#include <openvino/op/multiply.hpp>
#include <openvino/op/subtract.hpp>
#include "utils.hpp"
using namespace ov::op;
namespace ov {
namespace frontend {
namespace ggml {
namespace op {
#define GGML_OP_CONVERTER(op) OutputVector op(const NodeContext& node)
GGML_OP_CONVERTER(translate_add);
GGML_OP_CONVERTER(translate_cont);
GGML_OP_CONVERTER(translate_cpy);
GGML_OP_CONVERTER(translate_get_rows);
GGML_OP_CONVERTER(translate_mul);
GGML_OP_CONVERTER(translate_mulmat);
GGML_OP_CONVERTER(translate_permute);
GGML_OP_CONVERTER(translate_reshape);
GGML_OP_CONVERTER(translate_rms_norm);
GGML_OP_CONVERTER(translate_rope);
GGML_OP_CONVERTER(translate_scale);
GGML_OP_CONVERTER(translate_unary_silu);
GGML_OP_CONVERTER(translate_soft_max);
GGML_OP_CONVERTER(translate_transpose);
GGML_OP_CONVERTER(translate_unary);
GGML_OP_CONVERTER(translate_view);
} // namespace op
const std::unordered_map<std::string, CreatorFunction> get_supported_ops() {
return {{"GGML_OP_ADD", op::translate_1to1_match_2_inputs<v1::Add>},
{"GGML_OP_ADD1", op::translate_1to1_match_2_inputs<v1::Add>},
{"GGML_OP_CONT", op::translate_cont},
{"GGML_OP_CPY", op::translate_cpy},
{"GGML_OP_DIV", op::translate_1to1_match_2_inputs<v1::Divide>},
{"GGML_OP_GET_ROWS", op::translate_get_rows},
// {"GGML_OP_MUL", op::translate_1to1_match_2_inputs<v1::Multiply>},
{"GGML_OP_MUL", op::translate_mul},
{"GGML_OP_MUL_MAT", op::translate_mulmat},
{"GGML_OP_PERMUTE", op::translate_permute},
{"GGML_OP_RESHAPE", op::translate_reshape},
{"GGML_OP_RMS_NORM", op::translate_rms_norm},
{"GGML_OP_ROPE", op::translate_rope},
{"GGML_OP_SCALE", op::translate_scale},
{"GGML_OP_SOFT_MAX", op::translate_soft_max},
{"GGML_OP_SUB", op::translate_1to1_match_2_inputs<v1::Subtract>},
{"GGML_OP_TRANSPOSE", op::translate_transpose},
{"GGML_UNARY_OP_SILU", op::translate_unary_silu},
{"GGML_OP_VIEW", op::translate_view}};
}
} // namespace ggml
} // namespace frontend
} // namespace ov

View File

@@ -0,0 +1,13 @@
#pragma once
#include "node_context.hpp"
namespace ov {
namespace frontend {
namespace ggml {
const std::unordered_map<std::string, CreatorFunction> get_supported_ops();
} // namespace ggml
} // namespace frontend
} // namespace ov

View File

@@ -0,0 +1,145 @@
#include "translate_session.hpp"
#include <exception>
#include <fstream>
#include "input_model.hpp"
namespace ov {
namespace frontend {
namespace ggml {
using namespace ov::op;
TranslateSession::TranslateSession(const frontend::InputModel::Ptr& input_model,
const std::unordered_map<std::string, CreatorFunction>& translator_map)
: m_input_model(input_model),
m_translator_map(translator_map),
m_ov_model(nullptr) {}
std::shared_ptr<Model> TranslateSession::get_converted_model() {
if (m_ov_model) {
return m_ov_model;
}
m_ov_model = translate_graph(m_input_model);
// print_model_topology();
return m_ov_model;
}
void TranslateSession::print_model_topology() {
try {
std::ofstream outfile("model_topology.txt", std::ios::out | std::ios::app);
if (!outfile.is_open()) {
throw std::runtime_error("Failed to open file for writing model topology.");
}
outfile << "============ Model ============" << std::endl;
for (const auto& op : m_ov_model->get_ordered_ops()) {
outfile << "Operation: " << op->get_friendly_name() << std::endl;
outfile << " Inputs:" << std::endl;
for (const auto& input : op->inputs()) {
outfile << " " << input.get_node()->get_friendly_name() << " -> " << input.get_element_type() << " "
<< input.get_shape() << std::endl;
}
outfile << " Outputs:" << std::endl;
for (const auto& output : op->outputs()) {
outfile << " " << output.get_node()->get_friendly_name() << " -> " << output.get_element_type()
<< " " << output.get_shape() << std::endl;
}
outfile << std::endl;
}
outfile << "===============================" << std::endl;
outfile.close();
} catch (const std::exception& ex) {
std::cout << ex.what() << std::endl;
}
}
std::shared_ptr<Model> TranslateSession::translate_graph(const frontend::InputModel::Ptr& input_model) {
ov::ParameterVector params;
ov::ResultVector results;
auto tensor_map = std::make_shared<TensorMap>();
std::shared_ptr<Model> resulting_model;
const auto& ggml_model = std::dynamic_pointer_cast<InputModel>(input_model);
FRONT_END_GENERAL_CHECK(ggml_model, "nullptr for InputModel is given for translation into OV Model");
std::shared_ptr<GgmlDecoder> ggml_model_decoder = ggml_model->get_model_decoder();
for (const auto& it : ggml_model_decoder->get_model_inputs()) {
params.push_back(std::dynamic_pointer_cast<ov::op::v0::Parameter>(it.second));
(*tensor_map)[it.first] = it.second;
}
for (const auto& it : ggml_model_decoder->get_model_extra_inputs()) {
params.push_back(std::dynamic_pointer_cast<ov::op::v0::Parameter>(it.second));
(*tensor_map)[it.first] = it.second;
}
for (const auto& it : ggml_model_decoder->get_model_weights()) {
(*tensor_map)[it.first] = it.second;
}
auto node_visitor = [&](std::shared_ptr<GgmlDecoder> node) {
auto operation_type = node->get_op_type();
ov::OutputVector converted_outputs;
auto it = m_translator_map.find(operation_type);
if (it != m_translator_map.end()) {
try {
NodeContext node_context(node, tensor_map, this);
converted_outputs = it->second(node_context);
} catch (const std::exception& ex) {
std::cout << ex.what() << std::endl;
}
} else {
// TODO
}
const auto& node_output_names = node->get_output_names();
FRONT_END_OP_CONVERSION_CHECK(node_output_names.size() == converted_outputs.size(),
"Number of ",
operation_type,
" outputs does not match the number of converted outputs: ",
node_output_names.size(),
" vs ",
converted_outputs.size(),
".");
for (size_t i = 0; i < node_output_names.size(); ++i) {
auto output_name = node_output_names[i];
if (i < converted_outputs.size() && converted_outputs[i].get_node_shared_ptr() != nullptr) {
(*tensor_map)[output_name] = converted_outputs[i];
}
}
};
ggml_model_decoder->visit_subgraph(node_visitor);
for (const auto& name : ggml_model_decoder->get_model_output_names()) {
FRONT_END_GENERAL_CHECK(tensor_map->find(name) != tensor_map->end(),
"Output name not found in tensor map: ",
name);
auto result = std::make_shared<v0::Result>(tensor_map->at(name));
// result->set_friendly_name(name);
results.push_back(result);
}
ov::ParameterVector used_params;
for (const auto& param : params) {
if (!param->output(0).get_target_inputs().empty()) {
used_params.push_back(param);
}
}
if (auto diff = params.size() - used_params.size()) {
std::cout << diff << " parameters are not used in the model." << std::endl;
}
resulting_model = std::make_shared<Model>(results, used_params);
return resulting_model;
}
} // namespace ggml
} // namespace frontend
} // namespace ov

View File

@@ -0,0 +1,27 @@
#pragma once
#include "input_model.hpp"
#include "node_context.hpp"
namespace ov {
namespace frontend {
namespace ggml {
class TranslateSession {
public:
TranslateSession(const frontend::InputModel::Ptr& input_model,
const std::unordered_map<std::string, CreatorFunction>& translator_map);
std::shared_ptr<Model> get_converted_model();
std::shared_ptr<Model> translate_graph(const frontend::InputModel::Ptr& input_model);
private:
void print_model_topology();
const frontend::InputModel::Ptr m_input_model;
const std::unordered_map<std::string, CreatorFunction>& m_translator_map;
std::shared_ptr<Model> m_ov_model;
};
} // namespace ggml
} // namespace frontend
} // namespace ov

View File

@@ -0,0 +1,52 @@
#include "utils.hpp"
#include <ctime>
#include <memory>
#include <openvino/op/gather.hpp>
#include <openvino/op/shape_of.hpp>
#include <string>
namespace ov {
namespace frontend {
namespace ggml {
std::string getCurrentTime() {
std::time_t now = std::time(nullptr);
char buf[100];
std::strftime(buf, sizeof(buf), "%Y-%m-%d %H:%M:%S", std::localtime(&now));
return buf;
}
void num_inputs_check(const NodeContext& context, size_t min_inputs, size_t max_inputs) {
auto input_size = context.get_input_size();
FRONT_END_OP_CONVERSION_CHECK(input_size >= min_inputs, "Got less inputs than expected");
FRONT_END_OP_CONVERSION_CHECK(input_size <= max_inputs, "Got more inputs than expected");
}
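// Returns the innermost dimension index at which the byte strides (nb) stop
// describing a contiguous layout for shape (ne), or 0 if fully contiguous;
// ggml's ne/nb convention is assumed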
int non_cont_dim(std::vector<size_t> ne, std::vector<size_t> nb) {
int dim = nb.size() - 1;
size_t bytes = nb[dim];
for (int i = dim; i > 0; i--) {
bytes *= ne[i];
if (bytes != nb[i - 1]) {
return i;
}
}
return 0;
}
std::shared_ptr<ov::Node> get_dimensions(const std::shared_ptr<ov::op::v3::ShapeOf>& shape,
const std::vector<int>& dims) {
using namespace ov::op;
const auto zero = v0::Constant::create(ov::element::i32, ov::Shape{}, {0});
const auto dims_const = v0::Constant::create(ov::element::i32, ov::Shape{dims.size()}, dims);
return std::make_shared<v8::Gather>(shape, dims_const, zero);
}
std::shared_ptr<ov::Node> get_dimensions(const std::shared_ptr<ov::Node>& node, const std::vector<int>& dims) {
return get_dimensions(std::make_shared<ov::op::v3::ShapeOf>(node), dims);
}
} // namespace ggml
} // namespace frontend
} // namespace ov

View File

@@ -0,0 +1,68 @@
#pragma once
#include <algorithm>
#include <numeric>
#include <vector>
#include <openvino/op/shape_of.hpp>
#include "node_context.hpp"
namespace ov {
namespace frontend {
namespace ggml {
void dump_ov_model(const std::shared_ptr<ov::Model> model);
void num_inputs_check(const NodeContext& context, size_t min_inputs, size_t max_inputs);
int non_cont_dim(std::vector<size_t> ne, std::vector<size_t> nb);
template <typename T>
std::vector<int> argsort_descend(const std::vector<T>& v) {
std::vector<int> idx(v.size());
std::iota(idx.begin(), idx.end(), 0);
std::sort(idx.begin(), idx.end(), [&v](int i1, int i2) {
return v[i1] > v[i2];
});
return idx;
}
template <typename T>
std::vector<T> sorted_descend(std::vector<T> v) {
std::sort(v.begin(), v.end(), [](T a, T b) {
return a > b;
});
return v;
}
template <typename T>
bool is_permuted(const std::vector<T>& strides) {
for (size_t i = 0; i < strides.size() - 1; ++i) {
if (strides[i] < strides[i + 1]) {
return true;
}
}
return false;
}
template <typename T>
std::vector<T> permute(const std::vector<T>& x, const std::vector<int>& perm) {
std::vector<T> result;
result.reserve(perm.size());
for (int i : perm) {
result.push_back(x[i]);
}
return result;
}
std::shared_ptr<ov::Node> get_dimensions(const std::shared_ptr<op::v3::ShapeOf>& shape, const std::vector<int>& dims);
std::shared_ptr<ov::Node> get_dimensions(const std::shared_ptr<ov::Node>& node, const std::vector<int>& dims);
namespace op {
template <typename T>
OutputVector translate_1to1_match_2_inputs(const NodeContext& context) {
num_inputs_check(context, 2, 2);
return {std::make_shared<T>(context.get_input(0), context.get_input(1))};
}
} // namespace op
} // namespace ggml
} // namespace frontend
} // namespace ov
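A quick standalone sketch (not part of this commit) of how the stride helpers above combine: argsort_descend() on ggml byte strides yields the transpose permutation that restores memory order, and permute() applies it to the shape. The strides and shape below are illustrative values modeled on the mulmat comment earlier:
```cpp
#include <cstdio>
#include <vector>

#include "utils.hpp"  // the header above

int main() {
    using namespace ov::frontend::ggml;
    // Byte strides of a permuted 3-D tensor: dim 1 varies slowest, dim 0 fastest
    std::vector<size_t> strides = {4, 12288, 384};
    std::vector<size_t> shape = {96, 7, 32};
    auto perm = argsort_descend(strides);   // {1, 2, 0}: dims ordered by decreasing stride
    auto mem_shape = permute(shape, perm);  // shape in memory order: {7, 32, 96}
    std::printf("permuted: %d\n", is_permuted(strides) ? 1 : 0);  // prints 1
    for (auto d : mem_shape) {
        std::printf("%zu ", d);  // prints 7 32 96
    }
    std::printf("\n");
    return 0;
}
```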

View File

@@ -14,6 +14,8 @@
#include "ggml-impl.h"
#include "ggml.h"
#include "openvino/frontend.hpp"
#include "openvino/input_model.hpp"
std::shared_ptr<GgmlOvDecoder> get_ggml_decoder(struct ggml_cgraph* cgraph) {
return std::make_shared<GgmlOvDecoder>(nullptr, cgraph);
@@ -56,11 +58,11 @@ enum ggml_status openvino_frontend_compute(ggml_backend_t backend, struct ggml_c
}
// auto devices = core.get_available_devices();
static auto front_end = get_ggml_frontend();
if (!front_end) {
GGML_LOG_ERROR("GGML FrontEnd is not initialized \n");
return GGML_STATUS_FAILED;
}
// static auto front_end = get_ggml_frontend();
// if (!front_end) {
// GGML_LOG_ERROR("GGML FrontEnd is not initialized \n");
// return GGML_STATUS_FAILED;
// }
using CachedItem = std::pair<std::shared_ptr<ov::Model>, ov::CompiledModel>;
static std::unordered_map<struct ggml_cgraph*, CachedItem> compiled_cache;
@@ -79,14 +81,18 @@ enum ggml_status openvino_frontend_compute(ggml_backend_t backend, struct ggml_c
compiled_model = it->second.second;
compile_end_time = ggml_time_us();
} else {
std::shared_ptr<ov::frontend::DecoderBase> graph_decoder = ggml_decoder;
ov::frontend::InputModel::Ptr input_model = front_end->load(graph_decoder);
if (!input_model) {
GGML_LOG_ERROR("Input Model is not loaded \n");
return GGML_STATUS_FAILED;
}
// std::shared_ptr<ov::frontend::DecoderBase> graph_decoder = ggml_decoder;
// ov::frontend::InputModel::Ptr input_model = front_end->load(graph_decoder);
// if (!input_model) {
// GGML_LOG_ERROR("Input Model is not loaded \n");
// return GGML_STATUS_FAILED;
// }
// model = front_end->convert(input_model);
ov::frontend::InputModel::Ptr input_model = std::make_shared<ov::frontend::ggml::InputModel>(ggml_decoder);
model = ov::frontend::ggml::FrontEnd::convert(input_model);
model = front_end->convert(input_model);
conversion_end_time = ggml_time_us();
if (getenv("GGML_OPENVINO_DUMP_IR")) {