FEAT: Add all conversion code from ov side
This commit is contained in:
parent
f15a2cc057
commit
0d009fe61a
|
|
@ -692,7 +692,11 @@ To read documentation for how to build on IBM Z & LinuxONE, [click here](./build
|
|||
git submodule update --init --recursive
|
||||
|
||||
export OPENVINO_LLAMA_PATH=$(pwd)
|
||||
```
|
||||
|
||||
Before building, change "ENABLE_OV_GGML_FRONTEND" from true to false in the CMakePresets.json file, since this branch of llama.cpp (`full_backend`) already contains the code from the OpenVINO side. Alternatively, you could build the master branch of OpenVINO instead.
|
||||
|
||||
```
|
||||
cmake --preset Release
|
||||
cmake --build build/Release
|
||||
```
|
||||
|
|
@ -700,7 +704,7 @@ To read documentation for how to build on IBM Z & LinuxONE, [click here](./build
|
|||
### Build llama.cpp-ov
|
||||
|
||||
```bash
|
||||
git clone https://github.com/intel-sandbox/llama.cpp-ov.git -b dev_backend_openvino
|
||||
git clone https://github.com/intel-sandbox/llama.cpp-ov.git -b full_backend
|
||||
cd llama.cpp-ov
|
||||
|
||||
cmake --preset ReleaseOV
|
||||
|
|
|
|||
|
|
@ -5,8 +5,8 @@
|
|||
#include <memory>
|
||||
#include <vector>
|
||||
|
||||
#include "decoder.h"
|
||||
#include "ggml.h"
|
||||
#include "openvino/decoder.hpp"
|
||||
|
||||
class GgmlOvDecoder : public ov::frontend::ggml::GgmlDecoder {
|
||||
public:
|
||||
|
|
|
|||
|
|
@ -8,7 +8,6 @@ namespace ov {
|
|||
namespace frontend {
|
||||
namespace ggml {
|
||||
|
||||
// TODO: Directly include from openvino
|
||||
class GgmlDecoder : public DecoderBase {
|
||||
public:
|
||||
virtual ov::Any get_attribute(const std::string& name) const = 0;
|
||||
|
|
@ -0,0 +1,27 @@
|
|||
#include "frontend.hpp"
|
||||
|
||||
#include "input_model.hpp"
|
||||
#include "op_table.hpp"
|
||||
#include "translate_session.hpp"
|
||||
|
||||
namespace ov {
|
||||
namespace frontend {
|
||||
namespace ggml {
|
||||
|
||||
FrontEnd::FrontEnd() {}
|
||||
|
||||
std::shared_ptr<Model> FrontEnd::convert(const InputModel::Ptr& model) {
|
||||
auto ggml_model = std::dynamic_pointer_cast<ggml::InputModel>(model);
|
||||
FRONT_END_GENERAL_CHECK(ggml_model, "Invalid input model");
|
||||
std::shared_ptr<Model> converted_model;
|
||||
const auto& supported_ops = get_supported_ops();
|
||||
{
|
||||
TranslateSession translate_session(model, supported_ops);
|
||||
converted_model = translate_session.get_converted_model();
|
||||
}
|
||||
return converted_model;
|
||||
}
|
||||
|
||||
} // namespace ggml
|
||||
} // namespace frontend
|
||||
} // namespace ov
|
||||
|
|
@ -0,0 +1,23 @@
|
|||
// Copyright (C) 2018-2024 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <openvino/frontend/frontend.hpp>
|
||||
|
||||
namespace ov {
|
||||
namespace frontend {
|
||||
namespace ggml {
|
||||
|
||||
/// Minimal ggml frontend: turns a ggml InputModel into an ov::Model.
/// NOTE(review): does not inherit ov::frontend::FrontEnd — presumably kept
/// standalone on purpose; confirm before wiring into the generic FE manager.
class FrontEnd {
public:
    using Ptr = std::shared_ptr<FrontEnd>;
    FrontEnd();

    /// Convert the given input model to an ov::Model.
    /// @param model  expected to actually be a ggml::InputModel.
    /// @return the converted model.
    static std::shared_ptr<Model> convert(const InputModel::Ptr& model);
};
|
||||
|
||||
} // namespace ggml
|
||||
} // namespace frontend
|
||||
} // namespace ov
|
||||
|
|
@ -0,0 +1,17 @@
|
|||
#include "input_model.hpp"
|
||||
|
||||
#include "decoder.hpp"
|
||||
|
||||
namespace ov {
|
||||
namespace frontend {
|
||||
namespace ggml {
|
||||
|
||||
// Wrap a ggml graph decoder as the frontend's input model.
InputModel::InputModel(const std::shared_ptr<GgmlDecoder>& gdecoder) : m_decoder(gdecoder) {}

// Accessor for the wrapped decoder; returns a reference to the member, so the
// result is only valid while this InputModel is alive.
const std::shared_ptr<GgmlDecoder>& InputModel::get_model_decoder() const {
    return m_decoder;
}
|
||||
|
||||
} // namespace ggml
|
||||
} // namespace frontend
|
||||
} // namespace ov
|
||||
|
|
@ -0,0 +1,29 @@
|
|||
#pragma once
|
||||
|
||||
#include <openvino/frontend/input_model.hpp>
|
||||
|
||||
#include "decoder.hpp"
|
||||
|
||||
namespace ov {
|
||||
namespace frontend {
|
||||
namespace ggml {
|
||||
|
||||
class FrontEnd;
class GgmlDecoder;
// NOTE(review): this using-declaration is redundant — GgmlDecoder is declared
// in this very namespace; safe to drop in a cleanup pass.
using ov::frontend::ggml::GgmlDecoder;

/// Input model that simply carries a ggml graph decoder; the FrontEnd pulls
/// the decoder back out during conversion.
class InputModel : public ov::frontend::InputModel {
    friend class ::ov::frontend::ggml::FrontEnd;

public:
    /// @param gdecoder decoder over the ggml compute graph to be converted.
    explicit InputModel(const std::shared_ptr<GgmlDecoder>& gdecoder);

    /// Decoder for the wrapped ggml graph (reference to the member).
    const std::shared_ptr<GgmlDecoder>& get_model_decoder() const;

private:
    std::shared_ptr<GgmlDecoder> m_decoder;
};
|
||||
|
||||
} // namespace ggml
|
||||
} // namespace frontend
|
||||
} // namespace ov
|
||||
|
|
@ -0,0 +1,100 @@
|
|||
#pragma once
|
||||
|
||||
#include <openvino/frontend/node_context.hpp>
|
||||
|
||||
#include "decoder.hpp"
|
||||
|
||||
namespace ov {
|
||||
namespace frontend {
|
||||
namespace ggml {
|
||||
|
||||
class TranslateSession;

// Maps a tensor name to the OV output that produces it during translation.
typedef std::map<std::string, Output<Node>> TensorMap;

/// Per-node view handed to each op translator. Wraps a GgmlDecoder plus the
/// shared tensor map so translators can resolve inputs by index or by name.
/// Indexed getters resolve the index to a tensor name via the cached
/// input/output name lists, then query the decoder by name.
class NodeContext : public frontend::NodeContext {
public:
    // NOTE(review): tensor_map is stored BY REFERENCE to the caller's
    // shared_ptr (see m_tensor_map below) — this NodeContext must not
    // outlive that shared_ptr object; confirm TranslateSession guarantees it.
    NodeContext(const std::shared_ptr<GgmlDecoder>& decoder,
                std::shared_ptr<TensorMap>& tensor_map,
                TranslateSession* translate_session = nullptr)
        : ov::frontend::NodeContext(decoder->get_op_type()),
          m_decoder(decoder),
          m_tensor_map(tensor_map),
          m_translate_session(translate_session) {
        // Cache the name lists once; all index-based getters rely on them.
        m_input_names = decoder->get_input_names();
        m_output_names = decoder->get_output_names();
    }

    // Back-pointer to the owning session; may be nullptr.
    TranslateSession* get_translate_session() const {
        return m_translate_session;
    }

    size_t get_input_size() const override {
        return m_decoder->get_input_size();
    }

    Any get_input_type(size_t index) const {
        return m_decoder->get_input_type(m_input_names[index]);
    }

    PartialShape get_input_shape(size_t index) const {
        return m_decoder->get_input_shape(m_input_names[index]);
    }

    // Strides of the ggml input tensor, as reported by the decoder.
    std::vector<size_t> get_input_stride(size_t index) const {
        return m_decoder->get_input_stride(m_input_names[index]);
    }

    PartialShape get_output_shape(size_t index) const {
        return m_decoder->get_output_shape(m_output_names[index]);
    }

    std::vector<size_t> get_output_stride(size_t index) const {
        return m_decoder->get_output_stride(m_output_names[index]);
    }

    // Raw ggml op_params of the producer of the indexed input.
    // NOTE(review): pointer into decoder-owned data — do not free, and do not
    // keep beyond the decoder's lifetime.
    int32_t* get_input_op_params(size_t index) const {
        return m_decoder->get_input_op_params(m_input_names[index]);
    }

    // Raw ggml op_params of this node's indexed output (same lifetime caveat).
    int32_t* get_output_op_params(size_t index) const {
        return m_decoder->get_output_op_params(m_output_names[index]);
    }

    ov::element::Type get_output_type(size_t index) const {
        return m_decoder->get_output_type(m_output_names[index]);
    }

    // Index-based lookup: decoder maps index -> tensor name, then the tensor
    // map yields the OV output. Throws std::out_of_range if not yet produced.
    Output<Node> get_input(int idx) const override {
        return m_tensor_map->at(m_decoder->get_input_name(idx));
    }

    // Name-based lookup straight into the tensor map (e.g. "past_token_len").
    Output<Node> get_input(const std::string& name) const override {
        return m_tensor_map->at(name);
    }

    const std::string& get_name() const override {
        return m_decoder->get_op_name();
    }

    ov::Any get_attribute_as_any(const std::string& name) const override {
        return m_decoder->get_attribute(name);
    }

    // Decoder-defined layout predicate used by several translators to pick
    // between the "contiguous" and "strided view" lowering paths.
    bool check_if_continuous() const {
        return m_decoder->check_if_continuous();
    }

private:
    std::shared_ptr<GgmlDecoder> m_decoder;
    // Reference member (see constructor note): lifetime tied to the caller.
    std::shared_ptr<TensorMap>& m_tensor_map;
    TranslateSession* m_translate_session;
    std::vector<std::string> m_input_names;
    std::vector<std::string> m_output_names;
};

// Signature of one op translator, as registered in the op table.
using CreatorFunction = std::function<ov::OutputVector(const ov::frontend::ggml::NodeContext&)>;
|
||||
|
||||
} // namespace ggml
|
||||
} // namespace frontend
|
||||
} // namespace ov
|
||||
|
|
@ -0,0 +1,23 @@
|
|||
#include "openvino/op/add.hpp"
|
||||
|
||||
#include "../node_context.hpp"
|
||||
#include "../utils.hpp"
|
||||
|
||||
namespace ov {
|
||||
namespace frontend {
|
||||
namespace ggml {
|
||||
namespace op {
|
||||
|
||||
// GGML_OP_ADD: elementwise addition of the node's two inputs.
OutputVector translate_add(const NodeContext& context) {
    num_inputs_check(context, 2, 2);

    auto sum = std::make_shared<ov::op::v1::Add>(context.get_input(0), context.get_input(1));
    return {sum};
}
|
||||
|
||||
} // namespace op
|
||||
} // namespace ggml
|
||||
} // namespace frontend
|
||||
} // namespace ov
|
||||
|
|
@ -0,0 +1,56 @@
|
|||
|
||||
#include <climits>
|
||||
#include <cstdint>
|
||||
#include <memory>
|
||||
#include <vector>
|
||||
|
||||
#include "../node_context.hpp"
|
||||
#include "../utils.hpp"
|
||||
#include "openvino/op/reshape.hpp"
|
||||
#include "openvino/op/slice.hpp"
|
||||
|
||||
namespace ov {
|
||||
namespace frontend {
|
||||
namespace ggml {
|
||||
namespace op {
|
||||
|
||||
// GGML_OP_CONT: materialize a non-contiguous tensor as contiguous.
// Two lowering paths, selected by the decoder's continuity check:
//  - input from a PERMUTE: a plain Reshape suffices;
//  - input from a VIEW: reconstruct the view as a Slice on the lowest dim.
OutputVector translate_cont(const NodeContext& context) {
    num_inputs_check(context, 1, 1);

    auto src_shape = context.get_input_shape(0).to_shape();
    auto dst_shape = context.get_output_shape(0).to_shape();

    bool continuous = context.check_if_continuous();
    if (continuous) {
        // The input comes from a PERMUTE
        // NOTE(review): dst_shape holds size_t, so -1 wraps to SIZE_MAX here;
        // this relies on Constant::create narrowing it back to int64 -1
        // (the Reshape "infer this dim" marker) — confirm.
        dst_shape[1] = -1;
        auto result = std::make_shared<ov::op::v1::Reshape>(
            context.get_input(0),
            ov::op::v0::Constant::create(ov::element::i64, {dst_shape.size()}, dst_shape),
            false);

        return {result};
    } else {
        // The input comes from a VIEW
        // Currently all cases are slicing at lowest dim
        // op_params[0] is the ggml view byte offset; dividing by the output
        // stride of dim 2 converts it to an element offset along that dim.
        int32_t* op_params = context.get_input_op_params(0);
        auto output_stride = context.get_output_stride(0);

        int64_t split_addr = op_params[0] / output_stride[2];
        // INT_MAX as the end of dim 1 means "to the end" (Slice clamps).
        std::vector<int64_t> begin = {0, 0, split_addr};
        std::vector<int64_t> end = {(int64_t)src_shape[0], INT_MAX, split_addr + (int64_t)src_shape[2]};
        std::vector<int64_t> strides = {1, 1, 1};

        auto begin_const = ov::op::v0::Constant::create(ov::element::i64, {begin.size()}, begin);
        auto end_const = ov::op::v0::Constant::create(ov::element::i64, {end.size()}, end);
        auto strides_const = ov::op::v0::Constant::create(ov::element::i64, {strides.size()}, strides);
        auto slice = std::make_shared<ov::op::v8::Slice>(context.get_input(0), begin_const, end_const, strides_const);

        return {slice};
    }
};
|
||||
|
||||
} // namespace op
|
||||
} // namespace ggml
|
||||
} // namespace frontend
|
||||
} // namespace ov
|
||||
|
|
@ -0,0 +1,106 @@
|
|||
#include <cstdint>
|
||||
#include <memory>
|
||||
#include <vector>
|
||||
|
||||
#include "../node_context.hpp"
|
||||
#include "../utils.hpp"
|
||||
#include "openvino/core/node.hpp"
|
||||
#include "openvino/core/node_output.hpp"
|
||||
#include "openvino/op/add.hpp"
|
||||
#include "openvino/op/constant.hpp"
|
||||
#include "openvino/op/convert_like.hpp"
|
||||
#include "openvino/op/range.hpp"
|
||||
#include "openvino/op/reshape.hpp"
|
||||
#include "openvino/op/scatter_nd_update.hpp"
|
||||
#include "openvino/op/transpose.hpp"
|
||||
#include "openvino/op/unsqueeze.hpp"
|
||||
|
||||
namespace ov {
|
||||
namespace frontend {
|
||||
namespace ggml {
|
||||
namespace op {
|
||||
|
||||
// GGML_OP_CPY: used here to write new K/V entries into the KV cache via
// ScatterNDUpdate at row positions [past_token_len, past_token_len+token_len).
// Path selection: contiguous destination -> K cache; strided (transposed)
// destination -> V cache. Returns the updated cache tensor.
OutputVector translate_cpy(const NodeContext& context) {
    num_inputs_check(context, 2, 2);
    auto src0 = context.get_input(0);   // new data to write
    auto src1 = context.get_input(1);   // cache tensor (destination view)
    auto past_token_len = context.get_input("past_token_len");

    auto src0_shape = context.get_input_shape(0).to_shape();
    auto output_shape = context.get_output_shape(0).to_shape();
    bool continuous = context.check_if_continuous();

    // NOTE(review): strides and output_shape are fetched but unused below —
    // candidates for removal.
    std::vector<size_t> input0_strides = context.get_input_stride(0);
    std::vector<size_t> output_strides = context.get_output_stride(0);

    auto one = ov::op::v0::Constant::create(ov::element::i64, ov::Shape{}, {1});

    // Match the cache's element type (e.g. f32 -> f16 cast).
    src0 = std::make_shared<ov::op::v1::ConvertLike>(src0, src1);
    if (continuous) {
        // Write K to cache_k
        int64_t head_size = src0_shape[2];
        int64_t num_heads = src0_shape[1];

        // View the flat cache as [tokens, num_heads, head_size].
        auto reshaped_src1_shape =
            ov::op::v0::Constant::create(ov::element::i64, {3}, std::vector<int64_t>{-1, num_heads, head_size});
        auto reshaped_src1 = std::make_shared<ov::op::v1::Reshape>(src1, reshaped_src1_shape, false);

        // token_len = dynamic dim 0 of src0, reduced to a scalar.
        auto token_len = get_dimensions(src0.get_node_shared_ptr(), {0});
        token_len = std::make_shared<ov::op::v1::Reshape>(token_len,
                                                          ov::op::v0::Constant::create(ov::element::i64, {0}, {}),
                                                          false);
        auto total_token_len = std::make_shared<ov::op::v1::Add>(past_token_len, token_len);
        // Row indices [past, past+len), shaped [len, 1] for ScatterNDUpdate.
        std::shared_ptr<ov::Node> indices =
            std::make_shared<ov::op::v4::Range>(past_token_len, total_token_len, one, ov::element::i64);
        indices = std::make_shared<ov::op::v0::Unsqueeze>(
            indices,
            ov::op::v0::Constant::create(ov::element::i64, {1}, std::vector<int64_t>{1}));

        auto res = std::make_shared<ov::op::v3::ScatterNDUpdate>(reshaped_src1, indices, src0);
        return {res};
    } else {
        // Write V to cache_v
        // V is stored transposed, so: flatten both operands to 2-D, transpose
        // to token-major, scatter the new rows, then undo the transpose.
        int64_t total_head_size = src0_shape[1];

        auto reshaped_src0 = std::make_shared<ov::op::v1::Reshape>(
            src0,
            ov::op::v0::Constant::create(ov::element::i64, {2}, std::vector<int64_t>{total_head_size, -1}),
            false);
        auto transposed_src0 =
            std::make_shared<ov::op::v1::Transpose>(reshaped_src0,
                                                    ov::op::v0::Constant::create(ov::element::i64, {2}, {1, 0}));

        auto reshaped_src1 = std::make_shared<ov::op::v1::Reshape>(
            src1,
            ov::op::v0::Constant::create(ov::element::i64, {2}, std::vector<int64_t>{total_head_size, -1}),
            false);
        auto transposed_src1 =
            std::make_shared<ov::op::v1::Transpose>(reshaped_src1,
                                                    ov::op::v0::Constant::create(ov::element::i64, {2}, {1, 0}));

        // Here token_len comes from dim 2 of src0 (layout differs from K path).
        auto token_len = get_dimensions(src0.get_node_shared_ptr(), {2});
        token_len = std::make_shared<ov::op::v1::Reshape>(token_len,
                                                          ov::op::v0::Constant::create(ov::element::i64, {0}, {}),
                                                          false);
        auto total_token_len = std::make_shared<ov::op::v1::Add>(past_token_len, token_len);
        std::shared_ptr<ov::Node> indices =
            std::make_shared<ov::op::v4::Range>(past_token_len, total_token_len, one, ov::element::i64);
        indices = std::make_shared<ov::op::v0::Unsqueeze>(
            indices,
            ov::op::v0::Constant::create(ov::element::i64, {1}, std::vector<int64_t>{1}));

        auto res = std::make_shared<ov::op::v3::ScatterNDUpdate>(transposed_src1, indices, transposed_src0);
        // Restore the cache's [1, total_head_size, tokens] layout.
        auto transposed_res =
            std::make_shared<ov::op::v1::Transpose>(res, ov::op::v0::Constant::create(ov::element::i64, {2}, {1, 0}));
        auto reshaped_res = std::make_shared<ov::op::v1::Reshape>(
            transposed_res,
            ov::op::v0::Constant::create(ov::element::i64, {3}, std::vector<int64_t>{1, total_head_size, -1}),
            false);
        return {reshaped_res};
    }
};
|
||||
|
||||
} // namespace op
|
||||
} // namespace ggml
|
||||
} // namespace frontend
|
||||
} // namespace ov
|
||||
|
|
@ -0,0 +1,40 @@
|
|||
#include <cstdint>
|
||||
#include <vector>
|
||||
|
||||
#include "../node_context.hpp"
|
||||
#include "../utils.hpp"
|
||||
#include "openvino/core/node.hpp"
|
||||
#include "openvino/core/node_output.hpp"
|
||||
#include "openvino/op/constant.hpp"
|
||||
#include "openvino/op/convert.hpp"
|
||||
#include "openvino/op/gather.hpp"
|
||||
#include "openvino/op/reshape.hpp"
|
||||
|
||||
namespace ov {
|
||||
namespace frontend {
|
||||
namespace ggml {
|
||||
namespace op {
|
||||
|
||||
// GGML_OP_GET_ROWS: gather rows of src0 selected by the indices in src1,
// then cast to the expected output type if it differs.
OutputVector translate_get_rows(const NodeContext& context) {
    num_inputs_check(context, 2, 2);

    auto data = context.get_input(0);
    auto indices = context.get_input(1);

    // Flatten the index tensor to 1-D using its innermost extent.
    auto flat_len = get_dimensions(indices.get_node_shared_ptr(), {2});
    Output<Node> flat_indices = std::make_shared<ov::op::v1::Reshape>(indices, flat_len, false);

    // Rows live along axis 1 of the data tensor.
    auto axis = ov::op::v0::Constant::create(ov::element::i32, ov::Shape{}, {1});
    Output<Node> gathered = std::make_shared<ov::op::v8::Gather>(data, flat_indices, axis);

    if (gathered.get_element_type() != context.get_output_type(0)) {
        gathered = std::make_shared<ov::op::v0::Convert>(gathered, context.get_output_type(0));
    }

    return {gathered};
}
|
||||
|
||||
} // namespace op
|
||||
} // namespace ggml
|
||||
} // namespace frontend
|
||||
} // namespace ov
|
||||
|
|
@ -0,0 +1,28 @@
|
|||
#include <cstdint>
|
||||
#include <vector>
|
||||
|
||||
#include "../node_context.hpp"
|
||||
#include "../utils.hpp"
|
||||
#include "openvino/core/node.hpp"
|
||||
#include "openvino/core/node_output.hpp"
|
||||
#include "openvino/op/broadcast.hpp"
|
||||
#include "openvino/op/constant.hpp"
|
||||
#include "openvino/op/multiply.hpp"
|
||||
#include "openvino/op/reshape.hpp"
|
||||
|
||||
namespace ov {
|
||||
namespace frontend {
|
||||
namespace ggml {
|
||||
namespace op {
|
||||
|
||||
// GGML_OP_MUL: elementwise multiplication of the node's two inputs.
OutputVector translate_mul(const NodeContext& context) {
    num_inputs_check(context, 2, 2);

    auto lhs = context.get_input(0);
    auto rhs = context.get_input(1);
    auto product = std::make_shared<ov::op::v1::Multiply>(lhs, rhs);
    return {product};
}
|
||||
|
||||
} // namespace op
|
||||
} // namespace ggml
|
||||
} // namespace frontend
|
||||
} // namespace ov
|
||||
|
|
@ -0,0 +1,127 @@
|
|||
#include <cstddef>
|
||||
#include <cstdint>
|
||||
#include <memory>
|
||||
#include <vector>
|
||||
|
||||
#include "../node_context.hpp"
|
||||
#include "../utils.hpp"
|
||||
#include "openvino/core/node.hpp"
|
||||
#include "openvino/core/node_output.hpp"
|
||||
#include "openvino/op/concat.hpp"
|
||||
#include "openvino/op/constant.hpp"
|
||||
#include "openvino/op/convert_like.hpp"
|
||||
#include "openvino/op/matmul.hpp"
|
||||
#include "openvino/op/reshape.hpp"
|
||||
#include "openvino/op/slice.hpp"
|
||||
#include "openvino/op/transpose.hpp"
|
||||
|
||||
namespace ov {
|
||||
namespace frontend {
|
||||
namespace ggml {
|
||||
namespace op {
|
||||
|
||||
// GGML_OP_MUL_MAT: computes src1 @ src0^T (ggml convention).
// Contiguous src0: direct MatMul with transpose_b. Strided src0 (a VIEW into
// the KV cache): reconstruct the logical tensor via Reshape + Slice
// (+ Transpose when permuted), limited to "attention_size" tokens, and repeat
// KV heads when the model uses grouped-query attention.
OutputVector translate_mulmat(const NodeContext& context) {
    num_inputs_check(context, 2, 2);

    bool continuous = context.check_if_continuous();
    if (continuous) {
        auto src1 = context.get_input(1);
        // Weights may be a different precision; cast to match the activations.
        auto src0_converted = std::make_shared<ov::op::v1::ConvertLike>(context.get_input(0), src1);
        auto result = std::make_shared<ov::op::v0::MatMul>(src1, src0_converted, false, true);
        return {result};
    } else {
        /*
        Two cases here:
        - 21: [    96,    32,    32,     1] VIEW     k-0            [     2,  6144,   192,  6144]
              [196608,     1,     1,     1] 0: NONE  cache_k_l0     [     2, 393216, 393216, 393216]
        - 22: [    96,     7,    32,     1] PERMUTE  q-0            [     4, 12288,   384, 86016]
              [    96,    32,     7,     1] 0: SCALE Qcur-0         [     4,   384, 12288, 86016]
        - 23: [    32,     7,    32,     1] MUL_MAT  kq-0           [     4,   128,   896, 28672]
              [    96,    32,    32,     1] 0: VIEW  k-0            [     2,  6144,   192,  6144]
              [    96,     7,    32,     1] 1: PERMUTE q-0          [     4, 12288,   384, 86016]

        - 20: [    32,    96,    32,     1] VIEW     v-0            [     2,   128, 12288, 393216]
              [196608,     1,     1,     1] 0: NONE  cache_v_l0     [     2, 393216, 393216, 393216]
        - 25: [    96,     7,    32,     1] MUL_MAT  kqv-0          [     4,   384,  2688, 86016]
              [    32,    96,    32,     1] 0: VIEW  v-0            [     2,   128, 12288, 393216]
              [    32,     7,    32,     1] 1: SOFT_MAX kq_soft_max_ext-0 [  4,   128,   896, 28672]

        For case 1, for src0, Reshape + Slice + Transpose
        For case 2, for src0, Reshape + Slice
        */
        ov::Output<ov::Node> A;
        ov::Output<ov::Node> B;

        // Dynamic scalar: number of tokens currently valid in the cache.
        auto attention_size = context.get_input("attention_size");

        auto src0 = context.get_input(0);
        auto src0_shape = context.get_input_shape(0).to_shape();
        auto src0_stride = context.get_input_stride(0);
        auto permuted = is_permuted(src0_stride);
        // The token dimension sits at axis 0 in the permuted (K) layout and
        // axis 2 in the plain view (V) layout.
        auto token_dim = permuted ? 0 : 2;

        // Recover the in-memory (stride-descending) shape of the view.
        auto src0_perm = argsort_descend(src0_stride);
        auto src0_original_shape_ = permute(src0_shape, src0_perm);
        std::vector<int64_t> src0_original_shape(src0_original_shape_.begin(), src0_original_shape_.end());
        // Token count is dynamic: let Reshape infer it.
        src0_original_shape[token_dim] = -1;

        // Static extents of the two non-token dims, used to build the slice end.
        auto src0_slice_shape = src0_original_shape;
        src0_slice_shape.erase(src0_slice_shape.begin() + token_dim);

        auto src0_reshape_shape =
            ov::op::v0::Constant::create(ov::element::i64, {src0_original_shape.size()}, src0_original_shape);
        auto src0_reshape = std::make_shared<ov::op::v1::Reshape>(src0, src0_reshape_shape, false);

        // Slice end: attention_size along the token dim, full extent elsewhere.
        std::shared_ptr<ov::Node> slice_end;
        if (permuted) {
            slice_end = std::make_shared<ov::op::v0::Concat>(
                ov::OutputVector{attention_size, ov::op::v0::Constant::create(ov::element::i64, {2}, src0_slice_shape)},
                0);
        } else {
            slice_end = std::make_shared<ov::op::v0::Concat>(
                ov::OutputVector{ov::op::v0::Constant::create(ov::element::i64, {2}, src0_slice_shape), attention_size},
                0);
        }
        auto slice_start = ov::op::v0::Constant::create(ov::element::i64, {3}, std::vector<int64_t>(3, 0));
        auto slice_step = ov::op::v0::Constant::create(ov::element::i64, {3}, std::vector<int64_t>(3, 1));
        auto src0_slice = std::make_shared<ov::op::v8::Slice>(src0_reshape, slice_start, slice_end, slice_step);

        // Undo the permutation to get back the logical axis order.
        if (permuted) {
            B = std::make_shared<ov::op::v1::Transpose>(
                src0_slice,
                ov::op::v0::Constant::create(ov::element::i64, {src0_perm.size()}, src0_perm));
        } else {
            B = src0_slice;
        }

        A = context.get_input(1);
        B = std::make_shared<ov::op::v1::ConvertLike>(B, A);

        // Grouped-query attention: repeat each KV head to match Q head count.
        int64_t num_heads = context.get_input_shape(1).to_shape()[0];
        int64_t num_heads_kv = src0_shape[0];
        int64_t kv_num_heads_factor = num_heads / num_heads_kv;
        if (kv_num_heads_factor > 1) {
            auto num_heads_node = ov::op::v0::Constant::create(ov::element::i64, {1}, std::vector<int64_t>{num_heads});
            auto num_heads_kv_node =
                ov::op::v0::Constant::create(ov::element::i64, {1}, std::vector<int64_t>{num_heads_kv});
            auto B_shape_last_two = get_dimensions(B.get_node_shared_ptr(), {1, 2});

            auto one = ov::op::v0::Constant::create(ov::element::i64, {1}, {1});
            // [kv, x, y] -> [kv, 1, x, y] -> concat factor times on axis 1
            // -> [heads, x, y].
            std::shared_ptr<ov::Node> new_B_shape =
                std::make_shared<ov::op::v0::Concat>(ov::OutputVector{num_heads_kv_node, one, B_shape_last_two}, 0);
            B = std::make_shared<ov::op::v1::Reshape>(B, new_B_shape, false);

            B = std::make_shared<ov::op::v0::Concat>(ov::OutputVector(kv_num_heads_factor, B), 1);
            new_B_shape = std::make_shared<ov::op::v0::Concat>(ov::OutputVector{num_heads_node, B_shape_last_two}, 0);
            B = std::make_shared<ov::op::v1::Reshape>(B, new_B_shape, false);
        }

        auto result = std::make_shared<ov::op::v0::MatMul>(A, B, false, true);
        return {result};
    }
};
|
||||
|
||||
} // namespace op
|
||||
} // namespace ggml
|
||||
} // namespace frontend
|
||||
} // namespace ov
|
||||
|
|
@ -0,0 +1,22 @@
|
|||
#include "../node_context.hpp"
|
||||
#include "../utils.hpp"
|
||||
#include "openvino/op/constant.hpp"
|
||||
#include "openvino/op/transpose.hpp"
|
||||
|
||||
namespace ov {
|
||||
namespace frontend {
|
||||
namespace ggml {
|
||||
namespace op {
|
||||
// GGML_OP_PERMUTE: currently lowered as a fixed swap of the first two axes.
OutputVector translate_permute(const NodeContext& context) {
    num_inputs_check(context, 1, 1);

    // TODO: make this more general
    auto order = ov::op::v0::Constant::create(ov::element::i64, {3}, {1, 0, 2});
    auto transposed = std::make_shared<ov::op::v1::Transpose>(context.get_input(0), order);
    return {transposed};
}
|
||||
} // namespace op
|
||||
} // namespace ggml
|
||||
} // namespace frontend
|
||||
} // namespace ov
|
||||
|
|
@ -0,0 +1,35 @@
|
|||
#include "openvino/op/reshape.hpp"
|
||||
|
||||
#include <cstdint>
|
||||
#include <vector>
|
||||
|
||||
#include "../node_context.hpp"
|
||||
#include "../utils.hpp"
|
||||
#include "openvino/core/node.hpp"
|
||||
#include "openvino/core/node_output.hpp"
|
||||
#include "openvino/op/constant.hpp"
|
||||
|
||||
namespace ov {
|
||||
namespace frontend {
|
||||
namespace ggml {
|
||||
namespace op {
|
||||
|
||||
// GGML_OP_RESHAPE: reshape to the recorded output shape.
// The leading (token) dimension is left as -1 so it stays dynamic; only the
// last two dims are taken from the static output shape.
OutputVector translate_reshape(const NodeContext& context) {
    num_inputs_check(context, 1, 1);

    // Identity reshape: pass the input straight through.
    if (context.get_input_shape(0) == context.get_output_shape(0)) {
        return {context.get_input(0)};
    }

    auto out_shape = context.get_output_shape(0).to_shape();
    std::vector<int64_t> target{-1, (int64_t)out_shape[1], (int64_t)out_shape[2]};
    auto target_node = ov::op::v0::Constant::create(ov::element::i64, {3}, target);

    Output<Node> reshaped = std::make_shared<ov::op::v1::Reshape>(context.get_input(0), target_node, false);
    return {reshaped};
}
|
||||
|
||||
} // namespace op
|
||||
} // namespace ggml
|
||||
} // namespace frontend
|
||||
} // namespace ov
|
||||
|
|
@ -0,0 +1,47 @@
|
|||
#include "../node_context.hpp"
|
||||
#include "../utils.hpp"
|
||||
#include "openvino/op/add.hpp"
|
||||
#include "openvino/op/constant.hpp"
|
||||
#include "openvino/op/divide.hpp"
|
||||
#include "openvino/op/multiply.hpp"
|
||||
#include "openvino/op/reduce_sum.hpp"
|
||||
#include "openvino/op/sqrt.hpp"
|
||||
|
||||
namespace ov {
|
||||
namespace frontend {
|
||||
namespace ggml {
|
||||
namespace op {
|
||||
|
||||
// GGML_OP_RMS_NORM: x * 1/sqrt(mean(x^2) + eps), reduced over the last axis.
// NOTE(review): axis 2 and input_shape[2] are hard-coded — assumes rank-3
// input; confirm against the shapes the decoder produces.
OutputVector translate_rms_norm(const NodeContext& context) {
    num_inputs_check(context, 1, 1);

    ov::Shape input_shape = context.get_input_shape(0).to_shape();
    auto input_node = context.get_input(0);
    auto square = std::make_shared<ov::op::v1::Multiply>(input_node, input_node);

    // Sum of squares over axis 2, keeping dims for broadcasting below.
    auto reduce_sum =
        std::make_shared<ov::op::v1::ReduceSum>(square,
                                                ov::op::v0::Constant::create(ov::element::i64, ov::Shape{1}, {2}),
                                                true);

    // Divide by the static extent of axis 2 to get the mean square.
    auto mean = std::make_shared<ov::op::v1::Divide>(
        reduce_sum,
        ov::op::v0::Constant::create(ov::element::f32, ov::Shape{}, {static_cast<float>(input_shape[2])}));

    // eps is stored bit-exact in the node's int32 op_params; memcpy avoids
    // type-punning UB. (Relies on <cstring> arriving transitively.)
    float eps;
    memcpy(&eps, context.get_output_op_params(0), sizeof(float));
    auto rms = std::make_shared<ov::op::v0::Sqrt>(
        std::make_shared<ov::op::v1::Add>(mean, ov::op::v0::Constant::create(ov::element::f32, ov::Shape{}, {eps})));

    // scale = 1 / rms
    auto scale =
        std::make_shared<ov::op::v1::Divide>(ov::op::v0::Constant::create(ov::element::f32, ov::Shape{}, {1.0f}), rms);

    auto res = std::make_shared<ov::op::v1::Multiply>(input_node, scale);

    return {res};
};
|
||||
|
||||
} // namespace op
|
||||
} // namespace ggml
|
||||
} // namespace frontend
|
||||
} // namespace ov
|
||||
|
|
@ -0,0 +1,171 @@
|
|||
|
||||
#include <cstdint>
|
||||
#include <memory>
|
||||
#include <vector>
|
||||
|
||||
#include "../node_context.hpp"
|
||||
#include "../utils.hpp"
|
||||
#include "openvino/core/node.hpp"
|
||||
#include "openvino/core/node_output.hpp"
|
||||
#include "openvino/op/add.hpp"
|
||||
#include "openvino/op/broadcast.hpp"
|
||||
#include "openvino/op/concat.hpp"
|
||||
#include "openvino/op/constant.hpp"
|
||||
#include "openvino/op/convert.hpp"
|
||||
#include "openvino/op/cos.hpp"
|
||||
#include "openvino/op/divide.hpp"
|
||||
#include "openvino/op/multiply.hpp"
|
||||
#include "openvino/op/reshape.hpp"
|
||||
#include "openvino/op/shape_of.hpp"
|
||||
#include "openvino/op/sin.hpp"
|
||||
#include "openvino/op/slice.hpp"
|
||||
#include "openvino/op/split.hpp"
|
||||
#include "openvino/op/subtract.hpp"
|
||||
#include "openvino/op/transpose.hpp"
|
||||
|
||||
#define GGML_ROPE_TYPE_NEOX 2
|
||||
|
||||
#define MIN(a, b) ((a) < (b) ? (a) : (b))
|
||||
#define MAX(a, b) ((a) > (b) ? (a) : (b))
|
||||
|
||||
namespace ov {
|
||||
namespace frontend {
|
||||
namespace ggml {
|
||||
namespace op {
|
||||
|
||||
static float ggml_rope_yarn_corr_dim(int n_dims, int n_ctx_orig, float n_rot, float base) {
|
||||
return n_dims * logf(n_ctx_orig / (n_rot * 2 * (float)M_PI)) / (2 * logf(base));
|
||||
}
|
||||
|
||||
// YaRN helper: compute the [start, end] dimension-index window over which
// interpolation ramps between the beta_fast and beta_slow rotation counts,
// clamped to the valid dimension range [0, n_dims - 1].
// Mirrors the same-named function in ggml — keep in sync with upstream.
void ggml_rope_yarn_corr_dims(int n_dims,
                              int n_ctx_orig,
                              float freq_base,
                              float beta_fast,
                              float beta_slow,
                              float dims[2]) {
    float start = floorf(ggml_rope_yarn_corr_dim(n_dims, n_ctx_orig, beta_fast, freq_base));
    float end = ceilf(ggml_rope_yarn_corr_dim(n_dims, n_ctx_orig, beta_slow, freq_base));
    dims[0] = MAX(0, start);
    dims[1] = MIN(n_dims - 1, end);
}
|
||||
|
||||
OutputVector translate_rope(const NodeContext& context) {
|
||||
num_inputs_check(context, 2, 3);
|
||||
|
||||
auto data_node = context.get_input(0);
|
||||
auto pos_node = context.get_input(1);
|
||||
pos_node = std::make_shared<ov::op::v0::Convert>(pos_node, ov::element::f32);
|
||||
|
||||
auto permutation_node =
|
||||
std::make_shared<ov::op::v0::Constant>(ov::element::i64, ov::Shape{3}, std::vector<int64_t>{2, 1, 0});
|
||||
Output<Node> pos_node_reshaped = std::make_shared<ov::op::v1::Transpose>(pos_node, permutation_node);
|
||||
|
||||
auto output_shape = context.get_output_shape(0);
|
||||
|
||||
float freq_base, freq_scale, ext_factor, attn_factor, beta_fast, beta_slow;
|
||||
int32_t* op_params = context.get_output_op_params(0);
|
||||
const int n_dims = op_params[1];
|
||||
const int mode = op_params[2];
|
||||
const int n_ctx_orig = op_params[4];
|
||||
memcpy(&freq_base, op_params + 5, sizeof(float));
|
||||
memcpy(&freq_scale, op_params + 6, sizeof(float));
|
||||
memcpy(&ext_factor, op_params + 7, sizeof(float));
|
||||
memcpy(&attn_factor, op_params + 8, sizeof(float));
|
||||
memcpy(&beta_fast, op_params + 9, sizeof(float));
|
||||
memcpy(&beta_slow, op_params + 10, sizeof(float));
|
||||
|
||||
const float theta_scale = powf(freq_base, -2.0f / n_dims);
|
||||
|
||||
// TODO: corr_dims is not used in the current implementation
|
||||
float corr_dims[2];
|
||||
ggml_rope_yarn_corr_dims(n_dims, n_ctx_orig, freq_base, beta_fast, beta_slow, corr_dims);
|
||||
|
||||
const bool is_neox = mode & GGML_ROPE_TYPE_NEOX;
|
||||
|
||||
// TODO: GGML_OP_ROPE_BACK -> false
|
||||
bool forward = true;
|
||||
const float sin_sign = forward ? 1.0f : -1.0f;
|
||||
|
||||
const int64_t ne0 = output_shape[2].get_length();
|
||||
std::vector<float> factor(ne0 / 2);
|
||||
factor[0] = freq_scale;
|
||||
for (int64_t i = 1; i < ne0 / 2; i++) {
|
||||
factor[i] = theta_scale * factor[i - 1];
|
||||
}
|
||||
|
||||
Output<Node> factor_node =
|
||||
std::make_shared<ov::op::v0::Constant>(ov::element::f32, ov::Shape{factor.size()}, factor);
|
||||
if (context.get_input_size() == 3) {
|
||||
auto freq_factors_node = context.get_input(2);
|
||||
factor_node = std::make_shared<ov::op::v1::Divide>(factor_node, freq_factors_node);
|
||||
}
|
||||
|
||||
auto half_last_dim = ov::op::v0::Constant::create(ov::element::i64, Shape{1}, {output_shape[2].get_length() / 2});
|
||||
Output<Node> input_shape_node = std::make_shared<ov::op::v0::Concat>(
|
||||
OutputVector{get_dimensions(data_node.get_node_shared_ptr(), {0, 1}), half_last_dim},
|
||||
0);
|
||||
Output<Node> factor_broadcasted_node = std::make_shared<ov::op::v3::Broadcast>(factor_node, input_shape_node);
|
||||
|
||||
Output<Node> cos_factor_broadcasted_node = std::make_shared<ov::op::v0::Cos>(
|
||||
std::make_shared<ov::op::v1::Multiply>(factor_broadcasted_node, pos_node_reshaped));
|
||||
Output<Node> sin_factor_broadcasted_node = std::make_shared<ov::op::v0::Sin>(
|
||||
std::make_shared<ov::op::v1::Multiply>(factor_broadcasted_node, pos_node_reshaped));
|
||||
|
||||
float mscale = attn_factor;
|
||||
Output<Node> mscale_node =
|
||||
std::make_shared<ov::op::v0::Constant>(ov::element::f32, ov::Shape{}, std::vector<float>{mscale});
|
||||
Output<Node> mscale_sin_sign_node =
|
||||
std::make_shared<ov::op::v0::Constant>(ov::element::f32, ov::Shape{}, std::vector<float>{mscale * sin_sign});
|
||||
Output<Node> cos_theta_node = std::make_shared<ov::op::v1::Multiply>(cos_factor_broadcasted_node, mscale_node);
|
||||
Output<Node> sin_theta_node = std::make_shared<ov::op::v1::Multiply>(sin_factor_broadcasted_node, mscale_node);
|
||||
|
||||
if (!is_neox) {
|
||||
auto input_shape = context.get_input_shape(0);
|
||||
|
||||
auto begin_even = ov::op::v0::Constant::create(ov::element::i64, Shape{3}, {0, 0, 0});
|
||||
auto begin_odd = ov::op::v0::Constant::create(ov::element::i64, Shape{3}, {0, 0, 1});
|
||||
auto end = std::make_shared<ov::op::v0::ShapeOf>(data_node);
|
||||
auto stride = ov::op::v0::Constant::create(ov::element::i64, Shape{3}, {1, 1, 2});
|
||||
auto even_slice = std::make_shared<ov::op::v8::Slice>(data_node, begin_even, end, stride);
|
||||
auto odd_slice = std::make_shared<ov::op::v8::Slice>(data_node, begin_odd, end, stride);
|
||||
|
||||
auto first_half =
|
||||
std::make_shared<ov::op::v1::Subtract>(std::make_shared<ov::op::v1::Multiply>(even_slice, cos_theta_node),
|
||||
std::make_shared<ov::op::v1::Multiply>(odd_slice, sin_theta_node));
|
||||
auto second_half =
|
||||
std::make_shared<ov::op::v1::Add>(std::make_shared<ov::op::v1::Multiply>(even_slice, sin_theta_node),
|
||||
std::make_shared<ov::op::v1::Multiply>(odd_slice, cos_theta_node));
|
||||
|
||||
auto stack = std::make_shared<ov::op::v0::Concat>(OutputVector{first_half, second_half}, 2);
|
||||
auto shape_const = ov::op::v0::Constant::create(
|
||||
ov::element::i64,
|
||||
Shape{3},
|
||||
std::vector<int64_t>{-1, input_shape[1].get_length(), input_shape[2].get_length()});
|
||||
auto reshaped = std::make_shared<ov::op::v1::Reshape>(stack, shape_const, false);
|
||||
|
||||
return {reshaped};
|
||||
} else {
|
||||
auto slice_node =
|
||||
std::make_shared<ov::op::v1::Split>(data_node,
|
||||
ov::op::v0::Constant::create(ov::element::i64, ov::Shape{}, {2}),
|
||||
2);
|
||||
Output<Node> slice_data_node_0 = slice_node->outputs()[0];
|
||||
Output<Node> slice_data_node_1 = slice_node->outputs()[1];
|
||||
|
||||
auto first_half_node = std::make_shared<ov::op::v1::Subtract>(
|
||||
std::make_shared<ov::op::v1::Multiply>(slice_data_node_0, cos_theta_node),
|
||||
std::make_shared<ov::op::v1::Multiply>(slice_data_node_1, sin_theta_node));
|
||||
|
||||
auto second_half_node = std::make_shared<ov::op::v1::Add>(
|
||||
std::make_shared<ov::op::v1::Multiply>(slice_data_node_0, sin_theta_node),
|
||||
std::make_shared<ov::op::v1::Multiply>(slice_data_node_1, cos_theta_node));
|
||||
|
||||
auto res_node = std::make_shared<ov::op::v0::Concat>(ov::OutputVector{first_half_node, second_half_node}, 2);
|
||||
return {res_node};
|
||||
}
|
||||
};
|
||||
|
||||
} // namespace op
|
||||
} // namespace ggml
|
||||
} // namespace frontend
|
||||
} // namespace ov
|
||||
|
|
@ -0,0 +1,31 @@
|
|||
#include <cstdint>
|
||||
#include <vector>
|
||||
|
||||
#include "../node_context.hpp"
|
||||
#include "../utils.hpp"
|
||||
#include "openvino/core/node.hpp"
|
||||
#include "openvino/core/node_output.hpp"
|
||||
#include "openvino/op/constant.hpp"
|
||||
#include "openvino/op/multiply.hpp"
|
||||
|
||||
namespace ov {
|
||||
namespace frontend {
|
||||
namespace ggml {
|
||||
namespace op {
|
||||
|
||||
// GGML_OP_SCALE: multiply the input tensor by a scalar taken from op_params.
// The scale factor is stored as raw bytes at the start of the op's params.
OutputVector translate_scale(const NodeContext& context) {
    num_inputs_check(context, 1, 1);

    float scale_value;
    memcpy(&scale_value, context.get_output_op_params(0), sizeof(float));

    auto scale_const =
        std::make_shared<ov::op::v0::Constant>(ov::element::f32, ov::Shape{}, std::vector<float>{scale_value});
    auto scaled = std::make_shared<ov::op::v1::Multiply>(context.get_input(0), scale_const);

    return {scaled};
}
|
||||
|
||||
} // namespace op
|
||||
} // namespace ggml
|
||||
} // namespace frontend
|
||||
} // namespace ov
|
||||
|
|
@ -0,0 +1,88 @@
|
|||
|
||||
#include <cstdint>
|
||||
#include <memory>
|
||||
#include <vector>
|
||||
|
||||
#include "../node_context.hpp"
|
||||
#include "../utils.hpp"
|
||||
#include "openvino/core/node.hpp"
|
||||
#include "openvino/core/node_output.hpp"
|
||||
#include "openvino/op/add.hpp"
|
||||
#include "openvino/op/concat.hpp"
|
||||
#include "openvino/op/constant.hpp"
|
||||
#include "openvino/op/convert.hpp"
|
||||
#include "openvino/op/multiply.hpp"
|
||||
#include "openvino/op/slice.hpp"
|
||||
#include "openvino/op/softmax.hpp"
|
||||
|
||||
namespace ov {
|
||||
namespace frontend {
|
||||
namespace ggml {
|
||||
namespace op {
|
||||
|
||||
// GGML_OP_SOFT_MAX: softmax over the (optionally scaled and masked) logits.
// op_params layout: [0] = scale (float), [1] = max_bias (float, ALiBi bias).
OutputVector translate_soft_max(const NodeContext& context) {
    num_inputs_check(context, 1, 2);

    auto input_node = context.get_input(0);

    float scale = 1.0f;
    float max_bias = 0.0f;
    auto op_params = context.get_output_op_params(0);
    memcpy(&scale, (float*)op_params + 0, sizeof(float));
    memcpy(&max_bias, (float*)op_params + 1, sizeof(float));

    const uint32_t n_head = context.get_input_shape(0)[0].get_length();
    const uint32_t n_head_log2 = 1u << (uint32_t)floor(log2(n_head));

    // const float m0 = powf(2.0f, -(max_bias ) / n_head_log2);
    // const float m1 = powf(2.0f, -(max_bias / 2.0f) / n_head_log2);
    // NOTE(review): slope is always 1.0f — both branches of the ternary are
    // identical, so max_bias > 0 is effectively ignored. The per-head ALiBi
    // slope (commented out below) is not implemented yet; n_head_log2 is only
    // needed by that formula.
    const float slope = (max_bias > 0.0f) ? 1.0f : 1.0f;
    // const float slope = (max_bias > 0.0f) ? h < n_head_log2 ? powf(m0, h + 1) : powf(m1, 2*(h - n_head_log2) + 1)
    // : 1.0f;

    // Pre-scale the logits when a non-unit scale is given.
    if (scale != 1.0f) {
        auto scale_node =
            std::make_shared<ov::op::v0::Constant>(ov::element::f32, ov::Shape{}, std::vector<float>{scale});
        input_node = std::make_shared<ov::op::v1::Multiply>(input_node, scale_node);
    }

    if (context.get_input_size() == 2) {
        // Calculate mask then softmax
        auto mask_node = context.get_input(1);
        ov::element::Type mask_type = (context.get_input_type(1)).as<ov::element::Type>();
        if (mask_type == ov::element::f16) {
            // Convert f16 to f32
            mask_node = std::make_shared<ov::op::v0::Convert>(mask_node, ov::element::f32);
        }

        // Stride slice mask node: cut the mask down to the input's trailing
        // two dimensions (the stored mask may be larger than the logits).
        Output<Node> mask_begin_node = ov::op::v0::Constant::create(ov::element::i64, Shape{3}, {0, 0, 0});
        auto one = ov::op::v0::Constant::create(ov::element::i64, ov::Shape{1}, {1});
        auto input_last_two_dim = get_dimensions(input_node.get_node_shared_ptr(), {1, 2});
        auto mask_slice_shape = std::make_shared<ov::op::v0::Concat>(ov::NodeVector{one, input_last_two_dim}, 0);
        Output<Node> mask_stride_node = ov::op::v0::Constant::create(ov::element::i64, Shape{3}, {1, 1, 1});
        auto mask_node_sliced =
            std::make_shared<ov::op::v8::Slice>(mask_node, mask_begin_node, mask_slice_shape, mask_stride_node);

        // slope * mask
        auto slope_node =
            std::make_shared<ov::op::v0::Constant>(ov::element::f32, ov::Shape{}, std::vector<float>{slope});
        auto slope_mask_node = std::make_shared<ov::op::v1::Multiply>(mask_node_sliced, slope_node);

        // input + slope * mask
        auto input_slope_mask_node = std::make_shared<ov::op::v1::Add>(input_node, slope_mask_node);

        // Calculate softmax
        auto res = std::make_shared<ov::op::v8::Softmax>(input_slope_mask_node, 2);
        return {res};
    } else {
        // Directly softmax
        // NOTE(review): the masked path applies Softmax over axis 2 but this
        // path uses axis 0 — confirm the axis difference is intentional.
        auto res = std::make_shared<ov::op::v8::Softmax>(input_node, 0);
        return {res};
    }
};
|
||||
|
||||
} // namespace op
|
||||
} // namespace ggml
|
||||
} // namespace frontend
|
||||
} // namespace ov
|
||||
|
|
@ -0,0 +1,23 @@
|
|||
#include "openvino/op/transpose.hpp"
|
||||
|
||||
#include "../node_context.hpp"
|
||||
#include "../utils.hpp"
|
||||
|
||||
namespace ov {
|
||||
namespace frontend {
|
||||
namespace ggml {
|
||||
namespace op {
|
||||
|
||||
// GGML_OP_TRANSPOSE: realized as an OV Transpose whose permutation orders the
// axes by the ggml output strides, largest stride first.
OutputVector translate_transpose(const NodeContext& context) {
    num_inputs_check(context, 1, 1);

    const auto perm = argsort_descend(context.get_output_stride(0));
    auto perm_const = ov::op::v0::Constant::create(ov::element::i64, {3}, perm);
    auto transposed = std::make_shared<ov::op::v1::Transpose>(context.get_input(0), perm_const);

    return {transposed};
}
|
||||
|
||||
} // namespace op
|
||||
} // namespace ggml
|
||||
} // namespace frontend
|
||||
} // namespace ov
|
||||
|
|
@ -0,0 +1,24 @@
|
|||
|
||||
#include <cstdint>
|
||||
#include <vector>
|
||||
|
||||
#include "../node_context.hpp"
|
||||
#include "../utils.hpp"
|
||||
#include "openvino/core/node.hpp"
|
||||
#include "openvino/core/node_output.hpp"
|
||||
|
||||
namespace ov {
|
||||
namespace frontend {
|
||||
namespace ggml {
|
||||
namespace op {
|
||||
|
||||
// Placeholder translator for generic ggml unary ops: currently forwards the
// input unchanged (no unary function is applied).
OutputVector translate_unary(const NodeContext& context) {
    num_inputs_check(context, 1, 1);

    auto passthrough = context.get_input(0);
    return {passthrough};
}
|
||||
|
||||
} // namespace op
|
||||
} // namespace ggml
|
||||
} // namespace frontend
|
||||
} // namespace ov
|
||||
|
|
@ -0,0 +1,29 @@
|
|||
#include <cstdint>
|
||||
#include <vector>
|
||||
|
||||
#include "../node_context.hpp"
|
||||
#include "../utils.hpp"
|
||||
#include "openvino/core/node.hpp"
|
||||
#include "openvino/core/node_output.hpp"
|
||||
#include "openvino/op/multiply.hpp"
|
||||
#include "openvino/op/sigmoid.hpp"
|
||||
|
||||
namespace ov {
|
||||
namespace frontend {
|
||||
namespace ggml {
|
||||
namespace op {
|
||||
|
||||
// GGML_UNARY_OP_SILU: silu(x) = x * sigmoid(x).
OutputVector translate_unary_silu(const NodeContext& context) {
    num_inputs_check(context, 1, 1);

    const auto x = context.get_input(0);
    auto silu = std::make_shared<ov::op::v1::Multiply>(x, std::make_shared<ov::op::v0::Sigmoid>(x));
    return {silu};
}
|
||||
|
||||
} // namespace op
|
||||
} // namespace ggml
|
||||
} // namespace frontend
|
||||
} // namespace ov
|
||||
|
|
@ -0,0 +1,26 @@
|
|||
#include <cstdint>
|
||||
#include <vector>
|
||||
|
||||
#include "../utils.hpp"
|
||||
#include "openvino/core/node.hpp"
|
||||
#include "openvino/core/node_output.hpp"
|
||||
#include "openvino/op/constant.hpp"
|
||||
#include "openvino/op/convert.hpp"
|
||||
#include "openvino/op/reshape.hpp"
|
||||
#include "openvino/op/strided_slice.hpp"
|
||||
|
||||
namespace ov {
|
||||
namespace frontend {
|
||||
namespace ggml {
|
||||
namespace op {
|
||||
|
||||
// GGML_OP_VIEW: the view is resolved upstream by the decoder, so the
// translation is an identity passthrough of the single input.
OutputVector translate_view(const NodeContext& context) {
    num_inputs_check(context, 1, 1);

    auto viewed = context.get_input(0);
    return {viewed};
}
|
||||
|
||||
} // namespace op
|
||||
} // namespace ggml
|
||||
} // namespace frontend
|
||||
} // namespace ov
|
||||
|
|
@ -0,0 +1,64 @@
|
|||
#include "op_table.hpp"
|
||||
|
||||
#include <openvino/op/add.hpp>
|
||||
#include <openvino/op/divide.hpp>
|
||||
#include <openvino/op/gather.hpp>
|
||||
#include <openvino/op/matmul.hpp>
|
||||
#include <openvino/op/multiply.hpp>
|
||||
#include <openvino/op/subtract.hpp>
|
||||
|
||||
#include "utils.hpp"
|
||||
|
||||
using namespace ov::op;
|
||||
namespace ov {
|
||||
namespace frontend {
|
||||
namespace ggml {
|
||||
|
||||
namespace op {
|
||||
|
||||
// Declares a translator function with the standard ggml-op converter signature.
#define GGML_OP_CONVERTER(op) OutputVector op(const NodeContext& node)

GGML_OP_CONVERTER(translate_add);
GGML_OP_CONVERTER(translate_cont);
GGML_OP_CONVERTER(translate_cpy);
GGML_OP_CONVERTER(translate_get_rows);
GGML_OP_CONVERTER(translate_mul);
GGML_OP_CONVERTER(translate_mulmat);
GGML_OP_CONVERTER(translate_permute);
GGML_OP_CONVERTER(translate_reshape);
GGML_OP_CONVERTER(translate_rms_norm);
GGML_OP_CONVERTER(translate_rope);
GGML_OP_CONVERTER(translate_scale);
GGML_OP_CONVERTER(translate_unary_silu);
GGML_OP_CONVERTER(translate_soft_max);
GGML_OP_CONVERTER(translate_transpose);
GGML_OP_CONVERTER(translate_unary);
GGML_OP_CONVERTER(translate_view);

}  // namespace op

// Builds the dispatch table from ggml op-name strings to translator functions.
// Simple elementwise ops reuse the generic two-input template; the remaining
// ops use dedicated translate_* implementations declared above.
// NOTE(review): translate_add/translate_mulmat-style declarations that have no
// table entry (e.g. translate_add) are currently unused by this table.
const std::unordered_map<std::string, CreatorFunction> get_supported_ops() {
    return {{"GGML_OP_ADD", op::translate_1to1_match_2_inputs<v1::Add>},
            {"GGML_OP_ADD1", op::translate_1to1_match_2_inputs<v1::Add>},
            {"GGML_OP_CONT", op::translate_cont},
            {"GGML_OP_CPY", op::translate_cpy},
            {"GGML_OP_DIV", op::translate_1to1_match_2_inputs<v1::Divide>},
            {"GGML_OP_GET_ROWS", op::translate_get_rows},
            // {"GGML_OP_MUL", op::translate_1to1_match_2_inputs<v1::Multiply>},
            {"GGML_OP_MUL", op::translate_mul},
            {"GGML_OP_MUL_MAT", op::translate_mulmat},
            {"GGML_OP_PERMUTE", op::translate_permute},
            {"GGML_OP_RESHAPE", op::translate_reshape},
            {"GGML_OP_RMS_NORM", op::translate_rms_norm},
            {"GGML_OP_ROPE", op::translate_rope},
            {"GGML_OP_SCALE", op::translate_scale},
            {"GGML_OP_SOFT_MAX", op::translate_soft_max},
            {"GGML_OP_SUB", op::translate_1to1_match_2_inputs<v1::Subtract>},
            {"GGML_OP_TRANSPOSE", op::translate_transpose},
            {"GGML_UNARY_OP_SILU", op::translate_unary_silu},
            {"GGML_OP_VIEW", op::translate_view}};
};
|
||||
|
||||
} // namespace ggml
|
||||
} // namespace frontend
|
||||
} // namespace ov
|
||||
|
|
@ -0,0 +1,13 @@
|
|||
#pragma once
|
||||
|
||||
#include "node_context.hpp"
|
||||
|
||||
namespace ov {
|
||||
namespace frontend {
|
||||
namespace ggml {
|
||||
|
||||
const std::unordered_map<std::string, CreatorFunction> get_supported_ops();
|
||||
|
||||
} // namespace ggml
|
||||
} // namespace frontend
|
||||
} // namespace ov
|
||||
|
|
@ -0,0 +1,145 @@
|
|||
#include "translate_session.hpp"
|
||||
|
||||
#include <exception>
|
||||
#include <fstream>
|
||||
|
||||
#include "input_model.hpp"
|
||||
|
||||
namespace ov {
|
||||
namespace frontend {
|
||||
namespace ggml {
|
||||
|
||||
using namespace ov::op;
|
||||
|
||||
// Binds the session to the model to translate and to the per-op translator
// table; conversion itself is deferred until get_converted_model().
TranslateSession::TranslateSession(const frontend::InputModel::Ptr& input_model,
                                   const std::unordered_map<std::string, CreatorFunction>& translator_map)
    : m_input_model(input_model),
      m_translator_map(translator_map),
      m_ov_model(nullptr) {}
|
||||
|
||||
// Returns the converted ov::Model, translating the input model lazily on the
// first call and caching the result for subsequent calls.
std::shared_ptr<Model> TranslateSession::get_converted_model() {
    if (!m_ov_model) {
        m_ov_model = translate_graph(m_input_model);
        // print_model_topology();
    }
    return m_ov_model;
}
|
||||
|
||||
void TranslateSession::print_model_topology() {
|
||||
try {
|
||||
std::ofstream outfile("model_topology.txt", std::ios::out | std::ios::app);
|
||||
if (!outfile.is_open()) {
|
||||
throw std::runtime_error("Failed to open file for writing model topology.");
|
||||
}
|
||||
|
||||
outfile << "============ Model ============" << std::endl;
|
||||
for (const auto& op : m_ov_model->get_ordered_ops()) {
|
||||
outfile << "Operation: " << op->get_friendly_name() << std::endl;
|
||||
outfile << " Inputs:" << std::endl;
|
||||
for (const auto& input : op->inputs()) {
|
||||
outfile << " " << input.get_node()->get_friendly_name() << " -> " << input.get_element_type() << " "
|
||||
<< input.get_shape() << std::endl;
|
||||
}
|
||||
outfile << " Outputs:" << std::endl;
|
||||
for (const auto& output : op->outputs()) {
|
||||
outfile << " " << output.get_node()->get_friendly_name() << " -> " << output.get_element_type()
|
||||
<< " " << output.get_shape() << std::endl;
|
||||
}
|
||||
outfile << std::endl;
|
||||
}
|
||||
outfile << "===============================" << std::endl;
|
||||
outfile.close();
|
||||
} catch (const std::exception& ex) {
|
||||
std::cout << ex.what() << std::endl;
|
||||
}
|
||||
}
|
||||
|
||||
// Translates the ggml graph held by `input_model` into an ov::Model.
// Graph inputs, extra inputs and weights are seeded into a tensor map, each
// node is converted through the translator table, and the decoder's output
// names are wrapped into Results. Parameters that end up unused by the
// converted graph are dropped from the final Model.
std::shared_ptr<Model> TranslateSession::translate_graph(const frontend::InputModel::Ptr& input_model) {
    ov::ParameterVector params;
    ov::ResultVector results;
    auto tensor_map = std::make_shared<TensorMap>();
    std::shared_ptr<Model> resulting_model;

    const auto& ggml_model = std::dynamic_pointer_cast<InputModel>(input_model);
    // Fix: validate the cast BEFORE dereferencing it — the original called
    // ggml_model->get_model_decoder() first, so a foreign InputModel would
    // crash before the check could fire.
    FRONT_END_GENERAL_CHECK(ggml_model, "nullptr for InputModel is given for translation into OV Model");
    std::shared_ptr<GgmlDecoder> ggml_model_decoder = ggml_model->get_model_decoder();

    // Seed the tensor map so node translators can resolve operands by name.
    for (const auto& it : ggml_model_decoder->get_model_inputs()) {
        params.push_back(std::dynamic_pointer_cast<ov::op::v0::Parameter>(it.second));
        (*tensor_map)[it.first] = it.second;
    }

    for (const auto& it : ggml_model_decoder->get_model_extra_inputs()) {
        params.push_back(std::dynamic_pointer_cast<ov::op::v0::Parameter>(it.second));
        (*tensor_map)[it.first] = it.second;
    }

    for (const auto& it : ggml_model_decoder->get_model_weights()) {
        (*tensor_map)[it.first] = it.second;
    }

    // Converts one ggml node and publishes its outputs into the tensor map.
    auto node_visitor = [&](std::shared_ptr<GgmlDecoder> node) {
        auto operation_type = node->get_op_type();
        ov::OutputVector converted_outputs;
        auto it = m_translator_map.find(operation_type);
        if (it != m_translator_map.end()) {
            try {
                NodeContext node_context(node, tensor_map, this);
                converted_outputs = it->second(node_context);
            } catch (const std::exception& ex) {
                // NOTE(review): a throwing translator is only logged here; the
                // output-count check below then fails with a generic message.
                std::cout << ex.what() << std::endl;
            }
        } else {
            // TODO: handle ops that have no registered translator
        }

        const auto& node_output_names = node->get_output_names();
        FRONT_END_OP_CONVERSION_CHECK(node_output_names.size() == converted_outputs.size(),
                                      "Number of ",
                                      operation_type,
                                      " outputs greater than number of converted outputs, which are ",
                                      node_output_names.size(),
                                      " and ",
                                      converted_outputs.size(),
                                      " respectively.");

        for (size_t i = 0; i < node_output_names.size(); ++i) {
            auto output_name = node_output_names[i];
            if (i < converted_outputs.size() && converted_outputs[i].get_node_shared_ptr() != nullptr) {
                (*tensor_map)[output_name] = converted_outputs[i];
            }
        }
    };

    ggml_model_decoder->visit_subgraph(node_visitor);

    // Wrap each declared model output in a Result node.
    for (const auto& name : ggml_model_decoder->get_model_output_names()) {
        FRONT_END_GENERAL_CHECK(tensor_map->find(name) != tensor_map->end(),
                                "Output name not found in tensor map: ",
                                name);
        auto result = std::make_shared<v0::Result>(tensor_map->at(name));
        // result->set_friendly_name(it);
        results.push_back(result);
    }

    // Keep only Parameters that are actually consumed by the converted graph.
    ov::ParameterVector used_params;
    for (const auto& param : params) {
        if (!param->output(0).get_target_inputs().empty()) {
            used_params.push_back(param);
        }
    }
    if (auto diff = params.size() - used_params.size()) {
        std::cout << diff << " parameters are not used in the model." << std::endl;
    }
    resulting_model = std::make_shared<Model>(results, used_params);

    return resulting_model;
}
|
||||
|
||||
} // namespace ggml
|
||||
} // namespace frontend
|
||||
} // namespace ov
|
||||
|
|
@ -0,0 +1,27 @@
|
|||
#pragma once
|
||||
|
||||
#include "input_model.hpp"
|
||||
#include "node_context.hpp"
|
||||
|
||||
namespace ov {
|
||||
namespace frontend {
|
||||
namespace ggml {
|
||||
|
||||
// Drives conversion of a ggml InputModel into an ov::Model using a table of
// per-op translator functions.
class TranslateSession {
public:
    TranslateSession(const frontend::InputModel::Ptr& input_model,
                     const std::unordered_map<std::string, CreatorFunction>& translator_map);

    // Returns the converted model, translating lazily on the first call.
    std::shared_ptr<Model> get_converted_model();
    // Performs the actual graph-to-ov::Model translation.
    std::shared_ptr<Model> translate_graph(const frontend::InputModel::Ptr& input_model);

private:
    // Debug helper: appends op/tensor topology of m_ov_model to model_topology.txt.
    void print_model_topology();
    const frontend::InputModel::Ptr m_input_model;
    // NOTE(review): held by reference — the translator map passed to the
    // constructor must outlive this session.
    const std::unordered_map<std::string, CreatorFunction>& m_translator_map;
    std::shared_ptr<Model> m_ov_model;  // cache for the converted model
};
|
||||
|
||||
} // namespace ggml
|
||||
} // namespace frontend
|
||||
} // namespace ov
|
||||
|
|
@ -0,0 +1,52 @@
|
|||
#include "utils.hpp"
|
||||
|
||||
#include <ctime>
|
||||
#include <memory>
|
||||
#include <openvino/op/gather.hpp>
|
||||
#include <openvino/op/shape_of.hpp>
|
||||
#include <string>
|
||||
|
||||
namespace ov {
|
||||
namespace frontend {
|
||||
namespace ggml {
|
||||
|
||||
// Returns the current local time formatted as "YYYY-MM-DD HH:MM:SS".
// NOTE(review): std::localtime uses shared static storage and is not
// thread-safe; acceptable for this debug/logging helper.
std::string getCurrentTime() {
    std::time_t now = std::time(nullptr);
    std::tm* local = std::localtime(&now);
    char buf[100];
    // Fix: the original ignored failures, returning an uninitialized buffer
    // (UB) if localtime returned nullptr or strftime produced no output.
    if (local == nullptr || std::strftime(buf, sizeof(buf), "%Y-%m-%d %H:%M:%S", local) == 0) {
        return "";
    }
    return buf;
}
|
||||
|
||||
void num_inputs_check(const NodeContext& context, size_t min_inputs, size_t max_inputs) {
|
||||
auto input_size = context.get_input_size();
|
||||
FRONT_END_OP_CONVERSION_CHECK(input_size >= min_inputs, "Got less inputs than expected");
|
||||
FRONT_END_OP_CONVERSION_CHECK(input_size <= max_inputs, "Got more inputs than expected");
|
||||
}
|
||||
|
||||
// Returns the first (outermost) dimension index at which the tensor described
// by shape `ne` and byte-strides `nb` stops being contiguous; 0 means fully
// contiguous. Dimensions are ordered outermost-first.
int non_cont_dim(std::vector<size_t> ne, std::vector<size_t> nb) {
    // Fix: guard empty input — `nb.size() - 1` underflows size_t and the
    // subsequent nb[dim] access was undefined behavior.
    if (nb.empty()) {
        return 0;  // an empty descriptor is trivially contiguous
    }
    int dim = nb.size() - 1;
    size_t bytes = nb[dim];
    // Walk inward-out: each outer stride must equal the accumulated byte size
    // of the dimensions inside it.
    for (int i = dim; i > 0; i--) {
        bytes *= ne[i];
        if (bytes != nb[i - 1]) {
            return i;
        }
    }
    return 0;
}
|
||||
|
||||
// Gathers the requested dimension entries (i32 indices) out of a ShapeOf result.
std::shared_ptr<ov::Node> get_dimensions(const std::shared_ptr<ov::op::v3::ShapeOf>& shape,
                                         const std::vector<int>& dims) {
    using namespace ov::op;
    auto axis = v0::Constant::create(ov::element::i32, ov::Shape{}, {0});
    auto indices = v0::Constant::create(ov::element::i32, ov::Shape{dims.size()}, dims);
    return std::make_shared<v8::Gather>(shape, indices, axis);
}
|
||||
|
||||
// Convenience overload: builds the ShapeOf for `node` and gathers `dims` from it.
std::shared_ptr<ov::Node> get_dimensions(const std::shared_ptr<ov::Node>& node, const std::vector<int>& dims) {
    return get_dimensions(std::make_shared<ov::op::v3::ShapeOf>(node), dims);
}
|
||||
|
||||
} // namespace ggml
|
||||
} // namespace frontend
|
||||
} // namespace ov
|
||||
|
|
@ -0,0 +1,68 @@
|
|||
#pragma once
|
||||
|
||||
#include <algorithm>
#include <numeric>
#include <vector>

#include <openvino/op/shape_of.hpp>

#include "node_context.hpp"
|
||||
|
||||
namespace ov {
|
||||
namespace frontend {
|
||||
namespace ggml {
|
||||
|
||||
void dump_ov_model(const std::shared_ptr<ov::Model> model);
|
||||
|
||||
void num_inputs_check(const NodeContext& context, size_t min_inputs, size_t max_inputs);
|
||||
|
||||
int non_cont_dim(std::vector<size_t> ne, std::vector<size_t> nb);
|
||||
|
||||
// Returns the permutation of indices that sorts `v` in descending order.
// Fix: std::stable_sort keeps the original relative order of equal values,
// so the resulting permutation is deterministic even with duplicate entries
// (std::sort left tie order unspecified).
template <typename T>
std::vector<int> argsort_descend(const std::vector<T>& v) {
    std::vector<int> idx(v.size());
    std::iota(idx.begin(), idx.end(), 0);
    std::stable_sort(idx.begin(), idx.end(), [&v](int i1, int i2) {
        return v[i1] > v[i2];
    });
    return idx;
}
|
||||
|
||||
// Returns a copy of `v` sorted in descending order.
template <typename T>
std::vector<T> sorted_descend(std::vector<T> v) {
    // Sorting the reversed view ascending yields a descending sequence.
    std::sort(v.rbegin(), v.rend());
    return v;
}
|
||||
|
||||
// Returns true if `strides` is not monotonically non-increasing, i.e. the
// tensor's memory layout has been permuted relative to its logical dimensions.
template <typename T>
bool is_permuted(const std::vector<T>& strides) {
    // Fix: the original loop bound `strides.size() - 1` underflows size_t for
    // an empty vector, causing out-of-bounds reads; `i + 1 < size()` is safe
    // for empty and single-element inputs.
    for (size_t i = 0; i + 1 < strides.size(); ++i) {
        if (strides[i] < strides[i + 1]) {
            return true;
        }
    }
    return false;
}
|
||||
|
||||
// Reorders `x` according to `perm`: result[i] = x[perm[i]].
template <typename T>
std::vector<T> permute(const std::vector<T>& x, const std::vector<int>& perm) {
    std::vector<T> out;
    out.reserve(perm.size());
    for (size_t i = 0; i < perm.size(); ++i) {
        out.push_back(x[perm[i]]);
    }
    return out;
}
|
||||
|
||||
std::shared_ptr<ov::Node> get_dimensions(const std::shared_ptr<op::v3::ShapeOf>& shape, const std::vector<int>& dims);
|
||||
std::shared_ptr<ov::Node> get_dimensions(const std::shared_ptr<ov::Node>& node, const std::vector<int>& dims);
|
||||
|
||||
namespace op {
|
||||
// Generic translator for ggml ops that map 1:1 onto a binary OV op `T`
// (e.g. Add, Subtract, Divide): wraps the node's two inputs in a `T` node.
template <typename T>
OutputVector translate_1to1_match_2_inputs(const NodeContext& context) {
    num_inputs_check(context, 2, 2);
    return {std::make_shared<T>(context.get_input(0), context.get_input(1))};
}
|
||||
} // namespace op
|
||||
|
||||
} // namespace ggml
|
||||
} // namespace frontend
|
||||
} // namespace ov
|
||||
|
|
@ -14,6 +14,8 @@
|
|||
|
||||
#include "ggml-impl.h"
|
||||
#include "ggml.h"
|
||||
#include "openvino/frontend.hpp"
|
||||
#include "openvino/input_model.hpp"
|
||||
|
||||
std::shared_ptr<GgmlOvDecoder> get_ggml_decoder(struct ggml_cgraph* cgraph) {
|
||||
return std::make_shared<GgmlOvDecoder>(nullptr, cgraph);
|
||||
|
|
@ -56,11 +58,11 @@ enum ggml_status openvino_frontend_compute(ggml_backend_t backend, struct ggml_c
|
|||
}
|
||||
|
||||
// auto devices = core.get_available_devices();
|
||||
static auto front_end = get_ggml_frontend();
|
||||
if (!front_end) {
|
||||
GGML_LOG_ERROR("GGML FrontEnd is not initialized \n");
|
||||
return GGML_STATUS_FAILED;
|
||||
}
|
||||
// static auto front_end = get_ggml_frontend();
|
||||
// if (!front_end) {
|
||||
// GGML_LOG_ERROR("GGML FrontEnd is not initialized \n");
|
||||
// return GGML_STATUS_FAILED;
|
||||
// }
|
||||
|
||||
using CachedItem = std::pair<std::shared_ptr<ov::Model>, ov::CompiledModel>;
|
||||
static std::unordered_map<struct ggml_cgraph*, CachedItem> compiled_cache;
|
||||
|
|
@ -79,14 +81,18 @@ enum ggml_status openvino_frontend_compute(ggml_backend_t backend, struct ggml_c
|
|||
compiled_model = it->second.second;
|
||||
compile_end_time = ggml_time_us();
|
||||
} else {
|
||||
std::shared_ptr<ov::frontend::DecoderBase> graph_decoder = ggml_decoder;
|
||||
ov::frontend::InputModel::Ptr input_model = front_end->load(graph_decoder);
|
||||
if (!input_model) {
|
||||
GGML_LOG_ERROR("Input Model is not loaded \n");
|
||||
return GGML_STATUS_FAILED;
|
||||
}
|
||||
// std::shared_ptr<ov::frontend::DecoderBase> graph_decoder = ggml_decoder;
|
||||
// ov::frontend::InputModel::Ptr input_model = front_end->load(graph_decoder);
|
||||
// if (!input_model) {
|
||||
// GGML_LOG_ERROR("Input Model is not loaded \n");
|
||||
// return GGML_STATUS_FAILED;
|
||||
// }
|
||||
|
||||
// model = front_end->convert(input_model);
|
||||
|
||||
ov::frontend::InputModel::Ptr input_model = std::make_shared<ov::frontend::ggml::InputModel>(ggml_decoder);
|
||||
model = ov::frontend::ggml::FrontEnd::convert(input_model);
|
||||
|
||||
model = front_end->convert(input_model);
|
||||
conversion_end_time = ggml_time_us();
|
||||
|
||||
if (getenv("GGML_OPENVINO_DUMP_IR")) {
|
||||
|
|
|
|||
Loading…
Reference in New Issue