* Use find_package in CMake to configure OpenVINO

* Remove OPENVINO_OP_DEBUG
* Simplify set_input_output in decoder
* Fix CPY in set_input_output
* Use params from converted ov model in setting input
This commit is contained in:
Yu, Zijun 2025-04-14 18:04:03 +08:00 committed by Mustafa Cavus
parent 84be5c6f15
commit 651b2c06cb
3 changed files with 114 additions and 243 deletions

View File

@@ -3,6 +3,7 @@
#include "ggml-impl.h"
#include "ggml-openvino.h"
#include "ggml-openvino/utils.h"
#include "ggml.h"
#include <string>
#include <mutex>
@@ -1367,7 +1368,7 @@ static const std::set<std::string>& openvino_ops = []() -> const std::set<std::s
{GGML_OP_REPEAT, {"Tile"}},
{GGML_OP_RESHAPE, {"Reshape"}},
{GGML_OP_RMS_NORM, {"Multiply", "Divide", "Sqrt"}},
{GGML_OP_ROPE, {"Custom"}},
{GGML_OP_ROPE, {"Sin", "Cos", "Multiply", "Add", "Subtract", "Split", "StridedSlice", "Concat"}},
{GGML_OP_SCALE, {"Multiply", "Constant"}},
{GGML_OP_SET, {"Assign"}},
{GGML_OP_SIN, {"Sin"}},
@@ -1383,23 +1384,38 @@ static const std::set<std::string>& openvino_ops = []() -> const std::set<std::s
{GGML_OP_TRANSPOSE, {"Transpose"}},
{GGML_OP_UPSCALE, {"Interpolate"}},
{GGML_OP_VIEW, {"Reshape"}},
{GGML_OP_CONT, {"Reshape", "StridedSlice"}},
{GGML_OP_CPY, {"Reshape", "ScatterNDUpdate"}},
{GGML_OP_WIN_PART, {"StridedSlice", "Concat", "Reshape", "Custom"}},
{GGML_OP_WIN_UNPART, {"Reshape", "Transpose", "Custom"}},
};
auto it = op_mapping.find(op->op);
if (it == op_mapping.end()) {
return false;
static const std::map<ggml_unary_op, std::vector<std::string>> op_mapping_unary = {
{GGML_UNARY_OP_SILU, {"Sigmoid", "Multiply"}},
};
std::vector<std::string> mapped_ops;
if (op->op == GGML_OP_UNARY) {
auto it = op_mapping_unary.find(ggml_get_unary_op(op));
if (it == op_mapping_unary.end()) {
return false;
}
mapped_ops = it->second;
} else {
auto it = op_mapping.find(op->op);
if (it == op_mapping.end()) {
return false;
}
mapped_ops = it->second;
}
for (const std::string& op_name : it->second) {
for (const std::string& op_name : mapped_ops) {
if (openvino_ops.count(op_name) == 0) {
return false;
}
}
return true;
#endif
}
static bool ggml_backend_openvino_device_supports_buft(ggml_backend_dev_t dev, ggml_backend_buffer_type_t buft) {

View File

@@ -6,222 +6,66 @@
#include <fstream>
void GgmlOvDecoder::set_input_output(ggml_tensor* node, std::map<std::string, ggml_tensor *>& inputs, std::map<std::string, ggml_tensor *>& outputs) {
std::string node_name;
if (node->op == GGML_OP_CPY) {
// CPY updates the input tensor in place. For later ov op that uses the
// input tensor of CPY, we need to make sure they get the updated tensor
// by putting the src tensor name in the tensor_map in
// <openvino>/src/frontends/ggml/src/translate_session.cpp
node_name = std::string(node->view_src->name);
} else {
node_name = std::string(node->name);
}
std::string src0_name = std::string(node->src[0]->name);
std::string node_name = std::string(node->name);
inputs[src0_name] = node->src[0];
outputs[node_name] = node;
m_input_names.push_back(src0_name);
m_op_node_name.emplace_back(src0_name, ggml_op_name(node->op));
if (node->op == GGML_OP_CPY && node->view_src) {
m_output_names.push_back(node->view_src->name);
} else {
m_output_names.push_back(node_name);
}
if (node->src[1]) {
std::string src1_name = std::string(node->src[1]->name);
inputs[src1_name] = node->src[1];
m_input_names.push_back(src1_name);
m_op_node_name.emplace_back(src1_name, ggml_op_name(node->op));
}
if (node->src[2]) {
std::string src2_name = std::string(node->src[2]->name);
inputs[src2_name] = node->src[2];
m_input_names.push_back(src2_name);
m_op_node_name.emplace_back(src2_name, ggml_op_name(node->op));
}
switch (node->op) {
// Unary OPs
case GGML_OP_UNARY:
case GGML_OP_RESHAPE:
case GGML_OP_TRANSPOSE:
case GGML_OP_PERMUTE:
case GGML_OP_RMS_NORM:
{
inputs[src0_name] = node->src[0];
outputs[node_name] = node;
m_input_names.push_back(src0_name);
m_op_node_name.emplace_back(src0_name, ggml_op_name(node->op));
m_output_names.push_back(node_name);
break;
case GGML_OP_CONT: {
if (ggml_is_contiguous(node->src[0]) && ggml_is_contiguous(node) &&
(node->src[0]->ne[0] * node->src[0]->nb[0] == node->src[0]->nb[1])) {
m_continuous = true;
} else {
m_continuous = false;
}
case GGML_OP_CONT:
{
if (ggml_is_contiguous(node->src[0])
&& ggml_is_contiguous(node)
&& (node->src[0]->ne[0] * node->src[0]->nb[0] == node->src[0]->nb[1])) {
inputs[src0_name] = node->src[0];
outputs[node_name] = node;
m_input_names.push_back(src0_name);
m_op_node_name.emplace_back(src0_name, ggml_op_name(node->op));
m_output_names.push_back(node_name);
ov::Shape input_shape = { static_cast<size_t>(node->src[0]->ne[2]),
static_cast<size_t>(node->src[0]->ne[1]),
static_cast<size_t>(node->src[0]->ne[0])};
auto input_param = std::make_shared<ov::op::v0::Parameter>(ov::element::f32, input_shape);
m_params.push_back(input_param);
m_continuous = true;
break;
}
if (node->src[0]->type == node->type && node->src[0]->ne[0] == node->ne[0] &&
node->src[0]->nb[0] == ggml_type_size(node->src[0]->type) &&
node->nb[0] == ggml_type_size(node->src[0]->type)) {
inputs[src0_name] = node->src[0];
outputs[node_name] = node;
m_input_names.push_back(src0_name);
m_op_node_name.emplace_back(src0_name, ggml_op_name(node->op));
m_output_names.push_back(node_name);
const size_t element_size = ggml_type_size(node->src[0]->type);
size_t valid_elems = static_cast<size_t>(node->src[0]->ne[0]); // 3072
size_t num_rows = static_cast<size_t>(node->src[0]->ne[1]); // 7
size_t dim2 = static_cast<size_t>(node->src[0]->ne[2]); // 1
size_t phys_stride = static_cast<size_t>(node->src[0]->nb[1]) / element_size; // 9216
// size_t total_phys = (num_rows - 1) * phys_stride + valid_elems; // 6*9216 + 3072 = 58368
size_t total_phys = num_rows * phys_stride; // 7 * 9216 = 64512
ov::Shape input_shape = { dim2, num_rows, phys_stride };
auto input_param = std::make_shared<ov::op::v0::Parameter>(ov::element::f32, input_shape);
m_params.push_back(input_param);
m_continuous = false;
break;
}
if (ggml_is_contiguous(node)) {
inputs[src0_name] = node->src[0];
outputs[node_name] = node;
m_input_names.push_back(src0_name);
m_op_node_name.emplace_back(src0_name, ggml_op_name(node->op));
m_output_names.push_back(node_name);
ov::Shape input_shape = { static_cast<size_t>(node->src[0]->ne[2]),
static_cast<size_t>(node->src[0]->ne[1]),
static_cast<size_t>(node->src[0]->ne[0])};
auto input_param = std::make_shared<ov::op::v0::Parameter>(ov::element::f32, input_shape);
m_params.push_back(input_param);
m_continuous = false;
break;
}
break;
}
case GGML_OP_CPY: {
m_continuous = ggml_is_contiguous(node);
break;
}
case GGML_OP_MUL_MAT: {
if (!ggml_is_contiguous(node->src[1]) ||
node->src[1]->ne[0] * node->src[1]->nb[0] != node->src[1]->nb[1]) {
m_continuous = false;
} else {
m_continuous = true;
}
case GGML_OP_CPY:
{
if (ggml_is_contiguous(node)) {
std::string src1_name = std::string(node->src[1]->name);
inputs[src0_name] = node->src[0];
src1_name = std::string(node->src[1]->view_src->name);
inputs[src1_name] = node->src[1];
node_name = std::string(node->view_src->name);
outputs[node_name] = node;
m_input_names.push_back(src0_name);
m_input_names.push_back(src1_name);
m_op_node_name.emplace_back(src0_name, ggml_op_name(node->op));
m_op_node_name.emplace_back(src1_name, ggml_op_name(node->op));
m_output_names.push_back(node_name);
m_continuous = true;
ov::Shape input1_shape = { static_cast<size_t>(node->src[0]->ne[2]),
static_cast<size_t>(node->src[0]->ne[1]),
static_cast<size_t>(node->src[0]->ne[0])};
auto input1_param = std::make_shared<ov::op::v0::Parameter>(ov::element::f32, input1_shape);
m_params.push_back(input1_param);
ov::Shape input2_shape = { static_cast<size_t>(node->src[1]->ne[2]),
static_cast<size_t>(node->src[1]->ne[1]),
static_cast<size_t>(node->src[1]->view_src->ne[0])};
auto input2_param = std::make_shared<ov::op::v0::Parameter>(ov::element::f16, input2_shape);
m_params.push_back(input2_param);
break;
} else {
std::string src1_name = std::string(node->src[1]->name);
inputs[src0_name] = node->src[0];
src1_name = std::string(node->src[1]->view_src->name);
inputs[src1_name] = node->src[1];
node_name = std::string(node->view_src->name);
outputs[node_name] = node;
m_input_names.push_back(src0_name);
m_input_names.push_back(src1_name);
m_op_node_name.emplace_back(src0_name, ggml_op_name(node->op));
m_op_node_name.emplace_back(src1_name, ggml_op_name(node->op));
m_output_names.push_back(node_name);
ov::Shape input0_shape = { static_cast<size_t>(node->src[0]->ne[2]),
static_cast<size_t>(node->src[0]->ne[1]),
static_cast<size_t>(node->src[0]->ne[0])};
auto input0_param = std::make_shared<ov::op::v0::Parameter>(ov::element::f32, input0_shape);
m_params.push_back(input0_param);
ov::Shape input1_shape = { 1, 1, static_cast<size_t>(node->src[1]->nb[2] / node->src[1]->nb[0])};
auto input1_param = std::make_shared<ov::op::v0::Parameter>(ov::element::f16, input1_shape);
m_params.push_back(input1_param);
m_continuous = false;
break;
}
}
// For view, input is node itself
case GGML_OP_VIEW:
{
inputs[src0_name] = node->src[0];
outputs[node_name] = node;
m_input_names.push_back(src0_name);
m_op_node_name.emplace_back(src0_name, ggml_op_name(node->op));
m_output_names.push_back(node_name);
break;
}
// SCALE
case GGML_OP_SCALE:
{
inputs[src0_name] = node->src[0];
outputs[node_name] = node;
m_input_names.push_back(src0_name);
m_op_node_name.emplace_back(src0_name, ggml_op_name(node->op));
m_output_names.push_back(node_name);
break;
}
case GGML_OP_MUL_MAT:
{
if (!ggml_is_contiguous(node->src[1]) || node->src[1]->ne[0] * node->src[1]->nb[0] != node->src[1]->nb[1]) {
m_continuous = false;
} else {
m_continuous = true;
}
std::string src1_name = std::string(node->src[1]->name);
inputs[src0_name] = node->src[0];
inputs[src1_name] = node->src[1];
outputs[node_name] = node;
m_input_names.push_back(src0_name);
m_op_node_name.emplace_back(src0_name, ggml_op_name(node->op));
m_input_names.push_back(src1_name);
m_op_node_name.emplace_back(src1_name, ggml_op_name(node->op));
m_output_names.push_back(node_name);
break;
}
// OPs with 2 inputs
case GGML_OP_ADD:
case GGML_OP_DIV:
case GGML_OP_MUL:
case GGML_OP_SUB:
case GGML_OP_GET_ROWS:
case GGML_OP_SOFT_MAX:
{
inputs[src0_name] = node->src[0];
outputs[node_name] = node;
m_input_names.push_back(src0_name);
m_op_node_name.emplace_back(src0_name, ggml_op_name(node->op));
m_output_names.push_back(node_name);
if (node->src[1]) {
std::string src1_name = std::string(node->src[1]->name);
inputs[src1_name] = node->src[1];
m_op_node_name.emplace_back(src1_name, ggml_op_name(node->op));
m_input_names.push_back(src1_name);
}
break;
}
// OPs with 3 inputs:
case GGML_OP_ROPE:
{
std::string src1_name = std::string(node->src[1]->name);
inputs[src0_name] = node->src[0];
inputs[src1_name] = node->src[1];
m_input_names.push_back(src0_name);
m_op_node_name.emplace_back(src0_name, ggml_op_name(node->op));
m_input_names.push_back(src1_name);
m_op_node_name.emplace_back(src1_name, ggml_op_name(node->op));
outputs[node_name] = node;
m_output_names.push_back(node_name);
if (node->src[2]) {
std::string src2_name = std::string(node->src[2]->name);
inputs[src2_name] = node->src[2];
m_input_names.push_back(src2_name);
m_op_node_name.emplace_back(src2_name, ggml_op_name(node->op));
}
break;
}
default:
break;
break;
}
default:
break;
}
}
@@ -334,7 +178,6 @@ GgmlOvDecoder::GgmlOvDecoder(struct ggml_tensor * node, struct ggml_cgraph * cgr
m_op_node_name.clear();
m_decoders.clear();
// If first init
if (m_node) {
set_input_output(m_node, m_inputs, m_outputs);
} else {
@@ -353,7 +196,7 @@ GgmlOvDecoder::GgmlOvDecoder(struct ggml_tensor * node, struct ggml_cgraph * cgr
ov::PartialShape GgmlOvDecoder::get_input_shape(const std::string& name) const {
ov::PartialShape input_shape;
// Use input_node->ne
// Use input_node->ne
ggml_tensor * node = m_inputs.at(name);
std::vector<size_t> shape;
@@ -440,7 +283,6 @@ const std::vector<std::shared_ptr<ov::op::v0::Parameter>>& GgmlOvDecoder::get_pa
ov::PartialShape GgmlOvDecoder::get_output_shape(const std::string& name) const {
ov::PartialShape output_shape;
// Use input_node->ne
ggml_tensor * node = m_outputs.at(name);
std::vector<size_t> shape;
@@ -552,10 +394,10 @@ const std::string& GgmlOvDecoder::get_op_type() const {
auto unary_it = unaryOpTypeMap.find(ggml_get_unary_op(m_node));
if (unary_it != unaryOpTypeMap.end()) {
return unary_it->second;
}
}
}
return it->second;
}
}
static const std::string unknown_op = "UNKNOWN_OP";
return unknown_op;
}

View File

@@ -1,9 +1,11 @@
#include "utils.h"
#include "ggml-impl.h"
#include "ggml-backend-impl.h"
#include "ggml-impl.h"
#include <cstdlib>
#include <fstream>
#include <openvino/core/graph_util.hpp>
#include <openvino/frontend/manager.hpp>
#include <openvino/openvino.hpp>
#include <fstream>
using ov::frontend::ggml::GgmlDecoder;
@@ -20,27 +22,14 @@ std::vector<std::pair<std::string, ov::Tensor>> get_ggml_graph_input_tensors(std
std::string op_node_name = ggml_decoder->get_op_node_name(name, op_iter++);
// auto node_op_name = ggml_decoder->get_node_op_name(name);
auto input_data = ggml_decoder->get_input_ggml_tensor(name)->data;
auto check_if_contiguous = ggml_is_contiguous(ggml_decoder->get_input_ggml_tensor(name));
#ifdef GGML_OPENVINO_DEBUG
printf("Subgraph input %d: %g\n", inp, *(double*)(input_data));
#endif
ov::Tensor input_tensor;
ov::Shape input_shape = ggml_decoder->get_input_shape(name).to_shape();
ov::element::Type input_type = ggml_decoder->get_input_type(name);
size_t element_size = input_type.size();
std::vector<size_t> input_stride = ggml_decoder->get_input_stride(name);
if (op_node_name == "CONT" && input_shape[0] == 1 // Except for the kqv_merge node
&& (input_shape[1] != 1 && flag || input_shape[2]*element_size!=input_stride[1])
) {
const size_t num_rows = static_cast<size_t>(ggml_decoder->get_input_shape(name).to_shape()[1]);
const size_t dim2 = static_cast<size_t>(ggml_decoder->get_input_shape(name).to_shape()[0]);
size_t phys_stride = static_cast<size_t>(input_stride[1]) / element_size;
ov::Shape input_shape = { dim2, num_rows, phys_stride }; // {1, 7, 9216 }
input_tensor = ov::Tensor(ggml_decoder->get_input_type(name), input_shape, input_data);
} else {
input_tensor = ov::Tensor(ggml_decoder->get_input_type(name), ggml_decoder->get_input_shape(name).to_shape(), input_data);
}
input_tensor = ov::Tensor(ggml_decoder->get_input_type(name), input_shape, input_data);
// input_tensors[name] = input_tensor;
input_tensors.emplace_back(name, input_tensor);
@@ -49,6 +38,18 @@ std::vector<std::pair<std::string, ov::Tensor>> get_ggml_graph_input_tensors(std
return input_tensors;
}
/**
 * Build an ov::Tensor for a single graph input identified by `name`.
 *
 * The returned tensor wraps the ggml tensor's existing data buffer (no copy):
 * the underlying ggml data must stay alive for as long as the tensor is used.
 * Shape and element type are taken from the decoder's view of the input.
 *
 * NOTE(review): the original body also called get_input_stride(name) but never
 * used the result; the call is dropped here on the assumption the getter is a
 * pure accessor with no side effects — confirm against GgmlOvDecoder.
 */
ov::Tensor get_ggml_graph_input_tensor(std::shared_ptr<GgmlOvDecoder> ggml_decoder, std::string& name) {
    auto input_data = ggml_decoder->get_input_ggml_tensor(name)->data;
#ifdef GGML_OPENVINO_DEBUG
    // Debug trace reinterprets the first element as double regardless of the
    // actual element type — presumably only meaningful for f64 inputs; verify.
    printf("Subgraph input %s: %g\n", name.c_str(), *(double*)(input_data));
#endif
    ov::Shape input_shape = ggml_decoder->get_input_shape(name).to_shape();
    // Construct directly instead of default-construct-then-assign.
    return ov::Tensor(ggml_decoder->get_input_type(name), input_shape, input_data);
}
std::map<std::string, void*> get_ggml_graph_output_dst(std::shared_ptr<GgmlOvDecoder> ggml_decoder) {
std::map<std::string, void*> output_tensors;
auto output_names = ggml_decoder->get_output_names();
@@ -79,7 +80,7 @@ static ov::frontend::FrontEnd::Ptr get_ggml_frontend() {
enum ggml_status openvino_frontend_compute(ggml_backend_t backend, struct ggml_cgraph * cgraph, const int32_t start_index, const int32_t end_index, bool flag) {
static ov::Core core;
// auto devices = core.get_available_devices();
// Get GGML Frontend
// Get GGML Frontend
static auto front_end = get_ggml_frontend();
if (!front_end) {
GGML_LOG_ERROR("GGML FrontEnd is not initialized \n");
@@ -102,9 +103,17 @@ enum ggml_status openvino_frontend_compute(ggml_backend_t backend, struct ggml_c
#endif
}
// Convert InputModel -> ov::Model
// Convert InputModel -> ov::Model
std::shared_ptr<ov::Model> model = front_end->convert(input_model);
if (getenv("OPENVINO_DUMP_GRAPH")) {
char timestamped_filename[64];
auto timestamp = (long long)ggml_time_us();
snprintf(timestamped_filename, sizeof(timestamped_filename),
"model_%lld.xml", timestamp);
ov::serialize(model, timestamped_filename);
}
if (!model) {
GGML_LOG_ERROR("Model is not converted \n");
} else {
@@ -122,10 +131,14 @@ enum ggml_status openvino_frontend_compute(ggml_backend_t backend, struct ggml_c
auto input_names = ggml_decoder->get_input_names();
auto input_tensors = get_ggml_graph_input_tensors(ggml_decoder, flag);
// Set input tensor
for (size_t i = 0; i < input_names.size(); i++) {
infer_request.set_input_tensor(i, input_tensors.at(i).second);
auto ov_params = model->get_parameters();
for (size_t i = 0; i < ov_params.size(); i++) {
auto param_name = ov_params[i]->get_friendly_name();
infer_request.set_input_tensor(i, get_ggml_graph_input_tensor(ggml_decoder, param_name));
}
// for (size_t i = 0; i < input_names.size(); i++) {
// infer_request.set_input_tensor(i, input_tensors.at(i).second);
// }
infer_request.infer();