Executing the CONT & VIEW operators in the OV Frontend is OK
This commit is contained in:
parent
081b52667b
commit
901f7347ff
|
|
@ -482,6 +482,9 @@ void ggml_backend_openvino_mul_mat(struct ggml_tensor * dst) {
|
|||
// flat shapes:
|
||||
ov::Shape flat_shape_src0 = { total_src0 };
|
||||
ov::Shape flat_shape_src1 = { total_src1 };
|
||||
// Same as above
|
||||
// ov::Shape flat_shape_src0 = { ggml_nelements(src0) };
|
||||
// ov::Shape flat_shape_src1 = { ggml_nelements(src1) };
|
||||
|
||||
// Create a Parameter node for collecting non-continuous data
|
||||
auto param_src0 = std::make_shared<ov::op::v0::Parameter>(ov::element::f16, flat_shape_src0);
|
||||
|
|
@ -526,9 +529,6 @@ void ggml_backend_openvino_mul_mat(struct ggml_tensor * dst) {
|
|||
auto batched_matmul = std::make_shared<ov::op::v0::MatMul>(B, A, false, false);
|
||||
// batched_matmul output: shape = [32,7,32]
|
||||
|
||||
std::vector<int64_t> full_dst_shape = { dst->ne[2], dst->ne[1], dst->ne[0]};
|
||||
auto final_shape_const = ov::op::v0::Constant::create(ov::element::i64, { full_dst_shape.size() }, full_dst_shape);
|
||||
|
||||
auto model = std::make_shared<ov::Model>(ov::NodeVector{ batched_matmul }, ov::ParameterVector{param_src0, param_src1});
|
||||
|
||||
ov::Core core;
|
||||
|
|
@ -541,7 +541,7 @@ void ggml_backend_openvino_mul_mat(struct ggml_tensor * dst) {
|
|||
infer_request.set_input_tensor(0, tensor_src0);
|
||||
infer_request.set_input_tensor(1, tensor_src1);
|
||||
|
||||
ov::Tensor tensor_dst(ov::element::f32, ov::Shape(full_dst_shape.begin(), full_dst_shape.end()), dst->data);
|
||||
ov::Tensor tensor_dst(ov::element::f32, { dst->ne[0], dst->ne[1], dst->ne[2]}, dst->data);
|
||||
infer_request.set_output_tensor(0, tensor_dst);
|
||||
|
||||
infer_request.infer();
|
||||
|
|
@ -564,6 +564,9 @@ void ggml_backend_openvino_mul_mat(struct ggml_tensor * dst) {
|
|||
|
||||
ov::Shape flat_shape_src0 = { total_src0 };
|
||||
ov::Shape flat_shape_src1 = { total_src1 };
|
||||
// Same as above
|
||||
// ov::Shape flat_shape_src0 = { ggml_nelements(src0) };
|
||||
// ov::Shape flat_shape_src1 = { ggml_nelements(src1) };
|
||||
|
||||
auto param_flat_src0 = std::make_shared<ov::op::v0::Parameter>(ov::element::f16, flat_shape_src0);
|
||||
auto param_flat_src1 = std::make_shared<ov::op::v0::Parameter>(ov::element::f32, flat_shape_src1);
|
||||
|
|
@ -602,6 +605,7 @@ void ggml_backend_openvino_mul_mat(struct ggml_tensor * dst) {
|
|||
|
||||
std::shared_ptr<ov::op::v0::MatMul> matmul = std::make_shared<ov::op::v0::MatMul>(reshape_src1, A_for_mul, false, false);
|
||||
auto model = std::make_shared<ov::Model>(ov::NodeVector{matmul}, ov::ParameterVector{param_flat_src0, param_flat_src1});
|
||||
// ov::save_model(model, "/home/user/zhan/merge_git_commits/llama.cpp-ov/002_backend_mulmat_model.xml");
|
||||
|
||||
auto compiled_model = core.compile_model(model, "CPU");
|
||||
auto infer_request = compiled_model.create_infer_request();
|
||||
|
|
@ -618,8 +622,35 @@ void ggml_backend_openvino_reshape(ggml_tensor *dst) {
|
|||
}
|
||||
|
||||
void ggml_backend_openvino_view(ggml_tensor *dst) {
|
||||
ov::Core core;
|
||||
ov::Shape tensor_shape{static_cast<size_t>(dst->ne[3]), static_cast<size_t>(dst->ne[2]), static_cast<size_t>(dst->ne[1]), static_cast<size_t>(dst->ne[0])};
|
||||
|
||||
GGML_UNUSED(dst);
|
||||
// auto param = std::make_shared<ov::op::v0::Parameter>(ov::element::f32, tensor_shape);
|
||||
auto param = std::make_shared<ov::op::v0::Parameter>(ov::element::f16, tensor_shape);
|
||||
|
||||
auto reshaped = std::make_shared<ov::op::v1::Reshape>(param,
|
||||
ov::op::v0::Constant::create(ov::element::i64, { tensor_shape.size() }, tensor_shape),
|
||||
false);
|
||||
|
||||
auto model = std::make_shared<ov::Model>(ov::NodeVector{reshaped}, ov::ParameterVector{param});
|
||||
// ov::save_model(model, "/home/user/zhan/merge_git_commits/llama.cpp-ov/003_backend_view_model.xml");
|
||||
|
||||
auto compiled_model = core.compile_model(model, "CPU");
|
||||
|
||||
ov::InferRequest infer_request = compiled_model.create_infer_request();
|
||||
|
||||
// ov::Tensor input_tensor(ov::element::f32, tensor_shape, dst->data);
|
||||
ov::Tensor input_tensor(ov::element::f16, tensor_shape, dst->data);
|
||||
// infer_request.set_tensor(param, input_tensor);
|
||||
infer_request.set_input_tensor(0, input_tensor);
|
||||
|
||||
// ov::Tensor output_tensor(ov::element::f32, tensor_shape, dst->data);
|
||||
ov::Tensor output_tensor(ov::element::f16, tensor_shape, dst->data);
|
||||
infer_request.set_output_tensor(0, output_tensor);
|
||||
|
||||
infer_request.infer();
|
||||
// auto output_tensor = infer_request.get_output_tensor(0);
|
||||
// dst->data = output_tensor.data();
|
||||
}
|
||||
|
||||
void ggml_backend_openvino_dup_bytes(struct ggml_tensor *dst) {
|
||||
|
|
@ -992,31 +1023,33 @@ static enum ggml_status ggml_backend_openvino_graph_compute(ggml_backend_t backe
|
|||
// openvino_frontend_compute(backend, cgraph);
|
||||
// Process nodes in order
|
||||
for (int i = 0; i < cgraph->n_nodes; i++) {
|
||||
if (std::find(reshape_indices.begin(), reshape_indices.end(), i) != reshape_indices.end()) {
|
||||
ggml_backend_openvino_reshape(cgraph->nodes[i]);
|
||||
if (std::find(permute_indices.begin(), permute_indices.end(), i) != permute_indices.end()) {
|
||||
ggml_backend_openvino_permute(cgraph->nodes[i]);
|
||||
// } else if (std::find(cont_indices.begin(), cont_indices.end(), i) != cont_indices.end()) {
|
||||
// ggml_backend_openvino_dup_bytes(cgraph->nodes[i]);
|
||||
} else if (std::find(view_indices.begin(), view_indices.end(), i) != view_indices.end()) {
|
||||
ggml_backend_openvino_view(cgraph->nodes[i]);
|
||||
// } else if (std::find(view_indices.begin(), view_indices.end(), i) != view_indices.end()) {
|
||||
// ggml_backend_openvino_view(cgraph->nodes[i]);
|
||||
// } else if (std::find(cpy_indices.begin(), cpy_indices.end(), i) != cpy_indices.end()) {
|
||||
// ggml_backend_openvino_cpy(cgraph->nodes[i]);
|
||||
} else if (std::find(transpose_indices.begin(), transpose_indices.end(), i) != transpose_indices.end()) {
|
||||
ggml_backend_openvino_transpose(cgraph->nodes[i]);
|
||||
} else if (std::find(permute_indices.begin(), permute_indices.end(), i) != permute_indices.end()) {
|
||||
ggml_backend_openvino_permute(cgraph->nodes[i]);
|
||||
} else if (std::find(mul_mat_indices.begin(), mul_mat_indices.end(), i) != mul_mat_indices.end()) {
|
||||
ggml_backend_openvino_mul_mat(cgraph->nodes[i]);
|
||||
} else if (std::find(reshape_indices.begin(), reshape_indices.end(), i) != reshape_indices.end()) {
|
||||
ggml_backend_openvino_reshape(cgraph->nodes[i]);
|
||||
// } else if (std::find(mul_mat_indices.begin(), mul_mat_indices.end(), i) != mul_mat_indices.end()) {
|
||||
// ggml_backend_openvino_mul_mat(cgraph->nodes[i]);
|
||||
} else {
|
||||
// Process a range of nodes with openvino_frontend_compute
|
||||
int start_index = i;
|
||||
while (i < cgraph->n_nodes &&
|
||||
// std::find(cpy_indices.begin(), cpy_indices.end(), i) == cpy_indices.end() &&
|
||||
// std::find(cont_indices.begin(), cont_indices.end(), i) == cont_indices.end() &&
|
||||
std::find(mul_mat_indices.begin(), mul_mat_indices.end(), i) == mul_mat_indices.end()) {
|
||||
while (i < cgraph->n_nodes
|
||||
// && std::find(view_indices.begin(), view_indices.end(), i) == view_indices.end()
|
||||
// && std::find(cpy_indices.begin(), cpy_indices.end(), i) == cpy_indices.end()
|
||||
// && std::find(cont_indices.begin(), cont_indices.end(), i) == cont_indices.end()
|
||||
// && std::find(mul_mat_indices.begin(), mul_mat_indices.end(), i) == mul_mat_indices.end()
|
||||
) {
|
||||
i++;
|
||||
}
|
||||
if (start_index < i) {
|
||||
openvino_frontend_compute(backend, cgraph, start_index, --i);
|
||||
openvino_frontend_compute(backend, cgraph, start_index, --i);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -6,12 +6,20 @@
|
|||
#include <fstream>
|
||||
|
||||
void GgmlOvDecoder::set_input_output(ggml_tensor* node, std::map<std::string, ggml_tensor *>& inputs, std::map<std::string, ggml_tensor *>& outputs) {
|
||||
m_node_op_name[node->name] = ggml_op_name(node->op);
|
||||
// m_node_op_name[node->name] = ggml_op_name(node->op);
|
||||
|
||||
// std::string src0_name = std::string(node->src[0]->name) + "_" + std::to_string(node->src[0]->view_offs) + "_input_" + ggml_op_name(node->src[0]->op);
|
||||
// std::string node_name = std::string(node->name) + "_" + std::to_string(node->view_offs) + "_output_" + ggml_op_name(node->op);
|
||||
|
||||
// Executing a single CONT operator is OK
|
||||
std::string src0_name = std::string(node->src[0]->name) + "_" + std::to_string(node->src[0]->view_offs) + "_" + ggml_op_name(node->src[0]->op);
|
||||
std::string node_name = std::string(node->name) + "_" + std::to_string(node->view_offs) + "_" + ggml_op_name(node->op);
|
||||
// std::string src0_name = std::string(node->src[0]->name) + "_" + std::to_string(node->src[0]->view_offs) + "_" + ggml_op_name(node->src[0]->op);
|
||||
// std::string node_name = std::string(node->name) + "_" + std::to_string(node->view_offs) + "_" + ggml_op_name(node->op);
|
||||
|
||||
// std::string src0_name = std::string(node->src[0]->name) + "_" + std::to_string(node->src[0]->view_offs);
|
||||
// std::string node_name = std::string(node->name) + "_" + std::to_string(node->view_offs);
|
||||
|
||||
std::string src0_name = std::string(node->src[0]->name);
|
||||
std::string node_name = std::string(node->name);
|
||||
switch (node->op) {
|
||||
// Unary OPs
|
||||
case GGML_OP_UNARY:
|
||||
|
|
@ -151,6 +159,7 @@ void GgmlOvDecoder::set_input_output(ggml_tensor* node, std::map<std::string, gg
|
|||
// For view, input is node itself
|
||||
case GGML_OP_VIEW:
|
||||
{
|
||||
// std::string node_name = std::string(node->name) + "_" + std::to_string(node->view_offs) + "_output_" + ggml_op_name(node->op);
|
||||
inputs[node_name] = node;
|
||||
outputs[node_name] = node;
|
||||
m_input_names.push_back(node_name);
|
||||
|
|
@ -161,21 +170,29 @@ void GgmlOvDecoder::set_input_output(ggml_tensor* node, std::map<std::string, gg
|
|||
// SCALE
|
||||
case GGML_OP_SCALE:
|
||||
{
|
||||
inputs[node_name] = node->src[0];
|
||||
inputs[src0_name] = node->src[0];
|
||||
outputs[node_name] = node;
|
||||
m_input_names.push_back(node_name);
|
||||
m_node_op_name[node_name] = ggml_op_name(node->op);
|
||||
m_input_names.push_back(src0_name);
|
||||
m_node_op_name[src0_name] = ggml_op_name(node->op);
|
||||
m_output_names.push_back(node_name);
|
||||
break;
|
||||
}
|
||||
case GGML_OP_MUL_MAT:
|
||||
{
|
||||
std::string src1_name = std::string(node->src[1]->name) + "_" + std::to_string(node->src[1]->view_offs);
|
||||
ov::Shape flat_shape_src0 = { node->src[0]->ne[0]*node->src[0]->ne[1]*node->src[0]->ne[2] };
|
||||
ov::Shape flat_shape_src1 = { node->src[1]->ne[0]*node->src[1]->ne[1]*node->src[1]->ne[2] };
|
||||
auto param_src0 = std::make_shared<ov::op::v0::Parameter>(ov::element::f16, flat_shape_src0);
|
||||
auto param_src1 = std::make_shared<ov::op::v0::Parameter>(ov::element::f32, flat_shape_src1);
|
||||
m_params.push_back(param_src0);
|
||||
m_params.push_back(param_src1);
|
||||
if (!ggml_is_contiguous(node->src[1]) || node->src[1]->ne[0] * node->src[1]->nb[0] != node->src[1]->nb[1]) {
|
||||
m_continuous = false;
|
||||
} else {
|
||||
m_continuous = true;
|
||||
}
|
||||
// std::string src1_name = std::string(node->src[1]->name) + "_" + std::to_string(node->src[1]->view_offs) + "_input_" + ggml_op_name(node->src[1]->op);
|
||||
// std::string src1_name = std::string(node->src[1]->name) + "_" + std::to_string(node->src[1]->view_offs);
|
||||
std::string src1_name = std::string(node->src[1]->name);
|
||||
inputs[src0_name] = node->src[0];
|
||||
inputs[src1_name] = node->src[1];
|
||||
outputs[node_name] = node;
|
||||
|
|
@ -200,7 +217,8 @@ void GgmlOvDecoder::set_input_output(ggml_tensor* node, std::map<std::string, gg
|
|||
m_node_op_name[src0_name] = ggml_op_name(node->op);
|
||||
m_output_names.push_back(node_name);
|
||||
if (node->src[1]) {
|
||||
std::string src1_name = std::string(node->src[1]->name) + "_" + std::to_string(node->src[1]->view_offs);
|
||||
// std::string src1_name = std::string(node->src[1]->name) + "_" + std::to_string(node->src[1]->view_offs);
|
||||
std::string src1_name = std::string(node->src[1]->name);
|
||||
inputs[src1_name] = node->src[1];
|
||||
m_node_op_name[src1_name] = ggml_op_name(node->op);
|
||||
m_input_names.push_back(src1_name);
|
||||
|
|
@ -210,7 +228,8 @@ void GgmlOvDecoder::set_input_output(ggml_tensor* node, std::map<std::string, gg
|
|||
// OPs with 3 inputs:
|
||||
case GGML_OP_ROPE:
|
||||
{
|
||||
std::string src1_name = std::string(node->src[1]->name) + "_" + std::to_string(node->src[1]->view_offs);
|
||||
// std::string src1_name = std::string(node->src[1]->name) + "_" + std::to_string(node->src[1]->view_offs);
|
||||
std::string src1_name = std::string(node->src[1]->name);
|
||||
inputs[src0_name] = node->src[0];
|
||||
inputs[src1_name] = node->src[1];
|
||||
m_input_names.push_back(src0_name);
|
||||
|
|
@ -220,7 +239,8 @@ void GgmlOvDecoder::set_input_output(ggml_tensor* node, std::map<std::string, gg
|
|||
outputs[node_name] = node;
|
||||
m_output_names.push_back(node_name);
|
||||
if (node->src[2]) {
|
||||
std::string src2_name = std::string(node->src[2]->name) + "_" + std::to_string(node->src[2]->view_offs);
|
||||
// std::string src2_name = std::string(node->src[2]->name) + "_" + std::to_string(node->src[2]->view_offs);
|
||||
std::string src2_name = std::string(node->src[2]->name);
|
||||
inputs[src2_name] = node->src[2];
|
||||
m_input_names.push_back(src2_name);
|
||||
m_node_op_name[src2_name] = ggml_op_name(node->op);
|
||||
|
|
@ -334,13 +354,6 @@ ov::PartialShape GgmlOvDecoder::get_input_shape(const std::string& name) const {
|
|||
ggml_tensor * node = m_inputs.at(name);
|
||||
std::vector<size_t> shape;
|
||||
|
||||
// [TODO] Check here whether the op is MUL_MAT and, if so, set the shape to one dimension
|
||||
if(m_node_op_name.at(name) == "MUL_MAT") {
|
||||
shape.push_back(static_cast<size_t>(node->ne[0] * node->ne[1] * node->ne[2]));
|
||||
input_shape = ov::PartialShape(shape);
|
||||
return input_shape;
|
||||
}
|
||||
|
||||
for (int i = GGML_MAX_DIMS - 2; i >= 0 ; --i) {
|
||||
if (node->ne[i] == 0) {
|
||||
return input_shape;
|
||||
|
|
@ -405,10 +418,8 @@ std::vector<std::string> GgmlOvDecoder::get_input_names() const {
|
|||
|
||||
const std::string& GgmlOvDecoder::get_node_op_name(const std::string& name) const {
|
||||
auto it = m_node_op_name.find(name);
|
||||
if (it != m_node_op_name.end()) {
|
||||
return it->second;
|
||||
}
|
||||
return "";
|
||||
static const std::string empty_str;
|
||||
return (it != m_node_op_name.end()) ? it->second : empty_str;
|
||||
}
|
||||
|
||||
const std::vector<std::shared_ptr<ov::op::v0::Parameter>>& GgmlOvDecoder::get_params() const {
|
||||
|
|
|
|||
|
|
@ -26,18 +26,9 @@ std::map<std::string, ov::Tensor> get_ggml_graph_input_tensors(std::shared_ptr<G
|
|||
// if (node_op_name == "CPY" && (input_shape[0] != 7)) {
|
||||
// input_tensor = ov::Tensor(ggml_decoder->get_input_type(name), {80000}, input_data);
|
||||
|
||||
// } else if (node_op_name == "CONT" || node_op_name == "MUL_MAT") {
|
||||
// // auto input_shape = ggml_decoder->get_input_shape(name).to_shape();
|
||||
// // size_t total_size = 1;
|
||||
// // for (auto dim : input_shape) {
|
||||
// // total_size *= dim;
|
||||
// // }
|
||||
// // ov::Shape new_shape = {total_size};
|
||||
// input_tensor = ov::Tensor(ggml_decoder->get_input_type(name), {ggml_decoder->get_input_shape(name).to_shape()[0]}, input_data);
|
||||
// } else {
|
||||
if (node_op_name == "CONT" && ggml_decoder->check_if_continuous()) {
|
||||
ov::Shape flat_shape = { ggml_decoder->get_input_shape(name).to_shape()[0] *
|
||||
ggml_decoder->get_input_shape(name).to_shape()[1] *
|
||||
ov::Shape flat_shape = { ggml_decoder->get_input_shape(name).to_shape()[0] *
|
||||
ggml_decoder->get_input_shape(name).to_shape()[1] *
|
||||
ggml_decoder->get_input_shape(name).to_shape()[2] };
|
||||
input_tensor = ov::Tensor(ggml_decoder->get_input_type(name), flat_shape, input_data);
|
||||
} else if ( node_op_name == "CONT" &&
|
||||
|
|
@ -59,6 +50,11 @@ std::map<std::string, ov::Tensor> get_ggml_graph_input_tensors(std::shared_ptr<G
|
|||
size_t total_valid = valid_i * valid_j * valid_k; // 96 * 32 * 7 = 21504
|
||||
ov::Shape flat_input_shape = { total_valid };
|
||||
input_tensor = ov::Tensor(ggml_decoder->get_input_type(name), flat_input_shape, input_data);
|
||||
} else if (node_op_name == "MUL_MAT") {
|
||||
ov::Shape flat_shape = { ggml_decoder->get_input_shape(name).to_shape()[0] *
|
||||
ggml_decoder->get_input_shape(name).to_shape()[1] *
|
||||
ggml_decoder->get_input_shape(name).to_shape()[2] };
|
||||
input_tensor = ov::Tensor(ggml_decoder->get_input_type(name), flat_shape, input_data);
|
||||
} else {
|
||||
input_tensor = ov::Tensor(ggml_decoder->get_input_type(name), ggml_decoder->get_input_shape(name).to_shape(), input_data);
|
||||
}
|
||||
|
|
@ -125,7 +121,7 @@ enum ggml_status openvino_frontend_compute(ggml_backend_t backend, struct ggml_c
|
|||
|
||||
// Convert InputModel -> ov::Model
|
||||
std::shared_ptr<ov::Model> model = front_end->convert(input_model);
|
||||
ov::save_model(model, "/home/user/zhan/merge_git_commits/llama.cpp-ov/001_model.xml");
|
||||
// ov::save_model(model, "/home/user/zhan/merge_git_commits/llama.cpp-ov/001_model.xml");
|
||||
|
||||
if (!model) {
|
||||
GGML_LOG_ERROR("Model is not converted \n");
|
||||
|
|
|
|||
Loading…
Reference in New Issue