1. Update the implementation of CPY node when it's non-contiguous
2. Remove duplicate get node operation function
parent cff473a9e2
commit 467a5ddf04
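The diff below reworks the non-contiguous branch of ggml_backend_openvino_cpy (and the matching decoder and input-tensor handling) around explicit gather/scatter index tables built from ggml's logical sizes (ne) and byte strides (nb). As a rough, standalone illustration of that index arithmetic only, here is a minimal sketch; the shapes, strides, and variable names are invented for the example and are not taken from the commit:

    // Standalone sketch (not part of the commit): build gather/scatter index
    // tables for a non-contiguous copy, using made-up shapes and strides.
    #include <cstdint>
    #include <cstdio>
    #include <vector>

    int main() {
        // Hypothetical source view: 4 logical columns x 3 logical rows of f32,
        // with a padded row stride (nb1) of 8 elements instead of 4.
        const int64_t ne0 = 4, ne1 = 3;        // logical cols, rows (ggml ne[0], ne[1])
        const int64_t src_elem = 4;            // sizeof(float)
        const int64_t src_nb0 = src_elem;      // byte stride between columns
        const int64_t src_nb1 = 8 * src_elem;  // byte stride between rows (padded)

        // Hypothetical destination: f16 with contiguous columns and a padded row stride.
        const int64_t dst_elem = 2;            // sizeof(ggml_fp16_t)
        const int64_t dst_nb1 = 6 * dst_elem;  // byte stride between rows

        // Gather indices: flat element offsets into the source buffer, derived
        // from byte strides (the (row*nb1 + col*nb0) / elem_size idea from the diff).
        std::vector<int64_t> gather_idx;
        for (int64_t row = 0; row < ne1; ++row) {
            for (int64_t col = 0; col < ne0; ++col) {
                gather_idx.push_back((row * src_nb1 + col * src_nb0) / src_elem);
            }
        }

        // Scatter indices: flat element offsets into the destination buffer.
        std::vector<int64_t> scatter_idx;
        for (int64_t row = 0; row < ne1; ++row) {
            for (int64_t col = 0; col < ne0; ++col) {
                scatter_idx.push_back(row * (dst_nb1 / dst_elem) + col);
            }
        }

        // The backend feeds such tables to Gather / ScatterNDUpdate constants;
        // here we only print the resulting layout mapping.
        for (size_t i = 0; i < gather_idx.size(); ++i) {
            printf("elem %zu: src offset %lld -> dst offset %lld\n",
                   i, (long long)gather_idx[i], (long long)scatter_idx[i]);
        }
        return 0;
    }

In the commit itself, the index tables become i64 Constant inputs to Gather and ScatterNDUpdate, with a Convert from f32 to f16 in between, so the strided copy runs entirely inside the OpenVINO graph.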

@@ -849,6 +849,7 @@ static void ggml_backend_openvino_permute(const struct ggml_tensor * dst) {
void ggml_backend_openvino_cpy(struct ggml_tensor *dst) {
    const struct ggml_tensor *src0 = dst->src[0];
    const struct ggml_tensor *src1 = dst->src[1];
    assert(src0 != nullptr);
    assert(ggml_nelements(dst) == ggml_nelements(src0));

@@ -889,64 +890,81 @@ void ggml_backend_openvino_cpy(struct ggml_tensor *dst) {
        infer_request.set_output_tensor(0, dst_tensor);
        infer_request.infer();
    } else {
        std::vector<int64_t> gather_idx;
        for (int row = 0; row < dst->src[0]->ne[1]; row++) {
            for (int col = 0; col < dst->src[0]->ne[0]; col++) {
                gather_idx.push_back((row*dst->src[0]->nb[1]+col*dst->src[0]->nb[0])/4);
            }
        }
        size_t N = gather_idx.size();
        ov::Shape gather_idx_shape = {N, 1};
        std::vector<int64_t> scatter_idx;
        for (int row = 0; row < dst->ne[1]; row++) {
            for (int col = 0; col < dst->ne[0]; col++) {
                scatter_idx.push_back(row * dst->nb[1] / 2 + col);
            }
        }
        ov::Shape scatter_idx_shape = {N, 1};
        int src0_elem_size = ggml_type_size(src0->type);
        int src1_elem_size = ggml_type_size(src1->type);

        // param_src0 shape => 1D, rank=1, size is large enough. For example, row*col= 21504 + some padding, e.g. 80000
        // ov::Shape flat_src0_shape = {80000};
        ov::Shape flat_src0_shape = {dst->src[0]->nb[2]};
        auto param_src0 = std::make_shared<ov::op::v0::Parameter>(ov::element::f32, flat_src0_shape);
        // auto param_src00 = std::make_shared<ov::op::v0::Parameter>(ov::element::f32, flat_src0_shape);
        int src0_logical_cols = src0->ne[0];
        int src0_logical_rows = src0->ne[1];
        int src1_logical_cols = src1->ne[0];
        int src1_logical_rows = src1->ne[1];

        int src0_phys_cols = src0->nb[0] / src0_elem_size;
        int src0_phys_rows = src0_logical_rows;

        int src1_phys_cols = src1->nb[1] / src1_elem_size;
        int src1_phys_rows = src1_logical_rows;

        ov::Shape src0_phys_shape = {1, static_cast<size_t>(src0_phys_rows), static_cast<size_t>(src0_phys_cols) };
        ov::Shape src1_phys_shape = {1, static_cast<size_t>(src1_phys_rows), static_cast<size_t>(src1_phys_cols) };

        size_t logical_elems = static_cast<size_t>(src0_logical_cols * src0_logical_rows);
        size_t src_flat_size = 1 * src0_phys_cols * src0_phys_rows;
        size_t dst_flat_size = 1 * src1_phys_rows * src1_phys_cols;

        ov::Core core;

        std::vector<int64_t> gather_idx;
        gather_idx.reserve(logical_elems);
        for (int row = 0; row < src0_logical_rows; row++) {
            for (int col = 0; col < src0_logical_cols; col++) {
                gather_idx.push_back(static_cast<int64_t>(row + col * src0_phys_rows));
            }
        }
        ov::Shape gather_idx_shape = { logical_elems };

        std::vector<int64_t> scatter_idx;
        scatter_idx.reserve(logical_elems);
        for (int row = 0; row < src1_logical_rows; row++) {
            for (int col = 0; col < src1_logical_cols; col++) {
                scatter_idx.push_back(static_cast<int64_t>(row * src1_phys_cols + col));
            }
        }
        ov::Shape scatter_idx_shape = { logical_elems, 1 };

        auto param_src0 = std::make_shared<ov::op::v0::Parameter>(ov::element::f32, src0_phys_shape);
        auto param_src1 = std::make_shared<ov::op::v0::Parameter>(ov::element::f16, src1_phys_shape);

        auto src_flat_shape_const = ov::op::v0::Constant::create(ov::element::i64, {1},
                                                                 { static_cast<int64_t>(src_flat_size) });
        auto reshape_src = std::make_shared<ov::op::v1::Reshape>(param_src0, src_flat_shape_const, false);
        auto dst_flat_shape_const = ov::op::v0::Constant::create(ov::element::i64, {1},
                                                                 { static_cast<int64_t>(dst_flat_size) });
        auto reshape_dst = std::make_shared<ov::op::v1::Reshape>(param_src1, dst_flat_shape_const, false);

        auto gather_indices_const = ov::op::v0::Constant::create(ov::element::i64, gather_idx_shape, gather_idx);
        auto gather_axis_const = ov::op::v0::Constant::create(ov::element::i64, {1}, {0});
        auto gathered = std::make_shared<ov::op::v8::Gather>(
            param_src0, gather_indices_const, gather_axis_const);

        auto axis_const = ov::op::v0::Constant::create(ov::element::i64, {1}, {0});
        auto gathered = std::make_shared<ov::op::v8::Gather>(reshape_src, gather_indices_const, axis_const);
        auto converted = std::make_shared<ov::op::v0::Convert>(gathered, ov::element::f16);

        // param_dst_base shape => 1D, rank=1, size large enough, e.g. row=3072 => i up to 3071 => offset i*64=196544 + j*2, e.g. 200000
        // ov::Shape flat_dst_shape = {200000, 1};
        ov::Shape flat_dst_shape = {dst->nb[2], 1};
        auto param_dst_base = std::make_shared<ov::op::v0::Parameter>(ov::element::f16, flat_dst_shape);
        // auto param_dst_base11 = std::make_shared<ov::op::v0::Parameter>(ov::element::f16, flat_dst_shape);

        auto scatter_indices_const = ov::op::v0::Constant::create(ov::element::i64, scatter_idx_shape, scatter_idx);
        auto scatter = std::make_shared<ov::op::v3::ScatterNDUpdate>(reshape_dst, scatter_indices_const, converted);

        // ScatterNDUpdate( base, scatter_indices, updates )
        // scatter_indices last dimension = 1 => each index is 1D coordinate
        auto scatter = std::make_shared<ov::op::v3::ScatterNDUpdate>(
            param_dst_base, scatter_indices_const, converted
        );

        ov::ParameterVector params = { param_src0, param_dst_base };
        // ov::ParameterVector params = { param_src0};
        // ov::ParameterVector params = { param_src00, param_dst_base11};
        auto model = std::make_shared<ov::Model>(ov::OutputVector{ scatter }, params);
        std::vector<int64_t> dst_phys_shape_vec = {1, static_cast<int64_t>(src1_phys_rows),
                                                   static_cast<int64_t>(src1_phys_cols) };
        auto dst_phys_shape_const = ov::op::v0::Constant::create(ov::element::i64, {3}, dst_phys_shape_vec);
        auto final_output = std::make_shared<ov::op::v1::Reshape>(scatter, dst_phys_shape_const, false);

        ov::ParameterVector params = { param_src0, param_src1 };
        auto model = std::make_shared<ov::Model>(ov::OutputVector{ final_output }, params);
        auto compiled_model = core.compile_model(model, "CPU");
        auto infer_request = compiled_model.create_infer_request();

        ov::Tensor tensor_src0(ov::element::f32, flat_src0_shape, src0->data);
        ov::Tensor tensor_dst_base(ov::element::f16, flat_dst_shape, dst->data);
        ov::Tensor tensor_src(ov::element::f32, src0_phys_shape, src0->data);
        ov::Tensor tensor_dst(ov::element::f16, src1_phys_shape, src1->data);
        infer_request.set_input_tensor(0, tensor_src);
        infer_request.set_input_tensor(1, tensor_dst);

        infer_request.set_input_tensor(0, tensor_src0);
        infer_request.set_input_tensor(1, tensor_dst_base);

        ov::Tensor out_tensor(ov::element::f16, flat_dst_shape, dst->data);
        ov::Tensor out_tensor(ov::element::f16, src1_phys_shape, dst->data);
        infer_request.set_output_tensor(0, out_tensor);

        infer_request.infer();

@@ -986,15 +1004,17 @@ static enum ggml_status ggml_backend_openvino_graph_compute(ggml_backend_t backe
    // Process nodes in order

    // if (cgraph->nodes[0]->ne[1] == 1) {
    // bool prompt_process_flag = false;
    bool prompt_process_flag = true;
    if (cgraph->nodes[0]->ne[1] == 1) {
        prompt_process_flag = false;
    }
    // int end_node = cgraph->n_nodes - 1;
    // openvino_frontend_compute(backend, cgraph, 0, end_node, prompt_process_flag);
    // } else {

    for (int i = 0; i < cgraph->n_nodes; i++) {
        if (std::find(permute_indices.begin(), permute_indices.end(), i) != permute_indices.end()) {
            // ggml_backend_openvino_permute(cgraph->nodes[i]);
            ggml_backend_openvino_permute(cgraph->nodes[i]);
        // } else if (std::find(mul_mat_indices.begin(), mul_mat_indices.end(), i) != mul_mat_indices.end()) {
        // ggml_backend_openvino_mul_mat(cgraph->nodes[i]);
        } else if (std::find(view_indices.begin(), view_indices.end(), i) != view_indices.end()) {

@@ -1020,7 +1040,7 @@ static enum ggml_status ggml_backend_openvino_graph_compute(ggml_backend_t backe
                i++;
            }
            if (start_index < i) {
                openvino_frontend_compute(backend, cgraph, start_index, --i);
                openvino_frontend_compute(backend, cgraph, start_index, --i, prompt_process_flag);
            }
        }
    }

@@ -36,8 +36,6 @@ public:
    virtual std::vector<std::string> get_input_names() const = 0;

    virtual const std::string& get_node_op_name(const std::string& name) const = 0;

    virtual std::string& get_op_node_name(const std::string& name, const int index = -1) = 0;

    // virtual const struct tensor_info get_node_op_info(const std::string& name) const = 0;

@@ -6,18 +6,6 @@
#include <fstream>

void GgmlOvDecoder::set_input_output(ggml_tensor* node, std::map<std::string, ggml_tensor *>& inputs, std::map<std::string, ggml_tensor *>& outputs) {
    // m_node_op_name[node->name] = ggml_op_name(node->op);

    // std::string src0_name = std::string(node->src[0]->name) + "_" + std::to_string(node->src[0]->view_offs) + "_input_" + ggml_op_name(node->src[0]->op);
    // std::string node_name = std::string(node->name) + "_" + std::to_string(node->view_offs) + "_output_" + ggml_op_name(node->op);

    // Executing a single CONT operator is OK
    // std::string src0_name = std::string(node->src[0]->name) + "_" + std::to_string(node->src[0]->view_offs) + "_" + ggml_op_name(node->src[0]->op);
    // std::string node_name = std::string(node->name) + "_" + std::to_string(node->view_offs) + "_" + ggml_op_name(node->op);

    // std::string src0_name = std::string(node->src[0]->name) + "_" + std::to_string(node->src[0]->view_offs);
    // std::string node_name = std::string(node->name) + "_" + std::to_string(node->view_offs);

    std::string src0_name = std::string(node->src[0]->name);
    std::string node_name = std::string(node->name);

@@ -32,7 +20,6 @@ void GgmlOvDecoder::set_input_output(ggml_tensor* node, std::map<std::string, gg
        inputs[src0_name] = node->src[0];
        outputs[node_name] = node;
        m_input_names.push_back(src0_name);
        m_node_op_name[src0_name] = ggml_op_name(node->op);
        m_op_node_name.emplace_back(src0_name, ggml_op_name(node->op));
        m_output_names.push_back(node_name);
        break;

@@ -43,7 +30,6 @@ void GgmlOvDecoder::set_input_output(ggml_tensor* node, std::map<std::string, gg
        inputs[src0_name] = node->src[0];
        outputs[node_name] = node;
        m_input_names.push_back(src0_name);
        m_node_op_name[src0_name] = ggml_op_name(node->op);
        m_op_node_name.emplace_back(src0_name, ggml_op_name(node->op));
        m_output_names.push_back(node_name);

@@ -64,7 +50,6 @@ void GgmlOvDecoder::set_input_output(ggml_tensor* node, std::map<std::string, gg
        inputs[src0_name] = node->src[0];
        outputs[node_name] = node;
        m_input_names.push_back(src0_name);
        m_node_op_name[src0_name] = ggml_op_name(node->op);
        m_op_node_name.emplace_back(src0_name, ggml_op_name(node->op));
        m_output_names.push_back(node_name);

@@ -87,7 +72,6 @@ void GgmlOvDecoder::set_input_output(ggml_tensor* node, std::map<std::string, gg
        inputs[src0_name] = node->src[0];
        outputs[node_name] = node;
        m_input_names.push_back(src0_name);
        m_node_op_name[src0_name] = ggml_op_name(node->op);
        m_op_node_name.emplace_back(src0_name, ggml_op_name(node->op));
        m_output_names.push_back(node_name);

@@ -107,32 +91,45 @@ void GgmlOvDecoder::set_input_output(ggml_tensor* node, std::map<std::string, gg
            inputs[src0_name] = node->src[0];
            outputs[node_name] = node;
            m_input_names.push_back(src0_name);
            m_node_op_name[src0_name] = ggml_op_name(node->op);
            m_op_node_name.emplace_back(src0_name, ggml_op_name(node->op));
            m_output_names.push_back(node_name);
            m_continuous = true;

            // ov::Shape src_shape(node->src[0]->ne, node->src[0]->ne + 3);
            // auto input_param = std::make_shared<ov::op::v0::Parameter>(ov::element::f32, src_shape);
            // m_params.push_back(input_param);
            ov::Shape input_shape = { static_cast<size_t>(node->src[0]->ne[2]),
                                      static_cast<size_t>(node->src[0]->ne[1]),
                                      static_cast<size_t>(node->src[0]->ne[0])};
            auto input_param = std::make_shared<ov::op::v0::Parameter>(ov::element::f32, input_shape);
            m_params.push_back(input_param);
            break;
        } else {
            for (int64_t i1 = 0; i1 < node->ne[1]; ++i1) { // ne[1] = 3072
                for (int64_t i0 = 0; i0 < node->ne[0]; ++i0) { // ne[0] = 7
                    int64_t src_index = i0 * node->src[0]->nb[0] / sizeof(float) + // stride in nb[0]
                                        i1 * node->src[0]->nb[1] / sizeof(float); // stride in nb[1]
                    char *dst_ptr = static_cast<char *>(node->data) +
                                    i0 * node->nb[0] + i1 * node->nb[1];
                    *(ggml_fp16_t *)dst_ptr = GGML_FP32_TO_FP16(((float*)node->src[0]->data)[src_index]);
                }
            }
            // inputs[node->src[0]->name] = node->src[0];
            inputs[node_name] = node;
            std::string src1_name = std::string(node->src[1]->name);
            inputs[src0_name] = node->src[0];
            inputs[src1_name] = node->src[1];
            outputs[node_name] = node;
            m_input_names.push_back(node_name);
            m_node_op_name[node_name] = ggml_op_name(node->op);
            m_input_names.push_back(src0_name);
            m_input_names.push_back(src1_name);
            m_op_node_name.emplace_back(src0_name, ggml_op_name(node->op));
            m_op_node_name.emplace_back(src1_name, ggml_op_name(node->op));
            m_output_names.push_back(node_name);

            int src0_elem_size = ggml_type_size(node->src[0]->type);
            int src1_elem_size = ggml_type_size(node->src[1]->type);

            int src0_logical_rows = node->src[0]->ne[1];
            int src1_logical_rows = node->src[1]->ne[1];

            int src0_phys_cols = node->src[0]->nb[0] / src0_elem_size;
            int src0_phys_rows = src0_logical_rows;

            int src1_phys_cols = node->src[1]->nb[1] / src1_elem_size;
            int src1_phys_rows = src1_logical_rows;
            ov::Shape src0_phys_shape = {1, static_cast<size_t>(src0_phys_rows), static_cast<size_t>(src0_phys_cols) };
            ov::Shape src1_phys_shape = {1, static_cast<size_t>(src1_phys_rows), static_cast<size_t>(src1_phys_cols) };
            auto input0_param = std::make_shared<ov::op::v0::Parameter>(ov::element::f32, src0_phys_shape);
            auto input1_param = std::make_shared<ov::op::v0::Parameter>(ov::element::f16, src1_phys_shape);
            m_params.push_back(input0_param);
            m_params.push_back(input1_param);

            m_continuous = false;

            break;

@@ -144,7 +141,6 @@ void GgmlOvDecoder::set_input_output(ggml_tensor* node, std::map<std::string, gg
        inputs[node_name] = node;
        outputs[node_name] = node;
        m_input_names.push_back(node_name);
        m_node_op_name[node_name] = ggml_op_name(node->op);
        m_op_node_name.emplace_back(node_name, ggml_op_name(node->op));
        m_output_names.push_back(node_name);
        break;

@@ -155,7 +151,6 @@ void GgmlOvDecoder::set_input_output(ggml_tensor* node, std::map<std::string, gg
        inputs[src0_name] = node->src[0];
        outputs[node_name] = node;
        m_input_names.push_back(src0_name);
        m_node_op_name[src0_name] = ggml_op_name(node->op);
        m_op_node_name.emplace_back(src0_name, ggml_op_name(node->op));
        m_output_names.push_back(node_name);
        break;

@@ -167,17 +162,13 @@ void GgmlOvDecoder::set_input_output(ggml_tensor* node, std::map<std::string, gg
        } else {
            m_continuous = true;
        }
        // std::string src1_name = std::string(node->src[1]->name) + "_" + std::to_string(node->src[1]->view_offs) + "_input_" + ggml_op_name(node->src[1]->op);
        // std::string src1_name = std::string(node->src[1]->name) + "_" + std::to_string(node->src[1]->view_offs);
        std::string src1_name = std::string(node->src[1]->name);
        inputs[src0_name] = node->src[0];
        inputs[src1_name] = node->src[1];
        outputs[node_name] = node;
        m_input_names.push_back(src0_name);
        m_node_op_name[src0_name] = ggml_op_name(node->op);
        m_op_node_name.emplace_back(src0_name, ggml_op_name(node->op));
        m_input_names.push_back(src1_name);
        m_node_op_name[src1_name] = ggml_op_name(node->op);
        m_op_node_name.emplace_back(src1_name, ggml_op_name(node->op));
        m_output_names.push_back(node_name);
        break;

@@ -193,15 +184,11 @@ void GgmlOvDecoder::set_input_output(ggml_tensor* node, std::map<std::string, gg
        inputs[src0_name] = node->src[0];
        outputs[node_name] = node;
        m_input_names.push_back(src0_name);
        m_node_op_name[src0_name] = ggml_op_name(node->op);
        m_op_node_name.emplace_back(src0_name, ggml_op_name(node->op));
        m_output_names.push_back(node_name);
        if (node->src[1]) {
            // std::string src1_name = std::string(node->src[1]->name) + "_" + std::to_string(node->src[1]->view_offs) + "_input_" + ggml_op_name(node->src[1]->op);
            // std::string src1_name = std::string(node->src[1]->name) + "_" + std::to_string(node->src[1]->view_offs);
            std::string src1_name = std::string(node->src[1]->name);
            inputs[src1_name] = node->src[1];
            m_node_op_name[src1_name] = ggml_op_name(node->op);
            m_op_node_name.emplace_back(src1_name, ggml_op_name(node->op));
            m_input_names.push_back(src1_name);
        }

@@ -210,26 +197,19 @@ void GgmlOvDecoder::set_input_output(ggml_tensor* node, std::map<std::string, gg
    // OPs with 3 inputs:
    case GGML_OP_ROPE:
    {
        // std::string src1_name = std::string(node->src[1]->name) + "_" + std::to_string(node->src[1]->view_offs) + "_input_" + ggml_op_name(node->src[1]->op);
        // std::string src1_name = std::string(node->src[1]->name) + "_" + std::to_string(node->src[1]->view_offs);
        std::string src1_name = std::string(node->src[1]->name);
        inputs[src0_name] = node->src[0];
        inputs[src1_name] = node->src[1];
        m_input_names.push_back(src0_name);
        m_node_op_name[src0_name] = ggml_op_name(node->op);
        m_op_node_name.emplace_back(src0_name, ggml_op_name(node->op));
        m_input_names.push_back(src1_name);
        m_node_op_name[src1_name] = ggml_op_name(node->op);
        m_op_node_name.emplace_back(src1_name, ggml_op_name(node->op));
        outputs[node_name] = node;
        m_output_names.push_back(node_name);
        if (node->src[2]) {
            // std::string src2_name = std::string(node->src[2]->name) + "_" + std::to_string(node->src[2]->view_offs) + "_input_" + ggml_op_name(node->src[2]->op);
            // std::string src2_name = std::string(node->src[2]->name) + "_" + std::to_string(node->src[2]->view_offs);
            std::string src2_name = std::string(node->src[2]->name);
            inputs[src2_name] = node->src[2];
            m_input_names.push_back(src2_name);
            m_node_op_name[src2_name] = ggml_op_name(node->op);
            m_op_node_name.emplace_back(src2_name, ggml_op_name(node->op));
        }
        break;

@@ -423,12 +403,6 @@ std::vector<std::string> GgmlOvDecoder::get_input_names() const {
    return m_input_names;
}

const std::string& GgmlOvDecoder::get_node_op_name(const std::string& name) const {
    auto it = m_node_op_name.find(name);
    static const std::string empty_str;
    return (it != m_node_op_name.end()) ? it->second : empty_str;
}

std::string& GgmlOvDecoder::get_op_node_name(const std::string& key_name, const int index) {
    if (index == -1) {
        for (size_t i = 0; i < m_op_node_name.size(); ++i) {

@@ -71,7 +71,6 @@ public:
        return m_continuous;
    }

    virtual const std::string& get_node_op_name(const std::string& name) const override;
    std::string& get_op_node_name(const std::string& key_name, const int index) override;

    virtual const std::vector<std::shared_ptr<ov::op::v0::Parameter>>& get_params() const override;

@@ -90,7 +89,6 @@ private:
    std::string m_op_name;
    mutable std::string m_name;
    bool m_continuous;
    std::map<std::string, std::string> m_node_op_name;
    std::vector<std::shared_ptr<ov::op::v0::Parameter>> m_params;
    std::vector<std::pair<std::string, std::string>> m_op_node_name;
};

@@ -22,24 +22,35 @@ std::vector<std::pair<std::string, ov::Tensor>> get_ggml_graph_input_tensors(std
        std::string op_node_name = ggml_decoder->get_op_node_name(name, op_iter++);
        // auto node_op_name = ggml_decoder->get_node_op_name(name);
        auto input_data = ggml_decoder->get_input_ggml_tensor(name)->data;
        auto check_if_contiguous = ggml_is_contiguous(ggml_decoder->get_input_ggml_tensor(name));
#ifdef GGML_OPENVINO_DEBUG
        printf("Subgraph input %d: %g\n", inp, *(double*)(input_data));
#endif
        ov::Tensor input_tensor;
        auto input_shape = ggml_decoder->get_input_shape(name).to_shape();
        // if (node_op_name == "CPY" && (input_shape[0] != 7)) {
        // input_tensor = ov::Tensor(ggml_decoder->get_input_type(name), {80000}, input_data);

        if (flag & op_node_name == "CONT" && input_shape[0] == 1 && input_shape[1] != 1) {
            std::vector<size_t> input_stride = ggml_decoder->get_input_stride(name);
            ov::element::Type input_type = ggml_decoder->get_input_type(name);
            size_t element_size = input_type.size();
            // const size_t valid_elems = static_cast<size_t>(ggml_decoder->get_input_shape(name).to_shape()[2]);
            const size_t num_rows = static_cast<size_t>(ggml_decoder->get_input_shape(name).to_shape()[1]);
            const size_t dim2 = static_cast<size_t>(ggml_decoder->get_input_shape(name).to_shape()[0]);
            size_t phys_stride = static_cast<size_t>(input_stride[1]) / element_size;
            ov::Shape input_shape = { dim2, num_rows, phys_stride }; // {1, 7, 9216 }
            input_tensor = ov::Tensor(ggml_decoder->get_input_type(name), input_shape, input_data);
        } else if (op_node_name == "CPY" && (!check_if_contiguous || input_shape[2] == 1)) { // [TODO]: temporary check of whether the input tensor of the Phi-3 model's CPY node is contiguous
            std::vector<size_t> input_stride = ggml_decoder->get_input_stride(name);
            ov::element::Type input_type = ggml_decoder->get_input_type(name);
            size_t element_size = input_type.size();
            ov::Shape phys_shape;
            static int iter = 0;
            if (iter++ % 2 == 0) {
                phys_shape = {1, input_shape[1], input_stride[2] / element_size};
                input_tensor = ov::Tensor(ov::element::f32, phys_shape, input_data);
            } else {
                phys_shape = {1, input_shape[1], input_stride[1] / element_size};
                input_tensor = ov::Tensor(ov::element::f16, phys_shape, input_data);
            }
        } else {
            input_tensor = ov::Tensor(ggml_decoder->get_input_type(name), ggml_decoder->get_input_shape(name).to_shape(), input_data);
        }

@@ -105,7 +116,7 @@ enum ggml_status openvino_frontend_compute(ggml_backend_t backend, struct ggml_c

    // Convert InputModel -> ov::Model
    std::shared_ptr<ov::Model> model = front_end->convert(input_model);
    ov::save_model(model, "/home/user/zhan/merge_git_commits/llama.cpp-ov/001_model.xml");
    // ov::save_model(model, "/home/user/zhan/merge_git_commits/llama.cpp-ov/001_model.xml");

    if (!model) {
        GGML_LOG_ERROR("Model is not converted \n");

@@ -117,7 +128,7 @@ enum ggml_status openvino_frontend_compute(ggml_backend_t backend, struct ggml_c

    // Loading a model to the device
    ov::CompiledModel compiled_model = core.compile_model(model);
    ov::save_model(compiled_model.get_runtime_model(), "/home/user/zhan/merge_git_commits/llama.cpp-ov/001_compile_model.xml");
    // ov::save_model(compiled_model.get_runtime_model(), "/home/user/zhan/merge_git_commits/llama.cpp-ov/001_compile_model.xml");

    // Create infer request
    ov::InferRequest infer_request = compiled_model.create_infer_request();