1. All operators implemented using OpenVINO can be successfully executed individually.
2. The VIEW op's output tensor shape is not the same as the CONT (non-contiguous) input tensor shape. 3. CPY (non-contiguous) cannot currently be implemented with the original input/output tensor shapes and data (the original shape must be changed when creating the input/output tensors). At present, the VIEW op is executed in the ggml backend while the other ops are executed in the OpenVINO Frontend.
This commit is contained in:
parent
e08a7fda33
commit
cff473a9e2
|
|
@ -537,8 +537,7 @@ void ggml_backend_openvino_mul_mat(struct ggml_tensor * dst) {
|
|||
auto reshape_output = std::make_shared<ov::op::v1::Reshape>(
|
||||
batched_matmul,
|
||||
ov::op::v0::Constant::create(ov::element::i64, {3}, final_output_shape),
|
||||
false
|
||||
);
|
||||
false);
|
||||
|
||||
auto model = std::make_shared<ov::Model>(ov::NodeVector{ reshape_output },
|
||||
ov::ParameterVector{ param_src0, param_src1 });
|
||||
|
|
@ -659,6 +658,7 @@ void ggml_backend_openvino_view(ggml_tensor *dst) {
|
|||
false);
|
||||
|
||||
auto model = std::make_shared<ov::Model>(ov::NodeVector{reshaped}, ov::ParameterVector{param});
|
||||
// auto model = std::make_shared<ov::Model>(ov::NodeVector{param}, ov::ParameterVector{param});
|
||||
// ov::save_model(model, "/home/user/zhan/merge_git_commits/llama.cpp-ov/003_backend_view_model.xml");
|
||||
|
||||
auto compiled_model = core.compile_model(model, "CPU");
|
||||
|
|
@ -742,106 +742,91 @@ void ggml_backend_openvino_dup_bytes(struct ggml_tensor *dst) {
|
|||
const size_t nb0 = dst->nb[0];
|
||||
|
||||
if (src0->type == dst->type && ne00 == dst->ne[0] && nb00 == element_size && nb0 == element_size) {
|
||||
const size_t valid_elems = static_cast<size_t>(src0->ne[0]);
|
||||
const size_t num_rows = static_cast<size_t>(src0->ne[1]);
|
||||
const size_t dim2 = static_cast<size_t>(src0->ne[2]);
|
||||
const size_t dim3 = static_cast<size_t>(src0->ne[3]);
|
||||
const size_t valid_elems = static_cast<size_t>(src0->ne[0]); // 3072
|
||||
const size_t num_rows = static_cast<size_t>(src0->ne[1]); // 7
|
||||
const size_t dim2 = static_cast<size_t>(src0->ne[2]); // 1
|
||||
|
||||
size_t phys_stride = static_cast<size_t>(src0->nb[1]) / element_size;
|
||||
size_t total_logical = valid_elems * num_rows * dim2 * dim3;
|
||||
size_t phys_stride = static_cast<size_t>(src0->nb[1]) / element_size; // 9216
|
||||
|
||||
std::vector<float> contiguous_data(total_logical);
|
||||
ov::Shape input_shape = { dim2, num_rows, phys_stride }; // 如 {1, 7, 9216 }
|
||||
ov::Shape logical_shape = { dim2, num_rows, valid_elems }; // {1, 7, 3072}
|
||||
|
||||
for (size_t j = 0; j < num_rows; j++) {
|
||||
const float *src_row = reinterpret_cast<const float*>(src0->data) + j * phys_stride;
|
||||
float *dst_row = contiguous_data.data() + j * valid_elems;
|
||||
std::copy(src_row, src_row + valid_elems, dst_row);
|
||||
}
|
||||
auto input_param = std::make_shared<ov::op::v0::Parameter>(ov::element::f32, input_shape);
|
||||
|
||||
ov::Shape logical_shape = { dim2, num_rows, valid_elems};
|
||||
auto input_param = std::make_shared<ov::op::v0::Parameter>(ov::element::f32, logical_shape);
|
||||
auto identity_const = ov::op::v0::Constant::create(ov::element::i64,
|
||||
{ logical_shape.size() },
|
||||
std::vector<int64_t>(logical_shape.begin(), logical_shape.end()));
|
||||
auto identity_op = std::make_shared<ov::op::v1::Reshape>(input_param, identity_const, false);
|
||||
std::vector<int64_t> begin = { 0, 0, 0 };
|
||||
std::vector<int64_t> end = { static_cast<int64_t>(dim2),
|
||||
static_cast<int64_t>(num_rows),
|
||||
static_cast<int64_t>(valid_elems) };
|
||||
std::vector<int64_t> strides = { 1, 1, 1 };
|
||||
|
||||
auto model = std::make_shared<ov::Model>(ov::OutputVector{identity_op},
|
||||
ov::ParameterVector{input_param});
|
||||
auto begin_const = ov::op::v0::Constant::create(ov::element::i64, { begin.size() }, begin);
|
||||
auto end_const = ov::op::v0::Constant::create(ov::element::i64, { end.size() }, end);
|
||||
auto strides_const = ov::op::v0::Constant::create(ov::element::i64, { strides.size() }, strides);
|
||||
|
||||
std::vector<int64_t> begin_mask = {0, 0, 0};
|
||||
std::vector<int64_t> end_mask = {0, 0, 0};
|
||||
auto slice = std::make_shared<ov::op::v1::StridedSlice>(
|
||||
input_param,
|
||||
begin_const,
|
||||
end_const,
|
||||
strides_const,
|
||||
begin_mask,
|
||||
end_mask
|
||||
);
|
||||
|
||||
auto model = std::make_shared<ov::Model>(ov::OutputVector{ slice },
|
||||
ov::ParameterVector{ input_param });
|
||||
|
||||
ov::Core core;
|
||||
auto compiled_model = core.compile_model(model, "CPU");
|
||||
auto infer_request = compiled_model.create_infer_request();
|
||||
|
||||
ov::Tensor input_tensor(ov::element::f32, logical_shape, contiguous_data.data());
|
||||
//[NOTE]: input_shape should be {1, 7, 9216} not the original shap of src0.
|
||||
ov::Tensor input_tensor(ov::element::f32, input_shape, src0->data);
|
||||
infer_request.set_input_tensor(0, input_tensor);
|
||||
|
||||
ov::Tensor output_tensor(ov::element::f32, logical_shape, dst->data);
|
||||
infer_request.set_output_tensor(0, output_tensor);
|
||||
|
||||
infer_request.infer();
|
||||
/*
|
||||
for (size_t i01 = 0; i01 < ne01; ++i01) {
|
||||
const char *src_row = reinterpret_cast<const char *>(src0->data) + i01 * nb01;
|
||||
char *dst_row = reinterpret_cast<char *>(dst->data) + i01 * dst->nb[1];
|
||||
|
||||
ov::Tensor src_row_tensor(ov::element::f32, {ne00}, const_cast<void *>(reinterpret_cast<const void *>(src_row)));
|
||||
ov::Tensor dst_row_tensor(ov::element::f32, {ne00}, reinterpret_cast<void *>(dst_row));
|
||||
|
||||
std::memcpy(dst_row_tensor.data<float>(), src_row_tensor.data<float>(), ne00 * sizeof(float));
|
||||
}*/
|
||||
return;
|
||||
}
|
||||
|
||||
// Case 3: Non-contiguous source, contiguous destination
|
||||
const int64_t ne02 = src0->ne[2];
|
||||
const int64_t ne03 = src0->ne[3];
|
||||
const int64_t nb02 = src0->nb[2];
|
||||
const int64_t nb03 = src0->nb[3];
|
||||
|
||||
// dst->ne =[3072,7,1,1], dst->nb =[4,12288,86016,86016], dst->type=GGML_TYPE_F32
|
||||
// dst->src[0]->ne=[96,32,7,1], dst->src[0]->nb=[4,2688,384,86016], dst->src[0]->type=GGML_TYPE_F32
|
||||
if (ggml_is_contiguous(dst)) {
|
||||
size_t valid_i = static_cast<size_t>(src0->ne[0]); // 96
|
||||
size_t valid_j = static_cast<size_t>(src0->ne[1]); // 32
|
||||
size_t valid_k = static_cast<size_t>(src0->ne[2]); // 7
|
||||
size_t valid_l = static_cast<size_t>(src0->ne[3]); // 1
|
||||
|
||||
size_t total_valid = valid_i * valid_j * valid_k; // 96 * 32 * 7 = 21504
|
||||
size_t stride_j = static_cast<size_t>(src0->nb[1]) / element_size; // 672
|
||||
size_t stride_k = static_cast<size_t>(src0->nb[2]) / element_size; // 96
|
||||
ov::Shape src_shape = { valid_k, valid_j, valid_i }; // {7, 32, 96};
|
||||
auto src_param = std::make_shared<ov::op::v0::Parameter>(ov::element::f32, src_shape);
|
||||
|
||||
std::vector<float> contiguous_data(total_valid);
|
||||
const float *src_data = reinterpret_cast<const float*>(src0->data);
|
||||
for (size_t k = 0; k < valid_k; k++) {
|
||||
for (size_t j = 0; j < valid_j; j++) {
|
||||
for (size_t i = 0; i < valid_i; i++) {
|
||||
size_t out_index = k * (valid_i * valid_j) + j * valid_i + i;
|
||||
size_t src_index = j * stride_j + k * stride_k + i;
|
||||
contiguous_data[out_index] = src_data[src_index];
|
||||
}
|
||||
}
|
||||
}
|
||||
ov::Shape input_shape = { valid_j, valid_k, valid_i }; // {32, 7, 96}
|
||||
auto tmp_param = ov::op::v0::Constant::create(ov::element::i64, { input_shape.size() }, input_shape);
|
||||
auto input_param = std::make_shared<ov::op::v1::Reshape>(src_param, tmp_param, false);
|
||||
|
||||
// ov::Shape input_shape = { dst->src[0]->ne[0], dst->src[0]->ne[1], dst->src[0]->ne[2] };
|
||||
ov::Shape input_shape = { dst->src[0]->ne[2], dst->src[0]->ne[1], dst->src[0]->ne[0]};
|
||||
auto input_param = std::make_shared<ov::op::v0::Parameter>(ov::element::f32, input_shape);
|
||||
// 添加 Transpose 节点,将 {32,7,96} 变换为 {7,32,96},恢复逻辑顺序
|
||||
// 这里交换第 0 与第 1 维,即 permutation = {1, 0, 2}
|
||||
std::vector<int64_t> order = {1, 0, 2};
|
||||
auto order_const = ov::op::v0::Constant::create(ov::element::i64, {order.size()}, order);
|
||||
auto transpose = std::make_shared<ov::op::v1::Transpose>(input_param, order_const);
|
||||
|
||||
// ov::Shape target_shape = { dst->ne[0], dst->ne[1], dst->ne[2] };
|
||||
// std::vector<int64_t> target_shape_vec = { static_cast<int64_t>(dst->ne[0]),
|
||||
// static_cast<int64_t>(dst->ne[1]), dst->ne[2]};
|
||||
ov::Shape target_shape = { dst->ne[2], dst->ne[1], dst->ne[0] };
|
||||
ov::Shape target_shape = { dst->ne[2], dst->ne[1], dst->ne[0] }; // {1, 7, 3072}
|
||||
std::vector<int64_t> target_shape_vec = { static_cast<int64_t>(dst->ne[2]),
|
||||
static_cast<int64_t>(dst->ne[1]), dst->ne[0]};
|
||||
auto reshape_const = ov::op::v0::Constant::create(ov::element::i64, {3}, target_shape_vec);
|
||||
auto reshaped = std::make_shared<ov::op::v1::Reshape>(input_param, reshape_const, false);
|
||||
|
||||
auto model = std::make_shared<ov::Model>(ov::OutputVector{reshaped}, ov::ParameterVector{input_param});
|
||||
static_cast<int64_t>(dst->ne[1]),
|
||||
static_cast<int64_t>(dst->ne[0]) };
|
||||
auto reshape_const = ov::op::v0::Constant::create(ov::element::i64, { target_shape_vec.size() }, target_shape_vec);
|
||||
auto reshaped = std::make_shared<ov::op::v1::Reshape>(transpose, reshape_const, false);
|
||||
|
||||
auto model = std::make_shared<ov::Model>(ov::OutputVector{ reshaped },
|
||||
ov::ParameterVector{ src_param });
|
||||
ov::Core core;
|
||||
auto compiled_model = core.compile_model(model, "CPU");
|
||||
auto infer_request = compiled_model.create_infer_request();
|
||||
|
||||
ov::Tensor input_tensor(ov::element::f32, input_shape, contiguous_data.data());
|
||||
ov::Tensor input_tensor(ov::element::f32, src_shape, src0->data);
|
||||
infer_request.set_input_tensor(0, input_tensor);
|
||||
|
||||
ov::Tensor output_tensor(ov::element::f32, target_shape, dst->data);
|
||||
|
|
@ -998,40 +983,48 @@ static enum ggml_status ggml_backend_openvino_graph_compute(ggml_backend_t backe
|
|||
}
|
||||
}
|
||||
|
||||
int end_node = cgraph->n_nodes - 1;
|
||||
openvino_frontend_compute(backend, cgraph, 0, end_node);
|
||||
// openvino_frontend_compute(backend, cgraph);
|
||||
|
||||
// Process nodes in order
|
||||
// for (int i = 0; i < cgraph->n_nodes; i++) {
|
||||
// if (std::find(permute_indices.begin(), permute_indices.end(), i) != permute_indices.end()) {
|
||||
// ggml_backend_openvino_permute(cgraph->nodes[i]);
|
||||
// } else if (std::find(cont_indices.begin(), cont_indices.end(), i) != cont_indices.end()) {
|
||||
// ggml_backend_openvino_dup_bytes(cgraph->nodes[i]);
|
||||
// } else if (std::find(view_indices.begin(), view_indices.end(), i) != view_indices.end()) {
|
||||
// ggml_backend_openvino_view(cgraph->nodes[i]);
|
||||
// } else if (std::find(cpy_indices.begin(), cpy_indices.end(), i) != cpy_indices.end()) {
|
||||
// ggml_backend_openvino_cpy(cgraph->nodes[i]);
|
||||
// } else if (std::find(transpose_indices.begin(), transpose_indices.end(), i) != transpose_indices.end()) {
|
||||
// ggml_backend_openvino_transpose(cgraph->nodes[i]);
|
||||
// } else if (std::find(reshape_indices.begin(), reshape_indices.end(), i) != reshape_indices.end()) {
|
||||
// ggml_backend_openvino_reshape(cgraph->nodes[i]);
|
||||
// } else if (std::find(mul_mat_indices.begin(), mul_mat_indices.end(), i) != mul_mat_indices.end()) {
|
||||
// ggml_backend_openvino_mul_mat(cgraph->nodes[i]);
|
||||
// } else {
|
||||
// // Process a range of nodes with openvino_frontend_compute
|
||||
// int start_index = i;
|
||||
// while (i < cgraph->n_nodes
|
||||
// && std::find(view_indices.begin(), view_indices.end(), i) == view_indices.end()
|
||||
// && std::find(cpy_indices.begin(), cpy_indices.end(), i) == cpy_indices.end()
|
||||
// && std::find(cont_indices.begin(), cont_indices.end(), i) == cont_indices.end()
|
||||
// && std::find(mul_mat_indices.begin(), mul_mat_indices.end(), i) == mul_mat_indices.end()
|
||||
// ) {
|
||||
// i++;
|
||||
// }
|
||||
// if (start_index < i) {
|
||||
// openvino_frontend_compute(backend, cgraph, start_index, --i);
|
||||
// }
|
||||
// }
|
||||
|
||||
// if (cgraph->nodes[0]->ne[1] == 1) {
|
||||
// bool prompt_process_flag = false;
|
||||
// int end_node = cgraph->n_nodes - 1;
|
||||
// openvino_frontend_compute(backend, cgraph, 0, end_node, prompt_process_flag);
|
||||
// } else {
|
||||
|
||||
for (int i = 0; i < cgraph->n_nodes; i++) {
|
||||
if (std::find(permute_indices.begin(), permute_indices.end(), i) != permute_indices.end()) {
|
||||
// ggml_backend_openvino_permute(cgraph->nodes[i]);
|
||||
// } else if (std::find(mul_mat_indices.begin(), mul_mat_indices.end(), i) != mul_mat_indices.end()) {
|
||||
// ggml_backend_openvino_mul_mat(cgraph->nodes[i]);
|
||||
} else if (std::find(view_indices.begin(), view_indices.end(), i) != view_indices.end()) {
|
||||
ggml_backend_openvino_view(cgraph->nodes[i]);
|
||||
// } else if (std::find(cont_indices.begin(), cont_indices.end(), i) != cont_indices.end()) {
|
||||
// ggml_backend_openvino_dup_bytes(cgraph->nodes[i]);
|
||||
// } else if (std::find(transpose_indices.begin(), transpose_indices.end(), i) != transpose_indices.end()) {
|
||||
// ggml_backend_openvino_transpose(cgraph->nodes[i]);
|
||||
// } else if (std::find(reshape_indices.begin(), reshape_indices.end(), i) != reshape_indices.end()) {
|
||||
// ggml_backend_openvino_reshape(cgraph->nodes[i]);
|
||||
// } else if (std::find(cpy_indices.begin(), cpy_indices.end(), i) != cpy_indices.end()) {
|
||||
// ggml_backend_openvino_cpy(cgraph->nodes[i]);
|
||||
} else {
|
||||
// Process a range of nodes with openvino_frontend_compute
|
||||
int start_index = i;
|
||||
while (i < cgraph->n_nodes
|
||||
// && std::find(mul_mat_indices.begin(), mul_mat_indices.end(), i) == mul_mat_indices.end()
|
||||
&& std::find(view_indices.begin(), view_indices.end(), i) == view_indices.end()
|
||||
// && std::find(cont_indices.begin(), cont_indices.end(), i) == cont_indices.end()
|
||||
// && std::find(reshape_indices.begin(), reshape_indices.end(), i) == reshape_indices.end()
|
||||
// && std::find(cpy_indices.begin(), cpy_indices.end(), i) == cpy_indices.end()
|
||||
) {
|
||||
i++;
|
||||
}
|
||||
if (start_index < i) {
|
||||
openvino_frontend_compute(backend, cgraph, start_index, --i);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// }
|
||||
|
||||
return GGML_STATUS_SUCCESS;
|
||||
|
|
|
|||
|
|
@ -46,12 +46,14 @@ void GgmlOvDecoder::set_input_output(ggml_tensor* node, std::map<std::string, gg
|
|||
m_node_op_name[src0_name] = ggml_op_name(node->op);
|
||||
m_op_node_name.emplace_back(src0_name, ggml_op_name(node->op));
|
||||
m_output_names.push_back(node_name);
|
||||
|
||||
ov::Shape input_shape = { static_cast<size_t>(node->src[0]->ne[2]),
|
||||
static_cast<size_t>(node->src[0]->ne[1]),
|
||||
static_cast<size_t>(node->src[0]->ne[0])};
|
||||
auto input_param = std::make_shared<ov::op::v0::Parameter>(ov::element::f32, input_shape);
|
||||
m_params.push_back(input_param);
|
||||
|
||||
m_continuous = true;
|
||||
|
||||
// ov::Shape flat_shape = { static_cast<size_t>(ggml_nelements(node)) };
|
||||
// auto input_param = std::make_shared<ov::op::v0::Parameter>(ov::element::f32, flat_shape);
|
||||
// m_params.push_back(input_param);
|
||||
|
||||
break;
|
||||
}
|
||||
|
||||
|
|
@ -59,12 +61,6 @@ void GgmlOvDecoder::set_input_output(ggml_tensor* node, std::map<std::string, gg
|
|||
node->src[0]->nb[0] == ggml_type_size(node->src[0]->type) &&
|
||||
node->nb[0] == ggml_type_size(node->src[0]->type)) {
|
||||
|
||||
// for (size_t i01 = 0; i01 < node->src[0]->ne[1]; ++i01) {
|
||||
// const char *src_row = reinterpret_cast<const char *>(node->src[0]->data) + i01 * node->src[0]->nb[1];
|
||||
// char *dst_row = reinterpret_cast<char *>(node->data) + i01 * node->nb[1];
|
||||
// std::memcpy(dst_row, src_row, node->src[0]->ne[0] * ggml_type_size(node->src[0]->type));
|
||||
// }
|
||||
|
||||
inputs[src0_name] = node->src[0];
|
||||
outputs[node_name] = node;
|
||||
m_input_names.push_back(src0_name);
|
||||
|
|
@ -72,15 +68,16 @@ void GgmlOvDecoder::set_input_output(ggml_tensor* node, std::map<std::string, gg
|
|||
m_op_node_name.emplace_back(src0_name, ggml_op_name(node->op));
|
||||
m_output_names.push_back(node_name);
|
||||
|
||||
// const size_t element_size = ggml_type_size(node->src[0]->type);
|
||||
// size_t valid_elems = static_cast<size_t>(node->src[0]->ne[0]); // 3072
|
||||
// size_t num_rows = static_cast<size_t>(node->src[0]->ne[1]); // 7
|
||||
// size_t phys_stride = static_cast<size_t>(node->src[0]->nb[1]) / element_size; // 9216
|
||||
// // size_t total_phys = (num_rows - 1) * phys_stride + valid_elems; // 6*9216 + 3072 = 58368
|
||||
// size_t total_phys = num_rows * phys_stride; // 7 * 9216 = 64512
|
||||
// ov::Shape flat_input_shape = { total_phys };
|
||||
// auto flat_input_param = std::make_shared<ov::op::v0::Parameter>(ov::element::f32, flat_input_shape);
|
||||
// m_params.push_back(flat_input_param);
|
||||
const size_t element_size = ggml_type_size(node->src[0]->type);
|
||||
size_t valid_elems = static_cast<size_t>(node->src[0]->ne[0]); // 3072
|
||||
size_t num_rows = static_cast<size_t>(node->src[0]->ne[1]); // 7
|
||||
size_t dim2 = static_cast<size_t>(node->src[0]->ne[2]); // 1
|
||||
size_t phys_stride = static_cast<size_t>(node->src[0]->nb[1]) / element_size; // 9216
|
||||
// size_t total_phys = (num_rows - 1) * phys_stride + valid_elems; // 6*9216 + 3072 = 58368
|
||||
size_t total_phys = num_rows * phys_stride; // 7 * 9216 = 64512
|
||||
ov::Shape input_shape = { dim2, num_rows, phys_stride };
|
||||
auto input_param = std::make_shared<ov::op::v0::Parameter>(ov::element::f32, input_shape);
|
||||
m_params.push_back(input_param);
|
||||
|
||||
m_continuous = false;
|
||||
break;
|
||||
|
|
@ -94,13 +91,11 @@ void GgmlOvDecoder::set_input_output(ggml_tensor* node, std::map<std::string, gg
|
|||
m_op_node_name.emplace_back(src0_name, ggml_op_name(node->op));
|
||||
m_output_names.push_back(node_name);
|
||||
|
||||
// size_t valid_i = static_cast<size_t>(node->src[0]->ne[0]); // 96
|
||||
// size_t valid_j = static_cast<size_t>(node->src[0]->ne[1]); // 32
|
||||
// size_t valid_k = static_cast<size_t>(node->src[0]->ne[2]); // 7
|
||||
// size_t total_valid = valid_i * valid_j * valid_k; // 96 * 32 * 7 = 21504
|
||||
// ov::Shape flat_input_shape = { total_valid };
|
||||
// auto input_param = std::make_shared<ov::op::v0::Parameter>(ov::element::f32, flat_input_shape);
|
||||
// m_params.push_back(input_param);
|
||||
ov::Shape input_shape = { static_cast<size_t>(node->src[0]->ne[2]),
|
||||
static_cast<size_t>(node->src[0]->ne[1]),
|
||||
static_cast<size_t>(node->src[0]->ne[0])};
|
||||
auto input_param = std::make_shared<ov::op::v0::Parameter>(ov::element::f32, input_shape);
|
||||
m_params.push_back(input_param);
|
||||
|
||||
m_continuous = false;
|
||||
break;
|
||||
|
|
@ -117,9 +112,9 @@ void GgmlOvDecoder::set_input_output(ggml_tensor* node, std::map<std::string, gg
|
|||
m_output_names.push_back(node_name);
|
||||
m_continuous = true;
|
||||
|
||||
ov::Shape src_shape(node->src[0]->ne, node->src[0]->ne + 3);
|
||||
auto input_param = std::make_shared<ov::op::v0::Parameter>(ov::element::f32, src_shape);
|
||||
m_params.push_back(input_param);
|
||||
// ov::Shape src_shape(node->src[0]->ne, node->src[0]->ne + 3);
|
||||
// auto input_param = std::make_shared<ov::op::v0::Parameter>(ov::element::f32, src_shape);
|
||||
// m_params.push_back(input_param);
|
||||
break;
|
||||
} else {
|
||||
for (int64_t i1 = 0; i1 < node->ne[1]; ++i1) { // ne[1] = 3072
|
||||
|
|
@ -139,27 +134,6 @@ void GgmlOvDecoder::set_input_output(ggml_tensor* node, std::map<std::string, gg
|
|||
m_op_node_name.emplace_back(src0_name, ggml_op_name(node->op));
|
||||
m_output_names.push_back(node_name);
|
||||
m_continuous = false;
|
||||
break;
|
||||
|
||||
// inputs[src0_name] = node->src[0];
|
||||
// std::string temp_name = src0_name + std::string("_cpy_tmp");
|
||||
// inputs[temp_name] = node;
|
||||
|
||||
// outputs[node_name] = node;
|
||||
// m_input_names.push_back(src0_name);
|
||||
// m_input_names.push_back(temp_name);
|
||||
// m_node_op_name[src0_name] = ggml_op_name(node->op);
|
||||
// m_node_op_name[temp_name] = ggml_op_name(node->op);
|
||||
// m_output_names.push_back(node_name);
|
||||
// m_continuous = false;
|
||||
|
||||
// ov::Shape flat_src0_shape = {node->src[0]->nb[2]};
|
||||
// auto param_src0 = std::make_shared<ov::op::v0::Parameter>(ov::element::f32, flat_src0_shape);
|
||||
// m_params.push_back(param_src0);
|
||||
|
||||
// ov::Shape flat_dst_shape = {node->nb[2], 1};
|
||||
// auto param_dst_base = std::make_shared<ov::op::v0::Parameter>(ov::element::f16, flat_dst_shape);
|
||||
// m_params.push_back(param_dst_base);
|
||||
|
||||
break;
|
||||
}
|
||||
|
|
@ -167,8 +141,6 @@ void GgmlOvDecoder::set_input_output(ggml_tensor* node, std::map<std::string, gg
|
|||
// For view, input is node itself
|
||||
case GGML_OP_VIEW:
|
||||
{
|
||||
// std::string node_name = std::string(node->name) + "_" + std::to_string(node->view_offs) + "_output_" + ggml_op_name(node->op);
|
||||
// std::string node_name = std::string(node->name) + "_" + std::to_string(node->view_offs);
|
||||
inputs[node_name] = node;
|
||||
outputs[node_name] = node;
|
||||
m_input_names.push_back(node_name);
|
||||
|
|
@ -190,12 +162,6 @@ void GgmlOvDecoder::set_input_output(ggml_tensor* node, std::map<std::string, gg
|
|||
}
|
||||
case GGML_OP_MUL_MAT:
|
||||
{
|
||||
// ov::Shape flat_shape_src0 = { node->src[0]->ne[0]*node->src[0]->ne[1]*node->src[0]->ne[2] };
|
||||
// ov::Shape flat_shape_src1 = { node->src[1]->ne[0]*node->src[1]->ne[1]*node->src[1]->ne[2] };
|
||||
// auto param_src0 = std::make_shared<ov::op::v0::Parameter>(ov::element::f16, flat_shape_src0);
|
||||
// auto param_src1 = std::make_shared<ov::op::v0::Parameter>(ov::element::f32, flat_shape_src1);
|
||||
// m_params.push_back(param_src0);
|
||||
// m_params.push_back(param_src1);
|
||||
if (!ggml_is_contiguous(node->src[1]) || node->src[1]->ne[0] * node->src[1]->nb[0] != node->src[1]->nb[1]) {
|
||||
m_continuous = false;
|
||||
} else {
|
||||
|
|
@ -376,8 +342,8 @@ GgmlOvDecoder::GgmlOvDecoder(struct ggml_tensor * node, struct ggml_cgraph * cgr
|
|||
if (m_node) {
|
||||
set_input_output(m_node, m_inputs, m_outputs);
|
||||
} else {
|
||||
for (int node_n = 0; node_n < m_cgraph->n_nodes; node_n++) {
|
||||
// for (int node_n = start_index; node_n <= end_index; node_n++) {
|
||||
// for (int node_n = 0; node_n < m_cgraph->n_nodes; node_n++) {
|
||||
for (int node_n = start_index; node_n <= end_index; node_n++) {
|
||||
auto cur_node = m_cgraph->nodes[node_n];
|
||||
m_nodes.push_back(cur_node);
|
||||
// Init model input and output
|
||||
|
|
|
|||
|
|
@ -10,8 +10,10 @@ std::shared_ptr<GgmlOvDecoder> get_ggml_decoder(struct ggml_cgraph * cgraph, con
|
|||
return std::make_shared<GgmlOvDecoder>(nullptr, cgraph, start_index, end_index);
|
||||
}
|
||||
|
||||
std::map<std::string, ov::Tensor> get_ggml_graph_input_tensors(std::shared_ptr<GgmlOvDecoder> ggml_decoder) {
|
||||
std::map<std::string, ov::Tensor> input_tensors;
|
||||
// std::map<std::string, ov::Tensor> get_ggml_graph_input_tensors(std::shared_ptr<GgmlOvDecoder> ggml_decoder) {
|
||||
std::vector<std::pair<std::string, ov::Tensor>> get_ggml_graph_input_tensors(std::shared_ptr<GgmlOvDecoder> ggml_decoder, bool flag) {
|
||||
// std::map<std::string, ov::Tensor> input_tensors;
|
||||
std::vector<std::pair<std::string, ov::Tensor>> input_tensors;
|
||||
auto input_names = ggml_decoder->get_input_names();
|
||||
// auto node_name = ggml_decoder->get_op_name();
|
||||
size_t op_iter = 0;
|
||||
|
|
@ -19,10 +21,7 @@ std::map<std::string, ov::Tensor> get_ggml_graph_input_tensors(std::shared_ptr<G
|
|||
auto name = input_names[inp];
|
||||
std::string op_node_name = ggml_decoder->get_op_node_name(name, op_iter++);
|
||||
// auto node_op_name = ggml_decoder->get_node_op_name(name);
|
||||
ov::element::Type input_type = ggml_decoder->get_input_type(name);
|
||||
size_t element_size = input_type.size();
|
||||
auto input_data = ggml_decoder->get_input_ggml_tensor(name)->data;
|
||||
std::vector<size_t> input_stride = ggml_decoder->get_input_stride(name);
|
||||
#ifdef GGML_OPENVINO_DEBUG
|
||||
printf("Subgraph input %d: %g\n", inp, *(double*)(input_data));
|
||||
#endif
|
||||
|
|
@ -31,58 +30,22 @@ std::map<std::string, ov::Tensor> get_ggml_graph_input_tensors(std::shared_ptr<G
|
|||
// if (node_op_name == "CPY" && (input_shape[0] != 7)) {
|
||||
// input_tensor = ov::Tensor(ggml_decoder->get_input_type(name), {80000}, input_data);
|
||||
|
||||
if (op_node_name == "CONT" && !ggml_decoder->check_if_continuous() && input_shape[0] == 1) {
|
||||
const size_t valid_elems = static_cast<size_t>(ggml_decoder->get_input_shape(name).to_shape()[2]);
|
||||
if (flag & op_node_name == "CONT" && input_shape[0] == 1 && input_shape[1] != 1) {
|
||||
std::vector<size_t> input_stride = ggml_decoder->get_input_stride(name);
|
||||
ov::element::Type input_type = ggml_decoder->get_input_type(name);
|
||||
size_t element_size = input_type.size();
|
||||
// const size_t valid_elems = static_cast<size_t>(ggml_decoder->get_input_shape(name).to_shape()[2]);
|
||||
const size_t num_rows = static_cast<size_t>(ggml_decoder->get_input_shape(name).to_shape()[1]);
|
||||
const size_t dim2 = static_cast<size_t>(ggml_decoder->get_input_shape(name).to_shape()[0]);
|
||||
size_t phys_stride = static_cast<size_t>(input_stride[1]) / element_size;
|
||||
size_t total_logical = valid_elems * num_rows * dim2;
|
||||
|
||||
std::vector<float> contiguous_data(total_logical);
|
||||
|
||||
for (size_t j = 0; j < num_rows; j++) {
|
||||
const float *src_row = reinterpret_cast<const float*>(input_data) + j * phys_stride;
|
||||
float *dst_row = contiguous_data.data() + j * valid_elems;
|
||||
std::copy(src_row, src_row + valid_elems, dst_row);
|
||||
}
|
||||
input_tensor = ov::Tensor(ggml_decoder->get_input_type(name),
|
||||
ggml_decoder->get_input_shape(name).to_shape(),
|
||||
contiguous_data.data());
|
||||
} else if (op_node_name == "CONT" && !ggml_decoder->check_if_continuous()){
|
||||
size_t valid_i = static_cast<size_t>(ggml_decoder->get_input_shape(name).to_shape()[2]); // 96
|
||||
size_t valid_j = static_cast<size_t>(ggml_decoder->get_input_shape(name).to_shape()[1]); // 32
|
||||
size_t valid_k = static_cast<size_t>(ggml_decoder->get_input_shape(name).to_shape()[0]); // 7
|
||||
|
||||
size_t total_valid = valid_i * valid_j * valid_k; // 96 * 32 * 7 = 21504
|
||||
size_t stride_j = static_cast<size_t>(input_stride[1]) / element_size; // 672
|
||||
size_t stride_k = static_cast<size_t>(input_stride[0]) / element_size; // 96
|
||||
|
||||
std::vector<float> contiguous_data(total_valid);
|
||||
const float *src_data = reinterpret_cast<const float*>(input_data);
|
||||
for (size_t k = 0; k < valid_k; k++) {
|
||||
for (size_t j = 0; j < valid_j; j++) {
|
||||
for (size_t i = 0; i < valid_i; i++) {
|
||||
size_t out_index = k * (valid_i * valid_j) + j * valid_i + i;
|
||||
size_t src_index = j * stride_j + k * stride_k + i;
|
||||
contiguous_data[out_index] = src_data[src_index];
|
||||
}
|
||||
}
|
||||
}
|
||||
input_tensor = ov::Tensor(ggml_decoder->get_input_type(name),
|
||||
ggml_decoder->get_input_shape(name).to_shape(),
|
||||
contiguous_data.data());
|
||||
// } else if (op_node_name == "MUL_MAT") {
|
||||
// ov::Shape flat_shape = { ggml_decoder->get_input_shape(name).to_shape()[0] *
|
||||
// ggml_decoder->get_input_shape(name).to_shape()[1] *
|
||||
// ggml_decoder->get_input_shape(name).to_shape()[2] };
|
||||
// input_tensor = ov::Tensor(ggml_decoder->get_input_type(name), flat_shape, input_data);
|
||||
ov::Shape input_shape = { dim2, num_rows, phys_stride }; // {1, 7, 9216 }
|
||||
input_tensor = ov::Tensor(ggml_decoder->get_input_type(name), input_shape, input_data);
|
||||
} else {
|
||||
input_tensor = ov::Tensor(ggml_decoder->get_input_type(name), ggml_decoder->get_input_shape(name).to_shape(), input_data);
|
||||
}
|
||||
// input_tensor = ov::Tensor(ggml_decoder->get_input_type(name), ggml_decoder->get_input_shape(name).to_shape(), input_data);
|
||||
// }
|
||||
|
||||
input_tensors[name] = input_tensor;
|
||||
// input_tensors[name] = input_tensor;
|
||||
input_tensors.emplace_back(name, input_tensor);
|
||||
}
|
||||
return input_tensors;
|
||||
}
|
||||
|
|
@ -114,11 +77,11 @@ static ov::frontend::FrontEnd::Ptr get_ggml_frontend() {
|
|||
return front_end;
|
||||
}
|
||||
|
||||
enum ggml_status openvino_frontend_compute(ggml_backend_t backend, struct ggml_cgraph * cgraph, const int32_t start_index, const int32_t end_index) {
|
||||
ov::Core core;
|
||||
auto devices = core.get_available_devices();
|
||||
enum ggml_status openvino_frontend_compute(ggml_backend_t backend, struct ggml_cgraph * cgraph, const int32_t start_index, const int32_t end_index, bool flag) {
|
||||
static ov::Core core;
|
||||
// auto devices = core.get_available_devices();
|
||||
// Get GGML Frontend
|
||||
auto front_end = get_ggml_frontend();
|
||||
static auto front_end = get_ggml_frontend();
|
||||
if (!front_end) {
|
||||
GGML_LOG_ERROR("GGML FrontEnd is not initialized \n");
|
||||
return GGML_STATUS_FAILED;
|
||||
|
|
@ -161,11 +124,12 @@ enum ggml_status openvino_frontend_compute(ggml_backend_t backend, struct ggml_c
|
|||
|
||||
// Get input tensor
|
||||
auto input_names = ggml_decoder->get_input_names();
|
||||
auto input_tensors = get_ggml_graph_input_tensors(ggml_decoder);
|
||||
auto input_tensors = get_ggml_graph_input_tensors(ggml_decoder, flag);
|
||||
|
||||
// Set input tensor
|
||||
for (size_t i = 0; i < input_names.size(); i++) {
|
||||
infer_request.set_input_tensor(i, input_tensors[input_names[i]]);
|
||||
// infer_request.set_input_tensor(i, input_tensors[input_names[i]]);
|
||||
infer_request.set_input_tensor(i, input_tensors.at(i).second);
|
||||
|
||||
// auto input_tensor = infer_request.get_input_tensor(i);
|
||||
// auto input_shape = input_tensor.get_shape();
|
||||
|
|
|
|||
|
|
@ -1,4 +1,4 @@
|
|||
#include "ggml-decoder.h"
|
||||
#include "ggml-backend-impl.h"
|
||||
|
||||
enum ggml_status openvino_frontend_compute (ggml_backend_t backend, struct ggml_cgraph * cgraph, const int32_t start_index=0, const int32_t end_index=0);
|
||||
enum ggml_status openvino_frontend_compute(ggml_backend_t backend, struct ggml_cgraph * cgraph, const int32_t start_index=0, const int32_t end_index=0, bool flag = true);
|
||||
|
|
|
|||
Loading…
Reference in New Issue