From cff473a9e20df37c9fc32c30009b9abfe12ed948 Mon Sep 17 00:00:00 2001 From: zhanmyz Date: Sun, 9 Mar 2025 23:35:18 +0800 Subject: [PATCH] 1. All operators implemented using OpenVINO can be successfully executed individually. 2. VIEW op output tensor shape is not the same as the CONT(non-contiguous) input tensor shape 3. CPY(non-contiguous) can't be implemented with the original input/output tensor shape and data (need to change the original shape when creating the input/output tensors) Currently, the VIEW op is executed in the ggml backend and the others are executed in the OpenVINO Frontend. --- ggml/src/ggml-openvino.cpp | 191 ++++++++++++------------ ggml/src/ggml-openvino/ggml-decoder.cpp | 88 ++++------- ggml/src/ggml-openvino/utils.cpp | 76 +++------- ggml/src/ggml-openvino/utils.h | 2 +- 4 files changed, 140 insertions(+), 217 deletions(-) diff --git a/ggml/src/ggml-openvino.cpp b/ggml/src/ggml-openvino.cpp index 230edded11..082ab27458 100644 --- a/ggml/src/ggml-openvino.cpp +++ b/ggml/src/ggml-openvino.cpp @@ -537,8 +537,7 @@ void ggml_backend_openvino_mul_mat(struct ggml_tensor * dst) { auto reshape_output = std::make_shared( batched_matmul, ov::op::v0::Constant::create(ov::element::i64, {3}, final_output_shape), - false - ); + false); auto model = std::make_shared(ov::NodeVector{ reshape_output }, ov::ParameterVector{ param_src0, param_src1 }); @@ -659,6 +658,7 @@ void ggml_backend_openvino_view(ggml_tensor *dst) { false); auto model = std::make_shared(ov::NodeVector{reshaped}, ov::ParameterVector{param}); + // auto model = std::make_shared(ov::NodeVector{param}, ov::ParameterVector{param}); // ov::save_model(model, "/home/user/zhan/merge_git_commits/llama.cpp-ov/003_backend_view_model.xml"); auto compiled_model = core.compile_model(model, "CPU"); @@ -742,106 +742,91 @@ void ggml_backend_openvino_dup_bytes(struct ggml_tensor *dst) { const size_t nb0 = dst->nb[0]; if (src0->type == dst->type && ne00 == dst->ne[0] && nb00 == element_size && nb0 == element_size) { - const size_t valid_elems = 
static_cast(src0->ne[0]); - const size_t num_rows = static_cast(src0->ne[1]); - const size_t dim2 = static_cast(src0->ne[2]); - const size_t dim3 = static_cast(src0->ne[3]); + const size_t valid_elems = static_cast(src0->ne[0]); // 3072 + const size_t num_rows = static_cast(src0->ne[1]); // 7 + const size_t dim2 = static_cast(src0->ne[2]); // 1 - size_t phys_stride = static_cast(src0->nb[1]) / element_size; - size_t total_logical = valid_elems * num_rows * dim2 * dim3; + size_t phys_stride = static_cast(src0->nb[1]) / element_size; // 9216 - std::vector contiguous_data(total_logical); + ov::Shape input_shape = { dim2, num_rows, phys_stride }; // 如 {1, 7, 9216 } + ov::Shape logical_shape = { dim2, num_rows, valid_elems }; // {1, 7, 3072} - for (size_t j = 0; j < num_rows; j++) { - const float *src_row = reinterpret_cast(src0->data) + j * phys_stride; - float *dst_row = contiguous_data.data() + j * valid_elems; - std::copy(src_row, src_row + valid_elems, dst_row); - } + auto input_param = std::make_shared(ov::element::f32, input_shape); - ov::Shape logical_shape = { dim2, num_rows, valid_elems}; - auto input_param = std::make_shared(ov::element::f32, logical_shape); - auto identity_const = ov::op::v0::Constant::create(ov::element::i64, - { logical_shape.size() }, - std::vector(logical_shape.begin(), logical_shape.end())); - auto identity_op = std::make_shared(input_param, identity_const, false); + std::vector begin = { 0, 0, 0 }; + std::vector end = { static_cast(dim2), + static_cast(num_rows), + static_cast(valid_elems) }; + std::vector strides = { 1, 1, 1 }; - auto model = std::make_shared(ov::OutputVector{identity_op}, - ov::ParameterVector{input_param}); + auto begin_const = ov::op::v0::Constant::create(ov::element::i64, { begin.size() }, begin); + auto end_const = ov::op::v0::Constant::create(ov::element::i64, { end.size() }, end); + auto strides_const = ov::op::v0::Constant::create(ov::element::i64, { strides.size() }, strides); + + std::vector begin_mask = {0, 
0, 0}; + std::vector end_mask = {0, 0, 0}; + auto slice = std::make_shared( + input_param, + begin_const, + end_const, + strides_const, + begin_mask, + end_mask + ); + + auto model = std::make_shared(ov::OutputVector{ slice }, + ov::ParameterVector{ input_param }); ov::Core core; auto compiled_model = core.compile_model(model, "CPU"); auto infer_request = compiled_model.create_infer_request(); - ov::Tensor input_tensor(ov::element::f32, logical_shape, contiguous_data.data()); + //[NOTE]: input_shape should be {1, 7, 9216} not the original shap of src0. + ov::Tensor input_tensor(ov::element::f32, input_shape, src0->data); infer_request.set_input_tensor(0, input_tensor); ov::Tensor output_tensor(ov::element::f32, logical_shape, dst->data); infer_request.set_output_tensor(0, output_tensor); infer_request.infer(); - /* - for (size_t i01 = 0; i01 < ne01; ++i01) { - const char *src_row = reinterpret_cast(src0->data) + i01 * nb01; - char *dst_row = reinterpret_cast(dst->data) + i01 * dst->nb[1]; - - ov::Tensor src_row_tensor(ov::element::f32, {ne00}, const_cast(reinterpret_cast(src_row))); - ov::Tensor dst_row_tensor(ov::element::f32, {ne00}, reinterpret_cast(dst_row)); - - std::memcpy(dst_row_tensor.data(), src_row_tensor.data(), ne00 * sizeof(float)); - }*/ return; } // Case 3: Non-contiguous source, contiguous destination - const int64_t ne02 = src0->ne[2]; - const int64_t ne03 = src0->ne[3]; - const int64_t nb02 = src0->nb[2]; - const int64_t nb03 = src0->nb[3]; - // dst->ne =[3072,7,1,1], dst->nb =[4,12288,86016,86016], dst->type=GGML_TYPE_F32 // dst->src[0]->ne=[96,32,7,1], dst->src[0]->nb=[4,2688,384,86016], dst->src[0]->type=GGML_TYPE_F32 if (ggml_is_contiguous(dst)) { size_t valid_i = static_cast(src0->ne[0]); // 96 size_t valid_j = static_cast(src0->ne[1]); // 32 size_t valid_k = static_cast(src0->ne[2]); // 7 - size_t valid_l = static_cast(src0->ne[3]); // 1 - size_t total_valid = valid_i * valid_j * valid_k; // 96 * 32 * 7 = 21504 - size_t stride_j = 
static_cast(src0->nb[1]) / element_size; // 672 - size_t stride_k = static_cast(src0->nb[2]) / element_size; // 96 + ov::Shape src_shape = { valid_k, valid_j, valid_i }; // {7, 32, 96}; + auto src_param = std::make_shared(ov::element::f32, src_shape); - std::vector contiguous_data(total_valid); - const float *src_data = reinterpret_cast(src0->data); - for (size_t k = 0; k < valid_k; k++) { - for (size_t j = 0; j < valid_j; j++) { - for (size_t i = 0; i < valid_i; i++) { - size_t out_index = k * (valid_i * valid_j) + j * valid_i + i; - size_t src_index = j * stride_j + k * stride_k + i; - contiguous_data[out_index] = src_data[src_index]; - } - } - } + ov::Shape input_shape = { valid_j, valid_k, valid_i }; // {32, 7, 96} + auto tmp_param = ov::op::v0::Constant::create(ov::element::i64, { input_shape.size() }, input_shape); + auto input_param = std::make_shared(src_param, tmp_param, false); - // ov::Shape input_shape = { dst->src[0]->ne[0], dst->src[0]->ne[1], dst->src[0]->ne[2] }; - ov::Shape input_shape = { dst->src[0]->ne[2], dst->src[0]->ne[1], dst->src[0]->ne[0]}; - auto input_param = std::make_shared(ov::element::f32, input_shape); + // 添加 Transpose 节点,将 {32,7,96} 变换为 {7,32,96},恢复逻辑顺序 + // 这里交换第 0 与第 1 维,即 permutation = {1, 0, 2} + std::vector order = {1, 0, 2}; + auto order_const = ov::op::v0::Constant::create(ov::element::i64, {order.size()}, order); + auto transpose = std::make_shared(input_param, order_const); - // ov::Shape target_shape = { dst->ne[0], dst->ne[1], dst->ne[2] }; - // std::vector target_shape_vec = { static_cast(dst->ne[0]), - // static_cast(dst->ne[1]), dst->ne[2]}; - ov::Shape target_shape = { dst->ne[2], dst->ne[1], dst->ne[0] }; + ov::Shape target_shape = { dst->ne[2], dst->ne[1], dst->ne[0] }; // {1, 7, 3072} std::vector target_shape_vec = { static_cast(dst->ne[2]), - static_cast(dst->ne[1]), dst->ne[0]}; - auto reshape_const = ov::op::v0::Constant::create(ov::element::i64, {3}, target_shape_vec); - auto reshaped = 
std::make_shared(input_param, reshape_const, false); - - auto model = std::make_shared(ov::OutputVector{reshaped}, ov::ParameterVector{input_param}); + static_cast(dst->ne[1]), + static_cast(dst->ne[0]) }; + auto reshape_const = ov::op::v0::Constant::create(ov::element::i64, { target_shape_vec.size() }, target_shape_vec); + auto reshaped = std::make_shared(transpose, reshape_const, false); + auto model = std::make_shared(ov::OutputVector{ reshaped }, + ov::ParameterVector{ src_param }); ov::Core core; auto compiled_model = core.compile_model(model, "CPU"); auto infer_request = compiled_model.create_infer_request(); - ov::Tensor input_tensor(ov::element::f32, input_shape, contiguous_data.data()); + ov::Tensor input_tensor(ov::element::f32, src_shape, src0->data); infer_request.set_input_tensor(0, input_tensor); ov::Tensor output_tensor(ov::element::f32, target_shape, dst->data); @@ -998,40 +983,48 @@ static enum ggml_status ggml_backend_openvino_graph_compute(ggml_backend_t backe } } - int end_node = cgraph->n_nodes - 1; - openvino_frontend_compute(backend, cgraph, 0, end_node); - // openvino_frontend_compute(backend, cgraph); + // Process nodes in order - // for (int i = 0; i < cgraph->n_nodes; i++) { - // if (std::find(permute_indices.begin(), permute_indices.end(), i) != permute_indices.end()) { - // ggml_backend_openvino_permute(cgraph->nodes[i]); - // } else if (std::find(cont_indices.begin(), cont_indices.end(), i) != cont_indices.end()) { - // ggml_backend_openvino_dup_bytes(cgraph->nodes[i]); - // } else if (std::find(view_indices.begin(), view_indices.end(), i) != view_indices.end()) { - // ggml_backend_openvino_view(cgraph->nodes[i]); - // } else if (std::find(cpy_indices.begin(), cpy_indices.end(), i) != cpy_indices.end()) { - // ggml_backend_openvino_cpy(cgraph->nodes[i]); - // } else if (std::find(transpose_indices.begin(), transpose_indices.end(), i) != transpose_indices.end()) { - // ggml_backend_openvino_transpose(cgraph->nodes[i]); - // } else if 
(std::find(reshape_indices.begin(), reshape_indices.end(), i) != reshape_indices.end()) { - // ggml_backend_openvino_reshape(cgraph->nodes[i]); - // } else if (std::find(mul_mat_indices.begin(), mul_mat_indices.end(), i) != mul_mat_indices.end()) { - // ggml_backend_openvino_mul_mat(cgraph->nodes[i]); - // } else { - // // Process a range of nodes with openvino_frontend_compute - // int start_index = i; - // while (i < cgraph->n_nodes - // && std::find(view_indices.begin(), view_indices.end(), i) == view_indices.end() - // && std::find(cpy_indices.begin(), cpy_indices.end(), i) == cpy_indices.end() - // && std::find(cont_indices.begin(), cont_indices.end(), i) == cont_indices.end() - // && std::find(mul_mat_indices.begin(), mul_mat_indices.end(), i) == mul_mat_indices.end() - // ) { - // i++; - // } - // if (start_index < i) { - // openvino_frontend_compute(backend, cgraph, start_index, --i); - // } - // } + + // if (cgraph->nodes[0]->ne[1] == 1) { + // bool prompt_process_flag = false; + // int end_node = cgraph->n_nodes - 1; + // openvino_frontend_compute(backend, cgraph, 0, end_node, prompt_process_flag); + // } else { + + for (int i = 0; i < cgraph->n_nodes; i++) { + if (std::find(permute_indices.begin(), permute_indices.end(), i) != permute_indices.end()) { + // ggml_backend_openvino_permute(cgraph->nodes[i]); + // } else if (std::find(mul_mat_indices.begin(), mul_mat_indices.end(), i) != mul_mat_indices.end()) { + // ggml_backend_openvino_mul_mat(cgraph->nodes[i]); + } else if (std::find(view_indices.begin(), view_indices.end(), i) != view_indices.end()) { + ggml_backend_openvino_view(cgraph->nodes[i]); + // } else if (std::find(cont_indices.begin(), cont_indices.end(), i) != cont_indices.end()) { + // ggml_backend_openvino_dup_bytes(cgraph->nodes[i]); + // } else if (std::find(transpose_indices.begin(), transpose_indices.end(), i) != transpose_indices.end()) { + // ggml_backend_openvino_transpose(cgraph->nodes[i]); + // } else if 
(std::find(reshape_indices.begin(), reshape_indices.end(), i) != reshape_indices.end()) { + // ggml_backend_openvino_reshape(cgraph->nodes[i]); + // } else if (std::find(cpy_indices.begin(), cpy_indices.end(), i) != cpy_indices.end()) { + // ggml_backend_openvino_cpy(cgraph->nodes[i]); + } else { + // Process a range of nodes with openvino_frontend_compute + int start_index = i; + while (i < cgraph->n_nodes + // && std::find(mul_mat_indices.begin(), mul_mat_indices.end(), i) == mul_mat_indices.end() + && std::find(view_indices.begin(), view_indices.end(), i) == view_indices.end() + // && std::find(cont_indices.begin(), cont_indices.end(), i) == cont_indices.end() + // && std::find(reshape_indices.begin(), reshape_indices.end(), i) == reshape_indices.end() + // && std::find(cpy_indices.begin(), cpy_indices.end(), i) == cpy_indices.end() + ) { + i++; + } + if (start_index < i) { + openvino_frontend_compute(backend, cgraph, start_index, --i); + } + } + } + // } return GGML_STATUS_SUCCESS; diff --git a/ggml/src/ggml-openvino/ggml-decoder.cpp b/ggml/src/ggml-openvino/ggml-decoder.cpp index 90bfdcd103..2b04cd632a 100644 --- a/ggml/src/ggml-openvino/ggml-decoder.cpp +++ b/ggml/src/ggml-openvino/ggml-decoder.cpp @@ -46,12 +46,14 @@ void GgmlOvDecoder::set_input_output(ggml_tensor* node, std::mapop); m_op_node_name.emplace_back(src0_name, ggml_op_name(node->op)); m_output_names.push_back(node_name); + + ov::Shape input_shape = { static_cast(node->src[0]->ne[2]), + static_cast(node->src[0]->ne[1]), + static_cast(node->src[0]->ne[0])}; + auto input_param = std::make_shared(ov::element::f32, input_shape); + m_params.push_back(input_param); + m_continuous = true; - - // ov::Shape flat_shape = { static_cast(ggml_nelements(node)) }; - // auto input_param = std::make_shared(ov::element::f32, flat_shape); - // m_params.push_back(input_param); - break; } @@ -59,12 +61,6 @@ void GgmlOvDecoder::set_input_output(ggml_tensor* node, std::mapsrc[0]->nb[0] == 
ggml_type_size(node->src[0]->type) && node->nb[0] == ggml_type_size(node->src[0]->type)) { - // for (size_t i01 = 0; i01 < node->src[0]->ne[1]; ++i01) { - // const char *src_row = reinterpret_cast(node->src[0]->data) + i01 * node->src[0]->nb[1]; - // char *dst_row = reinterpret_cast(node->data) + i01 * node->nb[1]; - // std::memcpy(dst_row, src_row, node->src[0]->ne[0] * ggml_type_size(node->src[0]->type)); - // } - inputs[src0_name] = node->src[0]; outputs[node_name] = node; m_input_names.push_back(src0_name); @@ -72,15 +68,16 @@ void GgmlOvDecoder::set_input_output(ggml_tensor* node, std::mapop)); m_output_names.push_back(node_name); - // const size_t element_size = ggml_type_size(node->src[0]->type); - // size_t valid_elems = static_cast(node->src[0]->ne[0]); // 3072 - // size_t num_rows = static_cast(node->src[0]->ne[1]); // 7 - // size_t phys_stride = static_cast(node->src[0]->nb[1]) / element_size; // 9216 - // // size_t total_phys = (num_rows - 1) * phys_stride + valid_elems; // 6*9216 + 3072 = 58368 - // size_t total_phys = num_rows * phys_stride; // 7 * 9216 = 64512 - // ov::Shape flat_input_shape = { total_phys }; - // auto flat_input_param = std::make_shared(ov::element::f32, flat_input_shape); - // m_params.push_back(flat_input_param); + const size_t element_size = ggml_type_size(node->src[0]->type); + size_t valid_elems = static_cast(node->src[0]->ne[0]); // 3072 + size_t num_rows = static_cast(node->src[0]->ne[1]); // 7 + size_t dim2 = static_cast(node->src[0]->ne[2]); // 1 + size_t phys_stride = static_cast(node->src[0]->nb[1]) / element_size; // 9216 + // size_t total_phys = (num_rows - 1) * phys_stride + valid_elems; // 6*9216 + 3072 = 58368 + size_t total_phys = num_rows * phys_stride; // 7 * 9216 = 64512 + ov::Shape input_shape = { dim2, num_rows, phys_stride }; + auto input_param = std::make_shared(ov::element::f32, input_shape); + m_params.push_back(input_param); m_continuous = false; break; @@ -94,13 +91,11 @@ void 
GgmlOvDecoder::set_input_output(ggml_tensor* node, std::mapop)); m_output_names.push_back(node_name); - // size_t valid_i = static_cast(node->src[0]->ne[0]); // 96 - // size_t valid_j = static_cast(node->src[0]->ne[1]); // 32 - // size_t valid_k = static_cast(node->src[0]->ne[2]); // 7 - // size_t total_valid = valid_i * valid_j * valid_k; // 96 * 32 * 7 = 21504 - // ov::Shape flat_input_shape = { total_valid }; - // auto input_param = std::make_shared(ov::element::f32, flat_input_shape); - // m_params.push_back(input_param); + ov::Shape input_shape = { static_cast(node->src[0]->ne[2]), + static_cast(node->src[0]->ne[1]), + static_cast(node->src[0]->ne[0])}; + auto input_param = std::make_shared(ov::element::f32, input_shape); + m_params.push_back(input_param); m_continuous = false; break; @@ -117,9 +112,9 @@ void GgmlOvDecoder::set_input_output(ggml_tensor* node, std::mapsrc[0]->ne, node->src[0]->ne + 3); - auto input_param = std::make_shared(ov::element::f32, src_shape); - m_params.push_back(input_param); + // ov::Shape src_shape(node->src[0]->ne, node->src[0]->ne + 3); + // auto input_param = std::make_shared(ov::element::f32, src_shape); + // m_params.push_back(input_param); break; } else { for (int64_t i1 = 0; i1 < node->ne[1]; ++i1) { // ne[1] = 3072 @@ -139,27 +134,6 @@ void GgmlOvDecoder::set_input_output(ggml_tensor* node, std::mapop)); m_output_names.push_back(node_name); m_continuous = false; - break; - - // inputs[src0_name] = node->src[0]; - // std::string temp_name = src0_name + std::string("_cpy_tmp"); - // inputs[temp_name] = node; - - // outputs[node_name] = node; - // m_input_names.push_back(src0_name); - // m_input_names.push_back(temp_name); - // m_node_op_name[src0_name] = ggml_op_name(node->op); - // m_node_op_name[temp_name] = ggml_op_name(node->op); - // m_output_names.push_back(node_name); - // m_continuous = false; - - // ov::Shape flat_src0_shape = {node->src[0]->nb[2]}; - // auto param_src0 = std::make_shared(ov::element::f32, 
flat_src0_shape); - // m_params.push_back(param_src0); - - // ov::Shape flat_dst_shape = {node->nb[2], 1}; - // auto param_dst_base = std::make_shared(ov::element::f16, flat_dst_shape); - // m_params.push_back(param_dst_base); break; } @@ -167,8 +141,6 @@ void GgmlOvDecoder::set_input_output(ggml_tensor* node, std::mapname) + "_" + std::to_string(node->view_offs) + "_output_" + ggml_op_name(node->op); - // std::string node_name = std::string(node->name) + "_" + std::to_string(node->view_offs); inputs[node_name] = node; outputs[node_name] = node; m_input_names.push_back(node_name); @@ -190,12 +162,6 @@ void GgmlOvDecoder::set_input_output(ggml_tensor* node, std::mapsrc[0]->ne[0]*node->src[0]->ne[1]*node->src[0]->ne[2] }; - // ov::Shape flat_shape_src1 = { node->src[1]->ne[0]*node->src[1]->ne[1]*node->src[1]->ne[2] }; - // auto param_src0 = std::make_shared(ov::element::f16, flat_shape_src0); - // auto param_src1 = std::make_shared(ov::element::f32, flat_shape_src1); - // m_params.push_back(param_src0); - // m_params.push_back(param_src1); if (!ggml_is_contiguous(node->src[1]) || node->src[1]->ne[0] * node->src[1]->nb[0] != node->src[1]->nb[1]) { m_continuous = false; } else { @@ -376,8 +342,8 @@ GgmlOvDecoder::GgmlOvDecoder(struct ggml_tensor * node, struct ggml_cgraph * cgr if (m_node) { set_input_output(m_node, m_inputs, m_outputs); } else { - for (int node_n = 0; node_n < m_cgraph->n_nodes; node_n++) { - // for (int node_n = start_index; node_n <= end_index; node_n++) { + // for (int node_n = 0; node_n < m_cgraph->n_nodes; node_n++) { + for (int node_n = start_index; node_n <= end_index; node_n++) { auto cur_node = m_cgraph->nodes[node_n]; m_nodes.push_back(cur_node); // Init model input and output diff --git a/ggml/src/ggml-openvino/utils.cpp b/ggml/src/ggml-openvino/utils.cpp index a0234ebd30..c44aa2568b 100644 --- a/ggml/src/ggml-openvino/utils.cpp +++ b/ggml/src/ggml-openvino/utils.cpp @@ -10,8 +10,10 @@ std::shared_ptr get_ggml_decoder(struct ggml_cgraph * 
cgraph, con return std::make_shared(nullptr, cgraph, start_index, end_index); } -std::map get_ggml_graph_input_tensors(std::shared_ptr ggml_decoder) { - std::map input_tensors; +// std::map get_ggml_graph_input_tensors(std::shared_ptr ggml_decoder) { +std::vector> get_ggml_graph_input_tensors(std::shared_ptr ggml_decoder, bool flag) { + // std::map input_tensors; + std::vector> input_tensors; auto input_names = ggml_decoder->get_input_names(); // auto node_name = ggml_decoder->get_op_name(); size_t op_iter = 0; @@ -19,10 +21,7 @@ std::map get_ggml_graph_input_tensors(std::shared_ptrget_op_node_name(name, op_iter++); // auto node_op_name = ggml_decoder->get_node_op_name(name); - ov::element::Type input_type = ggml_decoder->get_input_type(name); - size_t element_size = input_type.size(); auto input_data = ggml_decoder->get_input_ggml_tensor(name)->data; - std::vector input_stride = ggml_decoder->get_input_stride(name); #ifdef GGML_OPENVINO_DEBUG printf("Subgraph input %d: %g\n", inp, *(double*)(input_data)); #endif @@ -31,58 +30,22 @@ std::map get_ggml_graph_input_tensors(std::shared_ptrget_input_type(name), {80000}, input_data); - if (op_node_name == "CONT" && !ggml_decoder->check_if_continuous() && input_shape[0] == 1) { - const size_t valid_elems = static_cast(ggml_decoder->get_input_shape(name).to_shape()[2]); + if (flag && op_node_name == "CONT" && input_shape[0] == 1 && input_shape[1] != 1) { + std::vector input_stride = ggml_decoder->get_input_stride(name); + ov::element::Type input_type = ggml_decoder->get_input_type(name); + size_t element_size = input_type.size(); + // const size_t valid_elems = static_cast(ggml_decoder->get_input_shape(name).to_shape()[2]); const size_t num_rows = static_cast(ggml_decoder->get_input_shape(name).to_shape()[1]); const size_t dim2 = static_cast(ggml_decoder->get_input_shape(name).to_shape()[0]); size_t phys_stride = static_cast(input_stride[1]) / element_size; - size_t total_logical = valid_elems * num_rows * dim2; - - 
std::vector contiguous_data(total_logical); - - for (size_t j = 0; j < num_rows; j++) { - const float *src_row = reinterpret_cast(input_data) + j * phys_stride; - float *dst_row = contiguous_data.data() + j * valid_elems; - std::copy(src_row, src_row + valid_elems, dst_row); - } - input_tensor = ov::Tensor(ggml_decoder->get_input_type(name), - ggml_decoder->get_input_shape(name).to_shape(), - contiguous_data.data()); - } else if (op_node_name == "CONT" && !ggml_decoder->check_if_continuous()){ - size_t valid_i = static_cast(ggml_decoder->get_input_shape(name).to_shape()[2]); // 96 - size_t valid_j = static_cast(ggml_decoder->get_input_shape(name).to_shape()[1]); // 32 - size_t valid_k = static_cast(ggml_decoder->get_input_shape(name).to_shape()[0]); // 7 - - size_t total_valid = valid_i * valid_j * valid_k; // 96 * 32 * 7 = 21504 - size_t stride_j = static_cast(input_stride[1]) / element_size; // 672 - size_t stride_k = static_cast(input_stride[0]) / element_size; // 96 - - std::vector contiguous_data(total_valid); - const float *src_data = reinterpret_cast(input_data); - for (size_t k = 0; k < valid_k; k++) { - for (size_t j = 0; j < valid_j; j++) { - for (size_t i = 0; i < valid_i; i++) { - size_t out_index = k * (valid_i * valid_j) + j * valid_i + i; - size_t src_index = j * stride_j + k * stride_k + i; - contiguous_data[out_index] = src_data[src_index]; - } - } - } - input_tensor = ov::Tensor(ggml_decoder->get_input_type(name), - ggml_decoder->get_input_shape(name).to_shape(), - contiguous_data.data()); - // } else if (op_node_name == "MUL_MAT") { - // ov::Shape flat_shape = { ggml_decoder->get_input_shape(name).to_shape()[0] * - // ggml_decoder->get_input_shape(name).to_shape()[1] * - // ggml_decoder->get_input_shape(name).to_shape()[2] }; - // input_tensor = ov::Tensor(ggml_decoder->get_input_type(name), flat_shape, input_data); + ov::Shape input_shape = { dim2, num_rows, phys_stride }; // {1, 7, 9216 } + input_tensor = 
ov::Tensor(ggml_decoder->get_input_type(name), input_shape, input_data); } else { input_tensor = ov::Tensor(ggml_decoder->get_input_type(name), ggml_decoder->get_input_shape(name).to_shape(), input_data); } - // input_tensor = ov::Tensor(ggml_decoder->get_input_type(name), ggml_decoder->get_input_shape(name).to_shape(), input_data); - // } - input_tensors[name] = input_tensor; + // input_tensors[name] = input_tensor; + input_tensors.emplace_back(name, input_tensor); } return input_tensors; } @@ -114,11 +77,11 @@ static ov::frontend::FrontEnd::Ptr get_ggml_frontend() { return front_end; } -enum ggml_status openvino_frontend_compute(ggml_backend_t backend, struct ggml_cgraph * cgraph, const int32_t start_index, const int32_t end_index) { - ov::Core core; - auto devices = core.get_available_devices(); +enum ggml_status openvino_frontend_compute(ggml_backend_t backend, struct ggml_cgraph * cgraph, const int32_t start_index, const int32_t end_index, bool flag) { + static ov::Core core; + // auto devices = core.get_available_devices(); // Get GGML Frontend - auto front_end = get_ggml_frontend(); + static auto front_end = get_ggml_frontend(); if (!front_end) { GGML_LOG_ERROR("GGML FrontEnd is not initialized \n"); return GGML_STATUS_FAILED; @@ -161,11 +124,12 @@ enum ggml_status openvino_frontend_compute(ggml_backend_t backend, struct ggml_c // Get input tensor auto input_names = ggml_decoder->get_input_names(); - auto input_tensors = get_ggml_graph_input_tensors(ggml_decoder); + auto input_tensors = get_ggml_graph_input_tensors(ggml_decoder, flag); // Set input tensor for (size_t i = 0; i < input_names.size(); i++) { - infer_request.set_input_tensor(i, input_tensors[input_names[i]]); + // infer_request.set_input_tensor(i, input_tensors[input_names[i]]); + infer_request.set_input_tensor(i, input_tensors.at(i).second); // auto input_tensor = infer_request.get_input_tensor(i); // auto input_shape = input_tensor.get_shape(); diff --git a/ggml/src/ggml-openvino/utils.h 
b/ggml/src/ggml-openvino/utils.h index fc5268d98a..7806c418cb 100644 --- a/ggml/src/ggml-openvino/utils.h +++ b/ggml/src/ggml-openvino/utils.h @@ -1,4 +1,4 @@ #include "ggml-decoder.h" #include "ggml-backend-impl.h" -enum ggml_status openvino_frontend_compute (ggml_backend_t backend, struct ggml_cgraph * cgraph, const int32_t start_index=0, const int32_t end_index=0); +enum ggml_status openvino_frontend_compute(ggml_backend_t backend, struct ggml_cgraph * cgraph, const int32_t start_index=0, const int32_t end_index=0, bool flag = true);