diff --git a/ggml/src/ggml-openvino.cpp b/ggml/src/ggml-openvino.cpp index 109003d686..230edded11 100644 --- a/ggml/src/ggml-openvino.cpp +++ b/ggml/src/ggml-openvino.cpp @@ -480,12 +480,12 @@ void ggml_backend_openvino_mul_mat(struct ggml_tensor * dst) { size_t total_src0 = indices_src0.size(); // = 96 * 32 * 32 size_t total_src1 = indices_src1.size(); // = 96 * 7 * 32 - ov::Shape orig_shape_src0 = { static_cast(src0->ne[0]), + ov::Shape orig_shape_src0 = { static_cast(src0->ne[2]), static_cast(src0->ne[1]), - static_cast(src0->ne[2])}; - ov::Shape orig_shape_src1 = { static_cast(src1->ne[0]), + static_cast(src0->ne[0])}; + ov::Shape orig_shape_src1 = { static_cast(src1->ne[2]), static_cast(src1->ne[1]), - static_cast(src1->ne[2])}; + static_cast(src1->ne[0])}; auto param_src0 = std::make_shared(ov::element::f16, orig_shape_src0); auto param_src1 = std::make_shared(ov::element::f32, orig_shape_src1); @@ -573,12 +573,12 @@ void ggml_backend_openvino_mul_mat(struct ggml_tensor * dst) { std::vector eff_shape_src1 = get_effective_shape(src1); std::vector eff_shape_dst = get_effective_shape(dst); - ov::Shape orig_shape_src0 = { static_cast(src0->ne[0]), - static_cast(src0->ne[1]), - static_cast(src0->ne[2])}; - ov::Shape orig_shape_src1 = { static_cast(src1->ne[0]), - static_cast(src1->ne[1]), - static_cast(src1->ne[2])}; + ov::Shape orig_shape_src0 = { static_cast(src0->ne[2]), + static_cast(src0->ne[1]), + static_cast(src0->ne[0])}; + ov::Shape orig_shape_src1 = { static_cast(src1->ne[2]), + static_cast(src1->ne[1]), + static_cast(src1->ne[0])}; auto param_src0 = std::make_shared(ov::element::f16, orig_shape_src0); auto param_src1 = std::make_shared(ov::element::f32, orig_shape_src1); @@ -999,40 +999,40 @@ static enum ggml_status ggml_backend_openvino_graph_compute(ggml_backend_t backe } int end_node = cgraph->n_nodes - 1; - // openvino_frontend_compute(backend, cgraph, 0, end_node); + openvino_frontend_compute(backend, cgraph, 0, end_node); // openvino_frontend_compute(backend, cgraph); // Process nodes in order - for (int i = 0; i < cgraph->n_nodes; i++) { - if (std::find(permute_indices.begin(), permute_indices.end(), i) != permute_indices.end()) { - ggml_backend_openvino_permute(cgraph->nodes[i]); - } else if (std::find(cont_indices.begin(), cont_indices.end(), i) != cont_indices.end()) { - ggml_backend_openvino_dup_bytes(cgraph->nodes[i]); - } else if (std::find(view_indices.begin(), view_indices.end(), i) != view_indices.end()) { - ggml_backend_openvino_view(cgraph->nodes[i]); - } else if (std::find(cpy_indices.begin(), cpy_indices.end(), i) != cpy_indices.end()) { - ggml_backend_openvino_cpy(cgraph->nodes[i]); - } else if (std::find(transpose_indices.begin(), transpose_indices.end(), i) != transpose_indices.end()) { - ggml_backend_openvino_transpose(cgraph->nodes[i]); - } else if (std::find(reshape_indices.begin(), reshape_indices.end(), i) != reshape_indices.end()) { - ggml_backend_openvino_reshape(cgraph->nodes[i]); - } else if (std::find(mul_mat_indices.begin(), mul_mat_indices.end(), i) != mul_mat_indices.end()) { - ggml_backend_openvino_mul_mat(cgraph->nodes[i]); - } else { - // Process a range of nodes with openvino_frontend_compute - int start_index = i; - while (i < cgraph->n_nodes - && std::find(view_indices.begin(), view_indices.end(), i) == view_indices.end() - && std::find(cpy_indices.begin(), cpy_indices.end(), i) == cpy_indices.end() - && std::find(cont_indices.begin(), cont_indices.end(), i) == cont_indices.end() - && std::find(mul_mat_indices.begin(), mul_mat_indices.end(), i) == mul_mat_indices.end() - ) { - i++; - } - if (start_index < i) { - openvino_frontend_compute(backend, cgraph, start_index, --i); - } - } - } + // for (int i = 0; i < cgraph->n_nodes; i++) { + // if (std::find(permute_indices.begin(), permute_indices.end(), i) != permute_indices.end()) { + // ggml_backend_openvino_permute(cgraph->nodes[i]); + // } else if (std::find(cont_indices.begin(), cont_indices.end(), i) != cont_indices.end()) { + // ggml_backend_openvino_dup_bytes(cgraph->nodes[i]); + // } else if (std::find(view_indices.begin(), view_indices.end(), i) != view_indices.end()) { + // ggml_backend_openvino_view(cgraph->nodes[i]); + // } else if (std::find(cpy_indices.begin(), cpy_indices.end(), i) != cpy_indices.end()) { + // ggml_backend_openvino_cpy(cgraph->nodes[i]); + // } else if (std::find(transpose_indices.begin(), transpose_indices.end(), i) != transpose_indices.end()) { + // ggml_backend_openvino_transpose(cgraph->nodes[i]); + // } else if (std::find(reshape_indices.begin(), reshape_indices.end(), i) != reshape_indices.end()) { + // ggml_backend_openvino_reshape(cgraph->nodes[i]); + // } else if (std::find(mul_mat_indices.begin(), mul_mat_indices.end(), i) != mul_mat_indices.end()) { + // ggml_backend_openvino_mul_mat(cgraph->nodes[i]); + // } else { + // // Process a range of nodes with openvino_frontend_compute + // int start_index = i; + // while (i < cgraph->n_nodes + // && std::find(view_indices.begin(), view_indices.end(), i) == view_indices.end() + // && std::find(cpy_indices.begin(), cpy_indices.end(), i) == cpy_indices.end() + // && std::find(cont_indices.begin(), cont_indices.end(), i) == cont_indices.end() + // && std::find(mul_mat_indices.begin(), mul_mat_indices.end(), i) == mul_mat_indices.end() + // ) { + // i++; + // } + // if (start_index < i) { + // openvino_frontend_compute(backend, cgraph, start_index, --i); + // } + // } + // } return GGML_STATUS_SUCCESS; @@ -1257,14 +1257,13 @@ static const std::set& openvino_ops = []() -> const std::set(ggml_nelements(node)) }; - auto input_param = std::make_shared(ov::element::f32, flat_shape); - m_params.push_back(input_param); + // ov::Shape flat_shape = { static_cast(ggml_nelements(node)) }; + // auto input_param = std::make_shared(ov::element::f32, flat_shape); + // m_params.push_back(input_param); break; } @@ -72,15 +72,15 @@ void GgmlOvDecoder::set_input_output(ggml_tensor* node, std::mapop)); m_output_names.push_back(node_name); - const size_t element_size = ggml_type_size(node->src[0]->type); - size_t valid_elems = static_cast(node->src[0]->ne[0]); // 3072 - size_t num_rows = static_cast(node->src[0]->ne[1]); // 7 - size_t phys_stride = static_cast(node->src[0]->nb[1]) / element_size; // 9216 - // size_t total_phys = (num_rows - 1) * phys_stride + valid_elems; // 6*9216 + 3072 = 58368 - size_t total_phys = num_rows * phys_stride; // 7 * 9216 = 64512 - ov::Shape flat_input_shape = { total_phys }; - auto flat_input_param = std::make_shared(ov::element::f32, flat_input_shape); - m_params.push_back(flat_input_param); + // const size_t element_size = ggml_type_size(node->src[0]->type); + // size_t valid_elems = static_cast(node->src[0]->ne[0]); // 3072 + // size_t num_rows = static_cast(node->src[0]->ne[1]); // 7 + // size_t phys_stride = static_cast(node->src[0]->nb[1]) / element_size; // 9216 + // // size_t total_phys = (num_rows - 1) * phys_stride + valid_elems; // 6*9216 + 3072 = 58368 + // size_t total_phys = num_rows * phys_stride; // 7 * 9216 = 64512 + // ov::Shape flat_input_shape = { total_phys }; + // auto flat_input_param = std::make_shared(ov::element::f32, flat_input_shape); + // m_params.push_back(flat_input_param); m_continuous = false; break; @@ -94,13 +94,13 @@ void GgmlOvDecoder::set_input_output(ggml_tensor* node, std::mapop)); m_output_names.push_back(node_name); - size_t valid_i = static_cast(node->src[0]->ne[0]); // 96 - size_t valid_j = static_cast(node->src[0]->ne[1]); // 32 - size_t valid_k = static_cast(node->src[0]->ne[2]); // 7 - size_t total_valid = valid_i * valid_j * valid_k; // 96 * 32 * 7 = 21504 - ov::Shape flat_input_shape = { total_valid }; - auto input_param = std::make_shared(ov::element::f32, flat_input_shape); - m_params.push_back(input_param); + // size_t valid_i = static_cast(node->src[0]->ne[0]); // 96 + // size_t valid_j = static_cast(node->src[0]->ne[1]); // 32 + // size_t valid_k = static_cast(node->src[0]->ne[2]); // 7 + // size_t total_valid = valid_i * valid_j * valid_k; // 96 * 32 * 7 = 21504 + // ov::Shape flat_input_shape = { total_valid }; + // auto input_param = std::make_shared(ov::element::f32, flat_input_shape); + // m_params.push_back(input_param); m_continuous = false; break; @@ -190,12 +190,12 @@ void GgmlOvDecoder::set_input_output(ggml_tensor* node, std::mapsrc[0]->ne[0]*node->src[0]->ne[1]*node->src[0]->ne[2] }; - ov::Shape flat_shape_src1 = { node->src[1]->ne[0]*node->src[1]->ne[1]*node->src[1]->ne[2] }; - auto param_src0 = std::make_shared(ov::element::f16, flat_shape_src0); - auto param_src1 = std::make_shared(ov::element::f32, flat_shape_src1); - m_params.push_back(param_src0); - m_params.push_back(param_src1); + // ov::Shape flat_shape_src0 = { node->src[0]->ne[0]*node->src[0]->ne[1]*node->src[0]->ne[2] }; + // ov::Shape flat_shape_src1 = { node->src[1]->ne[0]*node->src[1]->ne[1]*node->src[1]->ne[2] }; + // auto param_src0 = std::make_shared(ov::element::f16, flat_shape_src0); + // auto param_src1 = std::make_shared(ov::element::f32, flat_shape_src1); + // m_params.push_back(param_src0); + // m_params.push_back(param_src1); if (!ggml_is_contiguous(node->src[1]) || node->src[1]->ne[0] * node->src[1]->nb[0] != node->src[1]->nb[1]) { m_continuous = false; } else { diff --git a/ggml/src/ggml-openvino/utils.cpp b/ggml/src/ggml-openvino/utils.cpp index 8f27bbc97d..a0234ebd30 100644 --- a/ggml/src/ggml-openvino/utils.cpp +++ b/ggml/src/ggml-openvino/utils.cpp @@ -14,12 +14,15 @@ std::map get_ggml_graph_input_tensors(std::shared_ptr input_tensors; auto input_names = ggml_decoder->get_input_names(); // auto node_name = ggml_decoder->get_op_name(); - size_t iter = 0; + size_t op_iter = 0; for (size_t inp = 0; inp < input_names.size(); ++inp) { auto name = input_names[inp]; - std::string op_node_name = ggml_decoder->get_op_node_name(name, iter++); + std::string op_node_name = ggml_decoder->get_op_node_name(name, op_iter++); // auto node_op_name = ggml_decoder->get_node_op_name(name); + ov::element::Type input_type = ggml_decoder->get_input_type(name); + size_t element_size = input_type.size(); auto input_data = ggml_decoder->get_input_ggml_tensor(name)->data; + std::vector input_stride = ggml_decoder->get_input_stride(name); #ifdef GGML_OPENVINO_DEBUG printf("Subgraph input %d: %g\n", inp, *(double*)(input_data)); #endif @@ -28,36 +31,51 @@ std::map get_ggml_graph_input_tensors(std::shared_ptrget_input_type(name), {80000}, input_data); - if (op_node_name == "CONT" && ggml_decoder->check_if_continuous()) { - ov::Shape flat_shape = { ggml_decoder->get_input_shape(name).to_shape()[0] * - ggml_decoder->get_input_shape(name).to_shape()[1] * - ggml_decoder->get_input_shape(name).to_shape()[2] }; - input_tensor = ov::Tensor(ggml_decoder->get_input_type(name), flat_shape, input_data); - } else if ( op_node_name == "CONT" && - !ggml_decoder->check_if_continuous() && - input_shape[0] == 1) { - size_t valid_elems = static_cast(ggml_decoder->get_input_shape(name).to_shape()[2]); // 3072 - size_t num_rows = static_cast(ggml_decoder->get_input_shape(name).to_shape()[1]); // 7 - ov::element::Type input_type = ggml_decoder->get_input_type(name); - size_t element_size = input_type.size(); - std::vector strides = ggml_decoder->get_input_stride(name); - size_t phys_stride = static_cast(strides[1]) / element_size; - // size_t total_phys = (num_rows - 1) * phys_stride + valid_elems; - size_t total_phys = num_rows* phys_stride; - ov::Shape flat_input_shape = { total_phys }; - input_tensor = ov::Tensor(ggml_decoder->get_input_type(name), flat_input_shape, input_data); - } else if (op_node_name == "CONT") { + if (op_node_name == "CONT" && !ggml_decoder->check_if_continuous() && input_shape[0] == 1) { + const size_t valid_elems = static_cast(ggml_decoder->get_input_shape(name).to_shape()[2]); + const size_t num_rows = static_cast(ggml_decoder->get_input_shape(name).to_shape()[1]); + const size_t dim2 = static_cast(ggml_decoder->get_input_shape(name).to_shape()[0]); + size_t phys_stride = static_cast(input_stride[1]) / element_size; + size_t total_logical = valid_elems * num_rows * dim2; + + std::vector contiguous_data(total_logical); + + for (size_t j = 0; j < num_rows; j++) { + const float *src_row = reinterpret_cast(input_data) + j * phys_stride; + float *dst_row = contiguous_data.data() + j * valid_elems; + std::copy(src_row, src_row + valid_elems, dst_row); + } + input_tensor = ov::Tensor(ggml_decoder->get_input_type(name), + ggml_decoder->get_input_shape(name).to_shape(), + contiguous_data.data()); + } else if (op_node_name == "CONT" && !ggml_decoder->check_if_continuous()){ size_t valid_i = static_cast(ggml_decoder->get_input_shape(name).to_shape()[2]); // 96 size_t valid_j = static_cast(ggml_decoder->get_input_shape(name).to_shape()[1]); // 32 size_t valid_k = static_cast(ggml_decoder->get_input_shape(name).to_shape()[0]); // 7 + size_t total_valid = valid_i * valid_j * valid_k; // 96 * 32 * 7 = 21504 - ov::Shape flat_input_shape = { total_valid }; - input_tensor = ov::Tensor(ggml_decoder->get_input_type(name), flat_input_shape, input_data); - } else if (op_node_name == "MUL_MAT") { - ov::Shape flat_shape = { ggml_decoder->get_input_shape(name).to_shape()[0] * - ggml_decoder->get_input_shape(name).to_shape()[1] * - ggml_decoder->get_input_shape(name).to_shape()[2] }; - input_tensor = ov::Tensor(ggml_decoder->get_input_type(name), flat_shape, input_data); + size_t stride_j = static_cast(input_stride[1]) / element_size; // 672 + size_t stride_k = static_cast(input_stride[0]) / element_size; // 96 + + std::vector contiguous_data(total_valid); + const float *src_data = reinterpret_cast(input_data); + for (size_t k = 0; k < valid_k; k++) { + for (size_t j = 0; j < valid_j; j++) { + for (size_t i = 0; i < valid_i; i++) { + size_t out_index = k * (valid_i * valid_j) + j * valid_i + i; + size_t src_index = j * stride_j + k * stride_k + i; + contiguous_data[out_index] = src_data[src_index]; + } + } + } + input_tensor = ov::Tensor(ggml_decoder->get_input_type(name), + ggml_decoder->get_input_shape(name).to_shape(), + contiguous_data.data()); + // } else if (op_node_name == "MUL_MAT") { + // ov::Shape flat_shape = { ggml_decoder->get_input_shape(name).to_shape()[0] * + // ggml_decoder->get_input_shape(name).to_shape()[1] * + // ggml_decoder->get_input_shape(name).to_shape()[2] }; + // input_tensor = ov::Tensor(ggml_decoder->get_input_type(name), flat_shape, input_data); } else { input_tensor = ov::Tensor(ggml_decoder->get_input_type(name), ggml_decoder->get_input_shape(name).to_shape(), input_data); }