diff --git a/ggml/src/ggml-openvino.cpp b/ggml/src/ggml-openvino.cpp
index 07aff4b72e..444ccdf366 100644
--- a/ggml/src/ggml-openvino.cpp
+++ b/ggml/src/ggml-openvino.cpp
@@ -815,9 +815,9 @@ static enum ggml_status ggml_backend_openvino_graph_compute(ggml_backend_t backe
         } else if (std::find(reshape_indices.begin(), reshape_indices.end(), i) != reshape_indices.end()) {
             ggml_backend_openvino_reshape(cgraph->nodes[i]);
         } else if (std::find(view_indices.begin(), view_indices.end(), i) != view_indices.end()) {
-            ggml_backend_openvino_view(cgraph->nodes[i]);
-        } else if (std::find(cpy_indices.begin(), cpy_indices.end(), i) != cpy_indices.end()) {
-            ggml_backend_openvino_cpy(cgraph->nodes[i]);
+            ggml_backend_openvino_view(cgraph->nodes[i]);
+        // } else if (std::find(cpy_indices.begin(), cpy_indices.end(), i) != cpy_indices.end()) {
+        //     ggml_backend_openvino_cpy(cgraph->nodes[i]);
         } else if (std::find(transpose_indices.begin(), transpose_indices.end(), i) != transpose_indices.end()) {
             ggml_backend_openvino_transpose(cgraph->nodes[i]);
         } else if (std::find(permute_indices.begin(), permute_indices.end(), i) != permute_indices.end()) {
@@ -829,7 +829,6 @@ static enum ggml_status ggml_backend_openvino_graph_compute(ggml_backend_t backe
             int start_index = i;
             while (i < cgraph->n_nodes &&
                    std::find(cont_indices.begin(), cont_indices.end(), i) == cont_indices.end() &&
-                   std::find(cpy_indices.begin(), cpy_indices.end(), i) == cpy_indices.end() &&
                    std::find(mul_mat_indices.begin(), mul_mat_indices.end(), i) == mul_mat_indices.end()) {
                 i++;
             }
diff --git a/ggml/src/ggml-openvino/decoder.h b/ggml/src/ggml-openvino/decoder.h
index c7f1bbd725..56f2ddcc80 100644
--- a/ggml/src/ggml-openvino/decoder.h
+++ b/ggml/src/ggml-openvino/decoder.h
@@ -51,6 +51,8 @@ public:
 
     // virtual size_t output(size_t index) const = 0;
 
+    virtual bool check_if_continuous() const = 0;
+
 };
 
 } // namespace ggml
diff --git a/ggml/src/ggml-openvino/ggml-decoder.cpp b/ggml/src/ggml-openvino/ggml-decoder.cpp
index 172c72ff50..355a95d978 100644
--- a/ggml/src/ggml-openvino/ggml-decoder.cpp
+++ b/ggml/src/ggml-openvino/ggml-decoder.cpp
@@ -1,6 +1,7 @@
 #include "ggml-decoder.h"
 #include
 #include
+#include <cstring>
 
 void GgmlOvDecoder::set_input_output(ggml_tensor* node, std::map<std::string, ggml_tensor*>& inputs, std::map<std::string, ggml_tensor*>& outputs) {
     switch (node->op) {
@@ -9,8 +10,6 @@ void GgmlOvDecoder::set_input_output(ggml_tensor* node, std::map
         inputs[node->src[0]->name] = node->src[0];
@@ -19,6 +18,103 @@ void GgmlOvDecoder::set_input_output(ggml_tensor* node, std::map
         m_output_names.push_back(node->name);
         break;
     }
+    case GGML_OP_CONT:
+    {
+        if (ggml_is_contiguous(node->src[0]) && ggml_is_contiguous(node)) {
+            inputs[node->src[0]->name] = node->src[0];
+            outputs[node->name] = node;
+            m_input_names.push_back(node->src[0]->name);
+            m_output_names.push_back(node->name);
+            m_continuous = true;
+            break;
+        }
+
+        if (node->src[0]->type == node->type && node->src[0]->ne[0] == node->ne[0] &&
+            node->src[0]->nb[0] == ggml_type_size(node->src[0]->type) && node->nb[0] == ggml_type_size(node->src[0]->type)) {
+
+            for (size_t i01 = 0; i01 < node->src[0]->ne[1]; ++i01) {
+                const char *src_row = reinterpret_cast<const char *>(node->src[0]->data) + i01 * node->src[0]->nb[1];
+                char *dst_row = reinterpret_cast<char *>(node->data) + i01 * node->nb[1];
+                std::memcpy(dst_row, src_row, node->src[0]->ne[0] * ggml_type_size(node->src[0]->type));
+            }
+
+            inputs[node->name] = node;
+            outputs[node->name] = node;
+            m_input_names.push_back(node->name);
+            m_output_names.push_back(node->name);
+            m_continuous = false;
+            break;
+        }
+
+        // if (ggml_is_contiguous(node)) {
+        const size_t rs = node->src[0]->ne[0] * ggml_type_size(node->src[0]->type); // Row size in bytes for dst
+
+        // Create OpenVINO tensors for source and destination.
+        // The tensors are viewed as a 2D structure (num_rows x ne00) for easier iteration in the simplified loop below.
+        ov::Tensor src_tensor(ov::element::f32,
+                              ov::Shape{node->src[0]->ne[3] * node->src[0]->ne[2] * node->src[0]->ne[1], node->src[0]->ne[0]},
+                              node->src[0]->data);
+        ov::Tensor dst_tensor(ov::element::f32,
+                              ov::Shape{node->src[0]->ne[3] * node->src[0]->ne[2] * node->src[0]->ne[1], node->src[0]->ne[0]},
+                              node->data);
+
+        // Perform the copy in a single loop over rows.
+        const size_t num_rows = node->src[0]->ne[3] * node->src[0]->ne[2] * node->src[0]->ne[1];
+        for (size_t row = 0; row < num_rows; ++row) {
+            // Calculate the source row pointer from the original strides:
+            // the combined row index is decomposed using nb[3], nb[2] and nb[1].
+            const char* src0_ptr = (char*)src_tensor.data() +
+                // which i03 block the current row belongs to
+                (row / (node->src[0]->ne[2] * node->src[0]->ne[1])) * node->src[0]->nb[3] + // 0
+                // which i02 block the current row belongs to within the current i03 block
+                ((row / node->src[0]->ne[1]) % node->src[0]->ne[2]) * node->src[0]->nb[2] + // 0, 0,......, 0,384, 384,......, 384,768,......, 2304
+                // position within the current i02 block, i.e. the i01 index
+                (row % node->src[0]->ne[1]) * node->src[0]->nb[1]; // 0,2688,......,83328, 0, 2688,......,83328, 0,......, 83328
+
+            // Destination row pointer is linear:
+            // since dst is contiguous, its rows are addressed with the single stride rs.
+            char* dst_ptr = (char*)dst_tensor.data() + row * rs;
+
+            // Copy one row
+            std::memcpy(dst_ptr, src0_ptr, rs);
+        }
+
+        inputs[node->name] = node;
+        outputs[node->name] = node;
+        m_input_names.push_back(node->name);
+        m_output_names.push_back(node->name);
+        m_continuous = false;
+        break;
+        //}
+    }
+    case GGML_OP_CPY:
+    {
+        if (ggml_is_contiguous(node)) {
+            inputs[node->src[0]->name] = node->src[0];
+            outputs[node->name] = node;
+            m_input_names.push_back(node->src[0]->name);
+            m_output_names.push_back(node->name);
+            m_continuous = true;
+            break;
+        } else {
+            for (int64_t i1 = 0; i1 < node->ne[1]; ++i1) {     // ne[1] = 3072
+                for (int64_t i0 = 0; i0 < node->ne[0]; ++i0) { // ne[0] = 7
+                    int64_t src_index = i0 * node->src[0]->nb[0] / sizeof(float) + // stride in nb[0]
+                                        i1 * node->src[0]->nb[1] / sizeof(float);  // stride in nb[1]
+                    char *dst_ptr = static_cast<char *>(node->data) +
+                                    i0 * node->nb[0] + i1 * node->nb[1];
+                    *(ggml_fp16_t *)dst_ptr = GGML_FP32_TO_FP16(((float*)node->src[0]->data)[src_index]);
+                }
+            }
+            // inputs[node->src[0]->name] = node->src[0];
+            inputs[node->name] = node;
+            outputs[node->name] = node;
+            m_input_names.push_back(node->name);
+            m_output_names.push_back(node->name);
+            m_continuous = false;
+            break;
+        }
+    }
     // For view, input is node itself
     case GGML_OP_VIEW:
     {
diff --git a/ggml/src/ggml-openvino/ggml-decoder.h b/ggml/src/ggml-openvino/ggml-decoder.h
index 2bb2f585f1..2afde161ee 100644
--- a/ggml/src/ggml-openvino/ggml-decoder.h
+++ b/ggml/src/ggml-openvino/ggml-decoder.h
@@ -62,6 +62,9 @@ public:
         return m_outputs.at(name);
     }
 
+    virtual bool check_if_continuous() const override {
+        return m_continuous;
+    }
 private:
     void set_input_output(ggml_tensor* node, std::map<std::string, ggml_tensor*>& inputs, std::map<std::string, ggml_tensor*>& outputs);
 
@@ -75,5 +78,6 @@ private:
     std::vector> m_decoders;
     const std::string m_op_name;
     mutable std::string m_name;
+    bool m_continuous;
 };
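
For reference, below is a minimal standalone sketch (not part of the diff) of the row-by-row copy that the GGML_OP_CONT fallback branch performs for a non-contiguous source: the flat row index is decomposed with nb[3]/nb[2]/nb[1] to locate each source row, while the contiguous destination is filled linearly with a single row stride. The shapes, strides, and f32 element type used here are illustrative assumptions, not values taken from the backend.

// Standalone illustration (hypothetical shapes/strides) of the non-contiguous row copy
// used in the GGML_OP_CONT branch above: src rows are located via nb[3]/nb[2]/nb[1],
// dst rows are written back-to-back with a single row stride `rs`.
#include <cstdint>
#include <cstdio>
#include <cstring>
#include <vector>

int main() {
    // Illustrative f32 tensor: 4 x 3 x 2 x 1 elements, with a padded row stride so the
    // source is NOT contiguous (6 floats per row are stored, only 4 are valid).
    const int64_t ne[4] = {4, 3, 2, 1};
    const size_t  ts    = sizeof(float);
    const size_t  nb[4] = {ts, 6 * ts, 6 * 3 * ts, 6 * 3 * 2 * ts};

    std::vector<char>  src(nb[3] * ne[3], 0);
    std::vector<float> dst(ne[0] * ne[1] * ne[2] * ne[3]);

    // Fill the padded source so every valid element stores its logical (flattened) index.
    for (int64_t i3 = 0; i3 < ne[3]; ++i3)
        for (int64_t i2 = 0; i2 < ne[2]; ++i2)
            for (int64_t i1 = 0; i1 < ne[1]; ++i1)
                for (int64_t i0 = 0; i0 < ne[0]; ++i0) {
                    float v = float(((i3 * ne[2] + i2) * ne[1] + i1) * ne[0] + i0);
                    std::memcpy(src.data() + i3 * nb[3] + i2 * nb[2] + i1 * nb[1] + i0 * nb[0], &v, ts);
                }

    // Same indexing as the diff: flatten (i3, i2, i1) into one row counter, rebuild the
    // source offset from the strides, and write the destination linearly.
    const size_t rs       = ne[0] * ts;              // contiguous row size in bytes
    const size_t num_rows = ne[3] * ne[2] * ne[1];
    for (size_t row = 0; row < num_rows; ++row) {
        const char * src_ptr = src.data()
            + (row / (ne[2] * ne[1])) * nb[3]        // which i3 block
            + ((row / ne[1]) % ne[2]) * nb[2]        // which i2 plane within that block
            + (row % ne[1]) * nb[1];                 // which i1 row within that plane
        std::memcpy(reinterpret_cast<char *>(dst.data()) + row * rs, src_ptr, rs);
    }

    // dst is now densely packed: 0, 1, 2, ... despite the padded source layout.
    std::printf("dst[5] = %g (expected 5)\n", dst[5]);
    return 0;
}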