Move CPY from GGML OV Backend to OV Frontend

2025-01-22 15:22:56 +08:00 · 2025-01-22 15:22:56 +08:00 · cb2729bc4a
parent 2b04bd43be
commit cb2729bc4a
4 changed files with 107 additions and 6 deletions
--- a/ggml/src/ggml-openvino.cpp
+++ b/ggml/src/ggml-openvino.cpp
@ -815,9 +815,9 @@ static enum ggml_status ggml_backend_openvino_graph_compute(ggml_backend_t backe
        } else if (std::find(reshape_indices.begin(), reshape_indices.end(), i) != reshape_indices.end()) {
            ggml_backend_openvino_reshape(cgraph->nodes[i]);
        } else if (std::find(view_indices.begin(), view_indices.end(), i) != view_indices.end()) {
-            ggml_backend_openvino_view(cgraph->nodes[i]);
-        } else if (std::find(cpy_indices.begin(), cpy_indices.end(), i) != cpy_indices.end()) {
-            ggml_backend_openvino_cpy(cgraph->nodes[i]);
+             ggml_backend_openvino_view(cgraph->nodes[i]);
+        // } else if (std::find(cpy_indices.begin(), cpy_indices.end(), i) != cpy_indices.end()) {
+        //    ggml_backend_openvino_cpy(cgraph->nodes[i]);
        } else if (std::find(transpose_indices.begin(), transpose_indices.end(), i) != transpose_indices.end()) {
            ggml_backend_openvino_transpose(cgraph->nodes[i]);
        } else if (std::find(permute_indices.begin(), permute_indices.end(), i) != permute_indices.end()) {
@ -829,7 +829,6 @@ static enum ggml_status ggml_backend_openvino_graph_compute(ggml_backend_t backe
            int start_index = i;
            while (i < cgraph->n_nodes &&
                    std::find(cont_indices.begin(), cont_indices.end(), i) == cont_indices.end() &&
-                    std::find(cpy_indices.begin(), cpy_indices.end(), i) == cpy_indices.end() &&
                    std::find(mul_mat_indices.begin(), mul_mat_indices.end(), i) == mul_mat_indices.end()) {
                i++;
            }
--- a/ggml/src/ggml-openvino/decoder.h
+++ b/ggml/src/ggml-openvino/decoder.h
@ -51,6 +51,8 @@ public:

    // virtual size_t output(size_t index) const = 0;

+    virtual bool check_if_continuous() const = 0;  // GgmlOvDecoder returns the flag it records while decoding CONT/CPY nodes
+
 };

 }  // namespace ggml
--- a/ggml/src/ggml-openvino/ggml-decoder.cpp
+++ b/ggml/src/ggml-openvino/ggml-decoder.cpp
@ -1,6 +1,7 @@
 #include "ggml-decoder.h"
 #include <ggml.h>
 #include <ggml-impl.h>
+#include <ggml-cpu-impl.h>

 void GgmlOvDecoder::set_input_output(ggml_tensor* node, std::map<std::string, ggml_tensor *>& inputs, std::map<std::string, ggml_tensor *>& outputs) {
    switch (node->op) {
@ -9,8 +10,6 @@ void GgmlOvDecoder::set_input_output(ggml_tensor* node, std::map<std::string, gg
        case GGML_OP_RESHAPE:
        case GGML_OP_TRANSPOSE:
        case GGML_OP_PERMUTE:
-        case GGML_OP_CONT:
-        case GGML_OP_CPY:
        case GGML_OP_RMS_NORM:
        {
            inputs[node->src[0]->name] = node->src[0];
@ -19,6 +18,103 @@ void GgmlOvDecoder::set_input_output(ggml_tensor* node, std::map<std::string, gg
            m_output_names.push_back(node->name);
            break;
        }
+        case GGML_OP_CONT:
+        {
+            // GGML_OP_CONT repacks src[0] into the buffer of `node`. Two outcomes are recorded for
+            // check_if_continuous():
+            //   m_continuous == true  -> source and destination are both contiguous; src[0] is
+            //                            registered as the input and nothing is copied here;
+            //   m_continuous == false -> the data is copied into node->data on the host below and
+            //                            `node` itself is registered as both input and output.
+            ggml_tensor * src = node->src[0];
+
+            if (ggml_is_contiguous(src) && ggml_is_contiguous(node)) {
+                // Nothing to repack: register src[0] as the input.
+                inputs[src->name] = src;
+                outputs[node->name] = node;
+                m_input_names.push_back(src->name);
+                m_output_names.push_back(node->name);
+                m_continuous = true;
+                break;
+            }
+
+            // Host-side copy. Every row (i1, i2, i3) of the source is visited through the source
+            // strides, so all four dimensions are copied, not just the first ne[1] rows.
+            // No ov::Tensor wrappers are needed: only the raw data pointers are used.
+            const size_t elem_size = ggml_type_size(src->type);
+            // ggml_row_size() accounts for block-quantised types, where ne[0] * type_size would
+            // overshoot the real row length.
+            const size_t row_bytes = ggml_row_size(src->type, src->ne[0]);
+            // With identical shapes the destination strides are honoured; otherwise (e.g. a CONT
+            // that also reshapes) rows are packed back to back.
+            // NOTE(review): the packed path assumes `node` is contiguous - confirm for all callers.
+            const bool same_shape = ggml_are_same_shape(src, node);
+            // A whole row can be moved with one memcpy only if its elements are adjacent on both sides.
+            const bool dense_rows = src->nb[0] == elem_size && node->nb[0] == elem_size;
+
+            const char * src_base = static_cast<const char *>(src->data);
+            char *       dst_base = static_cast<char *>(node->data);
+
+            size_t row = 0;  // running row index, used for the packed destination layout
+            for (int64_t i3 = 0; i3 < src->ne[3]; ++i3) {
+                for (int64_t i2 = 0; i2 < src->ne[2]; ++i2) {
+                    for (int64_t i1 = 0; i1 < src->ne[1]; ++i1, ++row) {
+                        const char * src_row = src_base + i3 * src->nb[3] + i2 * src->nb[2] + i1 * src->nb[1];
+                        char * dst_row = same_shape
+                            ? dst_base + i3 * node->nb[3] + i2 * node->nb[2] + i1 * node->nb[1]
+                            : dst_base + row * row_bytes;
+
+                        if (dense_rows) {
+                            std::memcpy(dst_row, src_row, row_bytes);
+                            continue;
+                        }
+                        // Strided elements (e.g. a transposed source): copy one element at a time.
+                        // NOTE(review): this fallback assumes one element per block; block-quantised
+                        // types are expected to take the dense path above - confirm.
+                        for (int64_t i0 = 0; i0 < src->ne[0]; ++i0) {
+                            std::memcpy(dst_row + i0 * node->nb[0], src_row + i0 * src->nb[0], elem_size);
+                        }
+                    }
+                }
+            }
+
+            // The result already lives in node->data, so the node is registered as its own input
+            // and output.
+            inputs[node->name] = node;
+            outputs[node->name] = node;
+            m_input_names.push_back(node->name);
+            m_output_names.push_back(node->name);
+            m_continuous = false;
+            break;
+        }
+        case GGML_OP_CPY:
+        {
+            ggml_tensor * src = node->src[0];
+            m_continuous = ggml_is_contiguous(node);  // value later returned by check_if_continuous()
+            if (!m_continuous) {
+                // Strided destination: convert on the host. Only F32 -> F16 between equally shaped
+                // tensors is implemented, so reject anything else instead of misreading the buffers.
+                GGML_ASSERT(src->type == GGML_TYPE_F32 && node->type == GGML_TYPE_F16);
+                GGML_ASSERT(ggml_are_same_shape(src, node));
+                for (int64_t i3 = 0; i3 < node->ne[3]; ++i3) {
+                for (int64_t i2 = 0; i2 < node->ne[2]; ++i2) {
+                for (int64_t i1 = 0; i1 < node->ne[1]; ++i1) {
+                for (int64_t i0 = 0; i0 < node->ne[0]; ++i0) {
+                    const char * s = static_cast<const char *>(src->data) + i0 * src->nb[0] + i1 * src->nb[1] + i2 * src->nb[2] + i3 * src->nb[3];
+                    char * d = static_cast<char *>(node->data) + i0 * node->nb[0] + i1 * node->nb[1] + i2 * node->nb[2] + i3 * node->nb[3];
+                    *reinterpret_cast<ggml_fp16_t *>(d) = GGML_FP32_TO_FP16(*reinterpret_cast<const float *>(s));
+                }
+                }
+                }
+                }
+            }
+            ggml_tensor * in = m_continuous ? src : node;  // contiguous: src[0] is the input; host copy: the node itself
+            inputs[in->name] = in;
+            outputs[node->name] = node;
+            m_input_names.push_back(in->name);
+            m_output_names.push_back(node->name);
+            break;
+        }
        // For view, input is node itself
        case GGML_OP_VIEW:
        {
--- a/ggml/src/ggml-openvino/ggml-decoder.h
+++ b/ggml/src/ggml-openvino/ggml-decoder.h
@ -62,6 +62,9 @@ public:
        return m_outputs.at(name);
    }

+    // Flag recorded by set_input_output(): set on the contiguous CONT/CPY path, cleared after a host-side copy.
+    virtual bool check_if_continuous() const override { return m_continuous; }
+
 private:
    void set_input_output(ggml_tensor* node, std::map<std::string, ggml_tensor *>& inputs, std::map<std::string, ggml_tensor *>& outputs);

@ -75,5 +78,6 @@ private:
    std::vector<std::shared_ptr<GgmlOvDecoder>> m_decoders;
    const std::string m_op_name;
    mutable std::string m_name;
+    bool m_continuous = false;  // assigned by set_input_output() for CONT/CPY only; initialised so other ops never expose an indeterminate value
 };