Fix test-backend-ops crash glu, get_rows, scale, rms_norm, add
commit d5d673cde3
parent 0d74aba277

@@ -95,9 +95,6 @@ GgmlOvDecoder::GgmlOvDecoder(ggml_cgraph * cgraph, std::map<std::string, std::sh
     m_model_weights = model_weights;
     for (int node_n = 0; node_n < cgraph->n_nodes; node_n++) {
         auto * cur_node = cgraph->nodes[node_n];
-        if (cur_node->op == GGML_OP_NONE) {
-            continue;
-        }
         set_input_output(cur_node, true);
     }
     for (int node_n = 0; node_n < cgraph->n_nodes; node_n++) {

@@ -110,6 +107,9 @@ GgmlOvDecoder::GgmlOvDecoder(ggml_cgraph * cgraph, std::map<std::string, std::sh
     std::map<void *, ggml_tensor *> data_addr_map;
     std::unordered_set<std::string> output_name_set;
     for (const auto & node_info : m_node_info_list) {
+        if (node_info.node->op == GGML_OP_NONE) {
+            continue;
+        }
         for (const auto & it : node_info.node_inputs) {
             const auto & src_name = it.first;
             const auto & src_node = it.second;

@@ -164,6 +164,7 @@ void GgmlOvDecoder::set_input_output(ggml_tensor * node, bool naive) {
         if (src->flags & GGML_TENSOR_FLAG_INPUT) {
             src_name = get_graph_input_ov_name(src, node);
         }
+        m_inputs[src_name] = src;
         current_node_info.node_inputs[src_name] = src;
         current_node_info.node_inputs_names.push_back(src_name);

@@ -193,7 +194,6 @@ void GgmlOvDecoder::set_input_output(ggml_tensor * node, bool naive) {
         if (m_model_inputs.find(src_name) != m_model_inputs.end()) {
             continue;
         }
-        m_inputs[src_name] = src;
         assert(stateful_kv_shape.rank().is_static());
         ov::PartialShape param_shape =
             (stateful_kv_shape.rank().get_length() != 0) ? stateful_kv_shape : get_graph_input_shape(node, src);

@@ -264,7 +264,7 @@ int GgmlOvDecoder::compute_op_case(const ggml_tensor * node) const {
         } else {
             op_case = 3;
         }
-    } else if (node->src[0]->src[0]->op == GGML_OP_ROPE || node->src[0]->src[0]->src[0]->op == GGML_OP_ROPE) {
+    } else {
        // rope'ed query tensor
        op_case = 4;
    }

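This hunk is one of the crash fixes: in the shallow graphs test-backend-ops builds, `node->src[0]->src[0]` can be a leaf tensor with no sources, so the old condition's extra `->src[0]->op` hop dereferences a null pointer. Falling back to a plain `else` avoids the probe entirely. For illustration, a null-safe version of such a probe (the helper name is invented, not from the source):

```cpp
// Hypothetical helper: walk up the src[0] chain looking for a ROPE ancestor,
// stopping at null instead of crashing on leaf tensors.
static bool ancestor_is_rope(const ggml_tensor * t, int max_depth) {
    for (int i = 0; i < max_depth && t != nullptr; i++) {
        if (t->op == GGML_OP_ROPE) {
            return true;
        }
        t = t->src[0];
    }
    return false;
}
```
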
@@ -839,6 +839,9 @@ int32_t * GgmlOvDecoder::get_output_op_params(int node_idx) const {

 void GgmlOvDecoder::visit_subgraph(std::function<void(std::shared_ptr<GgmlDecoder>, int node_idx)> node_visitor) const {
     for (int node_idx = 0; node_idx < m_cgraph->n_nodes; node_idx++) {
+        if (m_cgraph->nodes[node_idx]->op == GGML_OP_NONE) {
+            continue;
+        }
         node_visitor(std::make_shared<GgmlOvDecoder>(*this), node_idx);
     }
 }

@@ -113,8 +113,8 @@ struct ggml_backend_openvino_buffer_context {

     ~ggml_backend_openvino_buffer_context() {
         // Clean up all tensor extras
-        GGML_LOG_DEBUG("Deleting OpenVINO buffer context #%zu for device %d, size %zu MB\n", id, device,
-                       size / 1024 / 1024);
+        // GGML_LOG_DEBUG("Deleting OpenVINO buffer context #%zu for device %d, size %zu MB\n", id, device,
+        //                size / 1024 / 1024);
         for (auto & pair : tensor_extras) {
             delete pair.second;
         }

@@ -454,9 +454,9 @@ static size_t ggml_backend_openvino_buffer_type_get_alloc_size(ggml_backend_buff
     if (ggml_is_quantized(tensor->type) && tensor->ne[2] == 1 && tensor->ne[3] == 1) {
         ggml_openvino_extracted_layout layout = ggml_openvino_get_extracted_layout(tensor);
         if (layout.total_size > 0) {
-            GGML_LOG_DEBUG("%s: tensor %s needs %zu bytes (original %zu, extracted: weights=%zu scales=%zu zp=%zu)\n",
-                           __func__, tensor->name, layout.total_size, ggml_nbytes(tensor), layout.weights_size,
-                           layout.scales_size, layout.zp_size);
+            // GGML_LOG_DEBUG("%s: tensor %s needs %zu bytes (original %zu, extracted: weights=%zu scales=%zu zp=%zu)\n",
+            //                __func__, tensor->name, layout.total_size, ggml_nbytes(tensor), layout.weights_size,
+            //                layout.scales_size, layout.zp_size);
             return layout.total_size;
         }
     }

@@ -763,8 +763,36 @@ static ggml_backend_buffer_type_t ggml_backend_openvino_device_get_host_buffer_t
     return ggml_backend_openvino_host_buffer_type(ctx->device);
 }

+static bool has_view_input(const ggml_tensor * op) {
+    for (int i = 0; i < GGML_MAX_SRC; i++) {
+        if (op->src[i] == nullptr) {
+            break;
+        }
+        if (op->src[i]->op == GGML_OP_VIEW) {
+            return true;
+        }
+    }
+    return false;
+}
+
 static bool is_op_unsupported_case(const ggml_tensor * op) {
     switch (op->op) {
+        case GGML_OP_GET_ROWS:
+        case GGML_OP_SET_ROWS: {
+            if (op->ne[3] != 1) {
+                return true;
+            }
+            break;
+        }
+        case GGML_OP_ADD:
+        case GGML_OP_MUL: {
+            for (int i = 0; i < 4; i++) {
+                if (op->src[0]->ne[i] != op->src[1]->ne[i] && (op->src[0]->ne[i] != 1 && op->src[1]->ne[i] != 1)) {
+                    return true;
+                }
+            }
+            break;
+        }
         case GGML_OP_SOFT_MAX: {
             if (op->src[2] != nullptr) {
                 GGML_LOG_WARN("OpenVINO backend does not support SOFT_MAX with sinks\n");

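The new ADD/MUL case encodes ggml's broadcast rule: two extents are compatible only when they match or one of them is 1. For instance, (4, 3, 1, 1) with (4, 1, 5, 1) broadcasts, while (4, 3, 1, 1) with (4, 2, 1, 1) does not and is now refused up front instead of failing later. The same predicate in isolation, as a sketch:

```cpp
#include <cstdint>

// Sketch: returns true when two ggml-style ne[] extents cannot broadcast,
// i.e. some dimension differs and neither side is 1.
static bool broadcast_incompatible(const int64_t a[4], const int64_t b[4]) {
    for (int i = 0; i < 4; i++) {
        if (a[i] != b[i] && a[i] != 1 && b[i] != 1) {
            return true;
        }
    }
    return false;
}
```
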
@@ -876,7 +904,7 @@ static bool ggml_backend_openvino_device_supports_op(ggml_backend_dev_t dev, con
                                                  GGML_TYPE_Q5_K, GGML_TYPE_Q8_0, GGML_TYPE_Q6_K};

     static const std::set<ggml_op> supported_ops{GGML_OP_NONE, GGML_OP_ADD, GGML_OP_MUL, GGML_OP_MUL_MAT, GGML_OP_VIEW,
-                                                 GGML_OP_CONT, GGML_OP_RESHAPE, GGML_OP_PERMUTE, GGML_OP_TRANSPOSE,
+                                                 /*GGML_OP_CONT,*/ GGML_OP_RESHAPE, GGML_OP_PERMUTE, GGML_OP_TRANSPOSE,
                                                  GGML_OP_GET_ROWS, GGML_OP_ROPE, GGML_OP_RMS_NORM, GGML_OP_SCALE,
                                                  // softmax is not updated due to replaced by flash_attn_ext
                                                  // GGML_OP_SOFT_MAX,

@@ -896,6 +924,11 @@ static bool ggml_backend_openvino_device_supports_op(ggml_backend_dev_t dev, con
                 GGML_LOG_WARN("OpenVINO backend does not support unary op %s\n", ggml_unary_op_name(ggml_get_unary_op(op)));
                 return false;
             }
+            if (has_view_input(op)) {
+                GGML_LOG_WARN("OpenVINO backend does not support unary op %s with view input\n",
+                              ggml_unary_op_name(ggml_get_unary_op(op)));
+                return false;
+            }
             break;
         }
         case GGML_OP_GLU: {

@@ -904,6 +937,11 @@ static bool ggml_backend_openvino_device_supports_op(ggml_backend_dev_t dev, con
                 GGML_LOG_WARN("OpenVINO backend does not support GLU op %s\n", ggml_glu_op_name(ggml_get_glu_op(op)));
                 return false;
             }
+            if (has_view_input(op)) {
+                GGML_LOG_WARN("OpenVINO backend does not support unary op %s with view input\n",
+                              ggml_glu_op_name(ggml_get_glu_op(op)));
+                return false;
+            }
             break;
         }
         default: {

@@ -912,6 +950,14 @@ static bool ggml_backend_openvino_device_supports_op(ggml_backend_dev_t dev, con
                 GGML_LOG_WARN("OpenVINO backend does not support op %s\n", ggml_op_name(op->op));
                 return false;
             }
+            static std::set<ggml_op> ops_not_support_view_input{
+                GGML_OP_GET_ROWS,
+                GGML_OP_RMS_NORM,
+            };
+            if (ops_not_support_view_input.find(op->op) != ops_not_support_view_input.end() && has_view_input(op)) {
+                GGML_LOG_WARN("OpenVINO backend does not support op %s with view input\n", ggml_op_name(op->op));
+                return false;
+            }
         }
     }

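GET_ROWS and RMS_NORM join the view blacklist for the same apparent reason as the unary and GLU cases above: a GGML_OP_VIEW source aliases another tensor's buffer through its own strides, while the current OpenVINO mapping assumes densely packed inputs. A hedged illustration of the rejected graph shape, using the public ggml API (`ctx` and `big` are assumed to exist):

```cpp
// One row viewed out of a larger tensor, then normalized: supports_op
// now routes this pattern away from the OpenVINO backend.
ggml_tensor * row  = ggml_view_2d(ctx, big, big->ne[0], 1, big->nb[1], 0);
ggml_tensor * norm = ggml_rms_norm(ctx, row, 1e-6f);
```
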
@@ -34,9 +34,18 @@ OutputVector translate_get_rows(const NodeContext & context) {
         indices =
             std::make_shared<ov::op::v0::Squeeze>(indices, ov::op::v0::Constant::create(ov::element::i64, {2}, {0, 1}));
     if (data.get_partial_shape().rank() == 4) {
-        auto axis = ov::op::v0::Constant::create(ov::element::i32, ov::Shape{}, {1});
-        data = std::make_shared<ov::op::v0::Squeeze>(data, ov::op::v0::Constant::create(ov::element::i64, {1}, {0}));
-        res = std::make_shared<ov::op::v8::Gather>(data, indices, axis, 1);
+        if (data.get_partial_shape()[1].get_length() == 1) {
+            // Work-around for a bug in ov cpu plugin for test-backend-ops
+            data = std::make_shared<ov::op::v0::Squeeze>(data,
+                                                         ov::op::v0::Constant::create(ov::element::i64, {2}, {0, 1}));
+            auto axis = ov::op::v0::Constant::create(ov::element::i32, ov::Shape{}, {0});
+            res = std::make_shared<ov::op::v8::Gather>(data, indices, axis);
+        } else {
+            auto axis = ov::op::v0::Constant::create(ov::element::i32, ov::Shape{}, {1});
+            data =
+                std::make_shared<ov::op::v0::Squeeze>(data, ov::op::v0::Constant::create(ov::element::i64, {1}, {0}));
+            res = std::make_shared<ov::op::v8::Gather>(data, indices, axis, 1);
+        }
     } else if (context.is_stateful() && data.get_partial_shape().rank() == 3) {
         auto axis = ov::op::v0::Constant::create(ov::element::i32, ov::Shape{}, {1});
         res = std::make_shared<ov::op::v8::Gather>(data, indices, axis, 1);

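The rank-4 path normally squeezes away the leading dimension and gathers with batch_dims = 1 (the trailing `1` argument to Gather). The new branch works around the OV CPU plugin bug the comment mentions, hit by test-backend-ops when the batch extent is 1: both operands are squeezed down and a plain axis-0 Gather is emitted instead. The two forms are equivalent in that degenerate case; a scalar model of what either computes, as a sketch:

```cpp
#include <cstdint>
#include <vector>

// Sketch: out row i = data row indices[i], i.e. a batch-1 gather flattened
// to a plain axis-0 gather over rows of `cols` floats.
static std::vector<float> gather_rows(const std::vector<float> & data,
                                      const std::vector<int32_t> & indices,
                                      size_t cols) {
    std::vector<float> out;
    out.reserve(indices.size() * cols);
    for (int32_t row : indices) {
        out.insert(out.end(), data.begin() + row * cols, data.begin() + (row + 1) * cols);
    }
    return out;
}
```
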
@@ -9,7 +9,6 @@
 #include <openvino/op/multiply.hpp>
 #include <openvino/op/sigmoid.hpp>
 #include <openvino/op/slice.hpp>
-#include <openvino/op/split.hpp>

 namespace ov {
 namespace frontend {

@@ -25,11 +24,23 @@ OutputVector translate_glu_geglu(const NodeContext & context) {
         src0 = context.get_input(0);
         src1 = context.get_input(1);
     } else {
+        // GGML splits along ne[0] (OV last axis) using floor division: nc = ne[0] / 2.
+        // Both halves are nc elements; if the dimension is odd, the last element is dropped.
+        // Use Slice instead of Split to handle odd dimensions correctly.
         auto combined = context.get_input(0);
-        auto split_axis = ov::op::v0::Constant::create(ov::element::i64, {}, {-1});
-        auto split = std::make_shared<ov::op::v1::Split>(combined, split_axis, 2);
-        src0 = split->output(0);
-        src1 = split->output(1);
+        auto combined_shape = combined.get_partial_shape();
+        int64_t last_dim_val = combined_shape[combined_shape.rank().get_length() - 1].get_length();
+        int64_t nc = last_dim_val / 2;
+
+        auto axis = ov::op::v0::Constant::create(ov::element::i64, {1}, {-1});
+        auto step = ov::op::v0::Constant::create(ov::element::i64, {1}, {1});
+        auto start0 = ov::op::v0::Constant::create(ov::element::i64, {1}, {0});
+        auto stop0 = ov::op::v0::Constant::create(ov::element::i64, {1}, {nc});
+        auto start1 = ov::op::v0::Constant::create(ov::element::i64, {1}, {nc});
+        auto stop1 = ov::op::v0::Constant::create(ov::element::i64, {1}, {2 * nc});
+
+        src0 = std::make_shared<ov::op::v8::Slice>(combined, start0, stop0, step, axis);
+        src1 = std::make_shared<ov::op::v8::Slice>(combined, start1, stop1, step, axis);
     }

     int32_t * params = context.get_output_op_params();

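A worked example of the odd-dimension case the new comment calls out: with ne[0] = 7, nc = 7 / 2 = 3, so the halves are elements [0, 3) and [3, 6), and element 6 is dropped, matching GGML's floor-division semantics. Split requires the axis length to divide evenly by the number of splits, so it cannot express this. The bounds computation as a standalone sketch:

```cpp
#include <cstdint>

// Sketch: GGML-style GLU halves along the last axis, via floor division.
struct glu_bounds { int64_t start0, stop0, start1, stop1; };

static glu_bounds glu_split_bounds(int64_t last_dim) {
    const int64_t nc = last_dim / 2;  // odd tail element is dropped
    return {0, nc, nc, 2 * nc};      // halves [0, nc) and [nc, 2 * nc)
}
```
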
@@ -9,7 +9,6 @@
 #include <openvino/op/multiply.hpp>
 #include <openvino/op/sigmoid.hpp>
 #include <openvino/op/slice.hpp>
-#include <openvino/op/split.hpp>

 namespace ov {
 namespace frontend {

@@ -25,11 +24,23 @@ OutputVector translate_glu_swiglu(const NodeContext & context) {
         src0 = context.get_input(0);
         src1 = context.get_input(1);
     } else {
+        // GGML splits along ne[0] (OV last axis) using floor division: nc = ne[0] / 2.
+        // Both halves are nc elements; if the dimension is odd, the last element is dropped.
+        // Use Slice instead of Split to handle odd dimensions correctly.
         auto combined = context.get_input(0);
-        auto split_axis = ov::op::v0::Constant::create(ov::element::i64, {}, {-1});
-        auto split = std::make_shared<ov::op::v1::Split>(combined, split_axis, 2);
-        src0 = split->output(0);
-        src1 = split->output(1);
+        auto combined_shape = combined.get_partial_shape();
+        int64_t last_dim_val = combined_shape[combined_shape.rank().get_length() - 1].get_length();
+        int64_t nc = last_dim_val / 2;
+
+        auto axis = ov::op::v0::Constant::create(ov::element::i64, {1}, {-1});
+        auto step = ov::op::v0::Constant::create(ov::element::i64, {1}, {1});
+        auto start0 = ov::op::v0::Constant::create(ov::element::i64, {1}, {0});
+        auto stop0 = ov::op::v0::Constant::create(ov::element::i64, {1}, {nc});
+        auto start1 = ov::op::v0::Constant::create(ov::element::i64, {1}, {nc});
+        auto stop1 = ov::op::v0::Constant::create(ov::element::i64, {1}, {2 * nc});
+
+        src0 = std::make_shared<ov::op::v8::Slice>(combined, start0, stop0, step, axis);
+        src1 = std::make_shared<ov::op::v8::Slice>(combined, start1, stop1, step, axis);
     }

     int32_t * params = context.get_output_op_params();

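The swiglu translator gets the identical Slice treatment. Downstream of this hunk, the halves combine as silu(gate) * linear per ggml's SWIGLU definition (the sigmoid and multiply includes above are used for that part); which half acts as the gate depends on the op's swapped variant. A scalar reference for intuition:

```cpp
#include <cmath>

// Scalar sketch: SwiGLU pairs a SiLU-gated half with a linear half.
static float swiglu_ref(float gate, float lin) {
    const float silu = gate / (1.0f + std::exp(-gate));  // silu(x) = x * sigmoid(x)
    return silu * lin;
}
```
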
@@ -2,6 +2,7 @@
 #include "../op_table.hpp"
 #include "../utils.hpp"

+#include <openvino/op/add.hpp>
 #include <openvino/op/constant.hpp>
 #include <openvino/op/multiply.hpp>
 #include <vector>

@@ -15,10 +16,21 @@ OutputVector translate_scale(const NodeContext & context) {
     num_inputs_check(context, 1, 1);

     float scale;
-    memcpy(&scale, context.get_output_op_params(), sizeof(float));
-    auto scale_node = std::make_shared<ov::op::v0::Constant>(ov::element::f32, ov::Shape{}, std::vector<float>{scale});
+    float bias;
+    memcpy(&scale, (float *) context.get_output_op_params() + 0, sizeof(float));
+    memcpy(&bias, (float *) context.get_output_op_params() + 1, sizeof(float));

-    auto res = std::make_shared<ov::op::v1::Multiply>(context.get_input(0), scale_node);
+    auto scale_node = std::make_shared<ov::op::v0::Constant>(ov::element::f32, ov::Shape{}, std::vector<float>{scale});
+    auto scaled = std::make_shared<ov::op::v1::Multiply>(context.get_input(0), scale_node);
+
+    std::shared_ptr<ov::Node> res;
+    if (bias != 0.0f) {
+        auto bias_node =
+            std::make_shared<ov::op::v0::Constant>(ov::element::f32, ov::Shape{}, std::vector<float>{bias});
+        res = std::make_shared<ov::op::v1::Add>(scaled, bias_node);
+    } else {
+        res = scaled;
+    }

     return rename_outputs_with_suffix({res}, context.get_name());
 }

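This tracks ggml's SCALE op carrying two floats in op_params, the multiplier at index 0 and a bias at index 1, computing x * scale + bias; reading only the single float left the bias ignored, and test-backend-ops exercises nonzero bias. The `bias != 0.0f` check keeps bias-free graphs free of a redundant Add. The unpacking in isolation, as a sketch:

```cpp
#include <cstdint>
#include <cstring>

// Sketch: unpack SCALE's op_params the way the translator does.
static float apply_scale(float x, const int32_t * op_params) {
    float scale, bias;
    std::memcpy(&scale, (const float *) op_params + 0, sizeof(float));
    std::memcpy(&bias, (const float *) op_params + 1, sizeof(float));
    return x * scale + bias;
}
```
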
@@ -59,9 +59,9 @@ enum ggml_status ov_graph_compute_dynamic(ggml_cgraph * cgraph, const std::strin
     static auto is_static = false;
     static size_t stateful_kv_size = 0;

-    // if (is_naive(cgraph)) {
-    //     return naive_compute(cgraph, core, device, config);
-    // }
+    if (is_naive(cgraph)) {
+        return naive_compute(cgraph, core, device, config);
+    }

     auto start_time = ggml_time_us();

@@ -438,7 +438,13 @@ enum ggml_status ov_graph_compute_static(ggml_cgraph * cgraph) {

 bool is_naive(ggml_cgraph * cgraph) {
     constexpr int naive_graph_size_threshold = 20;
-    return cgraph->n_nodes < naive_graph_size_threshold;
+    int count = 0;
+    for (int i = 0; i < cgraph->n_nodes; i++) {
+        if (cgraph->nodes[i]->op != GGML_OP_NONE) {
+            count++;
+        }
+    }
+    return count < naive_graph_size_threshold;
 }

 enum ggml_status naive_compute(ggml_cgraph * cgraph,

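A note on the last two hunks together: test-backend-ops builds graphs where most entries are GGML_OP_NONE leaves around a handful of real ops, so the raw n_nodes count overstated graph size once the naive path was re-enabled. Counting only non-NONE nodes keeps the threshold meaningful: for example, a graph with 30 nodes of which 25 are GGML_OP_NONE counts as 5 real ops, stays under the threshold of 20, and is dispatched to naive_compute.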