Fix test-backend-ops crashes in glu, get_rows, scale, rms_norm, add

Yu, Zijun 2026-02-12 17:25:18 +08:00
parent 0d74aba277
commit d5d673cde3
7 changed files with 129 additions and 31 deletions

View File

@@ -95,9 +95,6 @@ GgmlOvDecoder::GgmlOvDecoder(ggml_cgraph * cgraph, std::map<std::string, std::sh
     m_model_weights = model_weights;
     for (int node_n = 0; node_n < cgraph->n_nodes; node_n++) {
         auto * cur_node = cgraph->nodes[node_n];
-        if (cur_node->op == GGML_OP_NONE) {
-            continue;
-        }
         set_input_output(cur_node, true);
     }
     for (int node_n = 0; node_n < cgraph->n_nodes; node_n++) {
@@ -110,6 +107,9 @@ GgmlOvDecoder::GgmlOvDecoder(ggml_cgraph * cgraph, std::map<std::string, std::sh
     std::map<void *, ggml_tensor *> data_addr_map;
     std::unordered_set<std::string> output_name_set;
     for (const auto & node_info : m_node_info_list) {
+        if (node_info.node->op == GGML_OP_NONE) {
+            continue;
+        }
         for (const auto & it : node_info.node_inputs) {
             const auto & src_name = it.first;
             const auto & src_node = it.second;
@@ -164,6 +164,7 @@ void GgmlOvDecoder::set_input_output(ggml_tensor * node, bool naive) {
         if (src->flags & GGML_TENSOR_FLAG_INPUT) {
             src_name = get_graph_input_ov_name(src, node);
         }
+        m_inputs[src_name] = src;
         current_node_info.node_inputs[src_name] = src;
         current_node_info.node_inputs_names.push_back(src_name);
@@ -193,7 +194,6 @@ void GgmlOvDecoder::set_input_output(ggml_tensor * node, bool naive) {
         if (m_model_inputs.find(src_name) != m_model_inputs.end()) {
             continue;
         }
-        m_inputs[src_name] = src;
        assert(stateful_kv_shape.rank().is_static());
         ov::PartialShape param_shape =
             (stateful_kv_shape.rank().get_length() != 0) ? stateful_kv_shape : get_graph_input_shape(node, src);
@@ -264,7 +264,7 @@ int GgmlOvDecoder::compute_op_case(const ggml_tensor * node) const {
         } else {
             op_case = 3;
         }
-    } else if (node->src[0]->src[0]->op == GGML_OP_ROPE || node->src[0]->src[0]->src[0]->op == GGML_OP_ROPE) {
+    } else {
         // rope'ed query tensor
         op_case = 4;
     }
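The dropped condition chained node->src[0]->src[0]->src[0] without any null checks, so on the shallow graphs that test-backend-ops builds it could dereference a null src pointer; falling through to a plain else avoids the crash. A defensive alternative (a hypothetical sketch, not what the commit does) would bound the walk and test every link:

static bool has_rope_ancestor(const ggml_tensor * t, int max_depth) {
    // Walk up the src[0] chain looking for a ROPE ancestor, stopping at null links.
    for (int d = 0; t != nullptr && d < max_depth; d++) {
        if (t->op == GGML_OP_ROPE) {
            return true;
        }
        t = t->src[0];
    }
    return false;
}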
@@ -839,6 +839,9 @@ int32_t * GgmlOvDecoder::get_output_op_params(int node_idx) const {
 void GgmlOvDecoder::visit_subgraph(std::function<void(std::shared_ptr<GgmlDecoder>, int node_idx)> node_visitor) const {
     for (int node_idx = 0; node_idx < m_cgraph->n_nodes; node_idx++) {
+        if (m_cgraph->nodes[node_idx]->op == GGML_OP_NONE) {
+            continue;
+        }
         node_visitor(std::make_shared<GgmlOvDecoder>(*this), node_idx);
     }
 }

View File

@@ -113,8 +113,8 @@ struct ggml_backend_openvino_buffer_context {
     ~ggml_backend_openvino_buffer_context() {
         // Clean up all tensor extras
-        GGML_LOG_DEBUG("Deleting OpenVINO buffer context #%zu for device %d, size %zu MB\n", id, device,
-                       size / 1024 / 1024);
+        // GGML_LOG_DEBUG("Deleting OpenVINO buffer context #%zu for device %d, size %zu MB\n", id, device,
+        //                size / 1024 / 1024);
         for (auto & pair : tensor_extras) {
             delete pair.second;
         }
@@ -454,9 +454,9 @@ static size_t ggml_backend_openvino_buffer_type_get_alloc_size(ggml_backend_buff
     if (ggml_is_quantized(tensor->type) && tensor->ne[2] == 1 && tensor->ne[3] == 1) {
         ggml_openvino_extracted_layout layout = ggml_openvino_get_extracted_layout(tensor);
         if (layout.total_size > 0) {
-            GGML_LOG_DEBUG("%s: tensor %s needs %zu bytes (original %zu, extracted: weights=%zu scales=%zu zp=%zu)\n",
-                           __func__, tensor->name, layout.total_size, ggml_nbytes(tensor), layout.weights_size,
-                           layout.scales_size, layout.zp_size);
+            // GGML_LOG_DEBUG("%s: tensor %s needs %zu bytes (original %zu, extracted: weights=%zu scales=%zu zp=%zu)\n",
+            //                __func__, tensor->name, layout.total_size, ggml_nbytes(tensor), layout.weights_size,
+            //                layout.scales_size, layout.zp_size);
             return layout.total_size;
         }
     }
@@ -763,8 +763,36 @@ static ggml_backend_buffer_type_t ggml_backend_openvino_device_get_host_buffer_t
     return ggml_backend_openvino_host_buffer_type(ctx->device);
 }
+static bool has_view_input(const ggml_tensor * op) {
+    for (int i = 0; i < GGML_MAX_SRC; i++) {
+        if (op->src[i] == nullptr) {
+            break;
+        }
+        if (op->src[i]->op == GGML_OP_VIEW) {
+            return true;
+        }
+    }
+    return false;
+}
 static bool is_op_unsupported_case(const ggml_tensor * op) {
     switch (op->op) {
+        case GGML_OP_GET_ROWS:
+        case GGML_OP_SET_ROWS: {
+            if (op->ne[3] != 1) {
+                return true;
+            }
+            break;
+        }
+        case GGML_OP_ADD:
+        case GGML_OP_MUL: {
+            for (int i = 0; i < 4; i++) {
+                if (op->src[0]->ne[i] != op->src[1]->ne[i] && (op->src[0]->ne[i] != 1 && op->src[1]->ne[i] != 1)) {
+                    return true;
+                }
+            }
+            break;
+        }
         case GGML_OP_SOFT_MAX: {
             if (op->src[2] != nullptr) {
                 GGML_LOG_WARN("OpenVINO backend does not support SOFT_MAX with sinks\n");
@@ -876,7 +904,7 @@ static bool ggml_backend_openvino_device_supports_op(ggml_backend_dev_t dev, con
                                                   GGML_TYPE_Q5_K, GGML_TYPE_Q8_0, GGML_TYPE_Q6_K};
     static const std::set<ggml_op> supported_ops{GGML_OP_NONE, GGML_OP_ADD, GGML_OP_MUL, GGML_OP_MUL_MAT, GGML_OP_VIEW,
-                                                 GGML_OP_CONT, GGML_OP_RESHAPE, GGML_OP_PERMUTE, GGML_OP_TRANSPOSE,
+                                                 /*GGML_OP_CONT,*/ GGML_OP_RESHAPE, GGML_OP_PERMUTE, GGML_OP_TRANSPOSE,
                                                  GGML_OP_GET_ROWS, GGML_OP_ROPE, GGML_OP_RMS_NORM, GGML_OP_SCALE,
                                                  // softmax is not updated due to replaced by flash_attn_ext
                                                  // GGML_OP_SOFT_MAX,
@@ -896,6 +924,11 @@ static bool ggml_backend_openvino_device_supports_op(ggml_backend_dev_t dev, con
                 GGML_LOG_WARN("OpenVINO backend does not support unary op %s\n", ggml_unary_op_name(ggml_get_unary_op(op)));
                 return false;
             }
+            if (has_view_input(op)) {
+                GGML_LOG_WARN("OpenVINO backend does not support unary op %s with view input\n",
+                              ggml_unary_op_name(ggml_get_unary_op(op)));
+                return false;
+            }
             break;
         }
         case GGML_OP_GLU: {
@@ -904,6 +937,11 @@ static bool ggml_backend_openvino_device_supports_op(ggml_backend_dev_t dev, con
                 GGML_LOG_WARN("OpenVINO backend does not support GLU op %s\n", ggml_glu_op_name(ggml_get_glu_op(op)));
                 return false;
             }
+            if (has_view_input(op)) {
+                GGML_LOG_WARN("OpenVINO backend does not support GLU op %s with view input\n",
+                              ggml_glu_op_name(ggml_get_glu_op(op)));
+                return false;
+            }
             break;
         }
         default: {
@@ -912,6 +950,14 @@ static bool ggml_backend_openvino_device_supports_op(ggml_backend_dev_t dev, con
                 GGML_LOG_WARN("OpenVINO backend does not support op %s\n", ggml_op_name(op->op));
                 return false;
             }
+            static std::set<ggml_op> ops_not_support_view_input{
+                GGML_OP_GET_ROWS,
+                GGML_OP_RMS_NORM,
+            };
+            if (ops_not_support_view_input.find(op->op) != ops_not_support_view_input.end() && has_view_input(op)) {
+                GGML_LOG_WARN("OpenVINO backend does not support op %s with view input\n", ggml_op_name(op->op));
+                return false;
+            }
         }
     }
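For reference, a minimal graph that the new view-input guard would reject can be built with the public ggml API roughly like this (hypothetical sketch; sizes and eps are arbitrary):

#include "ggml.h"

int main(void) {
    struct ggml_init_params params = {16 * 1024 * 1024, NULL, false};
    struct ggml_context * ctx = ggml_init(params);

    struct ggml_tensor * a = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 64);
    struct ggml_tensor * v = ggml_view_1d(ctx, a, 32, 0);  // v->op == GGML_OP_VIEW
    struct ggml_tensor * n = ggml_rms_norm(ctx, v, 1e-6f);
    (void) n;

    // has_view_input(n) is true, so supports_op() now reports this
    // RMS_NORM as unsupported instead of crashing later.
    ggml_free(ctx);
    return 0;
}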

View File

@@ -34,9 +34,18 @@ OutputVector translate_get_rows(const NodeContext & context) {
         indices =
             std::make_shared<ov::op::v0::Squeeze>(indices, ov::op::v0::Constant::create(ov::element::i64, {2}, {0, 1}));
     if (data.get_partial_shape().rank() == 4) {
-        auto axis = ov::op::v0::Constant::create(ov::element::i32, ov::Shape{}, {1});
-        data = std::make_shared<ov::op::v0::Squeeze>(data, ov::op::v0::Constant::create(ov::element::i64, {1}, {0}));
-        res = std::make_shared<ov::op::v8::Gather>(data, indices, axis, 1);
+        if (data.get_partial_shape()[1].get_length() == 1) {
+            // Work-around for a bug in ov cpu plugin for test-backend-ops
+            data = std::make_shared<ov::op::v0::Squeeze>(data,
+                                                         ov::op::v0::Constant::create(ov::element::i64, {2}, {0, 1}));
+            auto axis = ov::op::v0::Constant::create(ov::element::i32, ov::Shape{}, {0});
+            res = std::make_shared<ov::op::v8::Gather>(data, indices, axis);
+        } else {
+            auto axis = ov::op::v0::Constant::create(ov::element::i32, ov::Shape{}, {1});
+            data =
+                std::make_shared<ov::op::v0::Squeeze>(data, ov::op::v0::Constant::create(ov::element::i64, {1}, {0}));
+            res = std::make_shared<ov::op::v8::Gather>(data, indices, axis, 1);
+        }
     } else if (context.is_stateful() && data.get_partial_shape().rank() == 3) {
         auto axis = ov::op::v0::Constant::create(ov::element::i32, ov::Shape{}, {1});
         res = std::make_shared<ov::op::v8::Gather>(data, indices, axis, 1);

View File

@@ -9,7 +9,6 @@
 #include <openvino/op/multiply.hpp>
 #include <openvino/op/sigmoid.hpp>
 #include <openvino/op/slice.hpp>
-#include <openvino/op/split.hpp>
 namespace ov {
 namespace frontend {
@@ -25,11 +24,23 @@ OutputVector translate_glu_geglu(const NodeContext & context) {
         src0 = context.get_input(0);
         src1 = context.get_input(1);
     } else {
+        // GGML splits along ne[0] (OV last axis) using floor division: nc = ne[0] / 2.
+        // Both halves are nc elements; if the dimension is odd, the last element is dropped.
+        // Use Slice instead of Split to handle odd dimensions correctly.
         auto combined = context.get_input(0);
-        auto split_axis = ov::op::v0::Constant::create(ov::element::i64, {}, {-1});
-        auto split = std::make_shared<ov::op::v1::Split>(combined, split_axis, 2);
-        src0 = split->output(0);
-        src1 = split->output(1);
+        auto combined_shape = combined.get_partial_shape();
+        int64_t last_dim_val = combined_shape[combined_shape.rank().get_length() - 1].get_length();
+        int64_t nc = last_dim_val / 2;
+        auto axis = ov::op::v0::Constant::create(ov::element::i64, {1}, {-1});
+        auto step = ov::op::v0::Constant::create(ov::element::i64, {1}, {1});
+        auto start0 = ov::op::v0::Constant::create(ov::element::i64, {1}, {0});
+        auto stop0 = ov::op::v0::Constant::create(ov::element::i64, {1}, {nc});
+        auto start1 = ov::op::v0::Constant::create(ov::element::i64, {1}, {nc});
+        auto stop1 = ov::op::v0::Constant::create(ov::element::i64, {1}, {2 * nc});
+        src0 = std::make_shared<ov::op::v8::Slice>(combined, start0, stop0, step, axis);
+        src1 = std::make_shared<ov::op::v8::Slice>(combined, start1, stop1, step, axis);
     }
     int32_t * params = context.get_output_op_params();
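A quick check of the comment above with a made-up odd size: ne[0] = 7 gives nc = 7 / 2 = 3, so the slices cover [0, 3) and [3, 6) and element 6 is dropped, matching GGML's floor-division split; v1::Split would instead require an even dimension. The bound arithmetic as a tiny sketch:

#include <cstdint>
#include <cstdio>

int main() {
    for (int64_t ne0 : {6, 7}) {
        int64_t nc = ne0 / 2;  // floor division, as in ggml
        printf("ne0=%lld: src0=[0,%lld) src1=[%lld,%lld) dropped=%lld\n",
               (long long) ne0, (long long) nc, (long long) nc,
               (long long) (2 * nc), (long long) (ne0 - 2 * nc));
    }
    return 0;
}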

View File

@@ -9,7 +9,6 @@
 #include <openvino/op/multiply.hpp>
 #include <openvino/op/sigmoid.hpp>
 #include <openvino/op/slice.hpp>
-#include <openvino/op/split.hpp>
 namespace ov {
 namespace frontend {
@@ -25,11 +24,23 @@ OutputVector translate_glu_swiglu(const NodeContext & context) {
         src0 = context.get_input(0);
         src1 = context.get_input(1);
     } else {
+        // GGML splits along ne[0] (OV last axis) using floor division: nc = ne[0] / 2.
+        // Both halves are nc elements; if the dimension is odd, the last element is dropped.
+        // Use Slice instead of Split to handle odd dimensions correctly.
         auto combined = context.get_input(0);
-        auto split_axis = ov::op::v0::Constant::create(ov::element::i64, {}, {-1});
-        auto split = std::make_shared<ov::op::v1::Split>(combined, split_axis, 2);
-        src0 = split->output(0);
-        src1 = split->output(1);
+        auto combined_shape = combined.get_partial_shape();
+        int64_t last_dim_val = combined_shape[combined_shape.rank().get_length() - 1].get_length();
+        int64_t nc = last_dim_val / 2;
+        auto axis = ov::op::v0::Constant::create(ov::element::i64, {1}, {-1});
+        auto step = ov::op::v0::Constant::create(ov::element::i64, {1}, {1});
+        auto start0 = ov::op::v0::Constant::create(ov::element::i64, {1}, {0});
+        auto stop0 = ov::op::v0::Constant::create(ov::element::i64, {1}, {nc});
+        auto start1 = ov::op::v0::Constant::create(ov::element::i64, {1}, {nc});
+        auto stop1 = ov::op::v0::Constant::create(ov::element::i64, {1}, {2 * nc});
+        src0 = std::make_shared<ov::op::v8::Slice>(combined, start0, stop0, step, axis);
+        src1 = std::make_shared<ov::op::v8::Slice>(combined, start1, stop1, step, axis);
     }
     int32_t * params = context.get_output_op_params();

View File

@@ -2,6 +2,7 @@
 #include "../op_table.hpp"
 #include "../utils.hpp"
+#include <openvino/op/add.hpp>
 #include <openvino/op/constant.hpp>
 #include <openvino/op/multiply.hpp>
 #include <vector>
@@ -15,10 +16,21 @@ OutputVector translate_scale(const NodeContext & context) {
     num_inputs_check(context, 1, 1);
     float scale;
-    memcpy(&scale, context.get_output_op_params(), sizeof(float));
-    auto scale_node = std::make_shared<ov::op::v0::Constant>(ov::element::f32, ov::Shape{}, std::vector<float>{scale});
+    float bias;
+    memcpy(&scale, (float *) context.get_output_op_params() + 0, sizeof(float));
+    memcpy(&bias, (float *) context.get_output_op_params() + 1, sizeof(float));
-    auto res = std::make_shared<ov::op::v1::Multiply>(context.get_input(0), scale_node);
+    auto scale_node = std::make_shared<ov::op::v0::Constant>(ov::element::f32, ov::Shape{}, std::vector<float>{scale});
+    auto scaled = std::make_shared<ov::op::v1::Multiply>(context.get_input(0), scale_node);
+    std::shared_ptr<ov::Node> res;
+    if (bias != 0.0f) {
+        auto bias_node =
+            std::make_shared<ov::op::v0::Constant>(ov::element::f32, ov::Shape{}, std::vector<float>{bias});
+        res = std::make_shared<ov::op::v1::Add>(scaled, bias_node);
+    } else {
+        res = scaled;
+    }
     return rename_outputs_with_suffix({res}, context.get_name());
 }
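The two memcpy reads assume ggml's op_params layout for GGML_OP_SCALE: an int32_t array whose first two 4-byte slots carry the scale and bias floats (bias is what ggml_scale_bias stores). A round-trip sketch of that packing, with arbitrary values:

#include <cassert>
#include <cstdint>
#include <cstring>

int main() {
    int32_t op_params[4] = {0};
    float scale_in = 0.5f, bias_in = 1.25f;
    memcpy(&op_params[0], &scale_in, sizeof(float));  // slot 0: scale
    memcpy(&op_params[1], &bias_in, sizeof(float));   // slot 1: bias

    // The reads performed by translate_scale above:
    float scale, bias;
    memcpy(&scale, (float *) op_params + 0, sizeof(float));
    memcpy(&bias, (float *) op_params + 1, sizeof(float));
    assert(scale == 0.5f && bias == 1.25f);  // node computes y = x * scale + bias
    return 0;
}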

View File

@@ -59,9 +59,9 @@ enum ggml_status ov_graph_compute_dynamic(ggml_cgraph * cgraph, const std::strin
     static auto is_static = false;
     static size_t stateful_kv_size = 0;
-    // if (is_naive(cgraph)) {
-    //     return naive_compute(cgraph, core, device, config);
-    // }
+    if (is_naive(cgraph)) {
+        return naive_compute(cgraph, core, device, config);
+    }
     auto start_time = ggml_time_us();
@@ -438,7 +438,13 @@ enum ggml_status ov_graph_compute_static(ggml_cgraph * cgraph) {
 bool is_naive(ggml_cgraph * cgraph) {
     constexpr int naive_graph_size_threshold = 20;
-    return cgraph->n_nodes < naive_graph_size_threshold;
+    int count = 0;
+    for (int i = 0; i < cgraph->n_nodes; i++) {
+        if (cgraph->nodes[i]->op != GGML_OP_NONE) {
+            count++;
+        }
+    }
+    return count < naive_graph_size_threshold;
 }
 enum ggml_status naive_compute(ggml_cgraph * cgraph,