Fix test-backend-ops crash glu, get_rows, scale, rms_norm, add
commit d5d673cde3
parent 0d74aba277

@@ -95,9 +95,6 @@ GgmlOvDecoder::GgmlOvDecoder(ggml_cgraph * cgraph, std::map<std::string, std::sh
     m_model_weights = model_weights;
     for (int node_n = 0; node_n < cgraph->n_nodes; node_n++) {
         auto * cur_node = cgraph->nodes[node_n];
-        if (cur_node->op == GGML_OP_NONE) {
-            continue;
-        }
         set_input_output(cur_node, true);
     }
     for (int node_n = 0; node_n < cgraph->n_nodes; node_n++) {

@@ -110,6 +107,9 @@ GgmlOvDecoder::GgmlOvDecoder(ggml_cgraph * cgraph, std::map<std::string, std::sh
     std::map<void *, ggml_tensor *> data_addr_map;
     std::unordered_set<std::string> output_name_set;
     for (const auto & node_info : m_node_info_list) {
+        if (node_info.node->op == GGML_OP_NONE) {
+            continue;
+        }
         for (const auto & it : node_info.node_inputs) {
             const auto & src_name = it.first;
             const auto & src_node = it.second;

@@ -164,6 +164,7 @@ void GgmlOvDecoder::set_input_output(ggml_tensor * node, bool naive) {
         if (src->flags & GGML_TENSOR_FLAG_INPUT) {
             src_name = get_graph_input_ov_name(src, node);
         }
+        m_inputs[src_name] = src;
         current_node_info.node_inputs[src_name] = src;
         current_node_info.node_inputs_names.push_back(src_name);

@@ -193,7 +194,6 @@ void GgmlOvDecoder::set_input_output(ggml_tensor * node, bool naive) {
         if (m_model_inputs.find(src_name) != m_model_inputs.end()) {
             continue;
         }
-        m_inputs[src_name] = src;
         assert(stateful_kv_shape.rank().is_static());
         ov::PartialShape param_shape =
             (stateful_kv_shape.rank().get_length() != 0) ? stateful_kv_shape : get_graph_input_shape(node, src);

@@ -264,7 +264,7 @@ int GgmlOvDecoder::compute_op_case(const ggml_tensor * node) const {
         } else {
             op_case = 3;
         }
-    } else if (node->src[0]->src[0]->op == GGML_OP_ROPE || node->src[0]->src[0]->src[0]->op == GGML_OP_ROPE) {
+    } else {
        // rope'ed query tensor
        op_case = 4;
    }

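This hunk is one of the crash fixes: in the shallow graphs test-backend-ops builds, `node->src[0]->src[0]` can be a leaf tensor with no sources, so the old condition's extra `->src[0]->op` hop dereferences a null pointer. Falling back to a plain `else` avoids the probe entirely. For illustration, a null-safe version of such a probe (the helper name is invented, not from the source):

```cpp
// Hypothetical helper: walk up the src[0] chain looking for a ROPE ancestor,
// stopping at null instead of crashing on leaf tensors.
static bool ancestor_is_rope(const ggml_tensor * t, int max_depth) {
    for (int i = 0; i < max_depth && t != nullptr; i++) {
        if (t->op == GGML_OP_ROPE) {
            return true;
        }
        t = t->src[0];
    }
    return false;
}
```
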
@@ -839,6 +839,9 @@ int32_t * GgmlOvDecoder::get_output_op_params(int node_idx) const {

 void GgmlOvDecoder::visit_subgraph(std::function<void(std::shared_ptr<GgmlDecoder>, int node_idx)> node_visitor) const {
     for (int node_idx = 0; node_idx < m_cgraph->n_nodes; node_idx++) {
+        if (m_cgraph->nodes[node_idx]->op == GGML_OP_NONE) {
+            continue;
+        }
         node_visitor(std::make_shared<GgmlOvDecoder>(*this), node_idx);
     }
 }

@@ -113,8 +113,8 @@ struct ggml_backend_openvino_buffer_context {

     ~ggml_backend_openvino_buffer_context() {
         // Clean up all tensor extras
-        GGML_LOG_DEBUG("Deleting OpenVINO buffer context #%zu for device %d, size %zu MB\n", id, device,
-                       size / 1024 / 1024);
+        // GGML_LOG_DEBUG("Deleting OpenVINO buffer context #%zu for device %d, size %zu MB\n", id, device,
+        //                size / 1024 / 1024);
         for (auto & pair : tensor_extras) {
             delete pair.second;
         }

@@ -454,9 +454,9 @@ static size_t ggml_backend_openvino_buffer_type_get_alloc_size(ggml_backend_buff
     if (ggml_is_quantized(tensor->type) && tensor->ne[2] == 1 && tensor->ne[3] == 1) {
         ggml_openvino_extracted_layout layout = ggml_openvino_get_extracted_layout(tensor);
         if (layout.total_size > 0) {
-            GGML_LOG_DEBUG("%s: tensor %s needs %zu bytes (original %zu, extracted: weights=%zu scales=%zu zp=%zu)\n",
-                           __func__, tensor->name, layout.total_size, ggml_nbytes(tensor), layout.weights_size,
-                           layout.scales_size, layout.zp_size);
+            // GGML_LOG_DEBUG("%s: tensor %s needs %zu bytes (original %zu, extracted: weights=%zu scales=%zu zp=%zu)\n",
+            //                __func__, tensor->name, layout.total_size, ggml_nbytes(tensor), layout.weights_size,
+            //                layout.scales_size, layout.zp_size);
             return layout.total_size;
         }
     }

@@ -763,8 +763,36 @@ static ggml_backend_buffer_type_t ggml_backend_openvino_device_get_host_buffer_t
     return ggml_backend_openvino_host_buffer_type(ctx->device);
 }

+static bool has_view_input(const ggml_tensor * op) {
+    for (int i = 0; i < GGML_MAX_SRC; i++) {
+        if (op->src[i] == nullptr) {
+            break;
+        }
+        if (op->src[i]->op == GGML_OP_VIEW) {
+            return true;
+        }
+    }
+    return false;
+}
+
 static bool is_op_unsupported_case(const ggml_tensor * op) {
     switch (op->op) {
+        case GGML_OP_GET_ROWS:
+        case GGML_OP_SET_ROWS: {
+            if (op->ne[3] != 1) {
+                return true;
+            }
+            break;
+        }
+        case GGML_OP_ADD:
+        case GGML_OP_MUL: {
+            for (int i = 0; i < 4; i++) {
+                if (op->src[0]->ne[i] != op->src[1]->ne[i] && (op->src[0]->ne[i] != 1 && op->src[1]->ne[i] != 1)) {
+                    return true;
+                }
+            }
+            break;
+        }
         case GGML_OP_SOFT_MAX: {
             if (op->src[2] != nullptr) {
                 GGML_LOG_WARN("OpenVINO backend does not support SOFT_MAX with sinks\n");

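The new ADD/MUL case encodes ggml's broadcast rule: two extents are compatible only when they match or one of them is 1. For instance, (4, 3, 1, 1) with (4, 1, 5, 1) broadcasts, while (4, 3, 1, 1) with (4, 2, 1, 1) does not and is now refused up front instead of failing later. The same predicate in isolation, as a sketch:

```cpp
#include <cstdint>

// Sketch: returns true when two ggml-style ne[] extents cannot broadcast,
// i.e. some dimension differs and neither side is 1.
static bool broadcast_incompatible(const int64_t a[4], const int64_t b[4]) {
    for (int i = 0; i < 4; i++) {
        if (a[i] != b[i] && a[i] != 1 && b[i] != 1) {
            return true;
        }
    }
    return false;
}
```
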
@@ -876,7 +904,7 @@ static bool ggml_backend_openvino_device_supports_op(ggml_backend_dev_t dev, con
                                                  GGML_TYPE_Q5_K, GGML_TYPE_Q8_0, GGML_TYPE_Q6_K};

     static const std::set<ggml_op> supported_ops{GGML_OP_NONE, GGML_OP_ADD, GGML_OP_MUL, GGML_OP_MUL_MAT, GGML_OP_VIEW,
-                                                 GGML_OP_CONT, GGML_OP_RESHAPE, GGML_OP_PERMUTE, GGML_OP_TRANSPOSE,
+                                                 /*GGML_OP_CONT,*/ GGML_OP_RESHAPE, GGML_OP_PERMUTE, GGML_OP_TRANSPOSE,
                                                  GGML_OP_GET_ROWS, GGML_OP_ROPE, GGML_OP_RMS_NORM, GGML_OP_SCALE,
                                                  // softmax is not updated due to replaced by flash_attn_ext
                                                  // GGML_OP_SOFT_MAX,

@@ -896,6 +924,11 @@ static bool ggml_backend_openvino_device_supports_op(ggml_backend_dev_t dev, con
                 GGML_LOG_WARN("OpenVINO backend does not support unary op %s\n", ggml_unary_op_name(ggml_get_unary_op(op)));
                 return false;
             }
+            if (has_view_input(op)) {
+                GGML_LOG_WARN("OpenVINO backend does not support unary op %s with view input\n",
+                              ggml_unary_op_name(ggml_get_unary_op(op)));
+                return false;
+            }
             break;
         }
         case GGML_OP_GLU: {

@@ -904,6 +937,11 @@ static bool ggml_backend_openvino_device_supports_op(ggml_backend_dev_t dev, con
                 GGML_LOG_WARN("OpenVINO backend does not support GLU op %s\n", ggml_glu_op_name(ggml_get_glu_op(op)));
                 return false;
             }
+            if (has_view_input(op)) {
+                GGML_LOG_WARN("OpenVINO backend does not support unary op %s with view input\n",
+                              ggml_glu_op_name(ggml_get_glu_op(op)));
+                return false;
+            }
             break;
         }
         default: {

@@ -912,6 +950,14 @@ static bool ggml_backend_openvino_device_supports_op(ggml_backend_dev_t dev, con
                 GGML_LOG_WARN("OpenVINO backend does not support op %s\n", ggml_op_name(op->op));
                 return false;
             }
+            static std::set<ggml_op> ops_not_support_view_input{
+                GGML_OP_GET_ROWS,
+                GGML_OP_RMS_NORM,
+            };
+            if (ops_not_support_view_input.find(op->op) != ops_not_support_view_input.end() && has_view_input(op)) {
+                GGML_LOG_WARN("OpenVINO backend does not support op %s with view input\n", ggml_op_name(op->op));
+                return false;
+            }
         }
     }

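GET_ROWS and RMS_NORM join the view blacklist for the same apparent reason as the unary and GLU cases above: a GGML_OP_VIEW source aliases another tensor's buffer through its own strides, while the current OpenVINO mapping assumes densely packed inputs. A hedged illustration of the rejected graph shape, using the public ggml API (`ctx` and `big` are assumed to exist):

```cpp
// One row viewed out of a larger tensor, then normalized: supports_op
// now routes this pattern away from the OpenVINO backend.
ggml_tensor * row  = ggml_view_2d(ctx, big, big->ne[0], 1, big->nb[1], 0);
ggml_tensor * norm = ggml_rms_norm(ctx, row, 1e-6f);
```
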
@@ -34,9 +34,18 @@ OutputVector translate_get_rows(const NodeContext & context) {
         indices =
             std::make_shared<ov::op::v0::Squeeze>(indices, ov::op::v0::Constant::create(ov::element::i64, {2}, {0, 1}));
     if (data.get_partial_shape().rank() == 4) {
-        auto axis = ov::op::v0::Constant::create(ov::element::i32, ov::Shape{}, {1});
-        data = std::make_shared<ov::op::v0::Squeeze>(data, ov::op::v0::Constant::create(ov::element::i64, {1}, {0}));
-        res = std::make_shared<ov::op::v8::Gather>(data, indices, axis, 1);
+        if (data.get_partial_shape()[1].get_length() == 1) {
+            // Work-around for a bug in ov cpu plugin for test-backend-ops
+            data = std::make_shared<ov::op::v0::Squeeze>(data,
+                                                         ov::op::v0::Constant::create(ov::element::i64, {2}, {0, 1}));
+            auto axis = ov::op::v0::Constant::create(ov::element::i32, ov::Shape{}, {0});
+            res = std::make_shared<ov::op::v8::Gather>(data, indices, axis);
+        } else {
+            auto axis = ov::op::v0::Constant::create(ov::element::i32, ov::Shape{}, {1});
+            data =
+                std::make_shared<ov::op::v0::Squeeze>(data, ov::op::v0::Constant::create(ov::element::i64, {1}, {0}));
+            res = std::make_shared<ov::op::v8::Gather>(data, indices, axis, 1);
+        }
     } else if (context.is_stateful() && data.get_partial_shape().rank() == 3) {
         auto axis = ov::op::v0::Constant::create(ov::element::i32, ov::Shape{}, {1});
         res = std::make_shared<ov::op::v8::Gather>(data, indices, axis, 1);

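The rank-4 path normally squeezes away the leading dimension and gathers with batch_dims = 1 (the trailing `1` argument to Gather). The new branch works around the OV CPU plugin bug the comment mentions, hit by test-backend-ops when the batch extent is 1: both operands are squeezed down and a plain axis-0 Gather is emitted instead. The two forms are equivalent in that degenerate case; a scalar model of what either computes, as a sketch:

```cpp
#include <cstdint>
#include <vector>

// Sketch: out row i = data row indices[i], i.e. a batch-1 gather flattened
// to a plain axis-0 gather over rows of `cols` floats.
static std::vector<float> gather_rows(const std::vector<float> & data,
                                      const std::vector<int32_t> & indices,
                                      size_t cols) {
    std::vector<float> out;
    out.reserve(indices.size() * cols);
    for (int32_t row : indices) {
        out.insert(out.end(), data.begin() + row * cols, data.begin() + (row + 1) * cols);
    }
    return out;
}
```
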
@@ -9,7 +9,6 @@
 #include <openvino/op/multiply.hpp>
 #include <openvino/op/sigmoid.hpp>
 #include <openvino/op/slice.hpp>
-#include <openvino/op/split.hpp>

 namespace ov {
 namespace frontend {

@@ -25,11 +24,23 @@ OutputVector translate_glu_geglu(const NodeContext & context) {
         src0 = context.get_input(0);
         src1 = context.get_input(1);
     } else {
+        // GGML splits along ne[0] (OV last axis) using floor division: nc = ne[0] / 2.
+        // Both halves are nc elements; if the dimension is odd, the last element is dropped.
+        // Use Slice instead of Split to handle odd dimensions correctly.
         auto combined = context.get_input(0);
-        auto split_axis = ov::op::v0::Constant::create(ov::element::i64, {}, {-1});
-        auto split = std::make_shared<ov::op::v1::Split>(combined, split_axis, 2);
-        src0 = split->output(0);
-        src1 = split->output(1);
+        auto combined_shape = combined.get_partial_shape();
+        int64_t last_dim_val = combined_shape[combined_shape.rank().get_length() - 1].get_length();
+        int64_t nc = last_dim_val / 2;
+
+        auto axis = ov::op::v0::Constant::create(ov::element::i64, {1}, {-1});
+        auto step = ov::op::v0::Constant::create(ov::element::i64, {1}, {1});
+        auto start0 = ov::op::v0::Constant::create(ov::element::i64, {1}, {0});
+        auto stop0 = ov::op::v0::Constant::create(ov::element::i64, {1}, {nc});
+        auto start1 = ov::op::v0::Constant::create(ov::element::i64, {1}, {nc});
+        auto stop1 = ov::op::v0::Constant::create(ov::element::i64, {1}, {2 * nc});
+
+        src0 = std::make_shared<ov::op::v8::Slice>(combined, start0, stop0, step, axis);
+        src1 = std::make_shared<ov::op::v8::Slice>(combined, start1, stop1, step, axis);
     }

     int32_t * params = context.get_output_op_params();

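A worked example of the odd-dimension case the new comment calls out: with ne[0] = 7, nc = 7 / 2 = 3, so the halves are elements [0, 3) and [3, 6), and element 6 is dropped, matching GGML's floor-division semantics. Split requires the axis length to divide evenly by the number of splits, so it cannot express this. The bounds computation as a standalone sketch:

```cpp
#include <cstdint>

// Sketch: GGML-style GLU halves along the last axis, via floor division.
struct glu_bounds { int64_t start0, stop0, start1, stop1; };

static glu_bounds glu_split_bounds(int64_t last_dim) {
    const int64_t nc = last_dim / 2;  // odd tail element is dropped
    return {0, nc, nc, 2 * nc};      // halves [0, nc) and [nc, 2 * nc)
}
```
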
@@ -9,7 +9,6 @@
 #include <openvino/op/multiply.hpp>
 #include <openvino/op/sigmoid.hpp>
 #include <openvino/op/slice.hpp>
-#include <openvino/op/split.hpp>

 namespace ov {
 namespace frontend {

@@ -25,11 +24,23 @@ OutputVector translate_glu_swiglu(const NodeContext & context) {
         src0 = context.get_input(0);
         src1 = context.get_input(1);
     } else {
+        // GGML splits along ne[0] (OV last axis) using floor division: nc = ne[0] / 2.
+        // Both halves are nc elements; if the dimension is odd, the last element is dropped.
+        // Use Slice instead of Split to handle odd dimensions correctly.
         auto combined = context.get_input(0);
-        auto split_axis = ov::op::v0::Constant::create(ov::element::i64, {}, {-1});
-        auto split = std::make_shared<ov::op::v1::Split>(combined, split_axis, 2);
-        src0 = split->output(0);
-        src1 = split->output(1);
+        auto combined_shape = combined.get_partial_shape();
+        int64_t last_dim_val = combined_shape[combined_shape.rank().get_length() - 1].get_length();
+        int64_t nc = last_dim_val / 2;
+
+        auto axis = ov::op::v0::Constant::create(ov::element::i64, {1}, {-1});
+        auto step = ov::op::v0::Constant::create(ov::element::i64, {1}, {1});
+        auto start0 = ov::op::v0::Constant::create(ov::element::i64, {1}, {0});
+        auto stop0 = ov::op::v0::Constant::create(ov::element::i64, {1}, {nc});
+        auto start1 = ov::op::v0::Constant::create(ov::element::i64, {1}, {nc});
+        auto stop1 = ov::op::v0::Constant::create(ov::element::i64, {1}, {2 * nc});
+
+        src0 = std::make_shared<ov::op::v8::Slice>(combined, start0, stop0, step, axis);
+        src1 = std::make_shared<ov::op::v8::Slice>(combined, start1, stop1, step, axis);
     }

     int32_t * params = context.get_output_op_params();

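The swiglu translator gets the identical Slice treatment. Downstream of this hunk, the halves combine as silu(gate) * linear per ggml's SWIGLU definition (the sigmoid and multiply includes above are used for that part); which half acts as the gate depends on the op's swapped variant. A scalar reference for intuition:

```cpp
#include <cmath>

// Scalar sketch: SwiGLU pairs a SiLU-gated half with a linear half.
static float swiglu_ref(float gate, float lin) {
    const float silu = gate / (1.0f + std::exp(-gate));  // silu(x) = x * sigmoid(x)
    return silu * lin;
}
```
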
@@ -2,6 +2,7 @@
 #include "../op_table.hpp"
 #include "../utils.hpp"

+#include <openvino/op/add.hpp>
 #include <openvino/op/constant.hpp>
 #include <openvino/op/multiply.hpp>
 #include <vector>

@@ -15,10 +16,21 @@ OutputVector translate_scale(const NodeContext & context) {
     num_inputs_check(context, 1, 1);

     float scale;
-    memcpy(&scale, context.get_output_op_params(), sizeof(float));
-    auto scale_node = std::make_shared<ov::op::v0::Constant>(ov::element::f32, ov::Shape{}, std::vector<float>{scale});
+    float bias;
+    memcpy(&scale, (float *) context.get_output_op_params() + 0, sizeof(float));
+    memcpy(&bias, (float *) context.get_output_op_params() + 1, sizeof(float));

-    auto res = std::make_shared<ov::op::v1::Multiply>(context.get_input(0), scale_node);
+    auto scale_node = std::make_shared<ov::op::v0::Constant>(ov::element::f32, ov::Shape{}, std::vector<float>{scale});
+    auto scaled = std::make_shared<ov::op::v1::Multiply>(context.get_input(0), scale_node);
+
+    std::shared_ptr<ov::Node> res;
+    if (bias != 0.0f) {
+        auto bias_node =
+            std::make_shared<ov::op::v0::Constant>(ov::element::f32, ov::Shape{}, std::vector<float>{bias});
+        res = std::make_shared<ov::op::v1::Add>(scaled, bias_node);
+    } else {
+        res = scaled;
+    }

     return rename_outputs_with_suffix({res}, context.get_name());
 }

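This tracks ggml's SCALE op carrying two floats in op_params, the multiplier at index 0 and a bias at index 1, computing x * scale + bias; reading only the single float left the bias ignored, and test-backend-ops exercises nonzero bias. The `bias != 0.0f` check keeps bias-free graphs free of a redundant Add. The unpacking in isolation, as a sketch:

```cpp
#include <cstdint>
#include <cstring>

// Sketch: unpack SCALE's op_params the way the translator does.
static float apply_scale(float x, const int32_t * op_params) {
    float scale, bias;
    std::memcpy(&scale, (const float *) op_params + 0, sizeof(float));
    std::memcpy(&bias, (const float *) op_params + 1, sizeof(float));
    return x * scale + bias;
}
```
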
@@ -59,9 +59,9 @@ enum ggml_status ov_graph_compute_dynamic(ggml_cgraph * cgraph, const std::strin
     static auto is_static = false;
     static size_t stateful_kv_size = 0;

-    // if (is_naive(cgraph)) {
-    //     return naive_compute(cgraph, core, device, config);
-    // }
+    if (is_naive(cgraph)) {
+        return naive_compute(cgraph, core, device, config);
+    }

     auto start_time = ggml_time_us();

@@ -438,7 +438,13 @@ enum ggml_status ov_graph_compute_static(ggml_cgraph * cgraph) {

 bool is_naive(ggml_cgraph * cgraph) {
     constexpr int naive_graph_size_threshold = 20;
-    return cgraph->n_nodes < naive_graph_size_threshold;
+    int count = 0;
+    for (int i = 0; i < cgraph->n_nodes; i++) {
+        if (cgraph->nodes[i]->op != GGML_OP_NONE) {
+            count++;
+        }
+    }
+    return count < naive_graph_size_threshold;
 }

 enum ggml_status naive_compute(ggml_cgraph * cgraph,

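A note on the last two hunks together: test-backend-ops builds graphs where most entries are GGML_OP_NONE leaves around a handful of real ops, so the raw n_nodes count overstated graph size once the naive path was re-enabled. Counting only non-NONE nodes keeps the threshold meaningful: for example, a graph with 30 nodes of which 25 are GGML_OP_NONE counts as 5 real ops, stays under the threshold of 20, and is dispatched to naive_compute.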