Fix abnormal model output caused by using the OpenVINO ADD operator

This commit is contained in:
zhanmyz 2024-11-21 18:03:22 +08:00 committed by Mustafa Cavus
parent 9b9d51dddf
commit faa4a7de76
1 changed file with 52 additions and 107 deletions

View File

@ -51,10 +51,18 @@ static ggml_backend_buffer_type_t ggml_backend_openvino_get_default_buffer_type(
GGML_UNUSED(backend);
}
static void ggml_backend_openvino_add_forward(ggml_backend_openvino_context & ctx, ggml_tensor * dst) {
static void ggml_backend_openvino_add_forward(ggml_tensor * dst) {
// Step 1: get the input tensors src0 and src1
const ggml_tensor *src0 = dst->src[0];
const ggml_tensor *src1 = dst->src[1];
const struct ggml_tensor *src0 = dst->src[0];
const struct ggml_tensor *src1 = dst->src[1];
ov::Core core;
// set the shape and stride of dst
dst->ne[0] = src0->ne[0];
dst->ne[1] = src0->ne[1];
dst->nb[0] = src0->nb[0];
dst->nb[1] = src0->nb[1];
if (src0 == nullptr || src1 == nullptr) {
std::cerr << "Error: src0 or src1 is null." << std::endl;
@ -71,76 +79,61 @@ static void ggml_backend_openvino_add_forward(ggml_backend_openvino_context & ct
return;
}
// Step 3: Initialize OpenVINO model and streams (only done on first call)
if (!ctx.is_initialized) {
try {
// define input tensor shape
ov::Shape input_shape = {static_cast<size_t>(src0->ne[0]), static_cast<size_t>(src0->ne[1])};
ov::Tensor input0 = ov::Tensor(ov::element::f32, {static_cast<size_t>(src0->ne[0]), static_cast<size_t>(src0->ne[1])}, src0->data);
ov::Tensor input1 = ov::Tensor(ov::element::f32, {static_cast<size_t>(src1->ne[0]), static_cast<size_t>(src1->ne[1])}, src1->data);
// create OpenVINO input nodes
auto input0 = std::make_shared<ov::op::v0::Parameter>(ov::element::f32, input_shape);
auto input1 = std::make_shared<ov::op::v0::Parameter>(ov::element::f32, input_shape);
auto input0_param = std::make_shared<ov::op::v0::Parameter>(ov::element::f32, ov::Shape{static_cast<size_t>(src0->ne[0]), static_cast<size_t>(src0->ne[1])});
auto input1_param = std::make_shared<ov::op::v0::Parameter>(ov::element::f32, ov::Shape{static_cast<size_t>(src0->ne[0]), static_cast<size_t>(src0->ne[1])});
auto add = std::make_shared<ov::op::v1::Add>(input0_param, input1_param);
auto function = std::make_shared<ov::Model>(add, ov::ParameterVector{input0_param, input1_param});
// define add operation
auto add_node = std::make_shared<ov::op::v1::Add>(input0, input1);
// create model
auto model = std::make_shared<ov::Model>(add_node, ov::ParameterVector{input0, input1});
// compile model and store in context
// compile model and store in context
#ifdef GGML_OPENVINO_GPU
ctx.model = std::make_shared<ov::CompiledModel>(ctx.core.compile_model(model, "GPU"));
auto compiled_model = core.compile_model(function, "GPU");
#elif GGML_OPENVINO_NPU
ctx.model = std::make_shared<ov::CompiledModel>(ctx.core.compile_model(model, "NPU"));
auto compiled_model = core.compile_model(function, "NPU");
#else
ctx.model = std::make_shared<ov::CompiledModel>(ctx.core.compile_model(model, "CPU"));
auto compiled_model = core.compile_model(function, "CPU");
#endif
// initialize infer request
ctx.infer_request = ctx.model->create_infer_request();
ctx.is_initialized = true;
// std::cout << "OpenVINO add model initialized successfully." << std::endl;
} catch (const std::exception &e) {
std::cerr << "Error initializing OpenVINO model: " << e.what() << std::endl;
return;
}
}
// initialize infer request
auto infer_request = compiled_model.create_infer_request();
// Step 4: set input data, copy src0 and src1 data to OpenVINO input tensors
auto input_tensor0 = ctx.infer_request.get_tensor(ctx.model->input(0));
auto input_tensor1 = ctx.infer_request.get_tensor(ctx.model->input(1));
// Note: OpenVINO Tensor data is contiguous, make sure src0 and src1 data is contiguous.
std::memcpy(input_tensor0.data<float>(), src0->data, src0->nb[0] * src0->ne[0]);
std::memcpy(input_tensor1.data<float>(), src1->data, src1->nb[0] * src1->ne[0]);
infer_request.set_tensor(input0_param, input0);
infer_request.set_tensor(input1_param, input1);
// Step 5: execute inference
ctx.infer_request.infer();
infer_request.infer();
// Step 6: get output data
ov::Tensor output_tensor = ctx.infer_request.get_tensor(ctx.model->output(0));
ov::Tensor output = infer_request.get_tensor(compiled_model.output());
// Allocate memory for dst->data if not already allocated
if (dst->data == nullptr) {
dst->data = malloc(dst->nb[0] * dst->ne[0]);
if (dst->data == nullptr) {
std::cerr << "Error: Failed to allocate memory for dst->data." << std::endl;
return;
}
}
// Copy output data to dst
std::memcpy(dst->data, output_tensor.data<float>(), dst->nb[0] * dst->ne[0]);
// // Print results (optional, for debugging)
// float* dst_data = static_cast<float*>(dst->data);
// std::cout << "Output data:";
// for (int i = 0; i < std::min(10, static_cast<int>(dst->ne[0])); ++i) {
// std::cout << dst_data[i] << " ";
// // Allocate memory for dst->data if not already allocated
// if (dst->data == nullptr) {
// dst->data = malloc(dst->nb[0] * dst->ne[0]);
// if (dst->data == nullptr) {
// std::cerr << "Error: Failed to allocate memory for dst->data." << std::endl;
// return;
// }
// }
// std::cout << std::endl;
std::memcpy(dst->data, output.data(), output.get_byte_size());
if (dst->ne[0] != src0->ne[0] || dst->ne[1] != src0->ne[1]) {
std::cerr << "Error: dst tensor shape does not match input tensor shape." << std::endl;
return;
}
// float* dst_data1 = (float*)(dst->data);
// printf("Output data:");;
// for (int i = 0; i < (10 < (int)(dst->ne[0]) ? 10 : (int)(dst->ne[0])); ++i) {
// printf("%f ", dst_data1[i]);
// }
// printf("\n");
// fflush(stdout);
}
static void ggml_backend_openvino_add(ggml_backend_openvino_context & ctx, ggml_tensor * dst) {
static void ggml_backend_openvino_add(ggml_tensor * dst) {
// Placeholder for OpenVINO add operation
// GGML_ASSERT(ctx.device != 0);
GGML_ASSERT(dst->data != nullptr);
@ -163,7 +156,7 @@ static void ggml_backend_openvino_add(ggml_backend_openvino_context & ctx, ggml_
{
if (src1->type == GGML_TYPE_F32) {
{
ggml_backend_openvino_add_forward(ctx, dst);
ggml_backend_openvino_add_forward(dst);
}
}
else {
@ -181,16 +174,13 @@ static void test_op_for_NONE() {
}
static enum ggml_status ggml_backend_openvino_graph_compute(ggml_backend_t backend, struct ggml_cgraph * cgraph) {
// TODO
ggml_backend_openvino_context * ctx = (ggml_backend_openvino_context *)backend->context;
for (int i = 0; i < cgraph->n_nodes; i++) {
struct ggml_tensor * node = cgraph->nodes[i];
switch (node->op) {
case GGML_OP_ADD:
// TODO
ggml_backend_openvino_add(*ctx, node);
ggml_backend_openvino_add(node);
break;
case GGML_OP_MUL_MAT:
case GGML_OP_OUT_PROD:
@ -405,53 +395,8 @@ static bool ggml_backend_openvino_device_supports_op(ggml_backend_dev_t dev, con
// ggml_backend_openvino_device_context * dev_ctx = (ggml_backend_openvino_device_context *) dev->context;
switch (op->op) {
case GGML_OP_UNARY:
return false;
case GGML_OP_NONE:
return false;
case GGML_OP_RESHAPE:
case GGML_OP_VIEW:
case GGML_OP_PERMUTE:
case GGML_OP_TRANSPOSE:
case GGML_OP_NORM:
return false;
case GGML_OP_ADD:
{
ov::op::v1::Add add;
//add.evaluate(op->outputs[0], op->inputs[1]);
return true;
}
case GGML_OP_ADD1:
case GGML_OP_SUB:
{
ov::op::v1::Subtract sub;
//sub.evaluate(TensorVector& outputs, const TensorVector& inputs);
return false;
}
case GGML_OP_MUL:
case GGML_OP_DIV:
case GGML_OP_RMS_NORM:
case GGML_OP_SCALE:
case GGML_OP_SQR:
case GGML_OP_SQRT:
case GGML_OP_SIN:
case GGML_OP_COS:
case GGML_OP_IM2COL:
case GGML_OP_POOL_2D:
case GGML_OP_SUM:
case GGML_OP_SUM_ROWS:
case GGML_OP_ARGSORT:
case GGML_OP_ACC:
case GGML_OP_GROUP_NORM:
case GGML_OP_UPSCALE:
case GGML_OP_PAD:
case GGML_OP_ARANGE:
case GGML_OP_TIMESTEP_EMBEDDING:
case GGML_OP_LEAKY_RELU:
case GGML_OP_CROSS_ENTROPY_LOSS:
case GGML_OP_CROSS_ENTROPY_LOSS_BACK:
case GGML_OP_OPT_STEP_ADAMW:
return false;
default:
return false;
}