OV Frontend: support graph conversion of consecutive GET_ROWS/RMS_NORM/MUL/MUL_MAT OPs
This commit is contained in:
parent
901f7347ff
commit
95ae982d59
|
|
@ -1020,39 +1020,41 @@ static enum ggml_status ggml_backend_openvino_graph_compute(ggml_backend_t backe
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
int end_node = cgraph->n_nodes - 1;
|
||||||
|
openvino_frontend_compute(backend, cgraph, 0, end_node);
|
||||||
// openvino_frontend_compute(backend, cgraph);
|
// openvino_frontend_compute(backend, cgraph);
|
||||||
// Process nodes in order
|
// Process nodes in order
|
||||||
for (int i = 0; i < cgraph->n_nodes; i++) {
|
// for (int i = 0; i < cgraph->n_nodes; i++) {
|
||||||
if (std::find(permute_indices.begin(), permute_indices.end(), i) != permute_indices.end()) {
|
// if (std::find(permute_indices.begin(), permute_indices.end(), i) != permute_indices.end()) {
|
||||||
ggml_backend_openvino_permute(cgraph->nodes[i]);
|
// ggml_backend_openvino_permute(cgraph->nodes[i]);
|
||||||
// } else if (std::find(cont_indices.begin(), cont_indices.end(), i) != cont_indices.end()) {
|
// // } else if (std::find(cont_indices.begin(), cont_indices.end(), i) != cont_indices.end()) {
|
||||||
// ggml_backend_openvino_dup_bytes(cgraph->nodes[i]);
|
// // ggml_backend_openvino_dup_bytes(cgraph->nodes[i]);
|
||||||
// } else if (std::find(view_indices.begin(), view_indices.end(), i) != view_indices.end()) {
|
// // } else if (std::find(view_indices.begin(), view_indices.end(), i) != view_indices.end()) {
|
||||||
// ggml_backend_openvino_view(cgraph->nodes[i]);
|
// // ggml_backend_openvino_view(cgraph->nodes[i]);
|
||||||
// } else if (std::find(cpy_indices.begin(), cpy_indices.end(), i) != cpy_indices.end()) {
|
// // } else if (std::find(cpy_indices.begin(), cpy_indices.end(), i) != cpy_indices.end()) {
|
||||||
// ggml_backend_openvino_cpy(cgraph->nodes[i]);
|
// // ggml_backend_openvino_cpy(cgraph->nodes[i]);
|
||||||
} else if (std::find(transpose_indices.begin(), transpose_indices.end(), i) != transpose_indices.end()) {
|
// // } else if (std::find(transpose_indices.begin(), transpose_indices.end(), i) != transpose_indices.end()) {
|
||||||
ggml_backend_openvino_transpose(cgraph->nodes[i]);
|
// // ggml_backend_openvino_transpose(cgraph->nodes[i]);
|
||||||
} else if (std::find(reshape_indices.begin(), reshape_indices.end(), i) != reshape_indices.end()) {
|
// // } else if (std::find(reshape_indices.begin(), reshape_indices.end(), i) != reshape_indices.end()) {
|
||||||
ggml_backend_openvino_reshape(cgraph->nodes[i]);
|
// // ggml_backend_openvino_reshape(cgraph->nodes[i]);
|
||||||
// } else if (std::find(mul_mat_indices.begin(), mul_mat_indices.end(), i) != mul_mat_indices.end()) {
|
// // } else if (std::find(mul_mat_indices.begin(), mul_mat_indices.end(), i) != mul_mat_indices.end()) {
|
||||||
// ggml_backend_openvino_mul_mat(cgraph->nodes[i]);
|
// // ggml_backend_openvino_mul_mat(cgraph->nodes[i]);
|
||||||
} else {
|
// } else {
|
||||||
// Process a range of nodes with openvino_frontend_compute
|
// // Process a range of nodes with openvino_frontend_compute
|
||||||
int start_index = i;
|
// int start_index = i;
|
||||||
while (i < cgraph->n_nodes
|
// while (i < cgraph->n_nodes
|
||||||
// && std::find(view_indices.begin(), view_indices.end(), i) == view_indices.end()
|
// // && std::find(view_indices.begin(), view_indices.end(), i) == view_indices.end()
|
||||||
// && std::find(cpy_indices.begin(), cpy_indices.end(), i) == cpy_indices.end()
|
// // && std::find(cpy_indices.begin(), cpy_indices.end(), i) == cpy_indices.end()
|
||||||
// && std::find(cont_indices.begin(), cont_indices.end(), i) == cont_indices.end()
|
// // && std::find(cont_indices.begin(), cont_indices.end(), i) == cont_indices.end()
|
||||||
// && std::find(mul_mat_indices.begin(), mul_mat_indices.end(), i) == mul_mat_indices.end()
|
// // && std::find(mul_mat_indices.begin(), mul_mat_indices.end(), i) == mul_mat_indices.end()
|
||||||
) {
|
// ) {
|
||||||
i++;
|
// i++;
|
||||||
}
|
// }
|
||||||
if (start_index < i) {
|
// if (start_index < i) {
|
||||||
openvino_frontend_compute(backend, cgraph, start_index, --i);
|
// openvino_frontend_compute(backend, cgraph, start_index, --i);
|
||||||
}
|
// }
|
||||||
}
|
// }
|
||||||
}
|
// }
|
||||||
|
|
||||||
return GGML_STATUS_SUCCESS;
|
return GGML_STATUS_SUCCESS;
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -20,6 +20,7 @@ void GgmlOvDecoder::set_input_output(ggml_tensor* node, std::map<std::string, gg
|
||||||
|
|
||||||
std::string src0_name = std::string(node->src[0]->name);
|
std::string src0_name = std::string(node->src[0]->name);
|
||||||
std::string node_name = std::string(node->name);
|
std::string node_name = std::string(node->name);
|
||||||
|
|
||||||
switch (node->op) {
|
switch (node->op) {
|
||||||
// Unary OPs
|
// Unary OPs
|
||||||
case GGML_OP_UNARY:
|
case GGML_OP_UNARY:
|
||||||
|
|
@ -110,7 +111,7 @@ void GgmlOvDecoder::set_input_output(ggml_tensor* node, std::map<std::string, gg
|
||||||
m_output_names.push_back(node_name);
|
m_output_names.push_back(node_name);
|
||||||
m_continuous = true;
|
m_continuous = true;
|
||||||
|
|
||||||
ov::Shape src_shape(node->src[0]->ne, node->src[0]->ne + 4);
|
ov::Shape src_shape(node->src[0]->ne, node->src[0]->ne + 3);
|
||||||
auto input_param = std::make_shared<ov::op::v0::Parameter>(ov::element::f32, src_shape);
|
auto input_param = std::make_shared<ov::op::v0::Parameter>(ov::element::f32, src_shape);
|
||||||
m_params.push_back(input_param);
|
m_params.push_back(input_param);
|
||||||
break;
|
break;
|
||||||
|
|
@ -217,6 +218,7 @@ void GgmlOvDecoder::set_input_output(ggml_tensor* node, std::map<std::string, gg
|
||||||
m_node_op_name[src0_name] = ggml_op_name(node->op);
|
m_node_op_name[src0_name] = ggml_op_name(node->op);
|
||||||
m_output_names.push_back(node_name);
|
m_output_names.push_back(node_name);
|
||||||
if (node->src[1]) {
|
if (node->src[1]) {
|
||||||
|
// std::string src1_name = std::string(node->src[1]->name) + "_" + std::to_string(node->src[1]->view_offs) + "_input_" + ggml_op_name(node->src[1]->op);
|
||||||
// std::string src1_name = std::string(node->src[1]->name) + "_" + std::to_string(node->src[1]->view_offs);
|
// std::string src1_name = std::string(node->src[1]->name) + "_" + std::to_string(node->src[1]->view_offs);
|
||||||
std::string src1_name = std::string(node->src[1]->name);
|
std::string src1_name = std::string(node->src[1]->name);
|
||||||
inputs[src1_name] = node->src[1];
|
inputs[src1_name] = node->src[1];
|
||||||
|
|
@ -228,6 +230,7 @@ void GgmlOvDecoder::set_input_output(ggml_tensor* node, std::map<std::string, gg
|
||||||
// OPs with 3 inputs:
|
// OPs with 3 inputs:
|
||||||
case GGML_OP_ROPE:
|
case GGML_OP_ROPE:
|
||||||
{
|
{
|
||||||
|
// std::string src1_name = std::string(node->src[1]->name) + "_" + std::to_string(node->src[1]->view_offs) + "_input_" + ggml_op_name(node->src[1]->op);
|
||||||
// std::string src1_name = std::string(node->src[1]->name) + "_" + std::to_string(node->src[1]->view_offs);
|
// std::string src1_name = std::string(node->src[1]->name) + "_" + std::to_string(node->src[1]->view_offs);
|
||||||
std::string src1_name = std::string(node->src[1]->name);
|
std::string src1_name = std::string(node->src[1]->name);
|
||||||
inputs[src0_name] = node->src[0];
|
inputs[src0_name] = node->src[0];
|
||||||
|
|
@ -239,6 +242,7 @@ void GgmlOvDecoder::set_input_output(ggml_tensor* node, std::map<std::string, gg
|
||||||
outputs[node_name] = node;
|
outputs[node_name] = node;
|
||||||
m_output_names.push_back(node_name);
|
m_output_names.push_back(node_name);
|
||||||
if (node->src[2]) {
|
if (node->src[2]) {
|
||||||
|
// std::string src2_name = std::string(node->src[2]->name) + "_" + std::to_string(node->src[2]->view_offs) + "_input_" + ggml_op_name(node->src[2]->op);
|
||||||
// std::string src2_name = std::string(node->src[2]->name) + "_" + std::to_string(node->src[2]->view_offs);
|
// std::string src2_name = std::string(node->src[2]->name) + "_" + std::to_string(node->src[2]->view_offs);
|
||||||
std::string src2_name = std::string(node->src[2]->name);
|
std::string src2_name = std::string(node->src[2]->name);
|
||||||
inputs[src2_name] = node->src[2];
|
inputs[src2_name] = node->src[2];
|
||||||
|
|
@ -253,7 +257,7 @@ void GgmlOvDecoder::set_input_output(ggml_tensor* node, std::map<std::string, gg
|
||||||
}
|
}
|
||||||
|
|
||||||
void ggml_graph_op_print(const struct ggml_cgraph * cgraph) {
|
void ggml_graph_op_print(const struct ggml_cgraph * cgraph) {
|
||||||
std::ofstream file("2_graph_node_src_op_name.txt");
|
std::ofstream file("01_nodes.txt");
|
||||||
if (!file.is_open()) {
|
if (!file.is_open()) {
|
||||||
std::cerr << "Failed to open file" << std::endl;
|
std::cerr << "Failed to open file" << std::endl;
|
||||||
return;
|
return;
|
||||||
|
|
@ -262,6 +266,13 @@ void ggml_graph_op_print(const struct ggml_cgraph * cgraph) {
|
||||||
file << "=== GRAPH ===\n";
|
file << "=== GRAPH ===\n";
|
||||||
|
|
||||||
file << "n_nodes = " << cgraph->n_nodes << "\n";
|
file << "n_nodes = " << cgraph->n_nodes << "\n";
|
||||||
|
file << " " << std::setw(3) << "nodes"
|
||||||
|
<< std::setw(15) << "shape"
|
||||||
|
<< std::setw(16) << "op"
|
||||||
|
<< std::setw(20) << "name"
|
||||||
|
<< std::setw(3) << " "
|
||||||
|
<< std::setw(50) << "stride"
|
||||||
|
<< "\n";
|
||||||
for (int i = 0; i < cgraph->n_nodes; i++) {
|
for (int i = 0; i < cgraph->n_nodes; i++) {
|
||||||
struct ggml_tensor * node = cgraph->nodes[i];
|
struct ggml_tensor * node = cgraph->nodes[i];
|
||||||
|
|
||||||
|
|
@ -269,9 +280,14 @@ void ggml_graph_op_print(const struct ggml_cgraph * cgraph) {
|
||||||
<< std::setw(5) << node->ne[0] << ", "
|
<< std::setw(5) << node->ne[0] << ", "
|
||||||
<< std::setw(5) << node->ne[1] << ", "
|
<< std::setw(5) << node->ne[1] << ", "
|
||||||
<< std::setw(5) << node->ne[2] << "] "
|
<< std::setw(5) << node->ne[2] << "] "
|
||||||
<< std::left << std::setw(16) << ggml_op_name(node->op) << std::right << " "
|
<< std::left << std::setw(20) << ggml_op_name(node->op) << std::right << " "
|
||||||
<< " " << node->name
|
<< std::left << std::setw(44) << node->name << std::right
|
||||||
<< ((node->flags & GGML_TENSOR_FLAG_PARAM) ? "x" : node->grad ? "g" : " ") << "\n";
|
<< ((node->flags & GGML_TENSOR_FLAG_PARAM) ? "x" : node->grad ? "g" : " ")
|
||||||
|
<< std::setw(2) << "[ "
|
||||||
|
<< std::setw(0) << node->nb[0] << ", "
|
||||||
|
<< std::setw(5) << node->nb[1] << ", "
|
||||||
|
<< std::setw(5) << node->nb[2] << "] "
|
||||||
|
<< "\n";
|
||||||
|
|
||||||
if (node->src[0]) {
|
if (node->src[0]) {
|
||||||
file << std::setw(10) << " [ "
|
file << std::setw(10) << " [ "
|
||||||
|
|
@ -279,15 +295,19 @@ void ggml_graph_op_print(const struct ggml_cgraph * cgraph) {
|
||||||
<< std::setw(5) << node->src[0]->ne[1] << ", "
|
<< std::setw(5) << node->src[0]->ne[1] << ", "
|
||||||
<< std::setw(5) << node->src[0]->ne[2] << "] "
|
<< std::setw(5) << node->src[0]->ne[2] << "] "
|
||||||
<< std::setw(12)
|
<< std::setw(12)
|
||||||
<< "0: " << ggml_op_name(node->src[0]->op) << " ";
|
<< "0: " << std::left << std::setw(12) << ggml_op_name(node->src[0]->op) << std::right;
|
||||||
// // Custom logic to handle '\000'
|
// // Custom logic to handle '\000'
|
||||||
// const char* name_ptr = node->src[0]->name;
|
// const char* name_ptr = node->src[0]->name;
|
||||||
// while (*name_ptr != '\0' || *(name_ptr + 1) != '\0' || *(name_ptr + 2) != '\0') {
|
// while (*name_ptr != '\0' || *(name_ptr + 1) != '\0' || *(name_ptr + 2) != '\0') {
|
||||||
// file << *name_ptr;
|
// file << *name_ptr;
|
||||||
// name_ptr++;
|
// name_ptr++;
|
||||||
// }
|
// }
|
||||||
file << node->src[0]->name;
|
file << std::left << std::setw(30) << node->src[0]->name << std::right
|
||||||
file << "\n";
|
<< std::setw(16) << "[ "
|
||||||
|
<< std::setw(0) << node->src[0]->nb[0] << ", "
|
||||||
|
<< std::setw(5) << node->src[0]->nb[1] << ", "
|
||||||
|
<< std::setw(5) << node->src[0]->nb[2] << "] "
|
||||||
|
<< "\n";
|
||||||
}
|
}
|
||||||
if (node->src[1]) {
|
if (node->src[1]) {
|
||||||
file << std::setw(10) << " [ "
|
file << std::setw(10) << " [ "
|
||||||
|
|
@ -295,15 +315,19 @@ void ggml_graph_op_print(const struct ggml_cgraph * cgraph) {
|
||||||
<< std::setw(5) << node->src[1]->ne[1] << ", "
|
<< std::setw(5) << node->src[1]->ne[1] << ", "
|
||||||
<< std::setw(5) << node->src[1]->ne[2] << "] "
|
<< std::setw(5) << node->src[1]->ne[2] << "] "
|
||||||
<< std::setw(12)
|
<< std::setw(12)
|
||||||
<< "1: " << ggml_op_name(node->src[1]->op) << " ";
|
<< "1: " << std::left << std::setw(12) << ggml_op_name(node->src[1]->op) << std::right;
|
||||||
// // Custom logic to handle '\000'
|
// // Custom logic to handle '\000'
|
||||||
// const char* name_ptr = node->src[1]->name;
|
// const char* name_ptr = node->src[1]->name;
|
||||||
// while (*name_ptr != '\0' || *(name_ptr + 1) != '\0' || *(name_ptr + 2) != '\0') {
|
// while (*name_ptr != '\0' || *(name_ptr + 1) != '\0' || *(name_ptr + 2) != '\0') {
|
||||||
// file << *name_ptr;
|
// file << *name_ptr;
|
||||||
// name_ptr++;
|
// name_ptr++;
|
||||||
// }
|
// }
|
||||||
file << node->src[1]->name;
|
file << std::left << std::setw(30) << node->src[1]->name << std::right
|
||||||
file << "\n";
|
<< std::setw(16) << "[ "
|
||||||
|
<< std::setw(0) << node->src[1]->nb[0] << ", "
|
||||||
|
<< std::setw(5) << node->src[1]->nb[1] << ", "
|
||||||
|
<< std::setw(5) << node->src[1]->nb[2] << "] "
|
||||||
|
<< "\n";
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -121,7 +121,7 @@ enum ggml_status openvino_frontend_compute(ggml_backend_t backend, struct ggml_c
|
||||||
|
|
||||||
// Convert InputModel -> ov::Model
|
// Convert InputModel -> ov::Model
|
||||||
std::shared_ptr<ov::Model> model = front_end->convert(input_model);
|
std::shared_ptr<ov::Model> model = front_end->convert(input_model);
|
||||||
// ov::save_model(model, "/home/user/zhan/merge_git_commits/llama.cpp-ov/001_model.xml");
|
ov::save_model(model, "/home/user/zhan/merge_git_commits/llama.cpp-ov/001_model.xml");
|
||||||
|
|
||||||
if (!model) {
|
if (!model) {
|
||||||
GGML_LOG_ERROR("Model is not converted \n");
|
GGML_LOG_ERROR("Model is not converted \n");
|
||||||
|
|
@ -145,6 +145,14 @@ enum ggml_status openvino_frontend_compute(ggml_backend_t backend, struct ggml_c
|
||||||
// Set input tensor
|
// Set input tensor
|
||||||
for (size_t i = 0; i < input_names.size(); i++) {
|
for (size_t i = 0; i < input_names.size(); i++) {
|
||||||
infer_request.set_input_tensor(i, input_tensors[input_names[i]]);
|
infer_request.set_input_tensor(i, input_tensors[input_names[i]]);
|
||||||
|
|
||||||
|
// auto input_tensor = infer_request.get_input_tensor(i);
|
||||||
|
// auto input_shape = input_tensor.get_shape();
|
||||||
|
// std::cout << "Input tensor " << i << " shape: ";
|
||||||
|
// for (const auto& dim : input_shape) {
|
||||||
|
// std::cout << dim << " ";
|
||||||
|
// }
|
||||||
|
// std::cout << std::endl;
|
||||||
}
|
}
|
||||||
|
|
||||||
infer_request.infer();
|
infer_request.infer();
|
||||||
|
|
@ -155,6 +163,7 @@ enum ggml_status openvino_frontend_compute(ggml_backend_t backend, struct ggml_c
|
||||||
for (size_t i = 0; i < output_names.size(); i++) {
|
for (size_t i = 0; i < output_names.size(); i++) {
|
||||||
// std::string op_name = ggml_decoder->get_node_op_name(output_names[i]);
|
// std::string op_name = ggml_decoder->get_node_op_name(output_names[i]);
|
||||||
auto output_tensor = infer_request.get_output_tensor(i);
|
auto output_tensor = infer_request.get_output_tensor(i);
|
||||||
|
// output_tensor.get_shape();
|
||||||
std::memcpy(output_tensors[output_names[i]], output_tensor.data(), output_tensor.get_byte_size());
|
std::memcpy(output_tensors[output_names[i]], output_tensor.data(), output_tensor.get_byte_size());
|
||||||
#ifdef GGML_OPENVINO_DEBUG
|
#ifdef GGML_OPENVINO_DEBUG
|
||||||
printf("Output %s after: %g\n", output_names[i].c_str(), *(double*)(output_tensor.data()));
|
printf("Output %s after: %g\n", output_names[i].c_str(), *(double*)(output_tensor.data()));
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue