diff --git a/ggml/src/ggml-openvino.cpp b/ggml/src/ggml-openvino.cpp
index c33e3f2be0..ea12c05ac7 100644
--- a/ggml/src/ggml-openvino.cpp
+++ b/ggml/src/ggml-openvino.cpp
@@ -268,6 +268,7 @@ static enum ggml_status ggml_backend_openvino_graph_compute(ggml_backend_t backe
     return GGML_STATUS_SUCCESS;
 
     GGML_UNUSED(backend);
+    GGML_UNUSED(ctx);
 }
 
 static const ggml_backend_i ggml_backend_openvino_interface = {
@@ -487,7 +488,7 @@ static bool ggml_backend_openvino_device_supports_op(ggml_backend_dev_t dev, con
         case GGML_OP_MUL_MAT:
            return false;
        default:
-            return true;
+            return false;
    }
 }
 
diff --git a/ggml/src/ggml-openvino/decoder.h b/ggml/src/ggml-openvino/decoder.h
index e047235d88..be943716f2 100644
--- a/ggml/src/ggml-openvino/decoder.h
+++ b/ggml/src/ggml-openvino/decoder.h
@@ -41,6 +41,10 @@ public:
 
     virtual size_t get_output_size() const = 0;
 
+    virtual bool is_graph_output(size_t index) const = 0;
+
+    virtual std::string& get_output_name(size_t index) const = 0;
+
     virtual const std::string& get_op_type() const = 0;
 
     virtual const std::string& get_op_name() const = 0;
diff --git a/ggml/src/ggml-openvino/ggml-decoder.h b/ggml/src/ggml-openvino/ggml-decoder.h
index 96398d3f83..1eaba59426 100644
--- a/ggml/src/ggml-openvino/ggml-decoder.h
+++ b/ggml/src/ggml-openvino/ggml-decoder.h
@@ -45,7 +45,9 @@ public:
     virtual std::string& get_output_name(size_t index) const override;
 
-    size_t get_output_size() const override;
+    virtual size_t get_output_size() const override;
+
+    virtual bool is_graph_output(size_t index) const override;
 
     virtual const std::string& get_op_type() const override;
 
     virtual const std::string& get_op_name() const override;
diff --git a/ggml/src/ggml-openvino/ggml-ov-frontend-utils.cpp b/ggml/src/ggml-openvino/ggml-ov-frontend-utils.cpp
index fd5921b476..10107cbfd0 100644
--- a/ggml/src/ggml-openvino/ggml-ov-frontend-utils.cpp
+++ b/ggml/src/ggml-openvino/ggml-ov-frontend-utils.cpp
@@ -1,6 +1,7 @@
 #include "ggml-ov-frontend-utils.h"
 #include "ggml-backend-impl.h"
 #include <memory>
+#include <algorithm>
 
 using ov::frontend::tensorflow::ggml::GgmlOvGraphIterator;
 
@@ -8,9 +9,28 @@ std::shared_ptr<GgmlOvGraphIterator> get_ggml_graph_iterator(struct ggml_cgraph
     return std::make_shared<GgmlOvGraphIterator>(cgraph);
 }
 
+std::vector<ov::Tensor> get_ggml_graph_input_tensors(std::shared_ptr<GgmlOvGraphIterator> ggml_graph_iterator) {
+    std::vector<ov::Tensor> input_tensors;
+    auto input_names = ggml_graph_iterator->get_input_names();
+    ggml_graph_iterator->reset();
+    for (; !ggml_graph_iterator->is_end(); ggml_graph_iterator->next()) {
+        auto decoder = std::dynamic_pointer_cast<GgmlOvDecoder>(ggml_graph_iterator->get_decoder());
+        for (size_t inp = 0; inp < decoder->get_input_size(); ++inp) {
+            if (std::find(input_names.begin(), input_names.end(), decoder->get_input_name(inp)) != input_names.end()) {
+                // Wrap the ggml tensor's buffer in an ov::Tensor without copying
+                auto input_data = decoder->get_input_ggml_tensor(inp)->data;
+                ov::Tensor input_tensor = ov::Tensor(decoder->get_input_type(inp), decoder->get_input_shape(inp).to_shape(), input_data);
+                input_tensors.push_back(input_tensor);
+            }
+        }
+    }
+    return input_tensors;
+}
+
 static ov::frontend::FrontEnd::Ptr get_ggml_frontend() {
     ov::frontend::FrontEnd::Ptr front_end = nullptr;
     auto fem = ov::frontend::FrontEndManager();
+    // std::string fe_so_path = "/home/yumeng/Code/test/openvino/bin/intel64/Release/libopenvino_ggml_frontend.so";
     std::string fe_so_path = "/home/yumeng/Code/ov-ggml-frontend/openvino/bin/intel64/Release/libopenvino_ggml_frontend.so";
     fem.register_front_end("ggml", fe_so_path);
     front_end = fem.load_by_framework("ggml");
@@ -18,36 +38,72 @@ static ov::frontend::FrontEnd::Ptr get_ggml_frontend() {
 }
 
 enum ggml_status openvino_frontend_compute (ggml_backend_t backend, struct ggml_cgraph * cgraph) {
+    ov::Core core;
+    auto devices = core.get_available_devices();
+    #ifdef GGML_OPENVINO_DEBUG
+    GGML_LOG_INFO("Number of available devices: %zu\n", devices.size());
+    #endif
     // Get GGML Frontend
     auto front_end = get_ggml_frontend();
     if (!front_end) {
         GGML_LOG_ERROR("GGML FrontEnd is not initialized \n");
         return GGML_STATUS_FAILED;
     } else {
-        GGML_LOG_ERROR("GGML FrontEnd is initialized \n");
+        #ifdef GGML_OPENVINO_DEBUG
+        GGML_LOG_INFO("GGML FrontEnd is initialized \n");
+        #endif
     }
 
     auto ggml_graph_iterator = get_ggml_graph_iterator(cgraph);
     std::shared_ptr<ov::frontend::tensorflow::GraphIterator> graph_iterator = ggml_graph_iterator;
-    GGML_LOG_ERROR("Decoder count in current GraphIterator: %s\n", std::to_string(graph_iterator->size()).c_str());
 
     // Load GraphIterator -> InputModel
     ov::frontend::InputModel::Ptr input_model = front_end->load(graph_iterator);
     if (!input_model) {
-        GGML_LOG_ERROR("\nInput Model is not loaded \n");
+        GGML_LOG_ERROR("Input Model is not loaded \n");
         return GGML_STATUS_FAILED;
     } else {
-        GGML_LOG_ERROR("\nInput Model loaded \n");
+        #ifdef GGML_OPENVINO_DEBUG
+        GGML_LOG_INFO("Input Model loaded \n");
+        #endif
     }
 
-    // TODO: Convert InputModel -> ov::Model
-    // std::shared_ptr<ov::Model> model = front_end->convert(input_model);
-    // if (!model) {
-    //     GGML_LOG_ERROR("Model is not converted");
-    // }
+    // Convert InputModel -> ov::Model
+    std::shared_ptr<ov::Model> model = front_end->convert(input_model);
+    if (!model) {
+        GGML_LOG_ERROR("Model is not converted \n");
+    } else {
+        #ifdef GGML_OPENVINO_DEBUG
+        GGML_LOG_INFO("Model converted \n");
+        #endif
+    }
 
-    // TODO: Compute
+    // Load the model onto the device
+    ov::CompiledModel compiled_model = core.compile_model(model);
+
+    // Create infer request
+    ov::InferRequest infer_request = compiled_model.create_infer_request();
+
+    // Get input tensors from the ggml graph
+    auto input_tensors = get_ggml_graph_input_tensors(ggml_graph_iterator);
+
+    // Set input tensors by position
+    for (size_t i = 0; i < input_tensors.size(); i++) {
+        infer_request.set_input_tensor(i, input_tensors[i]);
+    }
+
+    infer_request.infer();
+
+    ov::Tensor output_tensor = infer_request.get_output_tensor();
+    // Copy the inference result back into the last node of the cgraph,
+    // which holds the graph output
+    ggml_tensor * dst = cgraph->nodes[cgraph->n_nodes - 1];
+    std::memcpy(dst->data, output_tensor.data(), output_tensor.get_byte_size());
+    #ifdef GGML_OPENVINO_DEBUG
+    GGML_LOG_INFO("First output value: %f\n", *output_tensor.data<float>());
+    #endif
+
     return GGML_STATUS_SUCCESS;
 
     GGML_UNUSED(backend);
 }
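
Note on the compute path added to openvino_frontend_compute(): it follows the standard OpenVINO runtime sequence, frontend load() -> InputModel -> convert() -> ov::Model -> compile_model() -> CompiledModel -> create_infer_request() -> infer(). The snippet below is a minimal standalone sketch of that same sequence outside the ggml backend, not part of this patch: "model.xml" is a hypothetical single-input, single-output FP32 IR file, and the device is pinned to "CPU" here, whereas the patch lets core.compile_model() pick the default device.

#include <openvino/openvino.hpp>
#include <cstring>
#include <vector>

int main() {
    ov::Core core;

    // Read and compile the model in one step
    ov::CompiledModel compiled = core.compile_model("model.xml", "CPU");
    ov::InferRequest request = compiled.create_infer_request();

    // Wrap an existing buffer without copying, the same way
    // get_ggml_graph_input_tensors() wraps the ggml tensors' data pointers
    ov::Shape in_shape = compiled.input().get_shape();
    std::vector<float> in_data(ov::shape_size(in_shape), 1.0f);
    ov::Tensor input(ov::element::f32, in_shape, in_data.data());
    request.set_input_tensor(0, input);

    request.infer();

    // Copy the result out, mirroring the memcpy into the last cgraph node
    ov::Tensor output = request.get_output_tensor();
    std::vector<float> result(output.get_size());
    std::memcpy(result.data(), output.data(), output.get_byte_size());
    return 0;
}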
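
Two assumptions in this first end-to-end version are worth flagging. Inputs are bound by position via set_input_tensor(i, ...), so the traversal order in get_ggml_graph_input_tensors() has to match the input order of the converted model. And the single get_output_tensor() plus memcpy assumes the converted model has exactly one output, held by the last node of the cgraph. Both look acceptable for now, given that ggml_backend_openvino_device_supports_op() now falls back to unsupported by default, keeping most graphs off this path while the frontend matures.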