diff --git a/ggml/src/ggml-openvino/ggml-decoder.cpp b/ggml/src/ggml-openvino/ggml-decoder.cpp
index cde99f3288..b20bfd0c76 100644
--- a/ggml/src/ggml-openvino/ggml-decoder.cpp
+++ b/ggml/src/ggml-openvino/ggml-decoder.cpp
@@ -76,13 +76,15 @@ GgmlOvDecoder::GgmlOvDecoder(struct ggml_cgraph* cgraph,
     add_extra_inputs();
 }
 
-GgmlOvDecoder::GgmlOvDecoder(struct ggml_cgraph* cgraph) {
+GgmlOvDecoder::GgmlOvDecoder(struct ggml_cgraph* cgraph,
+                             std::map<std::string, std::shared_ptr<ov::Node>>& model_weights) {
     if (getenv("GGML_OPENVINO_DUMP_CGRAPH")) {
         std::string filename = "cgraph.txt";
         dump_cgraph(cgraph, filename);
     }
 
     m_cgraph = cgraph;
+    m_model_weights = model_weights;
     for (int node_n = 0; node_n < cgraph->n_nodes; node_n++) {
         auto* cur_node = cgraph->nodes[node_n];
         if (cur_node->op == GGML_OP_NONE) {
@@ -123,10 +125,12 @@ void GgmlOvDecoder::set_input_output(ggml_tensor* node, bool naive) {
 
         // Add model inputs and weights constants, if called for the whole graph
        if (naive) {
-            auto param_node = std::make_shared<ov::op::v0::Parameter>(get_ov_type(src), get_graph_input_shape(src));
-            param_node->set_friendly_name(src_name);
-            param_node->output(0).get_tensor().set_names({src_name});
-            m_model_inputs[src_name] = param_node;
+            if (m_model_weights.find(src_name) == m_model_weights.end()) {
+                auto param_node = std::make_shared<ov::op::v0::Parameter>(get_ov_type(src), get_graph_input_shape(src));
+                param_node->set_friendly_name(src_name);
+                param_node->output(0).get_tensor().set_names({src_name});
+                m_model_inputs[src_name] = param_node;
+            }
         }
         else if (!m_node && !src->view_src) {
             ggml_backend_buffer* buffer = src->buffer;
@@ -381,7 +385,7 @@ std::map<std::string, std::shared_ptr<ov::Node>> GgmlOvDecoder::create_weight_no
         std::string src_name(src->name);
         if (!src->view_src) {
             ggml_backend_buffer* buffer = src->buffer;
-            if (buffer->usage == GGML_BACKEND_BUFFER_USAGE_WEIGHTS) {
+            if (buffer->usage == GGML_BACKEND_BUFFER_USAGE_WEIGHTS || ggml_is_quantized(src->type)) {
                 bool should_create = false;
                 {
                     std::lock_guard lock(weights_mutex);
diff --git a/ggml/src/ggml-openvino/ggml-decoder.h b/ggml/src/ggml-openvino/ggml-decoder.h
index ae378273d3..df23c649f4 100644
--- a/ggml/src/ggml-openvino/ggml-decoder.h
+++ b/ggml/src/ggml-openvino/ggml-decoder.h
@@ -20,7 +20,7 @@ public:
                   int context_size, int num_heads, int num_heads_kv, int head_size);
 
     // Naive graph decoder
-    GgmlOvDecoder(struct ggml_cgraph* cgraph);
+    GgmlOvDecoder(struct ggml_cgraph* cgraph, std::map<std::string, std::shared_ptr<ov::Node>>& model_weights);
 
     virtual ov::Any get_attribute(const std::string& name) const override {
         return nullptr;
@@ -115,6 +115,8 @@ public:
 
     ov::PartialShape get_graph_input_shape(const ggml_tensor* src) const;
 
+    static void dump_cgraph(const struct ggml_cgraph* cgraph, std::string& filename);
+
     static std::shared_ptr<ov::Node> create_weight_node(ggml_tensor* tensor);
     static std::map<std::string, std::shared_ptr<ov::Node>> create_weight_nodes(struct ggml_cgraph* cgraph);
 
@@ -126,7 +128,6 @@ public:
 private:
     void set_input_output(ggml_tensor* node, bool naive = false);
     void add_extra_inputs();
-    static void dump_cgraph(const struct ggml_cgraph* cgraph, std::string& filename);
     static std::vector<size_t> get_shape(const ggml_tensor* tensor);
     static std::vector<size_t> get_stride(const ggml_tensor* tensor);
     static ov::element::Type get_ov_type(const ggml_tensor* tensor);
diff --git a/ggml/src/ggml-openvino/ggml-openvino.cpp b/ggml/src/ggml-openvino/ggml-openvino.cpp
index 23a92c58ac..4b743be688 100644
--- a/ggml/src/ggml-openvino/ggml-openvino.cpp
+++ b/ggml/src/ggml-openvino/ggml-openvino.cpp
@@ -403,14 +403,22 @@ static bool ggml_backend_openvino_device_supports_op(ggml_backend_dev_t dev, con
         return false;
     }
     for (int i = 0; i < GGML_MAX_SRC; i++) {
-        if (supported_types.find(op->type) == supported_types.end()) {
-            GGML_LOG_WARN("OpenVINO backend does not support tensor type %s\n", ggml_type_name(op->type));
+        auto* src = op->src[i];
+        if (src == nullptr) {
+            break;
+        }
+        if (supported_types.find(src->type) == supported_types.end()) {
+            GGML_LOG_WARN("OpenVINO backend does not support tensor type %s\n", ggml_type_name(src->type));
             return false;
         }
-        if (op->src[i] != nullptr && op->src[i]->ne[3] != 1) {
+        if (src->ne[3] != 1) {
             GGML_LOG_WARN("OpenVINO backend does not support tensors with ne[3] != 1\n");
             return false;
         }
+        if (ggml_is_quantized(src->type) && src->ne[2] != 1) {
+            GGML_LOG_WARN("OpenVINO backend does not support 3D quantized tensors\n");
+            return false;
+        }
     }
 
     if (is_op_unsupported_case(op)) {
diff --git a/ggml/src/ggml-openvino/utils.cpp b/ggml/src/ggml-openvino/utils.cpp
index 473fa72f99..43fa0c469d 100644
--- a/ggml/src/ggml-openvino/utils.cpp
+++ b/ggml/src/ggml-openvino/utils.cpp
@@ -281,10 +281,14 @@ enum ggml_status naive_compute(struct ggml_cgraph* cgraph,
         return GGML_STATUS_FAILED;
     }
 
-    auto decoder = std::make_shared<GgmlOvDecoder>(cgraph);
+    auto model_weights = GgmlOvDecoder::create_weight_nodes(cgraph);
+    auto decoder = std::make_shared<GgmlOvDecoder>(cgraph, model_weights);
     auto input_model = std::make_shared<ov::frontend::ggml::InputModel>(decoder);
     auto naive = true;
     auto model = ov::frontend::ggml::FrontEnd::convert(input_model, naive);
+    if (getenv("GGML_OPENVINO_DUMP_IR")) {
+        ov::serialize(model, "IR_naive.xml");
+    }
     auto infer_request = core.compile_model(model, device, config).create_infer_request();
 
     auto ov_params = model->get_parameters();
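
Below is a minimal annotated sketch of how the naive path fits together after this patch. GgmlOvDecoder, create_weight_nodes, FrontEnd::convert, ov::serialize, and the GGML_OPENVINO_DUMP_IR check are taken from the hunks above; the core, device, and config objects are assumed to be set up as elsewhere in utils.cpp, and the exact InputModel type name is an assumption rather than something this diff shows.

    // Sketch only: mirrors the updated naive_compute() flow.

    // 1. Turn every weight tensor in the cgraph into an OpenVINO constant up front.
    //    With this patch, quantized tensors are treated as weights even when their
    //    buffer is not flagged GGML_BACKEND_BUFFER_USAGE_WEIGHTS.
    auto model_weights = GgmlOvDecoder::create_weight_nodes(cgraph);

    // 2. Hand the weight map to the naive decoder; set_input_output() then skips
    //    creating a Parameter for any tensor that already has a weight constant.
    auto decoder = std::make_shared<GgmlOvDecoder>(cgraph, model_weights);

    // 3. Convert through the GGML frontend in naive mode, optionally dumping the IR
    //    for inspection when GGML_OPENVINO_DUMP_IR is set.
    auto input_model = std::make_shared<ov::frontend::ggml::InputModel>(decoder);  // type name assumed
    auto model = ov::frontend::ggml::FrontEnd::convert(input_model, /*naive=*/true);
    if (getenv("GGML_OPENVINO_DUMP_IR")) {
        ov::serialize(model, "IR_naive.xml");
    }

    // 4. Compile and create the infer request as before.
    auto infer_request = core.compile_model(model, device, config).create_infer_request();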