diff --git a/ggml/src/ggml-openvino/ggml-decoder.cpp b/ggml/src/ggml-openvino/ggml-decoder.cpp index fd56900728..a8e1ad5556 100644 --- a/ggml/src/ggml-openvino/ggml-decoder.cpp +++ b/ggml/src/ggml-openvino/ggml-decoder.cpp @@ -3,9 +3,11 @@ #include #include +#include #include #include #include +#include #include #include #include @@ -42,6 +44,12 @@ GgmlOvDecoder::GgmlOvDecoder(struct ggml_tensor* node, struct ggml_cgraph* cgrap dump_cgraph(m_cgraph); } + static bool weight_created = false; + if (!getenv("GGML_OPENVINO_WEIGHT_AS_INPUT") && !weight_created) { + add_weight_const_parallel(model_weights); + weight_created = true; + } + set_max_token_len(); for (int node_n = 0; node_n < m_cgraph->n_nodes; node_n++) { auto* cur_node = m_cgraph->nodes[node_n]; @@ -235,6 +243,43 @@ void GgmlOvDecoder::add_extra_inputs() { } } +void GgmlOvDecoder::add_weight_const_parallel(std::map>& model_weights) { + static std::mutex weights_mutex; + auto* nodes = m_cgraph->nodes; + auto n_nodes = m_cgraph->n_nodes; + std::for_each(std::execution::par, nodes, nodes + n_nodes, [&](ggml_tensor* node) { + for (int i = 0; i < GGML_MAX_SRC; i++) { + auto* src = node->src[i]; + if (src == nullptr) { + continue; + } + + std::string src_name(src->name); + if (!src->view_src) { + ggml_backend_buffer* buffer = src->buffer; + if (buffer->usage == GGML_BACKEND_BUFFER_USAGE_WEIGHTS) { + bool should_create = false; + { + std::lock_guard lock(weights_mutex); + if (model_weights.find(src_name) == model_weights.end()) { + model_weights[src_name] = nullptr; + should_create = true; + } + } + if (should_create) { + auto weight_node = create_weight_node(src); + weight_node->set_friendly_name(src_name); + { + std::lock_guard lock(weights_mutex); + model_weights[src_name] = weight_node; + } + } + } + } + } + }); +} + std::shared_ptr GgmlOvDecoder::create_weight_node(ggml_tensor* tensor) { std::shared_ptr weight_node; auto node_type = get_ov_type(tensor); diff --git a/ggml/src/ggml-openvino/ggml-decoder.h b/ggml/src/ggml-openvino/ggml-decoder.h index b8cc4c4cdf..4d4a928121 100644 --- a/ggml/src/ggml-openvino/ggml-decoder.h +++ b/ggml/src/ggml-openvino/ggml-decoder.h @@ -101,6 +101,8 @@ private: void set_max_token_len(); int64_t m_max_token_len; + void add_weight_const_parallel(std::map>& model_weights); + struct ggml_cgraph* m_cgraph; std::map m_inputs; std::vector m_input_names;