PERF: add weight constant in parallel

This commit is contained in:
Yu, Zijun 2025-05-14 17:48:56 +08:00 committed by Mustafa Cavus
parent c57f61494a
commit a30dc6e726
2 changed files with 47 additions and 0 deletions

View File

@ -3,9 +3,11 @@
#include <ggml-impl.h>
#include <ggml.h>
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <cstdlib>
#include <execution>
#include <fstream>
#include <iomanip>
#include <map>
@ -42,6 +44,12 @@ GgmlOvDecoder::GgmlOvDecoder(struct ggml_tensor* node, struct ggml_cgraph* cgrap
dump_cgraph(m_cgraph);
}
static bool weight_created = false;
if (!getenv("GGML_OPENVINO_WEIGHT_AS_INPUT") && !weight_created) {
add_weight_const_parallel(model_weights);
weight_created = true;
}
set_max_token_len();
for (int node_n = 0; node_n < m_cgraph->n_nodes; node_n++) {
auto* cur_node = m_cgraph->nodes[node_n];
@ -235,6 +243,43 @@ void GgmlOvDecoder::add_extra_inputs() {
}
}
// Creates a weight node for every weight tensor referenced by the compute
// graph and stores it in `model_weights`, keyed by the tensor name.
//
// The nodes of m_cgraph are visited with std::execution::par. For each node,
// every non-null, non-view source whose buffer is flagged
// GGML_BACKEND_BUFFER_USAGE_WEIGHTS is considered; create_weight_node() is
// called at most once per distinct name, and names that are already present in
// `model_weights` on entry are left untouched.
//
// @param model_weights  Map from tensor name to weight node; updated in place.
//
// NOTE(review): with a standard execution policy an exception escaping the
// callable ends in std::terminate, so create_weight_node() is expected not to
// throw here.
void GgmlOvDecoder::add_weight_const_parallel(std::map<std::string, std::shared_ptr<ov::Node>>& model_weights) {
    // Guards every structural access to `model_weights` from the worker threads.
    static std::mutex weights_mutex;

    auto* nodes = m_cgraph->nodes;
    auto n_nodes = m_cgraph->n_nodes;

    std::for_each(std::execution::par, nodes, nodes + n_nodes, [&](ggml_tensor* node) {
        for (int i = 0; i < GGML_MAX_SRC; i++) {
            auto* src = node->src[i];
            if (src == nullptr || src->view_src) {
                continue;
            }

            // A source without a backing buffer cannot be a weight; skip it
            // instead of dereferencing a null pointer.
            ggml_backend_buffer* buffer = src->buffer;
            if (buffer == nullptr || buffer->usage != GGML_BACKEND_BUFFER_USAGE_WEIGHTS) {
                continue;
            }

            // Build the key only for actual weights, not for every source slot.
            const std::string src_name(src->name);

            // Claim the name with a null placeholder under the lock. Exactly
            // one thread sees `inserted == true` for a given name, so the
            // expensive node creation below runs once per weight and outside
            // the critical section.
            const auto claim = [&] {
                std::lock_guard<std::mutex> lock(weights_mutex);
                return model_weights.try_emplace(src_name, nullptr);
            }();
            if (!claim.second) {
                continue;
            }

            auto weight_node = create_weight_node(src);
            weight_node->set_friendly_name(src_name);

            {
                // std::map iterators stay valid across insertions by other
                // threads, so the claimed slot can be filled without a second
                // lookup; the lock still serialises against concurrent inserts.
                std::lock_guard<std::mutex> lock(weights_mutex);
                claim.first->second = weight_node;
            }
        }
    });
}
std::shared_ptr<ov::Node> GgmlOvDecoder::create_weight_node(ggml_tensor* tensor) {
std::shared_ptr<ov::Node> weight_node;
auto node_type = get_ov_type(tensor);

View File

@ -101,6 +101,8 @@ private:
void set_max_token_len();
int64_t m_max_token_len;
// Fills `model_weights` (tensor name -> node) with one node per weight-buffer
// source tensor referenced by m_cgraph, building the nodes in parallel.
// Names already present in the map are not recreated.
void add_weight_const_parallel(std::map<std::string, std::shared_ptr<ov::Node>>& model_weights);
struct ggml_cgraph* m_cgraph;
std::map<std::string, ggml_tensor*> m_inputs;
std::vector<std::string> m_input_names;