// Patch for ggml/src/ggml-qnn/backend-ops.cpp (two hunks). These comment lines sit before the
// `diff --git` header and are not part of any hunk.
//
// Hunk 1 (@@ -78,6 +78,31 @@): adds get_graph_key(op_name, inputs, outputs).
//   - Starts the key with op_name.
//   - For every tensor in `inputs`, then every tensor in `outputs`, appends
//     "_" followed by ne[0], ne[1], ne[2], ne[3] separated by "x".
//   - Returns the resulting std::string by value.
//
// Hunk 2 (@@ -87,16 +112,7 @@): inside get_qnn_graph_from_cache, removes the inline key construction
//   that appended "_" + tensor->name for each input and output, and replaces it with a call to
//   get_graph_key<_InputSize, _OutputSize>(op_name, inputs, outputs). The key is then looked up
//   with graph_cache.find(graph_key). The rest of that function lies outside this patch view.
//
// NOTE(review): the template parameter/argument lists look stripped in this copy of the patch
//   (`template` with nothing after it, `std::array &inputs`); the call site above suggests the
//   parameters are _InputSize and _OutputSize — confirm against the original patch before applying.
// NOTE(review): the new key is built only from the op name and the four ne[] values. Tensors with
//   identical dimensions but otherwise different properties (e.g. element type, if the tensor
//   struct carries one) would presumably share a cache entry — confirm this is intended.
diff --git a/ggml/src/ggml-qnn/backend-ops.cpp b/ggml/src/ggml-qnn/backend-ops.cpp index bd87cfc9e6..b138257b8c 100644 --- a/ggml/src/ggml-qnn/backend-ops.cpp +++ b/ggml/src/ggml-qnn/backend-ops.cpp @@ -78,6 +78,31 @@ bool execute_graph(qnn::ggml_qnn_graph *graph, const std::array +std::string get_graph_key(const std::string &op_name, const std::array &inputs, + const std::array &outputs) { + constexpr static const auto append_dimensions = [](std::string &key, const ggml_tensor *tensor) { + key += "_"; + key += std::to_string(tensor->ne[0]); + key += "x"; + key += std::to_string(tensor->ne[1]); + key += "x"; + key += std::to_string(tensor->ne[2]); + key += "x"; + key += std::to_string(tensor->ne[3]); + }; + + std::string graph_key(op_name); + for (auto &input : inputs) { + append_dimensions(graph_key, input); + } + for (auto &output : outputs) { + append_dimensions(graph_key, output); + } + + return graph_key; +} + template qnn::ggml_qnn_graph *get_qnn_graph_from_cache(ggml_backend_qnn_context *ctx, size_t op, const std::string &qnn_op, const std::array &inputs, @@ -87,16 +112,7 @@ qnn::ggml_qnn_graph *get_qnn_graph_from_cache(ggml_backend_qnn_context *ctx, siz auto &graph_cache = ctx->qnn_graph_cache; const auto *op_name = op < qnn::kGgmlUnaryOpStart ? ggml_op_name(ggml_op(op)) : ggml_unary_op_name(ggml_unary_op(op - qnn::kGgmlUnaryOpStart)); - std::string graph_key(op_name); - for (auto &input : inputs) { - graph_key += "_"; - graph_key += input->name; - } - for (auto &output : outputs) { - graph_key += "_"; - graph_key += output->name; - } - + auto graph_key = get_graph_key<_InputSize, _OutputSize>(op_name, inputs, outputs); auto it = graph_cache.find(graph_key); qnn::ggml_qnn_graph *graph_ptr = nullptr; if (it != graph_cache.end()) {