From 10bd671c08f7094c97316edbd12a59e207e0da34 Mon Sep 17 00:00:00 2001 From: nullname Date: Sat, 18 Jan 2025 22:15:27 +0800 Subject: [PATCH] [feat]add more op support (#18) * disable rpc buffer for npu * append input/output tensor size into unsupported op log * log dimensions for unsupported tensor * wip * split op config classes into separated file * fix reshape * wip * add op_constructor_with_type_param * set parameter for op_constructor_with_type_param func --- ggml/src/ggml-qnn/backend-ops.cpp | 24 +- ggml/src/ggml-qnn/graph.cpp | 12 +- ggml/src/ggml-qnn/op-config-caps.cpp | 208 +++++++++++++++++- .../{op-config.cpp => op-config-impl.cpp} | 27 +-- ggml/src/ggml-qnn/op-config-impl.hpp | 151 +++++++++++++ ggml/src/ggml-qnn/op-config.hpp | 136 +----------- ggml/src/ggml-qnn/tensor.hpp | 4 +- 7 files changed, 384 insertions(+), 178 deletions(-) rename ggml/src/ggml-qnn/{op-config.cpp => op-config-impl.cpp} (95%) create mode 100644 ggml/src/ggml-qnn/op-config-impl.hpp diff --git a/ggml/src/ggml-qnn/backend-ops.cpp b/ggml/src/ggml-qnn/backend-ops.cpp index 75c90e235b..8bbf26da52 100644 --- a/ggml/src/ggml-qnn/backend-ops.cpp +++ b/ggml/src/ggml-qnn/backend-ops.cpp @@ -25,7 +25,7 @@ bool qnn_is_op_valid(ggml_backend_qnn_device_context *ctx, const ggml_tensor *ds return false; } - const auto param_count = qnn::get_qnn_op_input_param_count(qnn::get_qnn_op_index(dst)); + const auto param_count = qnn::get_qnn_op_input_param_count(dst); switch (param_count) { case 1: return dst->src[0]; @@ -91,9 +91,13 @@ void get_graph_key_from_op(const ggml_tensor *op, std::string &output) { GGML_ASSERT(op->op != GGML_OP_NONE); output += ggml_op_desc(op); output += qnn::get_ggml_type_name(op->type); - const auto param_count = qnn::get_qnn_op_input_param_count(qnn::get_qnn_op_index(op)); + const auto param_count = qnn::get_qnn_op_input_param_count(op); for (size_t i = 0; i < param_count; ++i) { auto *input = op->src[i]; + if (!input) { + break; + } + output += '_'; append_tensor_dimensions(input, output); } @@ -224,7 +228,7 @@ bool qnn_generic_op_impl(ggml_backend_qnn_device_context *ctx, ggml_tensor *dst) #ifndef NDEBUG if (!succeed) { - const auto param_count = qnn::get_qnn_op_input_param_count(qnn::get_qnn_op_index(dst)); + const auto param_count = qnn::get_qnn_op_input_param_count(dst); for (size_t i = 0; i < param_count; ++i) { print_ggml_tensor(dst->src[i]); } @@ -409,7 +413,7 @@ bool ggnl_qnn_supports_op_tensor(ggml_backend_qnn_device_context *ctx, const ggm return false; } - const auto param_count = qnn::get_qnn_op_input_param_count(qnn::get_qnn_op_index(op)); + const auto param_count = qnn::get_qnn_op_input_param_count(op); for (size_t i = 0; i < param_count; ++i) { if (!ggml_qnn_supports_tensor(ctx, op->src[i])) { return false; @@ -479,12 +483,20 @@ bool device_supports_op(ggml_backend_qnn_device_context *ctx, const ggml_tensor } if (!kQnnOpsTable[qnn::get_qnn_op_index(op)]) { - QNN_LOG_DEBUG("[%s]unsupported op", ggml_op_name(op->op)); +#ifndef NDEBUG + std::string op_key; + get_graph_key_from_op(op, op_key); + QNN_LOG_DEBUG("[%s]unsupported op", op_key.c_str()); +#endif return false; } if (!ggnl_qnn_supports_op_tensor(ctx, op)) { - QNN_LOG_DEBUG("[%s]unsupported tensor", ggml_op_name(op->op)); +#ifndef NDEBUG + std::string tensor_dims; + append_tensor_dimensions(op, tensor_dims); + QNN_LOG_DEBUG("[%s]unsupported tensor(%s)", ggml_op_name(op->op), tensor_dims.c_str()); +#endif return false; } diff --git a/ggml/src/ggml-qnn/graph.cpp b/ggml/src/ggml-qnn/graph.cpp index 0210e1554a..680f5e23bd 100644 --- 
a/ggml/src/ggml-qnn/graph.cpp +++ b/ggml/src/ggml-qnn/graph.cpp @@ -15,7 +15,7 @@ using qnn_tensor_cache_t = std::unordered_mapsrc[i])); } @@ -56,14 +56,12 @@ qnn::qnn_op_config_ptr_t create_operation_from_op_tensor(ggml_tensor *dst, const QNNBackend device, Qnn_GraphHandle_t graph_handle, std::shared_ptr qnn_instance, bool is_intermediate, qnn_tensor_cache_t &tensor_cache) { - const auto op_index = qnn::get_qnn_op_index(dst); - auto qnn_op = qnn::create_op_constructor(op_index); - auto operation = qnn_op(name, qnn_instance); + auto operation = qnn::create_op(dst, name, qnn_instance); // input tensors qnn::qnn_tensor_array_t input_qnn_tensors; auto tensor_type = is_intermediate ? qnn::ggml_qnn_tensor::INTERMEDIATE : qnn::ggml_qnn_tensor::INPUT; - for (size_t i = 0; i < qnn::get_qnn_op_input_param_count(op_index); ++i) { + for (size_t i = 0; i < qnn::get_qnn_op_input_param_count(dst); ++i) { auto input_qnn_tensor = create_tensor_with_cache(dst->src[i], tensor_type, rank, device, graph_handle, qnn_instance, tensor_cache); input_qnn_tensors.push_back(input_qnn_tensor); @@ -92,7 +90,7 @@ bool bind_src_tensors(ggml_tensor *op, qnn::qnn_tensor_array_t &tensor_wrappers, return false; } - const auto param_count = qnn::get_qnn_op_input_param_count(qnn::get_qnn_op_index(op)); + const auto param_count = qnn::get_qnn_op_input_param_count(op); GGML_ASSERT(tensor_wrappers.size() == param_count); qnn_tensors.resize(param_count); for (size_t i = 0; i < param_count; ++i) { @@ -268,7 +266,7 @@ bool qnn_graph::build_graph_from_ggml_graph(const ggml_cgraph *cgraph) { continue; } - QNN_LOG_DEBUG("[%s]create op: %s", get_backend_name(_device), get_qnn_op_name(dst->op)); + QNN_LOG_DEBUG("[%s]create op: %s", get_backend_name(_device), get_qnn_op_name(dst)); auto operation = create_operation_from_op_tensor(dst, dst->name, rank, _device, _graph_handle, _qnn_instance, true, tensor_cache); // TODO: fix op name operations.push_back(operation); diff --git a/ggml/src/ggml-qnn/op-config-caps.cpp b/ggml/src/ggml-qnn/op-config-caps.cpp index 7fa3d11aff..9b28a76dd1 100644 --- a/ggml/src/ggml-qnn/op-config-caps.cpp +++ b/ggml/src/ggml-qnn/op-config-caps.cpp @@ -1,8 +1,10 @@ -#include "op-config.hpp" +#include "op-config-impl.hpp" namespace { +using op_constructor_t = std::shared_ptr (*)(const ggml_tensor *, const std::string &, + std::shared_ptr); using op_dims_calc_func_t = void (*)(const std::vector &input_dims, qnn::ggml_dimension_array_t &output_dims); @@ -24,6 +26,7 @@ struct qnn_op_caps_t { const char *qnn_op_name = nullptr; const size_t input_param_count = 0; op_dims_calc_func_t calc_dims_func = nullptr; + const char *qnn_param_name = nullptr; }; constexpr const qnn_op_caps_t kOpCaps[] = { @@ -80,7 +83,13 @@ constexpr const qnn_op_caps_t kOpCaps[] = { {}, // GGML_OP_CONCAT {}, // GGML_OP_SILU_BACK {}, // GGML_OP_NORM - {}, // GGML_OP_RMS_NORM + { + // GGML_OP_RMS_NORM + QNN_OP_RMS_NORM, // qnn_op_name + 1, // input_param_count + nullptr, // TODO: calc_dims_func + QNN_OP_RMS_NORM_PARAM_EPSILON, // qnn_param_name + }, {}, // GGML_OP_RMS_NORM_BACK {}, // GGML_OP_GROUP_NORM { @@ -187,9 +196,172 @@ static_assert(kOpCaps[GGML_OP_MUL_MAT].calc_dims_func == mat_mul_op_dims, "GGML_OP_ADD does not have element_wise_op_dims function"); static_assert(kOpCaps[GGML_OP_LOG].calc_dims_func == element_wise_op_dims, "GGML_OP_LOG does not have element_wise_op_dims function"); +static_assert(kOpCaps[GGML_OP_COUNT + GGML_UNARY_OP_GELU].input_param_count == 1, + "GGML_UNARY_OP_GELU does not have 1 input parameter"); 
static_assert(std::size(kOpCaps) == (GGML_OP_COUNT + GGML_UNARY_OP_COUNT), "GGML_OP_COUNT does not match the size of the kOpCaps table"); +std::shared_ptr mat_mul_op_constructor(const ggml_tensor *op, const std::string &instance_name, + std::shared_ptr qnn_instance) { + GGML_UNUSED(op); + QNN_LOG_DEBUG("create QNN_OP_MAT_MUL, name %s", instance_name.c_str()); + return std::make_shared(instance_name, qnn_instance); +} + +template +std::shared_ptr generic_op_constructor(const ggml_tensor *op, const std::string &instance_name, + std::shared_ptr qnn_instance) { + GGML_UNUSED(op); + static_assert(_op < std::size(kOpCaps)); + static_assert(kOpCaps[_op].qnn_op_name != nullptr); + return std::make_shared(instance_name, QNN_OP_PACKAGE_NAME_QTI_AISW, + kOpCaps[_op].qnn_op_name, qnn_instance); +} + +void add_type_parameters(std::shared_ptr op, const char *name, float value) { + Qnn_Scalar_t scalar = QNN_SCALAR_INIT; + scalar.dataType = QNN_DATATYPE_FLOAT_32; + scalar.floatValue = value; + op->add_scalar_param(name, scalar); +} + +template +std::shared_ptr op_constructor_with_type_param( + const ggml_tensor *op, const std::string &instance_name, std::shared_ptr qnn_instance) { + static_assert(std::is_base_of::value); + static_assert(_op < std::size(kOpCaps)); + + constexpr auto &op_caps = kOpCaps[_op]; + static_assert(op_caps.qnn_op_name != nullptr); + + _ggml_op_param_type op_param; + memcpy(&op_param, op->op_params, sizeof(op_param)); + auto qnn_op = std::make_shared<_qnn_op_type_name>(instance_name, QNN_OP_PACKAGE_NAME_QTI_AISW, op_caps.qnn_op_name, + qnn_instance); + if (op_caps.qnn_param_name) { + add_type_parameters(qnn_op, op_caps.qnn_param_name, op_param); + } + return qnn_op; +} + +constexpr const op_constructor_t kOpConstructors[] = { + nullptr, // GGML_OP_NONE + nullptr, // GGML_OP_DUP + generic_op_constructor, // GGML_OP_ADD + nullptr, // GGML_OP_ADD1 + nullptr, // GGML_OP_ACC + generic_op_constructor, // GGML_OP_SUB + generic_op_constructor, // GGML_OP_MUL + generic_op_constructor, // GGML_OP_DIV + nullptr, // GGML_OP_SQR + generic_op_constructor, // GGML_OP_SQRT + generic_op_constructor, // GGML_OP_LOG + nullptr, // GGML_OP_SIN + nullptr, // GGML_OP_COS + nullptr, // GGML_OP_SUM + nullptr, // GGML_OP_SUM_ROWS + nullptr, // GGML_OP_MEAN + nullptr, // GGML_OP_ARGMAX + nullptr, // GGML_OP_COUNT_EQUAL + nullptr, // GGML_OP_REPEAT + nullptr, // GGML_OP_REPEAT_BACK + nullptr, // GGML_OP_CONCAT + nullptr, // GGML_OP_SILU_BACK + nullptr, // GGML_OP_NORM + op_constructor_with_type_param, // GGML_OP_RMS_NORM + nullptr, // GGML_OP_RMS_NORM_BACK + nullptr, // GGML_OP_GROUP_NORM + + mat_mul_op_constructor, // GGML_OP_MUL_MAT + nullptr, // GGML_OP_MUL_MAT_ID + nullptr, // GGML_OP_OUT_PROD + + nullptr, // GGML_OP_SCALE + nullptr, // GGML_OP_SET + nullptr, // GGML_OP_CPY + nullptr, // GGML_OP_CONT + generic_op_constructor, // GGML_OP_RESHAPE + nullptr, // GGML_OP_VIEW + nullptr, // GGML_OP_PERMUTE + nullptr, // GGML_OP_TRANSPOSE + nullptr, // GGML_OP_GET_ROWS + nullptr, // GGML_OP_GET_ROWS_BACK + nullptr, // GGML_OP_DIAG + nullptr, // GGML_OP_DIAG_MASK_INF + nullptr, // GGML_OP_DIAG_MASK_ZERO + nullptr, // GGML_OP_SOFT_MAX + nullptr, // GGML_OP_SOFT_MAX_BACK + nullptr, // GGML_OP_ROPE + nullptr, // GGML_OP_ROPE_BACK + nullptr, // GGML_OP_CLAMP + nullptr, // GGML_OP_CONV_TRANSPOSE_1D + nullptr, // GGML_OP_IM2COL + nullptr, // GGML_OP_IM2COL_BACK + nullptr, // GGML_OP_CONV_TRANSPOSE_2D + nullptr, // GGML_OP_POOL_1D + nullptr, // GGML_OP_POOL_2D + nullptr, // GGML_OP_POOL_2D_BACK + nullptr, // 
GGML_OP_UPSCALE + nullptr, // GGML_OP_PAD + nullptr, // GGML_OP_PAD_REFLECT_1D + nullptr, // GGML_OP_ARANGE + nullptr, // GGML_OP_TIMESTEP_EMBEDDING + nullptr, // GGML_OP_ARGSORT + nullptr, // GGML_OP_LEAKY_RELU + + nullptr, // GGML_OP_FLASH_ATTN_EXT + nullptr, // GGML_OP_FLASH_ATTN_BACK + nullptr, // GGML_OP_SSM_CONV + nullptr, // GGML_OP_SSM_SCAN + nullptr, // GGML_OP_WIN_PART + nullptr, // GGML_OP_WIN_UNPART + nullptr, // GGML_OP_GET_REL_POS + nullptr, // GGML_OP_ADD_REL_POS + nullptr, // GGML_OP_RWKV_WKV6 + nullptr, // GGML_OP_GATED_LINEAR_ATTN + + nullptr, // GGML_OP_UNARY + + nullptr, // GGML_OP_MAP_UNARY + nullptr, // GGML_OP_MAP_BINARY + + nullptr, // GGML_OP_MAP_CUSTOM1_F32 + nullptr, // GGML_OP_MAP_CUSTOM2_F32 + nullptr, // GGML_OP_MAP_CUSTOM3_F32 + + nullptr, // GGML_OP_MAP_CUSTOM1 + nullptr, // GGML_OP_MAP_CUSTOM2 + nullptr, // GGML_OP_MAP_CUSTOM3 + + nullptr, // GGML_OP_CROSS_ENTROPY_LOSS + nullptr, // GGML_OP_CROSS_ENTROPY_LOSS_BACK + nullptr, // GGML_OP_OPT_STEP_ADAMW + + // ggml_unary_op + nullptr, // GGML_UNARY_OP_ABS + nullptr, // GGML_UNARY_OP_SGN + nullptr, // GGML_UNARY_OP_NEG + nullptr, // GGML_UNARY_OP_STEP + nullptr, // GGML_UNARY_OP_TANH + nullptr, // GGML_UNARY_OP_ELU + nullptr, // GGML_UNARY_OP_RELU + nullptr, // GGML_UNARY_OP_SIGMOID + nullptr, // GGML_UNARY_OP_GELU + nullptr, // GGML_UNARY_OP_GELU_QUICK + nullptr, // GGML_UNARY_OP_SILU + nullptr, // GGML_UNARY_OP_HARDSWISH + nullptr, // GGML_UNARY_OP_HARDSIGMOID + nullptr, // GGML_UNARY_OP_EXP +}; + +static_assert(kOpConstructors[GGML_OP_NONE] == nullptr, "GGML_OP_NONE does not match the nullptr function"); +static_assert(kOpConstructors[GGML_OP_ADD] == generic_op_constructor, + "GGML_OP_ADD does not match the generic_op_constructor function"); +static_assert(kOpConstructors[GGML_OP_MUL_MAT] == mat_mul_op_constructor, + "GGML_OP_MUL_MAT does not match the mat_mul_op_constructor function"); +static_assert(std::size(kOpConstructors) == (GGML_OP_COUNT + GGML_UNARY_OP_COUNT), + "GGML_OP_COUNT does not match the size of the kOpConstructors table"); + } // namespace namespace qnn { @@ -202,23 +374,35 @@ size_t get_qnn_op_index(const ggml_tensor *tensor) { return tensor->op; } -void get_ggml_op_output_dimensions(const std::vector &input_dims, size_t op, +void get_ggml_op_output_dimensions(const std::vector &input_dims, const ggml_tensor *op, ggml_dimension_array_t &output_dims) { - GGML_ASSERT(op < std::size(kOpCaps)); - auto get_dims = kOpCaps[op].calc_dims_func; + auto op_index = get_qnn_op_index(op); + GGML_ASSERT(op_index < std::size(kOpCaps)); + auto get_dims = kOpCaps[op_index].calc_dims_func; GGML_ASSERT(get_dims); get_dims(input_dims, output_dims); } -const char *get_qnn_op_name(size_t op) { - GGML_ASSERT(op < std::size(kOpCaps)); - GGML_ASSERT(kOpCaps[op].qnn_op_name); - return kOpCaps[op].qnn_op_name; +const char *get_qnn_op_name(const ggml_tensor *op) { + auto op_index = get_qnn_op_index(op); + GGML_ASSERT(op_index < std::size(kOpCaps)); + GGML_ASSERT(kOpCaps[op_index].qnn_op_name); + return kOpCaps[op_index].qnn_op_name; } -size_t get_qnn_op_input_param_count(size_t op) { - GGML_ASSERT(op < std::size(kOpCaps)); - return kOpCaps[op].input_param_count; +size_t get_qnn_op_input_param_count(const ggml_tensor *op) { + auto op_index = get_qnn_op_index(op); + GGML_ASSERT(op_index < std::size(kOpCaps)); + return kOpCaps[op_index].input_param_count; +} + +std::shared_ptr create_op(const ggml_tensor *op, const std::string &name, + std::shared_ptr qnn_instance) { + auto op_index = get_qnn_op_index(op); + 
GGML_ASSERT(op_index < std::size(kOpCaps)); + auto op_constructor = kOpConstructors[op_index]; + GGML_ASSERT(op_constructor); + return op_constructor(op, name, qnn_instance); } } // namespace qnn diff --git a/ggml/src/ggml-qnn/op-config.cpp b/ggml/src/ggml-qnn/op-config-impl.cpp similarity index 95% rename from ggml/src/ggml-qnn/op-config.cpp rename to ggml/src/ggml-qnn/op-config-impl.cpp index 7edb4078a5..19a1bf46ee 100644 --- a/ggml/src/ggml-qnn/op-config.cpp +++ b/ggml/src/ggml-qnn/op-config-impl.cpp @@ -1,4 +1,4 @@ -#include "op-config.hpp" +#include "op-config-impl.hpp" #include @@ -187,6 +187,13 @@ bool ggml_qnn_single_op_config::initialize_op_nodes(QNNBackend device, Qnn_Graph return true; } +bool ggml_qnn_rmsnorm_op_config::initialize_op_nodes(QNNBackend device, Qnn_GraphHandle_t graph_handle) { + constexpr const uint32_t kAxes[] = {0}; + add_tensor_param(QNN_OP_RMS_NORM_PARAM_AXES, {1}, 1, reinterpret_cast(kAxes), QNN_DATATYPE_UINT_32, + device, graph_handle); + return true; +} + void ggml_qnn_aggregate_op_config::set_input_tensors(qnn::qnn_tensor_array_t &tensor_inputs) { _tensor_inputs = tensor_inputs; } @@ -439,22 +446,4 @@ bool ggml_qnn_matmul_op_config::create_mat_mul_nodes(QNNBackend device, Qnn_Grap return true; } -ggml_op_constructor_t create_op_constructor(size_t op) { - std::string op_name = get_qnn_op_name(op); - if (op_name == QNN_OP_MAT_MUL) { - // For QNN_OP_MAT_MUL, we need to transpose the input tensor - return [](const std::string &instance_name, - std::shared_ptr qnn_instance) -> std::shared_ptr { - QNN_LOG_DEBUG("create QNN_OP_MAT_MUL, name %s", instance_name.c_str()); - return std::make_shared(instance_name, qnn_instance); - }; - } - - return [op_name](const std::string &instance_name, - std::shared_ptr qnn_instance) -> std::shared_ptr { - return std::make_shared(instance_name, QNN_OP_PACKAGE_NAME_QTI_AISW, op_name, - qnn_instance); - }; -} - } // namespace qnn diff --git a/ggml/src/ggml-qnn/op-config-impl.hpp b/ggml/src/ggml-qnn/op-config-impl.hpp new file mode 100644 index 0000000000..4a00ed2cc7 --- /dev/null +++ b/ggml/src/ggml-qnn/op-config-impl.hpp @@ -0,0 +1,151 @@ +#pragma once + +#include +#include +#include +#include + +#include "op-config.hpp" +#include "qnn-lib.hpp" +#include "qnn-types.hpp" +#include "tensor.hpp" + +namespace qnn { + +class ggml_qnn_op_config_base : public ggml_qnn_op_config { +public: + explicit ggml_qnn_op_config_base(const std::string &name, const std::string &package_name, + const std::string &op_type, std::shared_ptr qnn_instance) + : _name(name), _package_name(package_name), _op_type(op_type), _qnn_instance(qnn_instance) {} + + void add_scalar_param(const std::string &name, const Qnn_Scalar_t scalar); + bool add_tensor_param(const std::string &name, const qnn_dimension_array_t &dimensions, int rank, + const uint8_t *data, const Qnn_DataType_t data_type, QNNBackend device, + Qnn_GraphHandle_t graph_handle); + + void set_input_tensors(qnn::qnn_tensor_array_t &tensor_inputs) override; + void set_input_tensors(qnn::qnn_tensor_array_t &&tensor_inputs) override; + void set_output_tensors(qnn::qnn_tensor_array_t &tensor_inputs) override; + void set_output_tensors(qnn::qnn_tensor_array_t &&tensor_inputs) override; + bool add_op_to_graph(Qnn_GraphHandle_t graph_handle) override; + bool bind_input_tensors(const ggml_tensor_array_t &tensor_inputs) override; + bool bind_output_tensors(const ggml_tensor_array_t &tensor_outputs) override; + void unbind_input_tensors() override; + void unbind_output_tensors() override; + const 
qnn_tensor_array_t &get_input_tensors() override { return _tensor_inputs; } + const qnn_tensor_array_t &get_output_tensors() override { return _tensor_outputs; } + +protected: + Qnn_OpConfig_t get_op_config(); + + std::string _name; + std::string _package_name; + std::string _op_type; + std::shared_ptr _qnn_instance; + qnn_tensor_array_t _tensor_inputs; + qnn_tensor_array_t _tensor_outputs; + qnn_tensor_array_t _tensor_parameters; + std::vector _qnn_tensor_inputs; + std::vector _qnn_tensor_outputs; + std::vector _qnn_parameters; + std::vector _param_names; + + DISABLE_COPY(ggml_qnn_op_config_base); + DISABLE_MOVE(ggml_qnn_op_config_base); +}; + +class ggml_qnn_single_op_config : public ggml_qnn_op_config_base { +public: + explicit ggml_qnn_single_op_config(const std::string &name, const std::string &package_name, + const std::string &op_type, std::shared_ptr qnn_instance) + : ggml_qnn_op_config_base(name, package_name, op_type, qnn_instance) {} + + bool initialize_op_nodes(QNNBackend device, Qnn_GraphHandle_t graph_handle) override; + +private: + DISABLE_COPY(ggml_qnn_single_op_config); + DISABLE_MOVE(ggml_qnn_single_op_config); +}; + +class ggml_qnn_rmsnorm_op_config : public ggml_qnn_op_config_base { +public: + explicit ggml_qnn_rmsnorm_op_config(const std::string &name, const std::string &package_name, + const std::string &op_type, std::shared_ptr qnn_instance) + : ggml_qnn_op_config_base(name, package_name, op_type, qnn_instance) {} + + bool initialize_op_nodes(QNNBackend device, Qnn_GraphHandle_t graph_handle) override; + +private: + DISABLE_COPY(ggml_qnn_rmsnorm_op_config); + DISABLE_MOVE(ggml_qnn_rmsnorm_op_config); +}; + +class ggml_qnn_aggregate_op_config : public ggml_qnn_op_config { +public: + explicit ggml_qnn_aggregate_op_config(const std::string &name, std::shared_ptr qnn_instance) + : _name(name), _qnn_instance(qnn_instance) {} + + ~ggml_qnn_aggregate_op_config() { + _tensor_inputs.clear(); + _tensor_outputs.clear(); + _operations.clear(); + } + + void set_input_tensors(qnn::qnn_tensor_array_t &tensor_inputs) override; + void set_input_tensors(qnn::qnn_tensor_array_t &&tensor_inputs) override; + void set_output_tensors(qnn::qnn_tensor_array_t &tensor_inputs) override; + void set_output_tensors(qnn::qnn_tensor_array_t &&tensor_inputs) override; + bool add_op_to_graph(Qnn_GraphHandle_t graph_handle) override { + return qnn::add_op_to_graph(graph_handle, _operations); + } + + bool bind_input_tensors(const ggml_tensor_array_t &tensor_inputs) override; + bool bind_output_tensors(const ggml_tensor_array_t &tensor_outputs) override; + void unbind_input_tensors() override { + for (auto &tensor : _tensor_inputs) { + tensor->unbind(); + } + } + + void unbind_output_tensors() override { + for (auto &tensor : _tensor_outputs) { + tensor->unbind(); + } + } + + const qnn_tensor_array_t &get_input_tensors() override { return _tensor_inputs; } + const qnn_tensor_array_t &get_output_tensors() override { return _tensor_outputs; } + +protected: + std::string _name; + std::shared_ptr _qnn_instance; + + std::vector _operations; + qnn_tensor_array_t _tensor_inputs; + qnn_tensor_array_t _tensor_outputs; + +private: + DISABLE_COPY(ggml_qnn_aggregate_op_config); + DISABLE_MOVE(ggml_qnn_aggregate_op_config); +}; + +class ggml_qnn_matmul_op_config : public ggml_qnn_aggregate_op_config { +public: + ggml_qnn_matmul_op_config(const std::string &name, std::shared_ptr qnn_instance) + : ggml_qnn_aggregate_op_config(name, qnn_instance) {} + + bool initialize_op_nodes(QNNBackend device, Qnn_GraphHandle_t 
graph_handle) override; + +private: + qnn_tensor_ptr_t create_gather_nodes(QNNBackend device, Qnn_GraphHandle_t graph_handle, const int rank, + qnn_tensor_ptr_t tensor_input, qnn_dimension_array_t output_dimensions); + bool create_convert_nodes(QNNBackend device, Qnn_GraphHandle_t graph_handle, const int rank, + qnn_tensor_array_t &tensor_inputs, qnn_tensor_array_t &tensor_outputs); + bool create_mat_mul_nodes(QNNBackend device, Qnn_GraphHandle_t graph_handle, const int rank, + qnn_tensor_array_t &tensor_inputs, qnn_tensor_array_t &tensor_outputs); + + DISABLE_COPY(ggml_qnn_matmul_op_config); + DISABLE_MOVE(ggml_qnn_matmul_op_config); +}; + +} // namespace qnn diff --git a/ggml/src/ggml-qnn/op-config.hpp b/ggml/src/ggml-qnn/op-config.hpp index ca066520bc..075c56fed6 100644 --- a/ggml/src/ggml-qnn/op-config.hpp +++ b/ggml/src/ggml-qnn/op-config.hpp @@ -12,19 +12,16 @@ namespace qnn { -using ggml_op_constructor_t = - std::function(const std::string &, std::shared_ptr)>; - constexpr const size_t kGgmlUnaryOpStart = GGML_OP_COUNT; size_t get_qnn_op_index(const ggml_tensor *tensor); -void get_ggml_op_output_dimensions(const std::vector &input_dims, size_t op, +void get_ggml_op_output_dimensions(const std::vector &input_dims, const ggml_tensor *op, ggml_dimension_array_t &output_dims); -const char *get_qnn_op_name(size_t op); -size_t get_qnn_op_input_param_count(size_t op); - -ggml_op_constructor_t create_op_constructor(size_t op); +const char *get_qnn_op_name(const ggml_tensor *op); +size_t get_qnn_op_input_param_count(const ggml_tensor *op); +std::shared_ptr create_op(const ggml_tensor *op, const std::string &name, + std::shared_ptr qnn_instance); inline bool add_op_to_graph(Qnn_GraphHandle_t graph_handle, std::vector &operations) { for (auto &op : operations) { @@ -36,127 +33,4 @@ inline bool add_op_to_graph(Qnn_GraphHandle_t graph_handle, std::vector qnn_instance) - : _name(name), _package_name(package_name), _op_type(op_type), _qnn_instance(qnn_instance) {} - - void add_scalar_param(const std::string &name, const Qnn_Scalar_t scalar); - bool add_tensor_param(const std::string &name, const qnn_dimension_array_t &dimensions, int rank, - const uint8_t *data, const Qnn_DataType_t data_type, QNNBackend device, - Qnn_GraphHandle_t graph_handle); - - void set_input_tensors(qnn::qnn_tensor_array_t &tensor_inputs) override; - void set_input_tensors(qnn::qnn_tensor_array_t &&tensor_inputs) override; - void set_output_tensors(qnn::qnn_tensor_array_t &tensor_inputs) override; - void set_output_tensors(qnn::qnn_tensor_array_t &&tensor_inputs) override; - bool add_op_to_graph(Qnn_GraphHandle_t graph_handle) override; - bool bind_input_tensors(const ggml_tensor_array_t &tensor_inputs) override; - bool bind_output_tensors(const ggml_tensor_array_t &tensor_outputs) override; - void unbind_input_tensors() override; - void unbind_output_tensors() override; - const qnn_tensor_array_t &get_input_tensors() override { return _tensor_inputs; } - const qnn_tensor_array_t &get_output_tensors() override { return _tensor_outputs; } - -protected: - Qnn_OpConfig_t get_op_config(); - - std::string _name; - std::string _package_name; - std::string _op_type; - std::shared_ptr _qnn_instance; - qnn_tensor_array_t _tensor_inputs; - qnn_tensor_array_t _tensor_outputs; - qnn_tensor_array_t _tensor_parameters; - std::vector _qnn_tensor_inputs; - std::vector _qnn_tensor_outputs; - std::vector _qnn_parameters; - std::vector _param_names; - - DISABLE_COPY(ggml_qnn_op_config_base); - DISABLE_MOVE(ggml_qnn_op_config_base); -}; - 
-class ggml_qnn_single_op_config : public ggml_qnn_op_config_base { -public: - explicit ggml_qnn_single_op_config(const std::string &name, const std::string &package_name, - const std::string &op_type, std::shared_ptr qnn_instance) - : ggml_qnn_op_config_base(name, package_name, op_type, qnn_instance) {} - - bool initialize_op_nodes(QNNBackend device, Qnn_GraphHandle_t graph_handle) override; - -private: - DISABLE_COPY(ggml_qnn_single_op_config); - DISABLE_MOVE(ggml_qnn_single_op_config); -}; - -class ggml_qnn_aggregate_op_config : public ggml_qnn_op_config { -public: - explicit ggml_qnn_aggregate_op_config(const std::string &name, std::shared_ptr qnn_instance) - : _name(name), _qnn_instance(qnn_instance) {} - - ~ggml_qnn_aggregate_op_config() { - _tensor_inputs.clear(); - _tensor_outputs.clear(); - _operations.clear(); - } - - void set_input_tensors(qnn::qnn_tensor_array_t &tensor_inputs) override; - void set_input_tensors(qnn::qnn_tensor_array_t &&tensor_inputs) override; - void set_output_tensors(qnn::qnn_tensor_array_t &tensor_inputs) override; - void set_output_tensors(qnn::qnn_tensor_array_t &&tensor_inputs) override; - bool add_op_to_graph(Qnn_GraphHandle_t graph_handle) override { - return qnn::add_op_to_graph(graph_handle, _operations); - } - - bool bind_input_tensors(const ggml_tensor_array_t &tensor_inputs) override; - bool bind_output_tensors(const ggml_tensor_array_t &tensor_outputs) override; - void unbind_input_tensors() override { - for (auto &tensor : _tensor_inputs) { - tensor->unbind(); - } - } - - void unbind_output_tensors() override { - for (auto &tensor : _tensor_outputs) { - tensor->unbind(); - } - } - - const qnn_tensor_array_t &get_input_tensors() override { return _tensor_inputs; } - const qnn_tensor_array_t &get_output_tensors() override { return _tensor_outputs; } - -protected: - std::string _name; - std::shared_ptr _qnn_instance; - - std::vector _operations; - qnn_tensor_array_t _tensor_inputs; - qnn_tensor_array_t _tensor_outputs; - -private: - DISABLE_COPY(ggml_qnn_aggregate_op_config); - DISABLE_MOVE(ggml_qnn_aggregate_op_config); -}; - -class ggml_qnn_matmul_op_config : public ggml_qnn_aggregate_op_config { -public: - ggml_qnn_matmul_op_config(const std::string &name, std::shared_ptr qnn_instance) - : ggml_qnn_aggregate_op_config(name, qnn_instance) {} - - bool initialize_op_nodes(QNNBackend device, Qnn_GraphHandle_t graph_handle) override; - -private: - qnn_tensor_ptr_t create_gather_nodes(QNNBackend device, Qnn_GraphHandle_t graph_handle, const int rank, - qnn_tensor_ptr_t tensor_input, qnn_dimension_array_t output_dimensions); - bool create_convert_nodes(QNNBackend device, Qnn_GraphHandle_t graph_handle, const int rank, - qnn_tensor_array_t &tensor_inputs, qnn_tensor_array_t &tensor_outputs); - bool create_mat_mul_nodes(QNNBackend device, Qnn_GraphHandle_t graph_handle, const int rank, - qnn_tensor_array_t &tensor_inputs, qnn_tensor_array_t &tensor_outputs); - - DISABLE_COPY(ggml_qnn_matmul_op_config); - DISABLE_MOVE(ggml_qnn_matmul_op_config); -}; - } // namespace qnn diff --git a/ggml/src/ggml-qnn/tensor.hpp b/ggml/src/ggml-qnn/tensor.hpp index 3bd86891cb..9720e682c8 100644 --- a/ggml/src/ggml-qnn/tensor.hpp +++ b/ggml/src/ggml-qnn/tensor.hpp @@ -294,9 +294,7 @@ private: new_tensor_type); } - bool should_use_mem_handle() const { - return _device == QNN_BACKEND_NPU && QNN_TENSOR_GET_TYPE(_qnn_tensor) != QNN_TENSOR_TYPE_STATIC; - } + bool should_use_mem_handle() const { return false; } std::string _tensor_name; qnn_buffer_ptr _buffer;
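For readers of this change, the following is an illustrative sketch, not part of the patch, of the dispatch pattern the new kOpConstructors table introduces: qnn::create_op() indexes a constexpr table of per-op constructor functions, generic_op_constructor builds a plain single-op config, and op_constructor_with_type_param copies a typed value out of the ggml op_params blob (the RMS-norm epsilon here) and attaches it as a QNN scalar parameter. The QNN SDK and ggml types are replaced by minimal hypothetical stand-ins (fake_tensor, op_config, the OP_* enum), so only the shape of the mechanism matches the real code.

// Minimal stand-ins: fake_tensor, op_config and the OP_* enum are hypothetical
// substitutes for ggml_tensor, qnn::ggml_qnn_op_config and ggml_op.
#include <array>
#include <cstring>
#include <memory>
#include <string>

enum fake_op { OP_NONE, OP_ADD, OP_RMS_NORM, OP_COUNT };

struct fake_tensor {
    fake_op op;
    float   op_params[4];  // raw per-op parameter storage, as in ggml
};

struct op_config {
    explicit op_config(std::string n) : name(std::move(n)) {}
    void add_scalar_param(const std::string & /*param*/, float /*value*/) { /* record a QNN scalar param */ }
    std::string name;
};

using op_constructor_t = std::shared_ptr<op_config> (*)(const fake_tensor *, const std::string &);

// Generic constructor: ops that need no extra QNN parameters.
template <fake_op Op>
std::shared_ptr<op_config> generic_ctor(const fake_tensor *, const std::string &name) {
    return std::make_shared<op_config>(name);
}

// Constructor for ops that carry a typed value in op_params, mirroring
// op_constructor_with_type_param + add_type_parameters (RMS-norm epsilon).
template <fake_op Op, typename ParamT>
std::shared_ptr<op_config> ctor_with_type_param(const fake_tensor *t, const std::string &name) {
    ParamT value;
    std::memcpy(&value, t->op_params, sizeof(value));  // ggml keeps op params as a raw blob
    auto cfg = std::make_shared<op_config>(name);
    cfg->add_scalar_param("epsilon", static_cast<float>(value));
    return cfg;
}

// One slot per op, nullptr meaning "no QNN mapping", like kOpConstructors.
constexpr std::array<op_constructor_t, OP_COUNT> kCtors = {
    nullptr,                                  // OP_NONE
    generic_ctor<OP_ADD>,                     // OP_ADD
    ctor_with_type_param<OP_RMS_NORM, float>, // OP_RMS_NORM
};

std::shared_ptr<op_config> create_op(const fake_tensor *t, const std::string &name) {
    auto ctor = kCtors[t->op];
    return ctor ? ctor(t, name) : nullptr;  // caller treats nullptr as unsupported
}

Compared with the removed create_op_constructor(), which built a std::function closure per call and selected the matmul path by comparing QNN op-name strings, the table keeps dispatch to a single indexed load and lets the static_asserts in op-config-caps.cpp check at compile time that each slot lines up with its GGML_OP_* index.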
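Also for reference, a minimal sketch of the graph-key scheme that the patched get_graph_key_from_op implements: the key is the op description plus the output type name, followed by one '_'-prefixed dimension string per input that is actually present, and the loop now stops at the first missing src[i] instead of dereferencing it. append_tensor_dimensions is not shown in this patch, so the dimension formatting below (append_dims, the "4x3" style) is an assumption.

// toy_tensor and append_dims are hypothetical stand-ins; only the loop shape
// follows the patched get_graph_key_from_op.
#include <string>
#include <vector>

struct toy_tensor {
    std::string op_desc;            // e.g. "ADD" (ggml_op_desc)
    std::string type_name;          // e.g. "f32" (get_ggml_type_name)
    std::vector<long> ne;           // dimensions
    const toy_tensor *src[4] = {};  // inputs; absent ones stay null
};

// Assumed dimension formatting, e.g. {4, 3} -> "4x3".
static void append_dims(const toy_tensor &t, std::string &out) {
    for (size_t i = 0; i < t.ne.size(); ++i) {
        if (i) out += 'x';
        out += std::to_string(t.ne[i]);
    }
}

std::string graph_key(const toy_tensor &op, size_t param_count) {
    std::string key = op.op_desc + op.type_name;
    // Stop at the first null input (the new early break) so a missing
    // optional src slot is skipped rather than dereferenced.
    for (size_t i = 0; i < param_count && op.src[i]; ++i) {
        key += '_';
        append_dims(*op.src[i], key);
    }
    return key;  // e.g. "ADDf32_4x3_4x3" for an f32 add of two 4x3 tensors
}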