#include "op-config-impl.hpp"

namespace {

// Factory signature: build a QNN op-config object for one ggml tensor node.
// NOTE(review): the template-argument lists in this file had been stripped by a bad
// paste; they are restored from the surrounding usage. Confirm the exact qnn:: type
// names (ggml_qnn_op_config, qnn_instance, ggml_qnn_single_op_config,
// ggml_qnn_matmul_op_config, ggml_qnn_op_config_base) against op-config-impl.hpp.
using op_constructor_t = std::shared_ptr<qnn::ggml_qnn_op_config> (*)(const ggml_tensor *, const std::string &,
                                                                      std::shared_ptr<qnn::qnn_instance>);

// Computes an op's output dimensions from the dimensions of its inputs.
using op_dims_calc_func_t = void (*)(const std::vector<qnn::ggml_dimension_array_t> &input_dims,
                                     qnn::ggml_dimension_array_t &output_dims);

// Element-wise ops produce an output shaped exactly like the first input:
// copy every dimension through.
void element_wise_op_dims(const std::vector<qnn::ggml_dimension_array_t> &input_dims,
                          qnn::ggml_dimension_array_t &output_dims) {
    // FIX: the loop previously started at i = 1, which left output_dims[0]
    // uninitialized; an element-wise result must copy all dimensions.
    for (size_t i = 0; i < std::size(output_dims); i++) {
        output_dims[i] = input_dims.front()[i];
    }
}

// Mat-mul output dims: ggml stores operands so that the result's first two
// dimensions come from the second dimension of each operand.
void mat_mul_op_dims(const std::vector<qnn::ggml_dimension_array_t> &input_dims,
                     qnn::ggml_dimension_array_t &output_dims) {
    GGML_ASSERT(input_dims.size() == 2);
    output_dims[0] = input_dims.front()[1];
    output_dims[1] = input_dims.back()[1];
    // NOTE(review): dims [2] and [3] are left untouched here — presumably the
    // caller pre-fills them; confirm at the call site.
}

// Static capability record for one ggml op: QNN op name, input arity, output-shape
// function, and (optionally) the name of a scalar QNN parameter read from op_params.
struct qnn_op_caps_t {
    const char *qnn_op_name = nullptr;
    const size_t input_param_count = 0;
    op_dims_calc_func_t calc_dims_func = nullptr;
    const char *qnn_param_name = nullptr;
};

// Indexed by ggml_op, then by (GGML_OP_COUNT + ggml_unary_op) — see get_qnn_op_index.
// An all-default entry means the op is not supported by this backend.
constexpr const qnn_op_caps_t kOpCaps[] = {
    {}, // GGML_OP_NONE
    {}, // GGML_OP_DUP
    {
        // GGML_OP_ADD
        QNN_OP_ELEMENT_WISE_ADD, // qnn_op_name
        2,                       // input_param_count
        element_wise_op_dims,    // calc_dims_func
    },
    {}, // GGML_OP_ADD1
    {}, // GGML_OP_ACC
    {
        // GGML_OP_SUB
        QNN_OP_ELEMENT_WISE_SUBTRACT, // qnn_op_name
        2,                            // input_param_count
        element_wise_op_dims,         // calc_dims_func
    },
    {
        // GGML_OP_MUL
        QNN_OP_ELEMENT_WISE_MULTIPLY, // qnn_op_name
        2,                            // input_param_count
        element_wise_op_dims,         // calc_dims_func
    },
    {
        // GGML_OP_DIV
        QNN_OP_ELEMENT_WISE_DIVIDE, // qnn_op_name
        2,                          // input_param_count
        element_wise_op_dims,       // calc_dims_func
    },
    {}, // GGML_OP_SQR
    {
        // GGML_OP_SQRT
        QNN_OP_ELEMENT_WISE_SQUARE_ROOT, // qnn_op_name
        1,                               // input_param_count
        element_wise_op_dims,            // calc_dims_func
    },
    {
        // GGML_OP_LOG
        QNN_OP_ELEMENT_WISE_LOG, // qnn_op_name
        1,                       // input_param_count
        element_wise_op_dims,    // calc_dims_func
    },
    {}, // GGML_OP_SIN
    {}, // GGML_OP_COS
    {}, // GGML_OP_SUM
    {}, // GGML_OP_SUM_ROWS
    {}, // GGML_OP_MEAN
    {}, // GGML_OP_ARGMAX
    {}, // GGML_OP_COUNT_EQUAL
    {}, // GGML_OP_REPEAT
    {}, // GGML_OP_REPEAT_BACK
    {}, // GGML_OP_CONCAT
    {}, // GGML_OP_SILU_BACK
    {}, // GGML_OP_NORM
    {
        // GGML_OP_RMS_NORM
        QNN_OP_RMS_NORM,               // qnn_op_name
        1,                             // input_param_count
        nullptr,                       // TODO: calc_dims_func
        QNN_OP_RMS_NORM_PARAM_EPSILON, // qnn_param_name
    },
    {}, // GGML_OP_RMS_NORM_BACK
    {}, // GGML_OP_GROUP_NORM
    {
        // GGML_OP_MUL_MAT
        QNN_OP_MAT_MUL,  // qnn_op_name
        2,               // input_param_count
        mat_mul_op_dims, // calc_dims_func
    },
    {}, // GGML_OP_MUL_MAT_ID
    {}, // GGML_OP_OUT_PROD
    {}, // GGML_OP_SCALE
    {}, // GGML_OP_SET
    {}, // GGML_OP_CPY
    {}, // GGML_OP_CONT
    {
        // GGML_OP_RESHAPE
        QNN_OP_RESHAPE, // qnn_op_name
        1,              // input_param_count
        nullptr,        // TODO: calc_dims_func
    },
    {}, // GGML_OP_VIEW
    {}, // GGML_OP_PERMUTE
    {}, // GGML_OP_TRANSPOSE
    {}, // GGML_OP_GET_ROWS
    {}, // GGML_OP_GET_ROWS_BACK
    {}, // GGML_OP_DIAG
    {}, // GGML_OP_DIAG_MASK_INF
    {}, // GGML_OP_DIAG_MASK_ZERO
    {}, // GGML_OP_SOFT_MAX
    {}, // GGML_OP_SOFT_MAX_BACK
    {}, // GGML_OP_ROPE
    {}, // GGML_OP_ROPE_BACK
    {}, // GGML_OP_CLAMP
    {}, // GGML_OP_CONV_TRANSPOSE_1D
    {}, // GGML_OP_IM2COL
    {}, // GGML_OP_IM2COL_BACK
    {}, // GGML_OP_CONV_TRANSPOSE_2D
    {}, // GGML_OP_POOL_1D
    {}, // GGML_OP_POOL_2D
    {}, // GGML_OP_POOL_2D_BACK
    {}, // GGML_OP_UPSCALE
    {}, // GGML_OP_PAD
    {}, // GGML_OP_PAD_REFLECT_1D
    {}, // GGML_OP_ARANGE
    {}, // GGML_OP_TIMESTEP_EMBEDDING
    {}, // GGML_OP_ARGSORT
    {}, // GGML_OP_LEAKY_RELU
    {}, // GGML_OP_FLASH_ATTN_EXT
    {}, // GGML_OP_FLASH_ATTN_BACK
    {}, // GGML_OP_SSM_CONV
    {}, // GGML_OP_SSM_SCAN
    {}, // GGML_OP_WIN_PART
    {}, // GGML_OP_WIN_UNPART
    {}, // GGML_OP_GET_REL_POS
    {}, // GGML_OP_ADD_REL_POS
    {}, // GGML_OP_RWKV_WKV6
    {}, // GGML_OP_GATED_LINEAR_ATTN
    {}, // GGML_OP_UNARY
    {}, // GGML_OP_MAP_UNARY
    {}, // GGML_OP_MAP_BINARY
    {}, // GGML_OP_MAP_CUSTOM1_F32
    {}, // GGML_OP_MAP_CUSTOM2_F32
    {}, // GGML_OP_MAP_CUSTOM3_F32
    {}, // GGML_OP_MAP_CUSTOM1
    {}, // GGML_OP_MAP_CUSTOM2
    {}, // GGML_OP_MAP_CUSTOM3
    {}, // GGML_OP_CROSS_ENTROPY_LOSS
    {}, // GGML_OP_CROSS_ENTROPY_LOSS_BACK
    {}, // GGML_OP_OPT_STEP_ADAMW

    // ggml_unary_op
    {}, // GGML_UNARY_OP_ABS
    {}, // GGML_UNARY_OP_SGN
    {}, // GGML_UNARY_OP_NEG
    {}, // GGML_UNARY_OP_STEP
    {}, // GGML_UNARY_OP_TANH
    {}, // GGML_UNARY_OP_ELU
    {}, // GGML_UNARY_OP_RELU
    {}, // GGML_UNARY_OP_SIGMOID
    {
        // GGML_UNARY_OP_GELU
        QNN_OP_GELU, // qnn_op_name
        1,           // input_param_count
        nullptr,     // TODO: calc_dims_func
    },
    {}, // GGML_UNARY_OP_GELU_QUICK
    {}, // GGML_UNARY_OP_SILU
    {}, // GGML_UNARY_OP_HARDSWISH
    {}, // GGML_UNARY_OP_HARDSIGMOID
    {}, // GGML_UNARY_OP_EXP
};

static_assert(kOpCaps[GGML_OP_NONE].calc_dims_func == nullptr,
              "GGML_OP_NONE should not have calc_dims_func function");
static_assert(kOpCaps[GGML_OP_ADD].calc_dims_func == element_wise_op_dims,
              "GGML_OP_ADD does not have element_wise_op_dims function");
// FIX: message was a copy-paste of the GGML_OP_ADD assert above.
static_assert(kOpCaps[GGML_OP_MUL_MAT].calc_dims_func == mat_mul_op_dims,
              "GGML_OP_MUL_MAT does not have mat_mul_op_dims function");
static_assert(kOpCaps[GGML_OP_LOG].calc_dims_func == element_wise_op_dims,
              "GGML_OP_LOG does not have element_wise_op_dims function");
static_assert(kOpCaps[GGML_OP_COUNT + GGML_UNARY_OP_GELU].input_param_count == 1,
              "GGML_UNARY_OP_GELU does not have 1 input parameter");
static_assert(std::size(kOpCaps) == (GGML_OP_COUNT + GGML_UNARY_OP_COUNT),
              "GGML_OP_COUNT does not match the size of the kOpCaps table");

// Mat-mul has a dedicated op-config class (it may expand into several QNN nodes),
// so it does not go through generic_op_constructor.
std::shared_ptr<qnn::ggml_qnn_op_config> mat_mul_op_constructor(const ggml_tensor *op,
                                                                const std::string &instance_name,
                                                                std::shared_ptr<qnn::qnn_instance> qnn_instance) {
    GGML_UNUSED(op);
    QNN_LOG_DEBUG("create QNN_OP_MAT_MUL, name %s", instance_name.c_str());
    return std::make_shared<qnn::ggml_qnn_matmul_op_config>(instance_name, qnn_instance);
}

// Builds a plain single-node op-config for ops whose kOpCaps entry fully
// describes them. _op is the (compile-time) index into kOpCaps.
template <size_t _op>
std::shared_ptr<qnn::ggml_qnn_op_config> generic_op_constructor(const ggml_tensor *op,
                                                                const std::string &instance_name,
                                                                std::shared_ptr<qnn::qnn_instance> qnn_instance) {
    GGML_UNUSED(op);
    static_assert(_op < std::size(kOpCaps));
    static_assert(kOpCaps[_op].qnn_op_name != nullptr);
    return std::make_shared<qnn::ggml_qnn_single_op_config>(instance_name, QNN_OP_PACKAGE_NAME_QTI_AISW,
                                                            kOpCaps[_op].qnn_op_name, qnn_instance);
}

// Attaches a float scalar parameter (e.g. RMS-norm epsilon) to an op-config.
void add_type_parameters(std::shared_ptr<qnn::ggml_qnn_op_config_base> op, const char *name, float value) {
    Qnn_Scalar_t scalar = QNN_SCALAR_INIT;
    scalar.dataType = QNN_DATATYPE_FLOAT_32;
    scalar.floatValue = value;
    op->add_scalar_param(name, scalar);
}

// Like generic_op_constructor, but additionally extracts one parameter of type
// _ggml_op_param_type from the tensor's op_params blob and forwards it to QNN
// under kOpCaps[_op].qnn_param_name.
template <size_t _op, typename _qnn_op_type_name, typename _ggml_op_param_type>
std::shared_ptr<qnn::ggml_qnn_op_config> op_constructor_with_type_param(
    const ggml_tensor *op, const std::string &instance_name, std::shared_ptr<qnn::qnn_instance> qnn_instance) {
    static_assert(std::is_base_of<qnn::ggml_qnn_op_config_base, _qnn_op_type_name>::value);
    static_assert(_op < std::size(kOpCaps));
    constexpr auto &op_caps = kOpCaps[_op];
    static_assert(op_caps.qnn_op_name != nullptr);

    // op_params is an opaque byte blob; memcpy avoids alignment/aliasing issues.
    _ggml_op_param_type op_param;
    memcpy(&op_param, op->op_params, sizeof(op_param));
    auto qnn_op = std::make_shared<_qnn_op_type_name>(instance_name, QNN_OP_PACKAGE_NAME_QTI_AISW,
                                                      op_caps.qnn_op_name, qnn_instance);
    if (op_caps.qnn_param_name) {
        add_type_parameters(qnn_op, op_caps.qnn_param_name, op_param);
    }
    return qnn_op;
}

// Indexed exactly like kOpCaps; nullptr means "op not supported".
constexpr const op_constructor_t kOpConstructors[] = {
    nullptr,                                   // GGML_OP_NONE
    nullptr,                                   // GGML_OP_DUP
    generic_op_constructor<GGML_OP_ADD>,       // GGML_OP_ADD
    nullptr,                                   // GGML_OP_ADD1
    nullptr,                                   // GGML_OP_ACC
    generic_op_constructor<GGML_OP_SUB>,       // GGML_OP_SUB
    generic_op_constructor<GGML_OP_MUL>,       // GGML_OP_MUL
    generic_op_constructor<GGML_OP_DIV>,       // GGML_OP_DIV
    nullptr,                                   // GGML_OP_SQR
    generic_op_constructor<GGML_OP_SQRT>,      // GGML_OP_SQRT
    generic_op_constructor<GGML_OP_LOG>,       // GGML_OP_LOG
    nullptr,                                   // GGML_OP_SIN
    nullptr,                                   // GGML_OP_COS
    nullptr,                                   // GGML_OP_SUM
    nullptr,                                   // GGML_OP_SUM_ROWS
    nullptr,                                   // GGML_OP_MEAN
    nullptr,                                   // GGML_OP_ARGMAX
    nullptr,                                   // GGML_OP_COUNT_EQUAL
    nullptr,                                   // GGML_OP_REPEAT
    nullptr,                                   // GGML_OP_REPEAT_BACK
    nullptr,                                   // GGML_OP_CONCAT
    nullptr,                                   // GGML_OP_SILU_BACK
    nullptr,                                   // GGML_OP_NORM
    // NOTE(review): the concrete op-config class for RMS norm was lost in the
    // mangled source; the single-op config plus the epsilon scalar param is
    // assumed — confirm against op-config-impl.hpp.
    op_constructor_with_type_param<GGML_OP_RMS_NORM, qnn::ggml_qnn_single_op_config, float>, // GGML_OP_RMS_NORM
    nullptr,                                   // GGML_OP_RMS_NORM_BACK
    nullptr,                                   // GGML_OP_GROUP_NORM
    mat_mul_op_constructor,                    // GGML_OP_MUL_MAT
    nullptr,                                   // GGML_OP_MUL_MAT_ID
    nullptr,                                   // GGML_OP_OUT_PROD
    nullptr,                                   // GGML_OP_SCALE
    nullptr,                                   // GGML_OP_SET
    nullptr,                                   // GGML_OP_CPY
    nullptr,                                   // GGML_OP_CONT
    generic_op_constructor<GGML_OP_RESHAPE>,   // GGML_OP_RESHAPE
    nullptr,                                   // GGML_OP_VIEW
    nullptr,                                   // GGML_OP_PERMUTE
    nullptr,                                   // GGML_OP_TRANSPOSE
    nullptr,                                   // GGML_OP_GET_ROWS
    nullptr,                                   // GGML_OP_GET_ROWS_BACK
    nullptr,                                   // GGML_OP_DIAG
    nullptr,                                   // GGML_OP_DIAG_MASK_INF
    nullptr,                                   // GGML_OP_DIAG_MASK_ZERO
    nullptr,                                   // GGML_OP_SOFT_MAX
    nullptr,                                   // GGML_OP_SOFT_MAX_BACK
    nullptr,                                   // GGML_OP_ROPE
    nullptr,                                   // GGML_OP_ROPE_BACK
    nullptr,                                   // GGML_OP_CLAMP
    nullptr,                                   // GGML_OP_CONV_TRANSPOSE_1D
    nullptr,                                   // GGML_OP_IM2COL
    nullptr,                                   // GGML_OP_IM2COL_BACK
    nullptr,                                   // GGML_OP_CONV_TRANSPOSE_2D
    nullptr,                                   // GGML_OP_POOL_1D
    nullptr,                                   // GGML_OP_POOL_2D
    nullptr,                                   // GGML_OP_POOL_2D_BACK
    nullptr,                                   // GGML_OP_UPSCALE
    nullptr,                                   // GGML_OP_PAD
    nullptr,                                   // GGML_OP_PAD_REFLECT_1D
    nullptr,                                   // GGML_OP_ARANGE
    nullptr,                                   // GGML_OP_TIMESTEP_EMBEDDING
    nullptr,                                   // GGML_OP_ARGSORT
    nullptr,                                   // GGML_OP_LEAKY_RELU
    nullptr,                                   // GGML_OP_FLASH_ATTN_EXT
    nullptr,                                   // GGML_OP_FLASH_ATTN_BACK
    nullptr,                                   // GGML_OP_SSM_CONV
    nullptr,                                   // GGML_OP_SSM_SCAN
    nullptr,                                   // GGML_OP_WIN_PART
    nullptr,                                   // GGML_OP_WIN_UNPART
    nullptr,                                   // GGML_OP_GET_REL_POS
    nullptr,                                   // GGML_OP_ADD_REL_POS
    nullptr,                                   // GGML_OP_RWKV_WKV6
    nullptr,                                   // GGML_OP_GATED_LINEAR_ATTN
    nullptr,                                   // GGML_OP_UNARY
    nullptr,                                   // GGML_OP_MAP_UNARY
    nullptr,                                   // GGML_OP_MAP_BINARY
    nullptr,                                   // GGML_OP_MAP_CUSTOM1_F32
    nullptr,                                   // GGML_OP_MAP_CUSTOM2_F32
    nullptr,                                   // GGML_OP_MAP_CUSTOM3_F32
    nullptr,                                   // GGML_OP_MAP_CUSTOM1
    nullptr,                                   // GGML_OP_MAP_CUSTOM2
    nullptr,                                   // GGML_OP_MAP_CUSTOM3
    nullptr,                                   // GGML_OP_CROSS_ENTROPY_LOSS
    nullptr,                                   // GGML_OP_CROSS_ENTROPY_LOSS_BACK
    nullptr,                                   // GGML_OP_OPT_STEP_ADAMW

    // ggml_unary_op
    nullptr,                                   // GGML_UNARY_OP_ABS
    nullptr,                                   // GGML_UNARY_OP_SGN
    nullptr,                                   // GGML_UNARY_OP_NEG
    nullptr,                                   // GGML_UNARY_OP_STEP
    nullptr,                                   // GGML_UNARY_OP_TANH
    nullptr,                                   // GGML_UNARY_OP_ELU
    nullptr,                                   // GGML_UNARY_OP_RELU
    nullptr,                                   // GGML_UNARY_OP_SIGMOID
    nullptr,                                   // GGML_UNARY_OP_GELU
    nullptr,                                   // GGML_UNARY_OP_GELU_QUICK
    nullptr,                                   // GGML_UNARY_OP_SILU
    nullptr,                                   // GGML_UNARY_OP_HARDSWISH
    nullptr,                                   // GGML_UNARY_OP_HARDSIGMOID
    nullptr,                                   // GGML_UNARY_OP_EXP
};

static_assert(kOpConstructors[GGML_OP_NONE] == nullptr,
              "GGML_OP_NONE does not match the nullptr function");
static_assert(kOpConstructors[GGML_OP_ADD] == generic_op_constructor, "GGML_OP_ADD does not match the generic_op_constructor function"); static_assert(kOpConstructors[GGML_OP_MUL_MAT] == mat_mul_op_constructor, "GGML_OP_MUL_MAT does not match the mat_mul_op_constructor function"); static_assert(std::size(kOpConstructors) == (GGML_OP_COUNT + GGML_UNARY_OP_COUNT), "GGML_OP_COUNT does not match the size of the kOpConstructors table"); } // namespace namespace qnn { size_t get_qnn_op_index(const ggml_tensor *tensor) { if (tensor->op == GGML_OP_UNARY) { return kGgmlUnaryOpStart + ggml_get_unary_op(tensor); } return tensor->op; } const char *get_qnn_op_name(const ggml_tensor *op) { auto op_index = get_qnn_op_index(op); GGML_ASSERT(op_index < std::size(kOpCaps)); GGML_ASSERT(kOpCaps[op_index].qnn_op_name); return kOpCaps[op_index].qnn_op_name; } size_t get_qnn_op_input_param_count(const ggml_tensor *op) { auto op_index = get_qnn_op_index(op); GGML_ASSERT(op_index < std::size(kOpCaps)); return kOpCaps[op_index].input_param_count; } std::shared_ptr create_op(const ggml_tensor *op, const std::string &name, std::shared_ptr qnn_instance) { auto op_index = get_qnn_op_index(op); GGML_ASSERT(op_index < std::size(kOpCaps)); auto op_constructor = kOpConstructors[op_index]; GGML_ASSERT(op_constructor); return op_constructor(op, name, qnn_instance); } } // namespace qnn