[feat]add more op support (#18)

* disable rpc buffer for npu
* append input/output tensor size into unsupported op log
* log dimensions for unsupported tensor
* wip
* split op config classes into separated file
* fix reshape
* wip
* add op_constructor_with_type_param
* set parameter for op_constructor_with_type_param func

parent 5f93376f67
commit 10bd671c08
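Editor's note: the change threading through every file below is an API cleanup in the qnn op-caps helpers. They previously took a pre-resolved size_t op index; they now take the ggml_tensor pointer itself and resolve the index internally, so each call site drops its qnn::get_qnn_op_index(...) wrapper. A minimal before/after sketch of a call site (shapes taken from the hunks below):

    // before: the caller resolves the table index itself
    const auto param_count = qnn::get_qnn_op_input_param_count(qnn::get_qnn_op_index(dst));

    // after: pass the tensor; the index lookup happens inside the helper
    const auto param_count = qnn::get_qnn_op_input_param_count(dst);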
@@ -25,7 +25,7 @@ bool qnn_is_op_valid(ggml_backend_qnn_device_context *ctx, const ggml_tensor *dst) {
         return false;
     }
 
-    const auto param_count = qnn::get_qnn_op_input_param_count(qnn::get_qnn_op_index(dst));
+    const auto param_count = qnn::get_qnn_op_input_param_count(dst);
     switch (param_count) {
         case 1:
             return dst->src[0];
@@ -91,9 +91,13 @@ void get_graph_key_from_op(const ggml_tensor *op, std::string &output) {
     GGML_ASSERT(op->op != GGML_OP_NONE);
     output += ggml_op_desc(op);
-    const auto param_count = qnn::get_qnn_op_input_param_count(qnn::get_qnn_op_index(op));
+    output += qnn::get_ggml_type_name(op->type);
+    const auto param_count = qnn::get_qnn_op_input_param_count(op);
     for (size_t i = 0; i < param_count; ++i) {
         auto *input = op->src[i];
+        if (!input) {
+            break;
+        }
 
         output += '_';
         append_tensor_dimensions(input, output);
     }
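Editor's note: the graph key now folds in the tensor type and each input's dimensions, which is what makes the debug logs added to device_supports_op below actionable. Assuming append_tensor_dimensions writes a compact NxM-style string (its body is not part of this diff), a key would look roughly like:

    // hypothetical key for an f32 ADD over two 2x3 inputs:
    //   "ADD" + "f32" + "_2x3" + "_2x3"  ->  "ADDf32_2x3_2x3"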
@@ -224,7 +228,7 @@ bool qnn_generic_op_impl(ggml_backend_qnn_device_context *ctx, ggml_tensor *dst) {
 
 #ifndef NDEBUG
     if (!succeed) {
-        const auto param_count = qnn::get_qnn_op_input_param_count(qnn::get_qnn_op_index(dst));
+        const auto param_count = qnn::get_qnn_op_input_param_count(dst);
         for (size_t i = 0; i < param_count; ++i) {
             print_ggml_tensor(dst->src[i]);
         }
@@ -409,7 +413,7 @@ bool ggnl_qnn_supports_op_tensor(ggml_backend_qnn_device_context *ctx, const ggml_tensor *op) {
         return false;
     }
 
-    const auto param_count = qnn::get_qnn_op_input_param_count(qnn::get_qnn_op_index(op));
+    const auto param_count = qnn::get_qnn_op_input_param_count(op);
     for (size_t i = 0; i < param_count; ++i) {
         if (!ggml_qnn_supports_tensor(ctx, op->src[i])) {
             return false;
@@ -479,12 +483,20 @@ bool device_supports_op(ggml_backend_qnn_device_context *ctx, const ggml_tensor *op) {
     }
 
     if (!kQnnOpsTable[qnn::get_qnn_op_index(op)]) {
-        QNN_LOG_DEBUG("[%s]unsupported op", ggml_op_name(op->op));
+#ifndef NDEBUG
+        std::string op_key;
+        get_graph_key_from_op(op, op_key);
+        QNN_LOG_DEBUG("[%s]unsupported op", op_key.c_str());
+#endif
         return false;
     }
 
     if (!ggnl_qnn_supports_op_tensor(ctx, op)) {
-        QNN_LOG_DEBUG("[%s]unsupported tensor", ggml_op_name(op->op));
+#ifndef NDEBUG
+        std::string tensor_dims;
+        append_tensor_dimensions(op, tensor_dims);
+        QNN_LOG_DEBUG("[%s]unsupported tensor(%s)", ggml_op_name(op->op), tensor_dims.c_str());
+#endif
         return false;
     }
 
@@ -15,7 +15,7 @@ using qnn_tensor_cache_t = std::unordered_map<ggml_tensor *, qnn::qnn_tensor_ptr_t>;
 
 int get_op_max_rank(const ggml_tensor *op) {
     int max_rank = ggml_n_dims(op);
-    const int count = (int)qnn::get_qnn_op_input_param_count(qnn::get_qnn_op_index(op));
+    const int count = (int)qnn::get_qnn_op_input_param_count(op);
     for (int i = 0; i < count; ++i) {
         max_rank = std::max(max_rank, ggml_n_dims(op->src[i]));
     }
@@ -56,14 +56,12 @@ qnn::qnn_op_config_ptr_t create_operation_from_op_tensor(ggml_tensor *dst, const
                                                          QNNBackend device, Qnn_GraphHandle_t graph_handle,
                                                          std::shared_ptr<qnn::qnn_instance> qnn_instance,
                                                          bool is_intermediate, qnn_tensor_cache_t &tensor_cache) {
-    const auto op_index = qnn::get_qnn_op_index(dst);
-    auto qnn_op = qnn::create_op_constructor(op_index);
-    auto operation = qnn_op(name, qnn_instance);
+    auto operation = qnn::create_op(dst, name, qnn_instance);
 
     // input tensors
     qnn::qnn_tensor_array_t input_qnn_tensors;
     auto tensor_type = is_intermediate ? qnn::ggml_qnn_tensor::INTERMEDIATE : qnn::ggml_qnn_tensor::INPUT;
-    for (size_t i = 0; i < qnn::get_qnn_op_input_param_count(op_index); ++i) {
+    for (size_t i = 0; i < qnn::get_qnn_op_input_param_count(dst); ++i) {
         auto input_qnn_tensor =
             create_tensor_with_cache(dst->src[i], tensor_type, rank, device, graph_handle, qnn_instance, tensor_cache);
         input_qnn_tensors.push_back(input_qnn_tensor);
@@ -92,7 +90,7 @@ bool bind_src_tensors(ggml_tensor *op, qnn::qnn_tensor_array_t &tensor_wrappers,
         return false;
     }
 
-    const auto param_count = qnn::get_qnn_op_input_param_count(qnn::get_qnn_op_index(op));
+    const auto param_count = qnn::get_qnn_op_input_param_count(op);
     GGML_ASSERT(tensor_wrappers.size() == param_count);
     qnn_tensors.resize(param_count);
     for (size_t i = 0; i < param_count; ++i) {
@@ -268,7 +266,7 @@ bool qnn_graph::build_graph_from_ggml_graph(const ggml_cgraph *cgraph) {
             continue;
         }
 
-        QNN_LOG_DEBUG("[%s]create op: %s", get_backend_name(_device), get_qnn_op_name(dst->op));
+        QNN_LOG_DEBUG("[%s]create op: %s", get_backend_name(_device), get_qnn_op_name(dst));
         auto operation = create_operation_from_op_tensor(dst, dst->name, rank, _device, _graph_handle,
                                                          _qnn_instance, true, tensor_cache); // TODO: fix op name
         operations.push_back(operation);
@@ -1,8 +1,10 @@
 
-#include "op-config.hpp"
+#include "op-config-impl.hpp"
 
 namespace {
 
+using op_constructor_t = std::shared_ptr<qnn::ggml_qnn_op_config> (*)(const ggml_tensor *, const std::string &,
+                                                                      std::shared_ptr<qnn::qnn_instance>);
 using op_dims_calc_func_t = void (*)(const std::vector<const qnn::ggml_dimension_array_t> &input_dims,
                                      qnn::ggml_dimension_array_t &output_dims);
 
@@ -24,6 +26,7 @@ struct qnn_op_caps_t {
     const char *qnn_op_name = nullptr;
     const size_t input_param_count = 0;
    op_dims_calc_func_t calc_dims_func = nullptr;
+    const char *qnn_param_name = nullptr;
 };
 
 constexpr const qnn_op_caps_t kOpCaps[] = {
@@ -80,7 +83,13 @@ constexpr const qnn_op_caps_t kOpCaps[] = {
     {}, // GGML_OP_CONCAT
     {}, // GGML_OP_SILU_BACK
     {}, // GGML_OP_NORM
-    {}, // GGML_OP_RMS_NORM
+    {
+        // GGML_OP_RMS_NORM
+        QNN_OP_RMS_NORM,               // qnn_op_name
+        1,                             // input_param_count
+        nullptr,                       // TODO: calc_dims_func
+        QNN_OP_RMS_NORM_PARAM_EPSILON, // qnn_param_name
+    },
     {}, // GGML_OP_RMS_NORM_BACK
     {}, // GGML_OP_GROUP_NORM
     {
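Editor's note: the new qnn_param_name field is what lets this table drive parameterized ops. For GGML_OP_RMS_NORM, ggml keeps epsilon in op->op_params; the op_constructor_with_type_param template added below copies it out and attaches it under the scalar name given here. The flow, condensed from the code in this commit:

    float eps;
    memcpy(&eps, op->op_params, sizeof(eps));  // epsilon as ggml stores it for RMS norm
    add_type_parameters(qnn_op, QNN_OP_RMS_NORM_PARAM_EPSILON, eps);  // becomes a QNN_DATATYPE_FLOAT_32 scalar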
@@ -187,9 +196,172 @@ static_assert(kOpCaps[GGML_OP_MUL_MAT].calc_dims_func == mat_mul_op_dims,
               "GGML_OP_ADD does not have element_wise_op_dims function");
 static_assert(kOpCaps[GGML_OP_LOG].calc_dims_func == element_wise_op_dims,
               "GGML_OP_LOG does not have element_wise_op_dims function");
+static_assert(kOpCaps[GGML_OP_COUNT + GGML_UNARY_OP_GELU].input_param_count == 1,
+              "GGML_UNARY_OP_GELU does not have 1 input parameter");
 static_assert(std::size(kOpCaps) == (GGML_OP_COUNT + GGML_UNARY_OP_COUNT),
               "GGML_OP_COUNT does not match the size of the kOpCaps table");
 
+std::shared_ptr<qnn::ggml_qnn_op_config> mat_mul_op_constructor(const ggml_tensor *op, const std::string &instance_name,
+                                                                std::shared_ptr<qnn::qnn_instance> qnn_instance) {
+    GGML_UNUSED(op);
+    QNN_LOG_DEBUG("create QNN_OP_MAT_MUL, name %s", instance_name.c_str());
+    return std::make_shared<qnn::ggml_qnn_matmul_op_config>(instance_name, qnn_instance);
+}
+
+template <size_t _op>
+std::shared_ptr<qnn::ggml_qnn_op_config> generic_op_constructor(const ggml_tensor *op, const std::string &instance_name,
+                                                                std::shared_ptr<qnn::qnn_instance> qnn_instance) {
+    GGML_UNUSED(op);
+    static_assert(_op < std::size(kOpCaps));
+    static_assert(kOpCaps[_op].qnn_op_name != nullptr);
+    return std::make_shared<qnn::ggml_qnn_single_op_config>(instance_name, QNN_OP_PACKAGE_NAME_QTI_AISW,
+                                                            kOpCaps[_op].qnn_op_name, qnn_instance);
+}
+
+void add_type_parameters(std::shared_ptr<qnn::ggml_qnn_op_config_base> op, const char *name, float value) {
+    Qnn_Scalar_t scalar = QNN_SCALAR_INIT;
+    scalar.dataType = QNN_DATATYPE_FLOAT_32;
+    scalar.floatValue = value;
+    op->add_scalar_param(name, scalar);
+}
+
+template <size_t _op, typename _ggml_op_param_type, typename _qnn_op_type_name>
+std::shared_ptr<qnn::ggml_qnn_op_config> op_constructor_with_type_param(
+    const ggml_tensor *op, const std::string &instance_name, std::shared_ptr<qnn::qnn_instance> qnn_instance) {
+    static_assert(std::is_base_of<qnn::ggml_qnn_op_config_base, _qnn_op_type_name>::value);
+    static_assert(_op < std::size(kOpCaps));
+
+    constexpr auto &op_caps = kOpCaps[_op];
+    static_assert(op_caps.qnn_op_name != nullptr);
+
+    _ggml_op_param_type op_param;
+    memcpy(&op_param, op->op_params, sizeof(op_param));
+    auto qnn_op = std::make_shared<_qnn_op_type_name>(instance_name, QNN_OP_PACKAGE_NAME_QTI_AISW, op_caps.qnn_op_name,
+                                                      qnn_instance);
+    if (op_caps.qnn_param_name) {
+        add_type_parameters(qnn_op, op_caps.qnn_param_name, op_param);
+    }
+    return qnn_op;
+}
+
+constexpr const op_constructor_t kOpConstructors[] = {
+    nullptr,                                 // GGML_OP_NONE
+    nullptr,                                 // GGML_OP_DUP
+    generic_op_constructor<GGML_OP_ADD>,     // GGML_OP_ADD
+    nullptr,                                 // GGML_OP_ADD1
+    nullptr,                                 // GGML_OP_ACC
+    generic_op_constructor<GGML_OP_SUB>,     // GGML_OP_SUB
+    generic_op_constructor<GGML_OP_MUL>,     // GGML_OP_MUL
+    generic_op_constructor<GGML_OP_DIV>,     // GGML_OP_DIV
+    nullptr,                                 // GGML_OP_SQR
+    generic_op_constructor<GGML_OP_SQRT>,    // GGML_OP_SQRT
+    generic_op_constructor<GGML_OP_LOG>,     // GGML_OP_LOG
+    nullptr,                                 // GGML_OP_SIN
+    nullptr,                                 // GGML_OP_COS
+    nullptr,                                 // GGML_OP_SUM
+    nullptr,                                 // GGML_OP_SUM_ROWS
+    nullptr,                                 // GGML_OP_MEAN
+    nullptr,                                 // GGML_OP_ARGMAX
+    nullptr,                                 // GGML_OP_COUNT_EQUAL
+    nullptr,                                 // GGML_OP_REPEAT
+    nullptr,                                 // GGML_OP_REPEAT_BACK
+    nullptr,                                 // GGML_OP_CONCAT
+    nullptr,                                 // GGML_OP_SILU_BACK
+    nullptr,                                 // GGML_OP_NORM
+    op_constructor_with_type_param<GGML_OP_RMS_NORM, float, qnn::ggml_qnn_rmsnorm_op_config>, // GGML_OP_RMS_NORM
+    nullptr,                                 // GGML_OP_RMS_NORM_BACK
+    nullptr,                                 // GGML_OP_GROUP_NORM
+
+    mat_mul_op_constructor,                  // GGML_OP_MUL_MAT
+    nullptr,                                 // GGML_OP_MUL_MAT_ID
+    nullptr,                                 // GGML_OP_OUT_PROD
+
+    nullptr,                                 // GGML_OP_SCALE
+    nullptr,                                 // GGML_OP_SET
+    nullptr,                                 // GGML_OP_CPY
+    nullptr,                                 // GGML_OP_CONT
+    generic_op_constructor<GGML_OP_RESHAPE>, // GGML_OP_RESHAPE
+    nullptr,                                 // GGML_OP_VIEW
+    nullptr,                                 // GGML_OP_PERMUTE
+    nullptr,                                 // GGML_OP_TRANSPOSE
+    nullptr,                                 // GGML_OP_GET_ROWS
+    nullptr,                                 // GGML_OP_GET_ROWS_BACK
+    nullptr,                                 // GGML_OP_DIAG
+    nullptr,                                 // GGML_OP_DIAG_MASK_INF
+    nullptr,                                 // GGML_OP_DIAG_MASK_ZERO
+    nullptr,                                 // GGML_OP_SOFT_MAX
+    nullptr,                                 // GGML_OP_SOFT_MAX_BACK
+    nullptr,                                 // GGML_OP_ROPE
+    nullptr,                                 // GGML_OP_ROPE_BACK
+    nullptr,                                 // GGML_OP_CLAMP
+    nullptr,                                 // GGML_OP_CONV_TRANSPOSE_1D
+    nullptr,                                 // GGML_OP_IM2COL
+    nullptr,                                 // GGML_OP_IM2COL_BACK
+    nullptr,                                 // GGML_OP_CONV_TRANSPOSE_2D
+    nullptr,                                 // GGML_OP_POOL_1D
+    nullptr,                                 // GGML_OP_POOL_2D
+    nullptr,                                 // GGML_OP_POOL_2D_BACK
+    nullptr,                                 // GGML_OP_UPSCALE
+    nullptr,                                 // GGML_OP_PAD
+    nullptr,                                 // GGML_OP_PAD_REFLECT_1D
+    nullptr,                                 // GGML_OP_ARANGE
+    nullptr,                                 // GGML_OP_TIMESTEP_EMBEDDING
+    nullptr,                                 // GGML_OP_ARGSORT
+    nullptr,                                 // GGML_OP_LEAKY_RELU
+
+    nullptr,                                 // GGML_OP_FLASH_ATTN_EXT
+    nullptr,                                 // GGML_OP_FLASH_ATTN_BACK
+    nullptr,                                 // GGML_OP_SSM_CONV
+    nullptr,                                 // GGML_OP_SSM_SCAN
+    nullptr,                                 // GGML_OP_WIN_PART
+    nullptr,                                 // GGML_OP_WIN_UNPART
+    nullptr,                                 // GGML_OP_GET_REL_POS
+    nullptr,                                 // GGML_OP_ADD_REL_POS
+    nullptr,                                 // GGML_OP_RWKV_WKV6
+    nullptr,                                 // GGML_OP_GATED_LINEAR_ATTN
+
+    nullptr,                                 // GGML_OP_UNARY
+
+    nullptr,                                 // GGML_OP_MAP_UNARY
+    nullptr,                                 // GGML_OP_MAP_BINARY
+
+    nullptr,                                 // GGML_OP_MAP_CUSTOM1_F32
+    nullptr,                                 // GGML_OP_MAP_CUSTOM2_F32
+    nullptr,                                 // GGML_OP_MAP_CUSTOM3_F32
+
+    nullptr,                                 // GGML_OP_MAP_CUSTOM1
+    nullptr,                                 // GGML_OP_MAP_CUSTOM2
+    nullptr,                                 // GGML_OP_MAP_CUSTOM3
+
+    nullptr,                                 // GGML_OP_CROSS_ENTROPY_LOSS
+    nullptr,                                 // GGML_OP_CROSS_ENTROPY_LOSS_BACK
+    nullptr,                                 // GGML_OP_OPT_STEP_ADAMW
+
+    // ggml_unary_op
+    nullptr,                                 // GGML_UNARY_OP_ABS
+    nullptr,                                 // GGML_UNARY_OP_SGN
+    nullptr,                                 // GGML_UNARY_OP_NEG
+    nullptr,                                 // GGML_UNARY_OP_STEP
+    nullptr,                                 // GGML_UNARY_OP_TANH
+    nullptr,                                 // GGML_UNARY_OP_ELU
+    nullptr,                                 // GGML_UNARY_OP_RELU
+    nullptr,                                 // GGML_UNARY_OP_SIGMOID
+    nullptr,                                 // GGML_UNARY_OP_GELU
+    nullptr,                                 // GGML_UNARY_OP_GELU_QUICK
+    nullptr,                                 // GGML_UNARY_OP_SILU
+    nullptr,                                 // GGML_UNARY_OP_HARDSWISH
+    nullptr,                                 // GGML_UNARY_OP_HARDSIGMOID
+    nullptr,                                 // GGML_UNARY_OP_EXP
+};
+
+static_assert(kOpConstructors[GGML_OP_NONE] == nullptr, "GGML_OP_NONE does not match the nullptr function");
+static_assert(kOpConstructors[GGML_OP_ADD] == generic_op_constructor<GGML_OP_ADD>,
+              "GGML_OP_ADD does not match the generic_op_constructor<GGML_OP_ADD> function");
+static_assert(kOpConstructors[GGML_OP_MUL_MAT] == mat_mul_op_constructor,
+              "GGML_OP_MUL_MAT does not match the mat_mul_op_constructor function");
+static_assert(std::size(kOpConstructors) == (GGML_OP_COUNT + GGML_UNARY_OP_COUNT),
+              "GGML_OP_COUNT does not match the size of the kOpConstructors table");
+
 } // namespace
 
 namespace qnn {
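Editor's note: after this commit, enabling another op is a two-table edit (plus a config class if the op needs one): fill in the op's kOpCaps row and point its kOpConstructors slot at a constructor. A hypothetical sketch for GGML_OP_SIN, which is not enabled in this commit (the QNN op-name macro is an assumption; check the QNN op catalog for the SDK in use):

    // kOpCaps row:
    {
        // GGML_OP_SIN
        QNN_OP_ELEMENT_WISE_SIN, // qnn_op_name (assumed macro)
        1,                       // input_param_count
        element_wise_op_dims,    // calc_dims_func
    },
    // kOpConstructors slot:
    generic_op_constructor<GGML_OP_SIN>, // GGML_OP_SIN

The static_asserts at the end of each table then verify that the slots still line up with the ggml enum order.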
@@ -202,23 +374,35 @@ size_t get_qnn_op_index(const ggml_tensor *tensor) {
     return tensor->op;
 }
 
-void get_ggml_op_output_dimensions(const std::vector<const ggml_dimension_array_t> &input_dims, size_t op,
+void get_ggml_op_output_dimensions(const std::vector<const ggml_dimension_array_t> &input_dims, const ggml_tensor *op,
                                    ggml_dimension_array_t &output_dims) {
-    GGML_ASSERT(op < std::size(kOpCaps));
-    auto get_dims = kOpCaps[op].calc_dims_func;
+    auto op_index = get_qnn_op_index(op);
+    GGML_ASSERT(op_index < std::size(kOpCaps));
+    auto get_dims = kOpCaps[op_index].calc_dims_func;
     GGML_ASSERT(get_dims);
     get_dims(input_dims, output_dims);
 }
 
-const char *get_qnn_op_name(size_t op) {
-    GGML_ASSERT(op < std::size(kOpCaps));
-    GGML_ASSERT(kOpCaps[op].qnn_op_name);
-    return kOpCaps[op].qnn_op_name;
+const char *get_qnn_op_name(const ggml_tensor *op) {
+    auto op_index = get_qnn_op_index(op);
+    GGML_ASSERT(op_index < std::size(kOpCaps));
+    GGML_ASSERT(kOpCaps[op_index].qnn_op_name);
+    return kOpCaps[op_index].qnn_op_name;
 }
 
-size_t get_qnn_op_input_param_count(size_t op) {
-    GGML_ASSERT(op < std::size(kOpCaps));
-    return kOpCaps[op].input_param_count;
+size_t get_qnn_op_input_param_count(const ggml_tensor *op) {
+    auto op_index = get_qnn_op_index(op);
+    GGML_ASSERT(op_index < std::size(kOpCaps));
+    return kOpCaps[op_index].input_param_count;
 }
 
+std::shared_ptr<ggml_qnn_op_config> create_op(const ggml_tensor *op, const std::string &name,
+                                              std::shared_ptr<qnn_instance> qnn_instance) {
+    auto op_index = get_qnn_op_index(op);
+    GGML_ASSERT(op_index < std::size(kOpCaps));
+    auto op_constructor = kOpConstructors[op_index];
+    GGML_ASSERT(op_constructor);
+    return op_constructor(op, name, qnn_instance);
+}
+
 } // namespace qnn
@@ -1,4 +1,4 @@
-#include "op-config.hpp"
+#include "op-config-impl.hpp"
 
 #include <cstdint>
 
@@ -187,6 +187,13 @@ bool ggml_qnn_single_op_config::initialize_op_nodes(QNNBackend device, Qnn_GraphHandle_t graph_handle) {
     return true;
 }
 
+bool ggml_qnn_rmsnorm_op_config::initialize_op_nodes(QNNBackend device, Qnn_GraphHandle_t graph_handle) {
+    constexpr const uint32_t kAxes[] = {0};
+    add_tensor_param(QNN_OP_RMS_NORM_PARAM_AXES, {1}, 1, reinterpret_cast<const uint8_t *>(kAxes), QNN_DATATYPE_UINT_32,
+                     device, graph_handle);
+    return true;
+}
+
 void ggml_qnn_aggregate_op_config::set_input_tensors(qnn::qnn_tensor_array_t &tensor_inputs) {
     _tensor_inputs = tensor_inputs;
 }
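Editor's note: QNN's RmsNorm takes an explicit axes tensor parameter in addition to the epsilon scalar, so the configuration is split across two phases: epsilon is attached at construction time by op_constructor_with_type_param (previous file), while the axes parameter is added here in initialize_op_nodes, because add_tensor_param needs a live device and graph handle. The axes are pinned to {0} for now.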
@@ -439,22 +446,4 @@ bool ggml_qnn_matmul_op_config::create_mat_mul_nodes(QNNBackend device, Qnn_Grap
     return true;
 }
 
-ggml_op_constructor_t create_op_constructor(size_t op) {
-    std::string op_name = get_qnn_op_name(op);
-    if (op_name == QNN_OP_MAT_MUL) {
-        // For QNN_OP_MAT_MUL, we need to transpose the input tensor
-        return [](const std::string &instance_name,
-                  std::shared_ptr<qnn::qnn_instance> qnn_instance) -> std::shared_ptr<qnn::ggml_qnn_op_config> {
-            QNN_LOG_DEBUG("create QNN_OP_MAT_MUL, name %s", instance_name.c_str());
-            return std::make_shared<qnn::ggml_qnn_matmul_op_config>(instance_name, qnn_instance);
-        };
-    }
-
-    return [op_name](const std::string &instance_name,
-                     std::shared_ptr<qnn::qnn_instance> qnn_instance) -> std::shared_ptr<qnn::ggml_qnn_op_config> {
-        return std::make_shared<qnn::ggml_qnn_single_op_config>(instance_name, QNN_OP_PACKAGE_NAME_QTI_AISW, op_name,
-                                                                qnn_instance);
-    };
-}
-
 } // namespace qnn
@@ -0,0 +1,151 @@
+#pragma once
+
+#include <functional>
+#include <memory>
+#include <string>
+#include <vector>
+
+#include "op-config.hpp"
+#include "qnn-lib.hpp"
+#include "qnn-types.hpp"
+#include "tensor.hpp"
+
+namespace qnn {
+
+class ggml_qnn_op_config_base : public ggml_qnn_op_config {
+public:
+    explicit ggml_qnn_op_config_base(const std::string &name, const std::string &package_name,
+                                     const std::string &op_type, std::shared_ptr<qnn_instance> qnn_instance)
+        : _name(name), _package_name(package_name), _op_type(op_type), _qnn_instance(qnn_instance) {}
+
+    void add_scalar_param(const std::string &name, const Qnn_Scalar_t scalar);
+    bool add_tensor_param(const std::string &name, const qnn_dimension_array_t &dimensions, int rank,
+                          const uint8_t *data, const Qnn_DataType_t data_type, QNNBackend device,
+                          Qnn_GraphHandle_t graph_handle);
+
+    void set_input_tensors(qnn::qnn_tensor_array_t &tensor_inputs) override;
+    void set_input_tensors(qnn::qnn_tensor_array_t &&tensor_inputs) override;
+    void set_output_tensors(qnn::qnn_tensor_array_t &tensor_inputs) override;
+    void set_output_tensors(qnn::qnn_tensor_array_t &&tensor_inputs) override;
+    bool add_op_to_graph(Qnn_GraphHandle_t graph_handle) override;
+    bool bind_input_tensors(const ggml_tensor_array_t &tensor_inputs) override;
+    bool bind_output_tensors(const ggml_tensor_array_t &tensor_outputs) override;
+    void unbind_input_tensors() override;
+    void unbind_output_tensors() override;
+    const qnn_tensor_array_t &get_input_tensors() override { return _tensor_inputs; }
+    const qnn_tensor_array_t &get_output_tensors() override { return _tensor_outputs; }
+
+protected:
+    Qnn_OpConfig_t get_op_config();
+
+    std::string _name;
+    std::string _package_name;
+    std::string _op_type;
+    std::shared_ptr<qnn_instance> _qnn_instance;
+    qnn_tensor_array_t _tensor_inputs;
+    qnn_tensor_array_t _tensor_outputs;
+    qnn_tensor_array_t _tensor_parameters;
+    std::vector<Qnn_Tensor_t> _qnn_tensor_inputs;
+    std::vector<Qnn_Tensor_t> _qnn_tensor_outputs;
+    std::vector<Qnn_Param_t> _qnn_parameters;
+    std::vector<std::string> _param_names;
+
+    DISABLE_COPY(ggml_qnn_op_config_base);
+    DISABLE_MOVE(ggml_qnn_op_config_base);
+};
+
+class ggml_qnn_single_op_config : public ggml_qnn_op_config_base {
+public:
+    explicit ggml_qnn_single_op_config(const std::string &name, const std::string &package_name,
+                                       const std::string &op_type, std::shared_ptr<qnn_instance> qnn_instance)
+        : ggml_qnn_op_config_base(name, package_name, op_type, qnn_instance) {}
+
+    bool initialize_op_nodes(QNNBackend device, Qnn_GraphHandle_t graph_handle) override;
+
+private:
+    DISABLE_COPY(ggml_qnn_single_op_config);
+    DISABLE_MOVE(ggml_qnn_single_op_config);
+};
+
+class ggml_qnn_rmsnorm_op_config : public ggml_qnn_op_config_base {
+public:
+    explicit ggml_qnn_rmsnorm_op_config(const std::string &name, const std::string &package_name,
+                                        const std::string &op_type, std::shared_ptr<qnn_instance> qnn_instance)
+        : ggml_qnn_op_config_base(name, package_name, op_type, qnn_instance) {}
+
+    bool initialize_op_nodes(QNNBackend device, Qnn_GraphHandle_t graph_handle) override;
+
+private:
+    DISABLE_COPY(ggml_qnn_rmsnorm_op_config);
+    DISABLE_MOVE(ggml_qnn_rmsnorm_op_config);
+};
+
+class ggml_qnn_aggregate_op_config : public ggml_qnn_op_config {
+public:
+    explicit ggml_qnn_aggregate_op_config(const std::string &name, std::shared_ptr<qnn_instance> qnn_instance)
+        : _name(name), _qnn_instance(qnn_instance) {}
+
+    ~ggml_qnn_aggregate_op_config() {
+        _tensor_inputs.clear();
+        _tensor_outputs.clear();
+        _operations.clear();
+    }
+
+    void set_input_tensors(qnn::qnn_tensor_array_t &tensor_inputs) override;
+    void set_input_tensors(qnn::qnn_tensor_array_t &&tensor_inputs) override;
+    void set_output_tensors(qnn::qnn_tensor_array_t &tensor_inputs) override;
+    void set_output_tensors(qnn::qnn_tensor_array_t &&tensor_inputs) override;
+    bool add_op_to_graph(Qnn_GraphHandle_t graph_handle) override {
+        return qnn::add_op_to_graph(graph_handle, _operations);
+    }
+
+    bool bind_input_tensors(const ggml_tensor_array_t &tensor_inputs) override;
+    bool bind_output_tensors(const ggml_tensor_array_t &tensor_outputs) override;
+    void unbind_input_tensors() override {
+        for (auto &tensor : _tensor_inputs) {
+            tensor->unbind();
+        }
+    }
+
+    void unbind_output_tensors() override {
+        for (auto &tensor : _tensor_outputs) {
+            tensor->unbind();
+        }
+    }
+
+    const qnn_tensor_array_t &get_input_tensors() override { return _tensor_inputs; }
+    const qnn_tensor_array_t &get_output_tensors() override { return _tensor_outputs; }
+
+protected:
+    std::string _name;
+    std::shared_ptr<qnn_instance> _qnn_instance;
+
+    std::vector<qnn_op_config_ptr_t> _operations;
+    qnn_tensor_array_t _tensor_inputs;
+    qnn_tensor_array_t _tensor_outputs;
+
+private:
+    DISABLE_COPY(ggml_qnn_aggregate_op_config);
+    DISABLE_MOVE(ggml_qnn_aggregate_op_config);
+};
+
+class ggml_qnn_matmul_op_config : public ggml_qnn_aggregate_op_config {
+public:
+    ggml_qnn_matmul_op_config(const std::string &name, std::shared_ptr<qnn_instance> qnn_instance)
+        : ggml_qnn_aggregate_op_config(name, qnn_instance) {}
+
+    bool initialize_op_nodes(QNNBackend device, Qnn_GraphHandle_t graph_handle) override;
+
+private:
+    qnn_tensor_ptr_t create_gather_nodes(QNNBackend device, Qnn_GraphHandle_t graph_handle, const int rank,
+                                         qnn_tensor_ptr_t tensor_input, qnn_dimension_array_t output_dimensions);
+    bool create_convert_nodes(QNNBackend device, Qnn_GraphHandle_t graph_handle, const int rank,
+                              qnn_tensor_array_t &tensor_inputs, qnn_tensor_array_t &tensor_outputs);
+    bool create_mat_mul_nodes(QNNBackend device, Qnn_GraphHandle_t graph_handle, const int rank,
+                              qnn_tensor_array_t &tensor_inputs, qnn_tensor_array_t &tensor_outputs);
+
+    DISABLE_COPY(ggml_qnn_matmul_op_config);
+    DISABLE_MOVE(ggml_qnn_matmul_op_config);
+};
+
+} // namespace qnn
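Editor's note: this new header is the "split op config classes into separated file" bullet in action. The concrete classes (ggml_qnn_op_config_base, ggml_qnn_single_op_config, the new ggml_qnn_rmsnorm_op_config, ggml_qnn_aggregate_op_config, ggml_qnn_matmul_op_config) now live in op-config-impl.hpp; the two implementation files earlier in this diff switched their include to match, and the public header (next hunk) shrinks to free-function declarations, so op-config internals can change without touching every caller.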
@@ -12,19 +12,16 @@
 
 namespace qnn {
 
-using ggml_op_constructor_t =
-    std::function<std::shared_ptr<ggml_qnn_op_config>(const std::string &, std::shared_ptr<qnn_instance>)>;
-
 constexpr const size_t kGgmlUnaryOpStart = GGML_OP_COUNT;
 
 size_t get_qnn_op_index(const ggml_tensor *tensor);
-void get_ggml_op_output_dimensions(const std::vector<const ggml_dimension_array_t> &input_dims, size_t op,
+void get_ggml_op_output_dimensions(const std::vector<const ggml_dimension_array_t> &input_dims, const ggml_tensor *op,
                                    ggml_dimension_array_t &output_dims);
 
-const char *get_qnn_op_name(size_t op);
-size_t get_qnn_op_input_param_count(size_t op);
-
-ggml_op_constructor_t create_op_constructor(size_t op);
+const char *get_qnn_op_name(const ggml_tensor *op);
+size_t get_qnn_op_input_param_count(const ggml_tensor *op);
+std::shared_ptr<ggml_qnn_op_config> create_op(const ggml_tensor *op, const std::string &name,
+                                              std::shared_ptr<qnn_instance> qnn_instance);
 
 inline bool add_op_to_graph(Qnn_GraphHandle_t graph_handle, std::vector<qnn_op_config_ptr_t> &operations) {
     for (auto &op : operations) {
@@ -36,127 +33,4 @@ inline bool add_op_to_graph(Qnn_GraphHandle_t graph_handle, std::vector<qnn_op_config_ptr_t> &operations) {
     return true;
 }
 
-class ggml_qnn_op_config_base : public ggml_qnn_op_config {
-public:
-    explicit ggml_qnn_op_config_base(const std::string &name, const std::string &package_name,
-                                     const std::string &op_type, std::shared_ptr<qnn_instance> qnn_instance)
-        : _name(name), _package_name(package_name), _op_type(op_type), _qnn_instance(qnn_instance) {}
-
-    void add_scalar_param(const std::string &name, const Qnn_Scalar_t scalar);
-    bool add_tensor_param(const std::string &name, const qnn_dimension_array_t &dimensions, int rank,
-                          const uint8_t *data, const Qnn_DataType_t data_type, QNNBackend device,
-                          Qnn_GraphHandle_t graph_handle);
-
-    void set_input_tensors(qnn::qnn_tensor_array_t &tensor_inputs) override;
-    void set_input_tensors(qnn::qnn_tensor_array_t &&tensor_inputs) override;
-    void set_output_tensors(qnn::qnn_tensor_array_t &tensor_inputs) override;
-    void set_output_tensors(qnn::qnn_tensor_array_t &&tensor_inputs) override;
-    bool add_op_to_graph(Qnn_GraphHandle_t graph_handle) override;
-    bool bind_input_tensors(const ggml_tensor_array_t &tensor_inputs) override;
-    bool bind_output_tensors(const ggml_tensor_array_t &tensor_outputs) override;
-    void unbind_input_tensors() override;
-    void unbind_output_tensors() override;
-    const qnn_tensor_array_t &get_input_tensors() override { return _tensor_inputs; }
-    const qnn_tensor_array_t &get_output_tensors() override { return _tensor_outputs; }
-
-protected:
-    Qnn_OpConfig_t get_op_config();
-
-    std::string _name;
-    std::string _package_name;
-    std::string _op_type;
-    std::shared_ptr<qnn_instance> _qnn_instance;
-    qnn_tensor_array_t _tensor_inputs;
-    qnn_tensor_array_t _tensor_outputs;
-    qnn_tensor_array_t _tensor_parameters;
-    std::vector<Qnn_Tensor_t> _qnn_tensor_inputs;
-    std::vector<Qnn_Tensor_t> _qnn_tensor_outputs;
-    std::vector<Qnn_Param_t> _qnn_parameters;
-    std::vector<std::string> _param_names;
-
-    DISABLE_COPY(ggml_qnn_op_config_base);
-    DISABLE_MOVE(ggml_qnn_op_config_base);
-};
-
-class ggml_qnn_single_op_config : public ggml_qnn_op_config_base {
-public:
-    explicit ggml_qnn_single_op_config(const std::string &name, const std::string &package_name,
-                                       const std::string &op_type, std::shared_ptr<qnn_instance> qnn_instance)
-        : ggml_qnn_op_config_base(name, package_name, op_type, qnn_instance) {}
-
-    bool initialize_op_nodes(QNNBackend device, Qnn_GraphHandle_t graph_handle) override;
-
-private:
-    DISABLE_COPY(ggml_qnn_single_op_config);
-    DISABLE_MOVE(ggml_qnn_single_op_config);
-};
-
-class ggml_qnn_aggregate_op_config : public ggml_qnn_op_config {
-public:
-    explicit ggml_qnn_aggregate_op_config(const std::string &name, std::shared_ptr<qnn_instance> qnn_instance)
-        : _name(name), _qnn_instance(qnn_instance) {}
-
-    ~ggml_qnn_aggregate_op_config() {
-        _tensor_inputs.clear();
-        _tensor_outputs.clear();
-        _operations.clear();
-    }
-
-    void set_input_tensors(qnn::qnn_tensor_array_t &tensor_inputs) override;
-    void set_input_tensors(qnn::qnn_tensor_array_t &&tensor_inputs) override;
-    void set_output_tensors(qnn::qnn_tensor_array_t &tensor_inputs) override;
-    void set_output_tensors(qnn::qnn_tensor_array_t &&tensor_inputs) override;
-    bool add_op_to_graph(Qnn_GraphHandle_t graph_handle) override {
-        return qnn::add_op_to_graph(graph_handle, _operations);
-    }
-
-    bool bind_input_tensors(const ggml_tensor_array_t &tensor_inputs) override;
-    bool bind_output_tensors(const ggml_tensor_array_t &tensor_outputs) override;
-    void unbind_input_tensors() override {
-        for (auto &tensor : _tensor_inputs) {
-            tensor->unbind();
-        }
-    }
-
-    void unbind_output_tensors() override {
-        for (auto &tensor : _tensor_outputs) {
-            tensor->unbind();
-        }
-    }
-
-    const qnn_tensor_array_t &get_input_tensors() override { return _tensor_inputs; }
-    const qnn_tensor_array_t &get_output_tensors() override { return _tensor_outputs; }
-
-protected:
-    std::string _name;
-    std::shared_ptr<qnn_instance> _qnn_instance;
-
-    std::vector<qnn_op_config_ptr_t> _operations;
-    qnn_tensor_array_t _tensor_inputs;
-    qnn_tensor_array_t _tensor_outputs;
-
-private:
-    DISABLE_COPY(ggml_qnn_aggregate_op_config);
-    DISABLE_MOVE(ggml_qnn_aggregate_op_config);
-};
-
-class ggml_qnn_matmul_op_config : public ggml_qnn_aggregate_op_config {
-public:
-    ggml_qnn_matmul_op_config(const std::string &name, std::shared_ptr<qnn_instance> qnn_instance)
-        : ggml_qnn_aggregate_op_config(name, qnn_instance) {}
-
-    bool initialize_op_nodes(QNNBackend device, Qnn_GraphHandle_t graph_handle) override;
-
-private:
-    qnn_tensor_ptr_t create_gather_nodes(QNNBackend device, Qnn_GraphHandle_t graph_handle, const int rank,
-                                         qnn_tensor_ptr_t tensor_input, qnn_dimension_array_t output_dimensions);
-    bool create_convert_nodes(QNNBackend device, Qnn_GraphHandle_t graph_handle, const int rank,
-                              qnn_tensor_array_t &tensor_inputs, qnn_tensor_array_t &tensor_outputs);
-    bool create_mat_mul_nodes(QNNBackend device, Qnn_GraphHandle_t graph_handle, const int rank,
-                              qnn_tensor_array_t &tensor_inputs, qnn_tensor_array_t &tensor_outputs);
-
-    DISABLE_COPY(ggml_qnn_matmul_op_config);
-    DISABLE_MOVE(ggml_qnn_matmul_op_config);
-};
-
 } // namespace qnn
@@ -294,9 +294,7 @@ private:
                                new_tensor_type);
     }
 
-    bool should_use_mem_handle() const {
-        return _device == QNN_BACKEND_NPU && QNN_TENSOR_GET_TYPE(_qnn_tensor) != QNN_TENSOR_TYPE_STATIC;
-    }
+    bool should_use_mem_handle() const { return false; }
 
     std::string _tensor_name;
     qnn_buffer_ptr _buffer;
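Editor's note: this final hunk is the "disable rpc buffer for npu" bullet. should_use_mem_handle() previously opted non-static NPU tensors into RPC shared-memory handles; hard-coding it to false makes every tensor go through ordinary buffer registration instead. The old condition stays visible in the diff, so restoring RPC buffers later is a one-line revert.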