[feat] add more op support (#18)

* disable rpc buffer for npu

* append input/output tensor size into unsupported op log

* log dimensions for unsupported tensor

* wip

* split op config classes into separated file

* fix reshape

* wip

* add op_constructor_with_type_param

* set parameter for op_constructor_with_type_param func
nullname 2025-01-18 22:15:27 +08:00 committed by GitHub
parent 5f93376f67
commit 10bd671c08
7 changed files with 384 additions and 178 deletions
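The change that threads through all seven files is an API migration: op-table helpers that previously took a precomputed index (`size_t op`) now take the tensor itself (`const ggml_tensor *op`) and derive the index internally, so callers stop chaining `qnn::get_qnn_op_index()` by hand. A minimal compilable sketch of the pattern, with stand-in types instead of the real backend headers:

```cpp
#include <cassert>
#include <cstddef>
#include <iterator>

struct tensor_stub { int op; };               // stand-in for ggml_tensor
constexpr size_t kParamCount[] = {0, 1, 2};   // stand-in for kOpCaps entries

size_t get_op_index(const tensor_stub *t) { return static_cast<size_t>(t->op); }

// after this commit: callers hand over the tensor; the index stays an internal detail
size_t get_input_param_count(const tensor_stub *t) {
    const size_t idx = get_op_index(t);
    assert(idx < std::size(kParamCount));
    return kParamCount[idx];
}

int main() {
    tensor_stub add{2};  // pretend index 2 is a binary op
    return get_input_param_count(&add) == 2 ? 0 : 1;
}
```

The same move is what lets `create_op()` further down pick a constructor straight out of one table.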

View File

@@ -25,7 +25,7 @@ bool qnn_is_op_valid(ggml_backend_qnn_device_context *ctx, const ggml_tensor *ds
return false;
}
-const auto param_count = qnn::get_qnn_op_input_param_count(qnn::get_qnn_op_index(dst));
+const auto param_count = qnn::get_qnn_op_input_param_count(dst);
switch (param_count) {
case 1:
return dst->src[0];
@@ -91,9 +91,13 @@ void get_graph_key_from_op(const ggml_tensor *op, std::string &output) {
GGML_ASSERT(op->op != GGML_OP_NONE);
output += ggml_op_desc(op);
output += qnn::get_ggml_type_name(op->type);
-const auto param_count = qnn::get_qnn_op_input_param_count(qnn::get_qnn_op_index(op));
+const auto param_count = qnn::get_qnn_op_input_param_count(op);
for (size_t i = 0; i < param_count; ++i) {
auto *input = op->src[i];
+if (!input) {
+break;
+}
output += '_';
append_tensor_dimensions(input, output);
}
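The new null check stops the key at the first missing operand instead of emitting separators for absent inputs. For orientation, a sketch of the key this function assembles; the exact dimension formatting of `append_tensor_dimensions` is an assumption here:

```cpp
#include <string>

// hypothetical key for an f32 ADD over two 4096x2 tensors
std::string example_graph_key() {
    std::string output;
    output += "ADD";          // ggml_op_desc(op)
    output += "f32";          // qnn::get_ggml_type_name(op->type)
    output += "_4096x2x1x1";  // '_' + append_tensor_dimensions(src[0], output)
    output += "_4096x2x1x1";  // '_' + append_tensor_dimensions(src[1], output)
    return output;            // "ADDf32_4096x2x1x1_4096x2x1x1"
}
```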
@@ -224,7 +228,7 @@ bool qnn_generic_op_impl(ggml_backend_qnn_device_context *ctx, ggml_tensor *dst)
#ifndef NDEBUG
if (!succeed) {
-const auto param_count = qnn::get_qnn_op_input_param_count(qnn::get_qnn_op_index(dst));
+const auto param_count = qnn::get_qnn_op_input_param_count(dst);
for (size_t i = 0; i < param_count; ++i) {
print_ggml_tensor(dst->src[i]);
}
@@ -409,7 +413,7 @@ bool ggnl_qnn_supports_op_tensor(ggml_backend_qnn_device_context *ctx, const ggm
return false;
}
-const auto param_count = qnn::get_qnn_op_input_param_count(qnn::get_qnn_op_index(op));
+const auto param_count = qnn::get_qnn_op_input_param_count(op);
for (size_t i = 0; i < param_count; ++i) {
if (!ggml_qnn_supports_tensor(ctx, op->src[i])) {
return false;
@@ -479,12 +483,20 @@ bool device_supports_op(ggml_backend_qnn_device_context *ctx, const ggml_tensor
}
if (!kQnnOpsTable[qnn::get_qnn_op_index(op)]) {
QNN_LOG_DEBUG("[%s]unsupported op", ggml_op_name(op->op));
#ifndef NDEBUG
std::string op_key;
get_graph_key_from_op(op, op_key);
QNN_LOG_DEBUG("[%s]unsupported op", op_key.c_str());
#endif
return false;
}
if (!ggnl_qnn_supports_op_tensor(ctx, op)) {
QNN_LOG_DEBUG("[%s]unsupported tensor", ggml_op_name(op->op));
#ifndef NDEBUG
std::string tensor_dims;
append_tensor_dimensions(op, tensor_dims);
QNN_LOG_DEBUG("[%s]unsupported tensor(%s)", ggml_op_name(op->op), tensor_dims.c_str());
#endif
return false;
}
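Both rejection paths now build their diagnostic strings only under `#ifndef NDEBUG`, matching the "append input/output tensor size into unsupported op log" bullets: release builds skip the string work entirely. A self-contained sketch of that pattern, with a stderr print standing in for `QNN_LOG_DEBUG`:

```cpp
#include <cstdio>
#include <string>

void log_unsupported_tensor(const char *op_name) {
#ifndef NDEBUG
    std::string dims = "4096x2x1x1";  // placeholder for append_tensor_dimensions(op, dims)
    std::fprintf(stderr, "[%s]unsupported tensor(%s)\n", op_name, dims.c_str());
#else
    (void)op_name;  // release builds build no string and print nothing
#endif
}
```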

View File

@@ -15,7 +15,7 @@ using qnn_tensor_cache_t = std::unordered_map<ggml_tensor *, qnn::qnn_tensor_ptr
int get_op_max_rank(const ggml_tensor *op) {
int max_rank = ggml_n_dims(op);
-const int count = (int)qnn::get_qnn_op_input_param_count(qnn::get_qnn_op_index(op));
+const int count = (int)qnn::get_qnn_op_input_param_count(op);
for (int i = 0; i < count; ++i) {
max_rank = std::max(max_rank, ggml_n_dims(op->src[i]));
}
@@ -56,14 +56,12 @@ qnn::qnn_op_config_ptr_t create_operation_from_op_tensor(ggml_tensor *dst, const
QNNBackend device, Qnn_GraphHandle_t graph_handle,
std::shared_ptr<qnn::qnn_instance> qnn_instance,
bool is_intermediate, qnn_tensor_cache_t &tensor_cache) {
-const auto op_index = qnn::get_qnn_op_index(dst);
-auto qnn_op = qnn::create_op_constructor(op_index);
-auto operation = qnn_op(name, qnn_instance);
+auto operation = qnn::create_op(dst, name, qnn_instance);
// input tensors
qnn::qnn_tensor_array_t input_qnn_tensors;
auto tensor_type = is_intermediate ? qnn::ggml_qnn_tensor::INTERMEDIATE : qnn::ggml_qnn_tensor::INPUT;
-for (size_t i = 0; i < qnn::get_qnn_op_input_param_count(op_index); ++i) {
+for (size_t i = 0; i < qnn::get_qnn_op_input_param_count(dst); ++i) {
auto input_qnn_tensor =
create_tensor_with_cache(dst->src[i], tensor_type, rank, device, graph_handle, qnn_instance, tensor_cache);
input_qnn_tensors.push_back(input_qnn_tensor);
@@ -92,7 +90,7 @@ bool bind_src_tensors(ggml_tensor *op, qnn::qnn_tensor_array_t &tensor_wrappers,
return false;
}
-const auto param_count = qnn::get_qnn_op_input_param_count(qnn::get_qnn_op_index(op));
+const auto param_count = qnn::get_qnn_op_input_param_count(op);
GGML_ASSERT(tensor_wrappers.size() == param_count);
qnn_tensors.resize(param_count);
for (size_t i = 0; i < param_count; ++i) {
@@ -268,7 +266,7 @@ bool qnn_graph::build_graph_from_ggml_graph(const ggml_cgraph *cgraph) {
continue;
}
QNN_LOG_DEBUG("[%s]create op: %s", get_backend_name(_device), get_qnn_op_name(dst->op));
QNN_LOG_DEBUG("[%s]create op: %s", get_backend_name(_device), get_qnn_op_name(dst));
auto operation = create_operation_from_op_tensor(dst, dst->name, rank, _device, _graph_handle,
_qnn_instance, true, tensor_cache); // TODO: fix op name
operations.push_back(operation);
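`create_tensor_with_cache`, used above for every operand, keeps a per-graph map from ggml tensors to their QNN wrappers (the `qnn_tensor_cache_t` at the top of this file) so a tensor shared by several ops is wrapped once and reused. A reduced sketch of that lookup with stand-in types:

```cpp
#include <memory>
#include <unordered_map>

struct qnn_tensor_stub {};  // stand-in for qnn::ggml_qnn_tensor
using tensor_cache_t = std::unordered_map<const void *, std::shared_ptr<qnn_tensor_stub>>;

std::shared_ptr<qnn_tensor_stub> get_or_create(const void *ggml_tensor_ptr, tensor_cache_t &cache) {
    auto it = cache.find(ggml_tensor_ptr);
    if (it != cache.end()) {
        return it->second;  // the wrapper already exists: reuse it
    }
    auto wrapper = std::make_shared<qnn_tensor_stub>();
    cache.emplace(ggml_tensor_ptr, wrapper);
    return wrapper;
}
```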

View File

@@ -1,8 +1,10 @@
-#include "op-config.hpp"
+#include "op-config-impl.hpp"
namespace {
+using op_constructor_t = std::shared_ptr<qnn::ggml_qnn_op_config> (*)(const ggml_tensor *, const std::string &,
+std::shared_ptr<qnn::qnn_instance>);
using op_dims_calc_func_t = void (*)(const std::vector<const qnn::ggml_dimension_array_t> &input_dims,
qnn::ggml_dimension_array_t &output_dims);
@@ -24,6 +26,7 @@ struct qnn_op_caps_t {
const char *qnn_op_name = nullptr;
const size_t input_param_count = 0;
op_dims_calc_func_t calc_dims_func = nullptr;
+const char *qnn_param_name = nullptr;
};
constexpr const qnn_op_caps_t kOpCaps[] = {
@@ -80,7 +83,13 @@ constexpr const qnn_op_caps_t kOpCaps[] = {
{}, // GGML_OP_CONCAT
{}, // GGML_OP_SILU_BACK
{}, // GGML_OP_NORM
-{}, // GGML_OP_RMS_NORM
+{
+// GGML_OP_RMS_NORM
+QNN_OP_RMS_NORM, // qnn_op_name
+1, // input_param_count
+nullptr, // TODO: calc_dims_func
+QNN_OP_RMS_NORM_PARAM_EPSILON, // qnn_param_name
+},
{}, // GGML_OP_RMS_NORM_BACK
{}, // GGML_OP_GROUP_NORM
{
@@ -187,9 +196,172 @@ static_assert(kOpCaps[GGML_OP_MUL_MAT].calc_dims_func == mat_mul_op_dims,
"GGML_OP_MUL_MAT does not have mat_mul_op_dims function");
static_assert(kOpCaps[GGML_OP_LOG].calc_dims_func == element_wise_op_dims,
"GGML_OP_LOG does not have element_wise_op_dims function");
+static_assert(kOpCaps[GGML_OP_COUNT + GGML_UNARY_OP_GELU].input_param_count == 1,
+"GGML_UNARY_OP_GELU does not have 1 input parameter");
static_assert(std::size(kOpCaps) == (GGML_OP_COUNT + GGML_UNARY_OP_COUNT),
"GGML_OP_COUNT does not match the size of the kOpCaps table");
+std::shared_ptr<qnn::ggml_qnn_op_config> mat_mul_op_constructor(const ggml_tensor *op, const std::string &instance_name,
+std::shared_ptr<qnn::qnn_instance> qnn_instance) {
+GGML_UNUSED(op);
+QNN_LOG_DEBUG("create QNN_OP_MAT_MUL, name %s", instance_name.c_str());
+return std::make_shared<qnn::ggml_qnn_matmul_op_config>(instance_name, qnn_instance);
+}
+template <size_t _op>
+std::shared_ptr<qnn::ggml_qnn_op_config> generic_op_constructor(const ggml_tensor *op, const std::string &instance_name,
+std::shared_ptr<qnn::qnn_instance> qnn_instance) {
+GGML_UNUSED(op);
+static_assert(_op < std::size(kOpCaps));
+static_assert(kOpCaps[_op].qnn_op_name != nullptr);
+return std::make_shared<qnn::ggml_qnn_single_op_config>(instance_name, QNN_OP_PACKAGE_NAME_QTI_AISW,
+kOpCaps[_op].qnn_op_name, qnn_instance);
+}
+void add_type_parameters(std::shared_ptr<qnn::ggml_qnn_op_config_base> op, const char *name, float value) {
+Qnn_Scalar_t scalar = QNN_SCALAR_INIT;
+scalar.dataType = QNN_DATATYPE_FLOAT_32;
+scalar.floatValue = value;
+op->add_scalar_param(name, scalar);
+}
+template <size_t _op, typename _ggml_op_param_type, typename _qnn_op_type_name>
+std::shared_ptr<qnn::ggml_qnn_op_config> op_constructor_with_type_param(
+const ggml_tensor *op, const std::string &instance_name, std::shared_ptr<qnn::qnn_instance> qnn_instance) {
+static_assert(std::is_base_of<qnn::ggml_qnn_op_config_base, _qnn_op_type_name>::value);
+static_assert(_op < std::size(kOpCaps));
+constexpr auto &op_caps = kOpCaps[_op];
+static_assert(op_caps.qnn_op_name != nullptr);
+_ggml_op_param_type op_param;
+memcpy(&op_param, op->op_params, sizeof(op_param));
+auto qnn_op = std::make_shared<_qnn_op_type_name>(instance_name, QNN_OP_PACKAGE_NAME_QTI_AISW, op_caps.qnn_op_name,
+qnn_instance);
+if (op_caps.qnn_param_name) {
+add_type_parameters(qnn_op, op_caps.qnn_param_name, op_param);
+}
+return qnn_op;
+}
+constexpr const op_constructor_t kOpConstructors[] = {
+nullptr,                              // GGML_OP_NONE
+nullptr,                              // GGML_OP_DUP
+generic_op_constructor<GGML_OP_ADD>,  // GGML_OP_ADD
+nullptr,                              // GGML_OP_ADD1
+nullptr,                              // GGML_OP_ACC
+generic_op_constructor<GGML_OP_SUB>,  // GGML_OP_SUB
+generic_op_constructor<GGML_OP_MUL>,  // GGML_OP_MUL
+generic_op_constructor<GGML_OP_DIV>,  // GGML_OP_DIV
+nullptr,                              // GGML_OP_SQR
+generic_op_constructor<GGML_OP_SQRT>, // GGML_OP_SQRT
+generic_op_constructor<GGML_OP_LOG>,  // GGML_OP_LOG
+nullptr,                              // GGML_OP_SIN
+nullptr,                              // GGML_OP_COS
+nullptr,                              // GGML_OP_SUM
+nullptr,                              // GGML_OP_SUM_ROWS
+nullptr,                              // GGML_OP_MEAN
+nullptr,                              // GGML_OP_ARGMAX
+nullptr,                              // GGML_OP_COUNT_EQUAL
+nullptr,                              // GGML_OP_REPEAT
+nullptr,                              // GGML_OP_REPEAT_BACK
+nullptr,                              // GGML_OP_CONCAT
+nullptr,                              // GGML_OP_SILU_BACK
+nullptr,                              // GGML_OP_NORM
+op_constructor_with_type_param<GGML_OP_RMS_NORM, float, qnn::ggml_qnn_rmsnorm_op_config>, // GGML_OP_RMS_NORM
+nullptr,                              // GGML_OP_RMS_NORM_BACK
+nullptr,                              // GGML_OP_GROUP_NORM
+mat_mul_op_constructor,               // GGML_OP_MUL_MAT
+nullptr,                              // GGML_OP_MUL_MAT_ID
+nullptr,                              // GGML_OP_OUT_PROD
+nullptr,                              // GGML_OP_SCALE
+nullptr,                              // GGML_OP_SET
+nullptr,                              // GGML_OP_CPY
+nullptr,                              // GGML_OP_CONT
+generic_op_constructor<GGML_OP_RESHAPE>, // GGML_OP_RESHAPE
+nullptr,                              // GGML_OP_VIEW
+nullptr,                              // GGML_OP_PERMUTE
+nullptr,                              // GGML_OP_TRANSPOSE
+nullptr,                              // GGML_OP_GET_ROWS
+nullptr,                              // GGML_OP_GET_ROWS_BACK
+nullptr,                              // GGML_OP_DIAG
+nullptr,                              // GGML_OP_DIAG_MASK_INF
+nullptr,                              // GGML_OP_DIAG_MASK_ZERO
+nullptr,                              // GGML_OP_SOFT_MAX
+nullptr,                              // GGML_OP_SOFT_MAX_BACK
+nullptr,                              // GGML_OP_ROPE
+nullptr,                              // GGML_OP_ROPE_BACK
+nullptr,                              // GGML_OP_CLAMP
+nullptr,                              // GGML_OP_CONV_TRANSPOSE_1D
+nullptr,                              // GGML_OP_IM2COL
+nullptr,                              // GGML_OP_IM2COL_BACK
+nullptr,                              // GGML_OP_CONV_TRANSPOSE_2D
+nullptr,                              // GGML_OP_POOL_1D
+nullptr,                              // GGML_OP_POOL_2D
+nullptr,                              // GGML_OP_POOL_2D_BACK
+nullptr,                              // GGML_OP_UPSCALE
+nullptr,                              // GGML_OP_PAD
+nullptr,                              // GGML_OP_PAD_REFLECT_1D
+nullptr,                              // GGML_OP_ARANGE
+nullptr,                              // GGML_OP_TIMESTEP_EMBEDDING
+nullptr,                              // GGML_OP_ARGSORT
+nullptr,                              // GGML_OP_LEAKY_RELU
+nullptr,                              // GGML_OP_FLASH_ATTN_EXT
+nullptr,                              // GGML_OP_FLASH_ATTN_BACK
+nullptr,                              // GGML_OP_SSM_CONV
+nullptr,                              // GGML_OP_SSM_SCAN
+nullptr,                              // GGML_OP_WIN_PART
+nullptr,                              // GGML_OP_WIN_UNPART
+nullptr,                              // GGML_OP_GET_REL_POS
+nullptr,                              // GGML_OP_ADD_REL_POS
+nullptr,                              // GGML_OP_RWKV_WKV6
+nullptr,                              // GGML_OP_GATED_LINEAR_ATTN
+nullptr,                              // GGML_OP_UNARY
+nullptr,                              // GGML_OP_MAP_UNARY
+nullptr,                              // GGML_OP_MAP_BINARY
+nullptr,                              // GGML_OP_MAP_CUSTOM1_F32
+nullptr,                              // GGML_OP_MAP_CUSTOM2_F32
+nullptr,                              // GGML_OP_MAP_CUSTOM3_F32
+nullptr,                              // GGML_OP_MAP_CUSTOM1
+nullptr,                              // GGML_OP_MAP_CUSTOM2
+nullptr,                              // GGML_OP_MAP_CUSTOM3
+nullptr,                              // GGML_OP_CROSS_ENTROPY_LOSS
+nullptr,                              // GGML_OP_CROSS_ENTROPY_LOSS_BACK
+nullptr,                              // GGML_OP_OPT_STEP_ADAMW
+// ggml_unary_op
+nullptr,                              // GGML_UNARY_OP_ABS
+nullptr,                              // GGML_UNARY_OP_SGN
+nullptr,                              // GGML_UNARY_OP_NEG
+nullptr,                              // GGML_UNARY_OP_STEP
+nullptr,                              // GGML_UNARY_OP_TANH
+nullptr,                              // GGML_UNARY_OP_ELU
+nullptr,                              // GGML_UNARY_OP_RELU
+nullptr,                              // GGML_UNARY_OP_SIGMOID
+nullptr,                              // GGML_UNARY_OP_GELU
+nullptr,                              // GGML_UNARY_OP_GELU_QUICK
+nullptr,                              // GGML_UNARY_OP_SILU
+nullptr,                              // GGML_UNARY_OP_HARDSWISH
+nullptr,                              // GGML_UNARY_OP_HARDSIGMOID
+nullptr,                              // GGML_UNARY_OP_EXP
+};
+static_assert(kOpConstructors[GGML_OP_NONE] == nullptr, "GGML_OP_NONE does not match the nullptr function");
+static_assert(kOpConstructors[GGML_OP_ADD] == generic_op_constructor<GGML_OP_ADD>,
+"GGML_OP_ADD does not match the generic_op_constructor<GGML_OP_ADD> function");
+static_assert(kOpConstructors[GGML_OP_MUL_MAT] == mat_mul_op_constructor,
+"GGML_OP_MUL_MAT does not match the mat_mul_op_constructor function");
+static_assert(std::size(kOpConstructors) == (GGML_OP_COUNT + GGML_UNARY_OP_COUNT),
+"GGML_OP_COUNT does not match the size of the kOpConstructors table");
} // namespace
namespace qnn {
@@ -202,23 +374,35 @@ size_t get_qnn_op_index(const ggml_tensor *tensor) {
return tensor->op;
}
-void get_ggml_op_output_dimensions(const std::vector<const ggml_dimension_array_t> &input_dims, size_t op,
+void get_ggml_op_output_dimensions(const std::vector<const ggml_dimension_array_t> &input_dims, const ggml_tensor *op,
ggml_dimension_array_t &output_dims) {
-GGML_ASSERT(op < std::size(kOpCaps));
-auto get_dims = kOpCaps[op].calc_dims_func;
+auto op_index = get_qnn_op_index(op);
+GGML_ASSERT(op_index < std::size(kOpCaps));
+auto get_dims = kOpCaps[op_index].calc_dims_func;
GGML_ASSERT(get_dims);
get_dims(input_dims, output_dims);
}
-const char *get_qnn_op_name(size_t op) {
-GGML_ASSERT(op < std::size(kOpCaps));
-GGML_ASSERT(kOpCaps[op].qnn_op_name);
-return kOpCaps[op].qnn_op_name;
+const char *get_qnn_op_name(const ggml_tensor *op) {
+auto op_index = get_qnn_op_index(op);
+GGML_ASSERT(op_index < std::size(kOpCaps));
+GGML_ASSERT(kOpCaps[op_index].qnn_op_name);
+return kOpCaps[op_index].qnn_op_name;
}
-size_t get_qnn_op_input_param_count(size_t op) {
-GGML_ASSERT(op < std::size(kOpCaps));
-return kOpCaps[op].input_param_count;
+size_t get_qnn_op_input_param_count(const ggml_tensor *op) {
+auto op_index = get_qnn_op_index(op);
+GGML_ASSERT(op_index < std::size(kOpCaps));
+return kOpCaps[op_index].input_param_count;
}
+std::shared_ptr<ggml_qnn_op_config> create_op(const ggml_tensor *op, const std::string &name,
+std::shared_ptr<qnn_instance> qnn_instance) {
+auto op_index = get_qnn_op_index(op);
+GGML_ASSERT(op_index < std::size(kOpCaps));
+auto op_constructor = kOpConstructors[op_index];
+GGML_ASSERT(op_constructor);
+return op_constructor(op, name, qnn_instance);
+}
} // namespace qnn
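Both `kOpCaps` and `kOpConstructors` are indexed by one flat space: regular ops occupy `[0, GGML_OP_COUNT)` and unary ops are appended behind them, which is what the `GGML_OP_COUNT + GGML_UNARY_OP_GELU` assert and the table-size asserts check. A sketch of the scheme, with stand-in values for the two counts:

```cpp
#include <cstddef>

constexpr std::size_t kOpCount      = 83;  // stand-in for GGML_OP_COUNT
constexpr std::size_t kUnaryOpCount = 14;  // stand-in for GGML_UNARY_OP_COUNT

// unary ops are appended after the regular ops in one flat index space
constexpr std::size_t flat_op_index(std::size_t op, bool is_unary, std::size_t unary_op) {
    return is_unary ? kOpCount + unary_op : op;
}

static_assert(flat_op_index(2, false, 0) == 2, "regular op maps to itself");
static_assert(flat_op_index(0, true, 8) == kOpCount + 8, "unary op is offset by GGML_OP_COUNT");
static_assert(flat_op_index(0, true, kUnaryOpCount - 1) < kOpCount + kUnaryOpCount, "fits the table");
```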

View File

@@ -1,4 +1,4 @@
-#include "op-config.hpp"
+#include "op-config-impl.hpp"
#include <cstdint>
@@ -187,6 +187,13 @@ bool ggml_qnn_single_op_config::initialize_op_nodes(QNNBackend device, Qnn_Graph
return true;
}
+bool ggml_qnn_rmsnorm_op_config::initialize_op_nodes(QNNBackend device, Qnn_GraphHandle_t graph_handle) {
+constexpr const uint32_t kAxes[] = {0};
+add_tensor_param(QNN_OP_RMS_NORM_PARAM_AXES, {1}, 1, reinterpret_cast<const uint8_t *>(kAxes), QNN_DATATYPE_UINT_32,
+device, graph_handle);
+return true;
+}
void ggml_qnn_aggregate_op_config::set_input_tensors(qnn::qnn_tensor_array_t &tensor_inputs) {
_tensor_inputs = tensor_inputs;
}
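After this commit a QNN RmsNorm node carries two parameters: `epsilon`, copied out of the ggml op's `op_params` by `op_constructor_with_type_param` and attached as a float scalar, plus the static `axes` tensor `{0}` added above. A minimal sketch of the epsilon plumbing, using a stand-in struct rather than the QNN SDK:

```cpp
#include <cstdint>
#include <cstring>

struct op_params_stub { int32_t op_params[16]; };  // stand-in for ggml_tensor::op_params

float read_rms_norm_epsilon(const op_params_stub *op) {
    float epsilon;  // _ggml_op_param_type = float in the constructor table entry
    std::memcpy(&epsilon, op->op_params, sizeof(epsilon));  // same memcpy as the template
    return epsilon;  // attached via add_type_parameters() as a FLOAT_32 scalar
}
```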
@@ -439,22 +446,4 @@ bool ggml_qnn_matmul_op_config::create_mat_mul_nodes(QNNBackend device, Qnn_Grap
return true;
}
-ggml_op_constructor_t create_op_constructor(size_t op) {
-std::string op_name = get_qnn_op_name(op);
-if (op_name == QNN_OP_MAT_MUL) {
-// For QNN_OP_MAT_MUL, we need to transpose the input tensor
-return [](const std::string &instance_name,
-std::shared_ptr<qnn::qnn_instance> qnn_instance) -> std::shared_ptr<qnn::ggml_qnn_op_config> {
-QNN_LOG_DEBUG("create QNN_OP_MAT_MUL, name %s", instance_name.c_str());
-return std::make_shared<qnn::ggml_qnn_matmul_op_config>(instance_name, qnn_instance);
-};
-}
-return [op_name](const std::string &instance_name,
-std::shared_ptr<qnn::qnn_instance> qnn_instance) -> std::shared_ptr<qnn::ggml_qnn_op_config> {
-return std::make_shared<qnn::ggml_qnn_single_op_config>(instance_name, QNN_OP_PACKAGE_NAME_QTI_AISW, op_name,
-qnn_instance);
-};
-}
} // namespace qnn

View File

@@ -0,0 +1,151 @@
+#pragma once
+#include <functional>
+#include <memory>
+#include <string>
+#include <vector>
+#include "op-config.hpp"
+#include "qnn-lib.hpp"
+#include "qnn-types.hpp"
+#include "tensor.hpp"
+namespace qnn {
+class ggml_qnn_op_config_base : public ggml_qnn_op_config {
+public:
+explicit ggml_qnn_op_config_base(const std::string &name, const std::string &package_name,
+const std::string &op_type, std::shared_ptr<qnn_instance> qnn_instance)
+: _name(name), _package_name(package_name), _op_type(op_type), _qnn_instance(qnn_instance) {}
+void add_scalar_param(const std::string &name, const Qnn_Scalar_t scalar);
+bool add_tensor_param(const std::string &name, const qnn_dimension_array_t &dimensions, int rank,
+const uint8_t *data, const Qnn_DataType_t data_type, QNNBackend device,
+Qnn_GraphHandle_t graph_handle);
+void set_input_tensors(qnn::qnn_tensor_array_t &tensor_inputs) override;
+void set_input_tensors(qnn::qnn_tensor_array_t &&tensor_inputs) override;
+void set_output_tensors(qnn::qnn_tensor_array_t &tensor_inputs) override;
+void set_output_tensors(qnn::qnn_tensor_array_t &&tensor_inputs) override;
+bool add_op_to_graph(Qnn_GraphHandle_t graph_handle) override;
+bool bind_input_tensors(const ggml_tensor_array_t &tensor_inputs) override;
+bool bind_output_tensors(const ggml_tensor_array_t &tensor_outputs) override;
+void unbind_input_tensors() override;
+void unbind_output_tensors() override;
+const qnn_tensor_array_t &get_input_tensors() override { return _tensor_inputs; }
+const qnn_tensor_array_t &get_output_tensors() override { return _tensor_outputs; }
+protected:
+Qnn_OpConfig_t get_op_config();
+std::string _name;
+std::string _package_name;
+std::string _op_type;
+std::shared_ptr<qnn_instance> _qnn_instance;
+qnn_tensor_array_t _tensor_inputs;
+qnn_tensor_array_t _tensor_outputs;
+qnn_tensor_array_t _tensor_parameters;
+std::vector<Qnn_Tensor_t> _qnn_tensor_inputs;
+std::vector<Qnn_Tensor_t> _qnn_tensor_outputs;
+std::vector<Qnn_Param_t> _qnn_parameters;
+std::vector<std::string> _param_names;
+DISABLE_COPY(ggml_qnn_op_config_base);
+DISABLE_MOVE(ggml_qnn_op_config_base);
+};
+class ggml_qnn_single_op_config : public ggml_qnn_op_config_base {
+public:
+explicit ggml_qnn_single_op_config(const std::string &name, const std::string &package_name,
+const std::string &op_type, std::shared_ptr<qnn_instance> qnn_instance)
+: ggml_qnn_op_config_base(name, package_name, op_type, qnn_instance) {}
+bool initialize_op_nodes(QNNBackend device, Qnn_GraphHandle_t graph_handle) override;
+private:
+DISABLE_COPY(ggml_qnn_single_op_config);
+DISABLE_MOVE(ggml_qnn_single_op_config);
+};
+class ggml_qnn_rmsnorm_op_config : public ggml_qnn_op_config_base {
+public:
+explicit ggml_qnn_rmsnorm_op_config(const std::string &name, const std::string &package_name,
+const std::string &op_type, std::shared_ptr<qnn_instance> qnn_instance)
+: ggml_qnn_op_config_base(name, package_name, op_type, qnn_instance) {}
+bool initialize_op_nodes(QNNBackend device, Qnn_GraphHandle_t graph_handle) override;
+private:
+DISABLE_COPY(ggml_qnn_rmsnorm_op_config);
+DISABLE_MOVE(ggml_qnn_rmsnorm_op_config);
+};
+class ggml_qnn_aggregate_op_config : public ggml_qnn_op_config {
+public:
+explicit ggml_qnn_aggregate_op_config(const std::string &name, std::shared_ptr<qnn_instance> qnn_instance)
+: _name(name), _qnn_instance(qnn_instance) {}
+~ggml_qnn_aggregate_op_config() {
+_tensor_inputs.clear();
+_tensor_outputs.clear();
+_operations.clear();
+}
+void set_input_tensors(qnn::qnn_tensor_array_t &tensor_inputs) override;
+void set_input_tensors(qnn::qnn_tensor_array_t &&tensor_inputs) override;
+void set_output_tensors(qnn::qnn_tensor_array_t &tensor_inputs) override;
+void set_output_tensors(qnn::qnn_tensor_array_t &&tensor_inputs) override;
+bool add_op_to_graph(Qnn_GraphHandle_t graph_handle) override {
+return qnn::add_op_to_graph(graph_handle, _operations);
+}
+bool bind_input_tensors(const ggml_tensor_array_t &tensor_inputs) override;
+bool bind_output_tensors(const ggml_tensor_array_t &tensor_outputs) override;
+void unbind_input_tensors() override {
+for (auto &tensor : _tensor_inputs) {
+tensor->unbind();
+}
+}
+void unbind_output_tensors() override {
+for (auto &tensor : _tensor_outputs) {
+tensor->unbind();
+}
+}
+const qnn_tensor_array_t &get_input_tensors() override { return _tensor_inputs; }
+const qnn_tensor_array_t &get_output_tensors() override { return _tensor_outputs; }
+protected:
+std::string _name;
+std::shared_ptr<qnn_instance> _qnn_instance;
+std::vector<qnn_op_config_ptr_t> _operations;
+qnn_tensor_array_t _tensor_inputs;
+qnn_tensor_array_t _tensor_outputs;
+private:
+DISABLE_COPY(ggml_qnn_aggregate_op_config);
+DISABLE_MOVE(ggml_qnn_aggregate_op_config);
+};
+class ggml_qnn_matmul_op_config : public ggml_qnn_aggregate_op_config {
+public:
+ggml_qnn_matmul_op_config(const std::string &name, std::shared_ptr<qnn_instance> qnn_instance)
+: ggml_qnn_aggregate_op_config(name, qnn_instance) {}
+bool initialize_op_nodes(QNNBackend device, Qnn_GraphHandle_t graph_handle) override;
+private:
+qnn_tensor_ptr_t create_gather_nodes(QNNBackend device, Qnn_GraphHandle_t graph_handle, const int rank,
+qnn_tensor_ptr_t tensor_input, qnn_dimension_array_t output_dimensions);
+bool create_convert_nodes(QNNBackend device, Qnn_GraphHandle_t graph_handle, const int rank,
+qnn_tensor_array_t &tensor_inputs, qnn_tensor_array_t &tensor_outputs);
+bool create_mat_mul_nodes(QNNBackend device, Qnn_GraphHandle_t graph_handle, const int rank,
+qnn_tensor_array_t &tensor_inputs, qnn_tensor_array_t &tensor_outputs);
+DISABLE_COPY(ggml_qnn_matmul_op_config);
+DISABLE_MOVE(ggml_qnn_matmul_op_config);
+};
+} // namespace qnn
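The new header keeps the whole concrete hierarchy in one place: `ggml_qnn_op_config_base` models a single QNN node with scalar/tensor parameters (with the single-op and rmsnorm leaves), while `ggml_qnn_aggregate_op_config` composes several sub-operations (with matmul as its leaf). A skeletal restatement of that shape:

```cpp
// Skeleton of the hierarchy declared above (all methods elided).
struct op_config { virtual ~op_config() = default; };  // ggml_qnn_op_config interface
struct op_config_base : op_config {};                  // one QNN node plus its params
struct single_op : op_config_base {};                  // generic single-node op
struct rmsnorm_op : op_config_base {};                 // adds the static axes param
struct aggregate_op : op_config {};                    // owns a list of sub-operations
struct matmul_op : aggregate_op {};                    // gather/convert/matmul pipeline
```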

View File

@@ -12,19 +12,16 @@
namespace qnn {
-using ggml_op_constructor_t =
-std::function<std::shared_ptr<ggml_qnn_op_config>(const std::string &, std::shared_ptr<qnn_instance>)>;
constexpr const size_t kGgmlUnaryOpStart = GGML_OP_COUNT;
size_t get_qnn_op_index(const ggml_tensor *tensor);
-void get_ggml_op_output_dimensions(const std::vector<const ggml_dimension_array_t> &input_dims, size_t op,
+void get_ggml_op_output_dimensions(const std::vector<const ggml_dimension_array_t> &input_dims, const ggml_tensor *op,
ggml_dimension_array_t &output_dims);
-const char *get_qnn_op_name(size_t op);
-size_t get_qnn_op_input_param_count(size_t op);
-ggml_op_constructor_t create_op_constructor(size_t op);
+const char *get_qnn_op_name(const ggml_tensor *op);
+size_t get_qnn_op_input_param_count(const ggml_tensor *op);
+std::shared_ptr<ggml_qnn_op_config> create_op(const ggml_tensor *op, const std::string &name,
+std::shared_ptr<qnn_instance> qnn_instance);
inline bool add_op_to_graph(Qnn_GraphHandle_t graph_handle, std::vector<qnn_op_config_ptr_t> &operations) {
for (auto &op : operations) {
@@ -36,127 +33,4 @@ inline bool add_op_to_graph(Qnn_GraphHandle_t graph_handle, std::vector<qnn_op_c
return true;
}
-class ggml_qnn_op_config_base : public ggml_qnn_op_config {
-public:
-explicit ggml_qnn_op_config_base(const std::string &name, const std::string &package_name,
-const std::string &op_type, std::shared_ptr<qnn_instance> qnn_instance)
-: _name(name), _package_name(package_name), _op_type(op_type), _qnn_instance(qnn_instance) {}
-void add_scalar_param(const std::string &name, const Qnn_Scalar_t scalar);
-bool add_tensor_param(const std::string &name, const qnn_dimension_array_t &dimensions, int rank,
-const uint8_t *data, const Qnn_DataType_t data_type, QNNBackend device,
-Qnn_GraphHandle_t graph_handle);
-void set_input_tensors(qnn::qnn_tensor_array_t &tensor_inputs) override;
-void set_input_tensors(qnn::qnn_tensor_array_t &&tensor_inputs) override;
-void set_output_tensors(qnn::qnn_tensor_array_t &tensor_inputs) override;
-void set_output_tensors(qnn::qnn_tensor_array_t &&tensor_inputs) override;
-bool add_op_to_graph(Qnn_GraphHandle_t graph_handle) override;
-bool bind_input_tensors(const ggml_tensor_array_t &tensor_inputs) override;
-bool bind_output_tensors(const ggml_tensor_array_t &tensor_outputs) override;
-void unbind_input_tensors() override;
-void unbind_output_tensors() override;
-const qnn_tensor_array_t &get_input_tensors() override { return _tensor_inputs; }
-const qnn_tensor_array_t &get_output_tensors() override { return _tensor_outputs; }
-protected:
-Qnn_OpConfig_t get_op_config();
-std::string _name;
-std::string _package_name;
-std::string _op_type;
-std::shared_ptr<qnn_instance> _qnn_instance;
-qnn_tensor_array_t _tensor_inputs;
-qnn_tensor_array_t _tensor_outputs;
-qnn_tensor_array_t _tensor_parameters;
-std::vector<Qnn_Tensor_t> _qnn_tensor_inputs;
-std::vector<Qnn_Tensor_t> _qnn_tensor_outputs;
-std::vector<Qnn_Param_t> _qnn_parameters;
-std::vector<std::string> _param_names;
-DISABLE_COPY(ggml_qnn_op_config_base);
-DISABLE_MOVE(ggml_qnn_op_config_base);
-};
-class ggml_qnn_single_op_config : public ggml_qnn_op_config_base {
-public:
-explicit ggml_qnn_single_op_config(const std::string &name, const std::string &package_name,
-const std::string &op_type, std::shared_ptr<qnn_instance> qnn_instance)
-: ggml_qnn_op_config_base(name, package_name, op_type, qnn_instance) {}
-bool initialize_op_nodes(QNNBackend device, Qnn_GraphHandle_t graph_handle) override;
-private:
-DISABLE_COPY(ggml_qnn_single_op_config);
-DISABLE_MOVE(ggml_qnn_single_op_config);
-};
-class ggml_qnn_aggregate_op_config : public ggml_qnn_op_config {
-public:
-explicit ggml_qnn_aggregate_op_config(const std::string &name, std::shared_ptr<qnn_instance> qnn_instance)
-: _name(name), _qnn_instance(qnn_instance) {}
-~ggml_qnn_aggregate_op_config() {
-_tensor_inputs.clear();
-_tensor_outputs.clear();
-_operations.clear();
-}
-void set_input_tensors(qnn::qnn_tensor_array_t &tensor_inputs) override;
-void set_input_tensors(qnn::qnn_tensor_array_t &&tensor_inputs) override;
-void set_output_tensors(qnn::qnn_tensor_array_t &tensor_inputs) override;
-void set_output_tensors(qnn::qnn_tensor_array_t &&tensor_inputs) override;
-bool add_op_to_graph(Qnn_GraphHandle_t graph_handle) override {
-return qnn::add_op_to_graph(graph_handle, _operations);
-}
-bool bind_input_tensors(const ggml_tensor_array_t &tensor_inputs) override;
-bool bind_output_tensors(const ggml_tensor_array_t &tensor_outputs) override;
-void unbind_input_tensors() override {
-for (auto &tensor : _tensor_inputs) {
-tensor->unbind();
-}
-}
-void unbind_output_tensors() override {
-for (auto &tensor : _tensor_outputs) {
-tensor->unbind();
-}
-}
-const qnn_tensor_array_t &get_input_tensors() override { return _tensor_inputs; }
-const qnn_tensor_array_t &get_output_tensors() override { return _tensor_outputs; }
-protected:
-std::string _name;
-std::shared_ptr<qnn_instance> _qnn_instance;
-std::vector<qnn_op_config_ptr_t> _operations;
-qnn_tensor_array_t _tensor_inputs;
-qnn_tensor_array_t _tensor_outputs;
-private:
-DISABLE_COPY(ggml_qnn_aggregate_op_config);
-DISABLE_MOVE(ggml_qnn_aggregate_op_config);
-};
-class ggml_qnn_matmul_op_config : public ggml_qnn_aggregate_op_config {
-public:
-ggml_qnn_matmul_op_config(const std::string &name, std::shared_ptr<qnn_instance> qnn_instance)
-: ggml_qnn_aggregate_op_config(name, qnn_instance) {}
-bool initialize_op_nodes(QNNBackend device, Qnn_GraphHandle_t graph_handle) override;
-private:
-qnn_tensor_ptr_t create_gather_nodes(QNNBackend device, Qnn_GraphHandle_t graph_handle, const int rank,
-qnn_tensor_ptr_t tensor_input, qnn_dimension_array_t output_dimensions);
-bool create_convert_nodes(QNNBackend device, Qnn_GraphHandle_t graph_handle, const int rank,
-qnn_tensor_array_t &tensor_inputs, qnn_tensor_array_t &tensor_outputs);
-bool create_mat_mul_nodes(QNNBackend device, Qnn_GraphHandle_t graph_handle, const int rank,
-qnn_tensor_array_t &tensor_inputs, qnn_tensor_array_t &tensor_outputs);
-DISABLE_COPY(ggml_qnn_matmul_op_config);
-DISABLE_MOVE(ggml_qnn_matmul_op_config);
-};
} // namespace qnn

View File

@@ -294,9 +294,7 @@ private:
new_tensor_type);
}
-bool should_use_mem_handle() const {
-return _device == QNN_BACKEND_NPU && QNN_TENSOR_GET_TYPE(_qnn_tensor) != QNN_TENSOR_TYPE_STATIC;
-}
+bool should_use_mem_handle() const { return false; }
std::string _tensor_name;
qnn_buffer_ptr _buffer;