436 lines
19 KiB
C++
436 lines
19 KiB
C++
|
|
#include "op-config-impl.hpp"
|
|
|
|
namespace {
|
|
|
|
using op_constructor_t = std::shared_ptr<qnn::ggml_qnn_op_config> (*)(const ggml_tensor *, const std::string &,
|
|
std::shared_ptr<qnn::qnn_instance>);
|
|
|
|
using op_description_generator_t = void (*)(const ggml_tensor * op, bool append_dimensions,
|
|
ggml_type override_data_type, std::string & output);
|
|
|
|
void append_tensor_shape_and_type_impl(const ggml_tensor * tensor, ggml_type override_data_type, std::string & output) {
|
|
char buffer[256] = {};
|
|
const auto * type_name = qnn::get_ggml_type_name(std::min(tensor->type, override_data_type));
|
|
int len = 0;
|
|
switch (ggml_n_dims(tensor)) {
|
|
case 1:
|
|
len = snprintf(buffer, sizeof(buffer), "%ld%s", (long) tensor->ne[0], type_name);
|
|
break;
|
|
case 2:
|
|
len = snprintf(buffer, sizeof(buffer), "%ldx%ld%s", (long) tensor->ne[0], (long) tensor->ne[1], type_name);
|
|
break;
|
|
case 3:
|
|
len = snprintf(buffer, sizeof(buffer), "%ldx%ldx%ld%s", (long) tensor->ne[0], (long) tensor->ne[1],
|
|
(long) tensor->ne[2], type_name);
|
|
break;
|
|
case 4:
|
|
default:
|
|
len = snprintf(buffer, sizeof(buffer), "%ldx%ldx%ldx%ld%s", (long) tensor->ne[0], (long) tensor->ne[1],
|
|
(long) tensor->ne[2], (long) tensor->ne[3], type_name);
|
|
break;
|
|
}
|
|
GGML_ASSERT(len > 0 && len < (int) sizeof(buffer));
|
|
output.append(buffer, len);
|
|
}
|
|
|
|
void get_graph_key_from_op(const ggml_tensor * op, ggml_type override_data_type, std::string & output) {
|
|
output += ggml_op_desc(op);
|
|
output += qnn::get_ggml_type_name(op->type);
|
|
for (size_t i = 0; i < GGML_MAX_SRC && op->src[i]; ++i) {
|
|
auto * src = op->src[i];
|
|
if (!src) {
|
|
break;
|
|
}
|
|
|
|
output += '_';
|
|
append_tensor_shape_and_type_impl(src, override_data_type, output);
|
|
}
|
|
}
|
|
|
|
void get_op_key_with_src_op_desc(const ggml_tensor * op, std::string & output) {
|
|
output += ggml_op_desc(op);
|
|
output += '(';
|
|
if (op->src[0]) {
|
|
output += ggml_op_desc(op->src[0]);
|
|
}
|
|
for (size_t i = 1; i < GGML_MAX_SRC && op->src[i]; ++i) {
|
|
output += ',';
|
|
output += ggml_op_desc(op->src[i]);
|
|
}
|
|
output += ')';
|
|
}
|
|
|
|
void generic_get_op_desc(const ggml_tensor * op, bool append_dimensions, ggml_type override_data_type,
|
|
std::string & output) {
|
|
if (append_dimensions) {
|
|
get_graph_key_from_op(op, override_data_type, output);
|
|
} else {
|
|
get_op_key_with_src_op_desc(op, output);
|
|
}
|
|
}
|
|
|
|
struct qnn_op_caps_t {
|
|
const char * qnn_op_name = nullptr;
|
|
op_description_generator_t get_desc = nullptr;
|
|
const char * qnn_param_name = nullptr;
|
|
};
|
|
|
|
constexpr const qnn_op_caps_t kOpCaps[] = {
|
|
{}, // GGML_OP_NONE
|
|
{}, // GGML_OP_DUP
|
|
{
|
|
// GGML_OP_ADD
|
|
QNN_OP_ELEMENT_WISE_ADD, // qnn_op_name
|
|
},
|
|
{}, // GGML_OP_ADD1
|
|
{}, // GGML_OP_ACC
|
|
{
|
|
// GGML_OP_SUB
|
|
QNN_OP_ELEMENT_WISE_SUBTRACT, // qnn_op_name
|
|
},
|
|
{
|
|
// GGML_OP_MUL
|
|
QNN_OP_ELEMENT_WISE_MULTIPLY, // qnn_op_name
|
|
},
|
|
{
|
|
// GGML_OP_DIV
|
|
QNN_OP_ELEMENT_WISE_DIVIDE, // qnn_op_name
|
|
},
|
|
{}, // GGML_OP_SQR
|
|
{
|
|
// GGML_OP_SQRT
|
|
QNN_OP_ELEMENT_WISE_SQUARE_ROOT, // qnn_op_name
|
|
},
|
|
{
|
|
// GGML_OP_LOG
|
|
QNN_OP_ELEMENT_WISE_LOG, // qnn_op_name
|
|
},
|
|
{}, // GGML_OP_SIN
|
|
{}, // GGML_OP_COS
|
|
{}, // GGML_OP_SUM
|
|
{}, // GGML_OP_SUM_ROWS
|
|
{}, // GGML_OP_MEAN
|
|
{}, // GGML_OP_ARGMAX
|
|
{}, // GGML_OP_COUNT_EQUAL
|
|
{}, // GGML_OP_REPEAT
|
|
{}, // GGML_OP_REPEAT_BACK
|
|
{}, // GGML_OP_CONCAT
|
|
{}, // GGML_OP_SILU_BACK
|
|
{}, // GGML_OP_NORM
|
|
{
|
|
// GGML_OP_RMS_NORM
|
|
QNN_OP_RMS_NORM, // qnn_op_name
|
|
generic_get_op_desc, // get_desc
|
|
QNN_OP_RMS_NORM_PARAM_EPSILON, // qnn_param_name
|
|
},
|
|
{}, // GGML_OP_RMS_NORM_BACK
|
|
{}, // GGML_OP_GROUP_NORM
|
|
{
|
|
// GGML_OP_MUL_MAT
|
|
QNN_OP_MAT_MUL, // qnn_op_name
|
|
},
|
|
{}, // GGML_OP_MUL_MAT_ID
|
|
{}, // GGML_OP_OUT_PROD
|
|
{}, // GGML_OP_SCALE
|
|
{}, // GGML_OP_SET
|
|
{}, // GGML_OP_CPY
|
|
{}, // GGML_OP_CONT
|
|
{
|
|
// GGML_OP_RESHAPE
|
|
QNN_OP_RESHAPE, // qnn_op_name
|
|
},
|
|
{}, // GGML_OP_VIEW
|
|
{}, // GGML_OP_PERMUTE
|
|
{}, // GGML_OP_TRANSPOSE
|
|
{}, // GGML_OP_GET_ROWS
|
|
{}, // GGML_OP_GET_ROWS_BACK
|
|
{}, // GGML_OP_DIAG
|
|
{}, // GGML_OP_DIAG_MASK_INF
|
|
{}, // GGML_OP_DIAG_MASK_ZERO
|
|
{}, // GGML_OP_SOFT_MAX
|
|
{}, // GGML_OP_SOFT_MAX_BACK
|
|
{}, // GGML_OP_ROPE
|
|
{}, // GGML_OP_ROPE_BACK
|
|
{}, // GGML_OP_CLAMP
|
|
{}, // GGML_OP_CONV_TRANSPOSE_1D
|
|
{}, // GGML_OP_IM2COL
|
|
{}, // GGML_OP_IM2COL_BACK
|
|
{}, // GGML_OP_CONV_TRANSPOSE_2D
|
|
{}, // GGML_OP_POOL_1D
|
|
{}, // GGML_OP_POOL_2D
|
|
{}, // GGML_OP_POOL_2D_BACK
|
|
{}, // GGML_OP_UPSCALE
|
|
{}, // GGML_OP_PAD
|
|
{}, // GGML_OP_PAD_REFLECT_1D
|
|
{}, // GGML_OP_ARANGE
|
|
|
|
{}, // GGML_OP_TIMESTEP_EMBEDDING
|
|
{}, // GGML_OP_ARGSORT
|
|
{}, // GGML_OP_LEAKY_RELU
|
|
|
|
{}, // GGML_OP_FLASH_ATTN_EXT
|
|
{}, // GGML_OP_FLASH_ATTN_BACK
|
|
{}, // GGML_OP_SSM_CONV
|
|
{}, // GGML_OP_SSM_SCAN
|
|
{}, // GGML_OP_WIN_PART
|
|
{}, // GGML_OP_WIN_UNPART
|
|
{}, // GGML_OP_GET_REL_POS
|
|
{}, // GGML_OP_ADD_REL_POS
|
|
{}, // GGML_OP_RWKV_WKV6
|
|
{}, // GGML_OP_GATED_LINEAR_ATTN
|
|
|
|
{}, // GGML_OP_UNARY
|
|
|
|
{}, // GGML_OP_MAP_UNARY
|
|
{}, // GGML_OP_MAP_BINARY
|
|
|
|
{}, // GGML_OP_MAP_CUSTOM1_F32
|
|
{}, // GGML_OP_MAP_CUSTOM2_F32
|
|
{}, // GGML_OP_MAP_CUSTOM3_F32
|
|
|
|
{}, // GGML_OP_MAP_CUSTOM1
|
|
{}, // GGML_OP_MAP_CUSTOM2
|
|
{}, // GGML_OP_MAP_CUSTOM3
|
|
|
|
{}, // GGML_OP_CROSS_ENTROPY_LOSS
|
|
{}, // GGML_OP_CROSS_ENTROPY_LOSS_BACK
|
|
{}, // GGML_OP_OPT_STEP_ADAMW
|
|
|
|
// ggml_unary_op
|
|
{}, // GGML_UNARY_OP_ABS
|
|
{}, // GGML_UNARY_OP_SGN
|
|
{}, // GGML_UNARY_OP_NEG
|
|
{}, // GGML_UNARY_OP_STEP
|
|
{}, // GGML_UNARY_OP_TANH
|
|
{}, // GGML_UNARY_OP_ELU
|
|
{}, // GGML_UNARY_OP_RELU
|
|
{}, // GGML_UNARY_OP_SIGMOID
|
|
{
|
|
// GGML_UNARY_OP_GELU
|
|
QNN_OP_GELU, // qnn_op_name
|
|
},
|
|
{}, // GGML_UNARY_OP_GELU_QUICK
|
|
{}, // GGML_UNARY_OP_SILU
|
|
{}, // GGML_UNARY_OP_HARDSWISH
|
|
{}, // GGML_UNARY_OP_HARDSIGMOID
|
|
{}, // GGML_UNARY_OP_EXP
|
|
};
|
|
|
|
static_assert(kOpCaps[GGML_OP_NONE].get_desc == nullptr, "GGML_OP_NONE should not have get_desc function");
|
|
static_assert(kOpCaps[GGML_OP_ADD].qnn_op_name, "GGML_OP_ADD does not have qnn_op_name in the kOpCaps table");
|
|
static_assert(kOpCaps[GGML_OP_MUL_MAT].qnn_op_name, "GGML_OP_MUL_MAT does not have qnn_op_name in the kOpCaps table");
|
|
static_assert(kOpCaps[GGML_OP_MUL].qnn_op_name, "GGML_OP_MUL does not have qnn_op_name in the kOpCaps table");
|
|
static_assert(kOpCaps[GGML_OP_LOG].qnn_op_name, "GGML_OP_LOG does not have qnn_op_name in the kOpCaps table");
|
|
static_assert(std::size(kOpCaps) == (GGML_OP_COUNT + GGML_UNARY_OP_COUNT),
|
|
"GGML_OP_COUNT does not match the size of the kOpCaps table");
|
|
|
|
std::shared_ptr<qnn::ggml_qnn_op_config> mat_mul_op_constructor(const ggml_tensor * op,
|
|
const std::string & instance_name,
|
|
std::shared_ptr<qnn::qnn_instance> qnn_instance) {
|
|
GGML_UNUSED(op);
|
|
QNN_LOG_DEBUG("create QNN_OP_MAT_MUL, name %s\n", instance_name.c_str());
|
|
return std::make_shared<qnn::ggml_qnn_matmul_op_config>(instance_name, qnn_instance);
|
|
}
|
|
|
|
template <size_t _op>
|
|
std::shared_ptr<qnn::ggml_qnn_op_config> generic_op_constructor(const ggml_tensor * op,
|
|
const std::string & instance_name,
|
|
std::shared_ptr<qnn::qnn_instance> qnn_instance) {
|
|
GGML_UNUSED(op);
|
|
static_assert(_op < std::size(kOpCaps));
|
|
static_assert(kOpCaps[_op].qnn_op_name != nullptr);
|
|
return std::make_shared<qnn::ggml_qnn_single_op_config>(instance_name, QNN_OP_PACKAGE_NAME_QTI_AISW,
|
|
kOpCaps[_op].qnn_op_name, qnn_instance);
|
|
}
|
|
|
|
// Wraps `value` in a FLOAT_32 QNN scalar and registers it on `op` under `name`.
void add_type_parameters(std::shared_ptr<qnn::ggml_qnn_op_config_base> op, const char * name, float value) {
    Qnn_Scalar_t float_param = QNN_SCALAR_INIT;
    float_param.floatValue   = value;
    float_param.dataType     = QNN_DATATYPE_FLOAT_32;

    op->add_scalar_param(name, float_param);
}
|
|
|
|
template <size_t _op, typename _ggml_op_param_type, typename _qnn_op_type_name>
|
|
std::shared_ptr<qnn::ggml_qnn_op_config> op_constructor_with_type_param(
|
|
const ggml_tensor * op, const std::string & instance_name, std::shared_ptr<qnn::qnn_instance> qnn_instance) {
|
|
static_assert(std::is_base_of<qnn::ggml_qnn_op_config_base, _qnn_op_type_name>::value);
|
|
static_assert(_op < std::size(kOpCaps));
|
|
|
|
constexpr auto & op_caps = kOpCaps[_op];
|
|
static_assert(op_caps.qnn_op_name != nullptr);
|
|
|
|
_ggml_op_param_type op_param;
|
|
memcpy(&op_param, op->op_params, sizeof(op_param));
|
|
auto qnn_op = std::make_shared<_qnn_op_type_name>(instance_name, QNN_OP_PACKAGE_NAME_QTI_AISW, op_caps.qnn_op_name,
|
|
qnn_instance);
|
|
if (op_caps.qnn_param_name) {
|
|
add_type_parameters(qnn_op, op_caps.qnn_param_name, op_param);
|
|
}
|
|
return qnn_op;
|
|
}
|
|
|
|
constexpr const op_constructor_t kOpConstructors[] = {
|
|
nullptr, // GGML_OP_NONE
|
|
nullptr, // GGML_OP_DUP
|
|
generic_op_constructor<GGML_OP_ADD>, // GGML_OP_ADD
|
|
nullptr, // GGML_OP_ADD1
|
|
nullptr, // GGML_OP_ACC
|
|
generic_op_constructor<GGML_OP_SUB>, // GGML_OP_SUB
|
|
generic_op_constructor<GGML_OP_MUL>, // GGML_OP_MUL
|
|
generic_op_constructor<GGML_OP_DIV>, // GGML_OP_DIV
|
|
nullptr, // GGML_OP_SQR
|
|
generic_op_constructor<GGML_OP_SQRT>, // GGML_OP_SQRT
|
|
generic_op_constructor<GGML_OP_LOG>, // GGML_OP_LOG
|
|
nullptr, // GGML_OP_SIN
|
|
nullptr, // GGML_OP_COS
|
|
nullptr, // GGML_OP_SUM
|
|
nullptr, // GGML_OP_SUM_ROWS
|
|
nullptr, // GGML_OP_MEAN
|
|
nullptr, // GGML_OP_ARGMAX
|
|
nullptr, // GGML_OP_COUNT_EQUAL
|
|
nullptr, // GGML_OP_REPEAT
|
|
nullptr, // GGML_OP_REPEAT_BACK
|
|
nullptr, // GGML_OP_CONCAT
|
|
nullptr, // GGML_OP_SILU_BACK
|
|
nullptr, // GGML_OP_NORM
|
|
op_constructor_with_type_param<GGML_OP_RMS_NORM, float, qnn::ggml_qnn_rmsnorm_op_config>, // GGML_OP_RMS_NORM
|
|
nullptr, // GGML_OP_RMS_NORM_BACK
|
|
nullptr, // GGML_OP_GROUP_NORM
|
|
|
|
mat_mul_op_constructor, // GGML_OP_MUL_MAT
|
|
nullptr, // GGML_OP_MUL_MAT_ID
|
|
nullptr, // GGML_OP_OUT_PROD
|
|
|
|
nullptr, // GGML_OP_SCALE
|
|
nullptr, // GGML_OP_SET
|
|
nullptr, // GGML_OP_CPY
|
|
nullptr, // GGML_OP_CONT
|
|
generic_op_constructor<GGML_OP_RESHAPE>, // GGML_OP_RESHAPE
|
|
nullptr, // GGML_OP_VIEW
|
|
nullptr, // GGML_OP_PERMUTE
|
|
nullptr, // GGML_OP_TRANSPOSE
|
|
nullptr, // GGML_OP_GET_ROWS
|
|
nullptr, // GGML_OP_GET_ROWS_BACK
|
|
nullptr, // GGML_OP_DIAG
|
|
nullptr, // GGML_OP_DIAG_MASK_INF
|
|
nullptr, // GGML_OP_DIAG_MASK_ZERO
|
|
nullptr, // GGML_OP_SOFT_MAX
|
|
nullptr, // GGML_OP_SOFT_MAX_BACK
|
|
nullptr, // GGML_OP_ROPE
|
|
nullptr, // GGML_OP_ROPE_BACK
|
|
nullptr, // GGML_OP_CLAMP
|
|
nullptr, // GGML_OP_CONV_TRANSPOSE_1D
|
|
nullptr, // GGML_OP_IM2COL
|
|
nullptr, // GGML_OP_IM2COL_BACK
|
|
nullptr, // GGML_OP_CONV_TRANSPOSE_2D
|
|
nullptr, // GGML_OP_POOL_1D
|
|
nullptr, // GGML_OP_POOL_2D
|
|
nullptr, // GGML_OP_POOL_2D_BACK
|
|
nullptr, // GGML_OP_UPSCALE
|
|
nullptr, // GGML_OP_PAD
|
|
nullptr, // GGML_OP_PAD_REFLECT_1D
|
|
nullptr, // GGML_OP_ARANGE
|
|
nullptr, // GGML_OP_TIMESTEP_EMBEDDING
|
|
nullptr, // GGML_OP_ARGSORT
|
|
nullptr, // GGML_OP_LEAKY_RELU
|
|
|
|
nullptr, // GGML_OP_FLASH_ATTN_EXT
|
|
nullptr, // GGML_OP_FLASH_ATTN_BACK
|
|
nullptr, // GGML_OP_SSM_CONV
|
|
nullptr, // GGML_OP_SSM_SCAN
|
|
nullptr, // GGML_OP_WIN_PART
|
|
nullptr, // GGML_OP_WIN_UNPART
|
|
nullptr, // GGML_OP_GET_REL_POS
|
|
nullptr, // GGML_OP_ADD_REL_POS
|
|
nullptr, // GGML_OP_RWKV_WKV6
|
|
nullptr, // GGML_OP_GATED_LINEAR_ATTN
|
|
|
|
nullptr, // GGML_OP_UNARY
|
|
|
|
nullptr, // GGML_OP_MAP_UNARY
|
|
nullptr, // GGML_OP_MAP_BINARY
|
|
|
|
nullptr, // GGML_OP_MAP_CUSTOM1_F32
|
|
nullptr, // GGML_OP_MAP_CUSTOM2_F32
|
|
nullptr, // GGML_OP_MAP_CUSTOM3_F32
|
|
|
|
nullptr, // GGML_OP_MAP_CUSTOM1
|
|
nullptr, // GGML_OP_MAP_CUSTOM2
|
|
nullptr, // GGML_OP_MAP_CUSTOM3
|
|
|
|
nullptr, // GGML_OP_CROSS_ENTROPY_LOSS
|
|
nullptr, // GGML_OP_CROSS_ENTROPY_LOSS_BACK
|
|
nullptr, // GGML_OP_OPT_STEP_ADAMW
|
|
|
|
// ggml_unary_op
|
|
nullptr, // GGML_UNARY_OP_ABS
|
|
nullptr, // GGML_UNARY_OP_SGN
|
|
nullptr, // GGML_UNARY_OP_NEG
|
|
nullptr, // GGML_UNARY_OP_STEP
|
|
nullptr, // GGML_UNARY_OP_TANH
|
|
nullptr, // GGML_UNARY_OP_ELU
|
|
nullptr, // GGML_UNARY_OP_RELU
|
|
nullptr, // GGML_UNARY_OP_SIGMOID
|
|
nullptr, // GGML_UNARY_OP_GELU
|
|
nullptr, // GGML_UNARY_OP_GELU_QUICK
|
|
nullptr, // GGML_UNARY_OP_SILU
|
|
nullptr, // GGML_UNARY_OP_HARDSWISH
|
|
nullptr, // GGML_UNARY_OP_HARDSIGMOID
|
|
nullptr, // GGML_UNARY_OP_EXP
|
|
};
|
|
|
|
static_assert(kOpConstructors[GGML_OP_NONE] == nullptr, "GGML_OP_NONE does not match the nullptr function");
|
|
static_assert(kOpConstructors[GGML_OP_ADD] == generic_op_constructor<GGML_OP_ADD>,
|
|
"GGML_OP_ADD does not match the generic_op_constructor<GGML_OP_ADD> function");
|
|
static_assert(kOpConstructors[GGML_OP_MUL_MAT] == mat_mul_op_constructor,
|
|
"GGML_OP_MUL_MAT does not match the mat_mul_op_constructor function");
|
|
static_assert(std::size(kOpConstructors) == (GGML_OP_COUNT + GGML_UNARY_OP_COUNT),
|
|
"GGML_OP_COUNT does not match the size of the kOpConstructors table");
|
|
|
|
} // namespace
|
|
|
|
namespace qnn {
|
|
|
|
void append_tensor_shape_and_type(const ggml_tensor * tensor, std::string & output) {
|
|
append_tensor_shape_and_type_impl(tensor, GGML_TYPE_COUNT, output);
|
|
}
|
|
|
|
size_t get_qnn_op_index(const ggml_tensor * tensor) {
|
|
if (tensor->op == GGML_OP_UNARY) {
|
|
return kGgmlUnaryOpStart + ggml_get_unary_op(tensor);
|
|
}
|
|
|
|
return tensor->op;
|
|
}
|
|
|
|
const char * get_qnn_op_name(const ggml_tensor * op) {
|
|
auto op_index = get_qnn_op_index(op);
|
|
GGML_ASSERT(op_index < std::size(kOpCaps));
|
|
GGML_ASSERT(kOpCaps[op_index].qnn_op_name);
|
|
return kOpCaps[op_index].qnn_op_name;
|
|
}
|
|
|
|
void get_qnn_op_desc(const ggml_tensor * op, bool append_dimensions, ggml_type override_data_type,
|
|
std::string & output) {
|
|
auto op_index = get_qnn_op_index(op);
|
|
GGML_ASSERT(op_index < std::size(kOpCaps));
|
|
auto get_desc = kOpCaps[op_index].get_desc;
|
|
if (get_desc) {
|
|
get_desc(op, append_dimensions, override_data_type, output);
|
|
} else {
|
|
generic_get_op_desc(op, append_dimensions, override_data_type, output);
|
|
}
|
|
}
|
|
|
|
std::shared_ptr<ggml_qnn_op_config> create_op(const ggml_tensor * op, const std::string & name,
|
|
qnn_instance_ptr qnn_instance) {
|
|
auto op_index = get_qnn_op_index(op);
|
|
GGML_ASSERT(op_index < std::size(kOpCaps));
|
|
auto op_constructor = kOpConstructors[op_index];
|
|
GGML_ASSERT(op_constructor);
|
|
return op_constructor(op, name, qnn_instance);
|
|
}
|
|
|
|
} // namespace qnn
|