// llama.cpp/ggml/src/ggml-qnn/op-config-caps.cpp

#include "op-config-impl.hpp"
namespace {
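
// Function-pointer types for the dispatch tables below: op_constructor_t builds
// the qnn::ggml_qnn_op_config for a ggml op, and op_description_generator_t
// appends a textual description of an op (used when composing graph keys).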
using op_constructor_t = std::shared_ptr<qnn::ggml_qnn_op_config> (*)(const ggml_tensor *, const std::string &,
                                                                      std::shared_ptr<qnn::qnn_instance>);

using op_description_generator_t = void (*)(const ggml_tensor * op, bool append_dimensions,
                                            ggml_type override_data_type, std::string & output);
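
// Appends the tensor's shape and element type to `output`; for instance a 2x3
// F32 tensor would yield something like "2x3f32" (illustrative, the exact type
// string comes from qnn::get_ggml_type_name). The printed type is the smaller
// of the tensor's own type and `override_data_type`, so passing GGML_TYPE_COUNT
// leaves the tensor's type untouched.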
void append_tensor_shape_and_type_impl(const ggml_tensor * tensor, ggml_type override_data_type, std::string & output) {
    char buffer[256] = {};
    const auto * type_name = qnn::get_ggml_type_name(std::min(tensor->type, override_data_type));
    int len = 0;
    switch (ggml_n_dims(tensor)) {
        case 1:
            len = snprintf(buffer, sizeof(buffer), "%ld%s", (long) tensor->ne[0], type_name);
            break;
        case 2:
            len = snprintf(buffer, sizeof(buffer), "%ldx%ld%s", (long) tensor->ne[0], (long) tensor->ne[1], type_name);
            break;
        case 3:
            len = snprintf(buffer, sizeof(buffer), "%ldx%ldx%ld%s", (long) tensor->ne[0], (long) tensor->ne[1],
                           (long) tensor->ne[2], type_name);
            break;
        case 4:
        default:
            len = snprintf(buffer, sizeof(buffer), "%ldx%ldx%ldx%ld%s", (long) tensor->ne[0], (long) tensor->ne[1],
                           (long) tensor->ne[2], (long) tensor->ne[3], type_name);
            break;
    }

    GGML_ASSERT(len > 0 && len < (int) sizeof(buffer));
    output.append(buffer, len);
}
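
// Builds a graph key from the op name, its output type and the shape/type of
// every source tensor, e.g. roughly "ADDf32_2x3f32_2x3f32" (illustrative).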
void get_graph_key_from_op(const ggml_tensor * op, ggml_type override_data_type, std::string & output) {
    output += ggml_op_desc(op);
    output += qnn::get_ggml_type_name(op->type);
    for (size_t i = 0; i < GGML_MAX_SRC && op->src[i]; ++i) {
        auto * src = op->src[i];
        if (!src) {
            break;
        }

        output += '_';
        append_tensor_shape_and_type_impl(src, override_data_type, output);
    }
}
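
// Builds a compact key from the op name and the op names of its sources,
// e.g. roughly "MUL_MAT(VIEW,PERMUTE)" (illustrative).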
void get_op_key_with_src_op_desc(const ggml_tensor * op, std::string & output) {
    output += ggml_op_desc(op);
    output += '(';
    if (op->src[0]) {
        output += ggml_op_desc(op->src[0]);
    }

    for (size_t i = 1; i < GGML_MAX_SRC && op->src[i]; ++i) {
        output += ',';
        output += ggml_op_desc(op->src[i]);
    }

    output += ')';
}
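
// Default description generator: emits the dimension-based graph key when
// append_dimensions is set, otherwise the source-op-based key.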
void generic_get_op_desc(const ggml_tensor * op, bool append_dimensions, ggml_type override_data_type,
                         std::string & output) {
    if (append_dimensions) {
        get_graph_key_from_op(op, override_data_type, output);
    } else {
        get_op_key_with_src_op_desc(op, output);
    }
}
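
// Capability entry for one ggml op: the QNN op it maps to (nullptr when the op
// is not supported), an optional description generator and an optional QNN
// parameter name for ops that carry a scalar parameter.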
struct qnn_op_caps_t {
    const char *               qnn_op_name    = nullptr;
    op_description_generator_t get_desc       = nullptr;
    const char *               qnn_param_name = nullptr;
};
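
// Capability table indexed by get_qnn_op_index(): ggml_op entries first,
// followed by the ggml_unary_op entries (starting at kGgmlUnaryOpStart).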
constexpr const qnn_op_caps_t kOpCaps[] = {
    {},  // GGML_OP_NONE
    {},  // GGML_OP_DUP
    {
        // GGML_OP_ADD
        QNN_OP_ELEMENT_WISE_ADD,  // qnn_op_name
    },
    {},  // GGML_OP_ADD1
    {},  // GGML_OP_ACC
    {
        // GGML_OP_SUB
        QNN_OP_ELEMENT_WISE_SUBTRACT,  // qnn_op_name
    },
    {
        // GGML_OP_MUL
        QNN_OP_ELEMENT_WISE_MULTIPLY,  // qnn_op_name
    },
    {
        // GGML_OP_DIV
        QNN_OP_ELEMENT_WISE_DIVIDE,  // qnn_op_name
    },
    {},  // GGML_OP_SQR
    {
        // GGML_OP_SQRT
        QNN_OP_ELEMENT_WISE_SQUARE_ROOT,  // qnn_op_name
    },
    {
        // GGML_OP_LOG
        QNN_OP_ELEMENT_WISE_LOG,  // qnn_op_name
    },
    {},  // GGML_OP_SIN
    {},  // GGML_OP_COS
    {},  // GGML_OP_SUM
    {},  // GGML_OP_SUM_ROWS
    {},  // GGML_OP_MEAN
    {},  // GGML_OP_ARGMAX
    {},  // GGML_OP_COUNT_EQUAL
    {},  // GGML_OP_REPEAT
    {},  // GGML_OP_REPEAT_BACK
    {},  // GGML_OP_CONCAT
    {},  // GGML_OP_SILU_BACK
    {},  // GGML_OP_NORM
    {
        // GGML_OP_RMS_NORM
        QNN_OP_RMS_NORM,                // qnn_op_name
        generic_get_op_desc,            // get_desc
        QNN_OP_RMS_NORM_PARAM_EPSILON,  // qnn_param_name
    },
    {},  // GGML_OP_RMS_NORM_BACK
    {},  // GGML_OP_GROUP_NORM
    {
        // GGML_OP_MUL_MAT
        QNN_OP_MAT_MUL,  // qnn_op_name
    },
    {},  // GGML_OP_MUL_MAT_ID
    {},  // GGML_OP_OUT_PROD
    {},  // GGML_OP_SCALE
    {},  // GGML_OP_SET
    {},  // GGML_OP_CPY
    {},  // GGML_OP_CONT
    {
        // GGML_OP_RESHAPE
        QNN_OP_RESHAPE,  // qnn_op_name
    },
    {},  // GGML_OP_VIEW
    {},  // GGML_OP_PERMUTE
    {},  // GGML_OP_TRANSPOSE
    {},  // GGML_OP_GET_ROWS
    {},  // GGML_OP_GET_ROWS_BACK
    {},  // GGML_OP_DIAG
    {},  // GGML_OP_DIAG_MASK_INF
    {},  // GGML_OP_DIAG_MASK_ZERO
    {},  // GGML_OP_SOFT_MAX
    {},  // GGML_OP_SOFT_MAX_BACK
    {},  // GGML_OP_ROPE
    {},  // GGML_OP_ROPE_BACK
    {},  // GGML_OP_CLAMP
    {},  // GGML_OP_CONV_TRANSPOSE_1D
    {},  // GGML_OP_IM2COL
    {},  // GGML_OP_IM2COL_BACK
    {},  // GGML_OP_CONV_TRANSPOSE_2D
    {},  // GGML_OP_POOL_1D
    {},  // GGML_OP_POOL_2D
    {},  // GGML_OP_POOL_2D_BACK
    {},  // GGML_OP_UPSCALE
    {},  // GGML_OP_PAD
    {},  // GGML_OP_PAD_REFLECT_1D
    {},  // GGML_OP_ARANGE
    {},  // GGML_OP_TIMESTEP_EMBEDDING
    {},  // GGML_OP_ARGSORT
    {},  // GGML_OP_LEAKY_RELU
    {},  // GGML_OP_FLASH_ATTN_EXT
    {},  // GGML_OP_FLASH_ATTN_BACK
    {},  // GGML_OP_SSM_CONV
    {},  // GGML_OP_SSM_SCAN
    {},  // GGML_OP_WIN_PART
    {},  // GGML_OP_WIN_UNPART
    {},  // GGML_OP_GET_REL_POS
    {},  // GGML_OP_ADD_REL_POS
    {},  // GGML_OP_RWKV_WKV6
    {},  // GGML_OP_GATED_LINEAR_ATTN
    {},  // GGML_OP_UNARY
    {},  // GGML_OP_MAP_UNARY
    {},  // GGML_OP_MAP_BINARY
    {},  // GGML_OP_MAP_CUSTOM1_F32
    {},  // GGML_OP_MAP_CUSTOM2_F32
    {},  // GGML_OP_MAP_CUSTOM3_F32
    {},  // GGML_OP_MAP_CUSTOM1
    {},  // GGML_OP_MAP_CUSTOM2
    {},  // GGML_OP_MAP_CUSTOM3
    {},  // GGML_OP_CROSS_ENTROPY_LOSS
    {},  // GGML_OP_CROSS_ENTROPY_LOSS_BACK
    {},  // GGML_OP_OPT_STEP_ADAMW

    // ggml_unary_op
    {},  // GGML_UNARY_OP_ABS
    {},  // GGML_UNARY_OP_SGN
    {},  // GGML_UNARY_OP_NEG
    {},  // GGML_UNARY_OP_STEP
    {},  // GGML_UNARY_OP_TANH
    {},  // GGML_UNARY_OP_ELU
    {},  // GGML_UNARY_OP_RELU
    {},  // GGML_UNARY_OP_SIGMOID
    {
        // GGML_UNARY_OP_GELU
        QNN_OP_GELU,  // qnn_op_name
    },
    {},  // GGML_UNARY_OP_GELU_QUICK
    {},  // GGML_UNARY_OP_SILU
    {},  // GGML_UNARY_OP_HARDSWISH
    {},  // GGML_UNARY_OP_HARDSIGMOID
    {},  // GGML_UNARY_OP_EXP
};
static_assert(kOpCaps[GGML_OP_NONE].get_desc == nullptr, "GGML_OP_NONE should not have get_desc function");
static_assert(kOpCaps[GGML_OP_ADD].qnn_op_name, "GGML_OP_ADD does not have qnn_op_name in the kOpCaps table");
static_assert(kOpCaps[GGML_OP_MUL_MAT].qnn_op_name, "GGML_OP_MUL_MAT does not have qnn_op_name in the kOpCaps table");
static_assert(kOpCaps[GGML_OP_MUL].qnn_op_name, "GGML_OP_MUL does not have qnn_op_name in the kOpCaps table");
static_assert(kOpCaps[GGML_OP_LOG].qnn_op_name, "GGML_OP_LOG does not have qnn_op_name in the kOpCaps table");
static_assert(std::size(kOpCaps) == (GGML_OP_COUNT + GGML_UNARY_OP_COUNT),
              "GGML_OP_COUNT does not match the size of the kOpCaps table");
std::shared_ptr<qnn::ggml_qnn_op_config> mat_mul_op_constructor(const ggml_tensor * op,
                                                                const std::string & instance_name,
                                                                std::shared_ptr<qnn::qnn_instance> qnn_instance) {
    GGML_UNUSED(op);
    QNN_LOG_DEBUG("create QNN_OP_MAT_MUL, name %s\n", instance_name.c_str());
    return std::make_shared<qnn::ggml_qnn_matmul_op_config>(instance_name, qnn_instance);
}
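
// Generic constructor for ops that map 1:1 onto a single QNN op; the QNN op
// name is taken from the kOpCaps entry selected by the _op template parameter.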
template <size_t _op>
std::shared_ptr<qnn::ggml_qnn_op_config> generic_op_constructor(const ggml_tensor * op,
                                                                const std::string & instance_name,
                                                                std::shared_ptr<qnn::qnn_instance> qnn_instance) {
    GGML_UNUSED(op);
    static_assert(_op < std::size(kOpCaps));
    static_assert(kOpCaps[_op].qnn_op_name != nullptr);
    return std::make_shared<qnn::ggml_qnn_single_op_config>(instance_name, QNN_OP_PACKAGE_NAME_QTI_AISW,
                                                            kOpCaps[_op].qnn_op_name, qnn_instance);
}
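
// Attaches a float scalar parameter (for example an epsilon value) to an op
// config.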
void add_type_parameters(std::shared_ptr<qnn::ggml_qnn_op_config_base> op, const char * name, float value) {
    Qnn_Scalar_t scalar = QNN_SCALAR_INIT;
    scalar.dataType     = QNN_DATATYPE_FLOAT_32;
    scalar.floatValue   = value;
    op->add_scalar_param(name, scalar);
}
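
// Constructor for ops that carry one scalar value in op->op_params (currently
// only GGML_OP_RMS_NORM with its epsilon); the value is forwarded to QNN as a
// float scalar parameter when the kOpCaps entry provides a qnn_param_name.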
template <size_t _op, typename _ggml_op_param_type, typename _qnn_op_type_name>
std::shared_ptr<qnn::ggml_qnn_op_config> op_constructor_with_type_param(
    const ggml_tensor * op, const std::string & instance_name, std::shared_ptr<qnn::qnn_instance> qnn_instance) {
    static_assert(std::is_base_of<qnn::ggml_qnn_op_config_base, _qnn_op_type_name>::value);
    static_assert(_op < std::size(kOpCaps));

    constexpr auto & op_caps = kOpCaps[_op];
    static_assert(op_caps.qnn_op_name != nullptr);

    _ggml_op_param_type op_param;
    memcpy(&op_param, op->op_params, sizeof(op_param));
    auto qnn_op = std::make_shared<_qnn_op_type_name>(instance_name, QNN_OP_PACKAGE_NAME_QTI_AISW, op_caps.qnn_op_name,
                                                      qnn_instance);
    if (op_caps.qnn_param_name) {
        add_type_parameters(qnn_op, op_caps.qnn_param_name, op_param);
    }

    return qnn_op;
}
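
// Constructor table, indexed the same way as kOpCaps; a nullptr entry means no
// QNN op config can be created for that op.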
constexpr const op_constructor_t kOpConstructors[] = {
    nullptr,  // GGML_OP_NONE
    nullptr,  // GGML_OP_DUP
    generic_op_constructor<GGML_OP_ADD>,  // GGML_OP_ADD
    nullptr,  // GGML_OP_ADD1
    nullptr,  // GGML_OP_ACC
    generic_op_constructor<GGML_OP_SUB>,  // GGML_OP_SUB
    generic_op_constructor<GGML_OP_MUL>,  // GGML_OP_MUL
    generic_op_constructor<GGML_OP_DIV>,  // GGML_OP_DIV
    nullptr,  // GGML_OP_SQR
    generic_op_constructor<GGML_OP_SQRT>,  // GGML_OP_SQRT
    generic_op_constructor<GGML_OP_LOG>,  // GGML_OP_LOG
    nullptr,  // GGML_OP_SIN
    nullptr,  // GGML_OP_COS
    nullptr,  // GGML_OP_SUM
    nullptr,  // GGML_OP_SUM_ROWS
    nullptr,  // GGML_OP_MEAN
    nullptr,  // GGML_OP_ARGMAX
    nullptr,  // GGML_OP_COUNT_EQUAL
    nullptr,  // GGML_OP_REPEAT
    nullptr,  // GGML_OP_REPEAT_BACK
    nullptr,  // GGML_OP_CONCAT
    nullptr,  // GGML_OP_SILU_BACK
    nullptr,  // GGML_OP_NORM
    op_constructor_with_type_param<GGML_OP_RMS_NORM, float, qnn::ggml_qnn_rmsnorm_op_config>,  // GGML_OP_RMS_NORM
    nullptr,  // GGML_OP_RMS_NORM_BACK
    nullptr,  // GGML_OP_GROUP_NORM
    mat_mul_op_constructor,  // GGML_OP_MUL_MAT
    nullptr,  // GGML_OP_MUL_MAT_ID
    nullptr,  // GGML_OP_OUT_PROD
    nullptr,  // GGML_OP_SCALE
    nullptr,  // GGML_OP_SET
    nullptr,  // GGML_OP_CPY
    nullptr,  // GGML_OP_CONT
    generic_op_constructor<GGML_OP_RESHAPE>,  // GGML_OP_RESHAPE
    nullptr,  // GGML_OP_VIEW
    nullptr,  // GGML_OP_PERMUTE
    nullptr,  // GGML_OP_TRANSPOSE
    nullptr,  // GGML_OP_GET_ROWS
    nullptr,  // GGML_OP_GET_ROWS_BACK
    nullptr,  // GGML_OP_DIAG
    nullptr,  // GGML_OP_DIAG_MASK_INF
    nullptr,  // GGML_OP_DIAG_MASK_ZERO
    nullptr,  // GGML_OP_SOFT_MAX
    nullptr,  // GGML_OP_SOFT_MAX_BACK
    nullptr,  // GGML_OP_ROPE
    nullptr,  // GGML_OP_ROPE_BACK
    nullptr,  // GGML_OP_CLAMP
    nullptr,  // GGML_OP_CONV_TRANSPOSE_1D
    nullptr,  // GGML_OP_IM2COL
    nullptr,  // GGML_OP_IM2COL_BACK
    nullptr,  // GGML_OP_CONV_TRANSPOSE_2D
    nullptr,  // GGML_OP_POOL_1D
    nullptr,  // GGML_OP_POOL_2D
    nullptr,  // GGML_OP_POOL_2D_BACK
    nullptr,  // GGML_OP_UPSCALE
    nullptr,  // GGML_OP_PAD
    nullptr,  // GGML_OP_PAD_REFLECT_1D
    nullptr,  // GGML_OP_ARANGE
    nullptr,  // GGML_OP_TIMESTEP_EMBEDDING
    nullptr,  // GGML_OP_ARGSORT
    nullptr,  // GGML_OP_LEAKY_RELU
    nullptr,  // GGML_OP_FLASH_ATTN_EXT
    nullptr,  // GGML_OP_FLASH_ATTN_BACK
    nullptr,  // GGML_OP_SSM_CONV
    nullptr,  // GGML_OP_SSM_SCAN
    nullptr,  // GGML_OP_WIN_PART
    nullptr,  // GGML_OP_WIN_UNPART
    nullptr,  // GGML_OP_GET_REL_POS
    nullptr,  // GGML_OP_ADD_REL_POS
    nullptr,  // GGML_OP_RWKV_WKV6
    nullptr,  // GGML_OP_GATED_LINEAR_ATTN
    nullptr,  // GGML_OP_UNARY
    nullptr,  // GGML_OP_MAP_UNARY
    nullptr,  // GGML_OP_MAP_BINARY
    nullptr,  // GGML_OP_MAP_CUSTOM1_F32
    nullptr,  // GGML_OP_MAP_CUSTOM2_F32
    nullptr,  // GGML_OP_MAP_CUSTOM3_F32
    nullptr,  // GGML_OP_MAP_CUSTOM1
    nullptr,  // GGML_OP_MAP_CUSTOM2
    nullptr,  // GGML_OP_MAP_CUSTOM3
    nullptr,  // GGML_OP_CROSS_ENTROPY_LOSS
    nullptr,  // GGML_OP_CROSS_ENTROPY_LOSS_BACK
    nullptr,  // GGML_OP_OPT_STEP_ADAMW

    // ggml_unary_op
    nullptr,  // GGML_UNARY_OP_ABS
    nullptr,  // GGML_UNARY_OP_SGN
    nullptr,  // GGML_UNARY_OP_NEG
    nullptr,  // GGML_UNARY_OP_STEP
    nullptr,  // GGML_UNARY_OP_TANH
    nullptr,  // GGML_UNARY_OP_ELU
    nullptr,  // GGML_UNARY_OP_RELU
    nullptr,  // GGML_UNARY_OP_SIGMOID
    nullptr,  // GGML_UNARY_OP_GELU
    nullptr,  // GGML_UNARY_OP_GELU_QUICK
    nullptr,  // GGML_UNARY_OP_SILU
    nullptr,  // GGML_UNARY_OP_HARDSWISH
    nullptr,  // GGML_UNARY_OP_HARDSIGMOID
    nullptr,  // GGML_UNARY_OP_EXP
};
static_assert(kOpConstructors[GGML_OP_NONE] == nullptr, "GGML_OP_NONE does not match the nullptr function");
static_assert(kOpConstructors[GGML_OP_ADD] == generic_op_constructor<GGML_OP_ADD>,
              "GGML_OP_ADD does not match the generic_op_constructor<GGML_OP_ADD> function");
static_assert(kOpConstructors[GGML_OP_MUL_MAT] == mat_mul_op_constructor,
              "GGML_OP_MUL_MAT does not match the mat_mul_op_constructor function");
static_assert(std::size(kOpConstructors) == (GGML_OP_COUNT + GGML_UNARY_OP_COUNT),
              "GGML_OP_COUNT does not match the size of the kOpConstructors table");
} // namespace
namespace qnn {
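
// Public wrapper: append shape/type without overriding the tensor's data type
// (GGML_TYPE_COUNT disables the override in the impl above).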
void append_tensor_shape_and_type(const ggml_tensor * tensor, std::string & output) {
    append_tensor_shape_and_type_impl(tensor, GGML_TYPE_COUNT, output);
}
size_t get_qnn_op_index(const ggml_tensor * tensor) {
    if (tensor->op == GGML_OP_UNARY) {
        return kGgmlUnaryOpStart + ggml_get_unary_op(tensor);
    }

    return tensor->op;
}
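
// Returns the QNN op name for a supported op; asserts if the op has no mapping.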
const char * get_qnn_op_name(const ggml_tensor * op) {
    auto op_index = get_qnn_op_index(op);
    GGML_ASSERT(op_index < std::size(kOpCaps));
    GGML_ASSERT(kOpCaps[op_index].qnn_op_name);
    return kOpCaps[op_index].qnn_op_name;
}
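
// Generates the op description/graph key, preferring the per-op generator from
// kOpCaps and falling back to generic_get_op_desc.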
void get_qnn_op_desc(const ggml_tensor * op, bool append_dimensions, ggml_type override_data_type,
                     std::string & output) {
    auto op_index = get_qnn_op_index(op);
    GGML_ASSERT(op_index < std::size(kOpCaps));
    auto get_desc = kOpCaps[op_index].get_desc;
    if (get_desc) {
        get_desc(op, append_dimensions, override_data_type, output);
    } else {
        generic_get_op_desc(op, append_dimensions, override_data_type, output);
    }
}
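
// Creates the QNN op config for a supported ggml op via kOpConstructors.
// Illustrative usage (assumed caller, not part of this file; `dst` and
// `instance` are hypothetical variables):
//   std::string key;
//   qnn::get_qnn_op_desc(dst, /*append_dimensions=*/true, GGML_TYPE_COUNT, key);
//   auto config = qnn::create_op(dst, key, instance);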
std::shared_ptr<ggml_qnn_op_config> create_op(const ggml_tensor * op, const std::string & name,
                                              qnn_instance_ptr qnn_instance) {
    auto op_index = get_qnn_op_index(op);
    GGML_ASSERT(op_index < std::size(kOpCaps));
    auto op_constructor = kOpConstructors[op_index];
    GGML_ASSERT(op_constructor);
    return op_constructor(op, name, qnn_instance);
}
} // namespace qnn