feat: add QNN_OP_TRANSPOSE (#6)
* redo: add convert nodes This reverts commit 8448acd5ebf8fe86ab9d25313b64a15c811ef96e. * align clang format with cann * rename binary_op -> general_op because there are some ops that will only take 1 param * Revert "rename binary_op -> general_op" This reverts commit 5be63b1a0dc4614457785367dade62158fe46214. * wip * add GGML_OP_PERMUTE * add GGML_OP_VIEW and GGML_OP_GET_ROWS * wip * Revert "wip" This reverts commit 772462ca6cfa01ea31bde725c2da60076ad9385f.
This commit is contained in:
parent
0fec56fd57
commit
8ad86dc703
|
|
@ -57,30 +57,30 @@ struct qnn_device_caps {
|
||||||
};
|
};
|
||||||
|
|
||||||
const qnn_device_caps kDeviceCaps[GGML_QNN_MAX_DEVICES]{
|
const qnn_device_caps kDeviceCaps[GGML_QNN_MAX_DEVICES]{
|
||||||
{ "qnn-cpu",
|
{// https://docs.qualcomm.com/bundle/publicresource/topics/80-63442-50/CpuOpDefSupplement.html#matmul
|
||||||
"Qualcomm Kryo CPU",
|
"qnn-cpu",
|
||||||
"libQnnCpu.so",
|
"Qualcomm Kryo CPU",
|
||||||
GGML_BACKEND_DEVICE_TYPE_CPU,
|
"libQnnCpu.so",
|
||||||
{ GGML_TYPE_F32,
|
GGML_BACKEND_DEVICE_TYPE_CPU,
|
||||||
GGML_TYPE_I8 } }, // https://docs.qualcomm.com/bundle/publicresource/topics/80-63442-50/CpuOpDefSupplement.html#matmul
|
{GGML_TYPE_F32, GGML_TYPE_I8}},
|
||||||
{ "qnn-gpu",
|
{// https://docs.qualcomm.com/bundle/publicresource/topics/80-63442-50/GpuOpDefSupplement.html#matmul
|
||||||
"Qualcomm Adreno GPU",
|
"qnn-gpu",
|
||||||
"libQnnGpu.so",
|
"Qualcomm Adreno GPU",
|
||||||
GGML_BACKEND_DEVICE_TYPE_GPU,
|
"libQnnGpu.so",
|
||||||
{ GGML_TYPE_F32,
|
GGML_BACKEND_DEVICE_TYPE_GPU,
|
||||||
GGML_TYPE_F16 } }, // https://docs.qualcomm.com/bundle/publicresource/topics/80-63442-50/GpuOpDefSupplement.html#matmul
|
{GGML_TYPE_F32, GGML_TYPE_F16}},
|
||||||
{ "qnn-npu",
|
{// https://docs.qualcomm.com/bundle/publicresource/topics/80-63442-50/HtpOpDefSupplement.html#matmul
|
||||||
"Qualcomm NPU",
|
"qnn-npu",
|
||||||
"libQnnHtp.so",
|
"Qualcomm NPU",
|
||||||
GGML_BACKEND_DEVICE_TYPE_GPU,
|
"libQnnHtp.so",
|
||||||
{ GGML_TYPE_F32, GGML_TYPE_F16, GGML_TYPE_I16,
|
GGML_BACKEND_DEVICE_TYPE_GPU,
|
||||||
GGML_TYPE_I8 } }, // https://docs.qualcomm.com/bundle/publicresource/topics/80-63442-50/HtpOpDefSupplement.html#matmul
|
{GGML_TYPE_F32, GGML_TYPE_F16, GGML_TYPE_I16, GGML_TYPE_I8}},
|
||||||
};
|
};
|
||||||
|
|
||||||
class ggml_backend_qnn_buffer_context {
|
class ggml_backend_qnn_buffer_context {
|
||||||
public:
|
public:
|
||||||
ggml_backend_qnn_buffer_context(QNNBackend device, std::shared_ptr<qnn::qnn_instance> instance, size_t size) :
|
ggml_backend_qnn_buffer_context(QNNBackend device, std::shared_ptr<qnn::qnn_instance> instance, size_t size)
|
||||||
_instance(instance), _name(QNN_BACKEND_NAME + std::to_string(device)) {
|
: _instance(instance), _name(QNN_BACKEND_NAME + std::to_string(device)) {
|
||||||
|
|
||||||
// TODO: fix this for other platforms
|
// TODO: fix this for other platforms
|
||||||
size_t size_page = sysconf(_SC_PAGESIZE);
|
size_t size_page = sysconf(_SC_PAGESIZE);
|
||||||
|
|
@ -251,7 +251,7 @@ ggml_backend_buffer_type_t ggml_backend_qnn_buffer_type(ggml_backend_dev_t dev)
|
||||||
if (!ggml_backend_qnn_buffer_type_initialized) {
|
if (!ggml_backend_qnn_buffer_type_initialized) {
|
||||||
for (size_t i = 0; i < GGML_QNN_MAX_DEVICES; i++) {
|
for (size_t i = 0; i < GGML_QNN_MAX_DEVICES; i++) {
|
||||||
auto &context = ggml_backend_qnn_buffer_type_contexts[i];
|
auto &context = ggml_backend_qnn_buffer_type_contexts[i];
|
||||||
context = { std::string(QNN_BACKEND_NAME) + std::to_string(i) };
|
context = {std::string(QNN_BACKEND_NAME) + std::to_string(i)};
|
||||||
ggml_backend_qnn_buffer_types[i] = {
|
ggml_backend_qnn_buffer_types[i] = {
|
||||||
/* .iface = */ {
|
/* .iface = */ {
|
||||||
/* .get_name = */ ggml_backend_qnn_buffer_type_name,
|
/* .get_name = */ ggml_backend_qnn_buffer_type_name,
|
||||||
|
|
@ -348,8 +348,8 @@ void ggml_backend_qnn_device_get_props(ggml_backend_dev_t dev, struct ggml_backe
|
||||||
}
|
}
|
||||||
|
|
||||||
ggml_guid_t ggml_backend_qnn_guid() {
|
ggml_guid_t ggml_backend_qnn_guid() {
|
||||||
static ggml_guid guid = { 0x1a, 0x2b, 0x3c, 0x4d, 0x5e, 0x6f, 0x70, 0x81,
|
static ggml_guid guid = {0x1a, 0x2b, 0x3c, 0x4d, 0x5e, 0x6f, 0x70, 0x81,
|
||||||
0x92, 0xa3, 0xb4, 0xc5, 0xd6, 0xe7, 0xf8, 0x09 };
|
0x92, 0xa3, 0xb4, 0xc5, 0xd6, 0xe7, 0xf8, 0x09};
|
||||||
return &guid;
|
return &guid;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -511,7 +511,7 @@ const ggml_backend_reg_i ggml_backend_qnn_reg_interface = {
|
||||||
} // namespace
|
} // namespace
|
||||||
|
|
||||||
ggml_backend_reg_t ggml_backend_qnn_reg() {
|
ggml_backend_reg_t ggml_backend_qnn_reg() {
|
||||||
static ggml_backend_qnn_reg_impl reg{ ggml_backend_qnn_reg_interface };
|
static ggml_backend_qnn_reg_impl reg{ggml_backend_qnn_reg_interface};
|
||||||
static bool initialized = false;
|
static bool initialized = false;
|
||||||
static std::mutex mutex;
|
static std::mutex mutex;
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -3,16 +3,50 @@ BasedOnStyle: Google
|
||||||
IndentWidth: 4
|
IndentWidth: 4
|
||||||
AccessModifierOffset: -4
|
AccessModifierOffset: -4
|
||||||
AlignAfterOpenBracket: Align
|
AlignAfterOpenBracket: Align
|
||||||
AlignOperands: true
|
AlignConsecutiveMacros: false
|
||||||
|
AlignConsecutiveAssignments: false
|
||||||
|
AlignConsecutiveDeclarations: false
|
||||||
|
AlignEscapedNewlines: Left
|
||||||
|
AlignOperands: true
|
||||||
AlignTrailingComments: true
|
AlignTrailingComments: true
|
||||||
|
AllowAllArgumentsOnNextLine: true
|
||||||
|
AllowAllConstructorInitializersOnNextLine: true
|
||||||
|
AllowAllParametersOfDeclarationOnNextLine: true
|
||||||
|
AllowShortBlocksOnASingleLine: Never
|
||||||
|
AllowShortCaseLabelsOnASingleLine: false
|
||||||
|
AllowShortFunctionsOnASingleLine: All
|
||||||
|
AllowShortLambdasOnASingleLine: All
|
||||||
|
AllowShortIfStatementsOnASingleLine: WithoutElse
|
||||||
|
AllowShortLoopsOnASingleLine: true
|
||||||
|
AlwaysBreakAfterDefinitionReturnType: None
|
||||||
|
AlwaysBreakAfterReturnType: None
|
||||||
|
AlwaysBreakBeforeMultilineStrings: true
|
||||||
|
AlwaysBreakTemplateDeclarations: Yes
|
||||||
BinPackArguments: true
|
BinPackArguments: true
|
||||||
BinPackParameters: true
|
BinPackParameters: true
|
||||||
BreakBeforeBraces: Custom
|
BraceWrapping:
|
||||||
BreakConstructorInitializers: AfterColon
|
AfterCaseLabel: false
|
||||||
|
AfterClass: false
|
||||||
|
AfterControlStatement: false
|
||||||
|
AfterEnum: false
|
||||||
|
AfterFunction: false
|
||||||
|
AfterNamespace: false
|
||||||
|
AfterObjCDeclaration: false
|
||||||
|
AfterStruct: false
|
||||||
|
AfterUnion: false
|
||||||
|
AfterExternBlock: false
|
||||||
|
BeforeCatch: false
|
||||||
|
BeforeElse: false
|
||||||
|
IndentBraces: false
|
||||||
|
SplitEmptyFunction: true
|
||||||
|
SplitEmptyRecord: true
|
||||||
|
SplitEmptyNamespace: true
|
||||||
ColumnLimit: 120
|
ColumnLimit: 120
|
||||||
Cpp11BracedListStyle: false
|
ConstructorInitializerIndentWidth: 4
|
||||||
|
ContinuationIndentWidth: 4
|
||||||
|
Cpp11BracedListStyle: true
|
||||||
DerivePointerAlignment: false
|
DerivePointerAlignment: false
|
||||||
IncludeCategories:
|
IncludeCategories:
|
||||||
- Regex: '^<.*\.h>'
|
- Regex: '^<.*\.h>'
|
||||||
Priority: 1
|
Priority: 1
|
||||||
- Regex: '^<.*'
|
- Regex: '^<.*'
|
||||||
|
|
@ -28,4 +62,4 @@ MaxEmptyLinesToKeep: 1
|
||||||
PointerAlignment: Right
|
PointerAlignment: Right
|
||||||
SortIncludes: true
|
SortIncludes: true
|
||||||
SpacesBeforeTrailingComments: 1
|
SpacesBeforeTrailingComments: 1
|
||||||
UseTab: Never
|
UseTab: Never
|
||||||
|
|
|
||||||
|
|
@ -92,10 +92,10 @@ qnn::ggml_tensor_array_t to_ggml_tensor_array(const std::array<ggml_tensor *, _S
|
||||||
return qnn::ggml_tensor_array_t(array.data(), array.data() + _Size);
|
return qnn::ggml_tensor_array_t(array.data(), array.data() + _Size);
|
||||||
}
|
}
|
||||||
|
|
||||||
template <size_t _InputSize, size_t _OutputSize>
|
template <size_t _InputSize>
|
||||||
bool execute_graph(qnn::ggml_qnn_graph *graph, const std::array<ggml_tensor *, _InputSize> &inputs,
|
bool execute_graph(qnn::ggml_qnn_graph *graph, const std::array<ggml_tensor *, _InputSize> &inputs,
|
||||||
const std::array<ggml_tensor *, _OutputSize> &outputs) {
|
ggml_tensor *output) {
|
||||||
if (!graph->execute(to_ggml_tensor_array<_InputSize>(inputs), to_ggml_tensor_array<_OutputSize>(outputs))) {
|
if (!graph->execute(to_ggml_tensor_array<_InputSize>(inputs), to_ggml_tensor_array<1>({output}))) {
|
||||||
QNN_LOG_WARN("execute failed\n");
|
QNN_LOG_WARN("execute failed\n");
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
@ -154,37 +154,37 @@ constexpr const char *kGgmlOpToQnnOp[] = {
|
||||||
nullptr, // GGML_OP_MUL_MAT_ID
|
nullptr, // GGML_OP_MUL_MAT_ID
|
||||||
nullptr, // GGML_OP_OUT_PROD
|
nullptr, // GGML_OP_OUT_PROD
|
||||||
|
|
||||||
nullptr, // GGML_OP_SCALE
|
nullptr, // GGML_OP_SCALE
|
||||||
nullptr, // GGML_OP_SET
|
nullptr, // GGML_OP_SET
|
||||||
nullptr, // GGML_OP_CPY
|
nullptr, // GGML_OP_CPY
|
||||||
nullptr, // GGML_OP_CONT
|
nullptr, // GGML_OP_CONT
|
||||||
nullptr, // GGML_OP_RESHAPE
|
nullptr, // GGML_OP_RESHAPE
|
||||||
nullptr, // GGML_OP_VIEW
|
nullptr, // GGML_OP_VIEW
|
||||||
nullptr, // GGML_OP_PERMUTE
|
QNN_OP_TRANSPOSE, // GGML_OP_PERMUTE
|
||||||
nullptr, // GGML_OP_TRANSPOSE
|
nullptr, // GGML_OP_TRANSPOSE
|
||||||
nullptr, // GGML_OP_GET_ROWS
|
nullptr, // GGML_OP_GET_ROWS
|
||||||
nullptr, // GGML_OP_GET_ROWS_BACK
|
nullptr, // GGML_OP_GET_ROWS_BACK
|
||||||
nullptr, // GGML_OP_DIAG
|
nullptr, // GGML_OP_DIAG
|
||||||
nullptr, // GGML_OP_DIAG_MASK_INF
|
nullptr, // GGML_OP_DIAG_MASK_INF
|
||||||
nullptr, // GGML_OP_DIAG_MASK_ZERO
|
nullptr, // GGML_OP_DIAG_MASK_ZERO
|
||||||
nullptr, // GGML_OP_SOFT_MAX
|
nullptr, // GGML_OP_SOFT_MAX
|
||||||
nullptr, // GGML_OP_SOFT_MAX_BACK
|
nullptr, // GGML_OP_SOFT_MAX_BACK
|
||||||
nullptr, // GGML_OP_ROPE
|
nullptr, // GGML_OP_ROPE
|
||||||
nullptr, // GGML_OP_ROPE_BACK
|
nullptr, // GGML_OP_ROPE_BACK
|
||||||
nullptr, // GGML_OP_CLAMP
|
nullptr, // GGML_OP_CLAMP
|
||||||
nullptr, // GGML_OP_CONV_TRANSPOSE_1D
|
nullptr, // GGML_OP_CONV_TRANSPOSE_1D
|
||||||
nullptr, // GGML_OP_IM2COL
|
nullptr, // GGML_OP_IM2COL
|
||||||
nullptr, // GGML_OP_IM2COL_BACK
|
nullptr, // GGML_OP_IM2COL_BACK
|
||||||
nullptr, // GGML_OP_CONV_TRANSPOSE_2D
|
nullptr, // GGML_OP_CONV_TRANSPOSE_2D
|
||||||
nullptr, // GGML_OP_POOL_1D
|
nullptr, // GGML_OP_POOL_1D
|
||||||
nullptr, // GGML_OP_POOL_2D
|
nullptr, // GGML_OP_POOL_2D
|
||||||
nullptr, // GGML_OP_POOL_2D_BACK
|
nullptr, // GGML_OP_POOL_2D_BACK
|
||||||
nullptr, // GGML_OP_UPSCALE
|
nullptr, // GGML_OP_UPSCALE
|
||||||
nullptr, // GGML_OP_PAD
|
nullptr, // GGML_OP_PAD
|
||||||
nullptr, // GGML_OP_ARANGE
|
nullptr, // GGML_OP_ARANGE
|
||||||
nullptr, // GGML_OP_TIMESTEP_EMBEDDING
|
nullptr, // GGML_OP_TIMESTEP_EMBEDDING
|
||||||
nullptr, // GGML_OP_ARGSORT
|
nullptr, // GGML_OP_ARGSORT
|
||||||
nullptr, // GGML_OP_LEAKY_RELU
|
nullptr, // GGML_OP_LEAKY_RELU
|
||||||
|
|
||||||
nullptr, // GGML_OP_FLASH_ATTN_EXT
|
nullptr, // GGML_OP_FLASH_ATTN_EXT
|
||||||
nullptr, // GGML_OP_FLASH_ATTN_BACK
|
nullptr, // GGML_OP_FLASH_ATTN_BACK
|
||||||
|
|
@ -235,16 +235,16 @@ static_assert(sizeof(kGgmlOpToQnnOp) / sizeof(kGgmlOpToQnnOp[0]) == (GGML_OP_COU
|
||||||
static_assert(kGgmlOpToQnnOp[GGML_UNARY_OP_GELU + kGgmlUnaryOpStart] != nullptr,
|
static_assert(kGgmlOpToQnnOp[GGML_UNARY_OP_GELU + kGgmlUnaryOpStart] != nullptr,
|
||||||
"GGML_UNARY_OP_GELU does not correspond to QNN_OP_GELU");
|
"GGML_UNARY_OP_GELU does not correspond to QNN_OP_GELU");
|
||||||
|
|
||||||
template <size_t _InputSize, size_t _OutputSize>
|
template <size_t _InputSize>
|
||||||
qnn::ggml_qnn_graph *get_qnn_graph_from_cache(ggml_backend_qnn_device_context *ctx, size_t op,
|
qnn::ggml_qnn_graph *get_qnn_graph_from_cache(ggml_backend_qnn_device_context *ctx, size_t op,
|
||||||
const std::array<ggml_tensor *, _InputSize> &inputs,
|
const std::array<ggml_tensor *, _InputSize> &inputs,
|
||||||
const std::array<ggml_tensor *, _OutputSize> &outputs) {
|
ggml_tensor *output) {
|
||||||
GGML_ASSERT(op < (GGML_OP_COUNT + GGML_UNARY_OP_COUNT));
|
GGML_ASSERT(op < (GGML_OP_COUNT + GGML_UNARY_OP_COUNT));
|
||||||
|
|
||||||
auto &graph_cache = ctx->qnn_graph_cache;
|
auto &graph_cache = ctx->qnn_graph_cache;
|
||||||
const auto *op_name =
|
const auto *op_name =
|
||||||
op < kGgmlUnaryOpStart ? ggml_op_name(ggml_op(op)) : ggml_unary_op_name(ggml_unary_op(op - kGgmlUnaryOpStart));
|
op < kGgmlUnaryOpStart ? ggml_op_name(ggml_op(op)) : ggml_unary_op_name(ggml_unary_op(op - kGgmlUnaryOpStart));
|
||||||
auto graph_key = get_graph_key<_InputSize, _OutputSize>(op_name, inputs, outputs);
|
auto graph_key = get_graph_key<_InputSize, 1>(op_name, inputs, {output});
|
||||||
auto it = graph_cache.find(graph_key);
|
auto it = graph_cache.find(graph_key);
|
||||||
qnn::ggml_qnn_graph *graph_ptr = nullptr;
|
qnn::ggml_qnn_graph *graph_ptr = nullptr;
|
||||||
if (it != graph_cache.end()) {
|
if (it != graph_cache.end()) {
|
||||||
|
|
@ -259,7 +259,7 @@ qnn::ggml_qnn_graph *get_qnn_graph_from_cache(ggml_backend_qnn_device_context *c
|
||||||
|
|
||||||
auto op_constructor = qnn::create_op_constructor(kGgmlOpToQnnOp[op]);
|
auto op_constructor = qnn::create_op_constructor(kGgmlOpToQnnOp[op]);
|
||||||
if (!graph->build_graph(op_constructor, to_ggml_tensor_array<_InputSize>(inputs),
|
if (!graph->build_graph(op_constructor, to_ggml_tensor_array<_InputSize>(inputs),
|
||||||
to_ggml_tensor_array<_OutputSize>(outputs))) {
|
to_ggml_tensor_array<1>({output}))) {
|
||||||
QNN_LOG_ERROR("build_graph failed\n");
|
QNN_LOG_ERROR("build_graph failed\n");
|
||||||
return nullptr;
|
return nullptr;
|
||||||
}
|
}
|
||||||
|
|
@ -278,9 +278,9 @@ bool qnn_binary_op_impl(ggml_backend_qnn_device_context *ctx, ggml_tensor *src0,
|
||||||
CHECK_PARAMS(ctx, src0, src1, dst);
|
CHECK_PARAMS(ctx, src0, src1, dst);
|
||||||
|
|
||||||
bool succeed = false;
|
bool succeed = false;
|
||||||
auto *graph_ptr = get_qnn_graph_from_cache<2, 1>(ctx, _GgmlOp, { src0, src1 }, { dst });
|
auto *graph_ptr = get_qnn_graph_from_cache<2>(ctx, _GgmlOp, {src0, src1}, dst);
|
||||||
if (graph_ptr) {
|
if (graph_ptr) {
|
||||||
succeed = execute_graph<2, 1>(graph_ptr, { src0, src1 }, { dst });
|
succeed = execute_graph<2>(graph_ptr, {src0, src1}, dst);
|
||||||
}
|
}
|
||||||
|
|
||||||
#ifndef NDEBUG
|
#ifndef NDEBUG
|
||||||
|
|
@ -301,9 +301,9 @@ bool qnn_unary_op_impl(ggml_backend_qnn_device_context *ctx, ggml_tensor *src, g
|
||||||
CHECK_PARAMS(ctx, src, dst);
|
CHECK_PARAMS(ctx, src, dst);
|
||||||
|
|
||||||
bool succeed = false;
|
bool succeed = false;
|
||||||
auto *graph_ptr = get_qnn_graph_from_cache<1, 1>(ctx, _GgmlOp, { src }, { dst });
|
auto *graph_ptr = get_qnn_graph_from_cache<1>(ctx, _GgmlOp, {src}, dst);
|
||||||
if (graph_ptr) {
|
if (graph_ptr) {
|
||||||
succeed = execute_graph<1, 1>(graph_ptr, { src }, { dst });
|
succeed = execute_graph<1>(graph_ptr, {src}, dst);
|
||||||
}
|
}
|
||||||
|
|
||||||
#ifndef NDEBUG
|
#ifndef NDEBUG
|
||||||
|
|
@ -315,6 +315,22 @@ bool qnn_unary_op_impl(ggml_backend_qnn_device_context *ctx, ggml_tensor *src, g
|
||||||
|
|
||||||
return succeed;
|
return succeed;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
bool qnn_unary_nop_impl(ggml_backend_qnn_device_context *ctx, ggml_tensor *src, ggml_tensor *dst) {
|
||||||
|
GGML_UNUSED(ctx);
|
||||||
|
GGML_UNUSED(src);
|
||||||
|
GGML_UNUSED(dst);
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
bool qnn_binary_nop_impl(ggml_backend_qnn_device_context *ctx, ggml_tensor *src0, ggml_tensor *src1, ggml_tensor *dst) {
|
||||||
|
GGML_UNUSED(ctx);
|
||||||
|
GGML_UNUSED(src0);
|
||||||
|
GGML_UNUSED(src1);
|
||||||
|
GGML_UNUSED(dst);
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
constexpr const ggml_qnn_unary_op_t kQnnUnaryOpsTable[] = {
|
constexpr const ggml_qnn_unary_op_t kQnnUnaryOpsTable[] = {
|
||||||
nullptr, // GGML_OP_NONE
|
nullptr, // GGML_OP_NONE
|
||||||
nullptr, // GGML_OP_DUP
|
nullptr, // GGML_OP_DUP
|
||||||
|
|
@ -347,37 +363,37 @@ constexpr const ggml_qnn_unary_op_t kQnnUnaryOpsTable[] = {
|
||||||
nullptr, // GGML_OP_MUL_MAT_ID
|
nullptr, // GGML_OP_MUL_MAT_ID
|
||||||
nullptr, // GGML_OP_OUT_PROD
|
nullptr, // GGML_OP_OUT_PROD
|
||||||
|
|
||||||
nullptr, // GGML_OP_SCALE
|
nullptr, // GGML_OP_SCALE
|
||||||
nullptr, // GGML_OP_SET
|
nullptr, // GGML_OP_SET
|
||||||
nullptr, // GGML_OP_CPY
|
nullptr, // GGML_OP_CPY
|
||||||
nullptr, // GGML_OP_CONT
|
nullptr, // GGML_OP_CONT
|
||||||
nullptr, // GGML_OP_RESHAPE
|
nullptr, // GGML_OP_RESHAPE
|
||||||
nullptr, // GGML_OP_VIEW
|
qnn_unary_nop_impl, // GGML_OP_VIEW
|
||||||
nullptr, // GGML_OP_PERMUTE
|
qnn_unary_op_impl<GGML_OP_PERMUTE>, // GGML_OP_PERMUTE
|
||||||
nullptr, // GGML_OP_TRANSPOSE
|
nullptr, // GGML_OP_TRANSPOSE
|
||||||
nullptr, // GGML_OP_GET_ROWS
|
qnn_unary_nop_impl, // GGML_OP_GET_ROWS
|
||||||
nullptr, // GGML_OP_GET_ROWS_BACK
|
nullptr, // GGML_OP_GET_ROWS_BACK
|
||||||
nullptr, // GGML_OP_DIAG
|
nullptr, // GGML_OP_DIAG
|
||||||
nullptr, // GGML_OP_DIAG_MASK_INF
|
nullptr, // GGML_OP_DIAG_MASK_INF
|
||||||
nullptr, // GGML_OP_DIAG_MASK_ZERO
|
nullptr, // GGML_OP_DIAG_MASK_ZERO
|
||||||
nullptr, // GGML_OP_SOFT_MAX
|
nullptr, // GGML_OP_SOFT_MAX
|
||||||
nullptr, // GGML_OP_SOFT_MAX_BACK
|
nullptr, // GGML_OP_SOFT_MAX_BACK
|
||||||
nullptr, // GGML_OP_ROPE
|
nullptr, // GGML_OP_ROPE
|
||||||
nullptr, // GGML_OP_ROPE_BACK
|
nullptr, // GGML_OP_ROPE_BACK
|
||||||
nullptr, // GGML_OP_CLAMP
|
nullptr, // GGML_OP_CLAMP
|
||||||
nullptr, // GGML_OP_CONV_TRANSPOSE_1D
|
nullptr, // GGML_OP_CONV_TRANSPOSE_1D
|
||||||
nullptr, // GGML_OP_IM2COL
|
nullptr, // GGML_OP_IM2COL
|
||||||
nullptr, // GGML_OP_IM2COL_BACK
|
nullptr, // GGML_OP_IM2COL_BACK
|
||||||
nullptr, // GGML_OP_CONV_TRANSPOSE_2D
|
nullptr, // GGML_OP_CONV_TRANSPOSE_2D
|
||||||
nullptr, // GGML_OP_POOL_1D
|
nullptr, // GGML_OP_POOL_1D
|
||||||
nullptr, // GGML_OP_POOL_2D
|
nullptr, // GGML_OP_POOL_2D
|
||||||
nullptr, // GGML_OP_POOL_2D_BACK
|
nullptr, // GGML_OP_POOL_2D_BACK
|
||||||
nullptr, // GGML_OP_UPSCALE
|
nullptr, // GGML_OP_UPSCALE
|
||||||
nullptr, // GGML_OP_PAD
|
nullptr, // GGML_OP_PAD
|
||||||
nullptr, // GGML_OP_ARANGE
|
nullptr, // GGML_OP_ARANGE
|
||||||
nullptr, // GGML_OP_TIMESTEP_EMBEDDING
|
nullptr, // GGML_OP_TIMESTEP_EMBEDDING
|
||||||
nullptr, // GGML_OP_ARGSORT
|
nullptr, // GGML_OP_ARGSORT
|
||||||
nullptr, // GGML_OP_LEAKY_RELU
|
nullptr, // GGML_OP_LEAKY_RELU
|
||||||
|
|
||||||
nullptr, // GGML_OP_FLASH_ATTN_EXT
|
nullptr, // GGML_OP_FLASH_ATTN_EXT
|
||||||
nullptr, // GGML_OP_FLASH_ATTN_BACK
|
nullptr, // GGML_OP_FLASH_ATTN_BACK
|
||||||
|
|
@ -522,18 +538,24 @@ static_assert(sizeof(kQnnBinaryOpsTable) / sizeof(kQnnBinaryOpsTable[0]) == GGML
|
||||||
"GGML_OP_COUNT does not match the size of the kQnnBinaryOpsTable table");
|
"GGML_OP_COUNT does not match the size of the kQnnBinaryOpsTable table");
|
||||||
|
|
||||||
bool ggml_qnn_supports_tensor(ggml_backend_qnn_device_context *ctx, const ggml_tensor *tensor) {
|
bool ggml_qnn_supports_tensor(ggml_backend_qnn_device_context *ctx, const ggml_tensor *tensor) {
|
||||||
|
if (!tensor) {
|
||||||
|
QNN_LOG_DEBUG("tensor is nullptr");
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
auto *type_name = ggml_get_type_traits(tensor->type)->type_name;
|
||||||
switch (tensor->type) {
|
switch (tensor->type) {
|
||||||
case GGML_TYPE_F32:
|
case GGML_TYPE_F32:
|
||||||
case GGML_TYPE_F16:
|
case GGML_TYPE_F16:
|
||||||
case GGML_TYPE_Q8_0:
|
case GGML_TYPE_Q8_0:
|
||||||
case GGML_TYPE_Q4_0:
|
case GGML_TYPE_Q4_0:
|
||||||
if (ctx->supported_types.find(tensor->type) == ctx->supported_types.end()) {
|
if (ctx->supported_types.find(tensor->type) == ctx->supported_types.end()) {
|
||||||
QNN_LOG_DEBUG("unsupported data type GGML_TYPE_F16 for cpu backend");
|
QNN_LOG_DEBUG("unsupported data type %s for backend %d", type_name, (int)ctx->device);
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
QNN_LOG_DEBUG("unsupported data type %d", tensor->type);
|
QNN_LOG_DEBUG("unsupported data type %s", type_name);
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -591,19 +613,15 @@ bool ggml_qnn_supports_op(ggml_backend_qnn_device_context *ctx, const ggml_tenso
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
if (!kQnnUnaryOpsTable[op->op] && !kQnnBinaryOpsTable[op->op]) {
|
if (!kQnnUnaryOpsTable[op->op] && !kQnnBinaryOpsTable[op->op]) {
|
||||||
QNN_LOG_DEBUG("unsupported op %d", op->op);
|
QNN_LOG_DEBUG("[%s] unsupported op", ggml_op_name(op->op));
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
auto *src0 = op->src[0];
|
auto *src0 = op->src[0];
|
||||||
auto *src1 = op->src[1];
|
auto *src1 = op->src[1];
|
||||||
if (!src0 || !src1) {
|
if (!ggml_qnn_supports_tensor(ctx, src0) || !ggml_qnn_supports_tensor(ctx, op) ||
|
||||||
QNN_LOG_DEBUG("src0 or src1 is nullptr");
|
(kQnnBinaryOpsTable[op->op] && !ggml_qnn_supports_tensor(ctx, src1))) {
|
||||||
return false;
|
QNN_LOG_DEBUG("[%s] unsupported tensor", ggml_op_name(op->op));
|
||||||
}
|
|
||||||
|
|
||||||
if (!ggml_qnn_supports_tensor(ctx, src0) || !ggml_qnn_supports_tensor(ctx, src1) ||
|
|
||||||
!ggml_qnn_supports_tensor(ctx, op)) {
|
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -642,7 +660,7 @@ bool ggml_qnn_forward(ggml_backend_qnn_device_context *ctx, struct ggml_tensor *
|
||||||
return binary_op(ctx, tensor->src[0], tensor->src[1], tensor);
|
return binary_op(ctx, tensor->src[0], tensor->src[1], tensor);
|
||||||
}
|
}
|
||||||
|
|
||||||
QNN_LOG_WARN("unsupported op %s", ggml_op_desc(tensor));
|
QNN_LOG_WARN("[forward]unsupported op %s", ggml_op_desc(tensor));
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -7,10 +7,10 @@
|
||||||
namespace {
|
namespace {
|
||||||
|
|
||||||
constexpr const qnn::qnn_dimension_array_t kTransposeParamData[GGML_MAX_DIMS] = {
|
constexpr const qnn::qnn_dimension_array_t kTransposeParamData[GGML_MAX_DIMS] = {
|
||||||
{ 0 },
|
{0},
|
||||||
{ 1, 0 },
|
{1, 0},
|
||||||
{ 0, 2, 1 },
|
{0, 2, 1},
|
||||||
{ 0, 1, 3, 2 },
|
{0, 1, 3, 2},
|
||||||
};
|
};
|
||||||
|
|
||||||
qnn::qnn_dimension_array_t get_transposed_dimensions(const qnn::qnn_dimension_array_t &dimensions, int rank) {
|
qnn::qnn_dimension_array_t get_transposed_dimensions(const qnn::qnn_dimension_array_t &dimensions, int rank) {
|
||||||
|
|
@ -96,9 +96,8 @@ bool bind_tensors(const qnn::ggml_tensor_array_t &ggml_tensors, qnn::ggml_qnn_te
|
||||||
class ggml_qnn_connectable_op_config : public qnn::ggml_qnn_op_config_base {
|
class ggml_qnn_connectable_op_config : public qnn::ggml_qnn_op_config_base {
|
||||||
public:
|
public:
|
||||||
explicit ggml_qnn_connectable_op_config(const std::string &name, const std::string &package_name,
|
explicit ggml_qnn_connectable_op_config(const std::string &name, const std::string &package_name,
|
||||||
const std::string &op_type,
|
const std::string &op_type, std::shared_ptr<qnn::qnn_instance> qnn_instance)
|
||||||
std::shared_ptr<qnn::qnn_instance> qnn_instance) :
|
: ggml_qnn_op_config_base(name, package_name, op_type, qnn_instance) {}
|
||||||
ggml_qnn_op_config_base(name, package_name, op_type, qnn_instance) {}
|
|
||||||
|
|
||||||
bool create_tensors(QNNBackend device, Qnn_GraphHandle_t graph_handle,
|
bool create_tensors(QNNBackend device, Qnn_GraphHandle_t graph_handle,
|
||||||
const qnn::ggml_tensor_array_t &tensor_inputs,
|
const qnn::ggml_tensor_array_t &tensor_inputs,
|
||||||
|
|
@ -264,11 +263,22 @@ bool ggml_qnn_single_op_config::create_tensors(QNNBackend device, Qnn_GraphHandl
|
||||||
const ggml_tensor_array_t &tensor_inputs,
|
const ggml_tensor_array_t &tensor_inputs,
|
||||||
const ggml_tensor_array_t &tensor_outputs) {
|
const ggml_tensor_array_t &tensor_outputs) {
|
||||||
const auto tensor_rank = get_rank(tensor_inputs, tensor_outputs);
|
const auto tensor_rank = get_rank(tensor_inputs, tensor_outputs);
|
||||||
tensor_common_params params = { "src", tensor_rank, true, device, graph_handle, _qnn_instance };
|
tensor_common_params params = {"src", tensor_rank, true, device, graph_handle, _qnn_instance};
|
||||||
create_tensors_from_ggml_tensor(params, tensor_inputs, &_tensor_inputs, &_qnn_tensor_inputs);
|
create_tensors_from_ggml_tensor(params, tensor_inputs, &_tensor_inputs, &_qnn_tensor_inputs);
|
||||||
params.name_prefix = "dst";
|
params.name_prefix = "dst";
|
||||||
params.is_input = false;
|
params.is_input = false;
|
||||||
create_tensors_from_ggml_tensor(params, tensor_outputs, &_tensor_outputs, &_qnn_tensor_outputs);
|
create_tensors_from_ggml_tensor(params, tensor_outputs, &_tensor_outputs, &_qnn_tensor_outputs);
|
||||||
|
|
||||||
|
if (_param_buffer.size() > 0) {
|
||||||
|
// handle parameters in output tensor
|
||||||
|
auto *params = tensor_outputs.front()->op_params;
|
||||||
|
memcpy(_param_buffer.data(), params, _param_buffer.size());
|
||||||
|
|
||||||
|
const uint32_t count = uint32_t(_param_buffer.size() / qnn_datatype_size(_param_type));
|
||||||
|
const qnn_dimension_array_t param_dims = {count, 1, 1, 1};
|
||||||
|
add_tensor_param(_param_name, param_dims, 1, _param_buffer.data(), _param_type, device, graph_handle);
|
||||||
|
}
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -281,7 +291,7 @@ bool ggml_qnn_matmul_op_config::create_tensors(QNNBackend device, Qnn_GraphHandl
|
||||||
GGML_ASSERT(tensor_rank >= 2);
|
GGML_ASSERT(tensor_rank >= 2);
|
||||||
|
|
||||||
// create input tensors
|
// create input tensors
|
||||||
tensor_common_params params = { "src", tensor_rank, true, device, graph_handle, _qnn_instance };
|
tensor_common_params params = {"src", tensor_rank, true, device, graph_handle, _qnn_instance};
|
||||||
create_tensors_from_ggml_tensor(params, tensor_inputs, &_tensor_inputs, &_qnn_tensor_inputs);
|
create_tensors_from_ggml_tensor(params, tensor_inputs, &_tensor_inputs, &_qnn_tensor_inputs);
|
||||||
|
|
||||||
// create output tensor
|
// create output tensor
|
||||||
|
|
@ -290,8 +300,49 @@ bool ggml_qnn_matmul_op_config::create_tensors(QNNBackend device, Qnn_GraphHandl
|
||||||
params.is_input = false;
|
params.is_input = false;
|
||||||
create_tensors_from_ggml_tensor(params, tensor_outputs, &mat_mul_tensor_outputs, nullptr);
|
create_tensors_from_ggml_tensor(params, tensor_outputs, &mat_mul_tensor_outputs, nullptr);
|
||||||
|
|
||||||
|
if (device == QNN_BACKEND_GPU) {
|
||||||
|
// there's no convert op for GPU, so we should create matmul nodes directly.
|
||||||
|
return create_mat_mul_nodes(device, graph_handle, tensor_rank, _tensor_inputs, mat_mul_tensor_outputs);
|
||||||
|
}
|
||||||
|
|
||||||
|
// create tensors for convert node
|
||||||
|
ggml_qnn_tensor_array_t mat_mul_tensor_inputs = _tensor_inputs;
|
||||||
|
auto input_tensor_type = get_tensor_type(mat_mul_tensor_inputs);
|
||||||
|
QNN_LOG_DEBUG("matmul input tensor type: %s\n", qnn_datatype_to_string(input_tensor_type));
|
||||||
|
|
||||||
|
_input_converts.resize(mat_mul_tensor_inputs.size());
|
||||||
|
for (size_t i = 0; i < mat_mul_tensor_inputs.size(); ++i) {
|
||||||
|
// create input convert nodes
|
||||||
|
std::string convert_name("convert_src" + std::to_string(i));
|
||||||
|
auto convert_in = mat_mul_tensor_inputs[i];
|
||||||
|
auto convert_out = std::make_shared<ggml_qnn_tensor>(ggml_qnn_tensor::INTERMEDIATE, convert_name + "_out",
|
||||||
|
convert_in->get_dimensions(), input_tensor_type,
|
||||||
|
tensor_rank, device, graph_handle, _qnn_instance);
|
||||||
|
auto convert = std::make_shared<ggml_qnn_connectable_op_config>(convert_name, QNN_OP_PACKAGE_NAME_QTI_AISW,
|
||||||
|
QNN_OP_CONVERT, _qnn_instance);
|
||||||
|
convert->set_input_tensors({convert_in});
|
||||||
|
convert->set_output_tensors({convert_out});
|
||||||
|
mat_mul_tensor_inputs[i] = convert_out;
|
||||||
|
_input_converts[i] = convert;
|
||||||
|
}
|
||||||
|
|
||||||
|
{
|
||||||
|
// create output convert node
|
||||||
|
std::string convert_name("convert_dst");
|
||||||
|
auto convert_out = mat_mul_tensor_outputs.front();
|
||||||
|
auto convert_in = std::make_shared<ggml_qnn_tensor>(ggml_qnn_tensor::INTERMEDIATE, convert_name + "_in",
|
||||||
|
convert_out->get_dimensions(), input_tensor_type,
|
||||||
|
tensor_rank, device, graph_handle, _qnn_instance);
|
||||||
|
auto output_convert = std::make_shared<ggml_qnn_connectable_op_config>(
|
||||||
|
convert_name, QNN_OP_PACKAGE_NAME_QTI_AISW, QNN_OP_CONVERT, _qnn_instance);
|
||||||
|
output_convert->set_input_tensors({convert_in});
|
||||||
|
output_convert->set_output_tensors({convert_out});
|
||||||
|
mat_mul_tensor_outputs[0] = convert_in;
|
||||||
|
_output_convert = output_convert;
|
||||||
|
}
|
||||||
|
|
||||||
// create mat_mul nodes
|
// create mat_mul nodes
|
||||||
return create_mat_mul_nodes(device, graph_handle, tensor_rank, _tensor_inputs, mat_mul_tensor_outputs);
|
return create_mat_mul_nodes(device, graph_handle, tensor_rank, mat_mul_tensor_inputs, mat_mul_tensor_outputs);
|
||||||
}
|
}
|
||||||
|
|
||||||
bool ggml_qnn_matmul_op_config::create_mat_mul_nodes(QNNBackend device, Qnn_GraphHandle_t graph_handle, const int rank,
|
bool ggml_qnn_matmul_op_config::create_mat_mul_nodes(QNNBackend device, Qnn_GraphHandle_t graph_handle, const int rank,
|
||||||
|
|
@ -371,7 +422,7 @@ bool ggml_qnn_matmul_op_config::create_mat_mul_nodes(QNNBackend device, Qnn_Grap
|
||||||
|
|
||||||
// set transpose0 parameters
|
// set transpose0 parameters
|
||||||
auto *params_data = reinterpret_cast<const uint8_t *>(kTransposeParamData[rank - 1].data());
|
auto *params_data = reinterpret_cast<const uint8_t *>(kTransposeParamData[rank - 1].data());
|
||||||
const qnn_dimension_array_t param_dims = { (uint32_t)rank, 1, 1, 1 };
|
const qnn_dimension_array_t param_dims = {(uint32_t)rank, 1, 1, 1};
|
||||||
transpose0->add_tensor_param(QNN_OP_TRANSPOSE_PARAM_PERM, param_dims, 1, params_data, QNN_DATATYPE_UINT_32, device,
|
transpose0->add_tensor_param(QNN_OP_TRANSPOSE_PARAM_PERM, param_dims, 1, params_data, QNN_DATATYPE_UINT_32, device,
|
||||||
graph_handle);
|
graph_handle);
|
||||||
|
|
||||||
|
|
@ -380,19 +431,19 @@ bool ggml_qnn_matmul_op_config::create_mat_mul_nodes(QNNBackend device, Qnn_Grap
|
||||||
graph_handle);
|
graph_handle);
|
||||||
|
|
||||||
// set tensor to transpose0
|
// set tensor to transpose0
|
||||||
ggml_qnn_tensor_array_t tensors = { tensor_inputs.back() };
|
ggml_qnn_tensor_array_t tensors = {tensor_inputs.back()};
|
||||||
transpose0->set_input_tensors(tensors);
|
transpose0->set_input_tensors(tensors);
|
||||||
tensors = { src0_trans };
|
tensors = {src0_trans};
|
||||||
transpose0->set_output_tensors(tensors);
|
transpose0->set_output_tensors(tensors);
|
||||||
|
|
||||||
// set tensor to mat_mul
|
// set tensor to mat_mul
|
||||||
tensors = { tensor_inputs.front(), src0_trans };
|
tensors = {tensor_inputs.front(), src0_trans};
|
||||||
mat_mul->set_input_tensors(tensors);
|
mat_mul->set_input_tensors(tensors);
|
||||||
tensors = { dst_trans };
|
tensors = {dst_trans};
|
||||||
mat_mul->set_output_tensors(tensors);
|
mat_mul->set_output_tensors(tensors);
|
||||||
|
|
||||||
// set tensor to transpose1
|
// set tensor to transpose1
|
||||||
tensors = { dst_trans };
|
tensors = {dst_trans};
|
||||||
transpose1->set_input_tensors(tensors);
|
transpose1->set_input_tensors(tensors);
|
||||||
transpose1->set_output_tensors(tensor_outputs);
|
transpose1->set_output_tensors(tensor_outputs);
|
||||||
|
|
||||||
|
|
@ -459,6 +510,13 @@ ggml_op_constructor_t create_op_constructor(const std::string &op_name) {
|
||||||
QNN_LOG_DEBUG("create QNN_OP_MAT_MUL, name %s\n", instance_name.c_str());
|
QNN_LOG_DEBUG("create QNN_OP_MAT_MUL, name %s\n", instance_name.c_str());
|
||||||
return std::make_unique<qnn::ggml_qnn_matmul_op_config>(instance_name, qnn_instance);
|
return std::make_unique<qnn::ggml_qnn_matmul_op_config>(instance_name, qnn_instance);
|
||||||
};
|
};
|
||||||
|
} else if (op_name == QNN_OP_TRANSPOSE) {
|
||||||
|
return [](const std::string &instance_name,
|
||||||
|
std::shared_ptr<qnn::qnn_instance> qnn_instance) -> std::unique_ptr<qnn::ggml_qnn_op_config> {
|
||||||
|
return std::make_unique<qnn::ggml_qnn_single_op_config>(instance_name, QNN_OP_PACKAGE_NAME_QTI_AISW,
|
||||||
|
QNN_OP_TRANSPOSE, QNN_OP_TRANSPOSE_PARAM_PERM,
|
||||||
|
QNN_DATATYPE_UINT_32, 4 * sizeof(uint32_t), qnn_instance);
|
||||||
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
return [op_name](const std::string &instance_name,
|
return [op_name](const std::string &instance_name,
|
||||||
|
|
|
||||||
|
|
@ -30,11 +30,16 @@ public:
|
||||||
virtual void unbind_output_tensors() = 0;
|
virtual void unbind_output_tensors() = 0;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
using ggml_op_constructor_t =
|
||||||
|
std::function<std::unique_ptr<ggml_qnn_op_config>(const std::string &, std::shared_ptr<qnn_instance>)>;
|
||||||
|
|
||||||
|
ggml_op_constructor_t create_op_constructor(const std::string &op_name);
|
||||||
|
|
||||||
class ggml_qnn_op_config_base : public ggml_qnn_op_config {
|
class ggml_qnn_op_config_base : public ggml_qnn_op_config {
|
||||||
public:
|
public:
|
||||||
explicit ggml_qnn_op_config_base(const std::string &name, const std::string &package_name,
|
explicit ggml_qnn_op_config_base(const std::string &name, const std::string &package_name,
|
||||||
const std::string &op_type, std::shared_ptr<qnn_instance> qnn_instance) :
|
const std::string &op_type, std::shared_ptr<qnn_instance> qnn_instance)
|
||||||
_name(name), _package_name(package_name), _op_type(op_type), _qnn_instance(qnn_instance) {}
|
: _name(name), _package_name(package_name), _op_type(op_type), _qnn_instance(qnn_instance) {}
|
||||||
|
|
||||||
void add_scalar_param(const std::string &name, const Qnn_Scalar_t scalar);
|
void add_scalar_param(const std::string &name, const Qnn_Scalar_t scalar);
|
||||||
bool add_tensor_param(const std::string &name, const qnn_dimension_array_t &dimensions, int rank,
|
bool add_tensor_param(const std::string &name, const qnn_dimension_array_t &dimensions, int rank,
|
||||||
|
|
@ -70,21 +75,34 @@ protected:
|
||||||
class ggml_qnn_single_op_config : public ggml_qnn_op_config_base {
|
class ggml_qnn_single_op_config : public ggml_qnn_op_config_base {
|
||||||
public:
|
public:
|
||||||
explicit ggml_qnn_single_op_config(const std::string &name, const std::string &package_name,
|
explicit ggml_qnn_single_op_config(const std::string &name, const std::string &package_name,
|
||||||
const std::string &op_type, std::shared_ptr<qnn_instance> qnn_instance) :
|
const std::string &op_type, std::shared_ptr<qnn_instance> qnn_instance)
|
||||||
ggml_qnn_op_config_base(name, package_name, op_type, qnn_instance) {}
|
: ggml_qnn_op_config_base(name, package_name, op_type, qnn_instance) {}
|
||||||
|
|
||||||
|
explicit ggml_qnn_single_op_config(const std::string &name, const std::string &package_name,
|
||||||
|
const std::string &op_type, const std::string &param_name,
|
||||||
|
const Qnn_DataType_t param_type, const size_t param_size,
|
||||||
|
std::shared_ptr<qnn_instance> qnn_instance)
|
||||||
|
: ggml_qnn_op_config_base(name, package_name, op_type, qnn_instance),
|
||||||
|
_param_name(param_name),
|
||||||
|
_param_type(param_type),
|
||||||
|
_param_buffer(param_size) {}
|
||||||
|
|
||||||
bool create_tensors(QNNBackend device, Qnn_GraphHandle_t graph_handle, const ggml_tensor_array_t &tensor_inputs,
|
bool create_tensors(QNNBackend device, Qnn_GraphHandle_t graph_handle, const ggml_tensor_array_t &tensor_inputs,
|
||||||
const ggml_tensor_array_t &tensor_outputs) override;
|
const ggml_tensor_array_t &tensor_outputs) override;
|
||||||
|
|
||||||
private:
|
private:
|
||||||
|
const std::string _param_name;
|
||||||
|
const Qnn_DataType_t _param_type = QNN_DATATYPE_UINT_32;
|
||||||
|
std::vector<uint8_t> _param_buffer;
|
||||||
|
|
||||||
DISABLE_COPY(ggml_qnn_single_op_config);
|
DISABLE_COPY(ggml_qnn_single_op_config);
|
||||||
DISABLE_MOVE(ggml_qnn_single_op_config);
|
DISABLE_MOVE(ggml_qnn_single_op_config);
|
||||||
};
|
};
|
||||||
|
|
||||||
class ggml_qnn_matmul_op_config : public ggml_qnn_op_config {
|
class ggml_qnn_matmul_op_config : public ggml_qnn_op_config {
|
||||||
public:
|
public:
|
||||||
ggml_qnn_matmul_op_config(const std::string &name, std::shared_ptr<qnn_instance> qnn_instance) :
|
ggml_qnn_matmul_op_config(const std::string &name, std::shared_ptr<qnn_instance> qnn_instance)
|
||||||
_name(name), _qnn_instance(qnn_instance) {}
|
: _name(name), _qnn_instance(qnn_instance) {}
|
||||||
|
|
||||||
bool create_tensors(QNNBackend device, Qnn_GraphHandle_t graph_handle, const ggml_tensor_array_t &tensor_inputs,
|
bool create_tensors(QNNBackend device, Qnn_GraphHandle_t graph_handle, const ggml_tensor_array_t &tensor_inputs,
|
||||||
const ggml_tensor_array_t &tensor_outputs) override;
|
const ggml_tensor_array_t &tensor_outputs) override;
|
||||||
|
|
@ -114,9 +132,4 @@ private:
|
||||||
DISABLE_MOVE(ggml_qnn_matmul_op_config);
|
DISABLE_MOVE(ggml_qnn_matmul_op_config);
|
||||||
};
|
};
|
||||||
|
|
||||||
using ggml_op_constructor_t =
|
|
||||||
std::function<std::unique_ptr<ggml_qnn_op_config>(const std::string &, std::shared_ptr<qnn_instance>)>;
|
|
||||||
|
|
||||||
ggml_op_constructor_t create_op_constructor(const std::string &op_name);
|
|
||||||
|
|
||||||
} // namespace qnn
|
} // namespace qnn
|
||||||
|
|
|
||||||
|
|
@ -257,6 +257,7 @@ private:
|
||||||
DISABLE_MOVE(ggml_qnn_tensor);
|
DISABLE_MOVE(ggml_qnn_tensor);
|
||||||
};
|
};
|
||||||
|
|
||||||
|
using ggml_qnn_tensor_ptr_t = std::shared_ptr<ggml_qnn_tensor>;
|
||||||
using ggml_qnn_tensor_array_t = std::vector<std::shared_ptr<ggml_qnn_tensor>>;
|
using ggml_qnn_tensor_array_t = std::vector<std::shared_ptr<ggml_qnn_tensor>>;
|
||||||
|
|
||||||
} // namespace qnn
|
} // namespace qnn
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue