diff --git a/ggml/src/ggml-qnn/.clang-format b/ggml/src/ggml-qnn/.clang-format deleted file mode 100644 index 0c67c54239..0000000000 --- a/ggml/src/ggml-qnn/.clang-format +++ /dev/null @@ -1,65 +0,0 @@ ---- -BasedOnStyle: Google -IndentWidth: 4 -AccessModifierOffset: -4 -AlignAfterOpenBracket: Align -AlignConsecutiveMacros: false -AlignConsecutiveAssignments: false -AlignConsecutiveDeclarations: false -AlignEscapedNewlines: Left -AlignOperands: true -AlignTrailingComments: true -AllowAllArgumentsOnNextLine: true -AllowAllConstructorInitializersOnNextLine: true -AllowAllParametersOfDeclarationOnNextLine: true -AllowShortBlocksOnASingleLine: Never -AllowShortCaseLabelsOnASingleLine: false -AllowShortFunctionsOnASingleLine: All -AllowShortLambdasOnASingleLine: All -AllowShortIfStatementsOnASingleLine: WithoutElse -AllowShortLoopsOnASingleLine: true -AlwaysBreakAfterDefinitionReturnType: None -AlwaysBreakAfterReturnType: None -AlwaysBreakBeforeMultilineStrings: true -AlwaysBreakTemplateDeclarations: Yes -BinPackArguments: true -BinPackParameters: true -BraceWrapping: - AfterCaseLabel: false - AfterClass: false - AfterControlStatement: false - AfterEnum: false - AfterFunction: false - AfterNamespace: false - AfterObjCDeclaration: false - AfterStruct: false - AfterUnion: false - AfterExternBlock: false - BeforeCatch: false - BeforeElse: false - IndentBraces: false - SplitEmptyFunction: true - SplitEmptyRecord: true - SplitEmptyNamespace: true -ColumnLimit: 120 -ConstructorInitializerIndentWidth: 4 -ContinuationIndentWidth: 4 -Cpp11BracedListStyle: true -DerivePointerAlignment: false -IncludeCategories: - - Regex: '^<.*\.h>' - Priority: 1 - - Regex: '^<.*' - Priority: 2 - - Regex: '^"ggml\.h"' - Priority: 3 - - Regex: '^"ggml-.+\.h"' - Priority: 4 - - Regex: '.*' - Priority: 5 -KeepEmptyLinesAtTheStartOfBlocks: true -MaxEmptyLinesToKeep: 1 -PointerAlignment: Right -SortIncludes: true -SpacesBeforeTrailingComments: 1 -UseTab: Never diff --git a/ggml/src/ggml-qnn/backend-ops.cpp b/ggml/src/ggml-qnn/backend-ops.cpp index f62fc60d5c..3a401dd037 100644 --- a/ggml/src/ggml-qnn/backend-ops.cpp +++ b/ggml/src/ggml-qnn/backend-ops.cpp @@ -4,7 +4,6 @@ #include #include "ggml-impl.h" - #include "graph.hpp" #include "logger.hpp" #include "op-config.hpp" @@ -13,15 +12,15 @@ namespace { -bool qnn_is_op_valid(ggml_backend_qnn_device_context *ctx, const ggml_tensor *dst) { +bool qnn_is_op_valid(ggml_backend_qnn_device_context * ctx, const ggml_tensor * dst) { if (!ctx || !dst) { - QNN_LOG_WARN("invalid params"); + QNN_LOG_WARN("invalid params\n"); return false; } auto instance = ctx->instance; if (!instance) { - QNN_LOG_WARN("invalid instance"); + QNN_LOG_WARN("invalid instance\n"); return false; } @@ -32,7 +31,7 @@ bool qnn_is_op_valid(ggml_backend_qnn_device_context *ctx, const ggml_tensor *ds case 2: return dst->src[0] && dst->src[1]; default: - QNN_LOG_WARN("invalid op param count %d", (int)param_count); + QNN_LOG_WARN("invalid op param count %d\n", (int) param_count); break; } @@ -40,60 +39,51 @@ bool qnn_is_op_valid(ggml_backend_qnn_device_context *ctx, const ggml_tensor *ds } #ifndef NDEBUG -void print_ggml_tensor(const ggml_tensor *tensor) { - QNN_LOG_DEBUG("%s: type:%s ne: %ldx%ldx%ldx%ld, nb: %ldx%ldx%ldx%ld", tensor->name, ggml_type_name(tensor->type), - (long)tensor->ne[0], (long)tensor->ne[1], (long)tensor->ne[2], (long)tensor->ne[3], - (long)tensor->nb[0], (long)tensor->nb[1], (long)tensor->nb[2], (long)tensor->nb[3]); +void print_ggml_tensor(const ggml_tensor * tensor) { + QNN_LOG_DEBUG("%s: 
type:%s ne: %ldx%ldx%ldx%ld, nb: %ldx%ldx%ldx%ld\n", tensor->name, ggml_type_name(tensor->type), + (long) tensor->ne[0], (long) tensor->ne[1], (long) tensor->ne[2], (long) tensor->ne[3], + (long) tensor->nb[0], (long) tensor->nb[1], (long) tensor->nb[2], (long) tensor->nb[3]); } #endif -} // namespace +} // namespace namespace { -typedef bool (*ggml_qnn_op_t)(ggml_backend_qnn_device_context *ctx, ggml_tensor *dst); +typedef bool (*ggml_qnn_op_t)(ggml_backend_qnn_device_context * ctx, ggml_tensor * dst); -bool execute_graph(qnn::qnn_graph *graph, ggml_tensor *output) { - if (!graph->execute(output)) { - QNN_LOG_WARN("execute failed"); - return false; - } - - return true; -} - -void append_tensor_dimensions(const ggml_tensor *tensor, std::string &output) { - char buffer[256] = {}; - const auto *type_name = qnn::get_ggml_type_name(tensor->type); - int len = 0; +void append_tensor_dimensions(const ggml_tensor * tensor, std::string & output) { + char buffer[256] = {}; + const auto * type_name = qnn::get_ggml_type_name(tensor->type); + int len = 0; switch (ggml_n_dims(tensor)) { case 1: - len = snprintf(buffer, sizeof(buffer), "%ld%s", (long)tensor->ne[0], type_name); + len = snprintf(buffer, sizeof(buffer), "%ld%s", (long) tensor->ne[0], type_name); break; case 2: - len = snprintf(buffer, sizeof(buffer), "%ldx%ld%s", (long)tensor->ne[0], (long)tensor->ne[1], type_name); + len = snprintf(buffer, sizeof(buffer), "%ldx%ld%s", (long) tensor->ne[0], (long) tensor->ne[1], type_name); break; case 3: - len = snprintf(buffer, sizeof(buffer), "%ldx%ldx%ld%s", (long)tensor->ne[0], (long)tensor->ne[1], - (long)tensor->ne[2], type_name); + len = snprintf(buffer, sizeof(buffer), "%ldx%ldx%ld%s", (long) tensor->ne[0], (long) tensor->ne[1], + (long) tensor->ne[2], type_name); break; case 4: default: - len = snprintf(buffer, sizeof(buffer), "%ldx%ldx%ldx%ld%s", (long)tensor->ne[0], (long)tensor->ne[1], - (long)tensor->ne[2], (long)tensor->ne[3], type_name); + len = snprintf(buffer, sizeof(buffer), "%ldx%ldx%ldx%ld%s", (long) tensor->ne[0], (long) tensor->ne[1], + (long) tensor->ne[2], (long) tensor->ne[3], type_name); break; } - GGML_ASSERT(len > 0 && len < (int)sizeof(buffer)); + GGML_ASSERT(len > 0 && len < (int) sizeof(buffer)); output.append(buffer, len); } -void get_graph_key_from_op(const ggml_tensor *op, std::string &output) { +void get_graph_key_from_op(const ggml_tensor * op, std::string & output) { GGML_ASSERT(op->op != GGML_OP_NONE); output += ggml_op_desc(op); output += qnn::get_ggml_type_name(op->type); const auto param_count = qnn::get_qnn_op_input_param_count(op); for (size_t i = 0; i < param_count; ++i) { - auto *input = op->src[i]; + auto * input = op->src[i]; if (!input) { break; } @@ -103,7 +93,7 @@ void get_graph_key_from_op(const ggml_tensor *op, std::string &output) { } } -void get_op_key_with_src_op_desc(const ggml_tensor *op, std::string &output) { +void get_op_key_with_src_op_desc(const ggml_tensor * op, std::string & output) { output += ggml_op_desc(op); output += '('; if (op->src[0]) { @@ -116,25 +106,37 @@ void get_op_key_with_src_op_desc(const ggml_tensor *op, std::string &output) { output += ')'; } -void get_graph_key_from_cgraph(const ggml_cgraph *cgraph, std::string &output) { - // generate key from the graph, the key is used to cache the graph, like: - // "MUL_MATf32_256x16x10f32_256x1x10f32#LOG#ADD#ADDf32_16x1x10f32" +/** + * @brief Generates a unique key for a given computation graph (cgraph). 
+ * + * This key is used to cache the graph, enabling efficient reuse of previously + * compiled graphs. The key is constructed by concatenating the descriptions + * of the operations and their associated tensor dimensions within the graph. + * + * Example key format: "MUL_MATf32_256x16x10f32_256x1x10f32#LOG#ADD#ADDf32_16x1x10f32" + * + * @param cgraph The computation graph for which the key is generated. + * @param output The string where the generated key will be stored. + * + * TODO: Improve the key generation logic to handle more complex graph structures and edge cases. + */ +void get_graph_key_from_cgraph(const ggml_cgraph * cgraph, std::string & output) { if (cgraph->n_nodes == 0) { - QNN_LOG_DEBUG("empty cgraph"); + QNN_LOG_DEBUG("empty cgraph\n"); return; } { bool is_start = true; for (int i = 0; i < cgraph->n_nodes; ++i) { - auto *op = cgraph->nodes[i]; + auto * op = cgraph->nodes[i]; if (ggml_is_empty(op)) { - QNN_LOG_DEBUG("empty op in graph, skipping"); + QNN_LOG_DEBUG("empty op in graph, skipping\n"); continue; } if (op->op == GGML_OP_NONE) { - QNN_LOG_DEBUG("GGML_OP_NONE in graph, skipping"); + QNN_LOG_DEBUG("GGML_OP_NONE in graph, skipping\n"); continue; } @@ -149,55 +151,27 @@ void get_graph_key_from_cgraph(const ggml_cgraph *cgraph, std::string &output) { } if (cgraph->n_nodes > 1) { - auto *last_op = cgraph->nodes[cgraph->n_nodes - 1]; + auto * last_op = cgraph->nodes[cgraph->n_nodes - 1]; output += qnn::get_ggml_type_name(last_op->type); output += '_'; append_tensor_dimensions(last_op, output); } } -qnn::qnn_graph *get_qnn_graph_from_cache(ggml_backend_qnn_device_context *ctx, ggml_tensor *output) { - auto &graph_cache = ctx->qnn_graph_cache; - std::string graph_key; - get_graph_key_from_op(output, graph_key); - auto it = graph_cache.find(graph_key); - qnn::qnn_graph *graph_ptr = nullptr; - if (it != graph_cache.end()) { - QNN_LOG_DEBUG("[%s]found graph %s in cache", qnn::get_backend_name(ctx->device), graph_key.c_str()); - graph_ptr = it->second.get(); - } else { - auto graph = - std::make_unique(graph_key, ctx->device, ctx->instance, ctx->socinfo.vtcm_size_in_mb); - if (!graph->is_valid()) { - return nullptr; - } - - if (!graph->build_graph_from_op(output)) { - QNN_LOG_ERROR("[%s]build_graph_from_op failed", qnn::get_backend_name(ctx->device)); - return nullptr; - } - - graph_ptr = graph.get(); - graph_cache[graph_key] = std::move(graph); - } - - return graph_ptr; -} - -qnn::qnn_graph *get_qnn_graph_from_cache(ggml_backend_qnn_device_context *ctx, const ggml_cgraph *cgraph) { - auto &graph_cache = ctx->qnn_graph_cache; +qnn::qnn_graph * get_qnn_graph_from_cache(ggml_backend_qnn_device_context * ctx, const ggml_cgraph * cgraph) { + auto & graph_cache = ctx->qnn_graph_cache; std::string graph_key; get_graph_key_from_cgraph(cgraph, graph_key); if (graph_key.empty()) { - QNN_LOG_DEBUG("[%s]empty graph key for cgraph: %p, size: %d", qnn::get_backend_name(ctx->device), cgraph, - (int)cgraph->n_nodes); + QNN_LOG_DEBUG("[%s]empty graph key for cgraph: %p, size: %d\n", qnn::get_backend_name(ctx->device), + (const void *) cgraph, (int) cgraph->n_nodes); return nullptr; } - auto it = graph_cache.find(graph_key); - qnn::qnn_graph *graph_ptr = nullptr; + auto it = graph_cache.find(graph_key); + qnn::qnn_graph * graph_ptr = nullptr; if (it != graph_cache.end()) { - QNN_LOG_DEBUG("[%s]found graph %s in cache", qnn::get_backend_name(ctx->device), graph_key.c_str()); + QNN_LOG_DEBUG("[%s]found graph %s in cache\n", qnn::get_backend_name(ctx->device), graph_key.c_str()); graph_ptr = 
it->second.get(); } else { auto graph = @@ -207,180 +181,151 @@ qnn::qnn_graph *get_qnn_graph_from_cache(ggml_backend_qnn_device_context *ctx, c } if (!graph->build_graph_from_ggml_graph(cgraph)) { - QNN_LOG_ERROR("[%s]build_graph_from_op failed", qnn::get_backend_name(ctx->device)); + QNN_LOG_ERROR("[%s]build_graph_from_op failed\n", qnn::get_backend_name(ctx->device)); return nullptr; } - graph_ptr = graph.get(); + graph_ptr = graph.get(); graph_cache[graph_key] = std::move(graph); } return graph_ptr; } -bool qnn_generic_op_impl(ggml_backend_qnn_device_context *ctx, ggml_tensor *dst) { - if (!qnn_is_op_valid(ctx, dst)) { - return false; - } +// TODO: could be merge into op caps array +constexpr const bool kQnnSupportedOps[] = { + true, // GGML_OP_NONE + false, // GGML_OP_DUP + true, // GGML_OP_ADD + false, // GGML_OP_ADD1 + false, // GGML_OP_ACC + true, // GGML_OP_SUB + true, // GGML_OP_MUL + true, // GGML_OP_DIV + false, // GGML_OP_SQR + true, // GGML_OP_SQRT + true, // GGML_OP_LOG + false, // GGML_OP_SIN + false, // GGML_OP_COS + false, // GGML_OP_SUM + false, // GGML_OP_SUM_ROWS + false, // GGML_OP_MEAN + false, // GGML_OP_ARGMAX + false, // GGML_OP_COUNT_EQUAL + false, // GGML_OP_REPEAT + false, // GGML_OP_REPEAT_BACK + false, // GGML_OP_CONCAT + false, // GGML_OP_SILU_BACK + false, // GGML_OP_NORM + false, // GGML_OP_RMS_NORM + false, // GGML_OP_RMS_NORM_BACK + false, // GGML_OP_GROUP_NORM - auto *graph_ptr = get_qnn_graph_from_cache(ctx, dst); - bool succeed = graph_ptr && execute_graph(graph_ptr, dst); + true, // GGML_OP_MUL_MAT + false, // GGML_OP_MUL_MAT_ID + false, // GGML_OP_OUT_PROD -#ifndef NDEBUG - if (!succeed) { - const auto param_count = qnn::get_qnn_op_input_param_count(dst); - for (size_t i = 0; i < param_count; ++i) { - print_ggml_tensor(dst->src[i]); - } - print_ggml_tensor(dst); - } -#endif + false, // GGML_OP_SCALE + false, // GGML_OP_SET + false, // GGML_OP_CPY + false, // GGML_OP_CONT + true, // GGML_OP_RESHAPE + false, // GGML_OP_VIEW + false, // GGML_OP_PERMUTE + false, // GGML_OP_TRANSPOSE + false, // GGML_OP_GET_ROWS + false, // GGML_OP_GET_ROWS_BACK + false, // GGML_OP_DIAG + false, // GGML_OP_DIAG_MASK_INF + false, // GGML_OP_DIAG_MASK_ZERO + false, // GGML_OP_SOFT_MAX + false, // GGML_OP_SOFT_MAX_BACK + false, // GGML_OP_ROPE + false, // GGML_OP_ROPE_BACK + false, // GGML_OP_CLAMP + false, // GGML_OP_CONV_TRANSPOSE_1D + false, // GGML_OP_IM2COL + false, // GGML_OP_IM2COL_BACK + false, // GGML_OP_CONV_TRANSPOSE_2D + false, // GGML_OP_POOL_1D + false, // GGML_OP_POOL_2D + false, // GGML_OP_POOL_2D_BACK + false, // GGML_OP_UPSCALE + false, // GGML_OP_PAD + false, // GGML_OP_PAD_REFLECT_1D + false, // GGML_OP_ARANGE + false, // GGML_OP_TIMESTEP_EMBEDDING + false, // GGML_OP_ARGSORT + false, // GGML_OP_LEAKY_RELU - return succeed; -} + false, // GGML_OP_FLASH_ATTN_EXT + false, // GGML_OP_FLASH_ATTN_BACK + false, // GGML_OP_SSM_CONV + false, // GGML_OP_SSM_SCAN + false, // GGML_OP_WIN_PART + false, // GGML_OP_WIN_UNPART + false, // GGML_OP_GET_REL_POS + false, // GGML_OP_ADD_REL_POS + false, // GGML_OP_RWKV_WKV6 + false, // GGML_OP_GATED_LINEAR_ATTN -bool qnn_nop_impl(ggml_backend_qnn_device_context *ctx, ggml_tensor *dst) { - GGML_UNUSED(ctx); - GGML_UNUSED(dst); - return true; -} + false, // GGML_OP_UNARY -constexpr const ggml_qnn_op_t kQnnOpsTable[] = { - qnn_nop_impl, // GGML_OP_NONE - nullptr, // GGML_OP_DUP - qnn_generic_op_impl, // GGML_OP_ADD - nullptr, // GGML_OP_ADD1 - nullptr, // GGML_OP_ACC - qnn_generic_op_impl, // GGML_OP_SUB - qnn_generic_op_impl, 
// GGML_OP_MUL - qnn_generic_op_impl, // GGML_OP_DIV - nullptr, // GGML_OP_SQR - qnn_generic_op_impl, // GGML_OP_SQRT - qnn_generic_op_impl, // GGML_OP_LOG - nullptr, // GGML_OP_SIN - nullptr, // GGML_OP_COS - nullptr, // GGML_OP_SUM - nullptr, // GGML_OP_SUM_ROWS - nullptr, // GGML_OP_MEAN - nullptr, // GGML_OP_ARGMAX - nullptr, // GGML_OP_COUNT_EQUAL - nullptr, // GGML_OP_REPEAT - nullptr, // GGML_OP_REPEAT_BACK - nullptr, // GGML_OP_CONCAT - nullptr, // GGML_OP_SILU_BACK - nullptr, // GGML_OP_NORM - nullptr, // GGML_OP_RMS_NORM - nullptr, // GGML_OP_RMS_NORM_BACK - nullptr, // GGML_OP_GROUP_NORM + false, // GGML_OP_MAP_UNARY + false, // GGML_OP_MAP_BINARY - qnn_generic_op_impl, // GGML_OP_MUL_MAT - nullptr, // GGML_OP_MUL_MAT_ID - nullptr, // GGML_OP_OUT_PROD + false, // GGML_OP_MAP_CUSTOM1_F32 + false, // GGML_OP_MAP_CUSTOM2_F32 + false, // GGML_OP_MAP_CUSTOM3_F32 - nullptr, // GGML_OP_SCALE - nullptr, // GGML_OP_SET - nullptr, // GGML_OP_CPY - nullptr, // GGML_OP_CONT - qnn_nop_impl, // GGML_OP_RESHAPE - nullptr, // GGML_OP_VIEW - nullptr, // GGML_OP_PERMUTE - nullptr, // GGML_OP_TRANSPOSE - nullptr, // GGML_OP_GET_ROWS - nullptr, // GGML_OP_GET_ROWS_BACK - nullptr, // GGML_OP_DIAG - nullptr, // GGML_OP_DIAG_MASK_INF - nullptr, // GGML_OP_DIAG_MASK_ZERO - nullptr, // GGML_OP_SOFT_MAX - nullptr, // GGML_OP_SOFT_MAX_BACK - nullptr, // GGML_OP_ROPE - nullptr, // GGML_OP_ROPE_BACK - nullptr, // GGML_OP_CLAMP - nullptr, // GGML_OP_CONV_TRANSPOSE_1D - nullptr, // GGML_OP_IM2COL - nullptr, // GGML_OP_IM2COL_BACK - nullptr, // GGML_OP_CONV_TRANSPOSE_2D - nullptr, // GGML_OP_POOL_1D - nullptr, // GGML_OP_POOL_2D - nullptr, // GGML_OP_POOL_2D_BACK - nullptr, // GGML_OP_UPSCALE - nullptr, // GGML_OP_PAD - nullptr, // GGML_OP_PAD_REFLECT_1D - nullptr, // GGML_OP_ARANGE - nullptr, // GGML_OP_TIMESTEP_EMBEDDING - nullptr, // GGML_OP_ARGSORT - nullptr, // GGML_OP_LEAKY_RELU + false, // GGML_OP_MAP_CUSTOM1 + false, // GGML_OP_MAP_CUSTOM2 + false, // GGML_OP_MAP_CUSTOM3 - nullptr, // GGML_OP_FLASH_ATTN_EXT - nullptr, // GGML_OP_FLASH_ATTN_BACK - nullptr, // GGML_OP_SSM_CONV - nullptr, // GGML_OP_SSM_SCAN - nullptr, // GGML_OP_WIN_PART - nullptr, // GGML_OP_WIN_UNPART - nullptr, // GGML_OP_GET_REL_POS - nullptr, // GGML_OP_ADD_REL_POS - nullptr, // GGML_OP_RWKV_WKV6 - nullptr, // GGML_OP_GATED_LINEAR_ATTN - - nullptr, // GGML_OP_UNARY - - nullptr, // GGML_OP_MAP_UNARY - nullptr, // GGML_OP_MAP_BINARY - - nullptr, // GGML_OP_MAP_CUSTOM1_F32 - nullptr, // GGML_OP_MAP_CUSTOM2_F32 - nullptr, // GGML_OP_MAP_CUSTOM3_F32 - - nullptr, // GGML_OP_MAP_CUSTOM1 - nullptr, // GGML_OP_MAP_CUSTOM2 - nullptr, // GGML_OP_MAP_CUSTOM3 - - nullptr, // GGML_OP_CROSS_ENTROPY_LOSS - nullptr, // GGML_OP_CROSS_ENTROPY_LOSS_BACK - nullptr, // GGML_OP_OPT_STEP_ADAMW + false, // GGML_OP_CROSS_ENTROPY_LOSS + false, // GGML_OP_CROSS_ENTROPY_LOSS_BACK + false, // GGML_OP_OPT_STEP_ADAMW // ggml_unary_op - nullptr, // GGML_UNARY_OP_ABS - nullptr, // GGML_UNARY_OP_SGN - nullptr, // GGML_UNARY_OP_NEG - nullptr, // GGML_UNARY_OP_STEP - nullptr, // GGML_UNARY_OP_TANH - nullptr, // GGML_UNARY_OP_ELU - nullptr, // GGML_UNARY_OP_RELU - nullptr, // GGML_UNARY_OP_SIGMOID - qnn_generic_op_impl, // GGML_UNARY_OP_GELU - nullptr, // GGML_UNARY_OP_GELU_QUICK - nullptr, // GGML_UNARY_OP_SILU - nullptr, // GGML_UNARY_OP_HARDSWISH - nullptr, // GGML_UNARY_OP_HARDSIGMOID - nullptr, // GGML_UNARY_OP_EXP + false, // GGML_UNARY_OP_ABS + false, // GGML_UNARY_OP_SGN + false, // GGML_UNARY_OP_NEG + false, // GGML_UNARY_OP_STEP + false, // GGML_UNARY_OP_TANH 
+ false, // GGML_UNARY_OP_ELU + false, // GGML_UNARY_OP_RELU + false, // GGML_UNARY_OP_SIGMOID + true, // GGML_UNARY_OP_GELU + false, // GGML_UNARY_OP_GELU_QUICK + false, // GGML_UNARY_OP_SILU + false, // GGML_UNARY_OP_HARDSWISH + false, // GGML_UNARY_OP_HARDSIGMOID + false, // GGML_UNARY_OP_EXP }; -static_assert(kQnnOpsTable[GGML_OP_NONE] == qnn_nop_impl, "GGML_OP_NONE does not match the qnn_nop_impl function"); -static_assert(kQnnOpsTable[GGML_OP_ADD] == qnn_generic_op_impl, - "GGML_OP_ADD does not match the qnn_generic_op_impl function"); -static_assert(kQnnOpsTable[GGML_OP_MUL] == qnn_generic_op_impl, - "GGML_OP_MUL does not match the qnn_generic_op_impl function"); -static_assert(kQnnOpsTable[GGML_OP_MUL_MAT] == qnn_generic_op_impl, - "GGML_OP_MUL_MAT does not match the qnn_generic_op_impl function"); -static_assert(kQnnOpsTable[GGML_OP_RESHAPE] == qnn_nop_impl, - "GGML_OP_RESHAPE does not match the qnn_nop_impl function"); -static_assert(kQnnOpsTable[GGML_OP_VIEW] == nullptr, "GGML_OP_VIEW is not nullptr"); -static_assert(std::size(kQnnOpsTable) == (GGML_OP_COUNT + GGML_UNARY_OP_COUNT), - "GGML_OP_COUNT does not match the size of the kQnnOpsTable table"); +static_assert(kQnnSupportedOps[GGML_OP_NONE], "GGML_OP_NONE is not true"); +static_assert(kQnnSupportedOps[GGML_OP_ADD], "GGML_OP_ADD is not true"); +static_assert(kQnnSupportedOps[GGML_OP_MUL], "GGML_OP_MUL is not true"); +static_assert(kQnnSupportedOps[GGML_OP_MUL_MAT], + "GGML_OP_MUL_MAT is not true, please check the kQnnSupportedOps table in the backend-ops.cpp file"); +static_assert(kQnnSupportedOps[GGML_OP_RESHAPE], "GGML_OP_RESHAPE is not true"); +static_assert(!kQnnSupportedOps[GGML_OP_VIEW], "GGML_OP_VIEW is not false"); +static_assert(std::size(kQnnSupportedOps) == (GGML_OP_COUNT + GGML_UNARY_OP_COUNT), + "GGML_OP_COUNT does not match the size of the kQnnSupportedOps table"); -bool ggml_qnn_supports_tensor(ggml_backend_qnn_device_context *ctx, const ggml_tensor *tensor) { +bool ggml_qnn_supports_tensor(ggml_backend_qnn_device_context * ctx, const ggml_tensor * tensor) { if (!tensor) { - QNN_LOG_DEBUG("tensor is nullptr"); + QNN_LOG_DEBUG("tensor is nullptr\n"); return false; } #ifndef NDEBUG if (tensor->view_src) { - auto *src_tensor = tensor->view_src; - QNN_LOG_DEBUG("[%s]tensor(%s_%dx%dx%dx%d) is a view, src: %s_%dx%dx%dx%d", qnn::get_backend_name(ctx->device), - ggml_get_name(tensor), tensor->ne[0], tensor->ne[1], tensor->ne[2], tensor->ne[3], - ggml_get_name(src_tensor), src_tensor->ne[0], src_tensor->ne[1], src_tensor->ne[2], - src_tensor->ne[3]); + auto * src_tensor = tensor->view_src; + QNN_LOG_DEBUG("[%s]tensor(%s_%dx%dx%dx%d) is a view, src: %s_%dx%dx%dx%d\n", qnn::get_backend_name(ctx->device), + ggml_get_name(tensor), (int) tensor->ne[0], (int) tensor->ne[1], (int) tensor->ne[2], + (int) tensor->ne[3], ggml_get_name(src_tensor), (int) src_tensor->ne[0], (int) src_tensor->ne[1], + (int) src_tensor->ne[2], (int) src_tensor->ne[3]); } #endif @@ -390,13 +335,14 @@ bool ggml_qnn_supports_tensor(ggml_backend_qnn_device_context *ctx, const ggml_t case GGML_TYPE_Q8_0: case GGML_TYPE_Q4_0: if (!(ctx->supported_types & (uint64_t(1) << tensor->type))) { - QNN_LOG_DEBUG("[%s]unsupported data type %s, supported_types: 0x%x", qnn::get_backend_name(ctx->device), - ggml_type_name(tensor->type), ctx->supported_types); + QNN_LOG_DEBUG("[%s]unsupported data type %s, supported_types: 0x%x\n", + qnn::get_backend_name(ctx->device), ggml_type_name(tensor->type), + (unsigned int) ctx->supported_types); return false; } break; default: 
- QNN_LOG_DEBUG("[%s]unsupported data type %s", qnn::get_backend_name(ctx->device), + QNN_LOG_DEBUG("[%s]unsupported data type %s\n", qnn::get_backend_name(ctx->device), ggml_type_name(tensor->type)); return false; } @@ -404,7 +350,7 @@ bool ggml_qnn_supports_tensor(ggml_backend_qnn_device_context *ctx, const ggml_t return true; } -bool ggnl_qnn_supports_op_tensor(ggml_backend_qnn_device_context *ctx, const ggml_tensor *op) { +bool ggnl_qnn_supports_op_tensor(ggml_backend_qnn_device_context * ctx, const ggml_tensor * op) { if (op->op == GGML_OP_NONE) { return true; } @@ -423,14 +369,14 @@ bool ggnl_qnn_supports_op_tensor(ggml_backend_qnn_device_context *ctx, const ggm return true; } -bool ggml_qnn_supports_matmul_op(ggml_backend_qnn_device_context *ctx, const ggml_tensor *op) { +bool ggml_qnn_supports_matmul_op(ggml_backend_qnn_device_context * ctx, const ggml_tensor * op) { constexpr const size_t kMaxNpuTensorSize = 8192L * 2048 + 8192 * 512 + 2048 * 512; - constexpr const auto get_tensor_size = [](const ggml_tensor *tensor) -> size_t { + constexpr const auto get_tensor_size = [](const ggml_tensor * tensor) -> size_t { return tensor->ne[0] * tensor->ne[1] * tensor->ne[2] * tensor->ne[3]; }; - auto *src0 = op->src[0]; - auto *src1 = op->src[1]; + auto * src0 = op->src[0]; + auto * src1 = op->src[1]; switch (ctx->device) { case QNN_BACKEND_NPU: if (src1->ne[2] != src0->ne[2] || src1->ne[3] != src0->ne[3]) { @@ -438,12 +384,10 @@ bool ggml_qnn_supports_matmul_op(ggml_backend_qnn_device_context *ctx, const ggm * TODO: remove the blocker here when NPU backend supports mul_mat like this: * [ne03, ne02, n, k] * [ne03 * x, ne02 * y, m, k] -> [ne03 * x, ne02 * y, m, n] */ - QNN_LOG_DEBUG("[qnn-npu][MUL_MAT]src0 and src1 dimensions are not equal, support/unsupported: %d/%d", - ctx->support_op_count.load(), ++(ctx->unsupported_op_count)); + QNN_LOG_DEBUG("[qnn-npu][MUL_MAT]src0 and src1 dimensions are not equal\n"); return false; } else if (get_tensor_size(src0) + get_tensor_size(src1) + get_tensor_size(op) >= kMaxNpuTensorSize) { - QNN_LOG_DEBUG("[qnn-npu][MUL_MAT]tensor size is too large, support/unsupported: %d/%d", - ctx->support_op_count.load(), ++(ctx->unsupported_op_count)); + QNN_LOG_DEBUG("[qnn-npu][MUL_MAT]tensor size is too large\n"); return false; } // fall through, from test here, the convert op is super slow on NPU: @@ -451,9 +395,8 @@ bool ggml_qnn_supports_matmul_op(ggml_backend_qnn_device_context *ctx, const ggm case QNN_BACKEND_GPU: if (src0->type != src1->type || src0->type != op->type) { // there's no convert op for GPU. 
- QNN_LOG_DEBUG( - "[qnn-gpu][MUL_MAT]type src0(%d), src1(%d) and op(%d) are not equal, support/unsupported: %d/%d", - src0->type, src1->type, op->type, ctx->support_op_count.load(), ++(ctx->unsupported_op_count)); + QNN_LOG_DEBUG("[qnn-gpu][MUL_MAT]type src0(%s), src1(%s) and op(%s) are not equal\n", + ggml_type_name(src0->type), ggml_type_name(src1->type), ggml_type_name(op->type)); return false; } break; @@ -462,31 +405,31 @@ bool ggml_qnn_supports_matmul_op(ggml_backend_qnn_device_context *ctx, const ggm } if ((src1->ne[2] % src0->ne[2]) != 0 || (src1->ne[3] % src0->ne[3]) != 0) { - QNN_LOG_DEBUG("[%s][MUL_MAT]src0 and src1 dimensions are not equal, support/unsupported: %d/%d", - qnn::get_backend_name(ctx->device), ctx->support_op_count.load(), ++(ctx->unsupported_op_count)); + QNN_LOG_DEBUG("[%s][MUL_MAT]src0 and src1 dimensions are not equal\n", qnn::get_backend_name(ctx->device)); return false; } - QNN_LOG_DEBUG("[%s][MUL_MAT]supported matmul op, support/unsupported: %d/%d", qnn::get_backend_name(ctx->device), - ++(ctx->support_op_count), ctx->unsupported_op_count.load()); + QNN_LOG_DEBUG("[%s][MUL_MAT]supported matmul op\n", qnn::get_backend_name(ctx->device)); return true; } -} // namespace +} // namespace namespace qnn { -bool device_supports_op(ggml_backend_qnn_device_context *ctx, const ggml_tensor *op) { +bool device_supports_op(ggml_backend_qnn_device_context * ctx, const ggml_tensor * op) { // Note that this function could be called before the device context is initialized if (op->op == GGML_OP_NONE) { return true; } - if (!kQnnOpsTable[qnn::get_qnn_op_index(op)]) { + if (!kQnnSupportedOps[qnn::get_qnn_op_index(op)]) { #ifndef NDEBUG std::string op_key; get_graph_key_from_op(op, op_key); - QNN_LOG_DEBUG("[%s]unsupported op", op_key.c_str()); + ctx->unsupported_op_count++; + QNN_LOG_DEBUG("[%s][%s]op was unsupported, support/unsupported: %d/%d\n", qnn::get_backend_name(ctx->device), + op_key.c_str(), ctx->supported_op_count.load(), ctx->unsupported_op_count.load()); #endif return false; } @@ -495,48 +438,69 @@ bool device_supports_op(ggml_backend_qnn_device_context *ctx, const ggml_tensor #ifndef NDEBUG std::string tensor_dims; append_tensor_dimensions(op, tensor_dims); - QNN_LOG_DEBUG("[%s]unsupported tensor(%s)", ggml_op_name(op->op), tensor_dims.c_str()); + QNN_LOG_DEBUG("[%s][%s]unsupported tensor(%s), support/unsupported: %d/%d\n", + qnn::get_backend_name(ctx->device), ggml_op_name(op->op), tensor_dims.c_str(), + ctx->supported_op_count.load(), ctx->unsupported_op_count.load()); #endif return false; } + bool is_op_supported = true; if (op->op == GGML_OP_UNARY) { const auto unary_op = ggml_get_unary_op(op); if (unary_op == GGML_UNARY_OP_GELU) { // TODO: fix this - QNN_LOG_DEBUG("[GELU]unsupported unary op GGML_UNARY_OP_GELU for NPU"); - return false; + QNN_LOG_DEBUG("[GELU]unsupported unary op GGML_UNARY_OP_GELU for NPU\n"); + is_op_supported = false; } } else { - auto *src0 = op->src[0]; - auto *src1 = op->src[1]; + auto * src0 = op->src[0]; + auto * src1 = op->src[1]; switch (op->op) { case GGML_OP_ADD: + case GGML_OP_SUB: + case GGML_OP_MUL: + case GGML_OP_DIV: if (!ggml_are_same_shape(src0, src1)) { - QNN_LOG_DEBUG("[ADD] src0 and src1 dimensions are not equal"); - return false; + QNN_LOG_DEBUG("[%s][%s] src0 and src1 dimensions are not equal\n", + qnn::get_backend_name(ctx->device), ggml_op_name(op->op)); + is_op_supported = false; } break; - case GGML_OP_MUL_MAT: - return ggml_qnn_supports_matmul_op(ctx, op); + is_op_supported = ggml_qnn_supports_matmul_op(ctx, op); 
+ break; default: - return false; + // default to supported + break; } } - return true; +#ifndef NDEBUG + if (is_op_supported) { + ctx->supported_op_count++; + QNN_LOG_DEBUG("[%s][%s]op was supported, support/unsupported: %d/%d\n", qnn::get_backend_name(ctx->device), + ggml_op_name(op->op), ctx->supported_op_count.load(), ctx->unsupported_op_count.load()); + } else { + ctx->unsupported_op_count++; + QNN_LOG_DEBUG("[%s][%s]op was unsupported, support/unsupported: %d/%d\n", qnn::get_backend_name(ctx->device), + ggml_op_name(op->op), ctx->supported_op_count.load(), ctx->unsupported_op_count.load()); + } +#endif + + return is_op_supported; } -bool device_compute_graph(ggml_backend_qnn_device_context *ctx, ggml_cgraph *cgraph) { - QNN_LOG_DEBUG("[%s]compute graph start, nodes count: %d", qnn::get_backend_name(ctx->device), (int)cgraph->n_nodes); +bool device_compute_graph(ggml_backend_qnn_device_context * ctx, ggml_cgraph * cgraph) { + QNN_LOG_DEBUG("[%s]compute graph start, nodes count: %d\n", qnn::get_backend_name(ctx->device), + (int) cgraph->n_nodes); auto qnn_graph = get_qnn_graph_from_cache(ctx, cgraph); - bool success = qnn_graph && qnn_graph->execute(cgraph); + bool success = qnn_graph && qnn_graph->execute(cgraph); - QNN_LOG_DEBUG("[%s]compute graph, success: %d", qnn::get_backend_name(ctx->device), (int)success); + QNN_LOG_DEBUG("[%s]compute graph, success: %d\n", qnn::get_backend_name(ctx->device), (int) success); return success; } -} // namespace qnn +} // namespace qnn diff --git a/ggml/src/ggml-qnn/backend-ops.hpp b/ggml/src/ggml-qnn/backend-ops.hpp index c49c4d6dc1..64fb10f00d 100644 --- a/ggml/src/ggml-qnn/backend-ops.hpp +++ b/ggml/src/ggml-qnn/backend-ops.hpp @@ -1,12 +1,11 @@ #pragma once -#include "ggml.h" - #include "backend.hpp" +#include "ggml.h" namespace qnn { -bool device_supports_op(ggml_backend_qnn_device_context *ctx, const ggml_tensor *op); -bool device_compute_graph(ggml_backend_qnn_device_context *ctx, ggml_cgraph *cgraph); +bool device_supports_op(ggml_backend_qnn_device_context * ctx, const ggml_tensor * op); +bool device_compute_graph(ggml_backend_qnn_device_context * ctx, ggml_cgraph * cgraph); -} // namespace qnn +} // namespace qnn diff --git a/ggml/src/ggml-qnn/backend.hpp b/ggml/src/ggml-qnn/backend.hpp index df5e2eb08f..253b0b6723 100644 --- a/ggml/src/ggml-qnn/backend.hpp +++ b/ggml/src/ggml-qnn/backend.hpp @@ -2,7 +2,7 @@ #pragma once #ifndef NDEBUG -#include +# include #endif #include @@ -10,39 +10,41 @@ #include #include -#include "ggml.h" - #include "ggml-backend.h" #include "ggml-qnn.h" - +#include "ggml.h" #include "graph.hpp" #include "qnn-lib.hpp" namespace qnn { typedef std::unordered_map> qnn_graph_cache_t; -} // namespace qnn +} // namespace qnn struct ggml_backend_qnn_device_context { // initialize in constructor - QNNBackend device; - size_t threads; + QNNBackend device; + size_t threads; std::string name; std::string lib_name; // initialize in qnn init - qnn::qcom_socinfo socinfo = {}; - uint64_t supported_types; - std::shared_ptr instance; + qnn::qcom_socinfo socinfo = {}; + uint64_t supported_types; + std::shared_ptr instance; std::shared_ptr qnn_interface; qnn::qnn_graph_cache_t qnn_graph_cache; #ifndef NDEBUG - std::atomic_uint32_t support_op_count = 0; + std::atomic_uint32_t supported_op_count = 0; std::atomic_uint32_t unsupported_op_count = 0; #endif - explicit ggml_backend_qnn_device_context(QNNBackend device, size_t threads, const char *name, const char *lib_name, - uint64_t supported_types) - : device(device), threads(threads), 
name(name), lib_name(lib_name), supported_types(supported_types) {} + explicit ggml_backend_qnn_device_context(QNNBackend device, size_t threads, const char * name, + const char * lib_name, uint64_t supported_types) : + device(device), + threads(threads), + name(name), + lib_name(lib_name), + supported_types(supported_types) {} }; diff --git a/ggml/src/ggml-qnn/buffer.hpp b/ggml/src/ggml-qnn/buffer.hpp index ce796cbe4d..43c4666dd1 100644 --- a/ggml/src/ggml-qnn/buffer.hpp +++ b/ggml/src/ggml-qnn/buffer.hpp @@ -14,7 +14,7 @@ namespace qnn { * This abstract class defines the interface for managing generic memory buffers in a QNN context. */ class qnn_buffer_interface { -public: + public: virtual ~qnn_buffer_interface() = default; /** @@ -35,7 +35,7 @@ public: * * @return A pointer to the buffer. */ - virtual uint8_t *get_buffer() = 0; + virtual uint8_t * get_buffer() = 0; /** * @brief Gets the buffer pointer. @@ -68,21 +68,22 @@ using qnn_buffer_ptr = std::shared_ptr; * handles cleanup of the buffer and its associated memory handle upon destruction. */ class qnn_rpc_buffer : public qnn_buffer_interface { -public: + public: qnn_rpc_buffer(std::shared_ptr qnn_instance, const size_t size, const uint32_t rank, - uint32_t *dimensions, Qnn_DataType_t data_type) - : _size(size), _qnn_instance(qnn_instance) { - - _qnn_rpc_buffer = static_cast(qnn_instance->alloc_rpcmem(size, alignof(uint8_t *))); + uint32_t * dimensions, Qnn_DataType_t data_type) : + _size(size), + _qnn_instance(qnn_instance) { + _qnn_rpc_buffer = static_cast(qnn_instance->alloc_rpcmem(size, alignof(uint8_t *))); _qnn_rpc_mem_handle = qnn_instance->register_rpcmem(_qnn_rpc_buffer, rank, dimensions, data_type); if (!_qnn_rpc_buffer || !_qnn_rpc_mem_handle) { - QNN_LOG_WARN("Failed to register RPC memory: buffer or memory handle is null"); + QNN_LOG_WARN("Failed to register RPC memory: buffer or memory handle is null\n"); // let the destructor free the buffer return; } - QNN_LOG_DEBUG("alloc rpcmem(%p) successfully, size %d", _qnn_rpc_buffer, (int)size); + QNN_LOG_DEBUG("alloc rpcmem(%p) successfully, size %d\n", (void *) _qnn_rpc_buffer, (int) size); } + ~qnn_rpc_buffer() { if (_qnn_instance) { if (_qnn_rpc_mem_handle) { @@ -97,14 +98,16 @@ public: bool is_valid() const override { return _qnn_rpc_buffer && _qnn_rpc_mem_handle; } - uint8_t *get_buffer() override { return _qnn_rpc_buffer; } + uint8_t * get_buffer() override { return _qnn_rpc_buffer; } + size_t get_size() const override { return _size; } + Qnn_MemHandle_t get_mem_handle() const override { return _qnn_rpc_mem_handle; } -private: - size_t _size = 0; - uint8_t *_qnn_rpc_buffer = nullptr; - Qnn_MemHandle_t _qnn_rpc_mem_handle = nullptr; + private: + size_t _size = 0; + uint8_t * _qnn_rpc_buffer = nullptr; + Qnn_MemHandle_t _qnn_rpc_mem_handle = nullptr; std::shared_ptr _qnn_instance; DISABLE_COPY(qnn_rpc_buffer); @@ -119,12 +122,12 @@ private: * a consistent interface for buffer management. 
*/ class qnn_mem_buffer : public qnn_buffer_interface { -public: - explicit qnn_mem_buffer(const uint8_t *data, size_t size) { + public: + explicit qnn_mem_buffer(const uint8_t * data, size_t size) { _buffer = reinterpret_cast(qnn::page_align_alloc(size)); if (!_buffer) { - QNN_LOG_WARN("failed to allocate %.2f MiB", float(size / (1 << 20))); + QNN_LOG_WARN("failed to allocate %.2f MiB\n", float(size / (1 << 20))); return; } @@ -134,49 +137,51 @@ public: memcpy(_buffer, data, size); } - QNN_LOG_DEBUG("alloc buffer: %p, size: %ld", _buffer, size); + QNN_LOG_DEBUG("alloc buffer: %p, size: %ld\n", (void *) _buffer, (long) size); } explicit qnn_mem_buffer(size_t size) : qnn_mem_buffer(nullptr, size) {} ~qnn_mem_buffer() { - QNN_LOG_DEBUG("free buffer: %p, size: %ld", _buffer, _size); + QNN_LOG_DEBUG("free buffer: %p, size: %ld\n", (void *) _buffer, (long) _size); // the free will do nothing if the _buffer is nullptr qnn::align_free(_buffer); } bool is_valid() const override { return _buffer != nullptr; } - uint8_t *get_buffer() override { return _buffer; } + uint8_t * get_buffer() override { return _buffer; } + size_t get_size() const override { return _size; } + Qnn_MemHandle_t get_mem_handle() const override { return nullptr; } -private: - size_t _size = 0; - uint8_t *_buffer = nullptr; + private: + size_t _size = 0; + uint8_t * _buffer = nullptr; DISABLE_COPY(qnn_mem_buffer); DISABLE_MOVE(qnn_mem_buffer); }; class qnn_mem_buffer_slice : public qnn_buffer_interface { -public: - qnn_mem_buffer_slice(const uint8_t *buffer, size_t size) : _buffer(const_cast(buffer)), _size(size) {} + public: + qnn_mem_buffer_slice(const uint8_t * buffer, size_t size) : _buffer(const_cast(buffer)), _size(size) {} bool is_valid() const override { return _buffer && _size; } - uint8_t *get_buffer() override { return _buffer; } + uint8_t * get_buffer() override { return _buffer; } size_t get_size() const override { return _size; } Qnn_MemHandle_t get_mem_handle() const override { return nullptr; } -private: - uint8_t *_buffer = nullptr; - size_t _size = 0; + private: + uint8_t * _buffer = nullptr; + size_t _size = 0; DISABLE_COPY(qnn_mem_buffer_slice); DISABLE_MOVE(qnn_mem_buffer_slice); }; -} // namespace qnn +} // namespace qnn diff --git a/ggml/src/ggml-qnn/dl-loader.hpp b/ggml/src/ggml-qnn/dl-loader.hpp new file mode 100644 index 0000000000..e183d190ce --- /dev/null +++ b/ggml/src/ggml-qnn/dl-loader.hpp @@ -0,0 +1,76 @@ +#pragma once + +#ifdef __linux__ +# include +# include +#elif defined(_WIN32) +# define WIN32_LEAN_AND_MEAN +# ifndef NOMINMAX +# define NOMINMAX +# endif +# include +#endif + +#include + +namespace qnn { + +#ifdef __linux__ +typedef void * dl_handler_t; + +inline qnn::dl_handler_t dl_load(const std::string & lib_path) { + return dlopen(lib_path.c_str(), RTLD_NOW | RTLD_LOCAL); +} + +inline void * dl_sym(qnn::dl_handler_t handle, const std::string & symbol) { + return dlsym(handle, symbol.c_str()); +} + +inline bool dl_unload(qnn::dl_handler_t handle) { + return dlclose(handle) == 0; +} + +inline const char * dl_error() { + return dlerror(); +} +#elif defined(_WIN32) +using dl_handler_t = HMODULE; + +inline qnn::dl_handler_t dl_load(const std::string & lib_path) { + // suppress error dialogs for missing DLLs + auto old_mode = SetErrorMode(SEM_FAILCRITICALERRORS); + SetErrorMode(old_mode | SEM_FAILCRITICALERRORS); + + auto handle = LoadLibraryA(lib_path.c_str()); // TODO: use wstring version for unicode paths + + SetErrorMode(old_mode); + return handle; +} + +inline void * 
dl_sym(qnn::dl_handler_t handle, const std::string & symbol) { + auto old_mode = SetErrorMode(SEM_FAILCRITICALERRORS); + SetErrorMode(old_mode | SEM_FAILCRITICALERRORS); + + void * p = (void *) GetProcAddress(handle, symbol.c_str()); + + SetErrorMode(old_mode); + return p; +} + +inline bool dl_unload(qnn::dl_handler_t handle) { + FreeLibrary(handle); + return true; +} + +inline const char * dl_error() { + // TODO: implement dl_error for Windows + return nullptr; +} + +#endif + +template Fn dl_sym_typed(qnn::dl_handler_t handle, const std::string & function_name) { + return reinterpret_cast(dl_sym(handle, function_name)); +} + +} // namespace qnn diff --git a/ggml/src/ggml-qnn/dl_loader.hpp b/ggml/src/ggml-qnn/dl_loader.hpp deleted file mode 100644 index 1beec8866b..0000000000 --- a/ggml/src/ggml-qnn/dl_loader.hpp +++ /dev/null @@ -1,71 +0,0 @@ -#pragma once - -#ifdef __linux__ -#include -#include -#elif defined(_WIN32) -#define WIN32_LEAN_AND_MEAN -#ifndef NOMINMAX -#define NOMINMAX -#endif -#include -#endif - -#include - -namespace qnn { - -#ifdef __linux__ -typedef void *dl_handler_t; - -inline qnn::dl_handler_t dl_load(const std::string &lib_path) { - return dlopen(lib_path.c_str(), RTLD_NOW | RTLD_LOCAL); -} - -inline void *dl_sym(qnn::dl_handler_t handle, const std::string &symbol) { return dlsym(handle, symbol.c_str()); } - -inline bool dl_unload(qnn::dl_handler_t handle) { return dlclose(handle) == 0; } - -inline const char *dl_error() { return dlerror(); } -#elif defined(_WIN32) -using dl_handler_t = HMODULE; - -inline qnn::dl_handler_t dl_load(const std::string &lib_path) { - // suppress error dialogs for missing DLLs - auto old_mode = SetErrorMode(SEM_FAILCRITICALERRORS); - SetErrorMode(old_mode | SEM_FAILCRITICALERRORS); - - auto handle = LoadLibraryA(lib_path.c_str()); // TODO: use wstring version for unicode paths - - SetErrorMode(old_mode); - return handle; -} - -inline void *dl_sym(qnn::dl_handler_t handle, const std::string &symbol) { - auto old_mode = SetErrorMode(SEM_FAILCRITICALERRORS); - SetErrorMode(old_mode | SEM_FAILCRITICALERRORS); - - void *p = (void *)GetProcAddress(handle, symbol.c_str()); - - SetErrorMode(old_mode); - return p; -} - -inline bool dl_unload(qnn::dl_handler_t handle) { - FreeLibrary(handle); - return true; -} - -inline const char *dl_error() { - // TODO: implement dl_error for Windows - return nullptr; -} - -#endif - -template -Fn dl_sym_typed(qnn::dl_handler_t handle, const std::string &function_name) { - return reinterpret_cast(dl_sym(handle, function_name)); -} - -} // namespace qnn diff --git a/ggml/src/ggml-qnn/ggml-qnn.cpp b/ggml/src/ggml-qnn/ggml-qnn.cpp index 8150dcb9ea..626ba2cce9 100644 --- a/ggml/src/ggml-qnn/ggml-qnn.cpp +++ b/ggml/src/ggml-qnn/ggml-qnn.cpp @@ -6,7 +6,6 @@ #include "ggml-backend-impl.h" #include "ggml-impl.h" - #include "ggml-qnn/backend-ops.hpp" #include "ggml-qnn/backend.hpp" #include "ggml-qnn/logger.hpp" @@ -19,9 +18,9 @@ // // ================================================================================================= #ifdef NDEBUG -#define ENABLE_QNNBACKEND_PERF 0 // enable/disable op's perf info +# define ENABLE_QNNBACKEND_PERF 0 // enable/disable op's perf info #else -#define ENABLE_QNNBACKEND_PERF 1 // enable/disable op's perf info +# define ENABLE_QNNBACKEND_PERF 1 // enable/disable op's perf info #endif #define QNN_BACKEND_NAME "qnn" @@ -29,50 +28,42 @@ namespace { #ifdef _WIN32 -constexpr const char *kQnnCpuLibName = "QnnCpu.dll"; -constexpr const char *kQnnGpuLibName = "QnnGpu.dll"; -constexpr const 
char *kQnnNpuLibName = "QnnHtp.dll"; +constexpr const char * kQnnCpuLibName = "QnnCpu.dll"; +constexpr const char * kQnnGpuLibName = "QnnGpu.dll"; +constexpr const char * kQnnNpuLibName = "QnnHtp.dll"; #else -constexpr const char *kQnnCpuLibName = "libQnnCpu.so"; -constexpr const char *kQnnGpuLibName = "libQnnGpu.so"; -constexpr const char *kQnnNpuLibName = "libQnnHtp.so"; +constexpr const char * kQnnCpuLibName = "libQnnCpu.so"; +constexpr const char * kQnnGpuLibName = "libQnnGpu.so"; +constexpr const char * kQnnNpuLibName = "libQnnHtp.so"; #endif struct qnn_device_caps { - const char *name; - const char *description; - const char *lib_name; + const char * name; + const char * description; + const char * lib_name; enum ggml_backend_dev_type type; // TODO: should get this caps from device uint64_t supported_types; }; +// TODO: should move this to qnn-lib.cpp constexpr const qnn_device_caps kDeviceCaps[] = { { - // https://docs.qualcomm.com/bundle/publicresource/topics/80-63442-50/CpuOpDefSupplement.html#matmul - "qnn-cpu", - "Qualcomm Kryo CPU", - kQnnCpuLibName, - GGML_BACKEND_DEVICE_TYPE_CPU, - (1 << GGML_TYPE_I8) | (1 << GGML_TYPE_F32), - }, + "qnn-cpu", "Qualcomm Kryo CPU", + kQnnCpuLibName, GGML_BACKEND_DEVICE_TYPE_CPU, + (1 << GGML_TYPE_I8) | (1 << GGML_TYPE_F32), + }, // https://docs.qualcomm.com/bundle/publicresource/topics/80-63442-50/CpuOpDefSupplement.html#matmul { - // https://docs.qualcomm.com/bundle/publicresource/topics/80-63442-50/GpuOpDefSupplement.html#matmul - "qnn-gpu", - "Qualcomm Adreno GPU", - kQnnGpuLibName, - GGML_BACKEND_DEVICE_TYPE_GPU, - (1 << GGML_TYPE_F32) | (1 << GGML_TYPE_F16), - }, + "qnn-gpu", "Qualcomm Adreno GPU", + kQnnGpuLibName, GGML_BACKEND_DEVICE_TYPE_GPU, + (1 << GGML_TYPE_F32) | (1 << GGML_TYPE_F16), + }, // https://docs.qualcomm.com/bundle/publicresource/topics/80-63442-50/GpuOpDefSupplement.html#matmul { - // https://docs.qualcomm.com/bundle/publicresource/topics/80-63442-50/HtpOpDefSupplement.html#matmul - "qnn-npu", - "Qualcomm NPU", - kQnnNpuLibName, - GGML_BACKEND_DEVICE_TYPE_ACCEL, - (1 << GGML_TYPE_F32) | (1 << GGML_TYPE_F16) | (1 << GGML_TYPE_I16) | (1 << GGML_TYPE_I8), - }, + "qnn-npu", "Qualcomm NPU", + kQnnNpuLibName, GGML_BACKEND_DEVICE_TYPE_ACCEL, + (1 << GGML_TYPE_F32) | (1 << GGML_TYPE_F16) | (1 << GGML_TYPE_I16) | (1 << GGML_TYPE_I8), + }, // https://docs.qualcomm.com/bundle/publicresource/topics/80-63442-50/HtpOpDefSupplement.html#matmul }; static_assert(sizeof(kDeviceCaps) / sizeof(kDeviceCaps[0]) == GGML_QNN_MAX_DEVICES, @@ -85,11 +76,11 @@ static_assert(kDeviceCaps[QNN_BACKEND_GPU].type == GGML_BACKEND_DEVICE_TYPE_GPU, static_assert(kDeviceCaps[QNN_BACKEND_CPU].type == GGML_BACKEND_DEVICE_TYPE_CPU, "The NPU device should be an accelerator device"); -ggml_backend_qnn_device_context *get_device_context(ggml_backend_dev_t dev) { +ggml_backend_qnn_device_context * get_device_context(ggml_backend_dev_t dev) { return reinterpret_cast(dev->context); } -qnn::qnn_buffer_interface *get_buffer_context(ggml_backend_buffer_t buffer) { +qnn::qnn_buffer_interface * get_buffer_context(ggml_backend_buffer_t buffer) { return reinterpret_cast(buffer->context); } @@ -99,34 +90,34 @@ qnn::qnn_buffer_interface *get_buffer_context(ggml_backend_buffer_t buffer) { * ----------------------------------------------------------------------------------------------- */ void ggml_backend_qnn_buffer_free_buffer(ggml_backend_buffer_t buffer) { - auto *ctx = get_buffer_context(buffer); + auto * ctx = get_buffer_context(buffer); delete ctx; } -void 
*ggml_backend_qnn_buffer_get_base(ggml_backend_buffer_t buffer) { - auto *ctx = get_buffer_context(buffer); +void * ggml_backend_qnn_buffer_get_base(ggml_backend_buffer_t buffer) { + auto * ctx = get_buffer_context(buffer); return ctx->get_buffer(); } -void ggml_backend_qnn_buffer_init_tensor(ggml_backend_buffer_t buffer, ggml_tensor *tensor) { +void ggml_backend_qnn_buffer_init_tensor(ggml_backend_buffer_t buffer, ggml_tensor * tensor) { GGML_UNUSED(buffer); GGML_UNUSED(tensor); // TODO: we should create the qnn tensor along with the ggml tensor } -void ggml_backend_qnn_buffer_set_tensor(ggml_backend_buffer_t buffer, ggml_tensor *tensor, const void *data, +void ggml_backend_qnn_buffer_set_tensor(ggml_backend_buffer_t buffer, ggml_tensor * tensor, const void * data, size_t offset, size_t size) { GGML_UNUSED(buffer); - memcpy((char *)tensor->data + offset, data, size); + memcpy((char *) tensor->data + offset, data, size); } -void ggml_backend_qnn_buffer_get_tensor(ggml_backend_buffer_t buffer, const ggml_tensor *tensor, void *data, +void ggml_backend_qnn_buffer_get_tensor(ggml_backend_buffer_t buffer, const ggml_tensor * tensor, void * data, size_t offset, size_t size) { GGML_UNUSED(buffer); - memcpy(data, (const char *)tensor->data + offset, size); + memcpy(data, (const char *) tensor->data + offset, size); } -bool ggml_backend_qnn_buffer_cpy_tensor(ggml_backend_buffer_t buffer, const ggml_tensor *src, ggml_tensor *dst) { +bool ggml_backend_qnn_buffer_cpy_tensor(ggml_backend_buffer_t buffer, const ggml_tensor * src, ggml_tensor * dst) { GGML_UNUSED(buffer); if (ggml_backend_buffer_is_host(src->buffer)) { memcpy(dst->data, src->data, ggml_nbytes(src)); @@ -137,7 +128,7 @@ bool ggml_backend_qnn_buffer_cpy_tensor(ggml_backend_buffer_t buffer, const ggml } void ggml_backend_qnn_buffer_clear(ggml_backend_buffer_t buffer, uint8_t value) { - auto *ctx = get_buffer_context(buffer); + auto * ctx = get_buffer_context(buffer); memset(ctx->get_buffer(), value, ctx->get_size()); } @@ -158,19 +149,19 @@ constexpr const ggml_backend_buffer_i ggml_backend_qnn_buffer_interface = { * qnn backend object * ----------------------------------------------------------------------------------------------- */ -const char *ggml_backend_qnn_buffer_type_name(ggml_backend_buffer_type_t buft) { - auto *dev_ctx = get_device_context(buft->device); +const char * ggml_backend_qnn_buffer_type_name(ggml_backend_buffer_type_t buft) { + auto * dev_ctx = get_device_context(buft->device); return qnn::get_backend_name(dev_ctx->device); } ggml_backend_buffer_t ggml_backend_qnn_buffer_type_alloc_buffer(ggml_backend_buffer_type_t buft, size_t size) { - qnn::qnn_buffer_interface *ctx = new qnn::qnn_mem_buffer(size); + qnn::qnn_buffer_interface * ctx = new qnn::qnn_mem_buffer(size); if (!ctx->is_valid()) { return nullptr; } - QNN_LOG_DEBUG("[%s]alloc buffer: %p, size: %ld", qnn::get_backend_name(get_device_context(buft->device)->device), - ctx->get_buffer(), size); + QNN_LOG_DEBUG("[%s]alloc buffer: %p, size: %ld\n", qnn::get_backend_name(get_device_context(buft->device)->device), + (void *) ctx->get_buffer(), (long) size); return ggml_backend_buffer_init(buft, ggml_backend_qnn_buffer_interface, ctx, size); } @@ -192,16 +183,16 @@ bool ggml_backend_qnn_buffer_is_host(ggml_backend_buffer_type_t buft) { return true; } -const char *ggml_backend_qnn_name(ggml_backend_t backend) { - auto *device_ctx = get_device_context(backend->device); +const char * ggml_backend_qnn_name(ggml_backend_t backend) { + auto * device_ctx = 
get_device_context(backend->device); return device_ctx->name.c_str(); } void ggml_backend_qnn_free(ggml_backend_t backend) { - auto *device_ctx = get_device_context(backend->device); - QNN_LOG_INFO("idx %d, name:%s", device_ctx->device, device_ctx->name.c_str()); + auto * device_ctx = get_device_context(backend->device); + QNN_LOG_INFO("idx %d, name:%s\n", device_ctx->device, device_ctx->name.c_str()); - auto &instance = device_ctx->instance; + auto & instance = device_ctx->instance; if (instance) { device_ctx->qnn_graph_cache.clear(); device_ctx->qnn_interface.reset(); @@ -212,35 +203,33 @@ void ggml_backend_qnn_free(ggml_backend_t backend) { delete backend; } -bool ggml_backend_qnn_cpy_tensor_async(ggml_backend_t backend_src, ggml_backend_t backend_dst, const ggml_tensor *src, - ggml_tensor *dst) { +bool ggml_backend_qnn_cpy_tensor_async(ggml_backend_t backend_src, ggml_backend_t backend_dst, const ggml_tensor * src, + ggml_tensor * dst) { GGML_UNUSED(backend_src); GGML_UNUSED(backend_dst); GGML_UNUSED(src); GGML_UNUSED(dst); - QNN_LOG_DEBUG("opy form %s to %s, src_is_qnn: %d, dst_is_qnn: %d", ggml_get_name(src), ggml_get_name(dst), - (int)ggml_backend_is_qnn(backend_src), (int)ggml_backend_is_qnn(backend_dst)); + QNN_LOG_DEBUG("opy form %s to %s, src_is_qnn: %d, dst_is_qnn: %d\n", ggml_get_name(src), ggml_get_name(dst), + (int) ggml_backend_is_qnn(backend_src), (int) ggml_backend_is_qnn(backend_dst)); return false; } ggml_backend_buffer_type_t ggml_backend_qnn_buffer_type(ggml_backend_dev_t dev) { static ggml_backend_buffer_type ggml_backend_qnn_buffer_types[GGML_QNN_MAX_DEVICES]; - auto *dev_ctx = get_device_context(dev); + auto * dev_ctx = get_device_context(dev); if (!ggml_backend_qnn_buffer_types[dev_ctx->device].device) { ggml_backend_qnn_buffer_types[dev_ctx->device] = { /* .iface = */ { - /* .get_name = */ ggml_backend_qnn_buffer_type_name, - /* .alloc_buffer = */ - ggml_backend_qnn_buffer_type_alloc_buffer, - /* .get_alignment = */ - ggml_backend_qnn_buffer_type_get_alignment, - /* .get_max_size = */ - ggml_backend_qnn_buffer_type_get_max_size, - /* .get_alloc_size = */ nullptr, // defaults to ggml_nbytes + /* .get_name = */ ggml_backend_qnn_buffer_type_name, + /* .alloc_buffer = */ + ggml_backend_qnn_buffer_type_alloc_buffer, /* .get_alignment = */ + ggml_backend_qnn_buffer_type_get_alignment, /* .get_max_size = */ + ggml_backend_qnn_buffer_type_get_max_size, /* .get_alloc_size = */ nullptr, // defaults to ggml_nbytes /* .is_host = */ ggml_backend_qnn_buffer_is_host, - }, - /* .device */ dev, + }, + /* .device */ + dev, /* .context = */ nullptr, }; } else { @@ -250,9 +239,9 @@ ggml_backend_buffer_type_t ggml_backend_qnn_buffer_type(ggml_backend_dev_t dev) return &ggml_backend_qnn_buffer_types[dev_ctx->device]; } -ggml_status ggml_backend_qnn_graph_compute(ggml_backend_t backend, ggml_cgraph *cgraph) { - return qnn::device_compute_graph(get_device_context(backend->device), cgraph) ? GGML_STATUS_SUCCESS - : GGML_STATUS_FAILED; +ggml_status ggml_backend_qnn_graph_compute(ggml_backend_t backend, ggml_cgraph * cgraph) { + return qnn::device_compute_graph(get_device_context(backend->device), cgraph) ? 
GGML_STATUS_SUCCESS : + GGML_STATUS_FAILED; } constexpr const ggml_backend_i ggml_backend_qnn_interface = { @@ -276,31 +265,31 @@ constexpr const ggml_backend_i ggml_backend_qnn_interface = { * qnn backend device object * ----------------------------------------------------------------------------------------------- */ -const char *ggml_backend_qnn_device_get_name(ggml_backend_dev_t dev) { - const auto &caps = kDeviceCaps[get_device_context(dev)->device]; +const char * ggml_backend_qnn_device_get_name(ggml_backend_dev_t dev) { + const auto & caps = kDeviceCaps[get_device_context(dev)->device]; return caps.name; } -const char *ggml_backend_qnn_device_get_description(ggml_backend_dev_t dev) { - const auto &caps = kDeviceCaps[get_device_context(dev)->device]; +const char * ggml_backend_qnn_device_get_description(ggml_backend_dev_t dev) { + const auto & caps = kDeviceCaps[get_device_context(dev)->device]; return caps.description; } -void ggml_backend_qnn_device_get_memory(ggml_backend_dev_t dev, size_t *free, size_t *total) { +void ggml_backend_qnn_device_get_memory(ggml_backend_dev_t dev, size_t * free, size_t * total) { GGML_UNUSED(dev); - *free = qnn::get_system_free_memory_in_bytes(); + *free = qnn::get_system_free_memory_in_bytes(); *total = qnn::get_system_total_memory_in_bytes(); - QNN_LOG_DEBUG("free memory: %ldMB, total memory: %ldMB", (*free / 1048576), (*total) / 1048576); + QNN_LOG_DEBUG("free memory: %ldMB, total memory: %ldMB\n", (*free / 1048576), (*total) / 1048576); } enum ggml_backend_dev_type ggml_backend_qnn_device_get_type(ggml_backend_dev_t dev) { return kDeviceCaps[get_device_context(dev)->device].type; } -void ggml_backend_qnn_device_get_props(ggml_backend_dev_t dev, ggml_backend_dev_props *props) { - props->name = ggml_backend_qnn_device_get_name(dev); +void ggml_backend_qnn_device_get_props(ggml_backend_dev_t dev, ggml_backend_dev_props * props) { + props->name = ggml_backend_qnn_device_get_name(dev); props->description = ggml_backend_qnn_device_get_description(dev); - props->type = ggml_backend_qnn_device_get_type(dev); + props->type = ggml_backend_qnn_device_get_type(dev); ggml_backend_qnn_device_get_memory(dev, &props->memory_free, &props->memory_total); props->caps = { /* async */ false, @@ -311,12 +300,12 @@ void ggml_backend_qnn_device_get_props(ggml_backend_dev_t dev, ggml_backend_dev_ } ggml_guid_t ggml_backend_qnn_guid() { - static ggml_guid guid = {0x1a, 0x2b, 0x3c, 0x4d, 0x5e, 0x6f, 0x70, 0x81, - 0x92, 0xa3, 0xb4, 0xc5, 0xd6, 0xe7, 0xf8, 0x09}; + static ggml_guid guid = { 0x1a, 0x2b, 0x3c, 0x4d, 0x5e, 0x6f, 0x70, 0x81, + 0x92, 0xa3, 0xb4, 0xc5, 0xd6, 0xe7, 0xf8, 0x09 }; return &guid; } -ggml_backend_t ggml_backend_qnn_init_with_device_context(ggml_backend_dev_t dev, const char *extend_lib_search_path) { +ggml_backend_t ggml_backend_qnn_init_with_device_context(ggml_backend_dev_t dev, const char * extend_lib_search_path) { if (!extend_lib_search_path) { extend_lib_search_path = GGML_QNN_DEFAULT_LIB_SEARCH_PATH; QNN_LOG_WARN( @@ -324,27 +313,27 @@ ggml_backend_t ggml_backend_qnn_init_with_device_context(ggml_backend_dev_t dev, "use " GGML_QNN_DEFAULT_LIB_SEARCH_PATH " as default"); } - auto *dev_ctx = get_device_context(dev); - const auto device = dev_ctx->device; - QNN_LOG_DEBUG("device %s", qnn::get_backend_name(device)); - QNN_LOG_DEBUG("extend_lib_search_path %s", extend_lib_search_path); + auto * dev_ctx = get_device_context(dev); + const auto device = dev_ctx->device; + QNN_LOG_DEBUG("device %s\n", qnn::get_backend_name(device)); + 
QNN_LOG_DEBUG("extend_lib_search_path %s\n", extend_lib_search_path); auto instance = std::make_shared(extend_lib_search_path, dev_ctx->lib_name); - auto result = instance->qnn_init(nullptr); + auto result = instance->qnn_init(nullptr); if (result != 0) { - QNN_LOG_WARN("failed to init qnn backend %s", qnn::get_backend_name(device)); + QNN_LOG_WARN("failed to init qnn backend %s\n", qnn::get_backend_name(device)); return nullptr; } auto qnn_interface = instance->get_qnn_interface(); if (!qnn_interface) { - QNN_LOG_WARN("qnn subsystem failure"); + QNN_LOG_WARN("qnn subsystem failure\n"); return nullptr; } std::string device_name = qnn::get_backend_name(device); - QNN_LOG_INFO("qnn device name %s", device_name.c_str()); - dev_ctx->instance = instance; - dev_ctx->qnn_interface = qnn_interface; - dev_ctx->socinfo = instance->get_soc_info(); + QNN_LOG_INFO("qnn device name %s\n", device_name.c_str()); + dev_ctx->instance = instance; + dev_ctx->qnn_interface = qnn_interface; + dev_ctx->socinfo = instance->get_soc_info(); dev_ctx->supported_types = kDeviceCaps[device].supported_types; ggml_backend_t qnn_backend = new ggml_backend{ @@ -357,7 +346,7 @@ ggml_backend_t ggml_backend_qnn_init_with_device_context(ggml_backend_dev_t dev, return qnn_backend; } -ggml_backend_t ggml_backend_qnn_device_init(ggml_backend_dev_t dev, const char *params) { +ggml_backend_t ggml_backend_qnn_device_init(ggml_backend_dev_t dev, const char * params) { return ggml_backend_qnn_init_with_device_context(dev, params); } @@ -365,7 +354,7 @@ ggml_backend_buffer_type_t ggml_backend_qnn_device_get_buffer_type(ggml_backend_ return ggml_backend_qnn_buffer_type(dev); } -ggml_backend_buffer_t ggml_backend_qnn_device_buffer_from_ptr(ggml_backend_dev_t dev, void *ptr, size_t size, +ggml_backend_buffer_t ggml_backend_qnn_device_buffer_from_ptr(ggml_backend_dev_t dev, void * ptr, size_t size, size_t max_tensor_size) { // TODO GGML_UNUSED(dev); @@ -373,9 +362,9 @@ ggml_backend_buffer_t ggml_backend_qnn_device_buffer_from_ptr(ggml_backend_dev_t return ggml_backend_cpu_buffer_from_ptr(ptr, size); } -bool ggml_backend_qnn_device_supports_op(ggml_backend_dev_t dev, const ggml_tensor *op) { +bool ggml_backend_qnn_device_supports_op(ggml_backend_dev_t dev, const ggml_tensor * op) { // Note that this function could be called before the device context is initialized - auto *device_ctx = get_device_context(dev); + auto * device_ctx = get_device_context(dev); return qnn::device_supports_op(device_ctx, op); } @@ -384,13 +373,13 @@ bool ggml_backend_qnn_device_supports_buft(ggml_backend_dev_t dev, ggml_backend_ return ggml_backend_buft_is_host(buft); } -bool ggml_backend_qnn_device_offload_op(ggml_backend_dev_t dev, const ggml_tensor *op) { +bool ggml_backend_qnn_device_offload_op(ggml_backend_dev_t dev, const ggml_tensor * op) { #ifdef NDEBUG GGML_UNUSED(dev); GGML_UNUSED(op); #else - auto *device_ctx = get_device_context(dev); - QNN_LOG_DEBUG("[%s][%s]offload op", qnn::get_backend_name(device_ctx->device), ggml_op_name(op->op)); + auto * device_ctx = get_device_context(dev); + QNN_LOG_DEBUG("[%s][%s]offload op\n", qnn::get_backend_name(device_ctx->device), ggml_op_name(op->op)); #endif return false; } @@ -421,15 +410,15 @@ constexpr const ggml_backend_device_i ggml_backend_qnn_device_interface = { struct ggml_backend_qnn_reg_impl : ggml_backend_reg { std::vector> device_contexts; - std::vector devices; + std::vector devices; explicit ggml_backend_qnn_reg_impl(ggml_backend_reg_i interface) { context = this; - iface = interface; + iface = 
interface; - QNN_LOG_DEBUG("qnn backend registry init"); + QNN_LOG_DEBUG("qnn backend registry init\n"); for (size_t i = 0; i < QNN_BACKEND_COUNT; i++) { - const auto device_enum = (QNNBackend)(QNN_BACKEND_COUNT - 1 - i); // init from the last device, i.e. NPU + const auto device_enum = (QNNBackend) (QNN_BACKEND_COUNT - 1 - i); // init from the last device, i.e. NPU #ifndef GGML_QNN_ENABLE_CPU_BACKEND if (device_enum == QNN_BACKEND_CPU) { /* @@ -441,7 +430,7 @@ struct ggml_backend_qnn_reg_impl : ggml_backend_reg { #endif device_contexts.emplace_back(std::make_unique( - /* .device = */ device_enum, // init from the last device, i.e. NPU + /* .device = */ device_enum, // init from the last device, i.e. NPU /* .threads = */ 1, /* .name = */ qnn::get_backend_name(device_enum), /* .lib_name = */ kDeviceCaps[device_enum].lib_name, @@ -456,18 +445,18 @@ struct ggml_backend_qnn_reg_impl : ggml_backend_reg { } }; -const char *ggml_backend_qnn_reg_get_name(ggml_backend_reg_t reg) { +const char * ggml_backend_qnn_reg_get_name(ggml_backend_reg_t reg) { GGML_UNUSED(reg); return GGML_QNN_NAME; } size_t ggml_backend_qnn_reg_get_device_count(ggml_backend_reg_t reg) { - auto *ctx = (ggml_backend_qnn_reg_impl *)reg->context; + auto * ctx = (ggml_backend_qnn_reg_impl *) reg->context; return ctx->devices.size(); } ggml_backend_dev_t ggml_backend_qnn_reg_get_device(ggml_backend_reg_t reg, size_t index) { - auto *ctx = (ggml_backend_qnn_reg_impl *)reg->context; + auto * ctx = (ggml_backend_qnn_reg_impl *) reg->context; GGML_ASSERT(index < ctx->devices.size()); return &(ctx->devices[index]); } @@ -479,11 +468,13 @@ const ggml_backend_reg_i ggml_backend_qnn_reg_interface = { /* .get_proc_address = */ nullptr, }; -} // namespace +} // namespace -bool ggml_backend_is_qnn(ggml_backend_t backend) { return ggml_guid_matches(backend->guid, ggml_backend_qnn_guid()); } +bool ggml_backend_is_qnn(ggml_backend_t backend) { + return ggml_guid_matches(backend->guid, ggml_backend_qnn_guid()); +} ggml_backend_reg_t ggml_backend_qnn_reg() { - static ggml_backend_qnn_reg_impl reg{ggml_backend_qnn_reg_interface}; + static ggml_backend_qnn_reg_impl reg{ ggml_backend_qnn_reg_interface }; return ® } diff --git a/ggml/src/ggml-qnn/graph.cpp b/ggml/src/ggml-qnn/graph.cpp index 25ce5b8fb2..b3ab161e9f 100644 --- a/ggml/src/ggml-qnn/graph.cpp +++ b/ggml/src/ggml-qnn/graph.cpp @@ -5,7 +5,6 @@ #include #include "ggml-impl.h" - #include "logger.hpp" #include "op-config.hpp" #include "tensor.hpp" @@ -13,9 +12,9 @@ namespace { using qnn_tensor_cache_t = std::unordered_map; -int get_op_max_rank(const ggml_tensor *op) { - int max_rank = ggml_n_dims(op); - const int count = (int)qnn::get_qnn_op_input_param_count(op); +int get_op_max_rank(const ggml_tensor * op) { + int max_rank = ggml_n_dims(op); + const int count = (int) qnn::get_qnn_op_input_param_count(op); for (int i = 0; i < count; ++i) { max_rank = std::max(max_rank, ggml_n_dims(op->src[i])); } @@ -23,10 +22,10 @@ int get_op_max_rank(const ggml_tensor *op) { return max_rank; } -qnn::qnn_tensor_ptr_t create_tensor_with_cache(ggml_tensor *tensor, qnn::ggml_qnn_tensor::tensor_type_t type, int rank, +qnn::qnn_tensor_ptr_t create_tensor_with_cache(ggml_tensor * tensor, qnn::ggml_qnn_tensor::tensor_type_t type, int rank, QNNBackend device, Qnn_GraphHandle_t graph_handle, std::shared_ptr qnn_instance, - qnn_tensor_cache_t &tensor_cache) { + qnn_tensor_cache_t & tensor_cache) { GGML_ASSERT(tensor); if (tensor_cache.count(tensor)) { return tensor_cache[tensor]; @@ -38,13 +37,13 @@ 
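For context on how the registry defined in ggml-qnn.cpp above is consumed, here is a minimal sketch of enumerating the QNN devices and bringing a backend up through the generic ggml-backend device API. It assumes the ggml_backend_reg_dev_count / ggml_backend_reg_dev_get / ggml_backend_dev_* helpers from ggml-backend.h; it is illustrative only and not part of this patch.

#include <cstdio>
#include "ggml-backend.h"
#include "ggml-qnn.h"

int main() {
    ggml_backend_reg_t reg = ggml_backend_qnn_reg();  // registry object defined above
    for (size_t i = 0; i < ggml_backend_reg_dev_count(reg); ++i) {
        ggml_backend_dev_t dev = ggml_backend_reg_dev_get(reg, i);

        size_t free = 0, total = 0;
        ggml_backend_dev_memory(dev, &free, &total);
        printf("%s: %zu/%zu MB free\n", ggml_backend_dev_name(dev), free / 1048576, total / 1048576);

        ggml_backend_t backend = ggml_backend_dev_init(dev, /* params */ nullptr);
        if (backend && ggml_backend_is_qnn(backend)) {
            // ready for ggml_backend_graph_compute(backend, cgraph)
            ggml_backend_free(backend);
        }
    }
    return 0;
}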
qnn::qnn_tensor_ptr_t create_tensor_with_cache(ggml_tensor *tensor, qnn::ggml_qn return qnn_tensor; } -qnn::qnn_tensor_array_t create_tensors_with_cache(const qnn::ggml_tensor_array_t &ggml_tensors, +qnn::qnn_tensor_array_t create_tensors_with_cache(const qnn::ggml_tensor_array_t & ggml_tensors, qnn::ggml_qnn_tensor::tensor_type_t type, int rank, QNNBackend device, - Qnn_GraphHandle_t graph_handle, + Qnn_GraphHandle_t graph_handle, std::shared_ptr qnn_instance, - qnn_tensor_cache_t &tensor_cache) { + qnn_tensor_cache_t & tensor_cache) { qnn::qnn_tensor_array_t tensors; - for (auto *tensor : ggml_tensors) { + for (auto * tensor : ggml_tensors) { tensors.push_back( create_tensor_with_cache(tensor, type, rank, device, graph_handle, qnn_instance, tensor_cache)); } @@ -52,10 +51,10 @@ qnn::qnn_tensor_array_t create_tensors_with_cache(const qnn::ggml_tensor_array_t return tensors; } -qnn::qnn_op_config_ptr_t create_operation_from_op_tensor(ggml_tensor *dst, const std::string &name, int rank, +qnn::qnn_op_config_ptr_t create_operation_from_op_tensor(ggml_tensor * dst, const std::string & name, int rank, QNNBackend device, Qnn_GraphHandle_t graph_handle, std::shared_ptr qnn_instance, - bool is_intermediate, qnn_tensor_cache_t &tensor_cache) { + bool is_intermediate, qnn_tensor_cache_t & tensor_cache) { auto operation = qnn::create_op(dst, name, qnn_instance); // input tensors @@ -71,22 +70,22 @@ qnn::qnn_op_config_ptr_t create_operation_from_op_tensor(ggml_tensor *dst, const // output tensor tensor_type = is_intermediate ? qnn::ggml_qnn_tensor::INTERMEDIATE : qnn::ggml_qnn_tensor::OUTPUT; qnn::qnn_tensor_array_t output_qnn_tensors = - create_tensors_with_cache({dst}, tensor_type, rank, device, graph_handle, qnn_instance, tensor_cache); + create_tensors_with_cache({ dst }, tensor_type, rank, device, graph_handle, qnn_instance, tensor_cache); operation->set_output_tensors(output_qnn_tensors); // initialize operation if (!operation->initialize_op_nodes(device, graph_handle)) { - QNN_LOG_ERROR("[%s][%s]initialize_op_nodes failed", qnn::get_backend_name(device), name.c_str()); + QNN_LOG_ERROR("[%s][%s]initialize_op_nodes failed\n", qnn::get_backend_name(device), name.c_str()); return nullptr; } return operation; } -bool bind_src_tensors(ggml_tensor *op, qnn::qnn_tensor_array_t &tensor_wrappers, - std::vector &qnn_tensors) { +bool bind_src_tensors(ggml_tensor * op, qnn::qnn_tensor_array_t & tensor_wrappers, + std::vector & qnn_tensors) { if (op->op == GGML_OP_NONE) { - QNN_LOG_DEBUG("op %s is not a valid op", ggml_get_name(op)); + QNN_LOG_DEBUG("op %s is not a valid op\n", ggml_get_name(op)); return false; } @@ -94,9 +93,9 @@ bool bind_src_tensors(ggml_tensor *op, qnn::qnn_tensor_array_t &tensor_wrappers, GGML_ASSERT(tensor_wrappers.size() == param_count); qnn_tensors.resize(param_count); for (size_t i = 0; i < param_count; ++i) { - auto *ggml_tensor = op->src[i]; + auto * ggml_tensor = op->src[i]; if (!tensor_wrappers[i]->bind_ggml_tensor(ggml_tensor)) { - QNN_LOG_ERROR("bind tensor %s failed", ggml_get_name(ggml_tensor)); + QNN_LOG_ERROR("bind tensor %s failed\n", ggml_get_name(ggml_tensor)); return false; } @@ -116,22 +115,21 @@ bool bind_src_tensors(ggml_tensor *op, qnn::qnn_tensor_array_t &tensor_wrappers, * * TODO: this algorithm is not perfect and may not work for all cases. It assumes that the tensors are * connected in a way that allows for unambiguous categorization. - * It also assumes that the tensors are connected in a way that allows for unambiguous categorization. 
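The caveat above refers to the degree-counting heuristic that get_io_tensors_from_graph (just below) uses: every cgraph node adds one unit of in-degree to its destination tensor and one unit of out-degree to each of its sources; after the walk, tensors with in-degree 0 are treated as graph inputs and tensors with out-degree 0 as graph outputs. A stripped-down sketch of the same idea over a plain edge list (illustrative only, not the code in this patch):

#include <cstdio>
#include <map>
#include <string>
#include <utility>
#include <vector>

int main() {
    // edges: source tensor -> destination tensor, one entry per (node, src) pair
    std::vector<std::pair<std::string, std::string>> edges = {
        { "a", "mul_out" }, { "b", "mul_out" }, { "mul_out", "add_out" }, { "c", "add_out" },
    };

    std::map<std::string, int> in_degree;
    std::map<std::string, int> out_degree;
    for (const auto & e : edges) {
        out_degree[e.first]++;   // the source feeds something downstream
        in_degree[e.second]++;   // the destination is produced by an op
        in_degree[e.first];      // default-insert 0 so pure sources show up as inputs
        out_degree[e.second];    // default-insert 0 so pure sinks show up as outputs
    }

    for (const auto & kv : in_degree) {
        if (kv.second == 0) {
            printf("graph input : %s\n", kv.first.c_str());  // a, b, c
        }
    }
    for (const auto & kv : out_degree) {
        if (kv.second == 0) {
            printf("graph output: %s\n", kv.first.c_str());  // add_out
        }
    }
    return 0;
}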
*/ -int get_io_tensors_from_graph(const ggml_cgraph *cgraph, qnn::ggml_tensor_array_t &inputs, - qnn::ggml_tensor_array_t &outputs) { +int get_io_tensors_from_graph(const ggml_cgraph * cgraph, qnn::ggml_tensor_array_t & inputs, + qnn::ggml_tensor_array_t & outputs) { struct _tensor_connectivity_info { - size_t in_degree = 0; - size_t out_degree = 0; + size_t in_degree = 0; + size_t out_degree = 0; size_t insert_index = 0; }; using ggml_tensor_connectivity_map_t = std::unordered_map; ggml_tensor_connectivity_map_t connectivity_map; - int rank = 0; + int rank = 0; for (int i = 0; i < cgraph->n_nodes; i++) { - ggml_tensor *dst = cgraph->nodes[i]; + ggml_tensor * dst = cgraph->nodes[i]; if (ggml_is_empty(dst)) { continue; } @@ -144,7 +142,7 @@ int get_io_tensors_from_graph(const ggml_cgraph *cgraph, qnn::ggml_tensor_array_ rank = std::max(rank, ggml_n_dims(dst)); if (connectivity_map.count(dst) == 0) { connectivity_map[dst] = { - 1, // in-degree, at least 1 + 1, // in-degree, at least 1 0, connectivity_map.size(), }; @@ -153,13 +151,13 @@ int get_io_tensors_from_graph(const ggml_cgraph *cgraph, qnn::ggml_tensor_array_ } for (size_t i = 0; i < GGML_MAX_DIMS && dst->src[i]; ++i) { - auto *src = dst->src[i]; - rank = std::max(rank, ggml_n_dims(src)); + auto * src = dst->src[i]; + rank = std::max(rank, ggml_n_dims(src)); if (connectivity_map.count(src) == 0) { connectivity_map[src] = { 0, - 1, // out-degree, at least 1 + 1, // out-degree, at least 1 connectivity_map.size(), }; } else { @@ -168,7 +166,7 @@ int get_io_tensors_from_graph(const ggml_cgraph *cgraph, qnn::ggml_tensor_array_ } } - for (const auto &kv : connectivity_map) { + for (const auto & kv : connectivity_map) { if (kv.second.in_degree == 0) { inputs.push_back(kv.first); } @@ -178,126 +176,103 @@ int get_io_tensors_from_graph(const ggml_cgraph *cgraph, qnn::ggml_tensor_array_ } } - std::sort(inputs.begin(), inputs.end(), [&connectivity_map](ggml_tensor *lhs, ggml_tensor *rhs) { + std::sort(inputs.begin(), inputs.end(), [&connectivity_map](ggml_tensor * lhs, ggml_tensor * rhs) { return connectivity_map[lhs].insert_index < connectivity_map[rhs].insert_index; }); - std::sort(outputs.begin(), outputs.end(), [&connectivity_map](ggml_tensor *lhs, ggml_tensor *rhs) { + std::sort(outputs.begin(), outputs.end(), [&connectivity_map](ggml_tensor * lhs, ggml_tensor * rhs) { return connectivity_map[lhs].insert_index < connectivity_map[rhs].insert_index; }); return rank; } -} // namespace +} // namespace namespace qnn { -qnn_graph::qnn_graph(const std::string &graph_name, QNNBackend device, std::shared_ptr qnn_instance, - size_t vtcm_size_in_mb) - : _graph_name(graph_name), _device(device), _qnn_instance(qnn_instance) { - QNN_LOG_DEBUG("[%s][%s]created", get_backend_name(device), graph_name.c_str()); +qnn_graph::qnn_graph(const std::string & graph_name, QNNBackend device, std::shared_ptr qnn_instance, + size_t vtcm_size_in_mb) : + _graph_name(graph_name), + _device(device), + _qnn_instance(qnn_instance) { + QNN_LOG_DEBUG("[%s][%s]created\n", get_backend_name(device), graph_name.c_str()); - auto qnn_interface = qnn_instance->get_qnn_interface(); - auto qnn_context = qnn_instance->get_qnn_context_handle(); - Qnn_ErrorHandle_t error = QNN_SUCCESS; - Qnn_GraphHandle_t graph_handle = nullptr; + auto qnn_interface = qnn_instance->get_qnn_interface(); + auto qnn_context = qnn_instance->get_qnn_context_handle(); + Qnn_ErrorHandle_t error = QNN_SUCCESS; + Qnn_GraphHandle_t graph_handle = nullptr; if (device == QNN_BACKEND_NPU) { // TODO: fix graph config 
here for NPU QnnHtpGraph_CustomConfig_t hvx_config; - hvx_config.option = QNN_HTP_GRAPH_CONFIG_OPTION_NUM_HVX_THREADS; + hvx_config.option = QNN_HTP_GRAPH_CONFIG_OPTION_NUM_HVX_THREADS; hvx_config.numHvxThreads = 8; QnnGraph_Config_t graph_hvx_config; - graph_hvx_config.option = QNN_GRAPH_CONFIG_OPTION_CUSTOM; + graph_hvx_config.option = QNN_GRAPH_CONFIG_OPTION_CUSTOM; graph_hvx_config.customConfig = &hvx_config; QnnHtpGraph_CustomConfig_t dlbc_config; - dlbc_config.option = QNN_HTP_GRAPH_CONFIG_OPTION_OPTIMIZATION; - dlbc_config.optimizationOption.type = QNN_HTP_GRAPH_OPTIMIZATION_TYPE_ENABLE_DLBC; - dlbc_config.optimizationOption.floatValue = 1.0; // set to 0.0 to turn off DLBC + dlbc_config.option = QNN_HTP_GRAPH_CONFIG_OPTION_OPTIMIZATION; + dlbc_config.optimizationOption.type = QNN_HTP_GRAPH_OPTIMIZATION_TYPE_ENABLE_DLBC; + dlbc_config.optimizationOption.floatValue = 1.0; // set to 0.0 to turn off DLBC QnnGraph_Config_t graph_dlbc_config; - graph_dlbc_config.option = QNN_GRAPH_CONFIG_OPTION_CUSTOM; + graph_dlbc_config.option = QNN_GRAPH_CONFIG_OPTION_CUSTOM; graph_dlbc_config.customConfig = &dlbc_config; QnnHtpGraph_CustomConfig_t opt_config; - opt_config.optimizationOption.type = QNN_HTP_GRAPH_OPTIMIZATION_TYPE_FINALIZE_OPTIMIZATION_FLAG; - opt_config.optimizationOption.floatValue = 1; // 1 / 3 + opt_config.optimizationOption.type = QNN_HTP_GRAPH_OPTIMIZATION_TYPE_FINALIZE_OPTIMIZATION_FLAG; + opt_config.optimizationOption.floatValue = 1; // 1 / 3 QnnGraph_Config_t graph_opt_config; - graph_opt_config.option = QNN_GRAPH_CONFIG_OPTION_CUSTOM; + graph_opt_config.option = QNN_GRAPH_CONFIG_OPTION_CUSTOM; graph_opt_config.customConfig = &opt_config; QnnHtpGraph_CustomConfig_t vtcm_config; - vtcm_config.option = QNN_HTP_GRAPH_CONFIG_OPTION_VTCM_SIZE; - vtcm_config.vtcmSizeInMB = (uint32_t)vtcm_size_in_mb; + vtcm_config.option = QNN_HTP_GRAPH_CONFIG_OPTION_VTCM_SIZE; + vtcm_config.vtcmSizeInMB = (uint32_t) vtcm_size_in_mb; QnnGraph_Config_t graph_vtcm_config; - graph_vtcm_config.option = QNN_GRAPH_CONFIG_OPTION_CUSTOM; + graph_vtcm_config.option = QNN_GRAPH_CONFIG_OPTION_CUSTOM; graph_vtcm_config.customConfig = &vtcm_config; - const QnnGraph_Config_t *graph_configs[] = {&graph_hvx_config, &graph_dlbc_config, &graph_vtcm_config, - &graph_opt_config, nullptr}; + const QnnGraph_Config_t * graph_configs[] = { &graph_hvx_config, &graph_dlbc_config, &graph_vtcm_config, + &graph_opt_config, nullptr }; error = qnn_interface->qnn_graph_create(qnn_context, graph_name.c_str(), graph_configs, &graph_handle); } else { error = qnn_interface->qnn_graph_create(qnn_context, graph_name.c_str(), nullptr, &graph_handle); } if (error != QNN_SUCCESS) { - QNN_LOG_ERROR("[%s][%s]failed to create qnn graph, error: %s", get_backend_name(device), graph_name.c_str(), + QNN_LOG_ERROR("[%s][%s]failed to create qnn graph, error: %s\n", get_backend_name(device), graph_name.c_str(), get_qnn_error_string(error)); return; } - QNN_LOG_INFO("[%s][%s]create succeed", get_backend_name(device), graph_name.c_str()); - _graph_handle = graph_handle; + QNN_LOG_INFO("[%s][%s]create succeed\n", get_backend_name(device), graph_name.c_str()); + _graph_handle = graph_handle; _qnn_interface = qnn_interface; } -qnn_graph::~qnn_graph() { QNN_LOG_DEBUG("[%s][%s]destroy", get_backend_name(_device), _graph_name.c_str()); } - -bool qnn_graph::build_graph_from_op(ggml_tensor *op) { - if (!is_valid()) { - QNN_LOG_ERROR("Invalid graph"); - return false; - } - - QNN_LOG_DEBUG("[%s][%s]build start", get_backend_name(_device), _graph_name.c_str()); - 
qnn_tensor_cache_t tensor_cache; - const auto rank = get_op_max_rank(op); - auto operation = create_operation_from_op_tensor(op, _graph_name, rank, _device, _graph_handle, _qnn_instance, - false, tensor_cache); - if (!operation) { - QNN_LOG_ERROR("[%s][%s]create_operation_from_op_tensor failed", get_backend_name(_device), _graph_name.c_str()); - return false; - } - - _tensor_inputs = operation->get_input_tensors(); - _tensor_outputs = operation->get_output_tensors(); - _operations.push_back(std::move(operation)); - if (!finalize()) { - return false; - } - - QNN_LOG_DEBUG("[%s][%s]build succeed", get_backend_name(_device), _graph_name.c_str()); - return true; +qnn_graph::~qnn_graph() { + QNN_LOG_DEBUG("[%s][%s]destroy\n", get_backend_name(_device), _graph_name.c_str()); } -bool qnn_graph::build_graph_from_ggml_graph(const ggml_cgraph *cgraph) { - QNN_LOG_DEBUG("[%s][%s]build start", get_backend_name(_device), _graph_name.c_str()); +bool qnn_graph::build_graph_from_ggml_graph(const ggml_cgraph * cgraph) { + QNN_LOG_DEBUG("[%s][%s]build start\n", get_backend_name(_device), _graph_name.c_str()); ggml_tensor_array_t inputs; ggml_tensor_array_t outputs; - int rank = get_io_tensors_from_graph(cgraph, inputs, outputs); - QNN_LOG_DEBUG("[%s]rank: %d, input_set: %d, output_set: %d", get_backend_name(_device), rank, int(inputs.size()), + int rank = get_io_tensors_from_graph(cgraph, inputs, outputs); + QNN_LOG_DEBUG("[%s]rank: %d, input_set: %d, output_set: %d\n", get_backend_name(_device), rank, int(inputs.size()), int(outputs.size())); { qnn_tensor_cache_t tensor_cache; - auto input_tensors = create_tensors_with_cache(inputs, ggml_qnn_tensor::INPUT, rank, _device, _graph_handle, - _qnn_instance, tensor_cache); + auto input_tensors = create_tensors_with_cache(inputs, ggml_qnn_tensor::INPUT, rank, _device, _graph_handle, + _qnn_instance, tensor_cache); auto output_tensors = create_tensors_with_cache(outputs, ggml_qnn_tensor::OUTPUT, rank, _device, _graph_handle, _qnn_instance, tensor_cache); qnn_op_config_array_t operations; for (int i = 0; i < cgraph->n_nodes; i++) { - ggml_tensor *dst = cgraph->nodes[i]; + ggml_tensor * dst = cgraph->nodes[i]; if (ggml_is_empty(dst)) { continue; } @@ -307,83 +282,49 @@ bool qnn_graph::build_graph_from_ggml_graph(const ggml_cgraph *cgraph) { continue; } - QNN_LOG_DEBUG("[%s]create op: %s", get_backend_name(_device), get_qnn_op_name(dst)); + QNN_LOG_DEBUG("[%s]create op: %s\n", get_backend_name(_device), get_qnn_op_name(dst)); auto operation = create_operation_from_op_tensor(dst, dst->name, rank, _device, _graph_handle, - _qnn_instance, true, tensor_cache); // TODO: fix op name + _qnn_instance, true, tensor_cache); // TODO: fix op name operations.push_back(operation); } - _tensor_inputs = std::move(input_tensors); + _tensor_inputs = std::move(input_tensors); _tensor_outputs = std::move(output_tensors); - _operations = std::move(operations); + _operations = std::move(operations); if (!finalize()) { return false; } } - QNN_LOG_DEBUG("[%s][%s]build succeed", get_backend_name(_device), _graph_name.c_str()); + QNN_LOG_DEBUG("[%s][%s]build succeed\n", get_backend_name(_device), _graph_name.c_str()); return true; } -bool qnn_graph::execute(ggml_tensor *op) { - if (!bind_src_tensors(op, _tensor_inputs, _qnn_tensor_inputs)) { - QNN_LOG_ERROR("[%s][%s]bind input tensors failed", get_backend_name(_device), _graph_name.c_str()); - return false; - } - - if (!qnn::bind_tensors({op}, _tensor_outputs, _qnn_tensor_outputs)) { - QNN_LOG_ERROR("[%s][%s]bind output tensors failed", 
get_backend_name(_device), _graph_name.c_str()); - return false; - } - - auto &qnn_tensor_inputs = _qnn_tensor_inputs; - auto &qnn_tensor_outputs = _qnn_tensor_outputs; - auto error = - _qnn_interface->qnn_graph_execute(_graph_handle, qnn_tensor_inputs.data(), qnn_tensor_inputs.size(), - qnn_tensor_outputs.data(), qnn_tensor_outputs.size(), nullptr, nullptr); - unbind_tensors(_tensor_inputs); - unbind_tensors(_tensor_outputs); - - if (error != QNN_SUCCESS) { - if (_device == QNN_BACKEND_NPU && error == QNN_COMMON_ERROR_SYSTEM_COMMUNICATION) { - QNN_LOG_WARN("[%s][%s]NPU crashed. SSR detected. Caused QNN graph execute error.", - get_backend_name(_device), _graph_name.c_str()); - } else { - QNN_LOG_ERROR("[%s][%s]error: %s", get_backend_name(_device), _graph_name.c_str(), - get_qnn_error_string(error)); - } - return false; - } - - QNN_LOG_DEBUG("[%s][%s]execute succeed", get_backend_name(_device), _graph_name.c_str()); - return true; -} - -bool qnn_graph::execute(const ggml_cgraph *cgraph) { +bool qnn_graph::execute(const ggml_cgraph * cgraph) { ggml_tensor_array_t inputs; ggml_tensor_array_t outputs; #ifdef NDEBUG get_io_tensors_from_graph(cgraph, inputs, outputs); #else int rank = get_io_tensors_from_graph(cgraph, inputs, outputs); - QNN_LOG_DEBUG("[%s]rank: %d, input_set: %d, output_set: %d", get_backend_name(_device), rank, int(inputs.size()), + QNN_LOG_DEBUG("[%s]rank: %d, input_set: %d, output_set: %d\n", get_backend_name(_device), rank, int(inputs.size()), int(outputs.size())); #endif { if (!qnn::bind_tensors(inputs, _tensor_inputs, _qnn_tensor_inputs)) { - QNN_LOG_ERROR("[%s][%s]bind input tensors failed", get_backend_name(_device), _graph_name.c_str()); + QNN_LOG_ERROR("[%s][%s]bind input tensors failed\n", get_backend_name(_device), _graph_name.c_str()); return false; } if (!qnn::bind_tensors(outputs, _tensor_outputs, _qnn_tensor_outputs)) { - QNN_LOG_ERROR("[%s][%s]bind output tensors failed", get_backend_name(_device), _graph_name.c_str()); + QNN_LOG_ERROR("[%s][%s]bind output tensors failed\n", get_backend_name(_device), _graph_name.c_str()); return false; } - auto &qnn_tensor_inputs = _qnn_tensor_inputs; - auto &qnn_tensor_outputs = _qnn_tensor_outputs; - auto error = + auto & qnn_tensor_inputs = _qnn_tensor_inputs; + auto & qnn_tensor_outputs = _qnn_tensor_outputs; + auto error = _qnn_interface->qnn_graph_execute(_graph_handle, qnn_tensor_inputs.data(), qnn_tensor_inputs.size(), qnn_tensor_outputs.data(), qnn_tensor_outputs.size(), nullptr, nullptr); unbind_tensors(_tensor_inputs); @@ -391,35 +332,35 @@ bool qnn_graph::execute(const ggml_cgraph *cgraph) { if (error != QNN_SUCCESS) { if (_device == QNN_BACKEND_NPU && error == QNN_COMMON_ERROR_SYSTEM_COMMUNICATION) { - QNN_LOG_WARN("[%s][%s]NPU crashed. SSR detected. Caused QNN graph execute error.", + QNN_LOG_WARN("[%s][%s]NPU crashed. SSR detected. 
Caused QNN graph execute error.\n", get_backend_name(_device), _graph_name.c_str()); } else { - QNN_LOG_ERROR("[%s][%s]error: %s", get_backend_name(_device), _graph_name.c_str(), + QNN_LOG_ERROR("[%s][%s]error: %s\n", get_backend_name(_device), _graph_name.c_str(), get_qnn_error_string(error)); } return false; } - QNN_LOG_DEBUG("[%s][%s]execute succeed", get_backend_name(_device), _graph_name.c_str()); + QNN_LOG_DEBUG("[%s][%s]execute succeed\n", get_backend_name(_device), _graph_name.c_str()); return true; } } bool qnn_graph::finalize() { if (!qnn::add_op_to_graph(_graph_handle, _operations)) { - QNN_LOG_ERROR("[%s]add nodes failed", _graph_name.c_str()); + QNN_LOG_ERROR("[%s]add nodes failed\n", _graph_name.c_str()); return false; } auto error = _qnn_interface->qnn_graph_finalize(_graph_handle, nullptr, nullptr); if (error != QNN_SUCCESS) { - QNN_LOG_ERROR("[%s][%s]qnn_graph_finalize.error: %s", get_backend_name(_device), _graph_name.c_str(), + QNN_LOG_ERROR("[%s][%s]qnn_graph_finalize.error: %s\n", get_backend_name(_device), _graph_name.c_str(), get_qnn_error_string(error)); return false; } - QNN_LOG_DEBUG("[%s][%s]finalize succeed", get_backend_name(_device), _graph_name.c_str()); + QNN_LOG_DEBUG("[%s][%s]finalize succeed\n", get_backend_name(_device), _graph_name.c_str()); return true; } -} // namespace qnn +} // namespace qnn diff --git a/ggml/src/ggml-qnn/graph.hpp b/ggml/src/ggml-qnn/graph.hpp index 521186f790..dc1ed0b3f8 100644 --- a/ggml/src/ggml-qnn/graph.hpp +++ b/ggml/src/ggml-qnn/graph.hpp @@ -6,41 +6,51 @@ #include #include "ggml-qnn.h" - #include "op-config.hpp" #include "qnn-lib.hpp" namespace qnn { +/** + * @class qnn_graph + * @brief Manages a QNN graph, converting a GGML graph to QNN format and handling its execution. + * + * This class is responsible for building a QNN graph from a given GGML graph, + * determining its input/output tensors, finalizing the configuration, and + * executing the graph on the specified backend device. 
+ */ class qnn_graph { -public: - explicit qnn_graph(const std::string &graph_name, QNNBackend device, std::shared_ptr qnn_instance, + public: + explicit qnn_graph(const std::string & graph_name, QNNBackend device, std::shared_ptr qnn_instance, size_t vtcm_size_in_mb); ~qnn_graph(); - bool build_graph_from_op(ggml_tensor *op); - bool build_graph_from_ggml_graph(const ggml_cgraph *cgraph); + bool build_graph_from_ggml_graph(const ggml_cgraph * cgraph); + + bool execute(const ggml_cgraph * cgraph); - bool execute(ggml_tensor *op); - bool execute(const ggml_cgraph *cgraph); bool is_valid() const { return _graph_handle != nullptr; } + Qnn_GraphHandle_t get_graph_handler() const { return _graph_handle; } + std::shared_ptr get_qnn_instance() { return _qnn_instance; } - const std::string &get_name() const { return _graph_name; } + + const std::string & get_name() const { return _graph_name; } + QNNBackend get_device() const { return _device; } -private: + private: bool finalize(); - const std::string _graph_name; - const QNNBackend _device; - Qnn_GraphHandle_t _graph_handle = nullptr; - std::shared_ptr _qnn_instance; + const std::string _graph_name; + const QNNBackend _device; + Qnn_GraphHandle_t _graph_handle = nullptr; + std::shared_ptr _qnn_instance; std::shared_ptr _qnn_interface; - qnn_op_config_array_t _operations; + qnn_op_config_array_t _operations; - qnn_tensor_array_t _tensor_inputs; - qnn_tensor_array_t _tensor_outputs; + qnn_tensor_array_t _tensor_inputs; + qnn_tensor_array_t _tensor_outputs; std::vector _qnn_tensor_inputs; std::vector _qnn_tensor_outputs; @@ -50,4 +60,4 @@ private: using qnn_graph_ptr_t = std::shared_ptr; -} // namespace qnn +} // namespace qnn diff --git a/ggml/src/ggml-qnn/logger.cpp b/ggml/src/ggml-qnn/logger.cpp index 23a3f305c0..5418d03be4 100644 --- a/ggml/src/ggml-qnn/logger.cpp +++ b/ggml/src/ggml-qnn/logger.cpp @@ -1,70 +1,45 @@ #include "logger.hpp" -#include -#include +#ifndef NDEBUG -#if defined(__ANDROID__) || defined(ANDROID) -#include -#endif +# include -void qnn::internal_log(ggml_log_level level, const char * /*file*/, const char *func, int line, const char *format, - ...) 
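Putting the interface above together, the expected call sequence from the backend is: construct a qnn_graph for a device, build it once from a ggml_cgraph, then execute it (possibly repeatedly). A hedged sketch of that flow, assuming the instance type is qnn::qnn_instance from qnn-lib.hpp and that the caller already has the graph name, device enum and VTCM size from its device context:

// Illustrative only: how the backend-side compute path would drive qnn_graph.
static ggml_status compute_with_qnn_graph(const std::string & graph_name, QNNBackend device,
                                          std::shared_ptr<qnn::qnn_instance> instance, size_t vtcm_mb,
                                          const ggml_cgraph * cgraph) {
    qnn::qnn_graph graph(graph_name, device, instance, vtcm_mb);
    if (!graph.is_valid()) {
        return GGML_STATUS_FAILED;                     // the QNN graph handle could not be created
    }
    if (!graph.build_graph_from_ggml_graph(cgraph)) {  // translate the ggml graph and finalize once
        return GGML_STATUS_FAILED;
    }
    return graph.execute(cgraph) ? GGML_STATUS_SUCCESS : GGML_STATUS_FAILED;
}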
{ - static std::mutex qnn_internal_log_mutex; - static char s_qnn_internal_log_buf[QNN_LOGBUF_LEN]; +# include "QnnInterface.h" +# include "QnnTypes.h" +# include "System/QnnSystemInterface.h" - { - std::lock_guard lock(qnn_internal_log_mutex); - va_list args; - - va_start(args, format); - int len_prefix = snprintf(s_qnn_internal_log_buf, QNN_LOGBUF_LEN, "[%s, %d]: ", func, line); - int len = vsnprintf(s_qnn_internal_log_buf + len_prefix, QNN_LOGBUF_LEN - len_prefix, format, args); - if (len < (QNN_LOGBUF_LEN - len_prefix)) { -#if defined(__ANDROID__) || defined(ANDROID) - // print to android logcat - __android_log_print(level, "ggml-qnn", "%s\n", s_qnn_internal_log_buf); -#else - (void)level; -#endif - // print to stdout - printf("%s\n", s_qnn_internal_log_buf); - } - va_end(args); - } -} - -#if ENABLE_QNNSDK_LOG -void qnn::sdk_logcallback(const char *fmt, QnnLog_Level_t level, uint64_t /*timestamp*/, va_list argp) { +void qnn::sdk_logcallback(const char * fmt, QnnLog_Level_t level, uint64_t /*timestamp*/, va_list argp) { static std::mutex log_mutex; - static char s_ggml_qnn_logbuf[QNN_LOGBUF_LEN]; + static char s_ggml_qnn_logbuf[4096]; - const char *log_level_desc = ""; + char log_level_desc = 'U'; switch (level) { case QNN_LOG_LEVEL_ERROR: - log_level_desc = "ERROR"; + log_level_desc = 'E'; break; case QNN_LOG_LEVEL_WARN: - log_level_desc = "WARNING"; + log_level_desc = 'W'; break; case QNN_LOG_LEVEL_INFO: - log_level_desc = "INFO"; + log_level_desc = 'I'; break; case QNN_LOG_LEVEL_DEBUG: - log_level_desc = "DEBUG"; + log_level_desc = 'D'; break; case QNN_LOG_LEVEL_VERBOSE: - log_level_desc = "VERBOSE"; - break; - case QNN_LOG_LEVEL_MAX: - log_level_desc = "UNKNOWN"; + log_level_desc = 'V'; break; } { std::lock_guard lock(log_mutex); - vsnprintf(s_ggml_qnn_logbuf, QNN_LOGBUF_LEN, fmt, argp); - QNN_LOG_INFO("[%s]%s", log_level_desc, s_ggml_qnn_logbuf); + int size = vsnprintf(s_ggml_qnn_logbuf, sizeof(s_ggml_qnn_logbuf), fmt, argp); + if (size > 0 && s_ggml_qnn_logbuf[size - 1] != '\n') { + QNN_LOG_INFO("[%c]%s\n", log_level_desc, s_ggml_qnn_logbuf); + } else { + QNN_LOG_INFO("[%c]%s", log_level_desc, s_ggml_qnn_logbuf); + } } } #else diff --git a/ggml/src/ggml-qnn/logger.hpp b/ggml/src/ggml-qnn/logger.hpp index b4bab0c006..cf94ce2217 100644 --- a/ggml/src/ggml-qnn/logger.hpp +++ b/ggml/src/ggml-qnn/logger.hpp @@ -1,43 +1,16 @@ #pragma once -#include +#include +#include "ggml-impl.h" #include "ggml.h" - -#include "QnnCommon.h" -#include "QnnInterface.h" -#include "QnnTypes.h" -#include "System/QnnSystemInterface.h" - -#define QNN_LOGBUF_LEN 4096 +#include "QnnLog.h" namespace qnn { -void internal_log(ggml_log_level level, const char *file, const char *func, int line, const char *format, ...); +void sdk_logcallback(const char * fmt, QnnLog_Level_t level, uint64_t timestamp, va_list argp); +} // namespace qnn -void sdk_logcallback(const char *fmt, QnnLog_Level_t level, uint64_t timestamp, va_list argp); -} // namespace qnn - -// ================================================================================================= -// -// QNN backend internal log function -// -// ================================================================================================= -#define QNN_LOG_ERROR(...) qnn::internal_log(GGML_LOG_LEVEL_DEBUG, __FILE__, __FUNCTION__, __LINE__, __VA_ARGS__) - -#define QNN_LOG_WARN(...) qnn::internal_log(GGML_LOG_LEVEL_DEBUG, __FILE__, __FUNCTION__, __LINE__, __VA_ARGS__) - -#define QNN_LOG_INFO(...) 
qnn::internal_log(GGML_LOG_LEVEL_DEBUG, __FILE__, __FUNCTION__, __LINE__, __VA_ARGS__) - -#ifdef NDEBUG -#define ENABLE_QNNBACKEND_DEBUG 0 // for troubleshooting QNN backend -#define ENABLE_QNNSDK_LOG 0 // enable/disable QNN SDK's internal log -#else -#define ENABLE_QNNBACKEND_DEBUG 1 // for troubleshooting QNN backend -#define ENABLE_QNNSDK_LOG 1 // enable/disable QNN SDK's internal log -#endif - -#if ENABLE_QNNBACKEND_DEBUG -#define QNN_LOG_DEBUG(...) qnn::internal_log(GGML_LOG_LEVEL_DEBUG, __FILE__, __FUNCTION__, __LINE__, __VA_ARGS__) -#else -#define QNN_LOG_DEBUG(...) -#endif +#define QNN_LOG_ERROR(...) (GGML_LOG_ERROR(__VA_ARGS__)) +#define QNN_LOG_WARN(...) (GGML_LOG_WARN(__VA_ARGS__)) +#define QNN_LOG_INFO(...) (GGML_LOG_INFO(__VA_ARGS__)) +#define QNN_LOG_DEBUG(...) (GGML_LOG_DEBUG(__VA_ARGS__)) diff --git a/ggml/src/ggml-qnn/op-config-base.hpp b/ggml/src/ggml-qnn/op-config-base.hpp index 274bb8318f..b24b53bf2a 100644 --- a/ggml/src/ggml-qnn/op-config-base.hpp +++ b/ggml/src/ggml-qnn/op-config-base.hpp @@ -4,7 +4,6 @@ #include #include "ggml-qnn.h" - #include "qnn-types.hpp" #include "tensor.hpp" @@ -18,7 +17,7 @@ namespace qnn { * adding operations to a graph, and binding/unbinding input and output tensors. */ class ggml_qnn_op_config { -public: + public: virtual ~ggml_qnn_op_config() {} /** @@ -32,8 +31,8 @@ public: * * @param tensor_inputs A reference to a vector of qnn_tensor_ptr_t objects representing the input tensors. */ - virtual void set_input_tensors(qnn::qnn_tensor_array_t &tensor_inputs) = 0; - virtual void set_input_tensors(qnn::qnn_tensor_array_t &&tensor_inputs) = 0; + virtual void set_input_tensors(qnn::qnn_tensor_array_t & tensor_inputs) = 0; + virtual void set_input_tensors(qnn::qnn_tensor_array_t && tensor_inputs) = 0; /** * @brief Sets custom output tensors for the operation. This method should be called before `initialize_op_nodes`. @@ -46,8 +45,8 @@ public: * * @param tensor_outputs A reference to a vector of qnn_tensor_ptr_t objects representing the output tensors. */ - virtual void set_output_tensors(qnn::qnn_tensor_array_t &tensor_outputs) = 0; - virtual void set_output_tensors(qnn::qnn_tensor_array_t &&tensor_inputs) = 0; + virtual void set_output_tensors(qnn::qnn_tensor_array_t & tensor_outputs) = 0; + virtual void set_output_tensors(qnn::qnn_tensor_array_t && tensor_inputs) = 0; /** * @brief Creates tensors and internal nodes for constructing the calculation graph. @@ -71,7 +70,7 @@ public: * * @return A reference to a vector of qnn_tensor_ptr_t objects representing the input tensors. */ - virtual const qnn_tensor_array_t &get_input_tensors() = 0; + virtual const qnn_tensor_array_t & get_input_tensors() = 0; /** * @brief Pure virtual function to retrieve the output tensors of a QNN. @@ -82,7 +81,7 @@ public: * * @return A reference to a vector of qnn_tensor_ptr_t objects representing the output tensors. */ - virtual const qnn_tensor_array_t &get_output_tensors() = 0; + virtual const qnn_tensor_array_t & get_output_tensors() = 0; /** * @brief Adds an operation to the given graph. @@ -109,7 +108,7 @@ public: * containing the input tensors. * @return true if the input tensors were successfully bound, false otherwise. */ - virtual bool bind_input_tensors(const ggml_tensor_array_t &tensor_inputs) = 0; + virtual bool bind_input_tensors(const ggml_tensor_array_t & tensor_inputs) = 0; /** * @brief Binds the output tensors to the given tensor array. @@ -123,7 +122,7 @@ public: * represent the output tensors to be bound. 
* @return true if the binding is successful, false otherwise. */ - virtual bool bind_output_tensors(const ggml_tensor_array_t &tensor_outputs) = 0; + virtual bool bind_output_tensors(const ggml_tensor_array_t & tensor_outputs) = 0; /** * @brief Unbinds the input tensors from the operation. @@ -146,7 +145,7 @@ public: virtual void unbind_output_tensors() = 0; }; -using qnn_op_config_ptr_t = std::shared_ptr; +using qnn_op_config_ptr_t = std::shared_ptr; using qnn_op_config_array_t = std::vector; -} // namespace qnn +} // namespace qnn diff --git a/ggml/src/ggml-qnn/op-config-caps.cpp b/ggml/src/ggml-qnn/op-config-caps.cpp index b250c214a3..16b50503be 100644 --- a/ggml/src/ggml-qnn/op-config-caps.cpp +++ b/ggml/src/ggml-qnn/op-config-caps.cpp @@ -3,73 +3,73 @@ namespace { -using op_constructor_t = std::shared_ptr (*)(const ggml_tensor *, const std::string &, +using op_constructor_t = std::shared_ptr (*)(const ggml_tensor *, const std::string &, std::shared_ptr); -using op_dims_calc_func_t = void (*)(const std::vector &input_dims, - qnn::ggml_dimension_array_t &output_dims); +using op_dims_calc_func_t = void (*)(const std::vector & input_dims, + qnn::ggml_dimension_array_t & output_dims); -void element_wise_op_dims(const std::vector &input_dims, - qnn::ggml_dimension_array_t &output_dims) { +void element_wise_op_dims(const std::vector & input_dims, + qnn::ggml_dimension_array_t & output_dims) { for (size_t i = 1; i < std::size(output_dims); i++) { output_dims[i] = input_dims.front()[i]; } } -void mat_mul_op_dims(const std::vector &input_dims, - qnn::ggml_dimension_array_t &output_dims) { +void mat_mul_op_dims(const std::vector & input_dims, + qnn::ggml_dimension_array_t & output_dims) { GGML_ASSERT(input_dims.size() == 2); output_dims[0] = input_dims.front()[1]; output_dims[1] = input_dims.back()[1]; } struct qnn_op_caps_t { - const char *qnn_op_name = nullptr; - const size_t input_param_count = 0; - op_dims_calc_func_t calc_dims_func = nullptr; - const char *qnn_param_name = nullptr; + const char * qnn_op_name = nullptr; + const size_t input_param_count = 0; + op_dims_calc_func_t calc_dims_func = nullptr; + const char * qnn_param_name = nullptr; }; constexpr const qnn_op_caps_t kOpCaps[] = { {}, // GGML_OP_NONE {}, // GGML_OP_DUP { - // GGML_OP_ADD - QNN_OP_ELEMENT_WISE_ADD, // qnn_op_name - 2, // input_param_count - element_wise_op_dims, // calc_dims_func + // GGML_OP_ADD + QNN_OP_ELEMENT_WISE_ADD, // qnn_op_name + 2, // input_param_count + element_wise_op_dims, // calc_dims_func }, {}, // GGML_OP_ADD1 {}, // GGML_OP_ACC { - // GGML_OP_SUB - QNN_OP_ELEMENT_WISE_SUBTRACT, // qnn_op_name - 2, // input_param_count - element_wise_op_dims, // calc_dims_func + // GGML_OP_SUB + QNN_OP_ELEMENT_WISE_SUBTRACT, // qnn_op_name + 2, // input_param_count + element_wise_op_dims, // calc_dims_func }, { - // GGML_OP_MUL - QNN_OP_ELEMENT_WISE_MULTIPLY, // qnn_op_name - 2, // input_param_count - element_wise_op_dims, // calc_dims_func + // GGML_OP_MUL + QNN_OP_ELEMENT_WISE_MULTIPLY, // qnn_op_name + 2, // input_param_count + element_wise_op_dims, // calc_dims_func }, { - // GGML_OP_DIV - QNN_OP_ELEMENT_WISE_DIVIDE, // qnn_op_name - 2, // input_param_count - element_wise_op_dims, // calc_dims_func + // GGML_OP_DIV + QNN_OP_ELEMENT_WISE_DIVIDE, // qnn_op_name + 2, // input_param_count + element_wise_op_dims, // calc_dims_func }, {}, // GGML_OP_SQR { - // GGML_OP_SQRT - QNN_OP_ELEMENT_WISE_SQUARE_ROOT, // qnn_op_name - 1, // input_param_count - element_wise_op_dims, // calc_dims_func + // GGML_OP_SQRT + 
QNN_OP_ELEMENT_WISE_SQUARE_ROOT, // qnn_op_name + 1, // input_param_count + element_wise_op_dims, // calc_dims_func }, { - // GGML_OP_LOG - QNN_OP_ELEMENT_WISE_LOG, // qnn_op_name - 1, // input_param_count - element_wise_op_dims, // calc_dims_func + // GGML_OP_LOG + QNN_OP_ELEMENT_WISE_LOG, // qnn_op_name + 1, // input_param_count + element_wise_op_dims, // calc_dims_func }, {}, // GGML_OP_SIN {}, // GGML_OP_COS @@ -84,19 +84,19 @@ constexpr const qnn_op_caps_t kOpCaps[] = { {}, // GGML_OP_SILU_BACK {}, // GGML_OP_NORM { - // GGML_OP_RMS_NORM - QNN_OP_RMS_NORM, // qnn_op_name - 1, // input_param_count - nullptr, // TODO: calc_dims_func - QNN_OP_RMS_NORM_PARAM_EPSILON, // qnn_param_name + // GGML_OP_RMS_NORM + QNN_OP_RMS_NORM, // qnn_op_name + 1, // input_param_count + nullptr, // TODO: calc_dims_func + QNN_OP_RMS_NORM_PARAM_EPSILON, // qnn_param_name }, {}, // GGML_OP_RMS_NORM_BACK {}, // GGML_OP_GROUP_NORM { - // GGML_OP_MUL_MAT - QNN_OP_MAT_MUL, // qnn_op_name - 2, // input_param_count - mat_mul_op_dims, // calc_dims_func + // GGML_OP_MUL_MAT + QNN_OP_MAT_MUL, // qnn_op_name + 2, // input_param_count + mat_mul_op_dims, // calc_dims_func }, {}, // GGML_OP_MUL_MAT_ID {}, // GGML_OP_OUT_PROD @@ -105,10 +105,10 @@ constexpr const qnn_op_caps_t kOpCaps[] = { {}, // GGML_OP_CPY {}, // GGML_OP_CONT { - // GGML_OP_RESHAPE - QNN_OP_RESHAPE, // qnn_op_name - 1, // input_param_count - nullptr, // TODO: calc_dims_func + // GGML_OP_RESHAPE + QNN_OP_RESHAPE, // qnn_op_name + 1, // input_param_count + nullptr, // TODO: calc_dims_func }, {}, // GGML_OP_VIEW {}, // GGML_OP_PERMUTE @@ -177,10 +177,10 @@ constexpr const qnn_op_caps_t kOpCaps[] = { {}, // GGML_UNARY_OP_RELU {}, // GGML_UNARY_OP_SIGMOID { - // GGML_UNARY_OP_GELU - QNN_OP_GELU, // qnn_op_name - 1, // input_param_count - nullptr, // TODO: calc_dims_func + // GGML_UNARY_OP_GELU + QNN_OP_GELU, // qnn_op_name + 1, // input_param_count + nullptr, // TODO: calc_dims_func }, {}, // GGML_UNARY_OP_GELU_QUICK {}, // GGML_UNARY_OP_SILU @@ -201,15 +201,17 @@ static_assert(kOpCaps[GGML_OP_COUNT + GGML_UNARY_OP_GELU].input_param_count == 1 static_assert(std::size(kOpCaps) == (GGML_OP_COUNT + GGML_UNARY_OP_COUNT), "GGML_OP_COUNT does not match the size of the kOpCaps table"); -std::shared_ptr mat_mul_op_constructor(const ggml_tensor *op, const std::string &instance_name, +std::shared_ptr mat_mul_op_constructor(const ggml_tensor * op, + const std::string & instance_name, std::shared_ptr qnn_instance) { GGML_UNUSED(op); - QNN_LOG_DEBUG("create QNN_OP_MAT_MUL, name %s", instance_name.c_str()); + QNN_LOG_DEBUG("create QNN_OP_MAT_MUL, name %s\n", instance_name.c_str()); return std::make_shared(instance_name, qnn_instance); } template -std::shared_ptr generic_op_constructor(const ggml_tensor *op, const std::string &instance_name, +std::shared_ptr generic_op_constructor(const ggml_tensor * op, + const std::string & instance_name, std::shared_ptr qnn_instance) { GGML_UNUSED(op); static_assert(_op < std::size(kOpCaps)); @@ -218,20 +220,20 @@ std::shared_ptr generic_op_constructor(const ggml_tenso kOpCaps[_op].qnn_op_name, qnn_instance); } -void add_type_parameters(std::shared_ptr op, const char *name, float value) { +void add_type_parameters(std::shared_ptr op, const char * name, float value) { Qnn_Scalar_t scalar = QNN_SCALAR_INIT; - scalar.dataType = QNN_DATATYPE_FLOAT_32; - scalar.floatValue = value; + scalar.dataType = QNN_DATATYPE_FLOAT_32; + scalar.floatValue = value; op->add_scalar_param(name, scalar); } template std::shared_ptr 
op_constructor_with_type_param( - const ggml_tensor *op, const std::string &instance_name, std::shared_ptr qnn_instance) { + const ggml_tensor * op, const std::string & instance_name, std::shared_ptr qnn_instance) { static_assert(std::is_base_of::value); static_assert(_op < std::size(kOpCaps)); - constexpr auto &op_caps = kOpCaps[_op]; + constexpr auto & op_caps = kOpCaps[_op]; static_assert(op_caps.qnn_op_name != nullptr); _ggml_op_param_type op_param; @@ -245,113 +247,113 @@ std::shared_ptr op_constructor_with_type_param( } constexpr const op_constructor_t kOpConstructors[] = { - nullptr, // GGML_OP_NONE - nullptr, // GGML_OP_DUP - generic_op_constructor, // GGML_OP_ADD - nullptr, // GGML_OP_ADD1 - nullptr, // GGML_OP_ACC - generic_op_constructor, // GGML_OP_SUB - generic_op_constructor, // GGML_OP_MUL - generic_op_constructor, // GGML_OP_DIV - nullptr, // GGML_OP_SQR - generic_op_constructor, // GGML_OP_SQRT - generic_op_constructor, // GGML_OP_LOG - nullptr, // GGML_OP_SIN - nullptr, // GGML_OP_COS - nullptr, // GGML_OP_SUM - nullptr, // GGML_OP_SUM_ROWS - nullptr, // GGML_OP_MEAN - nullptr, // GGML_OP_ARGMAX - nullptr, // GGML_OP_COUNT_EQUAL - nullptr, // GGML_OP_REPEAT - nullptr, // GGML_OP_REPEAT_BACK - nullptr, // GGML_OP_CONCAT - nullptr, // GGML_OP_SILU_BACK - nullptr, // GGML_OP_NORM - op_constructor_with_type_param, // GGML_OP_RMS_NORM - nullptr, // GGML_OP_RMS_NORM_BACK - nullptr, // GGML_OP_GROUP_NORM + nullptr, // GGML_OP_NONE + nullptr, // GGML_OP_DUP + generic_op_constructor, // GGML_OP_ADD + nullptr, // GGML_OP_ADD1 + nullptr, // GGML_OP_ACC + generic_op_constructor, // GGML_OP_SUB + generic_op_constructor, // GGML_OP_MUL + generic_op_constructor, // GGML_OP_DIV + nullptr, // GGML_OP_SQR + generic_op_constructor, // GGML_OP_SQRT + generic_op_constructor, // GGML_OP_LOG + nullptr, // GGML_OP_SIN + nullptr, // GGML_OP_COS + nullptr, // GGML_OP_SUM + nullptr, // GGML_OP_SUM_ROWS + nullptr, // GGML_OP_MEAN + nullptr, // GGML_OP_ARGMAX + nullptr, // GGML_OP_COUNT_EQUAL + nullptr, // GGML_OP_REPEAT + nullptr, // GGML_OP_REPEAT_BACK + nullptr, // GGML_OP_CONCAT + nullptr, // GGML_OP_SILU_BACK + nullptr, // GGML_OP_NORM + op_constructor_with_type_param, // GGML_OP_RMS_NORM + nullptr, // GGML_OP_RMS_NORM_BACK + nullptr, // GGML_OP_GROUP_NORM - mat_mul_op_constructor, // GGML_OP_MUL_MAT - nullptr, // GGML_OP_MUL_MAT_ID - nullptr, // GGML_OP_OUT_PROD + mat_mul_op_constructor, // GGML_OP_MUL_MAT + nullptr, // GGML_OP_MUL_MAT_ID + nullptr, // GGML_OP_OUT_PROD - nullptr, // GGML_OP_SCALE - nullptr, // GGML_OP_SET - nullptr, // GGML_OP_CPY - nullptr, // GGML_OP_CONT - generic_op_constructor, // GGML_OP_RESHAPE - nullptr, // GGML_OP_VIEW - nullptr, // GGML_OP_PERMUTE - nullptr, // GGML_OP_TRANSPOSE - nullptr, // GGML_OP_GET_ROWS - nullptr, // GGML_OP_GET_ROWS_BACK - nullptr, // GGML_OP_DIAG - nullptr, // GGML_OP_DIAG_MASK_INF - nullptr, // GGML_OP_DIAG_MASK_ZERO - nullptr, // GGML_OP_SOFT_MAX - nullptr, // GGML_OP_SOFT_MAX_BACK - nullptr, // GGML_OP_ROPE - nullptr, // GGML_OP_ROPE_BACK - nullptr, // GGML_OP_CLAMP - nullptr, // GGML_OP_CONV_TRANSPOSE_1D - nullptr, // GGML_OP_IM2COL - nullptr, // GGML_OP_IM2COL_BACK - nullptr, // GGML_OP_CONV_TRANSPOSE_2D - nullptr, // GGML_OP_POOL_1D - nullptr, // GGML_OP_POOL_2D - nullptr, // GGML_OP_POOL_2D_BACK - nullptr, // GGML_OP_UPSCALE - nullptr, // GGML_OP_PAD - nullptr, // GGML_OP_PAD_REFLECT_1D - nullptr, // GGML_OP_ARANGE - nullptr, // GGML_OP_TIMESTEP_EMBEDDING - nullptr, // GGML_OP_ARGSORT - nullptr, // GGML_OP_LEAKY_RELU + nullptr, // 
GGML_OP_SCALE + nullptr, // GGML_OP_SET + nullptr, // GGML_OP_CPY + nullptr, // GGML_OP_CONT + generic_op_constructor, // GGML_OP_RESHAPE + nullptr, // GGML_OP_VIEW + nullptr, // GGML_OP_PERMUTE + nullptr, // GGML_OP_TRANSPOSE + nullptr, // GGML_OP_GET_ROWS + nullptr, // GGML_OP_GET_ROWS_BACK + nullptr, // GGML_OP_DIAG + nullptr, // GGML_OP_DIAG_MASK_INF + nullptr, // GGML_OP_DIAG_MASK_ZERO + nullptr, // GGML_OP_SOFT_MAX + nullptr, // GGML_OP_SOFT_MAX_BACK + nullptr, // GGML_OP_ROPE + nullptr, // GGML_OP_ROPE_BACK + nullptr, // GGML_OP_CLAMP + nullptr, // GGML_OP_CONV_TRANSPOSE_1D + nullptr, // GGML_OP_IM2COL + nullptr, // GGML_OP_IM2COL_BACK + nullptr, // GGML_OP_CONV_TRANSPOSE_2D + nullptr, // GGML_OP_POOL_1D + nullptr, // GGML_OP_POOL_2D + nullptr, // GGML_OP_POOL_2D_BACK + nullptr, // GGML_OP_UPSCALE + nullptr, // GGML_OP_PAD + nullptr, // GGML_OP_PAD_REFLECT_1D + nullptr, // GGML_OP_ARANGE + nullptr, // GGML_OP_TIMESTEP_EMBEDDING + nullptr, // GGML_OP_ARGSORT + nullptr, // GGML_OP_LEAKY_RELU - nullptr, // GGML_OP_FLASH_ATTN_EXT - nullptr, // GGML_OP_FLASH_ATTN_BACK - nullptr, // GGML_OP_SSM_CONV - nullptr, // GGML_OP_SSM_SCAN - nullptr, // GGML_OP_WIN_PART - nullptr, // GGML_OP_WIN_UNPART - nullptr, // GGML_OP_GET_REL_POS - nullptr, // GGML_OP_ADD_REL_POS - nullptr, // GGML_OP_RWKV_WKV6 - nullptr, // GGML_OP_GATED_LINEAR_ATTN + nullptr, // GGML_OP_FLASH_ATTN_EXT + nullptr, // GGML_OP_FLASH_ATTN_BACK + nullptr, // GGML_OP_SSM_CONV + nullptr, // GGML_OP_SSM_SCAN + nullptr, // GGML_OP_WIN_PART + nullptr, // GGML_OP_WIN_UNPART + nullptr, // GGML_OP_GET_REL_POS + nullptr, // GGML_OP_ADD_REL_POS + nullptr, // GGML_OP_RWKV_WKV6 + nullptr, // GGML_OP_GATED_LINEAR_ATTN - nullptr, // GGML_OP_UNARY + nullptr, // GGML_OP_UNARY - nullptr, // GGML_OP_MAP_UNARY - nullptr, // GGML_OP_MAP_BINARY + nullptr, // GGML_OP_MAP_UNARY + nullptr, // GGML_OP_MAP_BINARY - nullptr, // GGML_OP_MAP_CUSTOM1_F32 - nullptr, // GGML_OP_MAP_CUSTOM2_F32 - nullptr, // GGML_OP_MAP_CUSTOM3_F32 + nullptr, // GGML_OP_MAP_CUSTOM1_F32 + nullptr, // GGML_OP_MAP_CUSTOM2_F32 + nullptr, // GGML_OP_MAP_CUSTOM3_F32 - nullptr, // GGML_OP_MAP_CUSTOM1 - nullptr, // GGML_OP_MAP_CUSTOM2 - nullptr, // GGML_OP_MAP_CUSTOM3 + nullptr, // GGML_OP_MAP_CUSTOM1 + nullptr, // GGML_OP_MAP_CUSTOM2 + nullptr, // GGML_OP_MAP_CUSTOM3 - nullptr, // GGML_OP_CROSS_ENTROPY_LOSS - nullptr, // GGML_OP_CROSS_ENTROPY_LOSS_BACK - nullptr, // GGML_OP_OPT_STEP_ADAMW + nullptr, // GGML_OP_CROSS_ENTROPY_LOSS + nullptr, // GGML_OP_CROSS_ENTROPY_LOSS_BACK + nullptr, // GGML_OP_OPT_STEP_ADAMW // ggml_unary_op - nullptr, // GGML_UNARY_OP_ABS - nullptr, // GGML_UNARY_OP_SGN - nullptr, // GGML_UNARY_OP_NEG - nullptr, // GGML_UNARY_OP_STEP - nullptr, // GGML_UNARY_OP_TANH - nullptr, // GGML_UNARY_OP_ELU - nullptr, // GGML_UNARY_OP_RELU - nullptr, // GGML_UNARY_OP_SIGMOID - nullptr, // GGML_UNARY_OP_GELU - nullptr, // GGML_UNARY_OP_GELU_QUICK - nullptr, // GGML_UNARY_OP_SILU - nullptr, // GGML_UNARY_OP_HARDSWISH - nullptr, // GGML_UNARY_OP_HARDSIGMOID - nullptr, // GGML_UNARY_OP_EXP + nullptr, // GGML_UNARY_OP_ABS + nullptr, // GGML_UNARY_OP_SGN + nullptr, // GGML_UNARY_OP_NEG + nullptr, // GGML_UNARY_OP_STEP + nullptr, // GGML_UNARY_OP_TANH + nullptr, // GGML_UNARY_OP_ELU + nullptr, // GGML_UNARY_OP_RELU + nullptr, // GGML_UNARY_OP_SIGMOID + nullptr, // GGML_UNARY_OP_GELU + nullptr, // GGML_UNARY_OP_GELU_QUICK + nullptr, // GGML_UNARY_OP_SILU + nullptr, // GGML_UNARY_OP_HARDSWISH + nullptr, // GGML_UNARY_OP_HARDSIGMOID + nullptr, // GGML_UNARY_OP_EXP }; 
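Both kOpCaps and this kOpConstructors table are indexed the same way: a regular op uses its ggml_op value directly, while a GGML_OP_UNARY node is folded into the tail of the table at GGML_OP_COUNT plus its ggml_unary_op value, which is what the kOpCaps static_assert on GGML_OP_COUNT + GGML_UNARY_OP_GELU above already relies on and get_qnn_op_index below implements. A tiny sketch of that indexing, assuming kGgmlUnaryOpStart equals GGML_OP_COUNT as those asserts imply:

// Illustrative index computation for the two op tables above.
static size_t op_table_index(const ggml_tensor * tensor) {
    if (tensor->op == GGML_OP_UNARY) {
        // unary ops occupy [GGML_OP_COUNT, GGML_OP_COUNT + GGML_UNARY_OP_COUNT)
        return (size_t) GGML_OP_COUNT + (size_t) ggml_get_unary_op(tensor);
    }
    return (size_t) tensor->op;  // e.g. GGML_OP_MUL_MAT selects the mat_mul_op_constructor slot
}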
static_assert(kOpConstructors[GGML_OP_NONE] == nullptr, "GGML_OP_NONE does not match the nullptr function"); @@ -362,11 +364,11 @@ static_assert(kOpConstructors[GGML_OP_MUL_MAT] == mat_mul_op_constructor, static_assert(std::size(kOpConstructors) == (GGML_OP_COUNT + GGML_UNARY_OP_COUNT), "GGML_OP_COUNT does not match the size of the kOpConstructors table"); -} // namespace +} // namespace namespace qnn { -size_t get_qnn_op_index(const ggml_tensor *tensor) { +size_t get_qnn_op_index(const ggml_tensor * tensor) { if (tensor->op == GGML_OP_UNARY) { return kGgmlUnaryOpStart + ggml_get_unary_op(tensor); } @@ -374,20 +376,20 @@ size_t get_qnn_op_index(const ggml_tensor *tensor) { return tensor->op; } -const char *get_qnn_op_name(const ggml_tensor *op) { +const char * get_qnn_op_name(const ggml_tensor * op) { auto op_index = get_qnn_op_index(op); GGML_ASSERT(op_index < std::size(kOpCaps)); GGML_ASSERT(kOpCaps[op_index].qnn_op_name); return kOpCaps[op_index].qnn_op_name; } -size_t get_qnn_op_input_param_count(const ggml_tensor *op) { +size_t get_qnn_op_input_param_count(const ggml_tensor * op) { auto op_index = get_qnn_op_index(op); GGML_ASSERT(op_index < std::size(kOpCaps)); return kOpCaps[op_index].input_param_count; } -std::shared_ptr create_op(const ggml_tensor *op, const std::string &name, +std::shared_ptr create_op(const ggml_tensor * op, const std::string & name, std::shared_ptr qnn_instance) { auto op_index = get_qnn_op_index(op); GGML_ASSERT(op_index < std::size(kOpCaps)); @@ -396,4 +398,4 @@ std::shared_ptr create_op(const ggml_tensor *op, const std:: return op_constructor(op, name, qnn_instance); } -} // namespace qnn +} // namespace qnn diff --git a/ggml/src/ggml-qnn/op-config-impl.cpp b/ggml/src/ggml-qnn/op-config-impl.cpp index 1b05b3581a..14638a554e 100644 --- a/ggml/src/ggml-qnn/op-config-impl.cpp +++ b/ggml/src/ggml-qnn/op-config-impl.cpp @@ -6,14 +6,7 @@ namespace { -constexpr const qnn::qnn_dimension_array_t kTransposeParamData[GGML_MAX_DIMS] = { - {0}, - {1, 0}, - {0, 2, 1}, - {0, 1, 3, 2}, -}; - -qnn::qnn_dimension_array_t get_transposed_dimensions(const qnn::qnn_dimension_array_t &dimensions, int rank) { +qnn::qnn_dimension_array_t get_transposed_dimensions(const qnn::qnn_dimension_array_t & dimensions, int rank) { qnn::qnn_dimension_array_t transposed_dims = dimensions; if (rank >= 2) { transposed_dims[rank - 1] = dimensions[rank - 2]; @@ -23,11 +16,11 @@ qnn::qnn_dimension_array_t get_transposed_dimensions(const qnn::qnn_dimension_ar return transposed_dims; } -int get_rank(const qnn::ggml_tensor_array_t &tensor_inputs, const qnn::ggml_tensor_array_t &tensor_outputs) { +int get_rank(const qnn::ggml_tensor_array_t & tensor_inputs, const qnn::ggml_tensor_array_t & tensor_outputs) { return std::max(qnn::get_ggml_tensors_max_rank(tensor_inputs), qnn::get_ggml_tensors_max_rank(tensor_outputs)); } -Qnn_DataType_t get_tensor_type(const qnn::qnn_tensor_array_t &tensors) { +Qnn_DataType_t get_tensor_type(const qnn::qnn_tensor_array_t & tensors) { Qnn_DataType_t type = QNN_DATATYPE_UNDEFINED; for (auto tensor : tensors) { auto tensor_type_size = qnn::qnn_datatype_size(tensor->get_data_type()); @@ -40,67 +33,67 @@ Qnn_DataType_t get_tensor_type(const qnn::qnn_tensor_array_t &tensors) { return type; } -} // namespace +} // namespace namespace qnn { -void ggml_qnn_op_config_base::add_scalar_param(const std::string &name, const Qnn_Scalar_t scalar) { +void ggml_qnn_op_config_base::add_scalar_param(const std::string & name, const Qnn_Scalar_t scalar) { _param_names.push_back(name); 
Qnn_Param_t param = QNN_PARAM_INIT; - param.paramType = QNN_PARAMTYPE_SCALAR; - param.name = _param_names.back().c_str(); + param.paramType = QNN_PARAMTYPE_SCALAR; + param.name = _param_names.back().c_str(); param.scalarParam = scalar; _qnn_parameters.push_back(param); } -bool ggml_qnn_op_config_base::add_tensor_param(const std::string &name, const qnn_dimension_array_t &dimensions, - int rank, const uint8_t *data, const Qnn_DataType_t data_type, +bool ggml_qnn_op_config_base::add_tensor_param(const std::string & name, const qnn_dimension_array_t & dimensions, + int rank, const uint8_t * data, const Qnn_DataType_t data_type, QNNBackend device, Qnn_GraphHandle_t graph_handle) { - std::string tensor_name = _name + name + std::to_string(_tensor_parameters.size()); - auto param_tensor = std::make_shared(ggml_qnn_tensor::PARAMETER, tensor_name, dimensions, - data_type, rank, device, graph_handle, _qnn_instance); - size_t data_size = ggml_type_size(ggml_datatype_from_qnn_datatype(data_type)); + std::string tensor_name = _name + name + std::to_string(_tensor_parameters.size()); + auto param_tensor = std::make_shared(ggml_qnn_tensor::PARAMETER, tensor_name, dimensions, + data_type, rank, device, graph_handle, _qnn_instance); + size_t data_size = ggml_type_size(ggml_datatype_from_qnn_datatype(data_type)); for (int i = 0; i < rank; i++) { data_size *= dimensions[i]; } GGML_ASSERT(data_size > 0); if (!param_tensor->set_data_buffer(data, data_size)) { - QNN_LOG_ERROR("parameter tensor bind_buffer failed"); + QNN_LOG_ERROR("parameter tensor bind_buffer failed\n"); return false; } if (!param_tensor->alloc_qnn_tensor_id()) { - QNN_LOG_ERROR("parameter tensor alloc_qnn_tensor_id failed"); + QNN_LOG_ERROR("parameter tensor alloc_qnn_tensor_id failed\n"); return false; } _tensor_parameters.push_back(param_tensor); _param_names.push_back(name); Qnn_Param_t param = QNN_PARAM_INIT; - param.paramType = QNN_PARAMTYPE_TENSOR; - param.name = _param_names.back().c_str(); + param.paramType = QNN_PARAMTYPE_TENSOR; + param.name = _param_names.back().c_str(); param.tensorParam = param_tensor->get_qnn_tensor(); _qnn_parameters.push_back(param); return true; } -void ggml_qnn_op_config_base::set_input_tensors(qnn::qnn_tensor_array_t &tensor_inputs) { +void ggml_qnn_op_config_base::set_input_tensors(qnn::qnn_tensor_array_t & tensor_inputs) { _tensor_inputs = tensor_inputs; _qnn_tensor_inputs.resize(_tensor_inputs.size()); } -void ggml_qnn_op_config_base::set_input_tensors(qnn::qnn_tensor_array_t &&tensor_inputs) { +void ggml_qnn_op_config_base::set_input_tensors(qnn::qnn_tensor_array_t && tensor_inputs) { _tensor_inputs = tensor_inputs; _qnn_tensor_inputs.resize(_tensor_inputs.size()); } -void ggml_qnn_op_config_base::set_output_tensors(qnn::qnn_tensor_array_t &tensor_outputs) { +void ggml_qnn_op_config_base::set_output_tensors(qnn::qnn_tensor_array_t & tensor_outputs) { _tensor_outputs = std::move(tensor_outputs); _qnn_tensor_outputs.resize(_tensor_outputs.size()); } -void ggml_qnn_op_config_base::set_output_tensors(qnn::qnn_tensor_array_t &&tensor_outputs) { +void ggml_qnn_op_config_base::set_output_tensors(qnn::qnn_tensor_array_t && tensor_outputs) { _tensor_outputs = std::move(tensor_outputs); _qnn_tensor_outputs.resize(_tensor_outputs.size()); } @@ -109,74 +102,80 @@ bool ggml_qnn_op_config_base::add_op_to_graph(Qnn_GraphHandle_t graph_handle) { GGML_ASSERT(_qnn_tensor_inputs.size() == _tensor_inputs.size()); GGML_ASSERT(_qnn_tensor_outputs.size() == _tensor_outputs.size()); - QNN_LOG_DEBUG("[%s]add to graph start", 
_name.c_str()); + QNN_LOG_DEBUG("[%s]add to graph start\n", _name.c_str()); for (size_t i = 0; i < _tensor_inputs.size(); i++) { auto tensor = _tensor_inputs[i]; if (!tensor->alloc_qnn_tensor_id()) { - QNN_LOG_ERROR("[%s]input tensor alloc_qnn_tensor_id failed", _name.c_str()); + QNN_LOG_ERROR("[%s]input tensor alloc_qnn_tensor_id failed\n", _name.c_str()); return false; } - QNN_LOG_DEBUG("[%s]input tensor id: %d", _name.c_str(), tensor->get_qnn_tensor_id()); + QNN_LOG_DEBUG("[%s]input tensor id: %d\n", _name.c_str(), tensor->get_qnn_tensor_id()); _qnn_tensor_inputs[i] = tensor->get_qnn_tensor(); } for (size_t i = 0; i < _tensor_outputs.size(); i++) { auto tensor = _tensor_outputs[i]; if (!tensor->alloc_qnn_tensor_id()) { - QNN_LOG_ERROR("[%s]output tensor alloc_qnn_tensor_id failed", _name.c_str()); + QNN_LOG_ERROR("[%s]output tensor alloc_qnn_tensor_id failed\n", _name.c_str()); return false; } - QNN_LOG_DEBUG("[%s]output tensor id: %d", _name.c_str(), tensor->get_qnn_tensor_id()); + QNN_LOG_DEBUG("[%s]output tensor id: %d\n", _name.c_str(), tensor->get_qnn_tensor_id()); _qnn_tensor_outputs[i] = tensor->get_qnn_tensor(); } auto qnn_interface = _qnn_instance->get_qnn_interface(); - auto error = qnn_interface->qnn_graph_add_node(graph_handle, get_op_config()); + auto error = qnn_interface->qnn_graph_add_node(graph_handle, get_op_config()); if (error != QNN_SUCCESS) { - QNN_LOG_ERROR("[%s]qnn_graph_add_node.error: %s", _name.c_str(), get_qnn_error_string(error)); + QNN_LOG_ERROR("[%s]qnn_graph_add_node.error: %s\n", _name.c_str(), get_qnn_error_string(error)); return false; } - QNN_LOG_DEBUG("[%s]added to graph succeed", _name.c_str()); + QNN_LOG_DEBUG("[%s]added to graph succeed\n", _name.c_str()); return true; } -bool ggml_qnn_op_config_base::bind_input_tensors(const ggml_tensor_array_t &tensor_inputs) { +bool ggml_qnn_op_config_base::bind_input_tensors(const ggml_tensor_array_t & tensor_inputs) { GGML_ASSERT(tensor_inputs.size() == _tensor_inputs.size()); return qnn::bind_tensors(tensor_inputs, _tensor_inputs, _qnn_tensor_inputs); } -bool ggml_qnn_op_config_base::bind_output_tensors(const ggml_tensor_array_t &tensor_outputs) { +bool ggml_qnn_op_config_base::bind_output_tensors(const ggml_tensor_array_t & tensor_outputs) { GGML_ASSERT(tensor_outputs.size() == _tensor_outputs.size()); return qnn::bind_tensors(tensor_outputs, _tensor_outputs, _qnn_tensor_outputs); } void ggml_qnn_op_config_base::unbind_input_tensors() { - for (auto &tensor : _tensor_inputs) { + for (auto & tensor : _tensor_inputs) { tensor->unbind(); } } void ggml_qnn_op_config_base::unbind_output_tensors() { - for (auto &tensor : _tensor_outputs) { + for (auto & tensor : _tensor_outputs) { tensor->unbind(); } } Qnn_OpConfig_t ggml_qnn_op_config_base::get_op_config() { - Qnn_OpConfig_t config = QNN_OPCONFIG_INIT; - config.version = QNN_OPCONFIG_VERSION_1; - auto &op_config = config.v1; - op_config.name = _name.c_str(); - op_config.packageName = _package_name.c_str(); - op_config.typeName = _op_type.c_str(); - op_config.numOfParams = (uint32_t)_qnn_parameters.size(); - op_config.params = _qnn_parameters.data(); - op_config.numOfInputs = (uint32_t)_qnn_tensor_inputs.size(); - op_config.inputTensors = _qnn_tensor_inputs.data(); - op_config.numOfOutputs = (uint32_t)_qnn_tensor_outputs.size(); + GGML_ASSERT(_qnn_parameters.size() == _param_names.size()); + + for (size_t i = 0; i < _qnn_parameters.size(); i++) { + _qnn_parameters[i].name = _param_names[i].c_str(); + } + + Qnn_OpConfig_t config = QNN_OPCONFIG_INIT; + 
config.version = QNN_OPCONFIG_VERSION_1; + auto & op_config = config.v1; + op_config.name = _name.c_str(); + op_config.packageName = _package_name.c_str(); + op_config.typeName = _op_type.c_str(); + op_config.numOfParams = (uint32_t) _qnn_parameters.size(); + op_config.params = _qnn_parameters.data(); + op_config.numOfInputs = (uint32_t) _qnn_tensor_inputs.size(); + op_config.inputTensors = _qnn_tensor_inputs.data(); + op_config.numOfOutputs = (uint32_t) _qnn_tensor_outputs.size(); op_config.outputTensors = _qnn_tensor_outputs.data(); return config; } @@ -188,33 +187,33 @@ bool ggml_qnn_single_op_config::initialize_op_nodes(QNNBackend device, Qnn_Graph } bool ggml_qnn_rmsnorm_op_config::initialize_op_nodes(QNNBackend device, Qnn_GraphHandle_t graph_handle) { - constexpr const uint32_t kAxes[] = {0}; - add_tensor_param(QNN_OP_RMS_NORM_PARAM_AXES, {1}, 1, reinterpret_cast(kAxes), QNN_DATATYPE_UINT_32, - device, graph_handle); + constexpr const uint32_t kAxes[] = { 0 }; + add_tensor_param(QNN_OP_RMS_NORM_PARAM_AXES, { 1 }, 1, reinterpret_cast(kAxes), + QNN_DATATYPE_UINT_32, device, graph_handle); return true; } -void ggml_qnn_aggregate_op_config::set_input_tensors(qnn::qnn_tensor_array_t &tensor_inputs) { +void ggml_qnn_aggregate_op_config::set_input_tensors(qnn::qnn_tensor_array_t & tensor_inputs) { _tensor_inputs = tensor_inputs; } -void ggml_qnn_aggregate_op_config::set_input_tensors(qnn::qnn_tensor_array_t &&tensor_inputs) { +void ggml_qnn_aggregate_op_config::set_input_tensors(qnn::qnn_tensor_array_t && tensor_inputs) { _tensor_inputs = std::move(tensor_inputs); } -void ggml_qnn_aggregate_op_config::set_output_tensors(qnn::qnn_tensor_array_t &tensor_outputs) { +void ggml_qnn_aggregate_op_config::set_output_tensors(qnn::qnn_tensor_array_t & tensor_outputs) { _tensor_outputs = tensor_outputs; } -void ggml_qnn_aggregate_op_config::set_output_tensors(qnn::qnn_tensor_array_t &&tensor_outputs) { +void ggml_qnn_aggregate_op_config::set_output_tensors(qnn::qnn_tensor_array_t && tensor_outputs) { _tensor_outputs = std::move(tensor_outputs); } -bool ggml_qnn_aggregate_op_config::bind_input_tensors(const ggml_tensor_array_t &tensor_inputs) { +bool ggml_qnn_aggregate_op_config::bind_input_tensors(const ggml_tensor_array_t & tensor_inputs) { return qnn::bind_tensors(tensor_inputs, _tensor_inputs); } -bool ggml_qnn_aggregate_op_config::bind_output_tensors(const ggml_tensor_array_t &tensor_outputs) { +bool ggml_qnn_aggregate_op_config::bind_output_tensors(const ggml_tensor_array_t & tensor_outputs) { return qnn::bind_tensors(tensor_outputs, _tensor_outputs); } @@ -223,18 +222,18 @@ bool ggml_qnn_matmul_op_config::initialize_op_nodes(QNNBackend device, Qnn_Graph GGML_ASSERT(_tensor_outputs.size() == 1); // create convert nodes - const auto tensor_rank = _tensor_inputs.front()->get_rank(); - qnn_tensor_array_t mat_mul_tensor_inputs = _tensor_inputs; + const auto tensor_rank = _tensor_inputs.front()->get_rank(); + qnn_tensor_array_t mat_mul_tensor_inputs = _tensor_inputs; qnn_tensor_array_t mat_mul_tensor_outputs = _tensor_outputs; if (!create_convert_nodes(device, graph_handle, tensor_rank, mat_mul_tensor_inputs, mat_mul_tensor_outputs)) { - QNN_LOG_ERROR("create convert nodes failed"); + QNN_LOG_ERROR("create convert nodes failed\n"); return false; } mat_mul_tensor_inputs.front() = create_gather_nodes(device, graph_handle, tensor_rank, mat_mul_tensor_inputs.front(), mat_mul_tensor_inputs.back()->get_dimensions()); - return create_mat_mul_nodes(device, graph_handle, tensor_rank, mat_mul_tensor_inputs, 
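The new GGML_ASSERT and the loop that re-points each _qnn_parameters[i].name at _param_names[i].c_str() inside get_op_config() are worth a note: if _param_names is a std::vector of std::string, as the push_back/back().c_str() usage in add_scalar_param and add_tensor_param suggests, a later push_back can reallocate the vector and move its strings, leaving the c_str() pointers captured earlier dangling. Refreshing the names at the moment the Qnn_OpConfig_t is assembled sidesteps that. A minimal reproduction of the hazard, under that assumption:

#include <string>
#include <vector>

int main() {
    std::vector<std::string> names;
    std::vector<const char *> raw;

    names.push_back("epsilon");
    raw.push_back(names.back().c_str());  // pointer into the vector's current storage

    names.push_back("axes");              // may reallocate and move the first string
    // raw[0] can now dangle (short/SSO strings are stored inline, so a move changes their address);
    // re-reading names[0].c_str() right before use, as get_op_config() now does, stays valid.
    const char * safe = names[0].c_str();
    (void) safe;
    return 0;
}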
mat_mul_tensor_outputs); + return create_mat_mul_nodes(mat_mul_tensor_inputs, mat_mul_tensor_outputs); } qnn_tensor_ptr_t ggml_qnn_matmul_op_config::create_gather_nodes(QNNBackend device, Qnn_GraphHandle_t graph_handle, @@ -244,9 +243,9 @@ qnn_tensor_ptr_t ggml_qnn_matmul_op_config::create_gather_nodes(QNNBackend devic return tensor_input; } - const auto &input_dimensions = tensor_input->get_dimensions(); - output_dimensions[rank - 1] = input_dimensions[rank - 1]; - output_dimensions[rank - 2] = input_dimensions[rank - 2]; + const auto & input_dimensions = tensor_input->get_dimensions(); + output_dimensions[rank - 1] = input_dimensions[rank - 1]; + output_dimensions[rank - 2] = input_dimensions[rank - 2]; const auto y = output_dimensions[rank - 3] / input_dimensions[rank - 3]; if (y == 1 && (rank == 3 || (rank == 4 && output_dimensions[rank - 4] == input_dimensions[rank - 4]))) { @@ -255,9 +254,9 @@ qnn_tensor_ptr_t ggml_qnn_matmul_op_config::create_gather_nodes(QNNBackend devic // create concat nodes, to convert tensor shape from [ne03, ne02, n, k] to [ne03 * x, ne02 * y, n, k] constexpr const auto create_node = - [](const std::string &name, const int rank, const int axis, const qnn_dimension_array_t &dimensions, + [](const std::string & name, const int rank, const int axis, const qnn_dimension_array_t & dimensions, qnn_tensor_ptr_t tensor_input, QNNBackend device, Qnn_GraphHandle_t graph_handle, - std::shared_ptr qnn_instance, qnn_tensor_ptr_t &tensor_output) -> qnn_op_config_ptr_t { + std::shared_ptr qnn_instance, qnn_tensor_ptr_t & tensor_output) -> qnn_op_config_ptr_t { auto gather_out = std::make_shared(ggml_qnn_tensor::INTERMEDIATE, name + "_out", dimensions, tensor_input->get_data_type(), rank, device, graph_handle, qnn_instance); @@ -265,32 +264,32 @@ qnn_tensor_ptr_t ggml_qnn_matmul_op_config::create_gather_nodes(QNNBackend devic qnn_instance); Qnn_Scalar_t scalar = QNN_SCALAR_INIT; - scalar.dataType = QNN_DATATYPE_INT_32; - scalar.int32Value = axis; + scalar.dataType = QNN_DATATYPE_INT_32; + scalar.int32Value = axis; gather_op->add_scalar_param(QNN_OP_GATHER_PARAM_AXIS, scalar); - gather_op->set_output_tensors({gather_out}); + gather_op->set_output_tensors({ gather_out }); // here we calculate the index mapping, will generate a 1d tensor like [0, 0, 0, 1, 1, 1, 2, 2, 2, ...], // by repeating each index [scale] times. 
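The index-mapping buffer described in the comment above can be sketched in isolation as follows; `make_gather_indices` and its parameters are illustrative names, not part of the patch:

```cpp
// Standalone sketch: build the gather index buffer described above.
// For an output axis of size `out_dim` produced by repeating each source
// index `scale` times, the buffer becomes [0, 0, ..., 1, 1, ..., 2, ...].
#include <cstdint>
#include <cstdio>
#include <vector>

std::vector<uint32_t> make_gather_indices(uint32_t out_dim, uint32_t scale) {
    std::vector<uint32_t> indices(out_dim);
    for (uint32_t i = 0; i < out_dim; ++i) {
        indices[i] = i / scale;  // repeat each source index `scale` times
    }
    return indices;
}

int main() {
    // out_dim = 9, scale = 3 -> 0 0 0 1 1 1 2 2 2
    for (uint32_t v : make_gather_indices(9, 3)) {
        printf("%u ", v);
    }
    printf("\n");
    return 0;
}
```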
- const auto scale = dimensions[axis] / tensor_input->get_dimensions()[axis]; - auto index_buffer = std::make_shared(dimensions[axis] * sizeof(uint32_t)); + const auto scale = dimensions[axis] / tensor_input->get_dimensions()[axis]; + auto index_buffer = std::make_shared(dimensions[axis] * sizeof(uint32_t)); for (uint32_t *curr = reinterpret_cast(index_buffer->get_buffer()), *end = curr + dimensions[axis]; curr < end; curr++) { *curr = uint32_t((curr - reinterpret_cast(index_buffer->get_buffer())) / scale); } auto gather_index = std::make_shared( - ggml_qnn_tensor::PARAMETER, name + "_index", qnn_dimension_array_t{dimensions[axis]}, QNN_DATATYPE_UINT_32, - 1, device, graph_handle, qnn_instance); + ggml_qnn_tensor::PARAMETER, name + "_index", qnn_dimension_array_t{ dimensions[axis] }, + QNN_DATATYPE_UINT_32, 1, device, graph_handle, qnn_instance); gather_index->set_data_buffer(index_buffer); - gather_op->set_input_tensors({tensor_input, gather_index}); + gather_op->set_input_tensors({ tensor_input, gather_index }); tensor_output = gather_out; return gather_op; }; qnn_dimension_array_t intermediate_dimensions = input_dimensions; - intermediate_dimensions[rank - 3] = output_dimensions[rank - 3]; + intermediate_dimensions[rank - 3] = output_dimensions[rank - 3]; qnn_tensor_ptr_t gather0_out; _operations.push_back(create_node(_name + "_gather0", rank, rank - 3, intermediate_dimensions, tensor_input, device, graph_handle, _qnn_instance, gather0_out)); @@ -305,8 +304,8 @@ qnn_tensor_ptr_t ggml_qnn_matmul_op_config::create_gather_nodes(QNNBackend devic } bool ggml_qnn_matmul_op_config::create_convert_nodes(QNNBackend device, Qnn_GraphHandle_t graph_handle, const int rank, - qnn_tensor_array_t &tensor_inputs, - qnn_tensor_array_t &tensor_outputs) { + qnn_tensor_array_t & tensor_inputs, + qnn_tensor_array_t & tensor_outputs) { if (device == QNN_BACKEND_GPU) { // there's no convert op for GPU, so we should create matmul nodes directly. 
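For the non-GPU path handled in the rest of this hunk, the intent is to route every input whose element type differs from the chosen compute type through an explicit convert node before the matmul sees it (the output side is handled analogously further down). A simplified, hypothetical model of that rewiring, with plain structs standing in for QNN tensors and ops rather than the real API:

```cpp
// Hypothetical, simplified model of convert-node insertion: mismatched
// inputs are wrapped in a "convert" step and the op is rewired to its output.
#include <cstdio>
#include <string>
#include <vector>

enum class dtype { f16, f32 };

struct tensor { std::string name; dtype type; };
struct op     { std::string kind; std::vector<std::string> inputs; std::string output; };

int main() {
    dtype compute_type = dtype::f16;
    std::vector<tensor> inputs = { { "src0", dtype::f16 }, { "src1", dtype::f32 } };
    std::vector<op> graph;

    for (auto & in : inputs) {
        if (in.type != compute_type) {
            // insert a convert node and rewire this input to its output
            tensor converted = { in.name + "_cvt", compute_type };
            graph.push_back({ "convert", { in.name }, converted.name });
            in = converted;
        }
    }
    graph.push_back({ "mat_mul", { inputs[0].name, inputs[1].name }, "dst" });

    for (const auto & node : graph) {
        printf("%s -> %s\n", node.kind.c_str(), node.output.c_str());
    }
    return 0;
}
```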
return true; @@ -314,7 +313,7 @@ bool ggml_qnn_matmul_op_config::create_convert_nodes(QNNBackend device, Qnn_Grap // create tensors for convert node auto tensor_type = get_tensor_type(tensor_inputs); - QNN_LOG_DEBUG("input tensor type: %s", qnn_datatype_to_string(tensor_type)); + QNN_LOG_DEBUG("input tensor type: %s\n", qnn_datatype_to_string(tensor_type)); for (size_t i = 0; i < tensor_inputs.size(); ++i) { // create input convert nodes @@ -327,10 +326,10 @@ bool ggml_qnn_matmul_op_config::create_convert_nodes(QNNBackend device, Qnn_Grap auto convert_out = std::make_shared(ggml_qnn_tensor::INTERMEDIATE, convert_name + "_out", convert_in->get_dimensions(), tensor_type, rank, device, graph_handle, _qnn_instance); - auto convert = std::make_shared(convert_name, QNN_OP_PACKAGE_NAME_QTI_AISW, - QNN_OP_CONVERT, _qnn_instance); - convert->set_input_tensors({convert_in}); - convert->set_output_tensors({convert_out}); + auto convert = std::make_shared(convert_name, QNN_OP_PACKAGE_NAME_QTI_AISW, + QNN_OP_CONVERT, _qnn_instance); + convert->set_input_tensors({ convert_in }); + convert->set_output_tensors({ convert_out }); tensor_inputs[i] = convert_out; _operations.push_back(convert); } @@ -338,14 +337,14 @@ bool ggml_qnn_matmul_op_config::create_convert_nodes(QNNBackend device, Qnn_Grap if (tensor_outputs.front()->get_data_type() != tensor_type) { // create output convert node std::string convert_name("convert_dst"); - auto convert_out = tensor_outputs.front(); - auto convert_in = std::make_shared(ggml_qnn_tensor::INTERMEDIATE, convert_name + "_in", - convert_out->get_dimensions(), tensor_type, rank, device, - graph_handle, _qnn_instance); + auto convert_out = tensor_outputs.front(); + auto convert_in = std::make_shared(ggml_qnn_tensor::INTERMEDIATE, convert_name + "_in", + convert_out->get_dimensions(), tensor_type, rank, device, + graph_handle, _qnn_instance); auto output_convert = std::make_shared(convert_name, QNN_OP_PACKAGE_NAME_QTI_AISW, QNN_OP_CONVERT, _qnn_instance); - output_convert->set_input_tensors({convert_in}); - output_convert->set_output_tensors({convert_out}); + output_convert->set_input_tensors({ convert_in }); + output_convert->set_output_tensors({ convert_out }); tensor_outputs.front() = convert_in; _operations.push_back(output_convert); } @@ -353,10 +352,8 @@ bool ggml_qnn_matmul_op_config::create_convert_nodes(QNNBackend device, Qnn_Grap return true; } -bool ggml_qnn_matmul_op_config::create_mat_mul_nodes(QNNBackend device, Qnn_GraphHandle_t graph_handle, const int rank, - qnn_tensor_array_t &tensor_inputs, - qnn_tensor_array_t &tensor_outputs) { - +bool ggml_qnn_matmul_op_config::create_mat_mul_nodes(qnn_tensor_array_t & tensor_inputs, + qnn_tensor_array_t & tensor_outputs) { /* * First, both the ggml and qnn tensor in memory are stored as row-major format. 
(For more details, please refer to: * https://pytorch.org/blog/tensor-memory-format-matters/#:~:text=Column%20Major%20Order:%20In%20this%20format,%20the%20matrix) @@ -395,8 +392,8 @@ bool ggml_qnn_matmul_op_config::create_mat_mul_nodes(QNNBackend device, Qnn_Grap * So here we need to create graph like: * ```mermaid * graph TD; - * i1>ggml_tensor_in0] --src1--> mat_mul0; - * i2>ggml_tensor_in1] --src0.T--> mat_mul0; + * i1>ggml_tensor_in1] --src0--> mat_mul0; + * i2>ggml_tensor_in0] --src1.T--> mat_mul0; * mat_mul0 --dst0--> o1>ggml_tensor_out]; * ``` */ @@ -411,8 +408,8 @@ bool ggml_qnn_matmul_op_config::create_mat_mul_nodes(QNNBackend device, Qnn_Grap std::make_shared(_name, QNN_OP_PACKAGE_NAME_QTI_AISW, QNN_OP_MAT_MUL, _qnn_instance); Qnn_Scalar_t scalar = QNN_SCALAR_INIT; - scalar.dataType = QNN_DATATYPE_BOOL_8; - scalar.bool8Value = 1; + scalar.dataType = QNN_DATATYPE_BOOL_8; + scalar.bool8Value = 1; mat_mul->add_scalar_param(QNN_OP_MAT_MUL_PARAM_TRANSPOSE_IN1, scalar); // set tensor to mat_mul @@ -424,4 +421,4 @@ bool ggml_qnn_matmul_op_config::create_mat_mul_nodes(QNNBackend device, Qnn_Grap return true; } -} // namespace qnn +} // namespace qnn diff --git a/ggml/src/ggml-qnn/op-config-impl.hpp b/ggml/src/ggml-qnn/op-config-impl.hpp index 4a00ed2cc7..8e2f107b2d 100644 --- a/ggml/src/ggml-qnn/op-config-impl.hpp +++ b/ggml/src/ggml-qnn/op-config-impl.hpp @@ -13,77 +13,83 @@ namespace qnn { class ggml_qnn_op_config_base : public ggml_qnn_op_config { -public: - explicit ggml_qnn_op_config_base(const std::string &name, const std::string &package_name, - const std::string &op_type, std::shared_ptr qnn_instance) - : _name(name), _package_name(package_name), _op_type(op_type), _qnn_instance(qnn_instance) {} + public: + explicit ggml_qnn_op_config_base(const std::string & name, const std::string & package_name, + const std::string & op_type, std::shared_ptr qnn_instance) : + _name(name), + _package_name(package_name), + _op_type(op_type), + _qnn_instance(qnn_instance) {} - void add_scalar_param(const std::string &name, const Qnn_Scalar_t scalar); - bool add_tensor_param(const std::string &name, const qnn_dimension_array_t &dimensions, int rank, - const uint8_t *data, const Qnn_DataType_t data_type, QNNBackend device, + void add_scalar_param(const std::string & name, const Qnn_Scalar_t scalar); + bool add_tensor_param(const std::string & name, const qnn_dimension_array_t & dimensions, int rank, + const uint8_t * data, const Qnn_DataType_t data_type, QNNBackend device, Qnn_GraphHandle_t graph_handle); - void set_input_tensors(qnn::qnn_tensor_array_t &tensor_inputs) override; - void set_input_tensors(qnn::qnn_tensor_array_t &&tensor_inputs) override; - void set_output_tensors(qnn::qnn_tensor_array_t &tensor_inputs) override; - void set_output_tensors(qnn::qnn_tensor_array_t &&tensor_inputs) override; + void set_input_tensors(qnn::qnn_tensor_array_t & tensor_inputs) override; + void set_input_tensors(qnn::qnn_tensor_array_t && tensor_inputs) override; + void set_output_tensors(qnn::qnn_tensor_array_t & tensor_inputs) override; + void set_output_tensors(qnn::qnn_tensor_array_t && tensor_inputs) override; bool add_op_to_graph(Qnn_GraphHandle_t graph_handle) override; - bool bind_input_tensors(const ggml_tensor_array_t &tensor_inputs) override; - bool bind_output_tensors(const ggml_tensor_array_t &tensor_outputs) override; + bool bind_input_tensors(const ggml_tensor_array_t & tensor_inputs) override; + bool bind_output_tensors(const ggml_tensor_array_t & tensor_outputs) override; void 
unbind_input_tensors() override; void unbind_output_tensors() override; - const qnn_tensor_array_t &get_input_tensors() override { return _tensor_inputs; } - const qnn_tensor_array_t &get_output_tensors() override { return _tensor_outputs; } -protected: + const qnn_tensor_array_t & get_input_tensors() override { return _tensor_inputs; } + + const qnn_tensor_array_t & get_output_tensors() override { return _tensor_outputs; } + + protected: Qnn_OpConfig_t get_op_config(); - std::string _name; - std::string _package_name; - std::string _op_type; + std::string _name; + std::string _package_name; + std::string _op_type; std::shared_ptr _qnn_instance; - qnn_tensor_array_t _tensor_inputs; - qnn_tensor_array_t _tensor_outputs; - qnn_tensor_array_t _tensor_parameters; - std::vector _qnn_tensor_inputs; - std::vector _qnn_tensor_outputs; - std::vector _qnn_parameters; - std::vector _param_names; + qnn_tensor_array_t _tensor_inputs; + qnn_tensor_array_t _tensor_outputs; + qnn_tensor_array_t _tensor_parameters; + std::vector _qnn_tensor_inputs; + std::vector _qnn_tensor_outputs; + std::vector _qnn_parameters; + std::vector _param_names; DISABLE_COPY(ggml_qnn_op_config_base); DISABLE_MOVE(ggml_qnn_op_config_base); }; class ggml_qnn_single_op_config : public ggml_qnn_op_config_base { -public: - explicit ggml_qnn_single_op_config(const std::string &name, const std::string &package_name, - const std::string &op_type, std::shared_ptr qnn_instance) - : ggml_qnn_op_config_base(name, package_name, op_type, qnn_instance) {} + public: + explicit ggml_qnn_single_op_config(const std::string & name, const std::string & package_name, + const std::string & op_type, std::shared_ptr qnn_instance) : + ggml_qnn_op_config_base(name, package_name, op_type, qnn_instance) {} bool initialize_op_nodes(QNNBackend device, Qnn_GraphHandle_t graph_handle) override; -private: + private: DISABLE_COPY(ggml_qnn_single_op_config); DISABLE_MOVE(ggml_qnn_single_op_config); }; class ggml_qnn_rmsnorm_op_config : public ggml_qnn_op_config_base { -public: - explicit ggml_qnn_rmsnorm_op_config(const std::string &name, const std::string &package_name, - const std::string &op_type, std::shared_ptr qnn_instance) - : ggml_qnn_op_config_base(name, package_name, op_type, qnn_instance) {} + public: + explicit ggml_qnn_rmsnorm_op_config(const std::string & name, const std::string & package_name, + const std::string & op_type, std::shared_ptr qnn_instance) : + ggml_qnn_op_config_base(name, package_name, op_type, qnn_instance) {} bool initialize_op_nodes(QNNBackend device, Qnn_GraphHandle_t graph_handle) override; -private: + private: DISABLE_COPY(ggml_qnn_rmsnorm_op_config); DISABLE_MOVE(ggml_qnn_rmsnorm_op_config); }; class ggml_qnn_aggregate_op_config : public ggml_qnn_op_config { -public: - explicit ggml_qnn_aggregate_op_config(const std::string &name, std::shared_ptr qnn_instance) - : _name(name), _qnn_instance(qnn_instance) {} + public: + explicit ggml_qnn_aggregate_op_config(const std::string & name, std::shared_ptr qnn_instance) : + _name(name), + _qnn_instance(qnn_instance) {} ~ggml_qnn_aggregate_op_config() { _tensor_inputs.clear(); @@ -91,61 +97,63 @@ public: _operations.clear(); } - void set_input_tensors(qnn::qnn_tensor_array_t &tensor_inputs) override; - void set_input_tensors(qnn::qnn_tensor_array_t &&tensor_inputs) override; - void set_output_tensors(qnn::qnn_tensor_array_t &tensor_inputs) override; - void set_output_tensors(qnn::qnn_tensor_array_t &&tensor_inputs) override; + void set_input_tensors(qnn::qnn_tensor_array_t & 
tensor_inputs) override; + void set_input_tensors(qnn::qnn_tensor_array_t && tensor_inputs) override; + void set_output_tensors(qnn::qnn_tensor_array_t & tensor_inputs) override; + void set_output_tensors(qnn::qnn_tensor_array_t && tensor_inputs) override; + bool add_op_to_graph(Qnn_GraphHandle_t graph_handle) override { return qnn::add_op_to_graph(graph_handle, _operations); } - bool bind_input_tensors(const ggml_tensor_array_t &tensor_inputs) override; - bool bind_output_tensors(const ggml_tensor_array_t &tensor_outputs) override; + bool bind_input_tensors(const ggml_tensor_array_t & tensor_inputs) override; + bool bind_output_tensors(const ggml_tensor_array_t & tensor_outputs) override; + void unbind_input_tensors() override { - for (auto &tensor : _tensor_inputs) { + for (auto & tensor : _tensor_inputs) { tensor->unbind(); } } void unbind_output_tensors() override { - for (auto &tensor : _tensor_outputs) { + for (auto & tensor : _tensor_outputs) { tensor->unbind(); } } - const qnn_tensor_array_t &get_input_tensors() override { return _tensor_inputs; } - const qnn_tensor_array_t &get_output_tensors() override { return _tensor_outputs; } + const qnn_tensor_array_t & get_input_tensors() override { return _tensor_inputs; } -protected: - std::string _name; + const qnn_tensor_array_t & get_output_tensors() override { return _tensor_outputs; } + + protected: + std::string _name; std::shared_ptr _qnn_instance; std::vector _operations; - qnn_tensor_array_t _tensor_inputs; - qnn_tensor_array_t _tensor_outputs; + qnn_tensor_array_t _tensor_inputs; + qnn_tensor_array_t _tensor_outputs; -private: + private: DISABLE_COPY(ggml_qnn_aggregate_op_config); DISABLE_MOVE(ggml_qnn_aggregate_op_config); }; class ggml_qnn_matmul_op_config : public ggml_qnn_aggregate_op_config { -public: - ggml_qnn_matmul_op_config(const std::string &name, std::shared_ptr qnn_instance) - : ggml_qnn_aggregate_op_config(name, qnn_instance) {} + public: + ggml_qnn_matmul_op_config(const std::string & name, std::shared_ptr qnn_instance) : + ggml_qnn_aggregate_op_config(name, qnn_instance) {} bool initialize_op_nodes(QNNBackend device, Qnn_GraphHandle_t graph_handle) override; -private: + private: qnn_tensor_ptr_t create_gather_nodes(QNNBackend device, Qnn_GraphHandle_t graph_handle, const int rank, qnn_tensor_ptr_t tensor_input, qnn_dimension_array_t output_dimensions); - bool create_convert_nodes(QNNBackend device, Qnn_GraphHandle_t graph_handle, const int rank, - qnn_tensor_array_t &tensor_inputs, qnn_tensor_array_t &tensor_outputs); - bool create_mat_mul_nodes(QNNBackend device, Qnn_GraphHandle_t graph_handle, const int rank, - qnn_tensor_array_t &tensor_inputs, qnn_tensor_array_t &tensor_outputs); + bool create_convert_nodes(QNNBackend device, Qnn_GraphHandle_t graph_handle, const int rank, + qnn_tensor_array_t & tensor_inputs, qnn_tensor_array_t & tensor_outputs); + bool create_mat_mul_nodes(qnn_tensor_array_t & tensor_inputs, qnn_tensor_array_t & tensor_outputs); DISABLE_COPY(ggml_qnn_matmul_op_config); DISABLE_MOVE(ggml_qnn_matmul_op_config); }; -} // namespace qnn +} // namespace qnn diff --git a/ggml/src/ggml-qnn/op-config.hpp b/ggml/src/ggml-qnn/op-config.hpp index 6b8c6946b8..d613a2116c 100644 --- a/ggml/src/ggml-qnn/op-config.hpp +++ b/ggml/src/ggml-qnn/op-config.hpp @@ -14,14 +14,14 @@ namespace qnn { constexpr const size_t kGgmlUnaryOpStart = GGML_OP_COUNT; -size_t get_qnn_op_index(const ggml_tensor *tensor); -const char *get_qnn_op_name(const ggml_tensor *op); -size_t get_qnn_op_input_param_count(const ggml_tensor 
*op); -std::shared_ptr create_op(const ggml_tensor *op, const std::string &name, +size_t get_qnn_op_index(const ggml_tensor * tensor); +const char * get_qnn_op_name(const ggml_tensor * op); +size_t get_qnn_op_input_param_count(const ggml_tensor * op); +std::shared_ptr create_op(const ggml_tensor * op, const std::string & name, std::shared_ptr qnn_instance); -inline bool add_op_to_graph(Qnn_GraphHandle_t graph_handle, std::vector &operations) { - for (auto &op : operations) { +inline bool add_op_to_graph(Qnn_GraphHandle_t graph_handle, std::vector & operations) { + for (auto & op : operations) { if (!op->add_op_to_graph(graph_handle)) { return false; } @@ -30,4 +30,4 @@ inline bool add_op_to_graph(Qnn_GraphHandle_t graph_handle, std::vector #if defined(__linux__) -#include +# include #endif namespace { #ifdef _WIN32 -constexpr const char *kQnnSystemLibName = "QnnSystem.dll"; -constexpr const char *kQnnRpcLibName = "libcdsprpc.dll"; +constexpr const char * kQnnSystemLibName = "QnnSystem.dll"; +constexpr const char * kQnnRpcLibName = "libcdsprpc.dll"; #else -constexpr const char *kQnnSystemLibName = "libQnnSystem.so"; -constexpr const char *kQnnRpcLibName = "libcdsprpc.so"; +constexpr const char * kQnnSystemLibName = "libQnnSystem.so"; +constexpr const char * kQnnRpcLibName = "libcdsprpc.so"; #endif -void insert_path(std::string &path, std::string insert_path, const char separator = ':') { +void insert_path(std::string & path, std::string insert_path, const char separator = ':') { if (!insert_path.empty() && !path.empty()) { insert_path += separator; } @@ -27,10 +27,10 @@ void insert_path(std::string &path, std::string insert_path, const char separato } // TODO: Fix this for other platforms, or use a more portable way to set the library search path -bool set_qnn_lib_search_path(const std::string &custom_lib_search_path) { +bool set_qnn_lib_search_path(const std::string & custom_lib_search_path) { #if defined(__linux__) { - auto *original = getenv("LD_LIBRARY_PATH"); + auto * original = getenv("LD_LIBRARY_PATH"); std::string lib_search_path = original ? 
original : ""; insert_path(lib_search_path, "/vendor/dsp/cdsp:/vendor/lib64:" @@ -41,7 +41,7 @@ bool set_qnn_lib_search_path(const std::string &custom_lib_search_path) { } } -#if defined(__ANDROID__) || defined(ANDROID) +# if defined(__ANDROID__) || defined(ANDROID) { // See also: https://docs.qualcomm.com/bundle/publicresource/topics/80-63442-2/dsp_runtime.html std::string adsp_lib_search_path = custom_lib_search_path + @@ -51,87 +51,89 @@ bool set_qnn_lib_search_path(const std::string &custom_lib_search_path) { return false; } - QNN_LOG_DEBUG("ADSP_LIBRARY_PATH=%s", getenv("ADSP_LIBRARY_PATH")); + QNN_LOG_DEBUG("ADSP_LIBRARY_PATH=%s\n", getenv("ADSP_LIBRARY_PATH")); } -#endif +# endif - QNN_LOG_DEBUG("LD_LIBRARY_PATH=%s", getenv("LD_LIBRARY_PATH")); + QNN_LOG_DEBUG("LD_LIBRARY_PATH=%s\n", getenv("LD_LIBRARY_PATH")); #else - (void)custom_lib_search_path; + (void) custom_lib_search_path; #endif return true; } -qnn::dl_handler_t load_lib_with_fallback(const std::string &lib_path, const std::string &load_directory) { +qnn::dl_handler_t load_lib_with_fallback(const std::string & lib_path, const std::string & load_directory) { std::filesystem::path full_path(load_directory); full_path /= std::filesystem::path(lib_path).filename(); auto handle = qnn::dl_load(full_path.string()); if (!handle) { - QNN_LOG_WARN("failed to load %s, fallback to %s", full_path.c_str(), lib_path.c_str()); + QNN_LOG_WARN("failed to load %s, fallback to %s\n", full_path.c_str(), lib_path.c_str()); handle = qnn::dl_load(lib_path); } return handle; } -} // namespace +} // namespace namespace qnn { -qnn_system_interface::qnn_system_interface(const QnnSystemInterface_t &qnn_sys_interface, dl_handler_t lib_handle) - : _qnn_sys_interface(qnn_sys_interface), _lib_handle(lib_handle) { +qnn_system_interface::qnn_system_interface(const QnnSystemInterface_t & qnn_sys_interface, dl_handler_t lib_handle) : + _qnn_sys_interface(qnn_sys_interface), + _lib_handle(lib_handle) { qnn_system_context_create(&_qnn_system_handle); if (_qnn_system_handle) { - QNN_LOG_INFO("initialize qnn system successfully"); + QNN_LOG_INFO("initialize qnn system successfully\n"); } else { - QNN_LOG_WARN("can not create QNN system contenxt"); + QNN_LOG_WARN("can not create QNN system context\n"); } } qnn_system_interface::~qnn_system_interface() { if (_qnn_system_handle) { if (qnn_system_context_free(_qnn_system_handle) != QNN_SUCCESS) { - QNN_LOG_WARN("failed to free QNN system context"); + QNN_LOG_WARN("failed to free QNN system context\n"); } } else { - QNN_LOG_WARN("system handle is null"); + QNN_LOG_WARN("system handle is null\n"); } if (_lib_handle) { if (!dl_unload(_lib_handle)) { - QNN_LOG_WARN("failed to close QnnSystem library, error %s", dl_error()); + QNN_LOG_WARN("failed to close QnnSystem library, error %s\n", dl_error()); } } else { - QNN_LOG_WARN("system lib handle is null"); + QNN_LOG_WARN("system lib handle is null\n"); } } -qnn_instance::qnn_instance(const std::string &lib_path, const std::string &backend_lib_name) - : _additional_lib_load_path(lib_path), _backend_lib_name(std::move(backend_lib_name)) { +qnn_instance::qnn_instance(const std::string & lib_path, const std::string & backend_lib_name) : + _additional_lib_load_path(lib_path), + _backend_lib_name(std::move(backend_lib_name)) { if (set_qnn_lib_search_path(lib_path)) { - QNN_LOG_DEBUG("[%s] set_qnn_lib_search_path succeed", _backend_lib_name.c_str()); + QNN_LOG_DEBUG("[%s] set_qnn_lib_search_path succeeded\n", _backend_lib_name.c_str()); } else { - QNN_LOG_ERROR("[%s] 
set_qnn_lib_search_path failed", _backend_lib_name.c_str()); + QNN_LOG_ERROR("[%s] set_qnn_lib_search_path failed\n", _backend_lib_name.c_str()); } } -int qnn_instance::qnn_init(const QnnSaver_Config_t **saver_config) { +int qnn_instance::qnn_init(const QnnSaver_Config_t ** saver_config) { BackendIdType backend_id = QNN_BACKEND_ID_NULL; - QNN_LOG_DEBUG("enter qnn_init"); + QNN_LOG_DEBUG("enter qnn_init\n"); std::lock_guard lock(_init_mutex); if (load_system() != 0) { - QNN_LOG_WARN("failed to load QNN system lib"); + QNN_LOG_WARN("failed to load QNN system lib\n"); return 1; } else { - QNN_LOG_DEBUG("load QNN system lib successfully"); + QNN_LOG_DEBUG("load QNN system lib successfully\n"); } std::string backend_lib_path = _backend_lib_name; if (_lib_path_to_backend_id.count(backend_lib_path) == 0) { if (load_backend(backend_lib_path, saver_config) != 0) { - QNN_LOG_WARN("failed to load QNN backend"); + QNN_LOG_WARN("failed to load QNN backend\n"); return 2; } } @@ -149,119 +151,119 @@ int qnn_instance::qnn_init(const QnnSaver_Config_t **saver_config) { _qnn_interface->qnn_log_create(qnn::sdk_logcallback, _qnn_log_level, &_qnn_log_handle); if (!_qnn_log_handle) { // NPU backend not work on Qualcomm SoC equipped low-end phone - QNN_LOG_WARN("why failed to initialize qnn log"); + QNN_LOG_WARN("why failed to initialize qnn log\n"); return 4; } else { - QNN_LOG_DEBUG("initialize qnn log successfully"); + QNN_LOG_DEBUG("initialize qnn log successfully\n"); } std::vector temp_backend_config; _qnn_interface->qnn_backend_create( _qnn_log_handle, temp_backend_config.empty() ? nullptr : temp_backend_config.data(), &_qnn_backend_handle); if (!_qnn_backend_handle) { - QNN_LOG_WARN("why failed to initialize qnn backend"); + QNN_LOG_WARN("why failed to initialize qnn backend\n"); return 5; } else { - QNN_LOG_DEBUG("initialize qnn backend successfully"); + QNN_LOG_DEBUG("initialize qnn backend successfully\n"); } auto qnn_status = _qnn_interface->qnn_property_has_capability(QNN_PROPERTY_GROUP_DEVICE); if (QNN_PROPERTY_NOT_SUPPORTED == qnn_status) { - QNN_LOG_WARN("device property is not supported"); + QNN_LOG_WARN("device property is not supported\n"); } if (QNN_PROPERTY_ERROR_UNKNOWN_KEY == qnn_status) { - QNN_LOG_WARN("device property is not known to backend"); + QNN_LOG_WARN("device property is not known to backend\n"); } qnn_status = QNN_SUCCESS; if (_backend_lib_name.find("Htp") != _backend_lib_name.npos) { - const QnnDevice_PlatformInfo_t *p_info = nullptr; - qnn_status = _qnn_interface->qnn_device_get_platform_info(nullptr, &p_info); + const QnnDevice_PlatformInfo_t * p_info = nullptr; + qnn_status = _qnn_interface->qnn_device_get_platform_info(nullptr, &p_info); if (qnn_status == QNN_SUCCESS) { - QNN_LOG_INFO("device counts %d", p_info->v1.numHwDevices); - QnnDevice_HardwareDeviceInfo_t *infos = p_info->v1.hwDevices; + QNN_LOG_INFO("device counts %d\n", p_info->v1.numHwDevices); + QnnDevice_HardwareDeviceInfo_t * infos = p_info->v1.hwDevices; QnnHtpDevice_OnChipDeviceInfoExtension_t chipinfo = {}; for (uint32_t i = 0; i < p_info->v1.numHwDevices; i++) { - QNN_LOG_INFO("deviceID:%d, deviceType:%d, numCores %d", infos[i].v1.deviceId, infos[i].v1.deviceType, - infos[i].v1.numCores); + QNN_LOG_INFO("deviceID:%d, deviceType:%d, numCores %d\n", (int) infos[i].v1.deviceId, + (int) infos[i].v1.deviceType, (int) infos[i].v1.numCores); QnnDevice_DeviceInfoExtension_t devinfo = infos[i].v1.deviceInfoExtension; - chipinfo = devinfo->onChipDevice; - size_t htp_arch = (size_t)chipinfo.arch; - 
QNN_LOG_INFO("htp_type:%d(%s)", devinfo->devType, + chipinfo = devinfo->onChipDevice; + size_t htp_arch = (size_t) chipinfo.arch; + QNN_LOG_INFO("htp_type:%d(%s)\n", devinfo->devType, (devinfo->devType == QNN_HTP_DEVICE_TYPE_ON_CHIP) ? "ON_CHIP" : ""); - QNN_LOG_INFO("qualcomm soc_model:%d(%s), htp_arch:%d(%s), vtcm_size:%d MB", chipinfo.socModel, - qnn::get_chipset_desc(chipinfo.socModel), htp_arch, qnn::get_htparch_desc(htp_arch), - chipinfo.vtcmSize); - _soc_info = {chipinfo.socModel, htp_arch, chipinfo.vtcmSize}; + QNN_LOG_INFO("qualcomm soc_model:%d(%s), htp_arch:%d(%s), vtcm_size:%d MB\n", (int) chipinfo.socModel, + qnn::get_chipset_desc(chipinfo.socModel), (int) htp_arch, qnn::get_htparch_desc(htp_arch), + (int) chipinfo.vtcmSize); + _soc_info = { chipinfo.socModel, htp_arch, chipinfo.vtcmSize }; } _qnn_interface->qnn_device_free_platform_info(nullptr, p_info); } else { // For emulator, we can't get platform info - QNN_LOG_WARN("failed to get platform info, are we in emulator?"); - _soc_info = {NONE, UNKNOWN_SM, 0}; + QNN_LOG_WARN("failed to get platform info, are we in emulator?\n"); + _soc_info = { NONE, UNKNOWN_SM, 0 }; } QnnHtpDevice_CustomConfig_t soc_customconfig; - soc_customconfig.option = QNN_HTP_DEVICE_CONFIG_OPTION_SOC; + soc_customconfig.option = QNN_HTP_DEVICE_CONFIG_OPTION_SOC; soc_customconfig.socModel = _soc_info.soc_model; QnnDevice_Config_t soc_devconfig; - soc_devconfig.option = QNN_DEVICE_CONFIG_OPTION_CUSTOM; + soc_devconfig.option = QNN_DEVICE_CONFIG_OPTION_CUSTOM; soc_devconfig.customConfig = &soc_customconfig; QnnHtpDevice_CustomConfig_t arch_customconfig; - arch_customconfig.option = QNN_HTP_DEVICE_CONFIG_OPTION_ARCH; - arch_customconfig.arch.arch = (QnnHtpDevice_Arch_t)_soc_info.htp_arch; - arch_customconfig.arch.deviceId = 0; // Id of device to be used. 0 will use by default. + arch_customconfig.option = QNN_HTP_DEVICE_CONFIG_OPTION_ARCH; + arch_customconfig.arch.arch = (QnnHtpDevice_Arch_t) _soc_info.htp_arch; + arch_customconfig.arch.deviceId = 0; // Id of device to be used. 0 will use by default. QnnDevice_Config_t arch_devconfig; - arch_devconfig.option = QNN_DEVICE_CONFIG_OPTION_CUSTOM; + arch_devconfig.option = QNN_DEVICE_CONFIG_OPTION_CUSTOM; arch_devconfig.customConfig = &arch_customconfig; - const QnnDevice_Config_t *p_deviceconfig[] = {&soc_devconfig, &arch_devconfig, nullptr}; + const QnnDevice_Config_t * p_deviceconfig[] = { &soc_devconfig, &arch_devconfig, nullptr }; qnn_status = _qnn_interface->qnn_device_create(_qnn_log_handle, p_deviceconfig, &_qnn_device_handle); } else { qnn_status = _qnn_interface->qnn_device_create(_qnn_log_handle, nullptr, &_qnn_device_handle); } if (QNN_SUCCESS != qnn_status && QNN_DEVICE_ERROR_UNSUPPORTED_FEATURE != qnn_status) { - QNN_LOG_WARN("failed to create QNN device"); + QNN_LOG_WARN("failed to create QNN device\n"); } else { - QNN_LOG_INFO("create QNN device successfully"); + QNN_LOG_INFO("create QNN device successfully\n"); } if (_profile_level != sdk_profile_level::profile_off) { - QNN_LOG_INFO("profiling turned on; level = %d", _profile_level); + QNN_LOG_INFO("profiling turned on; level = %d\n", _profile_level); auto profile_level = _profile_level == sdk_profile_level::profile_detail ? 
QNN_PROFILE_LEVEL_DETAILED : QNN_PROFILE_LEVEL_BASIC; if (QNN_PROFILE_NO_ERROR != _qnn_interface->qnn_profile_create(_qnn_backend_handle, profile_level, &_qnn_profile_handle)) { - QNN_LOG_WARN("unable to create profile handle in the backend"); + QNN_LOG_WARN("unable to create profile handle in the backend\n"); return 6; } else { - QNN_LOG_DEBUG("initialize qnn profile successfully"); + QNN_LOG_DEBUG("initialize qnn profile successfully\n"); } } _rpc_lib_handle = load_lib_with_fallback(kQnnRpcLibName, _additional_lib_load_path); if (_rpc_lib_handle) { _pfn_rpc_mem_alloc = reinterpret_cast(dl_sym(_rpc_lib_handle, "rpcmem_alloc")); - _pfn_rpc_mem_free = reinterpret_cast(dl_sym(_rpc_lib_handle, "rpcmem_free")); + _pfn_rpc_mem_free = reinterpret_cast(dl_sym(_rpc_lib_handle, "rpcmem_free")); _pfn_rpc_mem_to_fd = reinterpret_cast(dl_sym(_rpc_lib_handle, "rpcmem_to_fd")); if (!_pfn_rpc_mem_alloc || !_pfn_rpc_mem_free || !_pfn_rpc_mem_to_fd) { - QNN_LOG_WARN("unable to access symbols in QNN RPC lib. error: %s", dl_error()); + QNN_LOG_WARN("unable to access symbols in QNN RPC lib. error: %s\n", dl_error()); dl_unload(_rpc_lib_handle); return 9; } - _pfn_rpc_mem_init = reinterpret_cast(dl_sym(_rpc_lib_handle, "rpcmem_init")); + _pfn_rpc_mem_init = reinterpret_cast(dl_sym(_rpc_lib_handle, "rpcmem_init")); _pfn_rpc_mem_deinit = reinterpret_cast(dl_sym(_rpc_lib_handle, "rpcmem_deinit")); if (_pfn_rpc_mem_init) { _pfn_rpc_mem_init(); } _rpcmem_initialized = true; - QNN_LOG_DEBUG("load rpcmem lib successfully"); + QNN_LOG_DEBUG("load rpcmem lib successfully\n"); } else { - QNN_LOG_WARN("failed to load qualcomm rpc lib, skipping, error:%s", dl_error()); + QNN_LOG_WARN("failed to load qualcomm rpc lib, skipping, error:%s\n", dl_error()); } /* TODO: not used, keep it for further usage @@ -271,23 +273,23 @@ int qnn_instance::qnn_init(const QnnSaver_Config_t **saver_config) { */ _qnn_interface->qnn_context_create(_qnn_backend_handle, _qnn_device_handle, nullptr, &_qnn_context_handle); if (nullptr == _qnn_context_handle) { - QNN_LOG_WARN("why failed to initialize qnn context"); + QNN_LOG_WARN("why failed to initialize qnn context\n"); return 10; } else { - QNN_LOG_DEBUG("initialize qnn context successfully"); + QNN_LOG_DEBUG("initialize qnn context successfully\n"); } if (_backend_lib_name.find("Htp") != _backend_lib_name.npos) { // TODO: faster approach to probe the accurate capacity of rpc ion memory - size_t candidate_size = 0; - uint8_t *rpc_buffer = nullptr; - const int size_in_mb = (1 << 20); - size_t probe_slots[] = {1024, 1536, 2048 - 48, 2048}; - size_t probe_counts = sizeof(probe_slots) / sizeof(size_t); + size_t candidate_size = 0; + uint8_t * rpc_buffer = nullptr; + const int size_in_mb = (1 << 20); + size_t probe_slots[] = { 1024, 1536, 2048 - 48, 2048 }; + size_t probe_counts = sizeof(probe_slots) / sizeof(size_t); for (size_t idx = 0; idx < probe_counts; idx++) { rpc_buffer = static_cast(alloc_rpcmem(probe_slots[idx] * size_in_mb, sizeof(void *))); if (!rpc_buffer) { - QNN_LOG_DEBUG("alloc rpcmem %d (MB) failure, %s", probe_slots[idx], strerror(errno)); + QNN_LOG_DEBUG("alloc rpcmem %d (MB) failure, %s\n", (int) probe_slots[idx], strerror(errno)); break; } else { candidate_size = probe_slots[idx]; @@ -297,27 +299,27 @@ int qnn_instance::qnn_init(const QnnSaver_Config_t **saver_config) { } _rpcmem_capacity = std::max(candidate_size, _rpcmem_capacity); - QNN_LOG_INFO("capacity of QNN rpc ion memory is about %d MB", _rpcmem_capacity); + QNN_LOG_INFO("capacity of QNN rpc ion memory is about %d MB\n", 
(int) _rpcmem_capacity); if (init_htp_perfinfra() != 0) { - QNN_LOG_WARN("initialize HTP performance failure"); + QNN_LOG_WARN("initialize HTP performance failure\n"); } if (set_rpc_polling() != 0) { - QNN_LOG_WARN("set RPC polling failure"); + QNN_LOG_WARN("set RPC polling failure\n"); } if (set_high_performance_mode() != 0) { - QNN_LOG_WARN("set HTP high performance mode failure"); + QNN_LOG_WARN("set HTP high performance mode failure\n"); } } - QNN_LOG_DEBUG("leave qnn_init"); + QNN_LOG_DEBUG("leave qnn_init\n"); return 0; } int qnn_instance::qnn_finalize() { - int ret_status = 0; - Qnn_ErrorHandle_t error = QNN_SUCCESS; + int ret_status = 0; + Qnn_ErrorHandle_t error = QNN_SUCCESS; if (_rpc_lib_handle) { if (_pfn_rpc_mem_deinit) { @@ -326,9 +328,9 @@ int qnn_instance::qnn_finalize() { } if (dl_unload(_rpc_lib_handle)) { - QNN_LOG_DEBUG("succeed to close rpcmem lib"); + QNN_LOG_DEBUG("succeed to close rpcmem lib\n"); } else { - QNN_LOG_WARN("failed to unload qualcomm's rpc lib, error:%s", dl_error()); + QNN_LOG_WARN("failed to unload qualcomm's rpc lib, error:%s\n", dl_error()); } } @@ -339,8 +341,8 @@ int qnn_instance::qnn_finalize() { if (_qnn_context_handle) { error = _qnn_interface->qnn_context_free(_qnn_context_handle, _qnn_profile_handle); if (error != QNN_SUCCESS) { - QNN_LOG_WARN("failed to free QNN context_handle: ID %u, error %d", _qnn_interface->get_backend_id(), - QNN_GET_ERROR_CODE(error)); + QNN_LOG_WARN("failed to free QNN context_handle: ID %u, error %d\n", _qnn_interface->get_backend_id(), + (int) QNN_GET_ERROR_CODE(error)); } _qnn_context_handle = nullptr; } @@ -348,8 +350,8 @@ int qnn_instance::qnn_finalize() { if (_qnn_profile_handle) { error = _qnn_interface->qnn_profile_free(_qnn_profile_handle); if (error != QNN_SUCCESS) { - QNN_LOG_WARN("failed to free QNN profile_handle: ID %u, error %d", _qnn_interface->get_backend_id(), - QNN_GET_ERROR_CODE(error)); + QNN_LOG_WARN("failed to free QNN profile_handle: ID %u, error %d\n", _qnn_interface->get_backend_id(), + (int) QNN_GET_ERROR_CODE(error)); } _qnn_profile_handle = nullptr; } @@ -357,8 +359,8 @@ int qnn_instance::qnn_finalize() { if (_qnn_device_handle) { error = _qnn_interface->qnn_device_free(_qnn_device_handle); if (error != QNN_SUCCESS) { - QNN_LOG_WARN("failed to free QNN device_handle: ID %u, error %d", _qnn_interface->get_backend_id(), - QNN_GET_ERROR_CODE(error)); + QNN_LOG_WARN("failed to free QNN device_handle: ID %u, error %d\n", _qnn_interface->get_backend_id(), + (int) QNN_GET_ERROR_CODE(error)); } _qnn_device_handle = nullptr; } @@ -366,17 +368,17 @@ int qnn_instance::qnn_finalize() { if (_qnn_backend_handle) { error = _qnn_interface->qnn_backend_free(_qnn_backend_handle); if (error != QNN_SUCCESS) { - QNN_LOG_WARN("failed to free QNN backend_handle: ID %u, error %d", _qnn_interface->get_backend_id(), - QNN_GET_ERROR_CODE(error)); + QNN_LOG_WARN("failed to free QNN backend_handle: ID %u, error %d\n", _qnn_interface->get_backend_id(), + (int) QNN_GET_ERROR_CODE(error)); } _qnn_backend_handle = nullptr; } - if (nullptr != _qnn_log_handle) { + if (_qnn_log_handle) { error = _qnn_interface->qnn_log_free(_qnn_log_handle); if (error != QNN_SUCCESS) { - QNN_LOG_WARN("failed to free QNN log_handle: ID %u, error %d", _qnn_interface->get_backend_id(), - QNN_GET_ERROR_CODE(error)); + QNN_LOG_WARN("failed to free QNN log_handle: ID %u, error %d\n", _qnn_interface->get_backend_id(), + (int) QNN_GET_ERROR_CODE(error)); } _qnn_log_handle = nullptr; } @@ -389,60 +391,60 @@ int qnn_instance::qnn_finalize() { } int 
qnn_instance::load_system() { - QNN_LOG_DEBUG("[%s]lib: %s", _backend_lib_name.c_str(), kQnnSystemLibName); + QNN_LOG_DEBUG("[%s]lib: %s\n", _backend_lib_name.c_str(), kQnnSystemLibName); auto system_lib_handle = load_lib_with_fallback(kQnnSystemLibName, _additional_lib_load_path); if (!system_lib_handle) { - QNN_LOG_WARN("can not load QNN library %s, error: %s", kQnnSystemLibName, dl_error()); + QNN_LOG_WARN("can not load QNN library %s, error: %s\n", kQnnSystemLibName, dl_error()); return 1; } - auto *get_providers = + auto * get_providers = dl_sym_typed(system_lib_handle, "QnnSystemInterface_getProviders"); if (!get_providers) { - QNN_LOG_WARN("can not load QNN symbol QnnSystemInterface_getProviders: %s", dl_error()); + QNN_LOG_WARN("can not load QNN symbol QnnSystemInterface_getProviders: %s\n", dl_error()); return 2; } - uint32_t num_providers = 0; - const QnnSystemInterface_t **provider_list = nullptr; - Qnn_ErrorHandle_t error = get_providers(&provider_list, &num_providers); + uint32_t num_providers = 0; + const QnnSystemInterface_t ** provider_list = nullptr; + Qnn_ErrorHandle_t error = get_providers(&provider_list, &num_providers); if (error != QNN_SUCCESS) { - QNN_LOG_WARN("failed to get providers, error %d", QNN_GET_ERROR_CODE(error)); + QNN_LOG_WARN("failed to get providers, error %d\n", (int) QNN_GET_ERROR_CODE(error)); return 3; } - QNN_LOG_DEBUG("num_providers: %d", num_providers); + QNN_LOG_DEBUG("num_providers: %d\n", num_providers); if (num_providers != _required_num_providers) { - QNN_LOG_WARN("providers is %d instead of required %d", num_providers, _required_num_providers); + QNN_LOG_WARN("providers is %d instead of required %d\n", (int) num_providers, (int) _required_num_providers); return 4; } if (!provider_list) { - QNN_LOG_WARN("can not get providers"); + QNN_LOG_WARN("can not get providers\n"); return 5; } QNN_SYSTEM_INTERFACE_VER_TYPE qnn_system_interface; - bool found_valid_system_interface = false; + bool found_valid_system_interface = false; for (size_t idx = 0; idx < num_providers; idx++) { if (QNN_SYSTEM_API_VERSION_MAJOR == provider_list[idx]->systemApiVersion.major && QNN_SYSTEM_API_VERSION_MINOR <= provider_list[idx]->systemApiVersion.minor) { found_valid_system_interface = true; - qnn_system_interface = provider_list[idx]->QNN_SYSTEM_INTERFACE_VER_NAME; + qnn_system_interface = provider_list[idx]->QNN_SYSTEM_INTERFACE_VER_NAME; break; } } if (!found_valid_system_interface) { - QNN_LOG_WARN("unable to find a valid qnn system interface"); + QNN_LOG_WARN("unable to find a valid qnn system interface\n"); return 6; } else { - QNN_LOG_DEBUG("find a valid qnn system interface"); + QNN_LOG_DEBUG("find a valid qnn system interface\n"); } auto qnn_sys_interface = std::make_shared(*provider_list[0], system_lib_handle); if (!qnn_sys_interface->is_valid()) { - QNN_LOG_WARN("failed to create QNN system interface"); + QNN_LOG_WARN("failed to create QNN system interface\n"); return 7; } @@ -450,79 +452,79 @@ int qnn_instance::load_system() { return 0; } -int qnn_instance::load_backend(std::string &lib_path, const QnnSaver_Config_t ** /*saver_config*/) { +int qnn_instance::load_backend(std::string & lib_path, const QnnSaver_Config_t ** /*saver_config*/) { Qnn_ErrorHandle_t error = QNN_SUCCESS; - QNN_LOG_DEBUG("lib_path:%s", lib_path.c_str()); + QNN_LOG_DEBUG("lib_path:%s\n", lib_path.c_str()); auto lib_handle = load_lib_with_fallback(lib_path, _additional_lib_load_path); if (!lib_handle) { - QNN_LOG_WARN("can not open QNN library %s, with error: %s", lib_path.c_str(), 
dl_error()); + QNN_LOG_WARN("can not open QNN library %s, with error: %s\n", lib_path.c_str(), dl_error()); return 1; } auto get_providers = dl_sym_typed(lib_handle, "QnnInterface_getProviders"); if (!get_providers) { - QNN_LOG_WARN("can not load symbol QnnInterface_getProviders : %s", dl_error()); + QNN_LOG_WARN("can not load symbol QnnInterface_getProviders : %s\n", dl_error()); return 2; } - std::uint32_t num_providers = 0; - const QnnInterface_t **provider_list = nullptr; - error = get_providers(&provider_list, &num_providers); + std::uint32_t num_providers = 0; + const QnnInterface_t ** provider_list = nullptr; + error = get_providers(&provider_list, &num_providers); if (error != QNN_SUCCESS) { - QNN_LOG_WARN("failed to get providers, error %d", QNN_GET_ERROR_CODE(error)); + QNN_LOG_WARN("failed to get providers, error %d\n", (int) QNN_GET_ERROR_CODE(error)); return 3; } - QNN_LOG_DEBUG("num_providers=%d", num_providers); + QNN_LOG_DEBUG("num_providers=%d\n", num_providers); if (num_providers != _required_num_providers) { - QNN_LOG_WARN("providers is %d instead of required %d", num_providers, _required_num_providers); + QNN_LOG_WARN("providers is %d instead of required %d\n", num_providers, _required_num_providers); return 4; } if (!provider_list) { - QNN_LOG_WARN("failed to get qnn interface providers"); + QNN_LOG_WARN("failed to get qnn interface providers\n"); return 5; } - bool found_valid_interface = false; + bool found_valid_interface = false; QNN_INTERFACE_VER_TYPE qnn_interface; for (size_t idx = 0; idx < num_providers; idx++) { if (QNN_API_VERSION_MAJOR == provider_list[idx]->apiVersion.coreApiVersion.major && QNN_API_VERSION_MINOR <= provider_list[idx]->apiVersion.coreApiVersion.minor) { found_valid_interface = true; - qnn_interface = provider_list[idx]->QNN_INTERFACE_VER_NAME; + qnn_interface = provider_list[idx]->QNN_INTERFACE_VER_NAME; break; } } if (!found_valid_interface) { - QNN_LOG_WARN("unable to find a valid qnn interface"); + QNN_LOG_WARN("unable to find a valid qnn interface\n"); return 6; } else { - QNN_LOG_DEBUG("find a valid qnn interface"); + QNN_LOG_DEBUG("find a valid qnn interface\n"); } - BackendIdType backend_id = provider_list[0]->backendId; + BackendIdType backend_id = provider_list[0]->backendId; _lib_path_to_backend_id[lib_path] = backend_id; if (_loaded_backend.count(backend_id) > 0) { - QNN_LOG_WARN("lib_path %s is loaded, but backend %d already exists", lib_path.c_str(), backend_id); + QNN_LOG_WARN("lib_path %s is loaded, but backend %d already exists\n", lib_path.c_str(), backend_id); } _loaded_backend[backend_id] = provider_list[0]; if (_loaded_lib_handle.count(backend_id) > 0) { - QNN_LOG_WARN("closing %p", _loaded_lib_handle[backend_id]); + QNN_LOG_WARN("closing %p\n", _loaded_lib_handle[backend_id]); if (!dl_unload(_loaded_lib_handle[backend_id])) { - QNN_LOG_WARN("fail to close %p with error %s", _loaded_lib_handle[backend_id], dl_error()); + QNN_LOG_WARN("fail to close %p with error %s\n", _loaded_lib_handle[backend_id], dl_error()); } } _loaded_lib_handle[backend_id] = lib_handle; - _backend_id = backend_id; + _backend_id = backend_id; return 0; } int qnn_instance::unload_backend() { - for (auto &it : _loaded_lib_handle) { + for (auto & it : _loaded_lib_handle) { if (!dl_unload(it.second)) { - QNN_LOG_WARN("failed to close QNN backend %d, error %s", it.first, dl_error()); + QNN_LOG_WARN("failed to close QNN backend %d, error %s\n", it.first, dl_error()); } } @@ -533,4 +535,4 @@ int qnn_instance::unload_backend() { return 0; } -} // 
namespace qnn +} // namespace qnn diff --git a/ggml/src/ggml-qnn/qnn-lib.hpp b/ggml/src/ggml-qnn/qnn-lib.hpp index 968df5bcf2..bb6006acda 100644 --- a/ggml/src/ggml-qnn/qnn-lib.hpp +++ b/ggml/src/ggml-qnn/qnn-lib.hpp @@ -24,7 +24,7 @@ #include #include -#include "dl_loader.hpp" +#include "dl-loader.hpp" #include "qnn-types.hpp" #include "utils.hpp" @@ -42,16 +42,15 @@ namespace qnn { #pragma GCC diagnostic ignored "-Wpedantic" class qnn_system_interface { - #define DEFINE_SHIM_FUNCTION_SYS_INTERFACE(F, pointer_name) \ - template \ - inline auto qnn_##F(Args... args) const { \ + template inline auto qnn_##F(Args... args) const { \ return (_qnn_sys_interface.QNN_SYSTEM_INTERFACE_VER_NAME.pointer_name)(std::forward(args)...); \ } -public: - qnn_system_interface(const QnnSystemInterface_t &qnn_sys_interface, dl_handler_t lib_handle); + public: + qnn_system_interface(const QnnSystemInterface_t & qnn_sys_interface, dl_handler_t lib_handle); ~qnn_system_interface(); + bool is_valid() const { return _qnn_system_handle != nullptr; } // QnnSystem @@ -61,27 +60,25 @@ public: DEFINE_SHIM_FUNCTION_SYS_INTERFACE(system_context_free, systemContextFree); -private: + private: qnn_system_interface(const qnn_system_interface &) = delete; - void operator=(const qnn_system_interface &) = delete; - qnn_system_interface(qnn_system_interface &&) = delete; - void operator=(qnn_system_interface &&) = delete; + void operator=(const qnn_system_interface &) = delete; + qnn_system_interface(qnn_system_interface &&) = delete; + void operator=(qnn_system_interface &&) = delete; const QnnSystemInterface_t _qnn_sys_interface = {}; - dl_handler_t _lib_handle = nullptr; - QnnSystemContext_Handle_t _qnn_system_handle = nullptr; + dl_handler_t _lib_handle = nullptr; + QnnSystemContext_Handle_t _qnn_system_handle = nullptr; }; class qnn_interface { - #define DEFINE_SHIM_FUNCTION_INTERFACE(F, pointer_name) \ - template \ - inline auto qnn_##F(Args... args) const { \ + template inline auto qnn_##F(Args... 
args) const { \ return (_qnn_interface.QNN_INTERFACE_VER_NAME.pointer_name)(std::forward(args)...); \ } -public: - qnn_interface(const QnnInterface_t &qnn_interface) : _qnn_interface(qnn_interface) {} + public: + qnn_interface(const QnnInterface_t & qnn_interface) : _qnn_interface(qnn_interface) {} // QnnBackend DEFINE_SHIM_FUNCTION_INTERFACE(backend_create, backendCreate); @@ -161,11 +158,11 @@ public: uint32_t get_backend_id() const { return _qnn_interface.backendId; } -private: - qnn_interface(const qnn_interface &) = delete; + private: + qnn_interface(const qnn_interface &) = delete; void operator=(const qnn_interface &) = delete; - qnn_interface(qnn_interface &&) = delete; - void operator=(qnn_interface &&) = delete; + qnn_interface(qnn_interface &&) = delete; + void operator=(qnn_interface &&) = delete; const QnnInterface_t _qnn_interface = {}; }; @@ -173,17 +170,19 @@ private: #pragma GCC diagnostic pop class qnn_instance { -public: + public: using BackendIdType = decltype(QnnInterface_t{}.backendId); - explicit qnn_instance(const std::string &lib_path, const std::string &backend_lib_name); + explicit qnn_instance(const std::string & lib_path, const std::string & backend_lib_name); + ~qnn_instance() {} - int qnn_init(const QnnSaver_Config_t **saver_config); + + int qnn_init(const QnnSaver_Config_t ** saver_config); int qnn_finalize(); std::shared_ptr get_qnn_interface() { if (!_qnn_interface) { - QNN_LOG_WARN("pls check why _qnn_interface is not loaded"); + QNN_LOG_WARN("pls check why _qnn_interface is not loaded\n"); } return _qnn_interface; } @@ -202,26 +201,26 @@ public: int init_htp_perfinfra() { QnnDevice_Infrastructure_t device_infra = nullptr; - auto error = _qnn_interface->qnn_device_get_infrastructure(&device_infra); + auto error = _qnn_interface->qnn_device_get_infrastructure(&device_infra); if (error != QNN_SUCCESS) { - QNN_LOG_WARN("failed to get qnn device infra"); + QNN_LOG_WARN("failed to get qnn device infra\n"); return 1; } else { - QNN_LOG_INFO("HTP backend perf_infrastructure creation ok"); + QNN_LOG_INFO("HTP backend perf_infrastructure creation ok\n"); } - QnnHtpDevice_Infrastructure_t *htp_infra = static_cast(device_infra); - QnnHtpDevice_PerfInfrastructure_t *htp_perfinfra = &htp_infra->perfInfra; - uint32_t power_configid = 1; - uint32_t device_id = 0; - uint32_t core_id = 0; + QnnHtpDevice_Infrastructure_t * htp_infra = static_cast(device_infra); + QnnHtpDevice_PerfInfrastructure_t * htp_perfinfra = &htp_infra->perfInfra; + uint32_t power_configid = 1; + uint32_t device_id = 0; + uint32_t core_id = 0; htp_perfinfra->createPowerConfigId(device_id, core_id, &power_configid); if (htp_infra->infraType != QNN_HTP_DEVICE_INFRASTRUCTURE_TYPE_PERF) { - QNN_LOG_INFO("HTP infra type = %d, which is not perf infra type", htp_infra->infraType); + QNN_LOG_INFO("HTP infra type = %d, which is not perf infra type\n", htp_infra->infraType); } else { - QNN_LOG_INFO("HTP infra type = %d, which is perf infra type", htp_infra->infraType); + QNN_LOG_INFO("HTP infra type = %d, which is perf infra type\n", htp_infra->infraType); } - _qnn_htp_perfinfra = htp_perfinfra; + _qnn_htp_perfinfra = htp_perfinfra; _qnn_power_configid = power_configid; return 0; @@ -231,7 +230,7 @@ public: if (_qnn_htp_perfinfra) { QnnHtpPerfInfrastructure_PowerConfig_t rpc_polling_time; memset(&rpc_polling_time, 0, sizeof(rpc_polling_time)); - rpc_polling_time.option = QNN_HTP_PERF_INFRASTRUCTURE_POWER_CONFIGOPTION_RPC_POLLING_TIME; + rpc_polling_time.option = 
QNN_HTP_PERF_INFRASTRUCTURE_POWER_CONFIGOPTION_RPC_POLLING_TIME; // use rpc polling time recommended 0-10000 us rpc_polling_time.rpcPollingTimeConfig = 9999; @@ -241,16 +240,16 @@ public: // use rpc control latency recommended 100 us, refer hexagon sdk rpc_control_latency.rpcControlLatencyConfig = 100; - const QnnHtpPerfInfrastructure_PowerConfig_t *power_configs[] = {&rpc_polling_time, &rpc_control_latency, - nullptr}; + const QnnHtpPerfInfrastructure_PowerConfig_t * power_configs[] = { &rpc_polling_time, &rpc_control_latency, + nullptr }; Qnn_ErrorHandle_t qnn_status = _qnn_htp_perfinfra->setPowerConfig(_qnn_power_configid, power_configs); if (qnn_status != QNN_SUCCESS) { - QNN_LOG_WARN("set htp perf failed"); + QNN_LOG_WARN("set htp perf failed\n"); } else { - QNN_LOG_DEBUG("set htp perf ok"); + QNN_LOG_DEBUG("set htp perf ok\n"); } } else { - QNN_LOG_WARN("can't set htp perf"); + QNN_LOG_WARN("can't set htp perf\n"); } return 0; @@ -258,7 +257,7 @@ public: int set_high_performance_mode() { if (nullptr == _qnn_htp_perfinfra) { - QNN_LOG_WARN("perf intra is null"); + QNN_LOG_WARN("perf intra is null\n"); return 1; } @@ -266,83 +265,83 @@ public: memset(&power_config, 0, sizeof(power_config)); power_config.option = QNN_HTP_PERF_INFRASTRUCTURE_POWER_CONFIGOPTION_DCVS_V3; - power_config.dcvsV3Config.setDcvsEnable = 1; - power_config.dcvsV3Config.dcvsEnable = 0; - power_config.dcvsV3Config.contextId = _qnn_power_configid; - power_config.dcvsV3Config.powerMode = QNN_HTP_PERF_INFRASTRUCTURE_POWERMODE_PERFORMANCE_MODE; - power_config.dcvsV3Config.setSleepLatency = 1; // true to consider Latency parameter otherwise false - power_config.dcvsV3Config.sleepLatency = 40; - power_config.dcvsV3Config.setBusParams = 1; // true to consider Bus parameter otherwise false - power_config.dcvsV3Config.setCoreParams = 1; // true to consider Core parameter otherwise false - power_config.dcvsV3Config.sleepDisable = 1; // true to consider sleep/LPM modes, false to enable + power_config.dcvsV3Config.setDcvsEnable = 1; + power_config.dcvsV3Config.dcvsEnable = 0; + power_config.dcvsV3Config.contextId = _qnn_power_configid; + power_config.dcvsV3Config.powerMode = QNN_HTP_PERF_INFRASTRUCTURE_POWERMODE_PERFORMANCE_MODE; + power_config.dcvsV3Config.setSleepLatency = 1; // true to consider Latency parameter otherwise false + power_config.dcvsV3Config.sleepLatency = 40; + power_config.dcvsV3Config.setBusParams = 1; // true to consider Bus parameter otherwise false + power_config.dcvsV3Config.setCoreParams = 1; // true to consider Core parameter otherwise false + power_config.dcvsV3Config.sleepDisable = 1; // true to consider sleep/LPM modes, false to enable power_config.dcvsV3Config.setSleepDisable = - 1; // true to consider sleep disable/enable parameter otherwise false set sleep latency parameter + 1; // true to consider sleep disable/enable parameter otherwise false set sleep latency parameter // set Bus Clock Parameters - power_config.dcvsV3Config.busVoltageCornerMin = DCVS_VOLTAGE_VCORNER_MAX_VOLTAGE_CORNER; - power_config.dcvsV3Config.busVoltageCornerTarget = DCVS_VOLTAGE_VCORNER_MAX_VOLTAGE_CORNER; - power_config.dcvsV3Config.busVoltageCornerMax = DCVS_VOLTAGE_VCORNER_MAX_VOLTAGE_CORNER; + power_config.dcvsV3Config.busVoltageCornerMin = DCVS_VOLTAGE_VCORNER_MAX_VOLTAGE_CORNER; + power_config.dcvsV3Config.busVoltageCornerTarget = DCVS_VOLTAGE_VCORNER_MAX_VOLTAGE_CORNER; + power_config.dcvsV3Config.busVoltageCornerMax = DCVS_VOLTAGE_VCORNER_MAX_VOLTAGE_CORNER; // set Core Clock Parameters - 
power_config.dcvsV3Config.coreVoltageCornerMin = DCVS_VOLTAGE_VCORNER_MAX_VOLTAGE_CORNER; + power_config.dcvsV3Config.coreVoltageCornerMin = DCVS_VOLTAGE_VCORNER_MAX_VOLTAGE_CORNER; power_config.dcvsV3Config.coreVoltageCornerTarget = DCVS_VOLTAGE_VCORNER_MAX_VOLTAGE_CORNER; - power_config.dcvsV3Config.coreVoltageCornerMax = DCVS_VOLTAGE_VCORNER_MAX_VOLTAGE_CORNER; + power_config.dcvsV3Config.coreVoltageCornerMax = DCVS_VOLTAGE_VCORNER_MAX_VOLTAGE_CORNER; // set power config with different performance parameters - const QnnHtpPerfInfrastructure_PowerConfig_t *power_configs[] = {&power_config, nullptr}; - Qnn_ErrorHandle_t qnn_status = QNN_SUCCESS; + const QnnHtpPerfInfrastructure_PowerConfig_t * power_configs[] = { &power_config, nullptr }; + Qnn_ErrorHandle_t qnn_status = QNN_SUCCESS; qnn_status = _qnn_htp_perfinfra->setPowerConfig(_qnn_power_configid, power_configs); if (qnn_status != QNN_SUCCESS) { - QNN_LOG_WARN("set htp high performance mode failed"); + QNN_LOG_WARN("set htp high performance mode failed\n"); } else { - QNN_LOG_DEBUG("set htp high performance mode ok"); + QNN_LOG_DEBUG("set htp high performance mode ok\n"); } return 0; } - std::string &get_qnn_graph_name() { return _graph_name; } + std::string & get_qnn_graph_name() { return _graph_name; } bool is_rpcmem_initialized() { return _rpcmem_initialized; } size_t get_rpcmem_capacity() { return _rpcmem_capacity; } - void *alloc_rpcmem(size_t bytes, size_t alignment) { + void * alloc_rpcmem(size_t bytes, size_t alignment) { if (!_rpcmem_initialized) { - QNN_LOG_WARN("rpc memory not initialized"); + QNN_LOG_WARN("rpc memory not initialized\n"); return nullptr; } - auto allocate_bytes = static_cast(bytes + alignment); - void *buf = _pfn_rpc_mem_alloc(RPCMEM_HEAP_ID_SYSTEM, RPCMEM_DEFAULT_FLAGS, (int)allocate_bytes); + auto allocate_bytes = static_cast(bytes + alignment); + void * buf = _pfn_rpc_mem_alloc(RPCMEM_HEAP_ID_SYSTEM, RPCMEM_DEFAULT_FLAGS, (int) allocate_bytes); if (!buf) { - QNN_LOG_WARN("failed to allocate rpc memory, size: %d MB", (int)(allocate_bytes / (1 << 20))); + QNN_LOG_WARN("failed to allocate rpc memory, size: %d MB\n", (int) (allocate_bytes / (1 << 20))); return nullptr; } auto aligned_buf = reinterpret_cast(qnn::align_to(alignment, reinterpret_cast(buf))); - bool status = _rpcmem_store_map.insert(std::pair(aligned_buf, buf)).second; + bool status = _rpcmem_store_map.insert(std::pair(aligned_buf, buf)).second; if (!status) { - QNN_LOG_WARN("failed to allocate rpc memory"); + QNN_LOG_WARN("failed to allocate rpc memory\n"); _pfn_rpc_mem_free(buf); } return aligned_buf; } - void free_rpcmem(void *buf) { + void free_rpcmem(void * buf) { if (!_rpcmem_initialized) { - QNN_LOG_WARN("rpc memory not initialized"); + QNN_LOG_WARN("rpc memory not initialized\n"); } else if (_rpcmem_store_map.count(buf) == 0) { - QNN_LOG_WARN("no allocated tensor"); + QNN_LOG_WARN("no allocated tensor\n"); } else { _pfn_rpc_mem_free(_rpcmem_store_map[buf]); _rpcmem_store_map.erase(buf); } } - int32_t rpcmem_to_fd(void *buf) { + int32_t rpcmem_to_fd(void * buf) { int32_t mem_fd = -1; if (!is_rpcmem_initialized()) { - QNN_LOG_WARN("rpc memory not initialized"); + QNN_LOG_WARN("rpc memory not initialized\n"); } else { mem_fd = _pfn_rpc_mem_to_fd(buf); } @@ -350,74 +349,80 @@ public: return mem_fd; } - Qnn_MemHandle_t register_rpcmem(void *p_data, const uint32_t rank, uint32_t *dimensions, Qnn_DataType_t data_type) { + Qnn_MemHandle_t register_rpcmem(void * p_data, const uint32_t rank, uint32_t * dimensions, + Qnn_DataType_t data_type) { if 
(!p_data) { - QNN_LOG_WARN("invalid param"); + QNN_LOG_WARN("invalid param\n"); return nullptr; } if (!is_rpcmem_initialized()) { - QNN_LOG_WARN("rpc memory not initialized"); + QNN_LOG_WARN("rpc memory not initialized\n"); return nullptr; } if (is_rpcmem_registered(p_data)) { - QNN_LOG_WARN("rpc memory already registered"); + QNN_LOG_WARN("rpc memory already registered\n"); return _qnn_rpc_buffer_to_handles[p_data]; } auto mem_fd = rpcmem_to_fd(p_data); if (mem_fd == -1) { - QNN_LOG_WARN("failed to get file descriptor"); + QNN_LOG_WARN("failed to get file descriptor\n"); return nullptr; } - QNN_LOG_DEBUG("mem_fd %d", mem_fd); - Qnn_MemDescriptor_t descriptor = {{rank, dimensions, nullptr}, data_type, QNN_MEM_TYPE_ION, {{mem_fd}}}; + QNN_LOG_DEBUG("mem_fd %d\n", mem_fd); + Qnn_MemDescriptor_t descriptor = { + { rank, dimensions, nullptr }, + data_type, QNN_MEM_TYPE_ION, { { mem_fd } } + }; Qnn_MemHandle_t handle = nullptr; - auto error = _qnn_interface->qnn_mem_register(_qnn_context_handle, &descriptor, - /*numDescriptors=*/1, &handle); + auto error = _qnn_interface->qnn_mem_register(_qnn_context_handle, &descriptor, + /*numDescriptors=*/1, &handle); if (error != QNN_SUCCESS) { - QNN_LOG_WARN("failed to register shared memory, error %d, %s", QNN_GET_ERROR_CODE(error), strerror(error)); + QNN_LOG_WARN("failed to register shared memory, error %d, %s\n", (int) QNN_GET_ERROR_CODE(error), + strerror(error)); return nullptr; } - _qnn_rpc_buffer_to_handles.insert({p_data, handle}); - QNN_LOG_DEBUG("successfully register shared memory handler: %p", handle); + _qnn_rpc_buffer_to_handles.insert({ p_data, handle }); + QNN_LOG_DEBUG("successfully register shared memory handler: %p\n", handle); return handle; } void unregister_rpcmem(Qnn_MemHandle_t mem_handle) { auto error = _qnn_interface->qnn_mem_de_register(&mem_handle, 1); if (error != QNN_SUCCESS) { - QNN_LOG_WARN("failed to unregister shared memory, error %d", QNN_GET_ERROR_CODE(error)); + QNN_LOG_WARN("failed to unregister shared memory, error %d\n", (int) QNN_GET_ERROR_CODE(error)); } auto it = std::find_if(_qnn_rpc_buffer_to_handles.begin(), _qnn_rpc_buffer_to_handles.end(), - [mem_handle](const auto &kv) { return kv.second == mem_handle; }); + [mem_handle](const auto & kv) { return kv.second == mem_handle; }); if (it == _qnn_rpc_buffer_to_handles.end()) { - QNN_LOG_WARN("failed to find shared memory handler: %p", mem_handle); + QNN_LOG_WARN("failed to find shared memory handler: %p\n", mem_handle); return; } _qnn_rpc_buffer_to_handles.erase(it); } - bool is_rpcmem_allocated(void *buf) { return _rpcmem_store_map.count(buf) != 0; } - bool is_rpcmem_registered(void *buf) { return _qnn_rpc_buffer_to_handles.count(buf) != 0U; } + bool is_rpcmem_allocated(void * buf) { return _rpcmem_store_map.count(buf) != 0; } - const qnn::qcom_socinfo &get_soc_info() { return _soc_info; } + bool is_rpcmem_registered(void * buf) { return _qnn_rpc_buffer_to_handles.count(buf) != 0U; } -private: + const qnn::qcom_socinfo & get_soc_info() { return _soc_info; } + + private: int load_system(); - int load_backend(std::string &lib_path, const QnnSaver_Config_t ** /*saver_config*/); + int load_backend(std::string & lib_path, const QnnSaver_Config_t ** /*saver_config*/); int unload_backend(); -private: + private: static constexpr const int _required_num_providers = 1; - std::string _additional_lib_load_path; - std::string _backend_lib_name; + std::string _additional_lib_load_path; + std::string _backend_lib_name; BackendIdType _backend_id; QnnLog_Level_t _qnn_log_level = 
QNN_LOG_LEVEL_DEBUG; @@ -429,7 +434,7 @@ private: #endif std::shared_ptr _qnn_sys_interface; - std::shared_ptr _qnn_interface; + std::shared_ptr _qnn_interface; Qnn_GraphHandle_t _qnn_graph_handle = nullptr; @@ -443,29 +448,29 @@ private: Qnn_ContextHandle_t _qnn_context_handle = nullptr; - QnnHtpDevice_PerfInfrastructure_t *_qnn_htp_perfinfra = nullptr; - uint32_t _qnn_power_configid = 1; + QnnHtpDevice_PerfInfrastructure_t * _qnn_htp_perfinfra = nullptr; + uint32_t _qnn_power_configid = 1; std::unordered_map _qnn_rpc_buffer_to_handles; - std::mutex _init_mutex; - std::unordered_map _loaded_lib_handle; - std::unordered_map _lib_path_to_backend_id; + std::mutex _init_mutex; + std::unordered_map _loaded_lib_handle; + std::unordered_map _lib_path_to_backend_id; std::unordered_map _loaded_backend; - dl_handler_t _rpc_lib_handle = nullptr; - std::atomic_bool _rpcmem_initialized{false}; - qnn::pfn_rpc_mem_alloc _pfn_rpc_mem_alloc = nullptr; - qnn::pfn_rpc_mem_free _pfn_rpc_mem_free = nullptr; - qnn::pfn_rpc_mem_to_fd _pfn_rpc_mem_to_fd = nullptr; - qnn::pfn_rpc_mem_init _pfn_rpc_mem_init = nullptr; - qnn::pfn_rpc_mem_deinit _pfn_rpc_mem_deinit = nullptr; + dl_handler_t _rpc_lib_handle = nullptr; + std::atomic_bool _rpcmem_initialized{ false }; + qnn::pfn_rpc_mem_alloc _pfn_rpc_mem_alloc = nullptr; + qnn::pfn_rpc_mem_free _pfn_rpc_mem_free = nullptr; + qnn::pfn_rpc_mem_to_fd _pfn_rpc_mem_to_fd = nullptr; + qnn::pfn_rpc_mem_init _pfn_rpc_mem_init = nullptr; + qnn::pfn_rpc_mem_deinit _pfn_rpc_mem_deinit = nullptr; std::unordered_map _rpcmem_store_map; - size_t _rpcmem_capacity = 512; + size_t _rpcmem_capacity = 512; std::string _graph_name; qnn::qcom_socinfo _soc_info = {}; }; -} // namespace qnn +} // namespace qnn diff --git a/ggml/src/ggml-qnn/qnn-types.hpp b/ggml/src/ggml-qnn/qnn-types.hpp index ec30602843..8284036bb7 100644 --- a/ggml/src/ggml-qnn/qnn-types.hpp +++ b/ggml/src/ggml-qnn/qnn-types.hpp @@ -20,48 +20,48 @@ enum sdk_profile_level { profile_off = 0, profile_basic, profile_detail }; enum qcom_htp_arch { NONE = 0, - V68 = 68, - V69 = 69, - V73 = 73, - V75 = 75, - V79 = 79, // SD 8 Gen 4 (SM8750) + V68 = 68, + V69 = 69, + V73 = 73, + V75 = 75, + V79 = 79, // SD 8 Gen 4 (SM8750) }; enum qcom_chipset { UNKNOWN_SM = 0, - SM8450 = 36, // v69, SD 8 Gen 1 - SM8475 = 42, // v69, SD 8+ Gen 1 - SM8550 = 43, // v73, SD 8 Gen 2 - SSG2115P = 46, // v73 - SM8650 = 57, // v75, SD 8 Gen 3 - SA8295 = 39, // v68 - SM8750 = 69, // v79, SD 8 Gen 4 + SM8450 = 36, // v69, SD 8 Gen 1 + SM8475 = 42, // v69, SD 8+ Gen 1 + SM8550 = 43, // v73, SD 8 Gen 2 + SSG2115P = 46, // v73 + SM8650 = 57, // v75, SD 8 Gen 3 + SA8295 = 39, // v68 + SM8750 = 69, // v79, SD 8 Gen 4 }; struct qcom_socinfo { uint32_t soc_model; - size_t htp_arch; - size_t vtcm_size_in_mb; + size_t htp_arch; + size_t vtcm_size_in_mb; }; -using pfn_rpc_mem_init = void (*)(void); +using pfn_rpc_mem_init = void (*)(void); using pfn_rpc_mem_deinit = void (*)(void); -using pfn_rpc_mem_alloc = void *(*)(int, uint32_t, int); -using pfn_rpc_mem_free = void (*)(void *); -using pfn_rpc_mem_to_fd = int (*)(void *); +using pfn_rpc_mem_alloc = void * (*) (int, uint32_t, int); +using pfn_rpc_mem_free = void (*)(void *); +using pfn_rpc_mem_to_fd = int (*)(void *); -using pfn_qnnsaver_initialize = decltype(QnnSaver_initialize); -using pfn_qnninterface_getproviders = decltype(QnnInterface_getProviders); +using pfn_qnnsaver_initialize = decltype(QnnSaver_initialize); +using pfn_qnninterface_getproviders = decltype(QnnInterface_getProviders); using 
pfn_qnnsysteminterface_getproviders = decltype(QnnSystemInterface_getProviders); -} // namespace qnn +} // namespace qnn -#define RPCMEM_DEFAULT_FLAGS 1 +#define RPCMEM_DEFAULT_FLAGS 1 #define RPCMEM_HEAP_ID_SYSTEM 25 -#define DISABLE_COPY(class_name) \ - class_name(const class_name &) = delete; \ +#define DISABLE_COPY(class_name) \ + class_name(const class_name &) = delete; \ void operator=(const class_name &) = delete -#define DISABLE_MOVE(class_name) \ - class_name(class_name &&) = delete; \ +#define DISABLE_MOVE(class_name) \ + class_name(class_name &&) = delete; \ void operator=(class_name &&) = delete diff --git a/ggml/src/ggml-qnn/tensor.hpp b/ggml/src/ggml-qnn/tensor.hpp index 423c3ba7fa..660223caf7 100644 --- a/ggml/src/ggml-qnn/tensor.hpp +++ b/ggml/src/ggml-qnn/tensor.hpp @@ -9,9 +9,8 @@ #include #include -#include "ggml-qnn.h" - #include "buffer.hpp" +#include "ggml-qnn.h" #include "logger.hpp" #include "qnn-lib.hpp" #include "utils.hpp" @@ -21,14 +20,17 @@ namespace qnn { static_assert(GGML_MAX_DIMS == 4, "GGML_MAX_DIMS should be 4"); class ggml_qnn_tensor : public std::enable_shared_from_this { -public: + public: typedef enum _tensor_type { INPUT, OUTPUT, INTERMEDIATE, PARAMETER, BIDIRECTION } tensor_type_t; - explicit ggml_qnn_tensor(tensor_type_t tensor_type, const std::string &name, - const qnn_dimension_array_t &dimensions, Qnn_DataType_t data_type, int rank, + explicit ggml_qnn_tensor(tensor_type_t tensor_type, const std::string & name, + const qnn_dimension_array_t & dimensions, Qnn_DataType_t data_type, int rank, QNNBackend device, Qnn_GraphHandle_t graph_handle, - std::shared_ptr qnn_instance) - : _tensor_name(name), _device(device), _qnn_instance(qnn_instance), _graph_handle(graph_handle) { + std::shared_ptr qnn_instance) : + _tensor_name(name), + _device(device), + _qnn_instance(qnn_instance), + _graph_handle(graph_handle) { if (!_tensor_name.empty()) { QNN_TENSOR_SET_NAME(_qnn_tensor, _tensor_name.c_str()); } @@ -37,23 +39,24 @@ public: QNN_TENSOR_SET_DIMENSIONS(_qnn_tensor, _dimensions.data()); QNN_TENSOR_SET_DATA_FORMAT(_qnn_tensor, QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER); update_params_from_ggml_tensor(tensor_type, data_type, rank); - QNN_LOG_DEBUG("[%s][%s]created, rank: %d, dims: [%d, %d, %d, %d], type: %s", get_backend_name(device), - _tensor_name.c_str(), rank, (int)_dimensions[0], (int)_dimensions[1], (int)_dimensions[2], - (int)_dimensions[3], qnn_datatype_to_string(data_type)); + QNN_LOG_DEBUG("[%s][%s]created, rank: %d, dims: [%d, %d, %d, %d], type: %s\n", get_backend_name(device), + _tensor_name.c_str(), rank, (int) _dimensions[0], (int) _dimensions[1], (int) _dimensions[2], + (int) _dimensions[3], qnn_datatype_to_string(data_type)); } - explicit ggml_qnn_tensor(tensor_type_t tensor_type, const std::string &name, - const ggml_dimension_array_t &dimensions, ggml_type data_type, int rank, QNNBackend device, - Qnn_GraphHandle_t graph_handle, std::shared_ptr qnn_instance) - : ggml_qnn_tensor(tensor_type, name, get_internal_dimension(dimensions, rank), - qnn_datatype_from_ggml_datatype(data_type), rank, device, graph_handle, qnn_instance) {} + explicit ggml_qnn_tensor(tensor_type_t tensor_type, const std::string & name, + const ggml_dimension_array_t & dimensions, ggml_type data_type, int rank, + QNNBackend device, Qnn_GraphHandle_t graph_handle, + std::shared_ptr qnn_instance) : + ggml_qnn_tensor(tensor_type, name, get_internal_dimension(dimensions, rank), + qnn_datatype_from_ggml_datatype(data_type), rank, device, graph_handle, qnn_instance) {} 
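
A side note on the DISABLE_COPY/DISABLE_MOVE macros re-aligned in the qnn-types.hpp hunk above: they simply declare deleted copy/move special members inside the class that invokes them. A minimal, self-contained sketch of the pattern (the macro bodies are taken from the diff; the scoped_handle class and the static_asserts are only illustrative):

    #include <type_traits>

    #define DISABLE_COPY(class_name)             \
        class_name(const class_name &) = delete; \
        void operator=(const class_name &) = delete

    #define DISABLE_MOVE(class_name)        \
        class_name(class_name &&) = delete; \
        void operator=(class_name &&) = delete

    // Hypothetical resource-owning class, used the same way ggml_qnn_tensor uses the macros.
    class scoped_handle {
      public:
        scoped_handle() = default;

        DISABLE_COPY(scoped_handle);
        DISABLE_MOVE(scoped_handle);
    };

    static_assert(!std::is_copy_constructible<scoped_handle>::value, "copy is deleted");
    static_assert(!std::is_move_constructible<scoped_handle>::value, "move is deleted");

    int main() { return 0; }
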
~ggml_qnn_tensor() { _rpc_buffer.reset(); unbind(); } - bool set_data_buffer(const uint8_t *buffer, const size_t buffer_size) { + bool set_data_buffer(const uint8_t * buffer, const size_t buffer_size) { auto qnn_buffer = std::make_shared(buffer, buffer_size); if (bind_buffer_impl(qnn_buffer)) { return true; @@ -74,71 +77,72 @@ public: bool alloc_qnn_tensor_id() { if (QNN_TENSOR_GET_ID(_qnn_tensor)) { - QNN_LOG_DEBUG("[%s]tensor already has a id: %d", _tensor_name.c_str(), QNN_TENSOR_GET_ID(_qnn_tensor)); + QNN_LOG_DEBUG("[%s]tensor already has a id: %d\n", _tensor_name.c_str(), QNN_TENSOR_GET_ID(_qnn_tensor)); return true; } - Qnn_Tensor_t qnn_tensor = _qnn_tensor; - auto qnn_interface = _qnn_instance->get_qnn_interface(); - auto error = qnn_interface->qnn_tensor_create_graph_tensor(_graph_handle, &qnn_tensor); + Qnn_Tensor_t qnn_tensor = _qnn_tensor; + auto qnn_interface = _qnn_instance->get_qnn_interface(); + auto error = qnn_interface->qnn_tensor_create_graph_tensor(_graph_handle, &qnn_tensor); if (error != QNN_SUCCESS) { - QNN_LOG_WARN("[%s]allocate id failed , error: %d", _tensor_name.c_str(), error); + QNN_LOG_WARN("[%s]allocate id failed , error: %d\n", _tensor_name.c_str(), (int) error); return false; } QNN_TENSOR_SET_ID(_qnn_tensor, QNN_TENSOR_GET_ID(qnn_tensor)); - QNN_LOG_DEBUG("[%s][%s]allocated id: %d, rank: %d", get_backend_name(_device), _tensor_name.c_str(), + QNN_LOG_DEBUG("[%s][%s]allocated id: %d, rank: %d\n", get_backend_name(_device), _tensor_name.c_str(), QNN_TENSOR_GET_ID(qnn_tensor), QNN_TENSOR_GET_RANK(qnn_tensor)); return true; } - bool bind_ggml_tensor(ggml_tensor *tensor) { + bool bind_ggml_tensor(ggml_tensor * tensor) { if (!_can_unbind) { - QNN_LOG_DEBUG("[%s]already has buffer storage, skip bind", _tensor_name.c_str()); + QNN_LOG_DEBUG("[%s]already has buffer storage, skip bind\n", _tensor_name.c_str()); return true; } #ifndef NDEBUG if (tensor->view_src) { - auto *src = tensor->view_src; - QNN_LOG_DEBUG("[%s]tensor(%s_%dx%dx%dx%d) is a view, src: %s_%dx%dx%dx%d", get_backend_name(_device), - tensor->name, tensor->ne[0], tensor->ne[1], tensor->ne[2], tensor->ne[3], src->name, - src->ne[0], src->ne[1], src->ne[2], src->ne[3]); + auto * src = tensor->view_src; + QNN_LOG_DEBUG("[%s]tensor(%s_%dx%dx%dx%d) is a view, src: %s_%dx%dx%dx%d\n", get_backend_name(_device), + tensor->name, (int) tensor->ne[0], (int) tensor->ne[1], (int) tensor->ne[2], + (int) tensor->ne[3], src->name, (int) src->ne[0], (int) src->ne[1], (int) src->ne[2], + (int) src->ne[3]); } #endif auto buffer = std::make_shared(reinterpret_cast(tensor->data), ggml_nbytes(tensor)); if (!bind_buffer_impl(buffer)) { - QNN_LOG_WARN("[%s]failed to bind ggml tensor(%s)", _tensor_name.c_str(), ggml_get_name(tensor)); + QNN_LOG_WARN("[%s]failed to bind ggml tensor(%s)\n", _tensor_name.c_str(), ggml_get_name(tensor)); return false; } - QNN_LOG_DEBUG("[%s][%s]bind to ggml tensor(%s)", get_backend_name(_device), _tensor_name.c_str(), + QNN_LOG_DEBUG("[%s][%s]bind to ggml tensor(%s)\n", get_backend_name(_device), _tensor_name.c_str(), ggml_get_name(tensor)); tensor->extra = this; - _ggml_tensor = tensor; + _ggml_tensor = tensor; return true; } bool unbind() { if (!_graph_handle) { - QNN_LOG_WARN("[%s]not bound to any graph", _tensor_name.c_str()); + QNN_LOG_WARN("[%s]not bound to any graph\n", _tensor_name.c_str()); return false; } if (!_buffer) { - QNN_LOG_DEBUG("[%s]unbind to ggml tensor", _tensor_name.c_str()); + QNN_LOG_DEBUG("[%s]unbind to ggml tensor\n", _tensor_name.c_str()); return true; } if 
(!read_from_qnn_tensor()) { - QNN_LOG_WARN("[%s]read from qnn tensor failed", _tensor_name.c_str()); + QNN_LOG_WARN("[%s]read from qnn tensor failed\n", _tensor_name.c_str()); return false; } if (!_can_unbind) { - QNN_LOG_DEBUG("[%s]already has buffer storage, stop unbind", _tensor_name.c_str()); + QNN_LOG_DEBUG("[%s]already has buffer storage, stop unbind\n", _tensor_name.c_str()); return true; } @@ -146,42 +150,46 @@ public: QNN_TENSOR_SET_MEM_TYPE(_qnn_tensor, QNN_TENSORMEMTYPE_RAW); Qnn_ClientBuffer_t client_buf = {}; QNN_TENSOR_SET_CLIENT_BUF(_qnn_tensor, client_buf); - QNN_LOG_DEBUG("[%s]clear client buffer", _tensor_name.c_str()); + QNN_LOG_DEBUG("[%s]clear client buffer\n", _tensor_name.c_str()); } - QNN_LOG_DEBUG("[%s][%s]unbind from buffer: %p, size: %d", get_backend_name(_device), _tensor_name.c_str(), - _buffer.get(), (int)_buffer->get_size()); + QNN_LOG_DEBUG("[%s][%s]unbind from buffer: %p, size: %d\n", get_backend_name(_device), _tensor_name.c_str(), + (void *) _buffer.get(), (int) _buffer->get_size()); _buffer.reset(); if (_ggml_tensor) { _ggml_tensor->extra = nullptr; - _ggml_tensor = nullptr; + _ggml_tensor = nullptr; } return true; } - const Qnn_Tensor_t &get_qnn_tensor() const { return _qnn_tensor; } + const Qnn_Tensor_t & get_qnn_tensor() const { return _qnn_tensor; } + Qnn_DataType_t get_data_type() const { return QNN_TENSOR_GET_DATA_TYPE(_qnn_tensor); } - const qnn_dimension_array_t &get_dimensions() const { return _dimensions; } + + const qnn_dimension_array_t & get_dimensions() const { return _dimensions; } + uint32_t get_rank() const { return QNN_TENSOR_GET_RANK(_qnn_tensor); } + uint32_t get_qnn_tensor_id() const { return QNN_TENSOR_GET_ID(_qnn_tensor); } -private: + private: bool bind_buffer_impl(qnn_buffer_ptr buffer) { if (_buffer) { if (_buffer != buffer) { - QNN_LOG_WARN("[%s]has been bound to another buffer %p", _tensor_name.c_str(), _buffer.get()); + QNN_LOG_WARN("[%s]has been bound to another buffer %p\n", _tensor_name.c_str(), (void *) _buffer.get()); return false; } - QNN_LOG_DEBUG("[%s]already bound to same ggml tensor %p", _tensor_name.c_str(), _buffer.get()); + QNN_LOG_DEBUG("[%s]already bound to same ggml tensor %p\n", _tensor_name.c_str(), (void *) _buffer.get()); return true; } if (QNN_TENSOR_GET_TYPE(_qnn_tensor) == QNN_TENSOR_TYPE_NATIVE) { - QNN_LOG_DEBUG("[%s]tensor type(%d) not READ/WRITE, skipping", _tensor_name.c_str(), - (int)QNN_TENSOR_TYPE_NATIVE); + QNN_LOG_DEBUG("[%s]tensor type(%d) not READ/WRITE, skipping\n", _tensor_name.c_str(), + (int) QNN_TENSOR_TYPE_NATIVE); return true; } @@ -191,7 +199,7 @@ private: _qnn_instance, buffer->get_size(), QNN_TENSOR_GET_RANK(_qnn_tensor), QNN_TENSOR_GET_DIMENSIONS(_qnn_tensor), QNN_TENSOR_GET_DATA_TYPE(_qnn_tensor)); if (!rpc_buffer->is_valid()) { - QNN_LOG_WARN("[%s][%s]alloc rpc mem failed", get_backend_name(_device), _tensor_name.c_str()); + QNN_LOG_WARN("[%s][%s]alloc rpc mem failed\n", get_backend_name(_device), _tensor_name.c_str()); return false; } @@ -201,38 +209,38 @@ private: QNN_TENSOR_SET_MEM_TYPE(_qnn_tensor, QNN_TENSORMEMTYPE_MEMHANDLE); auto mem_handle = _rpc_buffer->get_mem_handle(); if (!mem_handle) { - QNN_LOG_WARN("[%s][%s]can't find rpcmem from qnn mem handle", get_backend_name(_device), + QNN_LOG_WARN("[%s][%s]can't find rpcmem from qnn mem handle\n", get_backend_name(_device), _tensor_name.c_str()); return false; } QNN_TENSOR_SET_MEM_HANDLE(_qnn_tensor, mem_handle); - QNN_LOG_DEBUG("[%s][%s]use mem handle %p", get_backend_name(_device), _tensor_name.c_str(), + 
QNN_LOG_DEBUG("[%s][%s]use mem handle %p\n", get_backend_name(_device), _tensor_name.c_str(), QNN_TENSOR_GET_MEM_HANDLE(_qnn_tensor)); } else { QNN_TENSOR_SET_MEM_TYPE(_qnn_tensor, QNN_TENSORMEMTYPE_RAW); - Qnn_ClientBuffer_t client_buf = {buffer->get_buffer(), (uint32_t)buffer->get_size()}; + Qnn_ClientBuffer_t client_buf = { buffer->get_buffer(), (uint32_t) buffer->get_size() }; QNN_TENSOR_SET_CLIENT_BUF(_qnn_tensor, client_buf); - QNN_LOG_DEBUG("[%s]use client buffer %p size %d", _tensor_name.c_str(), client_buf.data, - (int)client_buf.dataSize); + QNN_LOG_DEBUG("[%s]use client buffer %p size %d\n", _tensor_name.c_str(), client_buf.data, + (int) client_buf.dataSize); } _buffer = buffer; if (!write_to_qnn_tensor()) { - QNN_LOG_WARN("[%s]write to qnn tensor failed", _tensor_name.c_str()); + QNN_LOG_WARN("[%s]write to qnn tensor failed\n", _tensor_name.c_str()); return false; } - QNN_LOG_DEBUG("[%s][%s]bind to buffer: %p, size: %d", get_backend_name(_device), _tensor_name.c_str(), - buffer.get(), (int)buffer->get_size()); + QNN_LOG_DEBUG("[%s][%s]bind to buffer: %p, size: %d\n", get_backend_name(_device), _tensor_name.c_str(), + (void *) buffer.get(), (int) buffer->get_size()); return true; } bool write_to_qnn_tensor() { auto tensor_type = QNN_TENSOR_GET_TYPE(_qnn_tensor); if (tensor_type != QNN_TENSOR_TYPE_APP_WRITE && tensor_type != QNN_TENSOR_TYPE_APP_READWRITE) { - QNN_LOG_DEBUG("[%s]tensor type(%d) not WRITE", _tensor_name.c_str(), (int)tensor_type); + QNN_LOG_DEBUG("[%s]tensor type(%d) not WRITE\n", _tensor_name.c_str(), (int) tensor_type); return true; } @@ -241,14 +249,14 @@ private: } // For CPU and GPU, the data is already in the tensor. - QNN_LOG_DEBUG("[%s][%s]write tensor to qnn", get_backend_name(_device), _tensor_name.c_str()); + QNN_LOG_DEBUG("[%s][%s]write tensor to qnn\n", get_backend_name(_device), _tensor_name.c_str()); return true; } bool read_from_qnn_tensor() { auto tensor_type = QNN_TENSOR_GET_TYPE(_qnn_tensor); if (tensor_type != QNN_TENSOR_TYPE_APP_READ && tensor_type != QNN_TENSOR_TYPE_APP_READWRITE) { - QNN_LOG_DEBUG("[%s]tensor type(%d) not READ", _tensor_name.c_str(), (int)tensor_type); + QNN_LOG_DEBUG("[%s]tensor type(%d) not READ\n", _tensor_name.c_str(), (int) tensor_type); return true; } @@ -257,7 +265,7 @@ private: } // For CPU and GPU, the data is already in the tensor. 
- QNN_LOG_DEBUG("[%s][%s]read tensor from qnn", get_backend_name(_device), _tensor_name.c_str()); + QNN_LOG_DEBUG("[%s][%s]read tensor from qnn\n", get_backend_name(_device), _tensor_name.c_str()); return true; } @@ -265,7 +273,7 @@ private: QNN_TENSOR_SET_DATA_TYPE(_qnn_tensor, data_type); // TODO: set the quantizeParams base on the tensor type - QNN_TENSOR_SET_RANK(_qnn_tensor, (uint32_t)rank); + QNN_TENSOR_SET_RANK(_qnn_tensor, (uint32_t) rank); QNN_TENSOR_SET_MEM_TYPE(_qnn_tensor, QNN_TENSORMEMTYPE_RAW); Qnn_ClientBuffer_t client_buf = {}; QNN_TENSOR_SET_CLIENT_BUF(_qnn_tensor, client_buf); @@ -290,7 +298,7 @@ private: break; } QNN_TENSOR_SET_TYPE(_qnn_tensor, new_tensor_type); - QNN_LOG_DEBUG("[%s][%s]tensor changed to type %d", get_backend_name(_device), _tensor_name.c_str(), + QNN_LOG_DEBUG("[%s][%s]tensor changed to type %d\n", get_backend_name(_device), _tensor_name.c_str(), new_tensor_type); } @@ -299,31 +307,31 @@ private: return false; } - std::string _tensor_name; - qnn_buffer_ptr _buffer; - bool _can_unbind = true; - QNNBackend _device; + std::string _tensor_name; + qnn_buffer_ptr _buffer; + bool _can_unbind = true; + QNNBackend _device; std::shared_ptr _qnn_instance; - Qnn_Tensor_t _qnn_tensor = qnn_tensor_init(kDefaultQnnTensorVersion); - qnn_dimension_array_t _dimensions = {}; - Qnn_GraphHandle_t _graph_handle = nullptr; - qnn_buffer_ptr _rpc_buffer; - ggml_tensor *_ggml_tensor = nullptr; + Qnn_Tensor_t _qnn_tensor = qnn_tensor_init(kDefaultQnnTensorVersion); + qnn_dimension_array_t _dimensions = {}; + Qnn_GraphHandle_t _graph_handle = nullptr; + qnn_buffer_ptr _rpc_buffer; + ggml_tensor * _ggml_tensor = nullptr; DISABLE_COPY(ggml_qnn_tensor); DISABLE_MOVE(ggml_qnn_tensor); }; -using qnn_tensor_ptr_t = std::shared_ptr; -using qnn_tensor_array_t = std::vector; +using qnn_tensor_ptr_t = std::shared_ptr; +using qnn_tensor_array_t = std::vector; using ggml_tensor_array_t = std::vector; -inline qnn_tensor_ptr_t get_qnn_tensor_ptr(ggml_tensor *ggml_tensor) { - return ggml_tensor->extra ? reinterpret_cast(ggml_tensor->extra)->shared_from_this() - : qnn_tensor_ptr_t(); +inline qnn_tensor_ptr_t get_qnn_tensor_ptr(ggml_tensor * ggml_tensor) { + return ggml_tensor->extra ? 
reinterpret_cast(ggml_tensor->extra)->shared_from_this() : + qnn_tensor_ptr_t(); } -inline int get_ggml_tensors_max_rank(const qnn::ggml_tensor_array_t &tensors) { +inline int get_ggml_tensors_max_rank(const qnn::ggml_tensor_array_t & tensors) { int max_rank = 0; for (auto tensor : tensors) { max_rank = std::max(max_rank, ggml_n_dims(tensor)); @@ -332,14 +340,14 @@ inline int get_ggml_tensors_max_rank(const qnn::ggml_tensor_array_t &tensors) { return max_rank; } -inline bool bind_tensors(const ggml_tensor_array_t &ggml_tensors, qnn_tensor_array_t &tensor_wrappers, - std::vector &qnn_tensors) { +inline bool bind_tensors(const ggml_tensor_array_t & ggml_tensors, qnn_tensor_array_t & tensor_wrappers, + std::vector & qnn_tensors) { GGML_ASSERT(tensor_wrappers.size() == ggml_tensors.size()); qnn_tensors.resize(ggml_tensors.size()); for (size_t i = 0; i < ggml_tensors.size(); i++) { - auto *ggml_tensor = ggml_tensors[i]; + auto * ggml_tensor = ggml_tensors[i]; if (!tensor_wrappers[i]->bind_ggml_tensor(ggml_tensor)) { - QNN_LOG_ERROR("bind tensor %s failed", ggml_get_name(ggml_tensor)); + QNN_LOG_ERROR("bind tensor %s failed\n", ggml_get_name(ggml_tensor)); return false; } @@ -349,12 +357,12 @@ inline bool bind_tensors(const ggml_tensor_array_t &ggml_tensors, qnn_tensor_arr return true; } -inline bool bind_tensors(const ggml_tensor_array_t &ggml_tensors, qnn_tensor_array_t &tensor_wrappers) { +inline bool bind_tensors(const ggml_tensor_array_t & ggml_tensors, qnn_tensor_array_t & tensor_wrappers) { GGML_ASSERT(tensor_wrappers.size() == ggml_tensors.size()); for (size_t i = 0; i < ggml_tensors.size(); i++) { - auto *ggml_tensor = ggml_tensors[i]; + auto * ggml_tensor = ggml_tensors[i]; if (!tensor_wrappers[i]->bind_ggml_tensor(ggml_tensor)) { - QNN_LOG_ERROR("bind tensor %s failed", ggml_get_name(ggml_tensor)); + QNN_LOG_ERROR("bind tensor %s failed\n", ggml_get_name(ggml_tensor)); return false; } } @@ -362,31 +370,31 @@ inline bool bind_tensors(const ggml_tensor_array_t &ggml_tensors, qnn_tensor_arr return true; } -inline void unbind_tensors(qnn_tensor_array_t &tensor_wrappers) { - for (auto &tensor : tensor_wrappers) { +inline void unbind_tensors(qnn_tensor_array_t & tensor_wrappers) { + for (auto & tensor : tensor_wrappers) { tensor->unbind(); } } struct tensor_create_common_params { - const char *name_prefix; - int tensor_rank; - bool is_input; - QNNBackend device; - Qnn_GraphHandle_t graph_handle; + const char * name_prefix; + int tensor_rank; + bool is_input; + QNNBackend device; + Qnn_GraphHandle_t graph_handle; std::shared_ptr qnn_instance; }; -inline void create_tensors_from_ggml_tensor(const tensor_create_common_params ¶ms, - const ggml_tensor_array_t &ggml_tensors, - qnn_tensor_array_t *tensor_wrappers, - std::vector *qnn_tensors) { +inline void create_tensors_from_ggml_tensor(const tensor_create_common_params & params, + const ggml_tensor_array_t & ggml_tensors, + qnn_tensor_array_t * tensor_wrappers, + std::vector * qnn_tensors) { if (qnn_tensors) { qnn_tensors->resize(ggml_tensors.size()); } if (!tensor_wrappers->empty()) { - QNN_LOG_DEBUG("tensor_wrappers is not empty, skip create tensors"); + QNN_LOG_DEBUG("tensor_wrappers is not empty, skip create tensors\n"); GGML_ASSERT(tensor_wrappers->size() == ggml_tensors.size()); return; } @@ -394,14 +402,14 @@ inline void create_tensors_from_ggml_tensor(const tensor_create_common_params &p tensor_wrappers->resize(ggml_tensors.size()); char buffer[GGML_MAX_NAME] = {}; - auto tensor_type = params.is_input ? 
ggml_qnn_tensor::INPUT : ggml_qnn_tensor::OUTPUT; + auto tensor_type = params.is_input ? ggml_qnn_tensor::INPUT : ggml_qnn_tensor::OUTPUT; for (size_t i = 0; i < ggml_tensors.size(); i++) { - snprintf(buffer, GGML_MAX_NAME, "%s%d", params.name_prefix, (int)i); - auto *ggml_tensor = ggml_tensors[i]; + snprintf(buffer, GGML_MAX_NAME, "%s%d", params.name_prefix, (int) i); + auto * ggml_tensor = ggml_tensors[i]; (*tensor_wrappers)[i] = std::make_shared(tensor_type, std::string(buffer), ggml_tensor->ne, ggml_tensor->type, params.tensor_rank, params.device, params.graph_handle, params.qnn_instance); } } -} // namespace qnn +} // namespace qnn diff --git a/ggml/src/ggml-qnn/utils.cpp b/ggml/src/ggml-qnn/utils.cpp index e9aa4d3737..f9178f90d5 100644 --- a/ggml/src/ggml-qnn/utils.cpp +++ b/ggml/src/ggml-qnn/utils.cpp @@ -4,30 +4,28 @@ #include #include "ggml-qnn.h" - -#include "QnnGraph.h" #include "qnn-types.hpp" +#include "QnnGraph.h" #ifdef _WIN32 -#include +# include #else -#include -#include +# include +# include #endif namespace { -template -_Ty align_to_generic(size_t alignment, _Ty offset) { - return offset % alignment == 0 ? offset - : offset + (static_cast<_Ty>(alignment) - (offset % static_cast<_Ty>(alignment))); +template _Ty align_to_generic(size_t alignment, _Ty offset) { + return offset % alignment == 0 ? offset : + offset + (static_cast<_Ty>(alignment) - (offset % static_cast<_Ty>(alignment))); } -} // namespace +} // namespace namespace qnn { -qnn_dimension_array_t get_internal_dimension(const ggml_dimension_array_t &dims, uint32_t rank) { +qnn_dimension_array_t get_internal_dimension(const ggml_dimension_array_t & dims, uint32_t rank) { static_assert(GGML_MAX_DIMS == 4, "GGML_MAX_DIMS should be 4"); GGML_ASSERT(rank <= GGML_MAX_DIMS && rank > 0); @@ -43,30 +41,29 @@ qnn_dimension_array_t get_internal_dimension(const ggml_dimension_array_t &dims, * The ggml tensor will have dimensions [3, 2], while the qnn tensor will have dimensions [2, 3]. 
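
The comment just above describes the layout mismatch this helper resolves: ggml stores ne[0] as the innermost (fastest-varying) dimension, while QNN expects the outermost dimension first, so a 3 x 2 ggml tensor becomes a 2 x 3 QNN tensor; the loop that follows reverses the order and clamps each entry to at least 1. A standalone sketch of the same transform (simplified signature, not the backend's exact one):

    #include <algorithm>
    #include <array>
    #include <cstdint>
    #include <cstdio>

    // Reverse ggml dimensions (ne[0] innermost) into QNN order (outermost first).
    std::array<uint32_t, 4> reverse_dims(const int64_t (&ne)[4], uint32_t rank) {
        std::array<uint32_t, 4> out = { 1, 1, 1, 1 };
        for (uint32_t i = 0; i < rank; i++) {
            out[i] = std::max<uint32_t>((uint32_t) ne[rank - 1 - i], 1);  // clamp to >= 1, as in the diff
        }
        return out;
    }

    int main() {
        int64_t ne[4] = { 3, 2, 1, 1 };      // ggml view: 3 x 2
        auto    dims  = reverse_dims(ne, 2);
        std::printf("%u x %u\n", dims[0], dims[1]);  // prints "2 x 3", matching the comment
        return 0;
    }
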
*/ for (uint32_t i = 0; i < rank; i++) { - internal_dims[i] = std::max((uint32_t)dims[rank - 1 - i], 1); + internal_dims[i] = std::max((uint32_t) dims[rank - 1 - i], 1); } return internal_dims; } -qnn_dimension_array_t get_view_internal_dimension(const ggml_tensor *tensor, size_t &element_offset_out) { - +qnn_dimension_array_t get_view_internal_dimension(const ggml_tensor * tensor, size_t & element_offset_out) { element_offset_out = 0; - auto *parent_tensor = tensor; + auto * parent_tensor = tensor; while (parent_tensor->view_src) { element_offset_out += parent_tensor->view_offs; parent_tensor = parent_tensor->view_src; } - const auto rank = get_ggml_tensor_rank(tensor); + const auto rank = get_ggml_tensor_rank(tensor); const auto parent_rank = get_ggml_tensor_rank(parent_tensor); GGML_ASSERT(parent_tensor->type == tensor->type); GGML_ASSERT(parent_rank == rank); const auto block_size = ggml_blck_size(tensor->type); element_offset_out = - element_offset_out * block_size / tensor->nb[0]; // calculate the element offset in the view tensor + element_offset_out * block_size / tensor->nb[0]; // calculate the element offset in the view tensor return get_internal_dimension(parent_tensor->ne, parent_rank); } @@ -141,7 +138,7 @@ size_t qnn_datatype_size(Qnn_DataType_t qnn_type) { return 0; } -const char *qnn_datatype_to_string(Qnn_DataType_t qnn_type) { +const char * qnn_datatype_to_string(Qnn_DataType_t qnn_type) { switch (qnn_type) { case QNN_DATATYPE_FLOAT_32: return "QNN_DATATYPE_FLOAT_32"; @@ -166,7 +163,7 @@ const char *qnn_datatype_to_string(Qnn_DataType_t qnn_type) { return "QNN_DATATYPE_UNDEFINED"; } -uint32_t get_ggml_tensor_rank(const ggml_tensor *tensor) { +uint32_t get_ggml_tensor_rank(const ggml_tensor * tensor) { uint32_t rank = 0; for (int i = 0; i < GGML_MAX_DIMS; i++) { if ((0 != tensor->ne[i]) && (1 != tensor->ne[i])) { @@ -176,12 +173,12 @@ uint32_t get_ggml_tensor_rank(const ggml_tensor *tensor) { return rank; } -const char *get_ggml_type_name(ggml_type type) { - const auto *traits = ggml_get_type_traits(type); +const char * get_ggml_type_name(ggml_type type) { + const auto * traits = ggml_get_type_traits(type); return traits->type_name; } -const char *get_backend_name(QNNBackend device_index) { +const char * get_backend_name(QNNBackend device_index) { switch (device_index) { case QNN_BACKEND_CPU: return "qnn-cpu"; @@ -195,7 +192,7 @@ const char *get_backend_name(QNNBackend device_index) { } } -const char *get_chipset_desc(uint32_t chipset_id) { +const char * get_chipset_desc(uint32_t chipset_id) { switch (chipset_id) { case SM8450: return "SD 8 Gen 1 (SM8450)"; @@ -212,7 +209,7 @@ const char *get_chipset_desc(uint32_t chipset_id) { } } -const char *get_htparch_desc(size_t htp_arch) { +const char * get_htparch_desc(size_t htp_arch) { switch (htp_arch) { case V68: return "QCOM_HTP_V68"; @@ -229,12 +226,18 @@ const char *get_htparch_desc(size_t htp_arch) { } } -intptr_t align_to(size_t alignment, intptr_t offset) { return align_to_generic(alignment, offset); } +intptr_t align_to(size_t alignment, intptr_t offset) { + return align_to_generic(alignment, offset); +} -uint32_t get_ggml_tensor_data_size(const ggml_tensor *tensor) { return (uint32_t)ggml_nbytes(tensor); } +uint32_t get_ggml_tensor_data_size(const ggml_tensor * tensor) { + return (uint32_t) ggml_nbytes(tensor); +} #ifdef _WIN32 -static void *_align_alloc(size_t alignment, size_t size) { return _aligned_malloc(size, alignment); } +static void * _align_alloc(size_t alignment, size_t size) { + return _aligned_malloc(size, 
alignment); +} static size_t _get_page_size() { SYSTEM_INFO si; @@ -242,22 +245,31 @@ static size_t _get_page_size() { return si.dwPageSize; } -void align_free(void *ptr) { _aligned_free(ptr); } +void align_free(void * ptr) { + _aligned_free(ptr); +} #else -static void *_align_alloc(size_t alignment, size_t size) { return std::aligned_alloc(alignment, size); } +static void * _align_alloc(size_t alignment, size_t size) { + return std::aligned_alloc(alignment, size); +} -static size_t _get_page_size() { return sysconf(_SC_PAGESIZE); } +static size_t _get_page_size() { + return sysconf(_SC_PAGESIZE); +} -void align_free(void *ptr) { std::free(ptr); } +void align_free(void * ptr) { + std::free(ptr); +} #endif -void *page_align_alloc(size_t size) { - const size_t alignment = _get_page_size(); - size_t size_aligned = align_to_generic(alignment, size); - QNN_LOG_DEBUG("_align_alloc success, alignment: %ld, size: %ld, size_aligned: %ld", alignment, size, size_aligned); - void *data = _align_alloc(alignment, size_aligned); +void * page_align_alloc(size_t size) { + const size_t alignment = _get_page_size(); + size_t size_aligned = align_to_generic(alignment, size); + QNN_LOG_DEBUG("_align_alloc success, alignment: %ld, size: %ld, size_aligned: %ld\n", alignment, size, size_aligned); + void * data = _align_alloc(alignment, size_aligned); if (!data) { - QNN_LOG_WARN("_align_alloc failed, alignment: %ld, size: %ld, size_aligned: %ld", alignment, size, size_aligned); + QNN_LOG_WARN("_align_alloc failed, alignment: %ld, size: %ld, size_aligned: %ld\n", alignment, size, + size_aligned); return nullptr; } @@ -270,7 +282,7 @@ void *page_align_alloc(size_t size) { // // ================================================================================================= // TODO: only support GGML_OP_ADD/GGML_OP_MUL/GGML_OP_MUL_MAT -const char *opname_from_ggmlop(enum ggml_op ggmlop) { +const char * opname_from_ggmlop(enum ggml_op ggmlop) { switch (ggmlop) { case GGML_OP_ADD: return QNN_OP_ELEMENT_WISE_ADD; @@ -284,7 +296,7 @@ const char *opname_from_ggmlop(enum ggml_op ggmlop) { return nullptr; } -const char *get_qnn_error_string(Qnn_ErrorHandle_t error) { +const char * get_qnn_error_string(Qnn_ErrorHandle_t error) { // A complete list of error codes can be found at here: // https://docs.qualcomm.com/bundle/publicresource/topics/80-63442-50/api_error_codes.html thread_local static char error_code[128] = {}; @@ -377,7 +389,7 @@ const char *get_qnn_error_string(Qnn_ErrorHandle_t error) { size_t get_system_total_memory_in_bytes() { MEMORYSTATUSEX mem = {}; - mem.dwLength = sizeof(mem); + mem.dwLength = sizeof(mem); if (GlobalMemoryStatusEx(&mem)) { return mem.ullTotalPhys; } @@ -387,7 +399,7 @@ size_t get_system_total_memory_in_bytes() { size_t get_system_free_memory_in_bytes() { MEMORYSTATUSEX mem = {}; - mem.dwLength = sizeof(mem); + mem.dwLength = sizeof(mem); if (GlobalMemoryStatusEx(&mem)) { return mem.ullAvailPhys; } @@ -403,8 +415,8 @@ size_t get_system_total_memory_in_bytes() { return (info.totalram + info.totalswap) * info.mem_unit; } - auto pages = (size_t)sysconf(_SC_PHYS_PAGES); - auto page_size = (size_t)sysconf(_SC_PAGE_SIZE); + auto pages = (size_t) sysconf(_SC_PHYS_PAGES); + auto page_size = (size_t) sysconf(_SC_PAGE_SIZE); return pages * page_size; } @@ -414,11 +426,11 @@ size_t get_system_free_memory_in_bytes() { return (info.freeram + info.freeswap) * info.mem_unit; } - auto avail_pages = (size_t)sysconf(_SC_AVPHYS_PAGES); - auto page_size = (size_t)sysconf(_SC_PAGE_SIZE); + auto avail_pages = 
(size_t) sysconf(_SC_AVPHYS_PAGES); + auto page_size = (size_t) sysconf(_SC_PAGE_SIZE); return avail_pages * page_size; } #endif -} // namespace qnn +} // namespace qnn diff --git a/ggml/src/ggml-qnn/utils.hpp b/ggml/src/ggml-qnn/utils.hpp index cdff53e773..d6130a3df4 100644 --- a/ggml/src/ggml-qnn/utils.hpp +++ b/ggml/src/ggml-qnn/utils.hpp @@ -5,38 +5,36 @@ #include #include -#include "ggml.h" - #include "ggml-qnn.h" - -#include "QnnTypes.h" +#include "ggml.h" #include "logger.hpp" +#include "QnnTypes.h" #define QNN_TENSOR_VER(x) ((x).v1) namespace qnn { using ggml_dimension_array_t = int64_t[GGML_MAX_DIMS]; -using ggml_stride_array_t = size_t[GGML_MAX_DIMS]; -using qnn_dimension_array_t = std::array; +using ggml_stride_array_t = size_t[GGML_MAX_DIMS]; +using qnn_dimension_array_t = std::array; -qnn_dimension_array_t get_internal_dimension(const ggml_dimension_array_t &dims, uint32_t rank); -qnn_dimension_array_t get_view_internal_dimension(const ggml_tensor *tensor, size_t &element_offser_out); +qnn_dimension_array_t get_internal_dimension(const ggml_dimension_array_t & dims, uint32_t rank); +qnn_dimension_array_t get_view_internal_dimension(const ggml_tensor * tensor, size_t & element_offser_out); -uint32_t get_ggml_tensor_rank(const ggml_tensor *tensor); -const char *get_ggml_type_name(ggml_type type); -const char *get_backend_name(QNNBackend device_index); -const char *get_chipset_desc(uint32_t chipset_id); -const char *get_htparch_desc(size_t htp_arch); -intptr_t align_to(size_t alignment, intptr_t offset); -uint32_t get_ggml_tensor_data_size(const ggml_tensor *tensor); +uint32_t get_ggml_tensor_rank(const ggml_tensor * tensor); +const char * get_ggml_type_name(ggml_type type); +const char * get_backend_name(QNNBackend device_index); +const char * get_chipset_desc(uint32_t chipset_id); +const char * get_htparch_desc(size_t htp_arch); +intptr_t align_to(size_t alignment, intptr_t offset); +uint32_t get_ggml_tensor_data_size(const ggml_tensor * tensor); -void *page_align_alloc(size_t size); -void align_free(void *ptr); +void * page_align_alloc(size_t size); +void align_free(void * ptr); -const char *opname_from_ggmlop(enum ggml_op ggmlop); +const char * opname_from_ggmlop(enum ggml_op ggmlop); -const char *get_qnn_error_string(Qnn_ErrorHandle_t error); +const char * get_qnn_error_string(Qnn_ErrorHandle_t error); constexpr const Qnn_TensorVersion_t kDefaultQnnTensorVersion = QNN_TENSOR_VERSION_1; @@ -51,7 +49,7 @@ inline Qnn_Tensor_t qnn_tensor_init(Qnn_TensorVersion_t version) { return tensor; } -inline uint32_t get_qnn_tensorid(const Qnn_Tensor_t &tensor) { +inline uint32_t get_qnn_tensorid(const Qnn_Tensor_t & tensor) { if (tensor.version == kDefaultQnnTensorVersion) { return QNN_TENSOR_VER(tensor).id; } @@ -59,156 +57,158 @@ inline uint32_t get_qnn_tensorid(const Qnn_Tensor_t &tensor) { return 0u; } -inline const char *get_qnn_tensorname(const Qnn_Tensor_t &tensor) { +inline const char * get_qnn_tensorname(const Qnn_Tensor_t & tensor) { if (tensor.version == kDefaultQnnTensorVersion) { return QNN_TENSOR_VER(tensor).name; } return nullptr; } -inline Qnn_TensorType_t get_qnn_tensortype(const Qnn_Tensor_t &tensor) { +inline Qnn_TensorType_t get_qnn_tensortype(const Qnn_Tensor_t & tensor) { if (tensor.version == kDefaultQnnTensorVersion) { return QNN_TENSOR_VER(tensor).type; } return QNN_TENSOR_TYPE_UNDEFINED; } -inline Qnn_TensorDataFormat_t get_qnn_tensor_dataformat(const Qnn_Tensor_t &tensor) { +inline Qnn_TensorDataFormat_t get_qnn_tensor_dataformat(const Qnn_Tensor_t & tensor) { 
if (tensor.version == kDefaultQnnTensorVersion) { return QNN_TENSOR_VER(tensor).dataFormat; } return QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER; } -inline Qnn_DataType_t get_qnn_tensor_datatype(const Qnn_Tensor_t &tensor) { +inline Qnn_DataType_t get_qnn_tensor_datatype(const Qnn_Tensor_t & tensor) { if (tensor.version == kDefaultQnnTensorVersion) { return QNN_TENSOR_VER(tensor).dataType; } return QNN_DATATYPE_UNDEFINED; } -inline Qnn_QuantizeParams_t get_qnn_tensor_quantparams(const Qnn_Tensor_t &tensor) { +inline Qnn_QuantizeParams_t get_qnn_tensor_quantparams(const Qnn_Tensor_t & tensor) { if (tensor.version == kDefaultQnnTensorVersion) { return QNN_TENSOR_VER(tensor).quantizeParams; } return QNN_QUANTIZE_PARAMS_INIT; } -inline uint32_t get_qnn_tensor_rank(const Qnn_Tensor_t &tensor) { +inline uint32_t get_qnn_tensor_rank(const Qnn_Tensor_t & tensor) { if (tensor.version == kDefaultQnnTensorVersion) { return QNN_TENSOR_VER(tensor).rank; } return 0u; } -inline uint32_t *get_qnn_tensor_dimensions(const Qnn_Tensor_t &tensor) { +inline uint32_t * get_qnn_tensor_dimensions(const Qnn_Tensor_t & tensor) { if (tensor.version == kDefaultQnnTensorVersion) { return QNN_TENSOR_VER(tensor).dimensions; } return nullptr; } -inline Qnn_TensorMemType_t get_qnn_tensor_memtype(const Qnn_Tensor_t &tensor) { +inline Qnn_TensorMemType_t get_qnn_tensor_memtype(const Qnn_Tensor_t & tensor) { if (tensor.version == kDefaultQnnTensorVersion) { return QNN_TENSOR_VER(tensor).memType; } return QNN_TENSORMEMTYPE_UNDEFINED; } -inline Qnn_MemHandle_t get_qnn_tensor_memhandle(const Qnn_Tensor_t &tensor) { +inline Qnn_MemHandle_t get_qnn_tensor_memhandle(const Qnn_Tensor_t & tensor) { if (tensor.version == kDefaultQnnTensorVersion) { return QNN_TENSOR_VER(tensor).memHandle; } return nullptr; } -inline void set_qnn_tensor_id(Qnn_Tensor_t &tensor, uint32_t id) { +inline void set_qnn_tensor_id(Qnn_Tensor_t & tensor, uint32_t id) { if (tensor.version == kDefaultQnnTensorVersion) { QNN_TENSOR_VER(tensor).id = id; } } -inline void set_qnn_tensor_name(Qnn_Tensor_t &tensor, const char *name) { +inline void set_qnn_tensor_name(Qnn_Tensor_t & tensor, const char * name) { if (tensor.version == kDefaultQnnTensorVersion) { QNN_TENSOR_VER(tensor).name = name; } } -inline void set_qnn_tensor_type(Qnn_Tensor_t &tensor, Qnn_TensorType_t type) { +inline void set_qnn_tensor_type(Qnn_Tensor_t & tensor, Qnn_TensorType_t type) { if (tensor.version == kDefaultQnnTensorVersion) { QNN_TENSOR_VER(tensor).type = type; } } -inline void set_qnn_tensor_dataformat(Qnn_Tensor_t &tensor, Qnn_TensorDataFormat_t format) { +inline void set_qnn_tensor_dataformat(Qnn_Tensor_t & tensor, Qnn_TensorDataFormat_t format) { if (tensor.version == kDefaultQnnTensorVersion) { QNN_TENSOR_VER(tensor).dataFormat = format; } } -inline void set_qnn_tensor_datatype(Qnn_Tensor_t &tensor, Qnn_DataType_t dataType) { +inline void set_qnn_tensor_datatype(Qnn_Tensor_t & tensor, Qnn_DataType_t dataType) { if (tensor.version == kDefaultQnnTensorVersion) { QNN_TENSOR_VER(tensor).dataType = dataType; } } -inline void set_qnn_tensor_quantparams(Qnn_Tensor_t &tensor, Qnn_QuantizeParams_t params) { +inline void set_qnn_tensor_quantparams(Qnn_Tensor_t & tensor, Qnn_QuantizeParams_t params) { if (tensor.version == kDefaultQnnTensorVersion) { QNN_TENSOR_VER(tensor).quantizeParams = params; } } -inline void set_qnn_tensor_rank(Qnn_Tensor_t &tensor, uint32_t rank) { +inline void set_qnn_tensor_rank(Qnn_Tensor_t & tensor, uint32_t rank) { if (tensor.version == kDefaultQnnTensorVersion) { 
QNN_TENSOR_VER(tensor).rank = rank; } } -inline void set_qnn_tensor_dimensions(Qnn_Tensor_t &tensor, uint32_t *dims) { +inline void set_qnn_tensor_dimensions(Qnn_Tensor_t & tensor, uint32_t * dims) { if (tensor.version == kDefaultQnnTensorVersion) { QNN_TENSOR_VER(tensor).dimensions = dims; } } -inline void set_qnn_tensor_memtype(Qnn_Tensor_t &tensor, Qnn_TensorMemType_t mem_type) { +inline void set_qnn_tensor_memtype(Qnn_Tensor_t & tensor, Qnn_TensorMemType_t mem_type) { if (tensor.version == kDefaultQnnTensorVersion) { QNN_TENSOR_VER(tensor).memType = mem_type; } } -inline void set_qnn_tensor_clientbuf(Qnn_Tensor_t &tensor, Qnn_ClientBuffer_t client_buf) { +inline void set_qnn_tensor_clientbuf(Qnn_Tensor_t & tensor, Qnn_ClientBuffer_t client_buf) { if (tensor.version == kDefaultQnnTensorVersion) { QNN_TENSOR_VER(tensor).clientBuf = client_buf; } } -inline void set_qnn_tensor_memhandle(Qnn_Tensor_t &tensor, Qnn_MemHandle_t handle) { +inline void set_qnn_tensor_memhandle(Qnn_Tensor_t & tensor, Qnn_MemHandle_t handle) { if (tensor.version == kDefaultQnnTensorVersion) { QNN_TENSOR_VER(tensor).memHandle = handle; } } -inline void set_qnn_tensor_dyn_dimensions(Qnn_Tensor_t &tensor, uint8_t *isDynamicDimensions) { +inline void set_qnn_tensor_dyn_dimensions(Qnn_Tensor_t & tensor, uint8_t * isDynamicDimensions) { if (tensor.version == QNN_TENSOR_VERSION_2) { tensor.v2.isDynamicDimensions = isDynamicDimensions; } } Qnn_DataType_t qnn_datatype_from_ggml_datatype(ggml_type ggml_type); -ggml_type ggml_datatype_from_qnn_datatype(Qnn_DataType_t qnn_type); -size_t qnn_datatype_size(Qnn_DataType_t qnn_type); -const char *qnn_datatype_to_string(Qnn_DataType_t qnn_type); -size_t get_system_total_memory_in_bytes(); -size_t get_system_free_memory_in_bytes(); +ggml_type ggml_datatype_from_qnn_datatype(Qnn_DataType_t qnn_type); +size_t qnn_datatype_size(Qnn_DataType_t qnn_type); +const char * qnn_datatype_to_string(Qnn_DataType_t qnn_type); +size_t get_system_total_memory_in_bytes(); +size_t get_system_free_memory_in_bytes(); #if ENABLE_QNNBACKEND_PERF class qnn_perf { -public: - qnn_perf(const std::string &perf_name) : _perf_name(std::move(perf_name)) {}; + public: + qnn_perf(const std::string & perf_name) : _perf_name(std::move(perf_name)) {}; + ~qnn_perf() { info(); } - qnn_perf() = delete; - qnn_perf(const qnn_perf &) = delete; - qnn_perf &operator=(const qnn_perf &) = delete; + + qnn_perf() = delete; + qnn_perf(const qnn_perf &) = delete; + qnn_perf & operator=(const qnn_perf &) = delete; void start() { _begin_time = ggml_time_us(); } @@ -218,48 +218,51 @@ public: QNN_LOG_INFO("duration of %s : %lld microseconds\n", _perf_name.c_str(), _duration); } -private: - int64_t _begin_time = 0LL; - int64_t _end_time = 0LL; - int64_t _duration = 0LL; + private: + int64_t _begin_time = 0LL; + int64_t _end_time = 0LL; + int64_t _duration = 0LL; std::string _perf_name; }; #else class qnn_perf { -public: + public: qnn_perf(const std::string &) {} + ~qnn_perf() { info(); } - qnn_perf() = delete; - qnn_perf(const qnn_perf &) = delete; - qnn_perf &operator=(const qnn_perf &) = delete; + + qnn_perf() = delete; + qnn_perf(const qnn_perf &) = delete; + qnn_perf & operator=(const qnn_perf &) = delete; void start() {} + void info() {} }; #endif -} // namespace qnn +} // namespace qnn -#define QNN_TENSOR_GET_ID(tensor) qnn::get_qnn_tensorid(tensor) -#define QNN_TENSOR_GET_NAME(tensor) qnn::get_qnn_tensorname(tensor) -#define QNN_TENSOR_GET_TYPE(tensor) qnn::get_qnn_tensortype(tensor) -#define QNN_TENSOR_GET_DATA_FORMAT(tensor) 
qnn::get_qnn_tensor_dataformat(tensor) -#define QNN_TENSOR_GET_DATA_TYPE(tensor) qnn::get_qnn_tensor_datatype(tensor) +#define QNN_TENSOR_GET_ID(tensor) qnn::get_qnn_tensorid(tensor) +#define QNN_TENSOR_GET_NAME(tensor) qnn::get_qnn_tensorname(tensor) +#define QNN_TENSOR_GET_TYPE(tensor) qnn::get_qnn_tensortype(tensor) +#define QNN_TENSOR_GET_DATA_FORMAT(tensor) qnn::get_qnn_tensor_dataformat(tensor) +#define QNN_TENSOR_GET_DATA_TYPE(tensor) qnn::get_qnn_tensor_datatype(tensor) #define QNN_TENSOR_GET_QUANT_PARAMS(tensor) qnn::get_qnn_tensor_quantparams(tensor) -#define QNN_TENSOR_GET_RANK(tensor) qnn::get_qnn_tensor_rank(tensor) -#define QNN_TENSOR_GET_DIMENSIONS(tensor) qnn::get_qnn_tensor_dimensions(tensor) -#define QNN_TENSOR_GET_MEM_TYPE(tensor) qnn::get_qnn_tensor_memtype(tensor) -#define QNN_TENSOR_GET_MEM_HANDLE(tensor) qnn::get_qnn_tensor_memhandle(tensor) +#define QNN_TENSOR_GET_RANK(tensor) qnn::get_qnn_tensor_rank(tensor) +#define QNN_TENSOR_GET_DIMENSIONS(tensor) qnn::get_qnn_tensor_dimensions(tensor) +#define QNN_TENSOR_GET_MEM_TYPE(tensor) qnn::get_qnn_tensor_memtype(tensor) +#define QNN_TENSOR_GET_MEM_HANDLE(tensor) qnn::get_qnn_tensor_memhandle(tensor) -#define QNN_TENSOR_SET_ID(tensor, value) qnn::set_qnn_tensor_id(tensor, value) -#define QNN_TENSOR_SET_NAME(tensor, value) qnn::set_qnn_tensor_name(tensor, value) -#define QNN_TENSOR_SET_TYPE(tensor, value) qnn::set_qnn_tensor_type(tensor, value) -#define QNN_TENSOR_SET_DATA_FORMAT(tensor, value) qnn::set_qnn_tensor_dataformat(tensor, value) -#define QNN_TENSOR_SET_DATA_TYPE(tensor, value) qnn::set_qnn_tensor_datatype(tensor, value) -#define QNN_TENSOR_SET_QUANT_PARAMS(tensor, value) qnn::set_qnn_tensor_quantparams(tensor, value) -#define QNN_TENSOR_SET_RANK(tensor, value) qnn::set_qnn_tensor_rank(tensor, value) -#define QNN_TENSOR_SET_DIMENSIONS(tensor, value) qnn::set_qnn_tensor_dimensions(tensor, value) -#define QNN_TENSOR_SET_MEM_TYPE(tensor, value) qnn::set_qnn_tensor_memtype(tensor, value) -#define QNN_TENSOR_SET_CLIENT_BUF(tensor, value) qnn::set_qnn_tensor_clientbuf(tensor, value) -#define QNN_TENSOR_SET_MEM_HANDLE(tensor, value) qnn::set_qnn_tensor_memhandle(tensor, value) +#define QNN_TENSOR_SET_ID(tensor, value) qnn::set_qnn_tensor_id(tensor, value) +#define QNN_TENSOR_SET_NAME(tensor, value) qnn::set_qnn_tensor_name(tensor, value) +#define QNN_TENSOR_SET_TYPE(tensor, value) qnn::set_qnn_tensor_type(tensor, value) +#define QNN_TENSOR_SET_DATA_FORMAT(tensor, value) qnn::set_qnn_tensor_dataformat(tensor, value) +#define QNN_TENSOR_SET_DATA_TYPE(tensor, value) qnn::set_qnn_tensor_datatype(tensor, value) +#define QNN_TENSOR_SET_QUANT_PARAMS(tensor, value) qnn::set_qnn_tensor_quantparams(tensor, value) +#define QNN_TENSOR_SET_RANK(tensor, value) qnn::set_qnn_tensor_rank(tensor, value) +#define QNN_TENSOR_SET_DIMENSIONS(tensor, value) qnn::set_qnn_tensor_dimensions(tensor, value) +#define QNN_TENSOR_SET_MEM_TYPE(tensor, value) qnn::set_qnn_tensor_memtype(tensor, value) +#define QNN_TENSOR_SET_CLIENT_BUF(tensor, value) qnn::set_qnn_tensor_clientbuf(tensor, value) +#define QNN_TENSOR_SET_MEM_HANDLE(tensor, value) qnn::set_qnn_tensor_memhandle(tensor, value) #define QNN_TENSOR_SET_DYN_DIMENSIONS(tensor, value) qnn::set_qnn_tensor_dyn_dimensions(tensor, value)
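
A closing note on the QNN_TENSOR_GET_*/QNN_TENSOR_SET_* macros realigned above: each forwards to an inline helper that dispatches on tensor.version and falls back to a neutral default when the version is not the one it handles. A minimal stand-in showing the same pattern (the struct, field, and macro names here are placeholders, not the real Qnn_Tensor_t layout):

    #include <cstdint>
    #include <cstdio>

    // Placeholder for the versioned tensor struct; the real Qnn_Tensor_t is a versioned union.
    struct fake_tensor_v1 { uint32_t rank; };
    struct fake_tensor {
        uint32_t       version;
        fake_tensor_v1 v1;
    };

    constexpr uint32_t kDefaultVersion = 1;

    inline uint32_t get_rank(const fake_tensor & tensor) {
        if (tensor.version == kDefaultVersion) {
            return tensor.v1.rank;
        }
        return 0u;  // unknown version: neutral default, as the helpers above do
    }

    inline void set_rank(fake_tensor & tensor, uint32_t rank) {
        if (tensor.version == kDefaultVersion) {
            tensor.v1.rank = rank;
        }
    }

    #define FAKE_TENSOR_GET_RANK(tensor)        get_rank(tensor)
    #define FAKE_TENSOR_SET_RANK(tensor, value) set_rank(tensor, value)

    int main() {
        fake_tensor t{ kDefaultVersion, { 0 } };
        FAKE_TENSOR_SET_RANK(t, 4);
        std::printf("rank: %u\n", FAKE_TENSOR_GET_RANK(t));
        return 0;
    }
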