* fix warning
* wip
* add todo for graph key generation
* rename some files to meet upstream guidelines
* remove local .clang-format
* expand supported/unsupported counters to all ops
* append device name to logs
* port to ggml logger
* fix warnings after adapting to ggml logger
* append \n to all logs
* use case op instead of convert
* Revert "use case op instead of convert" — this reverts commit e662fc2dfee41719aaf7bc9d75e03e8d0f7ded0f.
* fix ops that need the same shape
* opt kQnnOpsTable
* refresh the params name field when getting op config
* opt npu log print
* remove unused functions
This commit is contained in:
parent ff033e1e23
commit c867641222
@@ -1,65 +0,0 @@
----
-BasedOnStyle: Google
-IndentWidth: 4
-AccessModifierOffset: -4
-AlignAfterOpenBracket: Align
-AlignConsecutiveMacros: false
-AlignConsecutiveAssignments: false
-AlignConsecutiveDeclarations: false
-AlignEscapedNewlines: Left
-AlignOperands: true
-AlignTrailingComments: true
-AllowAllArgumentsOnNextLine: true
-AllowAllConstructorInitializersOnNextLine: true
-AllowAllParametersOfDeclarationOnNextLine: true
-AllowShortBlocksOnASingleLine: Never
-AllowShortCaseLabelsOnASingleLine: false
-AllowShortFunctionsOnASingleLine: All
-AllowShortLambdasOnASingleLine: All
-AllowShortIfStatementsOnASingleLine: WithoutElse
-AllowShortLoopsOnASingleLine: true
-AlwaysBreakAfterDefinitionReturnType: None
-AlwaysBreakAfterReturnType: None
-AlwaysBreakBeforeMultilineStrings: true
-AlwaysBreakTemplateDeclarations: Yes
-BinPackArguments: true
-BinPackParameters: true
-BraceWrapping:
-  AfterCaseLabel: false
-  AfterClass: false
-  AfterControlStatement: false
-  AfterEnum: false
-  AfterFunction: false
-  AfterNamespace: false
-  AfterObjCDeclaration: false
-  AfterStruct: false
-  AfterUnion: false
-  AfterExternBlock: false
-  BeforeCatch: false
-  BeforeElse: false
-  IndentBraces: false
-  SplitEmptyFunction: true
-  SplitEmptyRecord: true
-  SplitEmptyNamespace: true
-ColumnLimit: 120
-ConstructorInitializerIndentWidth: 4
-ContinuationIndentWidth: 4
-Cpp11BracedListStyle: true
-DerivePointerAlignment: false
-IncludeCategories:
-  - Regex: '^<.*\.h>'
-    Priority: 1
-  - Regex: '^<.*'
-    Priority: 2
-  - Regex: '^"ggml\.h"'
-    Priority: 3
-  - Regex: '^"ggml-.+\.h"'
-    Priority: 4
-  - Regex: '.*'
-    Priority: 5
-KeepEmptyLinesAtTheStartOfBlocks: true
-MaxEmptyLinesToKeep: 1
-PointerAlignment: Right
-SortIncludes: true
-SpacesBeforeTrailingComments: 1
-UseTab: Never
@@ -4,7 +4,6 @@
 #include <memory>
 
 #include "ggml-impl.h"
-
 #include "graph.hpp"
 #include "logger.hpp"
 #include "op-config.hpp"
@@ -13,15 +12,15 @@
 
 namespace {
 
-bool qnn_is_op_valid(ggml_backend_qnn_device_context *ctx, const ggml_tensor *dst) {
+bool qnn_is_op_valid(ggml_backend_qnn_device_context * ctx, const ggml_tensor * dst) {
     if (!ctx || !dst) {
-        QNN_LOG_WARN("invalid params");
+        QNN_LOG_WARN("invalid params\n");
         return false;
     }
 
     auto instance = ctx->instance;
     if (!instance) {
-        QNN_LOG_WARN("invalid instance");
+        QNN_LOG_WARN("invalid instance\n");
         return false;
     }
 
@@ -32,7 +31,7 @@ bool qnn_is_op_valid(ggml_backend_qnn_device_context *ctx, const ggml_tensor *ds
         case 2:
             return dst->src[0] && dst->src[1];
         default:
-            QNN_LOG_WARN("invalid op param count %d", (int)param_count);
+            QNN_LOG_WARN("invalid op param count %d\n", (int) param_count);
             break;
     }
 
@@ -40,60 +39,51 @@ bool qnn_is_op_valid(ggml_backend_qnn_device_context *ctx, const ggml_tensor *ds
 }
 
 #ifndef NDEBUG
-void print_ggml_tensor(const ggml_tensor *tensor) {
-    QNN_LOG_DEBUG("%s: type:%s ne: %ldx%ldx%ldx%ld, nb: %ldx%ldx%ldx%ld", tensor->name, ggml_type_name(tensor->type),
-                  (long)tensor->ne[0], (long)tensor->ne[1], (long)tensor->ne[2], (long)tensor->ne[3],
-                  (long)tensor->nb[0], (long)tensor->nb[1], (long)tensor->nb[2], (long)tensor->nb[3]);
+void print_ggml_tensor(const ggml_tensor * tensor) {
+    QNN_LOG_DEBUG("%s: type:%s ne: %ldx%ldx%ldx%ld, nb: %ldx%ldx%ldx%ld\n", tensor->name, ggml_type_name(tensor->type),
+                  (long) tensor->ne[0], (long) tensor->ne[1], (long) tensor->ne[2], (long) tensor->ne[3],
+                  (long) tensor->nb[0], (long) tensor->nb[1], (long) tensor->nb[2], (long) tensor->nb[3]);
 }
 #endif
 
-} // namespace
+}  // namespace
 
 namespace {
 
-typedef bool (*ggml_qnn_op_t)(ggml_backend_qnn_device_context *ctx, ggml_tensor *dst);
-
-bool execute_graph(qnn::qnn_graph *graph, ggml_tensor *output) {
-    if (!graph->execute(output)) {
-        QNN_LOG_WARN("execute failed");
-        return false;
-    }
-
-    return true;
-}
-
-void append_tensor_dimensions(const ggml_tensor *tensor, std::string &output) {
-    char buffer[256] = {};
-    const auto *type_name = qnn::get_ggml_type_name(tensor->type);
-    int len = 0;
+typedef bool (*ggml_qnn_op_t)(ggml_backend_qnn_device_context * ctx, ggml_tensor * dst);
+
+void append_tensor_dimensions(const ggml_tensor * tensor, std::string & output) {
+    char         buffer[256] = {};
+    const auto * type_name   = qnn::get_ggml_type_name(tensor->type);
+    int          len         = 0;
     switch (ggml_n_dims(tensor)) {
         case 1:
-            len = snprintf(buffer, sizeof(buffer), "%ld%s", (long)tensor->ne[0], type_name);
+            len = snprintf(buffer, sizeof(buffer), "%ld%s", (long) tensor->ne[0], type_name);
             break;
         case 2:
-            len = snprintf(buffer, sizeof(buffer), "%ldx%ld%s", (long)tensor->ne[0], (long)tensor->ne[1], type_name);
+            len = snprintf(buffer, sizeof(buffer), "%ldx%ld%s", (long) tensor->ne[0], (long) tensor->ne[1], type_name);
             break;
         case 3:
-            len = snprintf(buffer, sizeof(buffer), "%ldx%ldx%ld%s", (long)tensor->ne[0], (long)tensor->ne[1],
-                           (long)tensor->ne[2], type_name);
+            len = snprintf(buffer, sizeof(buffer), "%ldx%ldx%ld%s", (long) tensor->ne[0], (long) tensor->ne[1],
+                           (long) tensor->ne[2], type_name);
             break;
         case 4:
         default:
-            len = snprintf(buffer, sizeof(buffer), "%ldx%ldx%ldx%ld%s", (long)tensor->ne[0], (long)tensor->ne[1],
-                           (long)tensor->ne[2], (long)tensor->ne[3], type_name);
+            len = snprintf(buffer, sizeof(buffer), "%ldx%ldx%ldx%ld%s", (long) tensor->ne[0], (long) tensor->ne[1],
+                           (long) tensor->ne[2], (long) tensor->ne[3], type_name);
             break;
     }
-    GGML_ASSERT(len > 0 && len < (int)sizeof(buffer));
+    GGML_ASSERT(len > 0 && len < (int) sizeof(buffer));
     output.append(buffer, len);
 }
 
-void get_graph_key_from_op(const ggml_tensor *op, std::string &output) {
+void get_graph_key_from_op(const ggml_tensor * op, std::string & output) {
     GGML_ASSERT(op->op != GGML_OP_NONE);
     output += ggml_op_desc(op);
     output += qnn::get_ggml_type_name(op->type);
     const auto param_count = qnn::get_qnn_op_input_param_count(op);
     for (size_t i = 0; i < param_count; ++i) {
-        auto *input = op->src[i];
+        auto * input = op->src[i];
         if (!input) {
             break;
         }
@@ -103,7 +93,7 @@ void get_graph_key_from_op(const ggml_tensor *op, std::string &output) {
     }
 }
 
-void get_op_key_with_src_op_desc(const ggml_tensor *op, std::string &output) {
+void get_op_key_with_src_op_desc(const ggml_tensor * op, std::string & output) {
     output += ggml_op_desc(op);
     output += '(';
     if (op->src[0]) {
@@ -116,25 +106,37 @@ void get_op_key_with_src_op_desc(const ggml_tensor *op, std::string &output) {
     output += ')';
 }
 
-void get_graph_key_from_cgraph(const ggml_cgraph *cgraph, std::string &output) {
-    // generate key from the graph, the key is used to cache the graph, like:
-    // "MUL_MATf32_256x16x10f32_256x1x10f32#LOG#ADD#ADDf32_16x1x10f32"
+/**
+ * @brief Generates a unique key for a given computation graph (cgraph).
+ *
+ * This key is used to cache the graph, enabling efficient reuse of previously
+ * compiled graphs. The key is constructed by concatenating the descriptions
+ * of the operations and their associated tensor dimensions within the graph.
+ *
+ * Example key format: "MUL_MATf32_256x16x10f32_256x1x10f32#LOG#ADD#ADDf32_16x1x10f32"
+ *
+ * @param cgraph The computation graph for which the key is generated.
+ * @param output The string where the generated key will be stored.
+ *
+ * TODO: Improve the key generation logic to handle more complex graph structures and edge cases.
+ */
+void get_graph_key_from_cgraph(const ggml_cgraph * cgraph, std::string & output) {
     if (cgraph->n_nodes == 0) {
-        QNN_LOG_DEBUG("empty cgraph");
+        QNN_LOG_DEBUG("empty cgraph\n");
         return;
     }
 
     {
         bool is_start = true;
         for (int i = 0; i < cgraph->n_nodes; ++i) {
-            auto *op = cgraph->nodes[i];
+            auto * op = cgraph->nodes[i];
             if (ggml_is_empty(op)) {
-                QNN_LOG_DEBUG("empty op in graph, skipping");
+                QNN_LOG_DEBUG("empty op in graph, skipping\n");
                 continue;
             }
 
             if (op->op == GGML_OP_NONE) {
-                QNN_LOG_DEBUG("GGML_OP_NONE in graph, skipping");
+                QNN_LOG_DEBUG("GGML_OP_NONE in graph, skipping\n");
                 continue;
             }
 
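Note: as a rough illustration of the '#'-joined key format documented above — a sketch, not the backend's real get_graph_key_from_cgraph, which walks a ggml_cgraph and uses ggml_op_desc()/append_tensor_dimensions():

    // Hypothetical sketch of the cache-key scheme; op_descs stands in for the
    // per-node descriptions (op name, type, dimensions) built by the real code.
    #include <string>
    #include <vector>

    std::string make_graph_key(const std::vector<std::string> & op_descs) {
        std::string key;
        for (size_t i = 0; i < op_descs.size(); ++i) {
            if (i > 0) {
                key += '#';  // ops are joined with '#', as in "MUL_MAT...#LOG#ADD#ADD..."
            }
            key += op_descs[i];
        }
        return key;
    }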
@@ -149,55 +151,27 @@ void get_graph_key_from_cgraph(const ggml_cgraph *cgraph, std::string &output) {
     }
 
     if (cgraph->n_nodes > 1) {
-        auto *last_op = cgraph->nodes[cgraph->n_nodes - 1];
+        auto * last_op = cgraph->nodes[cgraph->n_nodes - 1];
         output += qnn::get_ggml_type_name(last_op->type);
         output += '_';
         append_tensor_dimensions(last_op, output);
     }
 }
 
-qnn::qnn_graph *get_qnn_graph_from_cache(ggml_backend_qnn_device_context *ctx, ggml_tensor *output) {
-    auto &graph_cache = ctx->qnn_graph_cache;
-    std::string graph_key;
-    get_graph_key_from_op(output, graph_key);
-    auto it = graph_cache.find(graph_key);
-    qnn::qnn_graph *graph_ptr = nullptr;
-    if (it != graph_cache.end()) {
-        QNN_LOG_DEBUG("[%s]found graph %s in cache", qnn::get_backend_name(ctx->device), graph_key.c_str());
-        graph_ptr = it->second.get();
-    } else {
-        auto graph =
-            std::make_unique<qnn::qnn_graph>(graph_key, ctx->device, ctx->instance, ctx->socinfo.vtcm_size_in_mb);
-        if (!graph->is_valid()) {
-            return nullptr;
-        }
-
-        if (!graph->build_graph_from_op(output)) {
-            QNN_LOG_ERROR("[%s]build_graph_from_op failed", qnn::get_backend_name(ctx->device));
-            return nullptr;
-        }
-
-        graph_ptr = graph.get();
-        graph_cache[graph_key] = std::move(graph);
-    }
-
-    return graph_ptr;
-}
-
-qnn::qnn_graph *get_qnn_graph_from_cache(ggml_backend_qnn_device_context *ctx, const ggml_cgraph *cgraph) {
-    auto &graph_cache = ctx->qnn_graph_cache;
+qnn::qnn_graph * get_qnn_graph_from_cache(ggml_backend_qnn_device_context * ctx, const ggml_cgraph * cgraph) {
+    auto & graph_cache = ctx->qnn_graph_cache;
     std::string graph_key;
     get_graph_key_from_cgraph(cgraph, graph_key);
     if (graph_key.empty()) {
-        QNN_LOG_DEBUG("[%s]empty graph key for cgraph: %p, size: %d", qnn::get_backend_name(ctx->device), cgraph,
-                      (int)cgraph->n_nodes);
+        QNN_LOG_DEBUG("[%s]empty graph key for cgraph: %p, size: %d\n", qnn::get_backend_name(ctx->device),
+                      (const void *) cgraph, (int) cgraph->n_nodes);
         return nullptr;
     }
 
-    auto it = graph_cache.find(graph_key);
-    qnn::qnn_graph *graph_ptr = nullptr;
+    auto             it        = graph_cache.find(graph_key);
+    qnn::qnn_graph * graph_ptr = nullptr;
     if (it != graph_cache.end()) {
-        QNN_LOG_DEBUG("[%s]found graph %s in cache", qnn::get_backend_name(ctx->device), graph_key.c_str());
+        QNN_LOG_DEBUG("[%s]found graph %s in cache\n", qnn::get_backend_name(ctx->device), graph_key.c_str());
         graph_ptr = it->second.get();
     } else {
         auto graph =
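Note: the lookup-then-build flow above is the usual memoization idiom for an unordered_map of unique_ptr; a minimal self-contained sketch with a placeholder graph type:

    #include <memory>
    #include <string>
    #include <unordered_map>

    struct graph {};  // stand-in for qnn::qnn_graph

    using graph_cache_t = std::unordered_map<std::string, std::unique_ptr<graph>>;

    graph * get_or_build(graph_cache_t & cache, const std::string & key) {
        auto it = cache.find(key);
        if (it != cache.end()) {
            return it->second.get();  // hit: reuse the previously built graph
        }
        auto    g   = std::make_unique<graph>();  // miss: build (and validate) a new one
        graph * raw = g.get();
        cache[key]  = std::move(g);  // the cache owns the graph; callers borrow a raw pointer
        return raw;
    }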
@@ -207,180 +181,151 @@ qnn::qnn_graph *get_qnn_graph_from_cache(ggml_backend_qnn_device_context *ctx, c
         }
 
         if (!graph->build_graph_from_ggml_graph(cgraph)) {
-            QNN_LOG_ERROR("[%s]build_graph_from_op failed", qnn::get_backend_name(ctx->device));
+            QNN_LOG_ERROR("[%s]build_graph_from_op failed\n", qnn::get_backend_name(ctx->device));
             return nullptr;
         }
 
-        graph_ptr = graph.get();
+        graph_ptr              = graph.get();
         graph_cache[graph_key] = std::move(graph);
     }
 
     return graph_ptr;
 }
 
-bool qnn_generic_op_impl(ggml_backend_qnn_device_context *ctx, ggml_tensor *dst) {
-    if (!qnn_is_op_valid(ctx, dst)) {
-        return false;
-    }
+// TODO: could be merge into op caps array
+constexpr const bool kQnnSupportedOps[] = {
+    true,   // GGML_OP_NONE
+    false,  // GGML_OP_DUP
+    true,   // GGML_OP_ADD
+    false,  // GGML_OP_ADD1
+    false,  // GGML_OP_ACC
+    true,   // GGML_OP_SUB
+    true,   // GGML_OP_MUL
+    true,   // GGML_OP_DIV
+    false,  // GGML_OP_SQR
+    true,   // GGML_OP_SQRT
+    true,   // GGML_OP_LOG
+    false,  // GGML_OP_SIN
+    false,  // GGML_OP_COS
+    false,  // GGML_OP_SUM
+    false,  // GGML_OP_SUM_ROWS
+    false,  // GGML_OP_MEAN
+    false,  // GGML_OP_ARGMAX
+    false,  // GGML_OP_COUNT_EQUAL
+    false,  // GGML_OP_REPEAT
+    false,  // GGML_OP_REPEAT_BACK
+    false,  // GGML_OP_CONCAT
+    false,  // GGML_OP_SILU_BACK
+    false,  // GGML_OP_NORM
+    false,  // GGML_OP_RMS_NORM
+    false,  // GGML_OP_RMS_NORM_BACK
+    false,  // GGML_OP_GROUP_NORM
 
-    auto *graph_ptr = get_qnn_graph_from_cache(ctx, dst);
-    bool succeed = graph_ptr && execute_graph(graph_ptr, dst);
+    true,   // GGML_OP_MUL_MAT
+    false,  // GGML_OP_MUL_MAT_ID
+    false,  // GGML_OP_OUT_PROD
 
-#ifndef NDEBUG
-    if (!succeed) {
-        const auto param_count = qnn::get_qnn_op_input_param_count(dst);
-        for (size_t i = 0; i < param_count; ++i) {
-            print_ggml_tensor(dst->src[i]);
-        }
-        print_ggml_tensor(dst);
-    }
-#endif
+    false,  // GGML_OP_SCALE
+    false,  // GGML_OP_SET
+    false,  // GGML_OP_CPY
+    false,  // GGML_OP_CONT
+    true,   // GGML_OP_RESHAPE
+    false,  // GGML_OP_VIEW
+    false,  // GGML_OP_PERMUTE
+    false,  // GGML_OP_TRANSPOSE
+    false,  // GGML_OP_GET_ROWS
+    false,  // GGML_OP_GET_ROWS_BACK
+    false,  // GGML_OP_DIAG
+    false,  // GGML_OP_DIAG_MASK_INF
+    false,  // GGML_OP_DIAG_MASK_ZERO
+    false,  // GGML_OP_SOFT_MAX
+    false,  // GGML_OP_SOFT_MAX_BACK
+    false,  // GGML_OP_ROPE
+    false,  // GGML_OP_ROPE_BACK
+    false,  // GGML_OP_CLAMP
+    false,  // GGML_OP_CONV_TRANSPOSE_1D
+    false,  // GGML_OP_IM2COL
+    false,  // GGML_OP_IM2COL_BACK
+    false,  // GGML_OP_CONV_TRANSPOSE_2D
+    false,  // GGML_OP_POOL_1D
+    false,  // GGML_OP_POOL_2D
+    false,  // GGML_OP_POOL_2D_BACK
+    false,  // GGML_OP_UPSCALE
+    false,  // GGML_OP_PAD
+    false,  // GGML_OP_PAD_REFLECT_1D
+    false,  // GGML_OP_ARANGE
+    false,  // GGML_OP_TIMESTEP_EMBEDDING
+    false,  // GGML_OP_ARGSORT
+    false,  // GGML_OP_LEAKY_RELU
 
-    return succeed;
-}
+    false,  // GGML_OP_FLASH_ATTN_EXT
+    false,  // GGML_OP_FLASH_ATTN_BACK
+    false,  // GGML_OP_SSM_CONV
+    false,  // GGML_OP_SSM_SCAN
+    false,  // GGML_OP_WIN_PART
+    false,  // GGML_OP_WIN_UNPART
+    false,  // GGML_OP_GET_REL_POS
+    false,  // GGML_OP_ADD_REL_POS
+    false,  // GGML_OP_RWKV_WKV6
+    false,  // GGML_OP_GATED_LINEAR_ATTN
 
-bool qnn_nop_impl(ggml_backend_qnn_device_context *ctx, ggml_tensor *dst) {
-    GGML_UNUSED(ctx);
-    GGML_UNUSED(dst);
-    return true;
-}
+    false,  // GGML_OP_UNARY
 
-constexpr const ggml_qnn_op_t kQnnOpsTable[] = {
-    qnn_nop_impl,        // GGML_OP_NONE
-    nullptr,             // GGML_OP_DUP
-    qnn_generic_op_impl, // GGML_OP_ADD
-    nullptr,             // GGML_OP_ADD1
-    nullptr,             // GGML_OP_ACC
-    qnn_generic_op_impl, // GGML_OP_SUB
-    qnn_generic_op_impl, // GGML_OP_MUL
-    qnn_generic_op_impl, // GGML_OP_DIV
-    nullptr,             // GGML_OP_SQR
-    qnn_generic_op_impl, // GGML_OP_SQRT
-    qnn_generic_op_impl, // GGML_OP_LOG
-    nullptr,             // GGML_OP_SIN
-    nullptr,             // GGML_OP_COS
-    nullptr,             // GGML_OP_SUM
-    nullptr,             // GGML_OP_SUM_ROWS
-    nullptr,             // GGML_OP_MEAN
-    nullptr,             // GGML_OP_ARGMAX
-    nullptr,             // GGML_OP_COUNT_EQUAL
-    nullptr,             // GGML_OP_REPEAT
-    nullptr,             // GGML_OP_REPEAT_BACK
-    nullptr,             // GGML_OP_CONCAT
-    nullptr,             // GGML_OP_SILU_BACK
-    nullptr,             // GGML_OP_NORM
-    nullptr,             // GGML_OP_RMS_NORM
-    nullptr,             // GGML_OP_RMS_NORM_BACK
-    nullptr,             // GGML_OP_GROUP_NORM
+    false,  // GGML_OP_MAP_UNARY
+    false,  // GGML_OP_MAP_BINARY
 
-    qnn_generic_op_impl, // GGML_OP_MUL_MAT
-    nullptr,             // GGML_OP_MUL_MAT_ID
-    nullptr,             // GGML_OP_OUT_PROD
+    false,  // GGML_OP_MAP_CUSTOM1_F32
+    false,  // GGML_OP_MAP_CUSTOM2_F32
+    false,  // GGML_OP_MAP_CUSTOM3_F32
 
-    nullptr,             // GGML_OP_SCALE
-    nullptr,             // GGML_OP_SET
-    nullptr,             // GGML_OP_CPY
-    nullptr,             // GGML_OP_CONT
-    qnn_nop_impl,        // GGML_OP_RESHAPE
-    nullptr,             // GGML_OP_VIEW
-    nullptr,             // GGML_OP_PERMUTE
-    nullptr,             // GGML_OP_TRANSPOSE
-    nullptr,             // GGML_OP_GET_ROWS
-    nullptr,             // GGML_OP_GET_ROWS_BACK
-    nullptr,             // GGML_OP_DIAG
-    nullptr,             // GGML_OP_DIAG_MASK_INF
-    nullptr,             // GGML_OP_DIAG_MASK_ZERO
-    nullptr,             // GGML_OP_SOFT_MAX
-    nullptr,             // GGML_OP_SOFT_MAX_BACK
-    nullptr,             // GGML_OP_ROPE
-    nullptr,             // GGML_OP_ROPE_BACK
-    nullptr,             // GGML_OP_CLAMP
-    nullptr,             // GGML_OP_CONV_TRANSPOSE_1D
-    nullptr,             // GGML_OP_IM2COL
-    nullptr,             // GGML_OP_IM2COL_BACK
-    nullptr,             // GGML_OP_CONV_TRANSPOSE_2D
-    nullptr,             // GGML_OP_POOL_1D
-    nullptr,             // GGML_OP_POOL_2D
-    nullptr,             // GGML_OP_POOL_2D_BACK
-    nullptr,             // GGML_OP_UPSCALE
-    nullptr,             // GGML_OP_PAD
-    nullptr,             // GGML_OP_PAD_REFLECT_1D
-    nullptr,             // GGML_OP_ARANGE
-    nullptr,             // GGML_OP_TIMESTEP_EMBEDDING
-    nullptr,             // GGML_OP_ARGSORT
-    nullptr,             // GGML_OP_LEAKY_RELU
+    false,  // GGML_OP_MAP_CUSTOM1
+    false,  // GGML_OP_MAP_CUSTOM2
+    false,  // GGML_OP_MAP_CUSTOM3
 
-    nullptr,             // GGML_OP_FLASH_ATTN_EXT
-    nullptr,             // GGML_OP_FLASH_ATTN_BACK
-    nullptr,             // GGML_OP_SSM_CONV
-    nullptr,             // GGML_OP_SSM_SCAN
-    nullptr,             // GGML_OP_WIN_PART
-    nullptr,             // GGML_OP_WIN_UNPART
-    nullptr,             // GGML_OP_GET_REL_POS
-    nullptr,             // GGML_OP_ADD_REL_POS
-    nullptr,             // GGML_OP_RWKV_WKV6
-    nullptr,             // GGML_OP_GATED_LINEAR_ATTN
-
-    nullptr,             // GGML_OP_UNARY
-
-    nullptr,             // GGML_OP_MAP_UNARY
-    nullptr,             // GGML_OP_MAP_BINARY
-
-    nullptr,             // GGML_OP_MAP_CUSTOM1_F32
-    nullptr,             // GGML_OP_MAP_CUSTOM2_F32
-    nullptr,             // GGML_OP_MAP_CUSTOM3_F32
-
-    nullptr,             // GGML_OP_MAP_CUSTOM1
-    nullptr,             // GGML_OP_MAP_CUSTOM2
-    nullptr,             // GGML_OP_MAP_CUSTOM3
-
-    nullptr,             // GGML_OP_CROSS_ENTROPY_LOSS
-    nullptr,             // GGML_OP_CROSS_ENTROPY_LOSS_BACK
-    nullptr,             // GGML_OP_OPT_STEP_ADAMW
+    false,  // GGML_OP_CROSS_ENTROPY_LOSS
+    false,  // GGML_OP_CROSS_ENTROPY_LOSS_BACK
+    false,  // GGML_OP_OPT_STEP_ADAMW
 
     // ggml_unary_op
-    nullptr,             // GGML_UNARY_OP_ABS
-    nullptr,             // GGML_UNARY_OP_SGN
-    nullptr,             // GGML_UNARY_OP_NEG
-    nullptr,             // GGML_UNARY_OP_STEP
-    nullptr,             // GGML_UNARY_OP_TANH
-    nullptr,             // GGML_UNARY_OP_ELU
-    nullptr,             // GGML_UNARY_OP_RELU
-    nullptr,             // GGML_UNARY_OP_SIGMOID
-    qnn_generic_op_impl, // GGML_UNARY_OP_GELU
-    nullptr,             // GGML_UNARY_OP_GELU_QUICK
-    nullptr,             // GGML_UNARY_OP_SILU
-    nullptr,             // GGML_UNARY_OP_HARDSWISH
-    nullptr,             // GGML_UNARY_OP_HARDSIGMOID
-    nullptr,             // GGML_UNARY_OP_EXP
+    false,  // GGML_UNARY_OP_ABS
+    false,  // GGML_UNARY_OP_SGN
+    false,  // GGML_UNARY_OP_NEG
+    false,  // GGML_UNARY_OP_STEP
+    false,  // GGML_UNARY_OP_TANH
+    false,  // GGML_UNARY_OP_ELU
+    false,  // GGML_UNARY_OP_RELU
+    false,  // GGML_UNARY_OP_SIGMOID
+    true,   // GGML_UNARY_OP_GELU
+    false,  // GGML_UNARY_OP_GELU_QUICK
+    false,  // GGML_UNARY_OP_SILU
+    false,  // GGML_UNARY_OP_HARDSWISH
+    false,  // GGML_UNARY_OP_HARDSIGMOID
+    false,  // GGML_UNARY_OP_EXP
 };
 
-static_assert(kQnnOpsTable[GGML_OP_NONE] == qnn_nop_impl, "GGML_OP_NONE does not match the qnn_nop_impl function");
-static_assert(kQnnOpsTable[GGML_OP_ADD] == qnn_generic_op_impl,
-              "GGML_OP_ADD does not match the qnn_generic_op_impl function");
-static_assert(kQnnOpsTable[GGML_OP_MUL] == qnn_generic_op_impl,
-              "GGML_OP_MUL does not match the qnn_generic_op_impl function");
-static_assert(kQnnOpsTable[GGML_OP_MUL_MAT] == qnn_generic_op_impl,
-              "GGML_OP_MUL_MAT does not match the qnn_generic_op_impl function");
-static_assert(kQnnOpsTable[GGML_OP_RESHAPE] == qnn_nop_impl,
-              "GGML_OP_RESHAPE does not match the qnn_nop_impl function");
-static_assert(kQnnOpsTable[GGML_OP_VIEW] == nullptr, "GGML_OP_VIEW is not nullptr");
-static_assert(std::size(kQnnOpsTable) == (GGML_OP_COUNT + GGML_UNARY_OP_COUNT),
-              "GGML_OP_COUNT does not match the size of the kQnnOpsTable table");
+static_assert(kQnnSupportedOps[GGML_OP_NONE], "GGML_OP_NONE is not true");
+static_assert(kQnnSupportedOps[GGML_OP_ADD], "GGML_OP_ADD is not true");
+static_assert(kQnnSupportedOps[GGML_OP_MUL], "GGML_OP_MUL is not true");
+static_assert(kQnnSupportedOps[GGML_OP_MUL_MAT],
+              "GGML_OP_MUL_MAT is not true, please check the kQnnSupportedOps table in the backend-ops.cpp file");
+static_assert(kQnnSupportedOps[GGML_OP_RESHAPE], "GGML_OP_RESHAPE is not true");
+static_assert(!kQnnSupportedOps[GGML_OP_VIEW], "GGML_OP_VIEW is not false");
+static_assert(std::size(kQnnSupportedOps) == (GGML_OP_COUNT + GGML_UNARY_OP_COUNT),
+              "GGML_OP_COUNT does not match the size of the kQnnSupportedOps table");
 
-bool ggml_qnn_supports_tensor(ggml_backend_qnn_device_context *ctx, const ggml_tensor *tensor) {
+bool ggml_qnn_supports_tensor(ggml_backend_qnn_device_context * ctx, const ggml_tensor * tensor) {
     if (!tensor) {
-        QNN_LOG_DEBUG("tensor is nullptr");
+        QNN_LOG_DEBUG("tensor is nullptr\n");
        return false;
     }
 
 #ifndef NDEBUG
     if (tensor->view_src) {
-        auto *src_tensor = tensor->view_src;
-        QNN_LOG_DEBUG("[%s]tensor(%s_%dx%dx%dx%d) is a view, src: %s_%dx%dx%dx%d", qnn::get_backend_name(ctx->device),
-                      ggml_get_name(tensor), tensor->ne[0], tensor->ne[1], tensor->ne[2], tensor->ne[3],
-                      ggml_get_name(src_tensor), src_tensor->ne[0], src_tensor->ne[1], src_tensor->ne[2],
-                      src_tensor->ne[3]);
+        auto * src_tensor = tensor->view_src;
+        QNN_LOG_DEBUG("[%s]tensor(%s_%dx%dx%dx%d) is a view, src: %s_%dx%dx%dx%d\n", qnn::get_backend_name(ctx->device),
+                      ggml_get_name(tensor), (int) tensor->ne[0], (int) tensor->ne[1], (int) tensor->ne[2],
+                      (int) tensor->ne[3], ggml_get_name(src_tensor), (int) src_tensor->ne[0], (int) src_tensor->ne[1],
+                      (int) src_tensor->ne[2], (int) src_tensor->ne[3]);
     }
 #endif
 
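Note: the size static_assert above implies one flat table covering both enums, with unary ops stored past GGML_OP_COUNT; a sketch of the indexing that qnn::get_qnn_op_index() presumably performs (an assumption, not that helper's actual source):

    #include "ggml.h"

    static size_t op_table_index(const ggml_tensor * op) {
        // regular ops map to their ggml_op value; GGML_OP_UNARY dispatches on
        // the unary sub-op, offset past the GGML_OP_COUNT boundary
        if (op->op == GGML_OP_UNARY) {
            return (size_t) GGML_OP_COUNT + (size_t) ggml_get_unary_op(op);
        }
        return (size_t) op->op;
    }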
@@ -390,13 +335,14 @@ bool ggml_qnn_supports_tensor(ggml_backend_qnn_device_context *ctx, const ggml_t
         case GGML_TYPE_Q8_0:
         case GGML_TYPE_Q4_0:
             if (!(ctx->supported_types & (uint64_t(1) << tensor->type))) {
-                QNN_LOG_DEBUG("[%s]unsupported data type %s, supported_types: 0x%x", qnn::get_backend_name(ctx->device),
-                              ggml_type_name(tensor->type), ctx->supported_types);
+                QNN_LOG_DEBUG("[%s]unsupported data type %s, supported_types: 0x%x\n",
+                              qnn::get_backend_name(ctx->device), ggml_type_name(tensor->type),
+                              (unsigned int) ctx->supported_types);
                 return false;
             }
             break;
         default:
-            QNN_LOG_DEBUG("[%s]unsupported data type %s", qnn::get_backend_name(ctx->device),
+            QNN_LOG_DEBUG("[%s]unsupported data type %s\n", qnn::get_backend_name(ctx->device),
                           ggml_type_name(tensor->type));
             return false;
     }
@@ -404,7 +350,7 @@ bool ggml_qnn_supports_tensor(ggml_backend_qnn_device_context *ctx, const ggml_t
     return true;
 }
 
-bool ggnl_qnn_supports_op_tensor(ggml_backend_qnn_device_context *ctx, const ggml_tensor *op) {
+bool ggnl_qnn_supports_op_tensor(ggml_backend_qnn_device_context * ctx, const ggml_tensor * op) {
     if (op->op == GGML_OP_NONE) {
         return true;
     }
@@ -423,14 +369,14 @@ bool ggnl_qnn_supports_op_tensor(ggml_backend_qnn_device_context *ctx, const ggm
     return true;
 }
 
-bool ggml_qnn_supports_matmul_op(ggml_backend_qnn_device_context *ctx, const ggml_tensor *op) {
+bool ggml_qnn_supports_matmul_op(ggml_backend_qnn_device_context * ctx, const ggml_tensor * op) {
     constexpr const size_t kMaxNpuTensorSize = 8192L * 2048 + 8192 * 512 + 2048 * 512;
-    constexpr const auto get_tensor_size = [](const ggml_tensor *tensor) -> size_t {
+    constexpr const auto get_tensor_size = [](const ggml_tensor * tensor) -> size_t {
         return tensor->ne[0] * tensor->ne[1] * tensor->ne[2] * tensor->ne[3];
     };
 
-    auto *src0 = op->src[0];
-    auto *src1 = op->src[1];
+    auto * src0 = op->src[0];
+    auto * src1 = op->src[1];
     switch (ctx->device) {
         case QNN_BACKEND_NPU:
             if (src1->ne[2] != src0->ne[2] || src1->ne[3] != src0->ne[3]) {
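Note: kMaxNpuTensorSize above works out to 8192*2048 + 8192*512 + 2048*512 = 22,020,096 elements, which appears to correspond to the combined element count of a mul_mat with an 8192x2048 src0, an 8192x512 src1, and a 2048x512 result.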
@@ -438,12 +384,10 @@ bool ggml_qnn_supports_matmul_op(ggml_backend_qnn_device_context *ctx, const ggm
                 * TODO: remove the blocker here when NPU backend supports mul_mat like this:
                 *   [ne03, ne02, n, k] * [ne03 * x, ne02 * y, m, k] -> [ne03 * x, ne02 * y, m, n]
                 */
-                QNN_LOG_DEBUG("[qnn-npu][MUL_MAT]src0 and src1 dimensions are not equal, support/unsupported: %d/%d",
-                              ctx->support_op_count.load(), ++(ctx->unsupported_op_count));
+                QNN_LOG_DEBUG("[qnn-npu][MUL_MAT]src0 and src1 dimensions are not equal\n");
                 return false;
             } else if (get_tensor_size(src0) + get_tensor_size(src1) + get_tensor_size(op) >= kMaxNpuTensorSize) {
-                QNN_LOG_DEBUG("[qnn-npu][MUL_MAT]tensor size is too large, support/unsupported: %d/%d",
-                              ctx->support_op_count.load(), ++(ctx->unsupported_op_count));
+                QNN_LOG_DEBUG("[qnn-npu][MUL_MAT]tensor size is too large\n");
                 return false;
             }
             // fall through, from test here, the convert op is super slow on NPU:
@@ -451,9 +395,8 @@ bool ggml_qnn_supports_matmul_op(ggml_backend_qnn_device_context *ctx, const ggm
         case QNN_BACKEND_GPU:
             if (src0->type != src1->type || src0->type != op->type) {
                 // there's no convert op for GPU.
-                QNN_LOG_DEBUG(
-                    "[qnn-gpu][MUL_MAT]type src0(%d), src1(%d) and op(%d) are not equal, support/unsupported: %d/%d",
-                    src0->type, src1->type, op->type, ctx->support_op_count.load(), ++(ctx->unsupported_op_count));
+                QNN_LOG_DEBUG("[qnn-gpu][MUL_MAT]type src0(%s), src1(%s) and op(%s) are not equal\n",
+                              ggml_type_name(src0->type), ggml_type_name(src1->type), ggml_type_name(op->type));
                 return false;
             }
             break;
@@ -462,31 +405,31 @@ bool ggml_qnn_supports_matmul_op(ggml_backend_qnn_device_context *ctx, const ggm
     }
 
     if ((src1->ne[2] % src0->ne[2]) != 0 || (src1->ne[3] % src0->ne[3]) != 0) {
-        QNN_LOG_DEBUG("[%s][MUL_MAT]src0 and src1 dimensions are not equal, support/unsupported: %d/%d",
-                      qnn::get_backend_name(ctx->device), ctx->support_op_count.load(), ++(ctx->unsupported_op_count));
+        QNN_LOG_DEBUG("[%s][MUL_MAT]src0 and src1 dimensions are not equal\n", qnn::get_backend_name(ctx->device));
         return false;
     }
 
-    QNN_LOG_DEBUG("[%s][MUL_MAT]supported matmul op, support/unsupported: %d/%d", qnn::get_backend_name(ctx->device),
-                  ++(ctx->support_op_count), ctx->unsupported_op_count.load());
+    QNN_LOG_DEBUG("[%s][MUL_MAT]supported matmul op\n", qnn::get_backend_name(ctx->device));
     return true;
 }
 
-} // namespace
+}  // namespace
 
 namespace qnn {
 
-bool device_supports_op(ggml_backend_qnn_device_context *ctx, const ggml_tensor *op) {
+bool device_supports_op(ggml_backend_qnn_device_context * ctx, const ggml_tensor * op) {
     // Note that this function could be called before the device context is initialized
     if (op->op == GGML_OP_NONE) {
         return true;
     }
 
-    if (!kQnnOpsTable[qnn::get_qnn_op_index(op)]) {
+    if (!kQnnSupportedOps[qnn::get_qnn_op_index(op)]) {
 #ifndef NDEBUG
         std::string op_key;
         get_graph_key_from_op(op, op_key);
-        QNN_LOG_DEBUG("[%s]unsupported op", op_key.c_str());
+        ctx->unsupported_op_count++;
+        QNN_LOG_DEBUG("[%s][%s]op was unsupported, support/unsupported: %d/%d\n", qnn::get_backend_name(ctx->device),
+                      op_key.c_str(), ctx->supported_op_count.load(), ctx->unsupported_op_count.load());
 #endif
         return false;
     }
@@ -495,48 +438,69 @@ bool device_supports_op(ggml_backend_qnn_device_context *ctx, const ggml_tensor
 #ifndef NDEBUG
         std::string tensor_dims;
         append_tensor_dimensions(op, tensor_dims);
-        QNN_LOG_DEBUG("[%s]unsupported tensor(%s)", ggml_op_name(op->op), tensor_dims.c_str());
+        QNN_LOG_DEBUG("[%s][%s]unsupported tensor(%s), support/unsupported: %d/%d\n",
+                      qnn::get_backend_name(ctx->device), ggml_op_name(op->op), tensor_dims.c_str(),
+                      ctx->supported_op_count.load(), ctx->unsupported_op_count.load());
 #endif
         return false;
     }
 
+    bool is_op_supported = true;
     if (op->op == GGML_OP_UNARY) {
         const auto unary_op = ggml_get_unary_op(op);
         if (unary_op == GGML_UNARY_OP_GELU) {
             // TODO: fix this
-            QNN_LOG_DEBUG("[GELU]unsupported unary op GGML_UNARY_OP_GELU for NPU");
-            return false;
+            QNN_LOG_DEBUG("[GELU]unsupported unary op GGML_UNARY_OP_GELU for NPU\n");
+            is_op_supported = false;
         }
     } else {
-        auto *src0 = op->src[0];
-        auto *src1 = op->src[1];
+        auto * src0 = op->src[0];
+        auto * src1 = op->src[1];
         switch (op->op) {
             case GGML_OP_ADD:
             case GGML_OP_SUB:
             case GGML_OP_MUL:
             case GGML_OP_DIV:
                 if (!ggml_are_same_shape(src0, src1)) {
-                    QNN_LOG_DEBUG("[ADD] src0 and src1 dimensions are not equal");
-                    return false;
+                    QNN_LOG_DEBUG("[%s][%s] src0 and src1 dimensions are not equal\n",
+                                  qnn::get_backend_name(ctx->device), ggml_op_name(op->op));
+                    is_op_supported = false;
                 }
                 break;
 
             case GGML_OP_MUL_MAT:
-                return ggml_qnn_supports_matmul_op(ctx, op);
+                is_op_supported = ggml_qnn_supports_matmul_op(ctx, op);
+                break;
 
             default:
-                return false;
+                // default to supported
+                break;
         }
     }
 
-    return true;
+#ifndef NDEBUG
+    if (is_op_supported) {
+        ctx->supported_op_count++;
+        QNN_LOG_DEBUG("[%s][%s]op was supported, support/unsupported: %d/%d\n", qnn::get_backend_name(ctx->device),
+                      ggml_op_name(op->op), ctx->supported_op_count.load(), ctx->unsupported_op_count.load());
+    } else {
+        ctx->unsupported_op_count++;
+        QNN_LOG_DEBUG("[%s][%s]op was unsupported, support/unsupported: %d/%d\n", qnn::get_backend_name(ctx->device),
+                      ggml_op_name(op->op), ctx->supported_op_count.load(), ctx->unsupported_op_count.load());
+    }
+#endif
+
+    return is_op_supported;
 }
 
-bool device_compute_graph(ggml_backend_qnn_device_context *ctx, ggml_cgraph *cgraph) {
-    QNN_LOG_DEBUG("[%s]compute graph start, nodes count: %d", qnn::get_backend_name(ctx->device), (int)cgraph->n_nodes);
+bool device_compute_graph(ggml_backend_qnn_device_context * ctx, ggml_cgraph * cgraph) {
+    QNN_LOG_DEBUG("[%s]compute graph start, nodes count: %d\n", qnn::get_backend_name(ctx->device),
+                  (int) cgraph->n_nodes);
 
     auto qnn_graph = get_qnn_graph_from_cache(ctx, cgraph);
-    bool success = qnn_graph && qnn_graph->execute(cgraph);
+    bool success   = qnn_graph && qnn_graph->execute(cgraph);
 
-    QNN_LOG_DEBUG("[%s]compute graph, success: %d", qnn::get_backend_name(ctx->device), (int)success);
+    QNN_LOG_DEBUG("[%s]compute graph, success: %d\n", qnn::get_backend_name(ctx->device), (int) success);
     return success;
 }
 
-} // namespace qnn
+}  // namespace qnn
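Note: the supported/unsupported counters introduced above are debug-only atomics on the device context; a minimal sketch of the same bookkeeping pattern (names here are illustrative, not the context's fields):

    #include <atomic>
    #include <cstdio>

    static std::atomic_uint32_t supported{0};
    static std::atomic_uint32_t unsupported{0};

    static void record(bool ok) {
        // increment atomically, read with load() when formatting the log line
        (ok ? supported : unsupported)++;
        printf("support/unsupported: %u/%u\n", supported.load(), unsupported.load());
    }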
@@ -1,12 +1,11 @@
 #pragma once
 
-#include "ggml.h"
-
 #include "backend.hpp"
+#include "ggml.h"
 
 namespace qnn {
 
-bool device_supports_op(ggml_backend_qnn_device_context *ctx, const ggml_tensor *op);
-bool device_compute_graph(ggml_backend_qnn_device_context *ctx, ggml_cgraph *cgraph);
+bool device_supports_op(ggml_backend_qnn_device_context * ctx, const ggml_tensor * op);
+bool device_compute_graph(ggml_backend_qnn_device_context * ctx, ggml_cgraph * cgraph);
 
-} // namespace qnn
+}  // namespace qnn
@@ -2,7 +2,7 @@
 #pragma once
 
 #ifndef NDEBUG
-#include <atomic>
+#    include <atomic>
 #endif
 
 #include <memory>
@@ -10,39 +10,41 @@
 #include <unordered_map>
 #include <unordered_set>
 
-#include "ggml.h"
-
 #include "ggml-backend.h"
 #include "ggml-qnn.h"
 
+#include "ggml.h"
 #include "graph.hpp"
 #include "qnn-lib.hpp"
 
 namespace qnn {
 typedef std::unordered_map<std::string, std::unique_ptr<qnn::qnn_graph>> qnn_graph_cache_t;
-} // namespace qnn
+}  // namespace qnn
 
 struct ggml_backend_qnn_device_context {
     // initialize in constructor
-    QNNBackend device;
-    size_t threads;
+    QNNBackend  device;
+    size_t      threads;
     std::string name;
     std::string lib_name;
 
     // initialize in qnn init
-    qnn::qcom_socinfo socinfo = {};
-    uint64_t supported_types;
-    std::shared_ptr<qnn::qnn_instance> instance;
+    qnn::qcom_socinfo                   socinfo = {};
+    uint64_t                            supported_types;
+    std::shared_ptr<qnn::qnn_instance>  instance;
     std::shared_ptr<qnn::qnn_interface> qnn_interface;
 
     qnn::qnn_graph_cache_t qnn_graph_cache;
 
 #ifndef NDEBUG
-    std::atomic_uint32_t support_op_count = 0;
+    std::atomic_uint32_t supported_op_count   = 0;
     std::atomic_uint32_t unsupported_op_count = 0;
 #endif
 
-    explicit ggml_backend_qnn_device_context(QNNBackend device, size_t threads, const char *name, const char *lib_name,
-                                             uint64_t supported_types)
-        : device(device), threads(threads), name(name), lib_name(lib_name), supported_types(supported_types) {}
+    explicit ggml_backend_qnn_device_context(QNNBackend device, size_t threads, const char * name,
+                                             const char * lib_name, uint64_t supported_types) :
+        device(device),
+        threads(threads),
+        name(name),
+        lib_name(lib_name),
+        supported_types(supported_types) {}
 };
@@ -14,7 +14,7 @@ namespace qnn {
  * This abstract class defines the interface for managing generic memory buffers in a QNN context.
  */
 class qnn_buffer_interface {
-public:
+  public:
     virtual ~qnn_buffer_interface() = default;
 
     /**
@@ -35,7 +35,7 @@ public:
     *
     * @return A pointer to the buffer.
     */
-    virtual uint8_t *get_buffer() = 0;
+    virtual uint8_t * get_buffer() = 0;
 
     /**
     * @brief Gets the buffer pointer.
@@ -68,21 +68,22 @@ using qnn_buffer_ptr = std::shared_ptr<qnn_buffer_interface>;
  * handles cleanup of the buffer and its associated memory handle upon destruction.
  */
 class qnn_rpc_buffer : public qnn_buffer_interface {
-public:
+  public:
     qnn_rpc_buffer(std::shared_ptr<qnn_instance> qnn_instance, const size_t size, const uint32_t rank,
-                   uint32_t *dimensions, Qnn_DataType_t data_type)
-        : _size(size), _qnn_instance(qnn_instance) {
-
-        _qnn_rpc_buffer = static_cast<uint8_t *>(qnn_instance->alloc_rpcmem(size, alignof(uint8_t *)));
+                   uint32_t * dimensions, Qnn_DataType_t data_type) :
+        _size(size),
+        _qnn_instance(qnn_instance) {
+        _qnn_rpc_buffer     = static_cast<uint8_t *>(qnn_instance->alloc_rpcmem(size, alignof(uint8_t *)));
         _qnn_rpc_mem_handle = qnn_instance->register_rpcmem(_qnn_rpc_buffer, rank, dimensions, data_type);
         if (!_qnn_rpc_buffer || !_qnn_rpc_mem_handle) {
-            QNN_LOG_WARN("Failed to register RPC memory: buffer or memory handle is null");
+            QNN_LOG_WARN("Failed to register RPC memory: buffer or memory handle is null\n");
             // let the destructor free the buffer
             return;
         }
 
-        QNN_LOG_DEBUG("alloc rpcmem(%p) successfully, size %d", _qnn_rpc_buffer, (int)size);
+        QNN_LOG_DEBUG("alloc rpcmem(%p) successfully, size %d\n", (void *) _qnn_rpc_buffer, (int) size);
     }
 
     ~qnn_rpc_buffer() {
         if (_qnn_instance) {
             if (_qnn_rpc_mem_handle) {
@@ -97,14 +98,16 @@ public:
 
     bool is_valid() const override { return _qnn_rpc_buffer && _qnn_rpc_mem_handle; }
 
-    uint8_t *get_buffer() override { return _qnn_rpc_buffer; }
+    uint8_t * get_buffer() override { return _qnn_rpc_buffer; }
 
     size_t get_size() const override { return _size; }
 
     Qnn_MemHandle_t get_mem_handle() const override { return _qnn_rpc_mem_handle; }
 
-private:
-    size_t _size = 0;
-    uint8_t *_qnn_rpc_buffer = nullptr;
-    Qnn_MemHandle_t _qnn_rpc_mem_handle = nullptr;
+  private:
+    size_t          _size               = 0;
+    uint8_t *       _qnn_rpc_buffer     = nullptr;
+    Qnn_MemHandle_t _qnn_rpc_mem_handle = nullptr;
     std::shared_ptr<qnn_instance> _qnn_instance;
 
     DISABLE_COPY(qnn_rpc_buffer);
@@ -119,12 +122,12 @@ private:
  * a consistent interface for buffer management.
  */
 class qnn_mem_buffer : public qnn_buffer_interface {
-public:
-    explicit qnn_mem_buffer(const uint8_t *data, size_t size) {
+  public:
+    explicit qnn_mem_buffer(const uint8_t * data, size_t size) {
         _buffer = reinterpret_cast<uint8_t *>(qnn::page_align_alloc(size));
 
         if (!_buffer) {
-            QNN_LOG_WARN("failed to allocate %.2f MiB", float(size / (1 << 20)));
+            QNN_LOG_WARN("failed to allocate %.2f MiB\n", float(size / (1 << 20)));
             return;
         }
 
@@ -134,49 +137,51 @@ public:
             memcpy(_buffer, data, size);
         }
 
-        QNN_LOG_DEBUG("alloc buffer: %p, size: %ld", _buffer, size);
+        QNN_LOG_DEBUG("alloc buffer: %p, size: %ld\n", (void *) _buffer, (long) size);
     }
 
     explicit qnn_mem_buffer(size_t size) : qnn_mem_buffer(nullptr, size) {}
 
     ~qnn_mem_buffer() {
-        QNN_LOG_DEBUG("free buffer: %p, size: %ld", _buffer, _size);
+        QNN_LOG_DEBUG("free buffer: %p, size: %ld\n", (void *) _buffer, (long) _size);
         // the free will do nothing if the _buffer is nullptr
         qnn::align_free(_buffer);
     }
 
     bool is_valid() const override { return _buffer != nullptr; }
 
-    uint8_t *get_buffer() override { return _buffer; }
+    uint8_t * get_buffer() override { return _buffer; }
 
     size_t get_size() const override { return _size; }
 
     Qnn_MemHandle_t get_mem_handle() const override { return nullptr; }
 
-private:
-    size_t _size = 0;
-    uint8_t *_buffer = nullptr;
+  private:
+    size_t    _size   = 0;
+    uint8_t * _buffer = nullptr;
 
     DISABLE_COPY(qnn_mem_buffer);
     DISABLE_MOVE(qnn_mem_buffer);
 };
 
 class qnn_mem_buffer_slice : public qnn_buffer_interface {
-public:
-    qnn_mem_buffer_slice(const uint8_t *buffer, size_t size) : _buffer(const_cast<uint8_t *>(buffer)), _size(size) {}
+  public:
+    qnn_mem_buffer_slice(const uint8_t * buffer, size_t size) : _buffer(const_cast<uint8_t *>(buffer)), _size(size) {}
 
     bool is_valid() const override { return _buffer && _size; }
 
-    uint8_t *get_buffer() override { return _buffer; }
+    uint8_t * get_buffer() override { return _buffer; }
 
     size_t get_size() const override { return _size; }
 
     Qnn_MemHandle_t get_mem_handle() const override { return nullptr; }
 
-private:
-    uint8_t *_buffer = nullptr;
-    size_t _size = 0;
+  private:
+    uint8_t * _buffer = nullptr;
+    size_t    _size   = 0;
 
     DISABLE_COPY(qnn_mem_buffer_slice);
     DISABLE_MOVE(qnn_mem_buffer_slice);
 };
 
-} // namespace qnn
+}  // namespace qnn
@@ -0,0 +1,76 @@
+#pragma once
+
+#ifdef __linux__
+#    include <dlfcn.h>
+#    include <fcntl.h>
+#elif defined(_WIN32)
+#    define WIN32_LEAN_AND_MEAN
+#    ifndef NOMINMAX
+#        define NOMINMAX
+#    endif
+#    include <windows.h>
+#endif
+
+#include <string>
+
+namespace qnn {
+
+#ifdef __linux__
+typedef void * dl_handler_t;
+
+inline qnn::dl_handler_t dl_load(const std::string & lib_path) {
+    return dlopen(lib_path.c_str(), RTLD_NOW | RTLD_LOCAL);
+}
+
+inline void * dl_sym(qnn::dl_handler_t handle, const std::string & symbol) {
+    return dlsym(handle, symbol.c_str());
+}
+
+inline bool dl_unload(qnn::dl_handler_t handle) {
+    return dlclose(handle) == 0;
+}
+
+inline const char * dl_error() {
+    return dlerror();
+}
+#elif defined(_WIN32)
+using dl_handler_t = HMODULE;
+
+inline qnn::dl_handler_t dl_load(const std::string & lib_path) {
+    // suppress error dialogs for missing DLLs
+    auto old_mode = SetErrorMode(SEM_FAILCRITICALERRORS);
+    SetErrorMode(old_mode | SEM_FAILCRITICALERRORS);
+
+    auto handle = LoadLibraryA(lib_path.c_str());  // TODO: use wstring version for unicode paths
+
+    SetErrorMode(old_mode);
+    return handle;
+}
+
+inline void * dl_sym(qnn::dl_handler_t handle, const std::string & symbol) {
+    auto old_mode = SetErrorMode(SEM_FAILCRITICALERRORS);
+    SetErrorMode(old_mode | SEM_FAILCRITICALERRORS);
+
+    void * p = (void *) GetProcAddress(handle, symbol.c_str());
+
+    SetErrorMode(old_mode);
+    return p;
+}
+
+inline bool dl_unload(qnn::dl_handler_t handle) {
+    FreeLibrary(handle);
+    return true;
+}
+
+inline const char * dl_error() {
+    // TODO: implement dl_error for Windows
+    return nullptr;
+}
+
+#endif
+
+template <typename Fn> Fn dl_sym_typed(qnn::dl_handler_t handle, const std::string & function_name) {
+    return reinterpret_cast<Fn>(dl_sym(handle, function_name));
+}
+
+}  // namespace qnn
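Note: a hypothetical usage of the wrappers in this new loader header; the library path and symbol name below are placeholders, not the backend's real ones:

    #include <cstdio>

    typedef int (*init_fn_t)(void);

    static void load_example() {
        qnn::dl_handler_t handle = qnn::dl_load("libexample.so");
        if (!handle) {
            const char * err = qnn::dl_error();  // may be nullptr on Windows (see TODO above)
            printf("dl_load failed: %s\n", err ? err : "unknown");
            return;
        }
        // typed lookup wraps dl_sym + reinterpret_cast
        auto init = qnn::dl_sym_typed<init_fn_t>(handle, "example_init");
        if (init) {
            init();
        }
        qnn::dl_unload(handle);
    }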
@@ -1,71 +0,0 @@
-#pragma once
-
-#ifdef __linux__
-#include <dlfcn.h>
-#include <fcntl.h>
-#elif defined(_WIN32)
-#define WIN32_LEAN_AND_MEAN
-#ifndef NOMINMAX
-#define NOMINMAX
-#endif
-#include <windows.h>
-#endif
-
-#include <string>
-
-namespace qnn {
-
-#ifdef __linux__
-typedef void *dl_handler_t;
-
-inline qnn::dl_handler_t dl_load(const std::string &lib_path) {
-    return dlopen(lib_path.c_str(), RTLD_NOW | RTLD_LOCAL);
-}
-
-inline void *dl_sym(qnn::dl_handler_t handle, const std::string &symbol) { return dlsym(handle, symbol.c_str()); }
-
-inline bool dl_unload(qnn::dl_handler_t handle) { return dlclose(handle) == 0; }
-
-inline const char *dl_error() { return dlerror(); }
-#elif defined(_WIN32)
-using dl_handler_t = HMODULE;
-
-inline qnn::dl_handler_t dl_load(const std::string &lib_path) {
-    // suppress error dialogs for missing DLLs
-    auto old_mode = SetErrorMode(SEM_FAILCRITICALERRORS);
-    SetErrorMode(old_mode | SEM_FAILCRITICALERRORS);
-
-    auto handle = LoadLibraryA(lib_path.c_str()); // TODO: use wstring version for unicode paths
-
-    SetErrorMode(old_mode);
-    return handle;
-}
-
-inline void *dl_sym(qnn::dl_handler_t handle, const std::string &symbol) {
-    auto old_mode = SetErrorMode(SEM_FAILCRITICALERRORS);
-    SetErrorMode(old_mode | SEM_FAILCRITICALERRORS);
-
-    void *p = (void *)GetProcAddress(handle, symbol.c_str());
-
-    SetErrorMode(old_mode);
-    return p;
-}
-
-inline bool dl_unload(qnn::dl_handler_t handle) {
-    FreeLibrary(handle);
-    return true;
-}
-
-inline const char *dl_error() {
-    // TODO: implement dl_error for Windows
-    return nullptr;
-}
-
-#endif
-
-template <typename Fn>
-Fn dl_sym_typed(qnn::dl_handler_t handle, const std::string &function_name) {
-    return reinterpret_cast<Fn>(dl_sym(handle, function_name));
-}
-
-} // namespace qnn
@@ -6,7 +6,6 @@
 
 #include "ggml-backend-impl.h"
 #include "ggml-impl.h"
-
 #include "ggml-qnn/backend-ops.hpp"
 #include "ggml-qnn/backend.hpp"
 #include "ggml-qnn/logger.hpp"
@@ -19,9 +18,9 @@
 //
 // =================================================================================================
 #ifdef NDEBUG
-#define ENABLE_QNNBACKEND_PERF 0 // enable/disable op's perf info
+#    define ENABLE_QNNBACKEND_PERF 0  // enable/disable op's perf info
 #else
-#define ENABLE_QNNBACKEND_PERF 1 // enable/disable op's perf info
+#    define ENABLE_QNNBACKEND_PERF 1  // enable/disable op's perf info
 #endif
 
 #define QNN_BACKEND_NAME "qnn"
@@ -29,50 +28,42 @@
 namespace {
 
 #ifdef _WIN32
-constexpr const char *kQnnCpuLibName = "QnnCpu.dll";
-constexpr const char *kQnnGpuLibName = "QnnGpu.dll";
-constexpr const char *kQnnNpuLibName = "QnnHtp.dll";
+constexpr const char * kQnnCpuLibName = "QnnCpu.dll";
+constexpr const char * kQnnGpuLibName = "QnnGpu.dll";
+constexpr const char * kQnnNpuLibName = "QnnHtp.dll";
 #else
-constexpr const char *kQnnCpuLibName = "libQnnCpu.so";
-constexpr const char *kQnnGpuLibName = "libQnnGpu.so";
-constexpr const char *kQnnNpuLibName = "libQnnHtp.so";
+constexpr const char * kQnnCpuLibName = "libQnnCpu.so";
+constexpr const char * kQnnGpuLibName = "libQnnGpu.so";
+constexpr const char * kQnnNpuLibName = "libQnnHtp.so";
 #endif
 
 struct qnn_device_caps {
-    const char *name;
-    const char *description;
-    const char *lib_name;
+    const char *               name;
+    const char *               description;
+    const char *               lib_name;
     enum ggml_backend_dev_type type;
 
     // TODO: should get this caps from device
     uint64_t supported_types;
 };
 
 // TODO: should move this to qnn-lib.cpp
 constexpr const qnn_device_caps kDeviceCaps[] = {
     {
-        // https://docs.qualcomm.com/bundle/publicresource/topics/80-63442-50/CpuOpDefSupplement.html#matmul
-        "qnn-cpu",
-        "Qualcomm Kryo CPU",
-        kQnnCpuLibName,
-        GGML_BACKEND_DEVICE_TYPE_CPU,
-        (1 << GGML_TYPE_I8) | (1 << GGML_TYPE_F32),
-    },
+        "qnn-cpu", "Qualcomm Kryo CPU",
+        kQnnCpuLibName, GGML_BACKEND_DEVICE_TYPE_CPU,
+        (1 << GGML_TYPE_I8) | (1 << GGML_TYPE_F32),
+    },  // https://docs.qualcomm.com/bundle/publicresource/topics/80-63442-50/CpuOpDefSupplement.html#matmul
     {
-        // https://docs.qualcomm.com/bundle/publicresource/topics/80-63442-50/GpuOpDefSupplement.html#matmul
-        "qnn-gpu",
-        "Qualcomm Adreno GPU",
-        kQnnGpuLibName,
-        GGML_BACKEND_DEVICE_TYPE_GPU,
-        (1 << GGML_TYPE_F32) | (1 << GGML_TYPE_F16),
-    },
+        "qnn-gpu", "Qualcomm Adreno GPU",
+        kQnnGpuLibName, GGML_BACKEND_DEVICE_TYPE_GPU,
+        (1 << GGML_TYPE_F32) | (1 << GGML_TYPE_F16),
+    },  // https://docs.qualcomm.com/bundle/publicresource/topics/80-63442-50/GpuOpDefSupplement.html#matmul
     {
-        // https://docs.qualcomm.com/bundle/publicresource/topics/80-63442-50/HtpOpDefSupplement.html#matmul
-        "qnn-npu",
-        "Qualcomm NPU",
-        kQnnNpuLibName,
-        GGML_BACKEND_DEVICE_TYPE_ACCEL,
-        (1 << GGML_TYPE_F32) | (1 << GGML_TYPE_F16) | (1 << GGML_TYPE_I16) | (1 << GGML_TYPE_I8),
-    },
+        "qnn-npu", "Qualcomm NPU",
+        kQnnNpuLibName, GGML_BACKEND_DEVICE_TYPE_ACCEL,
+        (1 << GGML_TYPE_F32) | (1 << GGML_TYPE_F16) | (1 << GGML_TYPE_I16) | (1 << GGML_TYPE_I8),
+    },  // https://docs.qualcomm.com/bundle/publicresource/topics/80-63442-50/HtpOpDefSupplement.html#matmul
 };
 
 static_assert(sizeof(kDeviceCaps) / sizeof(kDeviceCaps[0]) == GGML_QNN_MAX_DEVICES,
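Note: each kDeviceCaps entry advertises its supported ggml types as a bitmask, one bit per type id, so the capability check in backend-ops reduces to a single AND; a minimal sketch:

    #include <cstdint>

    // mirrors the check: ctx->supported_types & (uint64_t(1) << tensor->type)
    static bool type_supported(uint64_t supported_types, int ggml_type_id) {
        return (supported_types & (uint64_t(1) << ggml_type_id)) != 0;
    }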
@@ -85,11 +76,11 @@ static_assert(kDeviceCaps[QNN_BACKEND_GPU].type == GGML_BACKEND_DEVICE_TYPE_GPU,
 static_assert(kDeviceCaps[QNN_BACKEND_CPU].type == GGML_BACKEND_DEVICE_TYPE_CPU,
               "The NPU device should be an accelerator device");
 
-ggml_backend_qnn_device_context *get_device_context(ggml_backend_dev_t dev) {
+ggml_backend_qnn_device_context * get_device_context(ggml_backend_dev_t dev) {
     return reinterpret_cast<ggml_backend_qnn_device_context *>(dev->context);
 }
 
-qnn::qnn_buffer_interface *get_buffer_context(ggml_backend_buffer_t buffer) {
+qnn::qnn_buffer_interface * get_buffer_context(ggml_backend_buffer_t buffer) {
     return reinterpret_cast<qnn::qnn_buffer_interface *>(buffer->context);
 }
 
@@ -99,34 +90,34 @@ qnn::qnn_buffer_interface *get_buffer_context(ggml_backend_buffer_t buffer) {
 * -----------------------------------------------------------------------------------------------
 */
 void ggml_backend_qnn_buffer_free_buffer(ggml_backend_buffer_t buffer) {
-    auto *ctx = get_buffer_context(buffer);
+    auto * ctx = get_buffer_context(buffer);
     delete ctx;
 }
 
-void *ggml_backend_qnn_buffer_get_base(ggml_backend_buffer_t buffer) {
-    auto *ctx = get_buffer_context(buffer);
+void * ggml_backend_qnn_buffer_get_base(ggml_backend_buffer_t buffer) {
+    auto * ctx = get_buffer_context(buffer);
     return ctx->get_buffer();
 }
 
-void ggml_backend_qnn_buffer_init_tensor(ggml_backend_buffer_t buffer, ggml_tensor *tensor) {
+void ggml_backend_qnn_buffer_init_tensor(ggml_backend_buffer_t buffer, ggml_tensor * tensor) {
     GGML_UNUSED(buffer);
     GGML_UNUSED(tensor);
     // TODO: we should create the qnn tensor along with the ggml tensor
 }
 
-void ggml_backend_qnn_buffer_set_tensor(ggml_backend_buffer_t buffer, ggml_tensor *tensor, const void *data,
+void ggml_backend_qnn_buffer_set_tensor(ggml_backend_buffer_t buffer, ggml_tensor * tensor, const void * data,
                                         size_t offset, size_t size) {
     GGML_UNUSED(buffer);
-    memcpy((char *)tensor->data + offset, data, size);
+    memcpy((char *) tensor->data + offset, data, size);
 }
 
-void ggml_backend_qnn_buffer_get_tensor(ggml_backend_buffer_t buffer, const ggml_tensor *tensor, void *data,
+void ggml_backend_qnn_buffer_get_tensor(ggml_backend_buffer_t buffer, const ggml_tensor * tensor, void * data,
                                         size_t offset, size_t size) {
     GGML_UNUSED(buffer);
-    memcpy(data, (const char *)tensor->data + offset, size);
+    memcpy(data, (const char *) tensor->data + offset, size);
 }
 
-bool ggml_backend_qnn_buffer_cpy_tensor(ggml_backend_buffer_t buffer, const ggml_tensor *src, ggml_tensor *dst) {
+bool ggml_backend_qnn_buffer_cpy_tensor(ggml_backend_buffer_t buffer, const ggml_tensor * src, ggml_tensor * dst) {
     GGML_UNUSED(buffer);
     if (ggml_backend_buffer_is_host(src->buffer)) {
         memcpy(dst->data, src->data, ggml_nbytes(src));
@@ -137,7 +128,7 @@ bool ggml_backend_qnn_buffer_cpy_tensor(ggml_backend_buffer_t buffer, const ggml
 }
 
 void ggml_backend_qnn_buffer_clear(ggml_backend_buffer_t buffer, uint8_t value) {
-    auto *ctx = get_buffer_context(buffer);
+    auto * ctx = get_buffer_context(buffer);
     memset(ctx->get_buffer(), value, ctx->get_size());
 }
 
@@ -158,19 +149,19 @@ constexpr const ggml_backend_buffer_i ggml_backend_qnn_buffer_interface = {
 * qnn backend object
 * -----------------------------------------------------------------------------------------------
 */
-const char *ggml_backend_qnn_buffer_type_name(ggml_backend_buffer_type_t buft) {
-    auto *dev_ctx = get_device_context(buft->device);
+const char * ggml_backend_qnn_buffer_type_name(ggml_backend_buffer_type_t buft) {
+    auto * dev_ctx = get_device_context(buft->device);
     return qnn::get_backend_name(dev_ctx->device);
 }
 
 ggml_backend_buffer_t ggml_backend_qnn_buffer_type_alloc_buffer(ggml_backend_buffer_type_t buft, size_t size) {
-    qnn::qnn_buffer_interface *ctx = new qnn::qnn_mem_buffer(size);
+    qnn::qnn_buffer_interface * ctx = new qnn::qnn_mem_buffer(size);
     if (!ctx->is_valid()) {
         return nullptr;
     }
 
-    QNN_LOG_DEBUG("[%s]alloc buffer: %p, size: %ld", qnn::get_backend_name(get_device_context(buft->device)->device),
-                  ctx->get_buffer(), size);
+    QNN_LOG_DEBUG("[%s]alloc buffer: %p, size: %ld\n", qnn::get_backend_name(get_device_context(buft->device)->device),
+                  (void *) ctx->get_buffer(), (long) size);
     return ggml_backend_buffer_init(buft, ggml_backend_qnn_buffer_interface, ctx, size);
 }
 
@@ -192,16 +183,16 @@ bool ggml_backend_qnn_buffer_is_host(ggml_backend_buffer_type_t buft) {
     return true;
 }
 
-const char *ggml_backend_qnn_name(ggml_backend_t backend) {
-    auto *device_ctx = get_device_context(backend->device);
+const char * ggml_backend_qnn_name(ggml_backend_t backend) {
+    auto * device_ctx = get_device_context(backend->device);
     return device_ctx->name.c_str();
 }
 
 void ggml_backend_qnn_free(ggml_backend_t backend) {
-    auto *device_ctx = get_device_context(backend->device);
-    QNN_LOG_INFO("idx %d, name:%s", device_ctx->device, device_ctx->name.c_str());
+    auto * device_ctx = get_device_context(backend->device);
+    QNN_LOG_INFO("idx %d, name:%s\n", device_ctx->device, device_ctx->name.c_str());
 
-    auto &instance = device_ctx->instance;
+    auto & instance = device_ctx->instance;
     if (instance) {
         device_ctx->qnn_graph_cache.clear();
         device_ctx->qnn_interface.reset();
@ -212,35 +203,33 @@ void ggml_backend_qnn_free(ggml_backend_t backend) {
|
|||
delete backend;
|
||||
}
|
||||
|
||||
bool ggml_backend_qnn_cpy_tensor_async(ggml_backend_t backend_src, ggml_backend_t backend_dst, const ggml_tensor *src,
|
||||
ggml_tensor *dst) {
|
||||
bool ggml_backend_qnn_cpy_tensor_async(ggml_backend_t backend_src, ggml_backend_t backend_dst, const ggml_tensor * src,
|
||||
ggml_tensor * dst) {
|
||||
GGML_UNUSED(backend_src);
|
||||
GGML_UNUSED(backend_dst);
|
||||
GGML_UNUSED(src);
|
||||
GGML_UNUSED(dst);
|
||||
|
||||
QNN_LOG_DEBUG("opy form %s to %s, src_is_qnn: %d, dst_is_qnn: %d", ggml_get_name(src), ggml_get_name(dst),
|
||||
(int)ggml_backend_is_qnn(backend_src), (int)ggml_backend_is_qnn(backend_dst));
|
||||
QNN_LOG_DEBUG("opy form %s to %s, src_is_qnn: %d, dst_is_qnn: %d\n", ggml_get_name(src), ggml_get_name(dst),
|
||||
(int) ggml_backend_is_qnn(backend_src), (int) ggml_backend_is_qnn(backend_dst));
|
||||
    return false;
}

ggml_backend_buffer_type_t ggml_backend_qnn_buffer_type(ggml_backend_dev_t dev) {
    static ggml_backend_buffer_type ggml_backend_qnn_buffer_types[GGML_QNN_MAX_DEVICES];
    auto *dev_ctx = get_device_context(dev);
    auto * dev_ctx = get_device_context(dev);
    if (!ggml_backend_qnn_buffer_types[dev_ctx->device].device) {
        ggml_backend_qnn_buffer_types[dev_ctx->device] = {
            /* .iface = */ {
                /* .get_name = */ ggml_backend_qnn_buffer_type_name,
                /* .alloc_buffer = */
                ggml_backend_qnn_buffer_type_alloc_buffer,
                /* .get_alignment = */
                ggml_backend_qnn_buffer_type_get_alignment,
                /* .get_max_size = */
                ggml_backend_qnn_buffer_type_get_max_size,
                /* .get_alloc_size = */ nullptr, // defaults to ggml_nbytes
                /* .get_name = */ ggml_backend_qnn_buffer_type_name,
                /* .alloc_buffer = */
                ggml_backend_qnn_buffer_type_alloc_buffer, /* .get_alignment = */
                ggml_backend_qnn_buffer_type_get_alignment, /* .get_max_size = */
                ggml_backend_qnn_buffer_type_get_max_size, /* .get_alloc_size = */ nullptr, // defaults to ggml_nbytes
                /* .is_host = */ ggml_backend_qnn_buffer_is_host,
            },
            /* .device */ dev,
            },
            /* .device */
            dev,
            /* .context = */ nullptr,
        };
    } else {

@@ -250,9 +239,9 @@ ggml_backend_buffer_type_t ggml_backend_qnn_buffer_type(ggml_backend_dev_t dev)
    return &ggml_backend_qnn_buffer_types[dev_ctx->device];
}

ggml_status ggml_backend_qnn_graph_compute(ggml_backend_t backend, ggml_cgraph *cgraph) {
    return qnn::device_compute_graph(get_device_context(backend->device), cgraph) ? GGML_STATUS_SUCCESS
                                                                                  : GGML_STATUS_FAILED;
ggml_status ggml_backend_qnn_graph_compute(ggml_backend_t backend, ggml_cgraph * cgraph) {
    return qnn::device_compute_graph(get_device_context(backend->device), cgraph) ? GGML_STATUS_SUCCESS :
                                                                                    GGML_STATUS_FAILED;
}

constexpr const ggml_backend_i ggml_backend_qnn_interface = {

@@ -276,31 +265,31 @@ constexpr const ggml_backend_i ggml_backend_qnn_interface = {
 * qnn backend device object
 * -----------------------------------------------------------------------------------------------
 */
const char *ggml_backend_qnn_device_get_name(ggml_backend_dev_t dev) {
    const auto &caps = kDeviceCaps[get_device_context(dev)->device];
const char * ggml_backend_qnn_device_get_name(ggml_backend_dev_t dev) {
    const auto & caps = kDeviceCaps[get_device_context(dev)->device];
    return caps.name;
}

const char *ggml_backend_qnn_device_get_description(ggml_backend_dev_t dev) {
    const auto &caps = kDeviceCaps[get_device_context(dev)->device];
const char * ggml_backend_qnn_device_get_description(ggml_backend_dev_t dev) {
    const auto & caps = kDeviceCaps[get_device_context(dev)->device];
    return caps.description;
}

void ggml_backend_qnn_device_get_memory(ggml_backend_dev_t dev, size_t *free, size_t *total) {
void ggml_backend_qnn_device_get_memory(ggml_backend_dev_t dev, size_t * free, size_t * total) {
    GGML_UNUSED(dev);
    *free = qnn::get_system_free_memory_in_bytes();
    *free  = qnn::get_system_free_memory_in_bytes();
    *total = qnn::get_system_total_memory_in_bytes();
    QNN_LOG_DEBUG("free memory: %ldMB, total memory: %ldMB", (*free / 1048576), (*total) / 1048576);
    QNN_LOG_DEBUG("free memory: %ldMB, total memory: %ldMB\n", (*free / 1048576), (*total) / 1048576);
}

enum ggml_backend_dev_type ggml_backend_qnn_device_get_type(ggml_backend_dev_t dev) {
    return kDeviceCaps[get_device_context(dev)->device].type;
}

void ggml_backend_qnn_device_get_props(ggml_backend_dev_t dev, ggml_backend_dev_props *props) {
    props->name = ggml_backend_qnn_device_get_name(dev);
void ggml_backend_qnn_device_get_props(ggml_backend_dev_t dev, ggml_backend_dev_props * props) {
    props->name        = ggml_backend_qnn_device_get_name(dev);
    props->description = ggml_backend_qnn_device_get_description(dev);
    props->type = ggml_backend_qnn_device_get_type(dev);
    props->type        = ggml_backend_qnn_device_get_type(dev);
    ggml_backend_qnn_device_get_memory(dev, &props->memory_free, &props->memory_total);
    props->caps = {
        /* async */ false,

@@ -311,12 +300,12 @@ void ggml_backend_qnn_device_get_props(ggml_backend_dev_t dev, ggml_backend_dev_
}

ggml_guid_t ggml_backend_qnn_guid() {
    static ggml_guid guid = {0x1a, 0x2b, 0x3c, 0x4d, 0x5e, 0x6f, 0x70, 0x81,
                             0x92, 0xa3, 0xb4, 0xc5, 0xd6, 0xe7, 0xf8, 0x09};
    static ggml_guid guid = { 0x1a, 0x2b, 0x3c, 0x4d, 0x5e, 0x6f, 0x70, 0x81,
                              0x92, 0xa3, 0xb4, 0xc5, 0xd6, 0xe7, 0xf8, 0x09 };
    return &guid;
}

ggml_backend_t ggml_backend_qnn_init_with_device_context(ggml_backend_dev_t dev, const char *extend_lib_search_path) {
ggml_backend_t ggml_backend_qnn_init_with_device_context(ggml_backend_dev_t dev, const char * extend_lib_search_path) {
    if (!extend_lib_search_path) {
        extend_lib_search_path = GGML_QNN_DEFAULT_LIB_SEARCH_PATH;
        QNN_LOG_WARN(

@@ -324,27 +313,27 @@ ggml_backend_t ggml_backend_qnn_init_with_device_context(ggml_backend_dev_t dev,
            "use " GGML_QNN_DEFAULT_LIB_SEARCH_PATH " as default");
    }

    auto *dev_ctx = get_device_context(dev);
    const auto device = dev_ctx->device;
    QNN_LOG_DEBUG("device %s", qnn::get_backend_name(device));
    QNN_LOG_DEBUG("extend_lib_search_path %s", extend_lib_search_path);
    auto * dev_ctx = get_device_context(dev);
    const auto device = dev_ctx->device;
    QNN_LOG_DEBUG("device %s\n", qnn::get_backend_name(device));
    QNN_LOG_DEBUG("extend_lib_search_path %s\n", extend_lib_search_path);
    auto instance = std::make_shared<qnn::qnn_instance>(extend_lib_search_path, dev_ctx->lib_name);
    auto result = instance->qnn_init(nullptr);
    auto result   = instance->qnn_init(nullptr);
    if (result != 0) {
        QNN_LOG_WARN("failed to init qnn backend %s", qnn::get_backend_name(device));
        QNN_LOG_WARN("failed to init qnn backend %s\n", qnn::get_backend_name(device));
        return nullptr;
    }
    auto qnn_interface = instance->get_qnn_interface();
    if (!qnn_interface) {
        QNN_LOG_WARN("qnn subsystem failure");
        QNN_LOG_WARN("qnn subsystem failure\n");
        return nullptr;
    }

    std::string device_name = qnn::get_backend_name(device);
    QNN_LOG_INFO("qnn device name %s", device_name.c_str());
    dev_ctx->instance = instance;
    dev_ctx->qnn_interface = qnn_interface;
    dev_ctx->socinfo = instance->get_soc_info();
    QNN_LOG_INFO("qnn device name %s\n", device_name.c_str());
    dev_ctx->instance        = instance;
    dev_ctx->qnn_interface   = qnn_interface;
    dev_ctx->socinfo         = instance->get_soc_info();
    dev_ctx->supported_types = kDeviceCaps[device].supported_types;

    ggml_backend_t qnn_backend = new ggml_backend{

@@ -357,7 +346,7 @@ ggml_backend_t ggml_backend_qnn_init_with_device_context(ggml_backend_dev_t dev,
    return qnn_backend;
}

ggml_backend_t ggml_backend_qnn_device_init(ggml_backend_dev_t dev, const char *params) {
ggml_backend_t ggml_backend_qnn_device_init(ggml_backend_dev_t dev, const char * params) {
    return ggml_backend_qnn_init_with_device_context(dev, params);
}

@@ -365,7 +354,7 @@ ggml_backend_buffer_type_t ggml_backend_qnn_device_get_buffer_type(ggml_backend_
    return ggml_backend_qnn_buffer_type(dev);
}

ggml_backend_buffer_t ggml_backend_qnn_device_buffer_from_ptr(ggml_backend_dev_t dev, void *ptr, size_t size,
ggml_backend_buffer_t ggml_backend_qnn_device_buffer_from_ptr(ggml_backend_dev_t dev, void * ptr, size_t size,
                                                              size_t max_tensor_size) {
    // TODO
    GGML_UNUSED(dev);

@@ -373,9 +362,9 @@ ggml_backend_buffer_t ggml_backend_qnn_device_buffer_from_ptr(ggml_backend_dev_t
    return ggml_backend_cpu_buffer_from_ptr(ptr, size);
}

bool ggml_backend_qnn_device_supports_op(ggml_backend_dev_t dev, const ggml_tensor *op) {
bool ggml_backend_qnn_device_supports_op(ggml_backend_dev_t dev, const ggml_tensor * op) {
    // Note that this function could be called before the device context is initialized
    auto *device_ctx = get_device_context(dev);
    auto * device_ctx = get_device_context(dev);
    return qnn::device_supports_op(device_ctx, op);
}

@@ -384,13 +373,13 @@ bool ggml_backend_qnn_device_supports_buft(ggml_backend_dev_t dev, ggml_backend_
    return ggml_backend_buft_is_host(buft);
}

bool ggml_backend_qnn_device_offload_op(ggml_backend_dev_t dev, const ggml_tensor *op) {
bool ggml_backend_qnn_device_offload_op(ggml_backend_dev_t dev, const ggml_tensor * op) {
#ifdef NDEBUG
    GGML_UNUSED(dev);
    GGML_UNUSED(op);
#else
    auto *device_ctx = get_device_context(dev);
    QNN_LOG_DEBUG("[%s][%s]offload op", qnn::get_backend_name(device_ctx->device), ggml_op_name(op->op));
    auto * device_ctx = get_device_context(dev);
    QNN_LOG_DEBUG("[%s][%s]offload op\n", qnn::get_backend_name(device_ctx->device), ggml_op_name(op->op));
#endif
    return false;
}

@@ -421,15 +410,15 @@ constexpr const ggml_backend_device_i ggml_backend_qnn_device_interface = {

struct ggml_backend_qnn_reg_impl : ggml_backend_reg {
    std::vector<std::unique_ptr<ggml_backend_qnn_device_context>> device_contexts;
    std::vector<ggml_backend_device> devices;
    std::vector<ggml_backend_device>                               devices;

    explicit ggml_backend_qnn_reg_impl(ggml_backend_reg_i interface) {
        context = this;
        iface = interface;
        iface   = interface;

        QNN_LOG_DEBUG("qnn backend registry init");
        QNN_LOG_DEBUG("qnn backend registry init\n");
        for (size_t i = 0; i < QNN_BACKEND_COUNT; i++) {
            const auto device_enum = (QNNBackend)(QNN_BACKEND_COUNT - 1 - i); // init from the last device, i.e. NPU
            const auto device_enum = (QNNBackend) (QNN_BACKEND_COUNT - 1 - i);  // init from the last device, i.e. NPU
#ifndef GGML_QNN_ENABLE_CPU_BACKEND
            if (device_enum == QNN_BACKEND_CPU) {
                /*

@@ -441,7 +430,7 @@ struct ggml_backend_qnn_reg_impl : ggml_backend_reg {
#endif

            device_contexts.emplace_back(std::make_unique<ggml_backend_qnn_device_context>(
                /* .device = */ device_enum, // init from the last device, i.e. NPU
                /* .device = */ device_enum,  // init from the last device, i.e. NPU
                /* .threads = */ 1,
                /* .name = */ qnn::get_backend_name(device_enum),
                /* .lib_name = */ kDeviceCaps[device_enum].lib_name,

@@ -456,18 +445,18 @@ struct ggml_backend_qnn_reg_impl : ggml_backend_reg {
    }
};

const char *ggml_backend_qnn_reg_get_name(ggml_backend_reg_t reg) {
const char * ggml_backend_qnn_reg_get_name(ggml_backend_reg_t reg) {
    GGML_UNUSED(reg);
    return GGML_QNN_NAME;
}

size_t ggml_backend_qnn_reg_get_device_count(ggml_backend_reg_t reg) {
    auto *ctx = (ggml_backend_qnn_reg_impl *)reg->context;
    auto * ctx = (ggml_backend_qnn_reg_impl *) reg->context;
    return ctx->devices.size();
}

ggml_backend_dev_t ggml_backend_qnn_reg_get_device(ggml_backend_reg_t reg, size_t index) {
    auto *ctx = (ggml_backend_qnn_reg_impl *)reg->context;
    auto * ctx = (ggml_backend_qnn_reg_impl *) reg->context;
    GGML_ASSERT(index < ctx->devices.size());
    return &(ctx->devices[index]);
}

@@ -479,11 +468,13 @@ const ggml_backend_reg_i ggml_backend_qnn_reg_interface = {
    /* .get_proc_address = */ nullptr,
};

} // namespace
}  // namespace

bool ggml_backend_is_qnn(ggml_backend_t backend) { return ggml_guid_matches(backend->guid, ggml_backend_qnn_guid()); }
bool ggml_backend_is_qnn(ggml_backend_t backend) {
    return ggml_guid_matches(backend->guid, ggml_backend_qnn_guid());
}

ggml_backend_reg_t ggml_backend_qnn_reg() {
    static ggml_backend_qnn_reg_impl reg{ggml_backend_qnn_reg_interface};
    static ggml_backend_qnn_reg_impl reg{ ggml_backend_qnn_reg_interface };
    return &reg;
}
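
// Illustrative usage sketch (not part of this patch): with the registry above in
// place, callers go through the generic ggml-backend device API. The entry points
// below are assumed from upstream ggml-backend.h and may differ by revision.
//
//     ggml_backend_reg_t reg = ggml_backend_qnn_reg();
//     for (size_t i = 0; i < ggml_backend_reg_dev_count(reg); ++i) {
//         ggml_backend_dev_t dev = ggml_backend_reg_dev_get(reg, i);
//         printf("%s: %s\n", ggml_backend_dev_name(dev), ggml_backend_dev_description(dev));
//     }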
@@ -5,7 +5,6 @@
#include <unordered_map>

#include "ggml-impl.h"

#include "logger.hpp"
#include "op-config.hpp"
#include "tensor.hpp"

@@ -13,9 +12,9 @@
namespace {
using qnn_tensor_cache_t = std::unordered_map<ggml_tensor *, qnn::qnn_tensor_ptr_t>;

int get_op_max_rank(const ggml_tensor *op) {
    int max_rank = ggml_n_dims(op);
    const int count = (int)qnn::get_qnn_op_input_param_count(op);
int get_op_max_rank(const ggml_tensor * op) {
    int       max_rank = ggml_n_dims(op);
    const int count    = (int) qnn::get_qnn_op_input_param_count(op);
    for (int i = 0; i < count; ++i) {
        max_rank = std::max(max_rank, ggml_n_dims(op->src[i]));
    }

@@ -23,10 +22,10 @@ int get_op_max_rank(const ggml_tensor *op) {
    return max_rank;
}

qnn::qnn_tensor_ptr_t create_tensor_with_cache(ggml_tensor *tensor, qnn::ggml_qnn_tensor::tensor_type_t type, int rank,
qnn::qnn_tensor_ptr_t create_tensor_with_cache(ggml_tensor * tensor, qnn::ggml_qnn_tensor::tensor_type_t type, int rank,
                                               QNNBackend device, Qnn_GraphHandle_t graph_handle,
                                               std::shared_ptr<qnn::qnn_instance> qnn_instance,
                                               qnn_tensor_cache_t &tensor_cache) {
                                               qnn_tensor_cache_t & tensor_cache) {
    GGML_ASSERT(tensor);
    if (tensor_cache.count(tensor)) {
        return tensor_cache[tensor];

@@ -38,13 +37,13 @@ qnn::qnn_tensor_ptr_t create_tensor_with_cache(ggml_tensor *tensor, qnn::ggml_qn
    return qnn_tensor;
}

qnn::qnn_tensor_array_t create_tensors_with_cache(const qnn::ggml_tensor_array_t &ggml_tensors,
qnn::qnn_tensor_array_t create_tensors_with_cache(const qnn::ggml_tensor_array_t & ggml_tensors,
                                                  qnn::ggml_qnn_tensor::tensor_type_t type, int rank, QNNBackend device,
                                                  Qnn_GraphHandle_t graph_handle,
                                                  Qnn_GraphHandle_t                   graph_handle,
                                                  std::shared_ptr<qnn::qnn_instance> qnn_instance,
                                                  qnn_tensor_cache_t &tensor_cache) {
                                                  qnn_tensor_cache_t & tensor_cache) {
    qnn::qnn_tensor_array_t tensors;
    for (auto *tensor : ggml_tensors) {
    for (auto * tensor : ggml_tensors) {
        tensors.push_back(
            create_tensor_with_cache(tensor, type, rank, device, graph_handle, qnn_instance, tensor_cache));
    }

@@ -52,10 +51,10 @@ qnn::qnn_tensor_array_t create_tensors_with_cache(const qnn::ggml_tensor_array_t
    return tensors;
}

qnn::qnn_op_config_ptr_t create_operation_from_op_tensor(ggml_tensor *dst, const std::string &name, int rank,
qnn::qnn_op_config_ptr_t create_operation_from_op_tensor(ggml_tensor * dst, const std::string & name, int rank,
                                                         QNNBackend device, Qnn_GraphHandle_t graph_handle,
                                                         std::shared_ptr<qnn::qnn_instance> qnn_instance,
                                                         bool is_intermediate, qnn_tensor_cache_t &tensor_cache) {
                                                         bool is_intermediate, qnn_tensor_cache_t & tensor_cache) {
    auto operation = qnn::create_op(dst, name, qnn_instance);

    // input tensors

@@ -71,22 +70,22 @@ qnn::qnn_op_config_ptr_t create_operation_from_op_tensor(ggml_tensor *dst, const
    // output tensor
    tensor_type = is_intermediate ? qnn::ggml_qnn_tensor::INTERMEDIATE : qnn::ggml_qnn_tensor::OUTPUT;
    qnn::qnn_tensor_array_t output_qnn_tensors =
        create_tensors_with_cache({dst}, tensor_type, rank, device, graph_handle, qnn_instance, tensor_cache);
        create_tensors_with_cache({ dst }, tensor_type, rank, device, graph_handle, qnn_instance, tensor_cache);
    operation->set_output_tensors(output_qnn_tensors);

    // initialize operation
    if (!operation->initialize_op_nodes(device, graph_handle)) {
        QNN_LOG_ERROR("[%s][%s]initialize_op_nodes failed", qnn::get_backend_name(device), name.c_str());
        QNN_LOG_ERROR("[%s][%s]initialize_op_nodes failed\n", qnn::get_backend_name(device), name.c_str());
        return nullptr;
    }

    return operation;
}

bool bind_src_tensors(ggml_tensor *op, qnn::qnn_tensor_array_t &tensor_wrappers,
                      std::vector<Qnn_Tensor_t> &qnn_tensors) {
bool bind_src_tensors(ggml_tensor * op, qnn::qnn_tensor_array_t & tensor_wrappers,
                      std::vector<Qnn_Tensor_t> & qnn_tensors) {
    if (op->op == GGML_OP_NONE) {
        QNN_LOG_DEBUG("op %s is not a valid op", ggml_get_name(op));
        QNN_LOG_DEBUG("op %s is not a valid op\n", ggml_get_name(op));
        return false;
    }

@@ -94,9 +93,9 @@ bool bind_src_tensors(ggml_tensor *op, qnn::qnn_tensor_array_t &tensor_wrappers,
    GGML_ASSERT(tensor_wrappers.size() == param_count);
    qnn_tensors.resize(param_count);
    for (size_t i = 0; i < param_count; ++i) {
        auto *ggml_tensor = op->src[i];
        auto * ggml_tensor = op->src[i];
        if (!tensor_wrappers[i]->bind_ggml_tensor(ggml_tensor)) {
            QNN_LOG_ERROR("bind tensor %s failed", ggml_get_name(ggml_tensor));
            QNN_LOG_ERROR("bind tensor %s failed\n", ggml_get_name(ggml_tensor));
            return false;
        }

@@ -116,22 +115,21 @@ bool bind_src_tensors(ggml_tensor *op, qnn::qnn_tensor_array_t &tensor_wrappers,
 *
 * TODO: this algorithm is not perfect and may not work for all cases. It assumes that the tensors are
 * connected in a way that allows for unambiguous categorization.
 * It also assumes that the tensors are connected in a way that allows for unambiguous categorization.
 */
int get_io_tensors_from_graph(const ggml_cgraph *cgraph, qnn::ggml_tensor_array_t &inputs,
                              qnn::ggml_tensor_array_t &outputs) {
int get_io_tensors_from_graph(const ggml_cgraph * cgraph, qnn::ggml_tensor_array_t & inputs,
                              qnn::ggml_tensor_array_t & outputs) {
    struct _tensor_connectivity_info {
        size_t in_degree = 0;
        size_t out_degree = 0;
        size_t in_degree    = 0;
        size_t out_degree   = 0;
        size_t insert_index = 0;
    };

    using ggml_tensor_connectivity_map_t = std::unordered_map<ggml_tensor *, _tensor_connectivity_info>;

    ggml_tensor_connectivity_map_t connectivity_map;
    int rank = 0;
    int                            rank = 0;
    for (int i = 0; i < cgraph->n_nodes; i++) {
        ggml_tensor *dst = cgraph->nodes[i];
        ggml_tensor * dst = cgraph->nodes[i];
        if (ggml_is_empty(dst)) {
            continue;
        }

@@ -144,7 +142,7 @@ int get_io_tensors_from_graph(const ggml_cgraph *cgraph, qnn::ggml_tensor_array_
        rank = std::max(rank, ggml_n_dims(dst));
        if (connectivity_map.count(dst) == 0) {
            connectivity_map[dst] = {
                1, // in-degree, at least 1
                1,  // in-degree, at least 1
                0,
                connectivity_map.size(),
            };

@@ -153,13 +151,13 @@ int get_io_tensors_from_graph(const ggml_cgraph *cgraph, qnn::ggml_tensor_array_
        }

        for (size_t i = 0; i < GGML_MAX_DIMS && dst->src[i]; ++i) {
            auto *src = dst->src[i];
            rank = std::max(rank, ggml_n_dims(src));
            auto * src = dst->src[i];
            rank       = std::max(rank, ggml_n_dims(src));

            if (connectivity_map.count(src) == 0) {
                connectivity_map[src] = {
                    0,
                    1, // out-degree, at least 1
                    1,  // out-degree, at least 1
                    connectivity_map.size(),
                };
            } else {

@@ -168,7 +166,7 @@ int get_io_tensors_from_graph(const ggml_cgraph *cgraph, qnn::ggml_tensor_array_
        }
    }

    for (const auto &kv : connectivity_map) {
    for (const auto & kv : connectivity_map) {
        if (kv.second.in_degree == 0) {
            inputs.push_back(kv.first);
        }

@@ -178,126 +176,103 @@ int get_io_tensors_from_graph(const ggml_cgraph *cgraph, qnn::ggml_tensor_array_
        }
    }

    std::sort(inputs.begin(), inputs.end(), [&connectivity_map](ggml_tensor *lhs, ggml_tensor *rhs) {
    std::sort(inputs.begin(), inputs.end(), [&connectivity_map](ggml_tensor * lhs, ggml_tensor * rhs) {
        return connectivity_map[lhs].insert_index < connectivity_map[rhs].insert_index;
    });

    std::sort(outputs.begin(), outputs.end(), [&connectivity_map](ggml_tensor *lhs, ggml_tensor *rhs) {
    std::sort(outputs.begin(), outputs.end(), [&connectivity_map](ggml_tensor * lhs, ggml_tensor * rhs) {
        return connectivity_map[lhs].insert_index < connectivity_map[rhs].insert_index;
    });

    return rank;
}
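
// A compact sketch of the classification rule used above, under the stated
// assumption that the graph allows unambiguous categorization: a tensor that is
// never produced by any node (in-degree 0) is a graph input, and one that is
// never consumed as a source (out-degree 0) is a graph output.
//
//     std::unordered_map<ggml_tensor *, std::pair<int, int>> degree;  // {in, out}
//     for (int i = 0; i < cgraph->n_nodes; ++i) {
//         ggml_tensor * dst = cgraph->nodes[i];
//         degree[dst].first++;                                // produced by this node
//         for (int j = 0; j < GGML_MAX_DIMS && dst->src[j]; ++j) {
//             degree[dst->src[j]].second++;                   // consumed as a source
//         }
//     }
//     // inputs: degree[t].first == 0; outputs: degree[t].second == 0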

} // namespace
}  // namespace

namespace qnn {

qnn_graph::qnn_graph(const std::string &graph_name, QNNBackend device, std::shared_ptr<qnn_instance> qnn_instance,
                     size_t vtcm_size_in_mb)
    : _graph_name(graph_name), _device(device), _qnn_instance(qnn_instance) {
    QNN_LOG_DEBUG("[%s][%s]created", get_backend_name(device), graph_name.c_str());
qnn_graph::qnn_graph(const std::string & graph_name, QNNBackend device, std::shared_ptr<qnn_instance> qnn_instance,
                     size_t vtcm_size_in_mb) :
    _graph_name(graph_name),
    _device(device),
    _qnn_instance(qnn_instance) {
    QNN_LOG_DEBUG("[%s][%s]created\n", get_backend_name(device), graph_name.c_str());

    auto qnn_interface = qnn_instance->get_qnn_interface();
    auto qnn_context = qnn_instance->get_qnn_context_handle();
    Qnn_ErrorHandle_t error = QNN_SUCCESS;
    Qnn_GraphHandle_t graph_handle = nullptr;
    auto              qnn_interface = qnn_instance->get_qnn_interface();
    auto              qnn_context   = qnn_instance->get_qnn_context_handle();
    Qnn_ErrorHandle_t error         = QNN_SUCCESS;
    Qnn_GraphHandle_t graph_handle  = nullptr;
    if (device == QNN_BACKEND_NPU) {
        // TODO: fix graph config here for NPU
        QnnHtpGraph_CustomConfig_t hvx_config;
        hvx_config.option = QNN_HTP_GRAPH_CONFIG_OPTION_NUM_HVX_THREADS;
        hvx_config.option        = QNN_HTP_GRAPH_CONFIG_OPTION_NUM_HVX_THREADS;
        hvx_config.numHvxThreads = 8;
        QnnGraph_Config_t graph_hvx_config;
        graph_hvx_config.option = QNN_GRAPH_CONFIG_OPTION_CUSTOM;
        graph_hvx_config.option       = QNN_GRAPH_CONFIG_OPTION_CUSTOM;
        graph_hvx_config.customConfig = &hvx_config;

        QnnHtpGraph_CustomConfig_t dlbc_config;
        dlbc_config.option = QNN_HTP_GRAPH_CONFIG_OPTION_OPTIMIZATION;
        dlbc_config.optimizationOption.type = QNN_HTP_GRAPH_OPTIMIZATION_TYPE_ENABLE_DLBC;
        dlbc_config.optimizationOption.floatValue = 1.0; // set to 0.0 to turn off DLBC
        dlbc_config.option                        = QNN_HTP_GRAPH_CONFIG_OPTION_OPTIMIZATION;
        dlbc_config.optimizationOption.type       = QNN_HTP_GRAPH_OPTIMIZATION_TYPE_ENABLE_DLBC;
        dlbc_config.optimizationOption.floatValue = 1.0;  // set to 0.0 to turn off DLBC
        QnnGraph_Config_t graph_dlbc_config;
        graph_dlbc_config.option = QNN_GRAPH_CONFIG_OPTION_CUSTOM;
        graph_dlbc_config.option       = QNN_GRAPH_CONFIG_OPTION_CUSTOM;
        graph_dlbc_config.customConfig = &dlbc_config;

        QnnHtpGraph_CustomConfig_t opt_config;
        opt_config.optimizationOption.type = QNN_HTP_GRAPH_OPTIMIZATION_TYPE_FINALIZE_OPTIMIZATION_FLAG;
        opt_config.optimizationOption.floatValue = 1; // 1 / 3
        opt_config.optimizationOption.type       = QNN_HTP_GRAPH_OPTIMIZATION_TYPE_FINALIZE_OPTIMIZATION_FLAG;
        opt_config.optimizationOption.floatValue = 1;  // 1 / 3
        QnnGraph_Config_t graph_opt_config;
        graph_opt_config.option = QNN_GRAPH_CONFIG_OPTION_CUSTOM;
        graph_opt_config.option       = QNN_GRAPH_CONFIG_OPTION_CUSTOM;
        graph_opt_config.customConfig = &opt_config;

        QnnHtpGraph_CustomConfig_t vtcm_config;
        vtcm_config.option = QNN_HTP_GRAPH_CONFIG_OPTION_VTCM_SIZE;
        vtcm_config.vtcmSizeInMB = (uint32_t)vtcm_size_in_mb;
        vtcm_config.option       = QNN_HTP_GRAPH_CONFIG_OPTION_VTCM_SIZE;
        vtcm_config.vtcmSizeInMB = (uint32_t) vtcm_size_in_mb;
        QnnGraph_Config_t graph_vtcm_config;
        graph_vtcm_config.option = QNN_GRAPH_CONFIG_OPTION_CUSTOM;
        graph_vtcm_config.option       = QNN_GRAPH_CONFIG_OPTION_CUSTOM;
        graph_vtcm_config.customConfig = &vtcm_config;

        const QnnGraph_Config_t *graph_configs[] = {&graph_hvx_config, &graph_dlbc_config, &graph_vtcm_config,
                                                    &graph_opt_config, nullptr};
        const QnnGraph_Config_t * graph_configs[] = { &graph_hvx_config, &graph_dlbc_config, &graph_vtcm_config,
                                                      &graph_opt_config, nullptr };
        error = qnn_interface->qnn_graph_create(qnn_context, graph_name.c_str(), graph_configs, &graph_handle);
    } else {
        error = qnn_interface->qnn_graph_create(qnn_context, graph_name.c_str(), nullptr, &graph_handle);
    }

    if (error != QNN_SUCCESS) {
        QNN_LOG_ERROR("[%s][%s]failed to create qnn graph, error: %s", get_backend_name(device), graph_name.c_str(),
        QNN_LOG_ERROR("[%s][%s]failed to create qnn graph, error: %s\n", get_backend_name(device), graph_name.c_str(),
                      get_qnn_error_string(error));
        return;
    }

    QNN_LOG_INFO("[%s][%s]create succeed", get_backend_name(device), graph_name.c_str());
    _graph_handle = graph_handle;
    QNN_LOG_INFO("[%s][%s]create succeed\n", get_backend_name(device), graph_name.c_str());
    _graph_handle  = graph_handle;
    _qnn_interface = qnn_interface;
}

qnn_graph::~qnn_graph() { QNN_LOG_DEBUG("[%s][%s]destroy", get_backend_name(_device), _graph_name.c_str()); }

bool qnn_graph::build_graph_from_op(ggml_tensor *op) {
    if (!is_valid()) {
        QNN_LOG_ERROR("Invalid graph");
        return false;
    }

    QNN_LOG_DEBUG("[%s][%s]build start", get_backend_name(_device), _graph_name.c_str());
    qnn_tensor_cache_t tensor_cache;
    const auto rank = get_op_max_rank(op);
    auto operation = create_operation_from_op_tensor(op, _graph_name, rank, _device, _graph_handle, _qnn_instance,
                                                     false, tensor_cache);
    if (!operation) {
        QNN_LOG_ERROR("[%s][%s]create_operation_from_op_tensor failed", get_backend_name(_device), _graph_name.c_str());
        return false;
    }

    _tensor_inputs = operation->get_input_tensors();
    _tensor_outputs = operation->get_output_tensors();
    _operations.push_back(std::move(operation));
    if (!finalize()) {
        return false;
    }

    QNN_LOG_DEBUG("[%s][%s]build succeed", get_backend_name(_device), _graph_name.c_str());
    return true;
qnn_graph::~qnn_graph() {
    QNN_LOG_DEBUG("[%s][%s]destroy\n", get_backend_name(_device), _graph_name.c_str());
}

bool qnn_graph::build_graph_from_ggml_graph(const ggml_cgraph *cgraph) {
    QNN_LOG_DEBUG("[%s][%s]build start", get_backend_name(_device), _graph_name.c_str());
bool qnn_graph::build_graph_from_ggml_graph(const ggml_cgraph * cgraph) {
    QNN_LOG_DEBUG("[%s][%s]build start\n", get_backend_name(_device), _graph_name.c_str());

    ggml_tensor_array_t inputs;
    ggml_tensor_array_t outputs;
    int rank = get_io_tensors_from_graph(cgraph, inputs, outputs);
    QNN_LOG_DEBUG("[%s]rank: %d, input_set: %d, output_set: %d", get_backend_name(_device), rank, int(inputs.size()),
    int                 rank = get_io_tensors_from_graph(cgraph, inputs, outputs);
    QNN_LOG_DEBUG("[%s]rank: %d, input_set: %d, output_set: %d\n", get_backend_name(_device), rank, int(inputs.size()),
                  int(outputs.size()));

    {
        qnn_tensor_cache_t tensor_cache;
        auto input_tensors = create_tensors_with_cache(inputs, ggml_qnn_tensor::INPUT, rank, _device, _graph_handle,
                                                       _qnn_instance, tensor_cache);
        auto input_tensors  = create_tensors_with_cache(inputs, ggml_qnn_tensor::INPUT, rank, _device, _graph_handle,
                                                        _qnn_instance, tensor_cache);
        auto output_tensors = create_tensors_with_cache(outputs, ggml_qnn_tensor::OUTPUT, rank, _device, _graph_handle,
                                                        _qnn_instance, tensor_cache);
        qnn_op_config_array_t operations;
        for (int i = 0; i < cgraph->n_nodes; i++) {
            ggml_tensor *dst = cgraph->nodes[i];
            ggml_tensor * dst = cgraph->nodes[i];
            if (ggml_is_empty(dst)) {
                continue;
            }

@@ -307,83 +282,49 @@ bool qnn_graph::build_graph_from_ggml_graph(const ggml_cgraph *cgraph) {
                continue;
            }

            QNN_LOG_DEBUG("[%s]create op: %s", get_backend_name(_device), get_qnn_op_name(dst));
            QNN_LOG_DEBUG("[%s]create op: %s\n", get_backend_name(_device), get_qnn_op_name(dst));
            auto operation = create_operation_from_op_tensor(dst, dst->name, rank, _device, _graph_handle,
                                                             _qnn_instance, true, tensor_cache); // TODO: fix op name
                                                             _qnn_instance, true, tensor_cache);  // TODO: fix op name
            operations.push_back(operation);
        }

        _tensor_inputs = std::move(input_tensors);
        _tensor_inputs  = std::move(input_tensors);
        _tensor_outputs = std::move(output_tensors);
        _operations = std::move(operations);
        _operations     = std::move(operations);
        if (!finalize()) {
            return false;
        }
    }

    QNN_LOG_DEBUG("[%s][%s]build succeed", get_backend_name(_device), _graph_name.c_str());
    QNN_LOG_DEBUG("[%s][%s]build succeed\n", get_backend_name(_device), _graph_name.c_str());
    return true;
}

bool qnn_graph::execute(ggml_tensor *op) {
    if (!bind_src_tensors(op, _tensor_inputs, _qnn_tensor_inputs)) {
        QNN_LOG_ERROR("[%s][%s]bind input tensors failed", get_backend_name(_device), _graph_name.c_str());
        return false;
    }

    if (!qnn::bind_tensors({op}, _tensor_outputs, _qnn_tensor_outputs)) {
        QNN_LOG_ERROR("[%s][%s]bind output tensors failed", get_backend_name(_device), _graph_name.c_str());
        return false;
    }

    auto &qnn_tensor_inputs = _qnn_tensor_inputs;
    auto &qnn_tensor_outputs = _qnn_tensor_outputs;
    auto error =
        _qnn_interface->qnn_graph_execute(_graph_handle, qnn_tensor_inputs.data(), qnn_tensor_inputs.size(),
                                          qnn_tensor_outputs.data(), qnn_tensor_outputs.size(), nullptr, nullptr);
    unbind_tensors(_tensor_inputs);
    unbind_tensors(_tensor_outputs);

    if (error != QNN_SUCCESS) {
        if (_device == QNN_BACKEND_NPU && error == QNN_COMMON_ERROR_SYSTEM_COMMUNICATION) {
            QNN_LOG_WARN("[%s][%s]NPU crashed. SSR detected. Caused QNN graph execute error.",
                         get_backend_name(_device), _graph_name.c_str());
        } else {
            QNN_LOG_ERROR("[%s][%s]error: %s", get_backend_name(_device), _graph_name.c_str(),
                          get_qnn_error_string(error));
        }
        return false;
    }

    QNN_LOG_DEBUG("[%s][%s]execute succeed", get_backend_name(_device), _graph_name.c_str());
    return true;
}

bool qnn_graph::execute(const ggml_cgraph *cgraph) {
bool qnn_graph::execute(const ggml_cgraph * cgraph) {
    ggml_tensor_array_t inputs;
    ggml_tensor_array_t outputs;
#ifdef NDEBUG
    get_io_tensors_from_graph(cgraph, inputs, outputs);
#else
    int rank = get_io_tensors_from_graph(cgraph, inputs, outputs);
    QNN_LOG_DEBUG("[%s]rank: %d, input_set: %d, output_set: %d", get_backend_name(_device), rank, int(inputs.size()),
    QNN_LOG_DEBUG("[%s]rank: %d, input_set: %d, output_set: %d\n", get_backend_name(_device), rank, int(inputs.size()),
                  int(outputs.size()));
#endif

    {
        if (!qnn::bind_tensors(inputs, _tensor_inputs, _qnn_tensor_inputs)) {
            QNN_LOG_ERROR("[%s][%s]bind input tensors failed", get_backend_name(_device), _graph_name.c_str());
            QNN_LOG_ERROR("[%s][%s]bind input tensors failed\n", get_backend_name(_device), _graph_name.c_str());
            return false;
        }

        if (!qnn::bind_tensors(outputs, _tensor_outputs, _qnn_tensor_outputs)) {
            QNN_LOG_ERROR("[%s][%s]bind output tensors failed", get_backend_name(_device), _graph_name.c_str());
            QNN_LOG_ERROR("[%s][%s]bind output tensors failed\n", get_backend_name(_device), _graph_name.c_str());
            return false;
        }

        auto &qnn_tensor_inputs = _qnn_tensor_inputs;
        auto &qnn_tensor_outputs = _qnn_tensor_outputs;
        auto error =
        auto & qnn_tensor_inputs  = _qnn_tensor_inputs;
        auto & qnn_tensor_outputs = _qnn_tensor_outputs;
        auto   error =
            _qnn_interface->qnn_graph_execute(_graph_handle, qnn_tensor_inputs.data(), qnn_tensor_inputs.size(),
                                              qnn_tensor_outputs.data(), qnn_tensor_outputs.size(), nullptr, nullptr);
        unbind_tensors(_tensor_inputs);

@@ -391,35 +332,35 @@ bool qnn_graph::execute(const ggml_cgraph *cgraph) {

        if (error != QNN_SUCCESS) {
            if (_device == QNN_BACKEND_NPU && error == QNN_COMMON_ERROR_SYSTEM_COMMUNICATION) {
                QNN_LOG_WARN("[%s][%s]NPU crashed. SSR detected. Caused QNN graph execute error.",
                QNN_LOG_WARN("[%s][%s]NPU crashed. SSR detected. Caused QNN graph execute error.\n",
                             get_backend_name(_device), _graph_name.c_str());
            } else {
                QNN_LOG_ERROR("[%s][%s]error: %s", get_backend_name(_device), _graph_name.c_str(),
                QNN_LOG_ERROR("[%s][%s]error: %s\n", get_backend_name(_device), _graph_name.c_str(),
                              get_qnn_error_string(error));
            }
            return false;
        }

        QNN_LOG_DEBUG("[%s][%s]execute succeed", get_backend_name(_device), _graph_name.c_str());
        QNN_LOG_DEBUG("[%s][%s]execute succeed\n", get_backend_name(_device), _graph_name.c_str());
        return true;
    }
}

bool qnn_graph::finalize() {
    if (!qnn::add_op_to_graph(_graph_handle, _operations)) {
        QNN_LOG_ERROR("[%s]add nodes failed", _graph_name.c_str());
        QNN_LOG_ERROR("[%s]add nodes failed\n", _graph_name.c_str());
        return false;
    }

    auto error = _qnn_interface->qnn_graph_finalize(_graph_handle, nullptr, nullptr);
    if (error != QNN_SUCCESS) {
        QNN_LOG_ERROR("[%s][%s]qnn_graph_finalize.error: %s", get_backend_name(_device), _graph_name.c_str(),
        QNN_LOG_ERROR("[%s][%s]qnn_graph_finalize.error: %s\n", get_backend_name(_device), _graph_name.c_str(),
                      get_qnn_error_string(error));
        return false;
    }

    QNN_LOG_DEBUG("[%s][%s]finalize succeed", get_backend_name(_device), _graph_name.c_str());
    QNN_LOG_DEBUG("[%s][%s]finalize succeed\n", get_backend_name(_device), _graph_name.c_str());
    return true;
}

} // namespace qnn
}  // namespace qnn
@@ -6,41 +6,51 @@
#include <vector>

#include "ggml-qnn.h"

#include "op-config.hpp"
#include "qnn-lib.hpp"

namespace qnn {

/**
 * @class qnn_graph
 * @brief Manages a QNN graph, converting a GGML graph to QNN format and handling its execution.
 *
 * This class is responsible for building a QNN graph from a given GGML graph,
 * determining its input/output tensors, finalizing the configuration, and
 * executing the graph on the specified backend device.
 */
class qnn_graph {
public:
    explicit qnn_graph(const std::string &graph_name, QNNBackend device, std::shared_ptr<qnn_instance> qnn_instance,
  public:
    explicit qnn_graph(const std::string & graph_name, QNNBackend device, std::shared_ptr<qnn_instance> qnn_instance,
                       size_t vtcm_size_in_mb);
    ~qnn_graph();

    bool build_graph_from_op(ggml_tensor *op);
    bool build_graph_from_ggml_graph(const ggml_cgraph *cgraph);
    bool build_graph_from_ggml_graph(const ggml_cgraph * cgraph);

    bool execute(const ggml_cgraph * cgraph);

    bool execute(ggml_tensor *op);
    bool execute(const ggml_cgraph *cgraph);
    bool is_valid() const { return _graph_handle != nullptr; }

    Qnn_GraphHandle_t get_graph_handler() const { return _graph_handle; }

    std::shared_ptr<qnn_instance> get_qnn_instance() { return _qnn_instance; }
    const std::string &get_name() const { return _graph_name; }

    const std::string & get_name() const { return _graph_name; }

    QNNBackend get_device() const { return _device; }

private:
  private:
    bool finalize();

    const std::string _graph_name;
    const QNNBackend _device;
    Qnn_GraphHandle_t _graph_handle = nullptr;
    std::shared_ptr<qnn_instance> _qnn_instance;
    const std::string              _graph_name;
    const QNNBackend               _device;
    Qnn_GraphHandle_t              _graph_handle = nullptr;
    std::shared_ptr<qnn_instance>  _qnn_instance;
    std::shared_ptr<qnn_interface> _qnn_interface;
    qnn_op_config_array_t _operations;
    qnn_op_config_array_t          _operations;

    qnn_tensor_array_t _tensor_inputs;
    qnn_tensor_array_t _tensor_outputs;
    qnn_tensor_array_t        _tensor_inputs;
    qnn_tensor_array_t        _tensor_outputs;
    std::vector<Qnn_Tensor_t> _qnn_tensor_inputs;
    std::vector<Qnn_Tensor_t> _qnn_tensor_outputs;

@@ -50,4 +60,4 @@ private:

using qnn_graph_ptr_t = std::shared_ptr<qnn_graph>;

} // namespace qnn
}  // namespace qnn
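
// Illustrative lifecycle sketch (assumed from the interface above, not taken
// from this patch): a qnn_graph is built once from a ggml graph and then
// executed, with tensor binding/unbinding handled inside execute(). The device,
// instance and VTCM size below are placeholders.
//
//     auto graph = std::make_shared<qnn::qnn_graph>("g0", QNN_BACKEND_NPU, instance, /*vtcm_size_in_mb*/ 8);
//     if (graph->is_valid() && graph->build_graph_from_ggml_graph(cgraph)) {
//         bool ok = graph->execute(cgraph);  // bind -> qnn_graph_execute -> unbind
//     }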
@@ -1,70 +1,45 @@

#include "logger.hpp"

#include <cstdio>
#include <mutex>
#ifndef NDEBUG

#if defined(__ANDROID__) || defined(ANDROID)
#include <android/log.h>
#endif
# include <mutex>

void qnn::internal_log(ggml_log_level level, const char * /*file*/, const char *func, int line, const char *format,
                       ...) {
    static std::mutex qnn_internal_log_mutex;
    static char s_qnn_internal_log_buf[QNN_LOGBUF_LEN];
# include "QnnInterface.h"
# include "QnnTypes.h"
# include "System/QnnSystemInterface.h"

    {
        std::lock_guard<std::mutex> lock(qnn_internal_log_mutex);
        va_list args;

        va_start(args, format);
        int len_prefix = snprintf(s_qnn_internal_log_buf, QNN_LOGBUF_LEN, "[%s, %d]: ", func, line);
        int len = vsnprintf(s_qnn_internal_log_buf + len_prefix, QNN_LOGBUF_LEN - len_prefix, format, args);
        if (len < (QNN_LOGBUF_LEN - len_prefix)) {
#if defined(__ANDROID__) || defined(ANDROID)
            // print to android logcat
            __android_log_print(level, "ggml-qnn", "%s\n", s_qnn_internal_log_buf);
#else
            (void)level;
#endif
            // print to stdout
            printf("%s\n", s_qnn_internal_log_buf);
        }
        va_end(args);
    }
}

#if ENABLE_QNNSDK_LOG
void qnn::sdk_logcallback(const char *fmt, QnnLog_Level_t level, uint64_t /*timestamp*/, va_list argp) {
void qnn::sdk_logcallback(const char * fmt, QnnLog_Level_t level, uint64_t /*timestamp*/, va_list argp) {
    static std::mutex log_mutex;
    static char s_ggml_qnn_logbuf[QNN_LOGBUF_LEN];
    static char       s_ggml_qnn_logbuf[4096];

    const char *log_level_desc = "";
    char log_level_desc = 'U';
    switch (level) {
        case QNN_LOG_LEVEL_ERROR:
            log_level_desc = "ERROR";
            log_level_desc = 'E';
            break;
        case QNN_LOG_LEVEL_WARN:
            log_level_desc = "WARNING";
            log_level_desc = 'W';
            break;
        case QNN_LOG_LEVEL_INFO:
            log_level_desc = "INFO";
            log_level_desc = 'I';
            break;
        case QNN_LOG_LEVEL_DEBUG:
            log_level_desc = "DEBUG";
            log_level_desc = 'D';
            break;
        case QNN_LOG_LEVEL_VERBOSE:
            log_level_desc = "VERBOSE";
            break;
        case QNN_LOG_LEVEL_MAX:
            log_level_desc = "UNKNOWN";
            log_level_desc = 'V';
            break;
    }

    {
        std::lock_guard<std::mutex> lock(log_mutex);
        vsnprintf(s_ggml_qnn_logbuf, QNN_LOGBUF_LEN, fmt, argp);
        QNN_LOG_INFO("[%s]%s", log_level_desc, s_ggml_qnn_logbuf);
        int size = vsnprintf(s_ggml_qnn_logbuf, sizeof(s_ggml_qnn_logbuf), fmt, argp);
        if (size > 0 && s_ggml_qnn_logbuf[size - 1] != '\n') {
            QNN_LOG_INFO("[%c]%s\n", log_level_desc, s_ggml_qnn_logbuf);
        } else {
            QNN_LOG_INFO("[%c]%s", log_level_desc, s_ggml_qnn_logbuf);
        }
    }
}
#else
@@ -1,43 +1,16 @@
#pragma once

#include <stdint.h>
#include <cstdint>

#include "ggml-impl.h"
#include "ggml.h"

#include "QnnCommon.h"
#include "QnnInterface.h"
#include "QnnTypes.h"
#include "System/QnnSystemInterface.h"

#define QNN_LOGBUF_LEN 4096
#include "QnnLog.h"

namespace qnn {
void internal_log(ggml_log_level level, const char *file, const char *func, int line, const char *format, ...);
void sdk_logcallback(const char * fmt, QnnLog_Level_t level, uint64_t timestamp, va_list argp);
} // namespace qnn

void sdk_logcallback(const char *fmt, QnnLog_Level_t level, uint64_t timestamp, va_list argp);
}  // namespace qnn

// =================================================================================================
//
// QNN backend internal log function
//
// =================================================================================================
#define QNN_LOG_ERROR(...) qnn::internal_log(GGML_LOG_LEVEL_DEBUG, __FILE__, __FUNCTION__, __LINE__, __VA_ARGS__)

#define QNN_LOG_WARN(...) qnn::internal_log(GGML_LOG_LEVEL_DEBUG, __FILE__, __FUNCTION__, __LINE__, __VA_ARGS__)

#define QNN_LOG_INFO(...) qnn::internal_log(GGML_LOG_LEVEL_DEBUG, __FILE__, __FUNCTION__, __LINE__, __VA_ARGS__)

#ifdef NDEBUG
#define ENABLE_QNNBACKEND_DEBUG 0 // for troubleshooting QNN backend
#define ENABLE_QNNSDK_LOG 0       // enable/disable QNN SDK's internal log
#else
#define ENABLE_QNNBACKEND_DEBUG 1 // for troubleshooting QNN backend
#define ENABLE_QNNSDK_LOG 1       // enable/disable QNN SDK's internal log
#endif

#if ENABLE_QNNBACKEND_DEBUG
#define QNN_LOG_DEBUG(...) qnn::internal_log(GGML_LOG_LEVEL_DEBUG, __FILE__, __FUNCTION__, __LINE__, __VA_ARGS__)
#else
#define QNN_LOG_DEBUG(...)
#endif
#define QNN_LOG_ERROR(...) (GGML_LOG_ERROR(__VA_ARGS__))
#define QNN_LOG_WARN(...)  (GGML_LOG_WARN(__VA_ARGS__))
#define QNN_LOG_INFO(...)  (GGML_LOG_INFO(__VA_ARGS__))
#define QNN_LOG_DEBUG(...) (GGML_LOG_DEBUG(__VA_ARGS__))
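
// With the mapping above, QNN backend logs go straight to the ggml logger, so
// call sites carry their own trailing newline (this change appends "\n" to
// every log format string in the backend), e.g.:
//
//     QNN_LOG_INFO("[%s]device initialized\n", qnn::get_backend_name(device));
//     QNN_LOG_DEBUG("rank: %d, input_set: %d\n", rank, int(inputs.size()));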
@@ -4,7 +4,6 @@
#include <vector>

#include "ggml-qnn.h"

#include "qnn-types.hpp"
#include "tensor.hpp"

@@ -18,7 +17,7 @@ namespace qnn {
 * adding operations to a graph, and binding/unbinding input and output tensors.
 */
class ggml_qnn_op_config {
public:
  public:
    virtual ~ggml_qnn_op_config() {}

    /**

@@ -32,8 +31,8 @@ public:
     *
     * @param tensor_inputs A reference to a vector of qnn_tensor_ptr_t objects representing the input tensors.
     */
    virtual void set_input_tensors(qnn::qnn_tensor_array_t &tensor_inputs) = 0;
    virtual void set_input_tensors(qnn::qnn_tensor_array_t &&tensor_inputs) = 0;
    virtual void set_input_tensors(qnn::qnn_tensor_array_t & tensor_inputs) = 0;
    virtual void set_input_tensors(qnn::qnn_tensor_array_t && tensor_inputs) = 0;

    /**
     * @brief Sets custom output tensors for the operation. This method should be called before `initialize_op_nodes`.

@@ -46,8 +45,8 @@ public:
     *
     * @param tensor_outputs A reference to a vector of qnn_tensor_ptr_t objects representing the output tensors.
     */
    virtual void set_output_tensors(qnn::qnn_tensor_array_t &tensor_outputs) = 0;
    virtual void set_output_tensors(qnn::qnn_tensor_array_t &&tensor_inputs) = 0;
    virtual void set_output_tensors(qnn::qnn_tensor_array_t & tensor_outputs) = 0;
    virtual void set_output_tensors(qnn::qnn_tensor_array_t && tensor_inputs) = 0;

    /**
     * @brief Creates tensors and internal nodes for constructing the calculation graph.

@@ -71,7 +70,7 @@ public:
     *
     * @return A reference to a vector of qnn_tensor_ptr_t objects representing the input tensors.
     */
    virtual const qnn_tensor_array_t &get_input_tensors() = 0;
    virtual const qnn_tensor_array_t & get_input_tensors() = 0;

    /**
     * @brief Pure virtual function to retrieve the output tensors of a QNN.

@@ -82,7 +81,7 @@ public:
     *
     * @return A reference to a vector of qnn_tensor_ptr_t objects representing the output tensors.
     */
    virtual const qnn_tensor_array_t &get_output_tensors() = 0;
    virtual const qnn_tensor_array_t & get_output_tensors() = 0;

    /**
     * @brief Adds an operation to the given graph.

@@ -109,7 +108,7 @@ public:
     * containing the input tensors.
     * @return true if the input tensors were successfully bound, false otherwise.
     */
    virtual bool bind_input_tensors(const ggml_tensor_array_t &tensor_inputs) = 0;
    virtual bool bind_input_tensors(const ggml_tensor_array_t & tensor_inputs) = 0;

    /**
     * @brief Binds the output tensors to the given tensor array.

@@ -123,7 +122,7 @@ public:
     * represent the output tensors to be bound.
     * @return true if the binding is successful, false otherwise.
     */
    virtual bool bind_output_tensors(const ggml_tensor_array_t &tensor_outputs) = 0;
    virtual bool bind_output_tensors(const ggml_tensor_array_t & tensor_outputs) = 0;

    /**
     * @brief Unbinds the input tensors from the operation.

@@ -146,7 +145,7 @@ public:
    virtual void unbind_output_tensors() = 0;
};

using qnn_op_config_ptr_t = std::shared_ptr<ggml_qnn_op_config>;
using qnn_op_config_ptr_t   = std::shared_ptr<ggml_qnn_op_config>;
using qnn_op_config_array_t = std::vector<qnn_op_config_ptr_t>;

} // namespace qnn
}  // namespace qnn
@@ -3,73 +3,73 @@

namespace {

using op_constructor_t = std::shared_ptr<qnn::ggml_qnn_op_config> (*)(const ggml_tensor *, const std::string &,
using op_constructor_t    = std::shared_ptr<qnn::ggml_qnn_op_config> (*)(const ggml_tensor *, const std::string &,
                                                                         std::shared_ptr<qnn::qnn_instance>);
using op_dims_calc_func_t = void (*)(const std::vector<qnn::ggml_dimension_array_t> &input_dims,
                                     qnn::ggml_dimension_array_t &output_dims);
using op_dims_calc_func_t = void (*)(const std::vector<qnn::ggml_dimension_array_t> & input_dims,
                                     qnn::ggml_dimension_array_t & output_dims);

void element_wise_op_dims(const std::vector<qnn::ggml_dimension_array_t> &input_dims,
                          qnn::ggml_dimension_array_t &output_dims) {
void element_wise_op_dims(const std::vector<qnn::ggml_dimension_array_t> & input_dims,
                          qnn::ggml_dimension_array_t & output_dims) {
    for (size_t i = 1; i < std::size(output_dims); i++) {
        output_dims[i] = input_dims.front()[i];
    }
}

void mat_mul_op_dims(const std::vector<qnn::ggml_dimension_array_t> &input_dims,
                     qnn::ggml_dimension_array_t &output_dims) {
void mat_mul_op_dims(const std::vector<qnn::ggml_dimension_array_t> & input_dims,
                     qnn::ggml_dimension_array_t & output_dims) {
    GGML_ASSERT(input_dims.size() == 2);
    output_dims[0] = input_dims.front()[1];
    output_dims[1] = input_dims.back()[1];
}
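
// Worked example for the rule above (sketch; dimensions in ggml order, where
// dims[0] is the contracted axis): src0 with dims {K, M} and src1 with dims
// {K, N} yield a GGML_OP_MUL_MAT output of {M, N}, hence
// output_dims[0] = input_dims.front()[1] and output_dims[1] = input_dims.back()[1].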

struct qnn_op_caps_t {
    const char *qnn_op_name = nullptr;
    const size_t input_param_count = 0;
    op_dims_calc_func_t calc_dims_func = nullptr;
    const char *qnn_param_name = nullptr;
    const char *        qnn_op_name       = nullptr;
    const size_t        input_param_count = 0;
    op_dims_calc_func_t calc_dims_func    = nullptr;
    const char *        qnn_param_name    = nullptr;
};

constexpr const qnn_op_caps_t kOpCaps[] = {
    {}, // GGML_OP_NONE
    {}, // GGML_OP_DUP
    {
        // GGML_OP_ADD
        QNN_OP_ELEMENT_WISE_ADD, // qnn_op_name
        2, // input_param_count
        element_wise_op_dims, // calc_dims_func
        // GGML_OP_ADD
        QNN_OP_ELEMENT_WISE_ADD,  // qnn_op_name
        2,                        // input_param_count
        element_wise_op_dims,     // calc_dims_func
    },
    {}, // GGML_OP_ADD1
    {}, // GGML_OP_ACC
    {
        // GGML_OP_SUB
        QNN_OP_ELEMENT_WISE_SUBTRACT, // qnn_op_name
        2, // input_param_count
        element_wise_op_dims, // calc_dims_func
        // GGML_OP_SUB
        QNN_OP_ELEMENT_WISE_SUBTRACT,  // qnn_op_name
        2,                             // input_param_count
        element_wise_op_dims,          // calc_dims_func
    },
    {
        // GGML_OP_MUL
        QNN_OP_ELEMENT_WISE_MULTIPLY, // qnn_op_name
        2, // input_param_count
        element_wise_op_dims, // calc_dims_func
        // GGML_OP_MUL
        QNN_OP_ELEMENT_WISE_MULTIPLY,  // qnn_op_name
        2,                             // input_param_count
        element_wise_op_dims,          // calc_dims_func
    },
    {
        // GGML_OP_DIV
        QNN_OP_ELEMENT_WISE_DIVIDE, // qnn_op_name
        2, // input_param_count
        element_wise_op_dims, // calc_dims_func
        // GGML_OP_DIV
        QNN_OP_ELEMENT_WISE_DIVIDE,  // qnn_op_name
        2,                           // input_param_count
        element_wise_op_dims,        // calc_dims_func
    },
    {}, // GGML_OP_SQR
    {
        // GGML_OP_SQRT
        QNN_OP_ELEMENT_WISE_SQUARE_ROOT, // qnn_op_name
        1, // input_param_count
        element_wise_op_dims, // calc_dims_func
        // GGML_OP_SQRT
        QNN_OP_ELEMENT_WISE_SQUARE_ROOT,  // qnn_op_name
        1,                                // input_param_count
        element_wise_op_dims,             // calc_dims_func
    },
    {
        // GGML_OP_LOG
        QNN_OP_ELEMENT_WISE_LOG, // qnn_op_name
        1, // input_param_count
        element_wise_op_dims, // calc_dims_func
        // GGML_OP_LOG
        QNN_OP_ELEMENT_WISE_LOG,  // qnn_op_name
        1,                        // input_param_count
        element_wise_op_dims,     // calc_dims_func
    },
    {}, // GGML_OP_SIN
    {}, // GGML_OP_COS

@@ -84,19 +84,19 @@ constexpr const qnn_op_caps_t kOpCaps[] = {
    {}, // GGML_OP_SILU_BACK
    {}, // GGML_OP_NORM
    {
        // GGML_OP_RMS_NORM
        QNN_OP_RMS_NORM, // qnn_op_name
        1, // input_param_count
        nullptr, // TODO: calc_dims_func
        QNN_OP_RMS_NORM_PARAM_EPSILON, // qnn_param_name
        // GGML_OP_RMS_NORM
        QNN_OP_RMS_NORM,                // qnn_op_name
        1,                              // input_param_count
        nullptr,                        // TODO: calc_dims_func
        QNN_OP_RMS_NORM_PARAM_EPSILON,  // qnn_param_name
    },
    {}, // GGML_OP_RMS_NORM_BACK
    {}, // GGML_OP_GROUP_NORM
    {
        // GGML_OP_MUL_MAT
        QNN_OP_MAT_MUL, // qnn_op_name
        2, // input_param_count
        mat_mul_op_dims, // calc_dims_func
        // GGML_OP_MUL_MAT
        QNN_OP_MAT_MUL,   // qnn_op_name
        2,                // input_param_count
        mat_mul_op_dims,  // calc_dims_func
    },
    {}, // GGML_OP_MUL_MAT_ID
    {}, // GGML_OP_OUT_PROD

@@ -105,10 +105,10 @@ constexpr const qnn_op_caps_t kOpCaps[] = {
    {}, // GGML_OP_CPY
    {}, // GGML_OP_CONT
    {
        // GGML_OP_RESHAPE
        QNN_OP_RESHAPE, // qnn_op_name
        1, // input_param_count
        nullptr, // TODO: calc_dims_func
        // GGML_OP_RESHAPE
        QNN_OP_RESHAPE,  // qnn_op_name
        1,               // input_param_count
        nullptr,         // TODO: calc_dims_func
    },
    {}, // GGML_OP_VIEW
    {}, // GGML_OP_PERMUTE

@@ -177,10 +177,10 @@ constexpr const qnn_op_caps_t kOpCaps[] = {
    {}, // GGML_UNARY_OP_RELU
    {}, // GGML_UNARY_OP_SIGMOID
    {
        // GGML_UNARY_OP_GELU
        QNN_OP_GELU, // qnn_op_name
        1, // input_param_count
        nullptr, // TODO: calc_dims_func
        // GGML_UNARY_OP_GELU
        QNN_OP_GELU,  // qnn_op_name
        1,            // input_param_count
        nullptr,      // TODO: calc_dims_func
    },
    {}, // GGML_UNARY_OP_GELU_QUICK
    {}, // GGML_UNARY_OP_SILU

@@ -201,15 +201,17 @@ static_assert(kOpCaps[GGML_OP_COUNT + GGML_UNARY_OP_GELU].input_param_count == 1
static_assert(std::size(kOpCaps) == (GGML_OP_COUNT + GGML_UNARY_OP_COUNT),
              "GGML_OP_COUNT does not match the size of the kOpCaps table");

std::shared_ptr<qnn::ggml_qnn_op_config> mat_mul_op_constructor(const ggml_tensor *op, const std::string &instance_name,
std::shared_ptr<qnn::ggml_qnn_op_config> mat_mul_op_constructor(const ggml_tensor *                op,
                                                                const std::string &                instance_name,
                                                                std::shared_ptr<qnn::qnn_instance> qnn_instance) {
    GGML_UNUSED(op);
    QNN_LOG_DEBUG("create QNN_OP_MAT_MUL, name %s", instance_name.c_str());
    QNN_LOG_DEBUG("create QNN_OP_MAT_MUL, name %s\n", instance_name.c_str());
    return std::make_shared<qnn::ggml_qnn_matmul_op_config>(instance_name, qnn_instance);
}

template <size_t _op>
std::shared_ptr<qnn::ggml_qnn_op_config> generic_op_constructor(const ggml_tensor *op, const std::string &instance_name,
std::shared_ptr<qnn::ggml_qnn_op_config> generic_op_constructor(const ggml_tensor *                op,
                                                                const std::string &                instance_name,
                                                                std::shared_ptr<qnn::qnn_instance> qnn_instance) {
    GGML_UNUSED(op);
    static_assert(_op < std::size(kOpCaps));

@@ -218,20 +220,20 @@ std::shared_ptr<qnn::ggml_qnn_op_config> generic_op_constructor(const ggml_tenso
                                                      kOpCaps[_op].qnn_op_name, qnn_instance);
}

void add_type_parameters(std::shared_ptr<qnn::ggml_qnn_op_config_base> op, const char *name, float value) {
void add_type_parameters(std::shared_ptr<qnn::ggml_qnn_op_config_base> op, const char * name, float value) {
    Qnn_Scalar_t scalar = QNN_SCALAR_INIT;
    scalar.dataType = QNN_DATATYPE_FLOAT_32;
    scalar.floatValue = value;
    scalar.dataType   = QNN_DATATYPE_FLOAT_32;
    scalar.floatValue = value;
    op->add_scalar_param(name, scalar);
}
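
// Sketch of how this helper is expected to be used by the typed constructor
// below (names follow the kOpCaps table; the exact extraction from
// op->op_params is an assumption for illustration):
//
//     float epsilon = 0.0f;
//     memcpy(&epsilon, op->op_params, sizeof(epsilon));  // GGML_OP_RMS_NORM stores eps in op_params
//     add_type_parameters(rmsnorm_config, QNN_OP_RMS_NORM_PARAM_EPSILON, epsilon);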
|
||||
|
||||
template <size_t _op, typename _ggml_op_param_type, typename _qnn_op_type_name>
|
||||
std::shared_ptr<qnn::ggml_qnn_op_config> op_constructor_with_type_param(
|
||||
const ggml_tensor *op, const std::string &instance_name, std::shared_ptr<qnn::qnn_instance> qnn_instance) {
|
||||
const ggml_tensor * op, const std::string & instance_name, std::shared_ptr<qnn::qnn_instance> qnn_instance) {
|
||||
static_assert(std::is_base_of<qnn::ggml_qnn_op_config_base, _qnn_op_type_name>::value);
|
||||
static_assert(_op < std::size(kOpCaps));
|
||||
|
||||
constexpr auto &op_caps = kOpCaps[_op];
|
||||
constexpr auto & op_caps = kOpCaps[_op];
|
||||
static_assert(op_caps.qnn_op_name != nullptr);
|
||||
|
||||
_ggml_op_param_type op_param;
|
||||
|
|
@@ -245,113 +247,113 @@ std::shared_ptr<qnn::ggml_qnn_op_config> op_constructor_with_type_param(
}

constexpr const op_constructor_t kOpConstructors[] = {
    nullptr,                                  // GGML_OP_NONE
    nullptr,                                  // GGML_OP_DUP
    generic_op_constructor<GGML_OP_ADD>,      // GGML_OP_ADD
    nullptr,                                  // GGML_OP_ADD1
    nullptr,                                  // GGML_OP_ACC
    generic_op_constructor<GGML_OP_SUB>,      // GGML_OP_SUB
    generic_op_constructor<GGML_OP_MUL>,      // GGML_OP_MUL
    generic_op_constructor<GGML_OP_DIV>,      // GGML_OP_DIV
    nullptr,                                  // GGML_OP_SQR
    generic_op_constructor<GGML_OP_SQRT>,     // GGML_OP_SQRT
    generic_op_constructor<GGML_OP_LOG>,      // GGML_OP_LOG
    nullptr,                                  // GGML_OP_SIN
    nullptr,                                  // GGML_OP_COS
    nullptr,                                  // GGML_OP_SUM
    nullptr,                                  // GGML_OP_SUM_ROWS
    nullptr,                                  // GGML_OP_MEAN
    nullptr,                                  // GGML_OP_ARGMAX
    nullptr,                                  // GGML_OP_COUNT_EQUAL
    nullptr,                                  // GGML_OP_REPEAT
    nullptr,                                  // GGML_OP_REPEAT_BACK
    nullptr,                                  // GGML_OP_CONCAT
    nullptr,                                  // GGML_OP_SILU_BACK
    nullptr,                                  // GGML_OP_NORM
    op_constructor_with_type_param<GGML_OP_RMS_NORM, float, qnn::ggml_qnn_rmsnorm_op_config>,  // GGML_OP_RMS_NORM
    nullptr,                                  // GGML_OP_RMS_NORM_BACK
    nullptr,                                  // GGML_OP_GROUP_NORM

    mat_mul_op_constructor,                   // GGML_OP_MUL_MAT
    nullptr,                                  // GGML_OP_MUL_MAT_ID
    nullptr,                                  // GGML_OP_OUT_PROD

    nullptr,                                  // GGML_OP_SCALE
    nullptr,                                  // GGML_OP_SET
    nullptr,                                  // GGML_OP_CPY
    nullptr,                                  // GGML_OP_CONT
    generic_op_constructor<GGML_OP_RESHAPE>,  // GGML_OP_RESHAPE
    nullptr,                                  // GGML_OP_VIEW
    nullptr,                                  // GGML_OP_PERMUTE
    nullptr,                                  // GGML_OP_TRANSPOSE
    nullptr,                                  // GGML_OP_GET_ROWS
    nullptr,                                  // GGML_OP_GET_ROWS_BACK
    nullptr,                                  // GGML_OP_DIAG
    nullptr,                                  // GGML_OP_DIAG_MASK_INF
    nullptr,                                  // GGML_OP_DIAG_MASK_ZERO
    nullptr,                                  // GGML_OP_SOFT_MAX
    nullptr,                                  // GGML_OP_SOFT_MAX_BACK
    nullptr,                                  // GGML_OP_ROPE
    nullptr,                                  // GGML_OP_ROPE_BACK
    nullptr,                                  // GGML_OP_CLAMP
    nullptr,                                  // GGML_OP_CONV_TRANSPOSE_1D
    nullptr,                                  // GGML_OP_IM2COL
    nullptr,                                  // GGML_OP_IM2COL_BACK
    nullptr,                                  // GGML_OP_CONV_TRANSPOSE_2D
    nullptr,                                  // GGML_OP_POOL_1D
    nullptr,                                  // GGML_OP_POOL_2D
    nullptr,                                  // GGML_OP_POOL_2D_BACK
    nullptr,                                  // GGML_OP_UPSCALE
    nullptr,                                  // GGML_OP_PAD
    nullptr,                                  // GGML_OP_PAD_REFLECT_1D
    nullptr,                                  // GGML_OP_ARANGE
    nullptr,                                  // GGML_OP_TIMESTEP_EMBEDDING
    nullptr,                                  // GGML_OP_ARGSORT
    nullptr,                                  // GGML_OP_LEAKY_RELU

    nullptr,                                  // GGML_OP_FLASH_ATTN_EXT
    nullptr,                                  // GGML_OP_FLASH_ATTN_BACK
    nullptr,                                  // GGML_OP_SSM_CONV
    nullptr,                                  // GGML_OP_SSM_SCAN
    nullptr,                                  // GGML_OP_WIN_PART
    nullptr,                                  // GGML_OP_WIN_UNPART
    nullptr,                                  // GGML_OP_GET_REL_POS
    nullptr,                                  // GGML_OP_ADD_REL_POS
    nullptr,                                  // GGML_OP_RWKV_WKV6
    nullptr,                                  // GGML_OP_GATED_LINEAR_ATTN

    nullptr,                                  // GGML_OP_UNARY

    nullptr,                                  // GGML_OP_MAP_UNARY
    nullptr,                                  // GGML_OP_MAP_BINARY

    nullptr,                                  // GGML_OP_MAP_CUSTOM1_F32
    nullptr,                                  // GGML_OP_MAP_CUSTOM2_F32
    nullptr,                                  // GGML_OP_MAP_CUSTOM3_F32

    nullptr,                                  // GGML_OP_MAP_CUSTOM1
    nullptr,                                  // GGML_OP_MAP_CUSTOM2
    nullptr,                                  // GGML_OP_MAP_CUSTOM3

    nullptr,                                  // GGML_OP_CROSS_ENTROPY_LOSS
    nullptr,                                  // GGML_OP_CROSS_ENTROPY_LOSS_BACK
    nullptr,                                  // GGML_OP_OPT_STEP_ADAMW

    // ggml_unary_op
    nullptr,                                  // GGML_UNARY_OP_ABS
    nullptr,                                  // GGML_UNARY_OP_SGN
    nullptr,                                  // GGML_UNARY_OP_NEG
    nullptr,                                  // GGML_UNARY_OP_STEP
    nullptr,                                  // GGML_UNARY_OP_TANH
    nullptr,                                  // GGML_UNARY_OP_ELU
    nullptr,                                  // GGML_UNARY_OP_RELU
    nullptr,                                  // GGML_UNARY_OP_SIGMOID
    nullptr,                                  // GGML_UNARY_OP_GELU
    nullptr,                                  // GGML_UNARY_OP_GELU_QUICK
    nullptr,                                  // GGML_UNARY_OP_SILU
    nullptr,                                  // GGML_UNARY_OP_HARDSWISH
    nullptr,                                  // GGML_UNARY_OP_HARDSIGMOID
    nullptr,                                  // GGML_UNARY_OP_EXP
};

static_assert(kOpConstructors[GGML_OP_NONE] == nullptr, "GGML_OP_NONE does not match the nullptr function");
@@ -362,11 +364,11 @@ static_assert(kOpConstructors[GGML_OP_MUL_MAT] == mat_mul_op_constructor,
static_assert(std::size(kOpConstructors) == (GGML_OP_COUNT + GGML_UNARY_OP_COUNT),
              "GGML_OP_COUNT does not match the size of the kOpConstructors table");

} // namespace

namespace qnn {

size_t get_qnn_op_index(const ggml_tensor *tensor) {
size_t get_qnn_op_index(const ggml_tensor * tensor) {
    if (tensor->op == GGML_OP_UNARY) {
        return kGgmlUnaryOpStart + ggml_get_unary_op(tensor);
    }

@@ -374,20 +376,20 @@ size_t get_qnn_op_index(const ggml_tensor * tensor) {
    return tensor->op;
}

const char *get_qnn_op_name(const ggml_tensor *op) {
const char * get_qnn_op_name(const ggml_tensor * op) {
    auto op_index = get_qnn_op_index(op);
    GGML_ASSERT(op_index < std::size(kOpCaps));
    GGML_ASSERT(kOpCaps[op_index].qnn_op_name);
    return kOpCaps[op_index].qnn_op_name;
}

size_t get_qnn_op_input_param_count(const ggml_tensor *op) {
size_t get_qnn_op_input_param_count(const ggml_tensor * op) {
    auto op_index = get_qnn_op_index(op);
    GGML_ASSERT(op_index < std::size(kOpCaps));
    return kOpCaps[op_index].input_param_count;
}

std::shared_ptr<ggml_qnn_op_config> create_op(const ggml_tensor *op, const std::string &name,
std::shared_ptr<ggml_qnn_op_config> create_op(const ggml_tensor * op, const std::string & name,
                                              std::shared_ptr<qnn_instance> qnn_instance) {
    auto op_index = get_qnn_op_index(op);
    GGML_ASSERT(op_index < std::size(kOpCaps));

@@ -396,4 +398,4 @@ std::shared_ptr<ggml_qnn_op_config> create_op(const ggml_tensor * op, const std::string & name,
    return op_constructor(op, name, qnn_instance);
}

} // namespace qnn
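A minimal usage sketch of the dispatch path above (all names are from this file; the instance name and the `dst` node are illustrative): `create_op` folds unary ops into the table space after the regular ops, then indexes `kOpConstructors`.

```cpp
// dst is assumed to be a GGML_OP_MUL_MAT node taken from a ggml graph.
// get_qnn_op_index(dst) == GGML_OP_MUL_MAT, and
// kOpConstructors[GGML_OP_MUL_MAT] == mat_mul_op_constructor per the static_assert above,
// so this yields a ggml_qnn_matmul_op_config.
auto op_config = qnn::create_op(dst, "mul_mat_0", instance);

// A GGML_OP_UNARY node with subtype GELU would instead map to
// index GGML_OP_COUNT + GGML_UNARY_OP_GELU (kGgmlUnaryOpStart offset).
```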
@@ -6,14 +6,7 @@

namespace {

constexpr const qnn::qnn_dimension_array_t kTransposeParamData[GGML_MAX_DIMS] = {
    {0},
    {1, 0},
    {0, 2, 1},
    {0, 1, 3, 2},
};

qnn::qnn_dimension_array_t get_transposed_dimensions(const qnn::qnn_dimension_array_t &dimensions, int rank) {
qnn::qnn_dimension_array_t get_transposed_dimensions(const qnn::qnn_dimension_array_t & dimensions, int rank) {
    qnn::qnn_dimension_array_t transposed_dims = dimensions;
    if (rank >= 2) {
        transposed_dims[rank - 1] = dimensions[rank - 2];

@@ -23,11 +16,11 @@ qnn::qnn_dimension_array_t get_transposed_dimensions(const qnn::qnn_dimension_array_t & dimensions, int rank) {
    return transposed_dims;
}

int get_rank(const qnn::ggml_tensor_array_t &tensor_inputs, const qnn::ggml_tensor_array_t &tensor_outputs) {
int get_rank(const qnn::ggml_tensor_array_t & tensor_inputs, const qnn::ggml_tensor_array_t & tensor_outputs) {
    return std::max(qnn::get_ggml_tensors_max_rank(tensor_inputs), qnn::get_ggml_tensors_max_rank(tensor_outputs));
}

Qnn_DataType_t get_tensor_type(const qnn::qnn_tensor_array_t &tensors) {
Qnn_DataType_t get_tensor_type(const qnn::qnn_tensor_array_t & tensors) {
    Qnn_DataType_t type = QNN_DATATYPE_UNDEFINED;
    for (auto tensor : tensors) {
        auto tensor_type_size = qnn::qnn_datatype_size(tensor->get_data_type());
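A quick worked example of the transpose helper above (values illustrative): only the two innermost dimensions swap, higher batch axes stay put.

```cpp
qnn::qnn_dimension_array_t dims = { 8, 4, 32, 64 };
auto transposed = get_transposed_dimensions(dims, /*rank=*/4);
// transposed == { 8, 4, 64, 32 }
// for rank 2: { 32, 64 } -> { 64, 32 }
```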
@@ -40,67 +33,67 @@ Qnn_DataType_t get_tensor_type(const qnn::qnn_tensor_array_t & tensors) {
    return type;
}

} // namespace

namespace qnn {

void ggml_qnn_op_config_base::add_scalar_param(const std::string &name, const Qnn_Scalar_t scalar) {
void ggml_qnn_op_config_base::add_scalar_param(const std::string & name, const Qnn_Scalar_t scalar) {
    _param_names.push_back(name);
    Qnn_Param_t param = QNN_PARAM_INIT;
    param.paramType = QNN_PARAMTYPE_SCALAR;
    param.name = _param_names.back().c_str();
    param.scalarParam = scalar;
    _qnn_parameters.push_back(param);
}

bool ggml_qnn_op_config_base::add_tensor_param(const std::string &name, const qnn_dimension_array_t &dimensions,
                                               int rank, const uint8_t *data, const Qnn_DataType_t data_type,
bool ggml_qnn_op_config_base::add_tensor_param(const std::string & name, const qnn_dimension_array_t & dimensions,
                                               int rank, const uint8_t * data, const Qnn_DataType_t data_type,
                                               QNNBackend device, Qnn_GraphHandle_t graph_handle) {
    std::string tensor_name = _name + name + std::to_string(_tensor_parameters.size());
    auto param_tensor = std::make_shared<ggml_qnn_tensor>(ggml_qnn_tensor::PARAMETER, tensor_name, dimensions,
                                                          data_type, rank, device, graph_handle, _qnn_instance);
    size_t data_size = ggml_type_size(ggml_datatype_from_qnn_datatype(data_type));
    for (int i = 0; i < rank; i++) {
        data_size *= dimensions[i];
    }

    GGML_ASSERT(data_size > 0);
    if (!param_tensor->set_data_buffer(data, data_size)) {
        QNN_LOG_ERROR("parameter tensor bind_buffer failed");
        QNN_LOG_ERROR("parameter tensor bind_buffer failed\n");
        return false;
    }

    if (!param_tensor->alloc_qnn_tensor_id()) {
        QNN_LOG_ERROR("parameter tensor alloc_qnn_tensor_id failed");
        QNN_LOG_ERROR("parameter tensor alloc_qnn_tensor_id failed\n");
        return false;
    }

    _tensor_parameters.push_back(param_tensor);
    _param_names.push_back(name);
    Qnn_Param_t param = QNN_PARAM_INIT;
    param.paramType = QNN_PARAMTYPE_TENSOR;
    param.name = _param_names.back().c_str();
    param.tensorParam = param_tensor->get_qnn_tensor();
    _qnn_parameters.push_back(param);
    return true;
}

void ggml_qnn_op_config_base::set_input_tensors(qnn::qnn_tensor_array_t &tensor_inputs) {
void ggml_qnn_op_config_base::set_input_tensors(qnn::qnn_tensor_array_t & tensor_inputs) {
    _tensor_inputs = tensor_inputs;
    _qnn_tensor_inputs.resize(_tensor_inputs.size());
}

void ggml_qnn_op_config_base::set_input_tensors(qnn::qnn_tensor_array_t &&tensor_inputs) {
void ggml_qnn_op_config_base::set_input_tensors(qnn::qnn_tensor_array_t && tensor_inputs) {
    _tensor_inputs = tensor_inputs;
    _qnn_tensor_inputs.resize(_tensor_inputs.size());
}

void ggml_qnn_op_config_base::set_output_tensors(qnn::qnn_tensor_array_t &tensor_outputs) {
void ggml_qnn_op_config_base::set_output_tensors(qnn::qnn_tensor_array_t & tensor_outputs) {
    _tensor_outputs = std::move(tensor_outputs);
    _qnn_tensor_outputs.resize(_tensor_outputs.size());
}

void ggml_qnn_op_config_base::set_output_tensors(qnn::qnn_tensor_array_t &&tensor_outputs) {
void ggml_qnn_op_config_base::set_output_tensors(qnn::qnn_tensor_array_t && tensor_outputs) {
    _tensor_outputs = std::move(tensor_outputs);
    _qnn_tensor_outputs.resize(_tensor_outputs.size());
}
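To make the `data_size` computation in `add_tensor_param` concrete, here is the rank-1 UINT32 `axes` parameter that the RMS-norm config passes in further down this diff (the 4-byte element size assumes `QNN_DATATYPE_UINT_32` maps back to a 4-byte ggml type):

```cpp
// dimensions = { 1 }, rank = 1, data_type = QNN_DATATYPE_UINT_32
// element size: ggml_type_size(ggml_datatype_from_qnn_datatype(data_type)) == 4
// data_size  : 4 * dimensions[0] == 4 bytes -> satisfies GGML_ASSERT(data_size > 0)
constexpr const uint32_t kAxes[] = { 0 };
add_tensor_param(QNN_OP_RMS_NORM_PARAM_AXES, { 1 }, 1, reinterpret_cast<const uint8_t *>(kAxes),
                 QNN_DATATYPE_UINT_32, device, graph_handle);
```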
@@ -109,74 +102,80 @@ bool ggml_qnn_op_config_base::add_op_to_graph(Qnn_GraphHandle_t graph_handle) {
    GGML_ASSERT(_qnn_tensor_inputs.size() == _tensor_inputs.size());
    GGML_ASSERT(_qnn_tensor_outputs.size() == _tensor_outputs.size());

    QNN_LOG_DEBUG("[%s]add to graph start", _name.c_str());
    QNN_LOG_DEBUG("[%s]add to graph start\n", _name.c_str());
    for (size_t i = 0; i < _tensor_inputs.size(); i++) {
        auto tensor = _tensor_inputs[i];
        if (!tensor->alloc_qnn_tensor_id()) {
            QNN_LOG_ERROR("[%s]input tensor alloc_qnn_tensor_id failed", _name.c_str());
            QNN_LOG_ERROR("[%s]input tensor alloc_qnn_tensor_id failed\n", _name.c_str());
            return false;
        }

        QNN_LOG_DEBUG("[%s]input tensor id: %d", _name.c_str(), tensor->get_qnn_tensor_id());
        QNN_LOG_DEBUG("[%s]input tensor id: %d\n", _name.c_str(), tensor->get_qnn_tensor_id());
        _qnn_tensor_inputs[i] = tensor->get_qnn_tensor();
    }

    for (size_t i = 0; i < _tensor_outputs.size(); i++) {
        auto tensor = _tensor_outputs[i];
        if (!tensor->alloc_qnn_tensor_id()) {
            QNN_LOG_ERROR("[%s]output tensor alloc_qnn_tensor_id failed", _name.c_str());
            QNN_LOG_ERROR("[%s]output tensor alloc_qnn_tensor_id failed\n", _name.c_str());
            return false;
        }

        QNN_LOG_DEBUG("[%s]output tensor id: %d", _name.c_str(), tensor->get_qnn_tensor_id());
        QNN_LOG_DEBUG("[%s]output tensor id: %d\n", _name.c_str(), tensor->get_qnn_tensor_id());
        _qnn_tensor_outputs[i] = tensor->get_qnn_tensor();
    }

    auto qnn_interface = _qnn_instance->get_qnn_interface();
    auto error = qnn_interface->qnn_graph_add_node(graph_handle, get_op_config());
    if (error != QNN_SUCCESS) {
        QNN_LOG_ERROR("[%s]qnn_graph_add_node.error: %s", _name.c_str(), get_qnn_error_string(error));
        QNN_LOG_ERROR("[%s]qnn_graph_add_node.error: %s\n", _name.c_str(), get_qnn_error_string(error));
        return false;
    }

    QNN_LOG_DEBUG("[%s]added to graph succeed", _name.c_str());
    QNN_LOG_DEBUG("[%s]added to graph succeed\n", _name.c_str());
    return true;
}

bool ggml_qnn_op_config_base::bind_input_tensors(const ggml_tensor_array_t &tensor_inputs) {
bool ggml_qnn_op_config_base::bind_input_tensors(const ggml_tensor_array_t & tensor_inputs) {
    GGML_ASSERT(tensor_inputs.size() == _tensor_inputs.size());
    return qnn::bind_tensors(tensor_inputs, _tensor_inputs, _qnn_tensor_inputs);
}

bool ggml_qnn_op_config_base::bind_output_tensors(const ggml_tensor_array_t &tensor_outputs) {
bool ggml_qnn_op_config_base::bind_output_tensors(const ggml_tensor_array_t & tensor_outputs) {
    GGML_ASSERT(tensor_outputs.size() == _tensor_outputs.size());
    return qnn::bind_tensors(tensor_outputs, _tensor_outputs, _qnn_tensor_outputs);
}

void ggml_qnn_op_config_base::unbind_input_tensors() {
    for (auto &tensor : _tensor_inputs) {
    for (auto & tensor : _tensor_inputs) {
        tensor->unbind();
    }
}

void ggml_qnn_op_config_base::unbind_output_tensors() {
    for (auto &tensor : _tensor_outputs) {
    for (auto & tensor : _tensor_outputs) {
        tensor->unbind();
    }
}

Qnn_OpConfig_t ggml_qnn_op_config_base::get_op_config() {
    Qnn_OpConfig_t config = QNN_OPCONFIG_INIT;
    config.version = QNN_OPCONFIG_VERSION_1;
    auto &op_config = config.v1;
    op_config.name = _name.c_str();
    op_config.packageName = _package_name.c_str();
    op_config.typeName = _op_type.c_str();
    op_config.numOfParams = (uint32_t)_qnn_parameters.size();
    op_config.params = _qnn_parameters.data();
    op_config.numOfInputs = (uint32_t)_qnn_tensor_inputs.size();
    op_config.inputTensors = _qnn_tensor_inputs.data();
    op_config.numOfOutputs = (uint32_t)_qnn_tensor_outputs.size();
    GGML_ASSERT(_qnn_parameters.size() == _param_names.size());

    for (size_t i = 0; i < _qnn_parameters.size(); i++) {
        _qnn_parameters[i].name = _param_names[i].c_str();
    }

    Qnn_OpConfig_t config = QNN_OPCONFIG_INIT;
    config.version = QNN_OPCONFIG_VERSION_1;
    auto & op_config = config.v1;
    op_config.name = _name.c_str();
    op_config.packageName = _package_name.c_str();
    op_config.typeName = _op_type.c_str();
    op_config.numOfParams = (uint32_t) _qnn_parameters.size();
    op_config.params = _qnn_parameters.data();
    op_config.numOfInputs = (uint32_t) _qnn_tensor_inputs.size();
    op_config.inputTensors = _qnn_tensor_inputs.data();
    op_config.numOfOutputs = (uint32_t) _qnn_tensor_outputs.size();
    op_config.outputTensors = _qnn_tensor_outputs.data();
    return config;
}
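Why the added loop refreshes `param.name` on every `get_op_config()` call (this matches the commit message's "refresh params name field when getting op config"): `_param_names` is a `std::vector<std::string>`, so the `c_str()` pointers captured earlier in `add_scalar_param`/`add_tensor_param` can dangle once a later `push_back` reallocates the vector. A minimal reproduction of the hazard (illustrative, not from the diff):

```cpp
std::vector<std::string> names;
names.push_back("alpha");
const char * p = names.back().c_str();  // points into the vector's current storage
names.push_back("beta");                // may reallocate and move the strings
// `p` is now potentially dangling; re-reading names[0].c_str() at use time is
// required, which is exactly what the refresh loop in get_op_config() does.
```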
@@ -188,33 +187,33 @@ bool ggml_qnn_single_op_config::initialize_op_nodes(QNNBackend device, Qnn_GraphHandle_t graph_handle) {
}

bool ggml_qnn_rmsnorm_op_config::initialize_op_nodes(QNNBackend device, Qnn_GraphHandle_t graph_handle) {
    constexpr const uint32_t kAxes[] = {0};
    add_tensor_param(QNN_OP_RMS_NORM_PARAM_AXES, {1}, 1, reinterpret_cast<const uint8_t *>(kAxes), QNN_DATATYPE_UINT_32,
                     device, graph_handle);
    constexpr const uint32_t kAxes[] = { 0 };
    add_tensor_param(QNN_OP_RMS_NORM_PARAM_AXES, { 1 }, 1, reinterpret_cast<const uint8_t *>(kAxes),
                     QNN_DATATYPE_UINT_32, device, graph_handle);
    return true;
}

void ggml_qnn_aggregate_op_config::set_input_tensors(qnn::qnn_tensor_array_t &tensor_inputs) {
void ggml_qnn_aggregate_op_config::set_input_tensors(qnn::qnn_tensor_array_t & tensor_inputs) {
    _tensor_inputs = tensor_inputs;
}

void ggml_qnn_aggregate_op_config::set_input_tensors(qnn::qnn_tensor_array_t &&tensor_inputs) {
void ggml_qnn_aggregate_op_config::set_input_tensors(qnn::qnn_tensor_array_t && tensor_inputs) {
    _tensor_inputs = std::move(tensor_inputs);
}

void ggml_qnn_aggregate_op_config::set_output_tensors(qnn::qnn_tensor_array_t &tensor_outputs) {
void ggml_qnn_aggregate_op_config::set_output_tensors(qnn::qnn_tensor_array_t & tensor_outputs) {
    _tensor_outputs = tensor_outputs;
}

void ggml_qnn_aggregate_op_config::set_output_tensors(qnn::qnn_tensor_array_t &&tensor_outputs) {
void ggml_qnn_aggregate_op_config::set_output_tensors(qnn::qnn_tensor_array_t && tensor_outputs) {
    _tensor_outputs = std::move(tensor_outputs);
}

bool ggml_qnn_aggregate_op_config::bind_input_tensors(const ggml_tensor_array_t &tensor_inputs) {
bool ggml_qnn_aggregate_op_config::bind_input_tensors(const ggml_tensor_array_t & tensor_inputs) {
    return qnn::bind_tensors(tensor_inputs, _tensor_inputs);
}

bool ggml_qnn_aggregate_op_config::bind_output_tensors(const ggml_tensor_array_t &tensor_outputs) {
bool ggml_qnn_aggregate_op_config::bind_output_tensors(const ggml_tensor_array_t & tensor_outputs) {
    return qnn::bind_tensors(tensor_outputs, _tensor_outputs);
}
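The aggregate pattern these overrides serve, in one sketch (this mirrors the inline `qnn::add_op_to_graph` helper in the header below): a compound op such as matmul owns child op configs in `_operations` and forwards the graph call to each.

```cpp
// Simplified view of how an aggregate op reaches the QNN graph:
for (auto & sub_op : _operations) {   // e.g. convert / gather / matmul children
    if (!sub_op->add_op_to_graph(graph_handle)) {
        return false;                 // first failing child aborts the build
    }
}
return true;
```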
@@ -223,18 +222,18 @@ bool ggml_qnn_matmul_op_config::initialize_op_nodes(QNNBackend device, Qnn_GraphHandle_t graph_handle) {
    GGML_ASSERT(_tensor_outputs.size() == 1);

    // create convert nodes
    const auto tensor_rank = _tensor_inputs.front()->get_rank();
    qnn_tensor_array_t mat_mul_tensor_inputs = _tensor_inputs;
    qnn_tensor_array_t mat_mul_tensor_outputs = _tensor_outputs;
    if (!create_convert_nodes(device, graph_handle, tensor_rank, mat_mul_tensor_inputs, mat_mul_tensor_outputs)) {
        QNN_LOG_ERROR("create convert nodes failed");
        QNN_LOG_ERROR("create convert nodes failed\n");
        return false;
    }

    mat_mul_tensor_inputs.front() =
        create_gather_nodes(device, graph_handle, tensor_rank, mat_mul_tensor_inputs.front(),
                            mat_mul_tensor_inputs.back()->get_dimensions());
    return create_mat_mul_nodes(device, graph_handle, tensor_rank, mat_mul_tensor_inputs, mat_mul_tensor_outputs);
    return create_mat_mul_nodes(mat_mul_tensor_inputs, mat_mul_tensor_outputs);
}

qnn_tensor_ptr_t ggml_qnn_matmul_op_config::create_gather_nodes(QNNBackend device, Qnn_GraphHandle_t graph_handle,
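As a reading aid, the node pipeline that `initialize_op_nodes` assembles (a sketch of the control flow above; node names are illustrative):

```cpp
// ggml src0, src1
//   -> optional Convert nodes  (unify operand dtypes; skipped entirely on GPU,
//                               see create_convert_nodes)
//   -> optional Gather nodes   (broadcast the first operand along batch dims
//                               when the shapes differ, see create_gather_nodes)
//   -> MatMul node             (QNN_OP_MAT_MUL with transpose_in1 set)
//   -> ggml dst
```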
@@ -244,9 +243,9 @@ qnn_tensor_ptr_t ggml_qnn_matmul_op_config::create_gather_nodes(QNNBackend device, Qnn_GraphHandle_t graph_handle,
        return tensor_input;
    }

    const auto &input_dimensions = tensor_input->get_dimensions();
    output_dimensions[rank - 1] = input_dimensions[rank - 1];
    output_dimensions[rank - 2] = input_dimensions[rank - 2];
    const auto & input_dimensions = tensor_input->get_dimensions();
    output_dimensions[rank - 1] = input_dimensions[rank - 1];
    output_dimensions[rank - 2] = input_dimensions[rank - 2];

    const auto y = output_dimensions[rank - 3] / input_dimensions[rank - 3];
    if (y == 1 && (rank == 3 || (rank == 4 && output_dimensions[rank - 4] == input_dimensions[rank - 4]))) {

@@ -255,9 +254,9 @@ qnn_tensor_ptr_t ggml_qnn_matmul_op_config::create_gather_nodes(QNNBackend device, Qnn_GraphHandle_t graph_handle,

    // create concat nodes, to convert tensor shape from [ne03, ne02, n, k] to [ne03 * x, ne02 * y, n, k]
    constexpr const auto create_node =
        [](const std::string &name, const int rank, const int axis, const qnn_dimension_array_t &dimensions,
        [](const std::string & name, const int rank, const int axis, const qnn_dimension_array_t & dimensions,
           qnn_tensor_ptr_t tensor_input, QNNBackend device, Qnn_GraphHandle_t graph_handle,
           std::shared_ptr<qnn_instance> qnn_instance, qnn_tensor_ptr_t &tensor_output) -> qnn_op_config_ptr_t {
           std::shared_ptr<qnn_instance> qnn_instance, qnn_tensor_ptr_t & tensor_output) -> qnn_op_config_ptr_t {
        auto gather_out =
            std::make_shared<ggml_qnn_tensor>(ggml_qnn_tensor::INTERMEDIATE, name + "_out", dimensions,
                                              tensor_input->get_data_type(), rank, device, graph_handle, qnn_instance);

@@ -265,32 +264,32 @@ qnn_tensor_ptr_t ggml_qnn_matmul_op_config::create_gather_nodes(QNNBackend device, Qnn_GraphHandle_t graph_handle,
                                              qnn_instance);

        Qnn_Scalar_t scalar = QNN_SCALAR_INIT;
        scalar.dataType = QNN_DATATYPE_INT_32;
        scalar.int32Value = axis;
        gather_op->add_scalar_param(QNN_OP_GATHER_PARAM_AXIS, scalar);
        gather_op->set_output_tensors({gather_out});
        gather_op->set_output_tensors({ gather_out });

        // here we calculate the index mapping, will generate a 1d tensor like [0, 0, 0, 1, 1, 1, 2, 2, 2, ...],
        // by repeating each index [scale] times.
        const auto scale = dimensions[axis] / tensor_input->get_dimensions()[axis];
        auto index_buffer = std::make_shared<qnn_mem_buffer>(dimensions[axis] * sizeof(uint32_t));
        for (uint32_t *curr = reinterpret_cast<uint32_t *>(index_buffer->get_buffer()), *end = curr + dimensions[axis];
             curr < end; curr++) {
            *curr = uint32_t((curr - reinterpret_cast<uint32_t *>(index_buffer->get_buffer())) / scale);
        }

        auto gather_index = std::make_shared<ggml_qnn_tensor>(
            ggml_qnn_tensor::PARAMETER, name + "_index", qnn_dimension_array_t{dimensions[axis]}, QNN_DATATYPE_UINT_32,
            1, device, graph_handle, qnn_instance);
            ggml_qnn_tensor::PARAMETER, name + "_index", qnn_dimension_array_t{ dimensions[axis] },
            QNN_DATATYPE_UINT_32, 1, device, graph_handle, qnn_instance);
        gather_index->set_data_buffer(index_buffer);
        gather_op->set_input_tensors({tensor_input, gather_index});
        gather_op->set_input_tensors({ tensor_input, gather_index });

        tensor_output = gather_out;
        return gather_op;
    };

    qnn_dimension_array_t intermediate_dimensions = input_dimensions;
    intermediate_dimensions[rank - 3] = output_dimensions[rank - 3];
    qnn_tensor_ptr_t gather0_out;
    _operations.push_back(create_node(_name + "_gather0", rank, rank - 3, intermediate_dimensions, tensor_input, device,
                                      graph_handle, _qnn_instance, gather0_out));
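A worked example of the index buffer described in the comment above, for an axis grown from 2 to 6 slices (so `scale == 3`):

```cpp
// scale = dimensions[axis] / input_dims[axis] = 6 / 2 = 3
// buffer position (curr - base): 0 1 2 3 4 5
// stored value  (position/scale): 0 0 0 1 1 1
// -> Gather repeats each of the 2 source slices 3 times along `axis`.
uint32_t index[6];
for (uint32_t i = 0; i < 6; i++) {
    index[i] = i / 3;
}
```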
@@ -305,8 +304,8 @@ qnn_tensor_ptr_t ggml_qnn_matmul_op_config::create_gather_nodes(QNNBackend device, Qnn_GraphHandle_t graph_handle,
}

bool ggml_qnn_matmul_op_config::create_convert_nodes(QNNBackend device, Qnn_GraphHandle_t graph_handle, const int rank,
                                                     qnn_tensor_array_t &tensor_inputs,
                                                     qnn_tensor_array_t &tensor_outputs) {
                                                     qnn_tensor_array_t & tensor_inputs,
                                                     qnn_tensor_array_t & tensor_outputs) {
    if (device == QNN_BACKEND_GPU) {
        // there's no convert op for GPU, so we should create matmul nodes directly.
        return true;

@@ -314,7 +313,7 @@ bool ggml_qnn_matmul_op_config::create_convert_nodes(QNNBackend device, Qnn_GraphHandle_t graph_handle, const int rank,

    // create tensors for convert node
    auto tensor_type = get_tensor_type(tensor_inputs);
    QNN_LOG_DEBUG("input tensor type: %s", qnn_datatype_to_string(tensor_type));
    QNN_LOG_DEBUG("input tensor type: %s\n", qnn_datatype_to_string(tensor_type));

    for (size_t i = 0; i < tensor_inputs.size(); ++i) {
        // create input convert nodes

@@ -327,10 +326,10 @@ bool ggml_qnn_matmul_op_config::create_convert_nodes(QNNBackend device, Qnn_GraphHandle_t graph_handle, const int rank,
        auto convert_out = std::make_shared<ggml_qnn_tensor>(ggml_qnn_tensor::INTERMEDIATE, convert_name + "_out",
                                                             convert_in->get_dimensions(), tensor_type, rank, device,
                                                             graph_handle, _qnn_instance);
        auto convert = std::make_shared<ggml_qnn_single_op_config>(convert_name, QNN_OP_PACKAGE_NAME_QTI_AISW,
                                                                   QNN_OP_CONVERT, _qnn_instance);
        convert->set_input_tensors({convert_in});
        convert->set_output_tensors({convert_out});
        convert->set_input_tensors({ convert_in });
        convert->set_output_tensors({ convert_out });
        tensor_inputs[i] = convert_out;
        _operations.push_back(convert);
    }

@@ -338,14 +337,14 @@ bool ggml_qnn_matmul_op_config::create_convert_nodes(QNNBackend device, Qnn_GraphHandle_t graph_handle, const int rank,
    if (tensor_outputs.front()->get_data_type() != tensor_type) {
        // create output convert node
        std::string convert_name("convert_dst");
        auto convert_out = tensor_outputs.front();
        auto convert_in = std::make_shared<ggml_qnn_tensor>(ggml_qnn_tensor::INTERMEDIATE, convert_name + "_in",
                                                            convert_out->get_dimensions(), tensor_type, rank, device,
                                                            graph_handle, _qnn_instance);
        auto output_convert = std::make_shared<ggml_qnn_single_op_config>(convert_name, QNN_OP_PACKAGE_NAME_QTI_AISW,
                                                                          QNN_OP_CONVERT, _qnn_instance);
        output_convert->set_input_tensors({convert_in});
        output_convert->set_output_tensors({convert_out});
        output_convert->set_input_tensors({ convert_in });
        output_convert->set_output_tensors({ convert_out });
        tensor_outputs.front() = convert_in;
        _operations.push_back(output_convert);
    }

@@ -353,10 +352,8 @@ bool ggml_qnn_matmul_op_config::create_convert_nodes(QNNBackend device, Qnn_GraphHandle_t graph_handle, const int rank,
    return true;
}

bool ggml_qnn_matmul_op_config::create_mat_mul_nodes(QNNBackend device, Qnn_GraphHandle_t graph_handle, const int rank,
                                                     qnn_tensor_array_t &tensor_inputs,
                                                     qnn_tensor_array_t &tensor_outputs) {

bool ggml_qnn_matmul_op_config::create_mat_mul_nodes(qnn_tensor_array_t & tensor_inputs,
                                                     qnn_tensor_array_t & tensor_outputs) {
    /*
     * First, both the ggml and qnn tensor in memory are stored as row-major format. (For more details, please refer to:
     * https://pytorch.org/blog/tensor-memory-format-matters/#:~:text=Column%20Major%20Order:%20In%20this%20format,%20the%20matrix)

@@ -395,8 +392,8 @@ bool ggml_qnn_matmul_op_config::create_mat_mul_nodes(qnn_tensor_array_t & tensor_inputs,
     * So here we need to create graph like:
     * ```mermaid
     * graph TD;
     *      i1>ggml_tensor_in0] --src1--> mat_mul0;
     *      i2>ggml_tensor_in1] --src0.T--> mat_mul0;
     *      i1>ggml_tensor_in1] --src0--> mat_mul0;
     *      i2>ggml_tensor_in0] --src1.T--> mat_mul0;
     *      mat_mul0 --dst0--> o1>ggml_tensor_out];
     * ```
     */

@@ -411,8 +408,8 @@ bool ggml_qnn_matmul_op_config::create_mat_mul_nodes(qnn_tensor_array_t & tensor_inputs,
        std::make_shared<ggml_qnn_single_op_config>(_name, QNN_OP_PACKAGE_NAME_QTI_AISW, QNN_OP_MAT_MUL, _qnn_instance);

    Qnn_Scalar_t scalar = QNN_SCALAR_INIT;
    scalar.dataType = QNN_DATATYPE_BOOL_8;
    scalar.bool8Value = 1;
    mat_mul->add_scalar_param(QNN_OP_MAT_MUL_PARAM_TRANSPOSE_IN1, scalar);

    // set tensor to mat_mul
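A hedged shape walkthrough of why `transpose_in1` is set (assuming the usual ggml convention where `src0` is a [K, M] weight, `src1` is a [K, N] activation, and `dst` is [M, N], all in ggml ne-order with the innermost dimension first):

```cpp
// ggml ne-order:            src0: [K, M]   src1: [K, N]   dst: [M, N]
// as row-major matrices (QNN order reverses the dims):
//                           src1 -> [N, K],   src0 -> [M, K]
// MatMul(in0 = src1, in1 = src0, transpose_in1 = true):
//                           [N, K] x [M, K]^T = [N, M]  ==  dst in QNN order
// i.e. the shared K axis is contracted without materializing a transposed copy.
```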
@@ -424,4 +421,4 @@ bool ggml_qnn_matmul_op_config::create_mat_mul_nodes(qnn_tensor_array_t & tensor_inputs,
    return true;
}

} // namespace qnn

@@ -13,77 +13,83 @@
namespace qnn {

class ggml_qnn_op_config_base : public ggml_qnn_op_config {
public:
    explicit ggml_qnn_op_config_base(const std::string &name, const std::string &package_name,
                                     const std::string &op_type, std::shared_ptr<qnn_instance> qnn_instance)
        : _name(name), _package_name(package_name), _op_type(op_type), _qnn_instance(qnn_instance) {}
    explicit ggml_qnn_op_config_base(const std::string & name, const std::string & package_name,
                                     const std::string & op_type, std::shared_ptr<qnn_instance> qnn_instance) :
        _name(name),
        _package_name(package_name),
        _op_type(op_type),
        _qnn_instance(qnn_instance) {}

    void add_scalar_param(const std::string &name, const Qnn_Scalar_t scalar);
    bool add_tensor_param(const std::string &name, const qnn_dimension_array_t &dimensions, int rank,
                          const uint8_t *data, const Qnn_DataType_t data_type, QNNBackend device,
    void add_scalar_param(const std::string & name, const Qnn_Scalar_t scalar);
    bool add_tensor_param(const std::string & name, const qnn_dimension_array_t & dimensions, int rank,
                          const uint8_t * data, const Qnn_DataType_t data_type, QNNBackend device,
                          Qnn_GraphHandle_t graph_handle);

    void set_input_tensors(qnn::qnn_tensor_array_t &tensor_inputs) override;
    void set_input_tensors(qnn::qnn_tensor_array_t &&tensor_inputs) override;
    void set_output_tensors(qnn::qnn_tensor_array_t &tensor_inputs) override;
    void set_output_tensors(qnn::qnn_tensor_array_t &&tensor_inputs) override;
    void set_input_tensors(qnn::qnn_tensor_array_t & tensor_inputs) override;
    void set_input_tensors(qnn::qnn_tensor_array_t && tensor_inputs) override;
    void set_output_tensors(qnn::qnn_tensor_array_t & tensor_inputs) override;
    void set_output_tensors(qnn::qnn_tensor_array_t && tensor_inputs) override;
    bool add_op_to_graph(Qnn_GraphHandle_t graph_handle) override;
    bool bind_input_tensors(const ggml_tensor_array_t &tensor_inputs) override;
    bool bind_output_tensors(const ggml_tensor_array_t &tensor_outputs) override;
    bool bind_input_tensors(const ggml_tensor_array_t & tensor_inputs) override;
    bool bind_output_tensors(const ggml_tensor_array_t & tensor_outputs) override;
    void unbind_input_tensors() override;
    void unbind_output_tensors() override;
    const qnn_tensor_array_t &get_input_tensors() override { return _tensor_inputs; }
    const qnn_tensor_array_t &get_output_tensors() override { return _tensor_outputs; }
    const qnn_tensor_array_t & get_input_tensors() override { return _tensor_inputs; }

    const qnn_tensor_array_t & get_output_tensors() override { return _tensor_outputs; }

protected:
    Qnn_OpConfig_t get_op_config();

    std::string _name;
    std::string _package_name;
    std::string _op_type;
    std::shared_ptr<qnn_instance> _qnn_instance;
    qnn_tensor_array_t _tensor_inputs;
    qnn_tensor_array_t _tensor_outputs;
    qnn_tensor_array_t _tensor_parameters;
    std::vector<Qnn_Tensor_t> _qnn_tensor_inputs;
    std::vector<Qnn_Tensor_t> _qnn_tensor_outputs;
    std::vector<Qnn_Param_t> _qnn_parameters;
    std::vector<std::string> _param_names;

    DISABLE_COPY(ggml_qnn_op_config_base);
    DISABLE_MOVE(ggml_qnn_op_config_base);
};

class ggml_qnn_single_op_config : public ggml_qnn_op_config_base {
public:
    explicit ggml_qnn_single_op_config(const std::string &name, const std::string &package_name,
                                       const std::string &op_type, std::shared_ptr<qnn_instance> qnn_instance)
        : ggml_qnn_op_config_base(name, package_name, op_type, qnn_instance) {}
    explicit ggml_qnn_single_op_config(const std::string & name, const std::string & package_name,
                                       const std::string & op_type, std::shared_ptr<qnn_instance> qnn_instance) :
        ggml_qnn_op_config_base(name, package_name, op_type, qnn_instance) {}

    bool initialize_op_nodes(QNNBackend device, Qnn_GraphHandle_t graph_handle) override;

private:
    DISABLE_COPY(ggml_qnn_single_op_config);
    DISABLE_MOVE(ggml_qnn_single_op_config);
};

class ggml_qnn_rmsnorm_op_config : public ggml_qnn_op_config_base {
public:
    explicit ggml_qnn_rmsnorm_op_config(const std::string &name, const std::string &package_name,
                                        const std::string &op_type, std::shared_ptr<qnn_instance> qnn_instance)
        : ggml_qnn_op_config_base(name, package_name, op_type, qnn_instance) {}
    explicit ggml_qnn_rmsnorm_op_config(const std::string & name, const std::string & package_name,
                                        const std::string & op_type, std::shared_ptr<qnn_instance> qnn_instance) :
        ggml_qnn_op_config_base(name, package_name, op_type, qnn_instance) {}

    bool initialize_op_nodes(QNNBackend device, Qnn_GraphHandle_t graph_handle) override;

private:
    DISABLE_COPY(ggml_qnn_rmsnorm_op_config);
    DISABLE_MOVE(ggml_qnn_rmsnorm_op_config);
};

class ggml_qnn_aggregate_op_config : public ggml_qnn_op_config {
public:
    explicit ggml_qnn_aggregate_op_config(const std::string &name, std::shared_ptr<qnn_instance> qnn_instance)
        : _name(name), _qnn_instance(qnn_instance) {}
    explicit ggml_qnn_aggregate_op_config(const std::string & name, std::shared_ptr<qnn_instance> qnn_instance) :
        _name(name),
        _qnn_instance(qnn_instance) {}

    ~ggml_qnn_aggregate_op_config() {
        _tensor_inputs.clear();

@@ -91,61 +97,63 @@ public:
        _operations.clear();
    }

    void set_input_tensors(qnn::qnn_tensor_array_t &tensor_inputs) override;
    void set_input_tensors(qnn::qnn_tensor_array_t &&tensor_inputs) override;
    void set_output_tensors(qnn::qnn_tensor_array_t &tensor_inputs) override;
    void set_output_tensors(qnn::qnn_tensor_array_t &&tensor_inputs) override;
    void set_input_tensors(qnn::qnn_tensor_array_t & tensor_inputs) override;
    void set_input_tensors(qnn::qnn_tensor_array_t && tensor_inputs) override;
    void set_output_tensors(qnn::qnn_tensor_array_t & tensor_inputs) override;
    void set_output_tensors(qnn::qnn_tensor_array_t && tensor_inputs) override;

    bool add_op_to_graph(Qnn_GraphHandle_t graph_handle) override {
        return qnn::add_op_to_graph(graph_handle, _operations);
    }

    bool bind_input_tensors(const ggml_tensor_array_t &tensor_inputs) override;
    bool bind_output_tensors(const ggml_tensor_array_t &tensor_outputs) override;
    bool bind_input_tensors(const ggml_tensor_array_t & tensor_inputs) override;
    bool bind_output_tensors(const ggml_tensor_array_t & tensor_outputs) override;

    void unbind_input_tensors() override {
        for (auto &tensor : _tensor_inputs) {
        for (auto & tensor : _tensor_inputs) {
            tensor->unbind();
        }
    }

    void unbind_output_tensors() override {
        for (auto &tensor : _tensor_outputs) {
        for (auto & tensor : _tensor_outputs) {
            tensor->unbind();
        }
    }

    const qnn_tensor_array_t &get_input_tensors() override { return _tensor_inputs; }
    const qnn_tensor_array_t &get_output_tensors() override { return _tensor_outputs; }
    const qnn_tensor_array_t & get_input_tensors() override { return _tensor_inputs; }

    const qnn_tensor_array_t & get_output_tensors() override { return _tensor_outputs; }

protected:
    std::string _name;
    std::shared_ptr<qnn_instance> _qnn_instance;

    std::vector<qnn_op_config_ptr_t> _operations;
    qnn_tensor_array_t _tensor_inputs;
    qnn_tensor_array_t _tensor_outputs;

private:
    DISABLE_COPY(ggml_qnn_aggregate_op_config);
    DISABLE_MOVE(ggml_qnn_aggregate_op_config);
};

class ggml_qnn_matmul_op_config : public ggml_qnn_aggregate_op_config {
public:
    ggml_qnn_matmul_op_config(const std::string &name, std::shared_ptr<qnn_instance> qnn_instance)
        : ggml_qnn_aggregate_op_config(name, qnn_instance) {}
    ggml_qnn_matmul_op_config(const std::string & name, std::shared_ptr<qnn_instance> qnn_instance) :
        ggml_qnn_aggregate_op_config(name, qnn_instance) {}

    bool initialize_op_nodes(QNNBackend device, Qnn_GraphHandle_t graph_handle) override;

private:
    qnn_tensor_ptr_t create_gather_nodes(QNNBackend device, Qnn_GraphHandle_t graph_handle, const int rank,
                                         qnn_tensor_ptr_t tensor_input, qnn_dimension_array_t output_dimensions);
    bool create_convert_nodes(QNNBackend device, Qnn_GraphHandle_t graph_handle, const int rank,
                              qnn_tensor_array_t &tensor_inputs, qnn_tensor_array_t &tensor_outputs);
    bool create_mat_mul_nodes(QNNBackend device, Qnn_GraphHandle_t graph_handle, const int rank,
                              qnn_tensor_array_t &tensor_inputs, qnn_tensor_array_t &tensor_outputs);
    bool create_convert_nodes(QNNBackend device, Qnn_GraphHandle_t graph_handle, const int rank,
                              qnn_tensor_array_t & tensor_inputs, qnn_tensor_array_t & tensor_outputs);
    bool create_mat_mul_nodes(qnn_tensor_array_t & tensor_inputs, qnn_tensor_array_t & tensor_outputs);

    DISABLE_COPY(ggml_qnn_matmul_op_config);
    DISABLE_MOVE(ggml_qnn_matmul_op_config);
};

} // namespace qnn
@@ -14,14 +14,14 @@ namespace qnn {

constexpr const size_t kGgmlUnaryOpStart = GGML_OP_COUNT;

size_t get_qnn_op_index(const ggml_tensor *tensor);
const char *get_qnn_op_name(const ggml_tensor *op);
size_t get_qnn_op_input_param_count(const ggml_tensor *op);
std::shared_ptr<ggml_qnn_op_config> create_op(const ggml_tensor *op, const std::string &name,
size_t get_qnn_op_index(const ggml_tensor * tensor);
const char * get_qnn_op_name(const ggml_tensor * op);
size_t get_qnn_op_input_param_count(const ggml_tensor * op);
std::shared_ptr<ggml_qnn_op_config> create_op(const ggml_tensor * op, const std::string & name,
                                              std::shared_ptr<qnn_instance> qnn_instance);

inline bool add_op_to_graph(Qnn_GraphHandle_t graph_handle, std::vector<qnn_op_config_ptr_t> &operations) {
    for (auto &op : operations) {
inline bool add_op_to_graph(Qnn_GraphHandle_t graph_handle, std::vector<qnn_op_config_ptr_t> & operations) {
    for (auto & op : operations) {
        if (!op->add_op_to_graph(graph_handle)) {
            return false;
        }

@@ -30,4 +30,4 @@ inline bool add_op_to_graph(Qnn_GraphHandle_t graph_handle, std::vector<qnn_op_config_ptr_t> & operations) {
    return true;
}

} // namespace qnn
@@ -4,21 +4,21 @@
#include <filesystem>

#if defined(__linux__)
#include <unistd.h>
#    include <unistd.h>
#endif

namespace {

#ifdef _WIN32
constexpr const char *kQnnSystemLibName = "QnnSystem.dll";
constexpr const char *kQnnRpcLibName = "libcdsprpc.dll";
constexpr const char * kQnnSystemLibName = "QnnSystem.dll";
constexpr const char * kQnnRpcLibName = "libcdsprpc.dll";
#else
constexpr const char *kQnnSystemLibName = "libQnnSystem.so";
constexpr const char *kQnnRpcLibName = "libcdsprpc.so";
constexpr const char * kQnnSystemLibName = "libQnnSystem.so";
constexpr const char * kQnnRpcLibName = "libcdsprpc.so";

#endif

void insert_path(std::string &path, std::string insert_path, const char separator = ':') {
void insert_path(std::string & path, std::string insert_path, const char separator = ':') {
    if (!insert_path.empty() && !path.empty()) {
        insert_path += separator;
    }

@@ -27,10 +27,10 @@ void insert_path(std::string & path, std::string insert_path, const char separator = ':') {
}

// TODO: Fix this for other platforms, or use a more portable way to set the library search path
bool set_qnn_lib_search_path(const std::string &custom_lib_search_path) {
bool set_qnn_lib_search_path(const std::string & custom_lib_search_path) {
#if defined(__linux__)
    {
        auto *original = getenv("LD_LIBRARY_PATH");
        auto * original = getenv("LD_LIBRARY_PATH");
        std::string lib_search_path = original ? original : "";
        insert_path(lib_search_path,
                    "/vendor/dsp/cdsp:/vendor/lib64:"
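A small usage sketch of `insert_path` (illustrative values, and assuming the elided remainder of the function prepends `insert_path` to `path`, which is what the separator handling above implies):

```cpp
std::string path = "/system/lib64";
insert_path(path, "/vendor/lib64");  // path == "/vendor/lib64:/system/lib64"
insert_path(path, "");               // empty insert: no separator appended, path unchanged
```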
@@ -41,7 +41,7 @@ bool set_qnn_lib_search_path(const std::string & custom_lib_search_path) {
    }
}

#if defined(__ANDROID__) || defined(ANDROID)
#    if defined(__ANDROID__) || defined(ANDROID)
    {
        // See also: https://docs.qualcomm.com/bundle/publicresource/topics/80-63442-2/dsp_runtime.html
        std::string adsp_lib_search_path = custom_lib_search_path +

@@ -51,87 +51,89 @@ bool set_qnn_lib_search_path(const std::string & custom_lib_search_path) {
            return false;
        }

        QNN_LOG_DEBUG("ADSP_LIBRARY_PATH=%s", getenv("ADSP_LIBRARY_PATH"));
        QNN_LOG_DEBUG("ADSP_LIBRARY_PATH=%s\n", getenv("ADSP_LIBRARY_PATH"));
    }
#endif
#    endif

    QNN_LOG_DEBUG("LD_LIBRARY_PATH=%s", getenv("LD_LIBRARY_PATH"));
    QNN_LOG_DEBUG("LD_LIBRARY_PATH=%s\n", getenv("LD_LIBRARY_PATH"));
#else
    (void)custom_lib_search_path;
    (void) custom_lib_search_path;
#endif

    return true;
}

qnn::dl_handler_t load_lib_with_fallback(const std::string &lib_path, const std::string &load_directory) {
qnn::dl_handler_t load_lib_with_fallback(const std::string & lib_path, const std::string & load_directory) {
    std::filesystem::path full_path(load_directory);
    full_path /= std::filesystem::path(lib_path).filename();
    auto handle = qnn::dl_load(full_path.string());
    if (!handle) {
        QNN_LOG_WARN("failed to load %s, fallback to %s", full_path.c_str(), lib_path.c_str());
        QNN_LOG_WARN("failed to load %s, fallback to %s\n", full_path.c_str(), lib_path.c_str());
        handle = qnn::dl_load(lib_path);
    }

    return handle;
}

} // namespace
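Usage sketch for the fallback loader above (the directory is illustrative): it first tries the caller-supplied directory, then falls back to the bare library name so the search paths configured by `set_qnn_lib_search_path` apply.

```cpp
// 1) tries "/data/local/tmp/libcdsprpc.so"
// 2) falls back to dl_load("libcdsprpc.so"), resolved via LD_LIBRARY_PATH
auto handle = load_lib_with_fallback(kQnnRpcLibName, "/data/local/tmp");
```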
namespace qnn {
|
||||
|
||||
qnn_system_interface::qnn_system_interface(const QnnSystemInterface_t &qnn_sys_interface, dl_handler_t lib_handle)
|
||||
: _qnn_sys_interface(qnn_sys_interface), _lib_handle(lib_handle) {
|
||||
qnn_system_interface::qnn_system_interface(const QnnSystemInterface_t & qnn_sys_interface, dl_handler_t lib_handle) :
|
||||
_qnn_sys_interface(qnn_sys_interface),
|
||||
_lib_handle(lib_handle) {
|
||||
qnn_system_context_create(&_qnn_system_handle);
|
||||
if (_qnn_system_handle) {
|
||||
QNN_LOG_INFO("initialize qnn system successfully");
|
||||
QNN_LOG_INFO("initialize qnn system successfully\n");
|
||||
} else {
|
||||
QNN_LOG_WARN("can not create QNN system contenxt");
|
||||
QNN_LOG_WARN("can not create QNN system contenxt\n");
|
||||
}
|
||||
}
|
||||
|
||||
qnn_system_interface::~qnn_system_interface() {
|
||||
if (_qnn_system_handle) {
|
||||
if (qnn_system_context_free(_qnn_system_handle) != QNN_SUCCESS) {
|
||||
QNN_LOG_WARN("failed to free QNN system context");
|
||||
QNN_LOG_WARN("failed to free QNN system context\n");
|
||||
}
|
||||
} else {
|
||||
QNN_LOG_WARN("system handle is null");
|
||||
QNN_LOG_WARN("system handle is null\n");
|
||||
}
|
||||
|
||||
if (_lib_handle) {
|
||||
if (!dl_unload(_lib_handle)) {
|
||||
QNN_LOG_WARN("failed to close QnnSystem library, error %s", dl_error());
|
||||
QNN_LOG_WARN("failed to close QnnSystem library, error %s\n", dl_error());
|
||||
}
|
||||
} else {
|
||||
QNN_LOG_WARN("system lib handle is null");
|
||||
QNN_LOG_WARN("system lib handle is null\n");
|
||||
}
|
||||
}
|
||||
|
||||
qnn_instance::qnn_instance(const std::string &lib_path, const std::string &backend_lib_name)
|
||||
: _additional_lib_load_path(lib_path), _backend_lib_name(std::move(backend_lib_name)) {
|
||||
qnn_instance::qnn_instance(const std::string & lib_path, const std::string & backend_lib_name) :
|
||||
_additional_lib_load_path(lib_path),
|
||||
_backend_lib_name(std::move(backend_lib_name)) {
|
||||
if (set_qnn_lib_search_path(lib_path)) {
|
||||
QNN_LOG_DEBUG("[%s] set_qnn_lib_search_path succeed", _backend_lib_name.c_str());
|
||||
QNN_LOG_DEBUG("[%s] set_qnn_lib_search_path succeed\n", _backend_lib_name.c_str());
|
||||
} else {
|
||||
QNN_LOG_ERROR("[%s] set_qnn_lib_search_path failed", _backend_lib_name.c_str());
|
||||
QNN_LOG_ERROR("[%s] set_qnn_lib_search_path failed\n", _backend_lib_name.c_str());
|
||||
}
|
||||
}
|
||||
|
||||
int qnn_instance::qnn_init(const QnnSaver_Config_t **saver_config) {
|
||||
int qnn_instance::qnn_init(const QnnSaver_Config_t ** saver_config) {
|
||||
BackendIdType backend_id = QNN_BACKEND_ID_NULL;
|
||||
QNN_LOG_DEBUG("enter qnn_init");
|
||||
QNN_LOG_DEBUG("enter qnn_init\n");
|
||||
|
||||
std::lock_guard<std::mutex> lock(_init_mutex);
|
||||
if (load_system() != 0) {
|
||||
QNN_LOG_WARN("failed to load QNN system lib");
|
||||
QNN_LOG_WARN("failed to load QNN system lib\n");
|
||||
return 1;
|
||||
} else {
|
||||
QNN_LOG_DEBUG("load QNN system lib successfully");
|
||||
QNN_LOG_DEBUG("load QNN system lib successfully\n");
|
||||
}
|
||||
|
||||
std::string backend_lib_path = _backend_lib_name;
|
||||
if (_lib_path_to_backend_id.count(backend_lib_path) == 0) {
|
||||
if (load_backend(backend_lib_path, saver_config) != 0) {
|
||||
QNN_LOG_WARN("failed to load QNN backend");
|
||||
QNN_LOG_WARN("failed to load QNN backend\n");
|
||||
return 2;
|
||||
}
|
||||
}
|
||||
|
|
@ -149,119 +151,119 @@ int qnn_instance::qnn_init(const QnnSaver_Config_t **saver_config) {
|
|||
_qnn_interface->qnn_log_create(qnn::sdk_logcallback, _qnn_log_level, &_qnn_log_handle);
|
||||
if (!_qnn_log_handle) {
|
||||
// NPU backend not work on Qualcomm SoC equipped low-end phone
|
||||
QNN_LOG_WARN("why failed to initialize qnn log");
|
||||
QNN_LOG_WARN("why failed to initialize qnn log\n");
|
||||
return 4;
|
||||
} else {
|
||||
QNN_LOG_DEBUG("initialize qnn log successfully");
|
||||
QNN_LOG_DEBUG("initialize qnn log successfully\n");
|
||||
}
|
||||
|
||||
std::vector<const QnnBackend_Config_t *> temp_backend_config;
|
||||
_qnn_interface->qnn_backend_create(
|
||||
_qnn_log_handle, temp_backend_config.empty() ? nullptr : temp_backend_config.data(), &_qnn_backend_handle);
|
||||
if (!_qnn_backend_handle) {
|
||||
QNN_LOG_WARN("why failed to initialize qnn backend");
|
||||
QNN_LOG_WARN("why failed to initialize qnn backend\n");
|
||||
return 5;
|
||||
} else {
|
||||
QNN_LOG_DEBUG("initialize qnn backend successfully");
|
||||
QNN_LOG_DEBUG("initialize qnn backend successfully\n");
|
||||
}
|
||||
|
||||
auto qnn_status = _qnn_interface->qnn_property_has_capability(QNN_PROPERTY_GROUP_DEVICE);
if (QNN_PROPERTY_NOT_SUPPORTED == qnn_status) {
QNN_LOG_WARN("device property is not supported");
QNN_LOG_WARN("device property is not supported\n");
}
if (QNN_PROPERTY_ERROR_UNKNOWN_KEY == qnn_status) {
QNN_LOG_WARN("device property is not known to backend");
QNN_LOG_WARN("device property is not known to backend\n");
}

qnn_status = QNN_SUCCESS;
if (_backend_lib_name.find("Htp") != _backend_lib_name.npos) {
const QnnDevice_PlatformInfo_t *p_info = nullptr;
qnn_status = _qnn_interface->qnn_device_get_platform_info(nullptr, &p_info);
const QnnDevice_PlatformInfo_t * p_info = nullptr;
qnn_status = _qnn_interface->qnn_device_get_platform_info(nullptr, &p_info);
if (qnn_status == QNN_SUCCESS) {
QNN_LOG_INFO("device counts %d", p_info->v1.numHwDevices);
QnnDevice_HardwareDeviceInfo_t *infos = p_info->v1.hwDevices;
QNN_LOG_INFO("device counts %d\n", p_info->v1.numHwDevices);
QnnDevice_HardwareDeviceInfo_t * infos = p_info->v1.hwDevices;
QnnHtpDevice_OnChipDeviceInfoExtension_t chipinfo = {};
for (uint32_t i = 0; i < p_info->v1.numHwDevices; i++) {
QNN_LOG_INFO("deviceID:%d, deviceType:%d, numCores %d", infos[i].v1.deviceId, infos[i].v1.deviceType,
infos[i].v1.numCores);
QNN_LOG_INFO("deviceID:%d, deviceType:%d, numCores %d\n", (int) infos[i].v1.deviceId,
(int) infos[i].v1.deviceType, (int) infos[i].v1.numCores);
QnnDevice_DeviceInfoExtension_t devinfo = infos[i].v1.deviceInfoExtension;
chipinfo = devinfo->onChipDevice;
size_t htp_arch = (size_t)chipinfo.arch;
QNN_LOG_INFO("htp_type:%d(%s)", devinfo->devType,
chipinfo = devinfo->onChipDevice;
size_t htp_arch = (size_t) chipinfo.arch;
QNN_LOG_INFO("htp_type:%d(%s)\n", devinfo->devType,
(devinfo->devType == QNN_HTP_DEVICE_TYPE_ON_CHIP) ? "ON_CHIP" : "");
QNN_LOG_INFO("qualcomm soc_model:%d(%s), htp_arch:%d(%s), vtcm_size:%d MB", chipinfo.socModel,
qnn::get_chipset_desc(chipinfo.socModel), htp_arch, qnn::get_htparch_desc(htp_arch),
chipinfo.vtcmSize);
_soc_info = {chipinfo.socModel, htp_arch, chipinfo.vtcmSize};
QNN_LOG_INFO("qualcomm soc_model:%d(%s), htp_arch:%d(%s), vtcm_size:%d MB\n", (int) chipinfo.socModel,
qnn::get_chipset_desc(chipinfo.socModel), (int) htp_arch, qnn::get_htparch_desc(htp_arch),
(int) chipinfo.vtcmSize);
_soc_info = { chipinfo.socModel, htp_arch, chipinfo.vtcmSize };
}
_qnn_interface->qnn_device_free_platform_info(nullptr, p_info);
} else {
// For emulator, we can't get platform info
QNN_LOG_WARN("failed to get platform info, are we in emulator?");
_soc_info = {NONE, UNKNOWN_SM, 0};
QNN_LOG_WARN("failed to get platform info, are we in emulator?\n");
_soc_info = { NONE, UNKNOWN_SM, 0 };
}

QnnHtpDevice_CustomConfig_t soc_customconfig;
soc_customconfig.option = QNN_HTP_DEVICE_CONFIG_OPTION_SOC;
soc_customconfig.option = QNN_HTP_DEVICE_CONFIG_OPTION_SOC;
soc_customconfig.socModel = _soc_info.soc_model;
QnnDevice_Config_t soc_devconfig;
soc_devconfig.option = QNN_DEVICE_CONFIG_OPTION_CUSTOM;
soc_devconfig.option = QNN_DEVICE_CONFIG_OPTION_CUSTOM;
soc_devconfig.customConfig = &soc_customconfig;

QnnHtpDevice_CustomConfig_t arch_customconfig;
arch_customconfig.option = QNN_HTP_DEVICE_CONFIG_OPTION_ARCH;
arch_customconfig.arch.arch = (QnnHtpDevice_Arch_t)_soc_info.htp_arch;
arch_customconfig.arch.deviceId = 0; // Id of device to be used. 0 will use by default.
arch_customconfig.option = QNN_HTP_DEVICE_CONFIG_OPTION_ARCH;
arch_customconfig.arch.arch = (QnnHtpDevice_Arch_t) _soc_info.htp_arch;
arch_customconfig.arch.deviceId = 0; // Id of device to be used. 0 will use by default.
QnnDevice_Config_t arch_devconfig;
arch_devconfig.option = QNN_DEVICE_CONFIG_OPTION_CUSTOM;
arch_devconfig.option = QNN_DEVICE_CONFIG_OPTION_CUSTOM;
arch_devconfig.customConfig = &arch_customconfig;

const QnnDevice_Config_t *p_deviceconfig[] = {&soc_devconfig, &arch_devconfig, nullptr};
const QnnDevice_Config_t * p_deviceconfig[] = { &soc_devconfig, &arch_devconfig, nullptr };
qnn_status = _qnn_interface->qnn_device_create(_qnn_log_handle, p_deviceconfig, &_qnn_device_handle);
} else {
qnn_status = _qnn_interface->qnn_device_create(_qnn_log_handle, nullptr, &_qnn_device_handle);
}
if (QNN_SUCCESS != qnn_status && QNN_DEVICE_ERROR_UNSUPPORTED_FEATURE != qnn_status) {
QNN_LOG_WARN("failed to create QNN device");
QNN_LOG_WARN("failed to create QNN device\n");
} else {
QNN_LOG_INFO("create QNN device successfully");
QNN_LOG_INFO("create QNN device successfully\n");
}

if (_profile_level != sdk_profile_level::profile_off) {
QNN_LOG_INFO("profiling turned on; level = %d", _profile_level);
QNN_LOG_INFO("profiling turned on; level = %d\n", _profile_level);
auto profile_level =
_profile_level == sdk_profile_level::profile_detail ? QNN_PROFILE_LEVEL_DETAILED : QNN_PROFILE_LEVEL_BASIC;

if (QNN_PROFILE_NO_ERROR !=
_qnn_interface->qnn_profile_create(_qnn_backend_handle, profile_level, &_qnn_profile_handle)) {
QNN_LOG_WARN("unable to create profile handle in the backend");
QNN_LOG_WARN("unable to create profile handle in the backend\n");
return 6;
} else {
QNN_LOG_DEBUG("initialize qnn profile successfully");
QNN_LOG_DEBUG("initialize qnn profile successfully\n");
}
}

_rpc_lib_handle = load_lib_with_fallback(kQnnRpcLibName, _additional_lib_load_path);
if (_rpc_lib_handle) {
_pfn_rpc_mem_alloc = reinterpret_cast<qnn::pfn_rpc_mem_alloc>(dl_sym(_rpc_lib_handle, "rpcmem_alloc"));
_pfn_rpc_mem_free = reinterpret_cast<qnn::pfn_rpc_mem_free>(dl_sym(_rpc_lib_handle, "rpcmem_free"));
_pfn_rpc_mem_free = reinterpret_cast<qnn::pfn_rpc_mem_free>(dl_sym(_rpc_lib_handle, "rpcmem_free"));
_pfn_rpc_mem_to_fd = reinterpret_cast<qnn::pfn_rpc_mem_to_fd>(dl_sym(_rpc_lib_handle, "rpcmem_to_fd"));
if (!_pfn_rpc_mem_alloc || !_pfn_rpc_mem_free || !_pfn_rpc_mem_to_fd) {
QNN_LOG_WARN("unable to access symbols in QNN RPC lib. error: %s", dl_error());
QNN_LOG_WARN("unable to access symbols in QNN RPC lib. error: %s\n", dl_error());
dl_unload(_rpc_lib_handle);
return 9;
}

_pfn_rpc_mem_init = reinterpret_cast<qnn::pfn_rpc_mem_init>(dl_sym(_rpc_lib_handle, "rpcmem_init"));
_pfn_rpc_mem_init = reinterpret_cast<qnn::pfn_rpc_mem_init>(dl_sym(_rpc_lib_handle, "rpcmem_init"));
_pfn_rpc_mem_deinit = reinterpret_cast<qnn::pfn_rpc_mem_deinit>(dl_sym(_rpc_lib_handle, "rpcmem_deinit"));
if (_pfn_rpc_mem_init) {
_pfn_rpc_mem_init();
}

_rpcmem_initialized = true;
QNN_LOG_DEBUG("load rpcmem lib successfully");
QNN_LOG_DEBUG("load rpcmem lib successfully\n");
} else {
QNN_LOG_WARN("failed to load qualcomm rpc lib, skipping, error:%s", dl_error());
QNN_LOG_WARN("failed to load qualcomm rpc lib, skipping, error:%s\n", dl_error());
}

/* TODO: not used, keep it for further usage
@ -271,23 +273,23 @@ int qnn_instance::qnn_init(const QnnSaver_Config_t **saver_config) {
*/
_qnn_interface->qnn_context_create(_qnn_backend_handle, _qnn_device_handle, nullptr, &_qnn_context_handle);
if (nullptr == _qnn_context_handle) {
QNN_LOG_WARN("why failed to initialize qnn context");
QNN_LOG_WARN("why failed to initialize qnn context\n");
return 10;
} else {
QNN_LOG_DEBUG("initialize qnn context successfully");
QNN_LOG_DEBUG("initialize qnn context successfully\n");
}

if (_backend_lib_name.find("Htp") != _backend_lib_name.npos) {
// TODO: faster approach to probe the accurate capacity of rpc ion memory
size_t candidate_size = 0;
uint8_t *rpc_buffer = nullptr;
const int size_in_mb = (1 << 20);
size_t probe_slots[] = {1024, 1536, 2048 - 48, 2048};
size_t probe_counts = sizeof(probe_slots) / sizeof(size_t);
size_t candidate_size = 0;
uint8_t * rpc_buffer = nullptr;
const int size_in_mb = (1 << 20);
size_t probe_slots[] = { 1024, 1536, 2048 - 48, 2048 };
size_t probe_counts = sizeof(probe_slots) / sizeof(size_t);
for (size_t idx = 0; idx < probe_counts; idx++) {
rpc_buffer = static_cast<uint8_t *>(alloc_rpcmem(probe_slots[idx] * size_in_mb, sizeof(void *)));
if (!rpc_buffer) {
QNN_LOG_DEBUG("alloc rpcmem %d (MB) failure, %s", probe_slots[idx], strerror(errno));
QNN_LOG_DEBUG("alloc rpcmem %d (MB) failure, %s\n", (int) probe_slots[idx], strerror(errno));
break;
} else {
candidate_size = probe_slots[idx];
@ -297,27 +299,27 @@ int qnn_instance::qnn_init(const QnnSaver_Config_t **saver_config) {
}

_rpcmem_capacity = std::max(candidate_size, _rpcmem_capacity);
QNN_LOG_INFO("capacity of QNN rpc ion memory is about %d MB", _rpcmem_capacity);
QNN_LOG_INFO("capacity of QNN rpc ion memory is about %d MB\n", (int) _rpcmem_capacity);

if (init_htp_perfinfra() != 0) {
QNN_LOG_WARN("initialize HTP performance failure");
QNN_LOG_WARN("initialize HTP performance failure\n");
}
if (set_rpc_polling() != 0) {
QNN_LOG_WARN("set RPC polling failure");
QNN_LOG_WARN("set RPC polling failure\n");
}
if (set_high_performance_mode() != 0) {
QNN_LOG_WARN("set HTP high performance mode failure");
QNN_LOG_WARN("set HTP high performance mode failure\n");
}
}

QNN_LOG_DEBUG("leave qnn_init");
QNN_LOG_DEBUG("leave qnn_init\n");

return 0;
}
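
// Note on the probe loop above: rpcmem exposes no capacity-query API, so
// qnn_init() simply tries the ascending slot sizes and keeps the largest one
// that allocates. A minimal standalone sketch of the same idea (hypothetical
// helper, not part of this change; callbacks stand in for rpcmem alloc/free):
#include <cstddef>

inline size_t probe_capacity_mb(void * (*try_alloc)(size_t), void (*release)(void *)) {
    const size_t slots_mb[] = { 1024, 1536, 2048 - 48, 2048 };
    size_t       best       = 0;
    for (size_t mb : slots_mb) {
        void * p = try_alloc(mb * (1 << 20));  // probe one slot
        if (!p) {
            break;  // slots are ascending, so larger ones would fail too
        }
        best = mb;
        release(p);  // only probing; do not hold the memory
    }
    return best;
}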

int qnn_instance::qnn_finalize() {
int ret_status = 0;
Qnn_ErrorHandle_t error = QNN_SUCCESS;
int ret_status = 0;
Qnn_ErrorHandle_t error = QNN_SUCCESS;

if (_rpc_lib_handle) {
if (_pfn_rpc_mem_deinit) {
@ -326,9 +328,9 @@ int qnn_instance::qnn_finalize() {
}

if (dl_unload(_rpc_lib_handle)) {
QNN_LOG_DEBUG("succeed to close rpcmem lib");
QNN_LOG_DEBUG("succeed to close rpcmem lib\n");
} else {
QNN_LOG_WARN("failed to unload qualcomm's rpc lib, error:%s", dl_error());
QNN_LOG_WARN("failed to unload qualcomm's rpc lib, error:%s\n", dl_error());
}
}

@ -339,8 +341,8 @@ int qnn_instance::qnn_finalize() {
if (_qnn_context_handle) {
error = _qnn_interface->qnn_context_free(_qnn_context_handle, _qnn_profile_handle);
if (error != QNN_SUCCESS) {
QNN_LOG_WARN("failed to free QNN context_handle: ID %u, error %d", _qnn_interface->get_backend_id(),
QNN_GET_ERROR_CODE(error));
QNN_LOG_WARN("failed to free QNN context_handle: ID %u, error %d\n", _qnn_interface->get_backend_id(),
(int) QNN_GET_ERROR_CODE(error));
}
_qnn_context_handle = nullptr;
}
@ -348,8 +350,8 @@ int qnn_instance::qnn_finalize() {
if (_qnn_profile_handle) {
error = _qnn_interface->qnn_profile_free(_qnn_profile_handle);
if (error != QNN_SUCCESS) {
QNN_LOG_WARN("failed to free QNN profile_handle: ID %u, error %d", _qnn_interface->get_backend_id(),
QNN_GET_ERROR_CODE(error));
QNN_LOG_WARN("failed to free QNN profile_handle: ID %u, error %d\n", _qnn_interface->get_backend_id(),
(int) QNN_GET_ERROR_CODE(error));
}
_qnn_profile_handle = nullptr;
}
@ -357,8 +359,8 @@ int qnn_instance::qnn_finalize() {
if (_qnn_device_handle) {
error = _qnn_interface->qnn_device_free(_qnn_device_handle);
if (error != QNN_SUCCESS) {
QNN_LOG_WARN("failed to free QNN device_handle: ID %u, error %d", _qnn_interface->get_backend_id(),
QNN_GET_ERROR_CODE(error));
QNN_LOG_WARN("failed to free QNN device_handle: ID %u, error %d\n", _qnn_interface->get_backend_id(),
(int) QNN_GET_ERROR_CODE(error));
}
_qnn_device_handle = nullptr;
}
@ -366,17 +368,17 @@ int qnn_instance::qnn_finalize() {
if (_qnn_backend_handle) {
error = _qnn_interface->qnn_backend_free(_qnn_backend_handle);
if (error != QNN_SUCCESS) {
QNN_LOG_WARN("failed to free QNN backend_handle: ID %u, error %d", _qnn_interface->get_backend_id(),
QNN_GET_ERROR_CODE(error));
QNN_LOG_WARN("failed to free QNN backend_handle: ID %u, error %d\n", _qnn_interface->get_backend_id(),
(int) QNN_GET_ERROR_CODE(error));
}
_qnn_backend_handle = nullptr;
}

if (nullptr != _qnn_log_handle) {
if (_qnn_log_handle) {
error = _qnn_interface->qnn_log_free(_qnn_log_handle);
if (error != QNN_SUCCESS) {
QNN_LOG_WARN("failed to free QNN log_handle: ID %u, error %d", _qnn_interface->get_backend_id(),
QNN_GET_ERROR_CODE(error));
QNN_LOG_WARN("failed to free QNN log_handle: ID %u, error %d\n", _qnn_interface->get_backend_id(),
(int) QNN_GET_ERROR_CODE(error));
}
_qnn_log_handle = nullptr;
}
@ -389,60 +391,60 @@ int qnn_instance::qnn_finalize() {
}
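
// Note: qnn_finalize() tears handles down in reverse order of creation
// (context -> profile -> device -> backend -> log). A generic sketch of that
// invariant with a LIFO guard (illustrative only, not part of this change):
#include <functional>
#include <vector>

struct release_stack {
    std::vector<std::function<void()>> fns;

    void defer(std::function<void()> fn) { fns.push_back(std::move(fn)); }

    ~release_stack() {
        // run the registered releases newest-first, mirroring qnn_finalize()
        for (auto it = fns.rbegin(); it != fns.rend(); ++it) {
            (*it)();
        }
    }
};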

int qnn_instance::load_system() {
QNN_LOG_DEBUG("[%s]lib: %s", _backend_lib_name.c_str(), kQnnSystemLibName);
QNN_LOG_DEBUG("[%s]lib: %s\n", _backend_lib_name.c_str(), kQnnSystemLibName);
auto system_lib_handle = load_lib_with_fallback(kQnnSystemLibName, _additional_lib_load_path);
if (!system_lib_handle) {
QNN_LOG_WARN("can not load QNN library %s, error: %s", kQnnSystemLibName, dl_error());
QNN_LOG_WARN("can not load QNN library %s, error: %s\n", kQnnSystemLibName, dl_error());
return 1;
}

auto *get_providers =
auto * get_providers =
dl_sym_typed<qnn::pfn_qnnsysteminterface_getproviders *>(system_lib_handle, "QnnSystemInterface_getProviders");
if (!get_providers) {
QNN_LOG_WARN("can not load QNN symbol QnnSystemInterface_getProviders: %s", dl_error());
QNN_LOG_WARN("can not load QNN symbol QnnSystemInterface_getProviders: %s\n", dl_error());
return 2;
}

uint32_t num_providers = 0;
const QnnSystemInterface_t **provider_list = nullptr;
Qnn_ErrorHandle_t error = get_providers(&provider_list, &num_providers);
uint32_t num_providers = 0;
const QnnSystemInterface_t ** provider_list = nullptr;
Qnn_ErrorHandle_t error = get_providers(&provider_list, &num_providers);
if (error != QNN_SUCCESS) {
QNN_LOG_WARN("failed to get providers, error %d", QNN_GET_ERROR_CODE(error));
QNN_LOG_WARN("failed to get providers, error %d\n", (int) QNN_GET_ERROR_CODE(error));
return 3;
}

QNN_LOG_DEBUG("num_providers: %d", num_providers);
QNN_LOG_DEBUG("num_providers: %d\n", num_providers);
if (num_providers != _required_num_providers) {
QNN_LOG_WARN("providers is %d instead of required %d", num_providers, _required_num_providers);
QNN_LOG_WARN("providers is %d instead of required %d\n", (int) num_providers, (int) _required_num_providers);
return 4;
}

if (!provider_list) {
QNN_LOG_WARN("can not get providers");
QNN_LOG_WARN("can not get providers\n");
return 5;
}

QNN_SYSTEM_INTERFACE_VER_TYPE qnn_system_interface;
bool found_valid_system_interface = false;
bool found_valid_system_interface = false;
for (size_t idx = 0; idx < num_providers; idx++) {
if (QNN_SYSTEM_API_VERSION_MAJOR == provider_list[idx]->systemApiVersion.major &&
QNN_SYSTEM_API_VERSION_MINOR <= provider_list[idx]->systemApiVersion.minor) {
found_valid_system_interface = true;
qnn_system_interface = provider_list[idx]->QNN_SYSTEM_INTERFACE_VER_NAME;
qnn_system_interface = provider_list[idx]->QNN_SYSTEM_INTERFACE_VER_NAME;
break;
}
}

if (!found_valid_system_interface) {
QNN_LOG_WARN("unable to find a valid qnn system interface");
QNN_LOG_WARN("unable to find a valid qnn system interface\n");
return 6;
} else {
QNN_LOG_DEBUG("find a valid qnn system interface");
QNN_LOG_DEBUG("find a valid qnn system interface\n");
}

auto qnn_sys_interface = std::make_shared<qnn::qnn_system_interface>(*provider_list[0], system_lib_handle);
if (!qnn_sys_interface->is_valid()) {
QNN_LOG_WARN("failed to create QNN system interface");
QNN_LOG_WARN("failed to create QNN system interface\n");
return 7;
}

@ -450,79 +452,79 @@ int qnn_instance::load_system() {
return 0;
}

int qnn_instance::load_backend(std::string &lib_path, const QnnSaver_Config_t ** /*saver_config*/) {
int qnn_instance::load_backend(std::string & lib_path, const QnnSaver_Config_t ** /*saver_config*/) {
Qnn_ErrorHandle_t error = QNN_SUCCESS;
QNN_LOG_DEBUG("lib_path:%s", lib_path.c_str());
QNN_LOG_DEBUG("lib_path:%s\n", lib_path.c_str());

auto lib_handle = load_lib_with_fallback(lib_path, _additional_lib_load_path);
if (!lib_handle) {
QNN_LOG_WARN("can not open QNN library %s, with error: %s", lib_path.c_str(), dl_error());
QNN_LOG_WARN("can not open QNN library %s, with error: %s\n", lib_path.c_str(), dl_error());
return 1;
}

auto get_providers = dl_sym_typed<qnn::pfn_qnninterface_getproviders *>(lib_handle, "QnnInterface_getProviders");
if (!get_providers) {
QNN_LOG_WARN("can not load symbol QnnInterface_getProviders : %s", dl_error());
QNN_LOG_WARN("can not load symbol QnnInterface_getProviders : %s\n", dl_error());
return 2;
}

std::uint32_t num_providers = 0;
const QnnInterface_t **provider_list = nullptr;
error = get_providers(&provider_list, &num_providers);
std::uint32_t num_providers = 0;
const QnnInterface_t ** provider_list = nullptr;
error = get_providers(&provider_list, &num_providers);
if (error != QNN_SUCCESS) {
QNN_LOG_WARN("failed to get providers, error %d", QNN_GET_ERROR_CODE(error));
QNN_LOG_WARN("failed to get providers, error %d\n", (int) QNN_GET_ERROR_CODE(error));
return 3;
}
QNN_LOG_DEBUG("num_providers=%d", num_providers);
QNN_LOG_DEBUG("num_providers=%d\n", num_providers);
if (num_providers != _required_num_providers) {
QNN_LOG_WARN("providers is %d instead of required %d", num_providers, _required_num_providers);
QNN_LOG_WARN("providers is %d instead of required %d\n", num_providers, _required_num_providers);
return 4;
}

if (!provider_list) {
QNN_LOG_WARN("failed to get qnn interface providers");
QNN_LOG_WARN("failed to get qnn interface providers\n");
return 5;
}
bool found_valid_interface = false;
bool found_valid_interface = false;
QNN_INTERFACE_VER_TYPE qnn_interface;
for (size_t idx = 0; idx < num_providers; idx++) {
if (QNN_API_VERSION_MAJOR == provider_list[idx]->apiVersion.coreApiVersion.major &&
QNN_API_VERSION_MINOR <= provider_list[idx]->apiVersion.coreApiVersion.minor) {
found_valid_interface = true;
qnn_interface = provider_list[idx]->QNN_INTERFACE_VER_NAME;
qnn_interface = provider_list[idx]->QNN_INTERFACE_VER_NAME;
break;
}
}

if (!found_valid_interface) {
QNN_LOG_WARN("unable to find a valid qnn interface");
QNN_LOG_WARN("unable to find a valid qnn interface\n");
return 6;
} else {
QNN_LOG_DEBUG("find a valid qnn interface");
QNN_LOG_DEBUG("find a valid qnn interface\n");
}

BackendIdType backend_id = provider_list[0]->backendId;
BackendIdType backend_id = provider_list[0]->backendId;
_lib_path_to_backend_id[lib_path] = backend_id;
if (_loaded_backend.count(backend_id) > 0) {
QNN_LOG_WARN("lib_path %s is loaded, but backend %d already exists", lib_path.c_str(), backend_id);
QNN_LOG_WARN("lib_path %s is loaded, but backend %d already exists\n", lib_path.c_str(), backend_id);
}
_loaded_backend[backend_id] = provider_list[0];
if (_loaded_lib_handle.count(backend_id) > 0) {
QNN_LOG_WARN("closing %p", _loaded_lib_handle[backend_id]);
QNN_LOG_WARN("closing %p\n", _loaded_lib_handle[backend_id]);
if (!dl_unload(_loaded_lib_handle[backend_id])) {
QNN_LOG_WARN("fail to close %p with error %s", _loaded_lib_handle[backend_id], dl_error());
QNN_LOG_WARN("fail to close %p with error %s\n", _loaded_lib_handle[backend_id], dl_error());
}
}
_loaded_lib_handle[backend_id] = lib_handle;
_backend_id = backend_id;
_backend_id = backend_id;

return 0;
}
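
// Note: load_system() and load_backend() share the same provider gate: the
// major version must match exactly and the provider's minor version must be
// at least the one this code was built against. Reduced to a predicate
// (hypothetical types, illustrative only):
#include <cstdint>

struct api_version {
    uint32_t major_ver;
    uint32_t minor_ver;
};

inline bool provider_is_compatible(api_version built_against, api_version provided) {
    // same major: ABI-compatible; >= minor: every entry point we expect is present
    return provided.major_ver == built_against.major_ver && provided.minor_ver >= built_against.minor_ver;
}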

int qnn_instance::unload_backend() {
for (auto &it : _loaded_lib_handle) {
for (auto & it : _loaded_lib_handle) {
if (!dl_unload(it.second)) {
QNN_LOG_WARN("failed to close QNN backend %d, error %s", it.first, dl_error());
QNN_LOG_WARN("failed to close QNN backend %d, error %s\n", it.first, dl_error());
}
}

@ -533,4 +535,4 @@ int qnn_instance::unload_backend() {
return 0;
}

} // namespace qnn
} // namespace qnn

@ -24,7 +24,7 @@
#include <QnnTypes.h>
#include <System/QnnSystemInterface.h>

#include "dl_loader.hpp"
#include "dl-loader.hpp"
#include "qnn-types.hpp"
#include "utils.hpp"

@ -42,16 +42,15 @@ namespace qnn {
#pragma GCC diagnostic ignored "-Wpedantic"

class qnn_system_interface {

#define DEFINE_SHIM_FUNCTION_SYS_INTERFACE(F, pointer_name) \
template <typename... Args> \
inline auto qnn_##F(Args... args) const { \
template <typename... Args> inline auto qnn_##F(Args... args) const { \
return (_qnn_sys_interface.QNN_SYSTEM_INTERFACE_VER_NAME.pointer_name)(std::forward<Args>(args)...); \
}

public:
qnn_system_interface(const QnnSystemInterface_t &qnn_sys_interface, dl_handler_t lib_handle);
public:
qnn_system_interface(const QnnSystemInterface_t & qnn_sys_interface, dl_handler_t lib_handle);
~qnn_system_interface();

bool is_valid() const { return _qnn_system_handle != nullptr; }

// QnnSystem
@ -61,27 +60,25 @@ public:

DEFINE_SHIM_FUNCTION_SYS_INTERFACE(system_context_free, systemContextFree);

private:
private:
qnn_system_interface(const qnn_system_interface &) = delete;
void operator=(const qnn_system_interface &) = delete;
qnn_system_interface(qnn_system_interface &&) = delete;
void operator=(qnn_system_interface &&) = delete;
void operator=(const qnn_system_interface &) = delete;
qnn_system_interface(qnn_system_interface &&) = delete;
void operator=(qnn_system_interface &&) = delete;

const QnnSystemInterface_t _qnn_sys_interface = {};
dl_handler_t _lib_handle = nullptr;
QnnSystemContext_Handle_t _qnn_system_handle = nullptr;
dl_handler_t _lib_handle = nullptr;
QnnSystemContext_Handle_t _qnn_system_handle = nullptr;
};

class qnn_interface {

#define DEFINE_SHIM_FUNCTION_INTERFACE(F, pointer_name) \
template <typename... Args> \
inline auto qnn_##F(Args... args) const { \
template <typename... Args> inline auto qnn_##F(Args... args) const { \
return (_qnn_interface.QNN_INTERFACE_VER_NAME.pointer_name)(std::forward<Args>(args)...); \
}

public:
qnn_interface(const QnnInterface_t &qnn_interface) : _qnn_interface(qnn_interface) {}
public:
qnn_interface(const QnnInterface_t & qnn_interface) : _qnn_interface(qnn_interface) {}

// QnnBackend
DEFINE_SHIM_FUNCTION_INTERFACE(backend_create, backendCreate);
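
// For reference, each DEFINE_SHIM_FUNCTION_INTERFACE line expands to a
// perfect-forwarding wrapper; the entry above becomes roughly:
//
//   template <typename... Args> inline auto qnn_backend_create(Args... args) const {
//       return (_qnn_interface.QNN_INTERFACE_VER_NAME.backendCreate)(std::forward<Args>(args)...);
//   }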
@ -161,11 +158,11 @@ public:

uint32_t get_backend_id() const { return _qnn_interface.backendId; }

private:
qnn_interface(const qnn_interface &) = delete;
private:
qnn_interface(const qnn_interface &) = delete;
void operator=(const qnn_interface &) = delete;
qnn_interface(qnn_interface &&) = delete;
void operator=(qnn_interface &&) = delete;
qnn_interface(qnn_interface &&) = delete;
void operator=(qnn_interface &&) = delete;

const QnnInterface_t _qnn_interface = {};
};
@ -173,17 +170,19 @@ private:
#pragma GCC diagnostic pop

class qnn_instance {
public:
public:
using BackendIdType = decltype(QnnInterface_t{}.backendId);

explicit qnn_instance(const std::string &lib_path, const std::string &backend_lib_name);
explicit qnn_instance(const std::string & lib_path, const std::string & backend_lib_name);

~qnn_instance() {}
int qnn_init(const QnnSaver_Config_t **saver_config);

int qnn_init(const QnnSaver_Config_t ** saver_config);
int qnn_finalize();

std::shared_ptr<qnn_interface> get_qnn_interface() {
if (!_qnn_interface) {
QNN_LOG_WARN("pls check why _qnn_interface is not loaded");
QNN_LOG_WARN("pls check why _qnn_interface is not loaded\n");
}
return _qnn_interface;
}
@ -202,26 +201,26 @@ public:

int init_htp_perfinfra() {
QnnDevice_Infrastructure_t device_infra = nullptr;
auto error = _qnn_interface->qnn_device_get_infrastructure(&device_infra);
auto error = _qnn_interface->qnn_device_get_infrastructure(&device_infra);
if (error != QNN_SUCCESS) {
QNN_LOG_WARN("failed to get qnn device infra");
QNN_LOG_WARN("failed to get qnn device infra\n");
return 1;
} else {
QNN_LOG_INFO("HTP backend perf_infrastructure creation ok");
QNN_LOG_INFO("HTP backend perf_infrastructure creation ok\n");
}

QnnHtpDevice_Infrastructure_t *htp_infra = static_cast<QnnHtpDevice_Infrastructure_t *>(device_infra);
QnnHtpDevice_PerfInfrastructure_t *htp_perfinfra = &htp_infra->perfInfra;
uint32_t power_configid = 1;
uint32_t device_id = 0;
uint32_t core_id = 0;
QnnHtpDevice_Infrastructure_t * htp_infra = static_cast<QnnHtpDevice_Infrastructure_t *>(device_infra);
QnnHtpDevice_PerfInfrastructure_t * htp_perfinfra = &htp_infra->perfInfra;
uint32_t power_configid = 1;
uint32_t device_id = 0;
uint32_t core_id = 0;
htp_perfinfra->createPowerConfigId(device_id, core_id, &power_configid);
if (htp_infra->infraType != QNN_HTP_DEVICE_INFRASTRUCTURE_TYPE_PERF) {
QNN_LOG_INFO("HTP infra type = %d, which is not perf infra type", htp_infra->infraType);
QNN_LOG_INFO("HTP infra type = %d, which is not perf infra type\n", htp_infra->infraType);
} else {
QNN_LOG_INFO("HTP infra type = %d, which is perf infra type", htp_infra->infraType);
QNN_LOG_INFO("HTP infra type = %d, which is perf infra type\n", htp_infra->infraType);
}
_qnn_htp_perfinfra = htp_perfinfra;
_qnn_htp_perfinfra = htp_perfinfra;
_qnn_power_configid = power_configid;

return 0;
@ -231,7 +230,7 @@ public:
if (_qnn_htp_perfinfra) {
QnnHtpPerfInfrastructure_PowerConfig_t rpc_polling_time;
memset(&rpc_polling_time, 0, sizeof(rpc_polling_time));
rpc_polling_time.option = QNN_HTP_PERF_INFRASTRUCTURE_POWER_CONFIGOPTION_RPC_POLLING_TIME;
rpc_polling_time.option = QNN_HTP_PERF_INFRASTRUCTURE_POWER_CONFIGOPTION_RPC_POLLING_TIME;
// use rpc polling time recommended 0-10000 us
rpc_polling_time.rpcPollingTimeConfig = 9999;

@ -241,16 +240,16 @@ public:
// use rpc control latency recommended 100 us, refer hexagon sdk
rpc_control_latency.rpcControlLatencyConfig = 100;

const QnnHtpPerfInfrastructure_PowerConfig_t *power_configs[] = {&rpc_polling_time, &rpc_control_latency,
nullptr};
const QnnHtpPerfInfrastructure_PowerConfig_t * power_configs[] = { &rpc_polling_time, &rpc_control_latency,
nullptr };
Qnn_ErrorHandle_t qnn_status = _qnn_htp_perfinfra->setPowerConfig(_qnn_power_configid, power_configs);
if (qnn_status != QNN_SUCCESS) {
QNN_LOG_WARN("set htp perf failed");
QNN_LOG_WARN("set htp perf failed\n");
} else {
QNN_LOG_DEBUG("set htp perf ok");
QNN_LOG_DEBUG("set htp perf ok\n");
}
} else {
QNN_LOG_WARN("can't set htp perf");
QNN_LOG_WARN("can't set htp perf\n");
}

return 0;
@ -258,7 +257,7 @@ public:

int set_high_performance_mode() {
if (nullptr == _qnn_htp_perfinfra) {
QNN_LOG_WARN("perf intra is null");
QNN_LOG_WARN("perf intra is null\n");
return 1;
}

@ -266,83 +265,83 @@ public:
memset(&power_config, 0, sizeof(power_config));
power_config.option = QNN_HTP_PERF_INFRASTRUCTURE_POWER_CONFIGOPTION_DCVS_V3;

power_config.dcvsV3Config.setDcvsEnable = 1;
power_config.dcvsV3Config.dcvsEnable = 0;
power_config.dcvsV3Config.contextId = _qnn_power_configid;
power_config.dcvsV3Config.powerMode = QNN_HTP_PERF_INFRASTRUCTURE_POWERMODE_PERFORMANCE_MODE;
power_config.dcvsV3Config.setSleepLatency = 1; // true to consider Latency parameter otherwise false
power_config.dcvsV3Config.sleepLatency = 40;
power_config.dcvsV3Config.setBusParams = 1; // true to consider Bus parameter otherwise false
power_config.dcvsV3Config.setCoreParams = 1; // true to consider Core parameter otherwise false
power_config.dcvsV3Config.sleepDisable = 1; // true to consider sleep/LPM modes, false to enable
power_config.dcvsV3Config.setDcvsEnable = 1;
power_config.dcvsV3Config.dcvsEnable = 0;
power_config.dcvsV3Config.contextId = _qnn_power_configid;
power_config.dcvsV3Config.powerMode = QNN_HTP_PERF_INFRASTRUCTURE_POWERMODE_PERFORMANCE_MODE;
power_config.dcvsV3Config.setSleepLatency = 1; // true to consider Latency parameter otherwise false
power_config.dcvsV3Config.sleepLatency = 40;
power_config.dcvsV3Config.setBusParams = 1; // true to consider Bus parameter otherwise false
power_config.dcvsV3Config.setCoreParams = 1; // true to consider Core parameter otherwise false
power_config.dcvsV3Config.sleepDisable = 1; // true to consider sleep/LPM modes, false to enable
power_config.dcvsV3Config.setSleepDisable =
1; // true to consider sleep disable/enable parameter otherwise false set sleep latency parameter
1; // true to consider sleep disable/enable parameter otherwise false set sleep latency parameter
// set Bus Clock Parameters
power_config.dcvsV3Config.busVoltageCornerMin = DCVS_VOLTAGE_VCORNER_MAX_VOLTAGE_CORNER;
power_config.dcvsV3Config.busVoltageCornerTarget = DCVS_VOLTAGE_VCORNER_MAX_VOLTAGE_CORNER;
power_config.dcvsV3Config.busVoltageCornerMax = DCVS_VOLTAGE_VCORNER_MAX_VOLTAGE_CORNER;
power_config.dcvsV3Config.busVoltageCornerMin = DCVS_VOLTAGE_VCORNER_MAX_VOLTAGE_CORNER;
power_config.dcvsV3Config.busVoltageCornerTarget = DCVS_VOLTAGE_VCORNER_MAX_VOLTAGE_CORNER;
power_config.dcvsV3Config.busVoltageCornerMax = DCVS_VOLTAGE_VCORNER_MAX_VOLTAGE_CORNER;
// set Core Clock Parameters
power_config.dcvsV3Config.coreVoltageCornerMin = DCVS_VOLTAGE_VCORNER_MAX_VOLTAGE_CORNER;
power_config.dcvsV3Config.coreVoltageCornerMin = DCVS_VOLTAGE_VCORNER_MAX_VOLTAGE_CORNER;
power_config.dcvsV3Config.coreVoltageCornerTarget = DCVS_VOLTAGE_VCORNER_MAX_VOLTAGE_CORNER;
power_config.dcvsV3Config.coreVoltageCornerMax = DCVS_VOLTAGE_VCORNER_MAX_VOLTAGE_CORNER;
power_config.dcvsV3Config.coreVoltageCornerMax = DCVS_VOLTAGE_VCORNER_MAX_VOLTAGE_CORNER;

// set power config with different performance parameters
const QnnHtpPerfInfrastructure_PowerConfig_t *power_configs[] = {&power_config, nullptr};
Qnn_ErrorHandle_t qnn_status = QNN_SUCCESS;
const QnnHtpPerfInfrastructure_PowerConfig_t * power_configs[] = { &power_config, nullptr };
Qnn_ErrorHandle_t qnn_status = QNN_SUCCESS;
qnn_status = _qnn_htp_perfinfra->setPowerConfig(_qnn_power_configid, power_configs);
if (qnn_status != QNN_SUCCESS) {
QNN_LOG_WARN("set htp high performance mode failed");
QNN_LOG_WARN("set htp high performance mode failed\n");
} else {
QNN_LOG_DEBUG("set htp high performance mode ok");
QNN_LOG_DEBUG("set htp high performance mode ok\n");
}

return 0;
}

std::string &get_qnn_graph_name() { return _graph_name; }
std::string & get_qnn_graph_name() { return _graph_name; }

bool is_rpcmem_initialized() { return _rpcmem_initialized; }

size_t get_rpcmem_capacity() { return _rpcmem_capacity; }

void *alloc_rpcmem(size_t bytes, size_t alignment) {
void * alloc_rpcmem(size_t bytes, size_t alignment) {
if (!_rpcmem_initialized) {
QNN_LOG_WARN("rpc memory not initialized");
QNN_LOG_WARN("rpc memory not initialized\n");
return nullptr;
}

auto allocate_bytes = static_cast<int64_t>(bytes + alignment);
void *buf = _pfn_rpc_mem_alloc(RPCMEM_HEAP_ID_SYSTEM, RPCMEM_DEFAULT_FLAGS, (int)allocate_bytes);
auto allocate_bytes = static_cast<int64_t>(bytes + alignment);
void * buf = _pfn_rpc_mem_alloc(RPCMEM_HEAP_ID_SYSTEM, RPCMEM_DEFAULT_FLAGS, (int) allocate_bytes);
if (!buf) {
QNN_LOG_WARN("failed to allocate rpc memory, size: %d MB", (int)(allocate_bytes / (1 << 20)));
QNN_LOG_WARN("failed to allocate rpc memory, size: %d MB\n", (int) (allocate_bytes / (1 << 20)));
return nullptr;
}

auto aligned_buf = reinterpret_cast<void *>(qnn::align_to(alignment, reinterpret_cast<intptr_t>(buf)));
bool status = _rpcmem_store_map.insert(std::pair<void *, void *>(aligned_buf, buf)).second;
bool status = _rpcmem_store_map.insert(std::pair<void *, void *>(aligned_buf, buf)).second;
if (!status) {
QNN_LOG_WARN("failed to allocate rpc memory");
QNN_LOG_WARN("failed to allocate rpc memory\n");
_pfn_rpc_mem_free(buf);
}

return aligned_buf;
}
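
// Note: alloc_rpcmem() over-allocates by `alignment` bytes so the raw pointer
// can always be rounded up to an aligned address, and the raw pointer is kept
// in _rpcmem_store_map so free_rpcmem() can hand it back to rpcmem_free().
// Assuming qnn::align_to() is the usual round-up (its definition lives in
// utils.hpp), it is equivalent to:
//
//   intptr_t align_to(intptr_t alignment, intptr_t p) {
//       return (p + alignment - 1) / alignment * alignment;  // round up
//   }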

void free_rpcmem(void *buf) {
void free_rpcmem(void * buf) {
if (!_rpcmem_initialized) {
QNN_LOG_WARN("rpc memory not initialized");
QNN_LOG_WARN("rpc memory not initialized\n");
} else if (_rpcmem_store_map.count(buf) == 0) {
QNN_LOG_WARN("no allocated tensor");
QNN_LOG_WARN("no allocated tensor\n");
} else {
_pfn_rpc_mem_free(_rpcmem_store_map[buf]);
_rpcmem_store_map.erase(buf);
}
}

int32_t rpcmem_to_fd(void *buf) {
int32_t rpcmem_to_fd(void * buf) {
int32_t mem_fd = -1;
if (!is_rpcmem_initialized()) {
QNN_LOG_WARN("rpc memory not initialized");
QNN_LOG_WARN("rpc memory not initialized\n");
} else {
mem_fd = _pfn_rpc_mem_to_fd(buf);
}
@ -350,74 +349,80 @@ public:
return mem_fd;
}

Qnn_MemHandle_t register_rpcmem(void *p_data, const uint32_t rank, uint32_t *dimensions, Qnn_DataType_t data_type) {
Qnn_MemHandle_t register_rpcmem(void * p_data, const uint32_t rank, uint32_t * dimensions,
Qnn_DataType_t data_type) {
if (!p_data) {
QNN_LOG_WARN("invalid param");
QNN_LOG_WARN("invalid param\n");
return nullptr;
}

if (!is_rpcmem_initialized()) {
QNN_LOG_WARN("rpc memory not initialized");
QNN_LOG_WARN("rpc memory not initialized\n");
return nullptr;
}

if (is_rpcmem_registered(p_data)) {
QNN_LOG_WARN("rpc memory already registered");
QNN_LOG_WARN("rpc memory already registered\n");
return _qnn_rpc_buffer_to_handles[p_data];
}

auto mem_fd = rpcmem_to_fd(p_data);
if (mem_fd == -1) {
QNN_LOG_WARN("failed to get file descriptor");
QNN_LOG_WARN("failed to get file descriptor\n");
return nullptr;
}

QNN_LOG_DEBUG("mem_fd %d", mem_fd);
Qnn_MemDescriptor_t descriptor = {{rank, dimensions, nullptr}, data_type, QNN_MEM_TYPE_ION, {{mem_fd}}};
QNN_LOG_DEBUG("mem_fd %d\n", mem_fd);
Qnn_MemDescriptor_t descriptor = {
{ rank, dimensions, nullptr },
data_type, QNN_MEM_TYPE_ION, { { mem_fd } }
};
Qnn_MemHandle_t handle = nullptr;
auto error = _qnn_interface->qnn_mem_register(_qnn_context_handle, &descriptor,
/*numDescriptors=*/1, &handle);
auto error = _qnn_interface->qnn_mem_register(_qnn_context_handle, &descriptor,
/*numDescriptors=*/1, &handle);
if (error != QNN_SUCCESS) {
QNN_LOG_WARN("failed to register shared memory, error %d, %s", QNN_GET_ERROR_CODE(error), strerror(error));
QNN_LOG_WARN("failed to register shared memory, error %d, %s\n", (int) QNN_GET_ERROR_CODE(error),
strerror(error));
return nullptr;
}

_qnn_rpc_buffer_to_handles.insert({p_data, handle});
QNN_LOG_DEBUG("successfully register shared memory handler: %p", handle);
_qnn_rpc_buffer_to_handles.insert({ p_data, handle });
QNN_LOG_DEBUG("successfully register shared memory handler: %p\n", handle);
return handle;
}
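
// Typical call sequence for the rpcmem helpers above (illustrative only;
// error handling elided, the buffer shape is invented for the example):
//
//   uint32_t dims[] = { 1, 1, 64, 64 };
//   void * buf = instance->alloc_rpcmem(64 * 64 * sizeof(float), alignof(float));
//   Qnn_MemHandle_t handle = instance->register_rpcmem(buf, 4, dims, QNN_DATATYPE_FLOAT_32);
//   // ... hand the mem handle to a Qnn_Tensor_t, run the graph ...
//   instance->unregister_rpcmem(handle);
//   instance->free_rpcmem(buf);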

void unregister_rpcmem(Qnn_MemHandle_t mem_handle) {
auto error = _qnn_interface->qnn_mem_de_register(&mem_handle, 1);
if (error != QNN_SUCCESS) {
QNN_LOG_WARN("failed to unregister shared memory, error %d", QNN_GET_ERROR_CODE(error));
QNN_LOG_WARN("failed to unregister shared memory, error %d\n", (int) QNN_GET_ERROR_CODE(error));
}

auto it = std::find_if(_qnn_rpc_buffer_to_handles.begin(), _qnn_rpc_buffer_to_handles.end(),
[mem_handle](const auto &kv) { return kv.second == mem_handle; });
[mem_handle](const auto & kv) { return kv.second == mem_handle; });
if (it == _qnn_rpc_buffer_to_handles.end()) {
QNN_LOG_WARN("failed to find shared memory handler: %p", mem_handle);
QNN_LOG_WARN("failed to find shared memory handler: %p\n", mem_handle);
return;
}

_qnn_rpc_buffer_to_handles.erase(it);
}

bool is_rpcmem_allocated(void *buf) { return _rpcmem_store_map.count(buf) != 0; }
bool is_rpcmem_registered(void *buf) { return _qnn_rpc_buffer_to_handles.count(buf) != 0U; }
bool is_rpcmem_allocated(void * buf) { return _rpcmem_store_map.count(buf) != 0; }

const qnn::qcom_socinfo &get_soc_info() { return _soc_info; }
bool is_rpcmem_registered(void * buf) { return _qnn_rpc_buffer_to_handles.count(buf) != 0U; }

private:
const qnn::qcom_socinfo & get_soc_info() { return _soc_info; }

private:
int load_system();
int load_backend(std::string &lib_path, const QnnSaver_Config_t ** /*saver_config*/);
int load_backend(std::string & lib_path, const QnnSaver_Config_t ** /*saver_config*/);
int unload_backend();

private:
private:
static constexpr const int _required_num_providers = 1;

std::string _additional_lib_load_path;
std::string _backend_lib_name;
std::string _additional_lib_load_path;
std::string _backend_lib_name;
BackendIdType _backend_id;

QnnLog_Level_t _qnn_log_level = QNN_LOG_LEVEL_DEBUG;
@ -429,7 +434,7 @@ private:
#endif

std::shared_ptr<qnn::qnn_system_interface> _qnn_sys_interface;
std::shared_ptr<qnn::qnn_interface> _qnn_interface;
std::shared_ptr<qnn::qnn_interface> _qnn_interface;

Qnn_GraphHandle_t _qnn_graph_handle = nullptr;

@ -443,29 +448,29 @@ private:

Qnn_ContextHandle_t _qnn_context_handle = nullptr;

QnnHtpDevice_PerfInfrastructure_t *_qnn_htp_perfinfra = nullptr;
uint32_t _qnn_power_configid = 1;
QnnHtpDevice_PerfInfrastructure_t * _qnn_htp_perfinfra = nullptr;
uint32_t _qnn_power_configid = 1;

std::unordered_map<void *, Qnn_MemHandle_t> _qnn_rpc_buffer_to_handles;

std::mutex _init_mutex;
std::unordered_map<BackendIdType, dl_handler_t> _loaded_lib_handle;
std::unordered_map<std::string, BackendIdType> _lib_path_to_backend_id;
std::mutex _init_mutex;
std::unordered_map<BackendIdType, dl_handler_t> _loaded_lib_handle;
std::unordered_map<std::string, BackendIdType> _lib_path_to_backend_id;
std::unordered_map<BackendIdType, const QnnInterface_t *> _loaded_backend;

dl_handler_t _rpc_lib_handle = nullptr;
std::atomic_bool _rpcmem_initialized{false};
qnn::pfn_rpc_mem_alloc _pfn_rpc_mem_alloc = nullptr;
qnn::pfn_rpc_mem_free _pfn_rpc_mem_free = nullptr;
qnn::pfn_rpc_mem_to_fd _pfn_rpc_mem_to_fd = nullptr;
qnn::pfn_rpc_mem_init _pfn_rpc_mem_init = nullptr;
qnn::pfn_rpc_mem_deinit _pfn_rpc_mem_deinit = nullptr;
dl_handler_t _rpc_lib_handle = nullptr;
std::atomic_bool _rpcmem_initialized{ false };
qnn::pfn_rpc_mem_alloc _pfn_rpc_mem_alloc = nullptr;
qnn::pfn_rpc_mem_free _pfn_rpc_mem_free = nullptr;
qnn::pfn_rpc_mem_to_fd _pfn_rpc_mem_to_fd = nullptr;
qnn::pfn_rpc_mem_init _pfn_rpc_mem_init = nullptr;
qnn::pfn_rpc_mem_deinit _pfn_rpc_mem_deinit = nullptr;
std::unordered_map<void *, void *> _rpcmem_store_map;
size_t _rpcmem_capacity = 512;
size_t _rpcmem_capacity = 512;

std::string _graph_name;

qnn::qcom_socinfo _soc_info = {};
};

} // namespace qnn
} // namespace qnn

@ -20,48 +20,48 @@ enum sdk_profile_level { profile_off = 0, profile_basic, profile_detail };

enum qcom_htp_arch {
NONE = 0,
V68 = 68,
V69 = 69,
V73 = 73,
V75 = 75,
V79 = 79, // SD 8 Gen 4 (SM8750)
V68 = 68,
V69 = 69,
V73 = 73,
V75 = 75,
V79 = 79, // SD 8 Gen 4 (SM8750)
};

enum qcom_chipset {
UNKNOWN_SM = 0,
SM8450 = 36, // v69, SD 8 Gen 1
SM8475 = 42, // v69, SD 8+ Gen 1
SM8550 = 43, // v73, SD 8 Gen 2
SSG2115P = 46, // v73
SM8650 = 57, // v75, SD 8 Gen 3
SA8295 = 39, // v68
SM8750 = 69, // v79, SD 8 Gen 4
SM8450 = 36, // v69, SD 8 Gen 1
SM8475 = 42, // v69, SD 8+ Gen 1
SM8550 = 43, // v73, SD 8 Gen 2
SSG2115P = 46, // v73
SM8650 = 57, // v75, SD 8 Gen 3
SA8295 = 39, // v68
SM8750 = 69, // v79, SD 8 Gen 4
};

struct qcom_socinfo {
uint32_t soc_model;
size_t htp_arch;
size_t vtcm_size_in_mb;
size_t htp_arch;
size_t vtcm_size_in_mb;
};

using pfn_rpc_mem_init = void (*)(void);
using pfn_rpc_mem_init = void (*)(void);
using pfn_rpc_mem_deinit = void (*)(void);
using pfn_rpc_mem_alloc = void *(*)(int, uint32_t, int);
using pfn_rpc_mem_free = void (*)(void *);
using pfn_rpc_mem_to_fd = int (*)(void *);
using pfn_rpc_mem_alloc = void * (*) (int, uint32_t, int);
using pfn_rpc_mem_free = void (*)(void *);
using pfn_rpc_mem_to_fd = int (*)(void *);

using pfn_qnnsaver_initialize = decltype(QnnSaver_initialize);
using pfn_qnninterface_getproviders = decltype(QnnInterface_getProviders);
using pfn_qnnsaver_initialize = decltype(QnnSaver_initialize);
using pfn_qnninterface_getproviders = decltype(QnnInterface_getProviders);
using pfn_qnnsysteminterface_getproviders = decltype(QnnSystemInterface_getProviders);
} // namespace qnn
} // namespace qnn

#define RPCMEM_DEFAULT_FLAGS 1
#define RPCMEM_DEFAULT_FLAGS 1
#define RPCMEM_HEAP_ID_SYSTEM 25

#define DISABLE_COPY(class_name) \
class_name(const class_name &) = delete; \
#define DISABLE_COPY(class_name) \
class_name(const class_name &) = delete; \
void operator=(const class_name &) = delete

#define DISABLE_MOVE(class_name) \
class_name(class_name &&) = delete; \
#define DISABLE_MOVE(class_name) \
class_name(class_name &&) = delete; \
void operator=(class_name &&) = delete

@ -9,9 +9,8 @@
#include <memory>
#include <string>

#include "ggml-qnn.h"

#include "buffer.hpp"
#include "ggml-qnn.h"
#include "logger.hpp"
#include "qnn-lib.hpp"
#include "utils.hpp"
@ -21,14 +20,17 @@ namespace qnn {
static_assert(GGML_MAX_DIMS == 4, "GGML_MAX_DIMS should be 4");

class ggml_qnn_tensor : public std::enable_shared_from_this<ggml_qnn_tensor> {
public:
public:
typedef enum _tensor_type { INPUT, OUTPUT, INTERMEDIATE, PARAMETER, BIDIRECTION } tensor_type_t;

explicit ggml_qnn_tensor(tensor_type_t tensor_type, const std::string &name,
const qnn_dimension_array_t &dimensions, Qnn_DataType_t data_type, int rank,
explicit ggml_qnn_tensor(tensor_type_t tensor_type, const std::string & name,
const qnn_dimension_array_t & dimensions, Qnn_DataType_t data_type, int rank,
QNNBackend device, Qnn_GraphHandle_t graph_handle,
std::shared_ptr<qnn_instance> qnn_instance)
: _tensor_name(name), _device(device), _qnn_instance(qnn_instance), _graph_handle(graph_handle) {
std::shared_ptr<qnn_instance> qnn_instance) :
_tensor_name(name),
_device(device),
_qnn_instance(qnn_instance),
_graph_handle(graph_handle) {
if (!_tensor_name.empty()) {
QNN_TENSOR_SET_NAME(_qnn_tensor, _tensor_name.c_str());
}
@ -37,23 +39,24 @@ public:
QNN_TENSOR_SET_DIMENSIONS(_qnn_tensor, _dimensions.data());
QNN_TENSOR_SET_DATA_FORMAT(_qnn_tensor, QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER);
update_params_from_ggml_tensor(tensor_type, data_type, rank);
QNN_LOG_DEBUG("[%s][%s]created, rank: %d, dims: [%d, %d, %d, %d], type: %s", get_backend_name(device),
_tensor_name.c_str(), rank, (int)_dimensions[0], (int)_dimensions[1], (int)_dimensions[2],
(int)_dimensions[3], qnn_datatype_to_string(data_type));
QNN_LOG_DEBUG("[%s][%s]created, rank: %d, dims: [%d, %d, %d, %d], type: %s\n", get_backend_name(device),
_tensor_name.c_str(), rank, (int) _dimensions[0], (int) _dimensions[1], (int) _dimensions[2],
(int) _dimensions[3], qnn_datatype_to_string(data_type));
}

explicit ggml_qnn_tensor(tensor_type_t tensor_type, const std::string &name,
const ggml_dimension_array_t &dimensions, ggml_type data_type, int rank, QNNBackend device,
Qnn_GraphHandle_t graph_handle, std::shared_ptr<qnn_instance> qnn_instance)
: ggml_qnn_tensor(tensor_type, name, get_internal_dimension(dimensions, rank),
qnn_datatype_from_ggml_datatype(data_type), rank, device, graph_handle, qnn_instance) {}
explicit ggml_qnn_tensor(tensor_type_t tensor_type, const std::string & name,
const ggml_dimension_array_t & dimensions, ggml_type data_type, int rank,
QNNBackend device, Qnn_GraphHandle_t graph_handle,
std::shared_ptr<qnn_instance> qnn_instance) :
ggml_qnn_tensor(tensor_type, name, get_internal_dimension(dimensions, rank),
qnn_datatype_from_ggml_datatype(data_type), rank, device, graph_handle, qnn_instance) {}

~ggml_qnn_tensor() {
_rpc_buffer.reset();
unbind();
}

bool set_data_buffer(const uint8_t *buffer, const size_t buffer_size) {
bool set_data_buffer(const uint8_t * buffer, const size_t buffer_size) {
auto qnn_buffer = std::make_shared<qnn_mem_buffer>(buffer, buffer_size);
if (bind_buffer_impl(qnn_buffer)) {
return true;
@ -74,71 +77,72 @@ public:

bool alloc_qnn_tensor_id() {
if (QNN_TENSOR_GET_ID(_qnn_tensor)) {
QNN_LOG_DEBUG("[%s]tensor already has a id: %d", _tensor_name.c_str(), QNN_TENSOR_GET_ID(_qnn_tensor));
QNN_LOG_DEBUG("[%s]tensor already has a id: %d\n", _tensor_name.c_str(), QNN_TENSOR_GET_ID(_qnn_tensor));
return true;
}

Qnn_Tensor_t qnn_tensor = _qnn_tensor;
auto qnn_interface = _qnn_instance->get_qnn_interface();
auto error = qnn_interface->qnn_tensor_create_graph_tensor(_graph_handle, &qnn_tensor);
Qnn_Tensor_t qnn_tensor = _qnn_tensor;
auto qnn_interface = _qnn_instance->get_qnn_interface();
auto error = qnn_interface->qnn_tensor_create_graph_tensor(_graph_handle, &qnn_tensor);
if (error != QNN_SUCCESS) {
QNN_LOG_WARN("[%s]allocate id failed , error: %d", _tensor_name.c_str(), error);
QNN_LOG_WARN("[%s]allocate id failed , error: %d\n", _tensor_name.c_str(), (int) error);
return false;
}

QNN_TENSOR_SET_ID(_qnn_tensor, QNN_TENSOR_GET_ID(qnn_tensor));
QNN_LOG_DEBUG("[%s][%s]allocated id: %d, rank: %d", get_backend_name(_device), _tensor_name.c_str(),
QNN_LOG_DEBUG("[%s][%s]allocated id: %d, rank: %d\n", get_backend_name(_device), _tensor_name.c_str(),
QNN_TENSOR_GET_ID(qnn_tensor), QNN_TENSOR_GET_RANK(qnn_tensor));
return true;
}

bool bind_ggml_tensor(ggml_tensor *tensor) {
bool bind_ggml_tensor(ggml_tensor * tensor) {
if (!_can_unbind) {
QNN_LOG_DEBUG("[%s]already has buffer storage, skip bind", _tensor_name.c_str());
QNN_LOG_DEBUG("[%s]already has buffer storage, skip bind\n", _tensor_name.c_str());
return true;
}

#ifndef NDEBUG
if (tensor->view_src) {
auto *src = tensor->view_src;
QNN_LOG_DEBUG("[%s]tensor(%s_%dx%dx%dx%d) is a view, src: %s_%dx%dx%dx%d", get_backend_name(_device),
tensor->name, tensor->ne[0], tensor->ne[1], tensor->ne[2], tensor->ne[3], src->name,
src->ne[0], src->ne[1], src->ne[2], src->ne[3]);
auto * src = tensor->view_src;
QNN_LOG_DEBUG("[%s]tensor(%s_%dx%dx%dx%d) is a view, src: %s_%dx%dx%dx%d\n", get_backend_name(_device),
tensor->name, (int) tensor->ne[0], (int) tensor->ne[1], (int) tensor->ne[2],
(int) tensor->ne[3], src->name, (int) src->ne[0], (int) src->ne[1], (int) src->ne[2],
(int) src->ne[3]);
}
#endif

auto buffer =
std::make_shared<qnn_mem_buffer_slice>(reinterpret_cast<uint8_t *>(tensor->data), ggml_nbytes(tensor));
if (!bind_buffer_impl(buffer)) {
QNN_LOG_WARN("[%s]failed to bind ggml tensor(%s)", _tensor_name.c_str(), ggml_get_name(tensor));
QNN_LOG_WARN("[%s]failed to bind ggml tensor(%s)\n", _tensor_name.c_str(), ggml_get_name(tensor));
return false;
}

QNN_LOG_DEBUG("[%s][%s]bind to ggml tensor(%s)", get_backend_name(_device), _tensor_name.c_str(),
QNN_LOG_DEBUG("[%s][%s]bind to ggml tensor(%s)\n", get_backend_name(_device), _tensor_name.c_str(),
ggml_get_name(tensor));
tensor->extra = this;
_ggml_tensor = tensor;
_ggml_tensor = tensor;
return true;
}

bool unbind() {
if (!_graph_handle) {
QNN_LOG_WARN("[%s]not bound to any graph", _tensor_name.c_str());
QNN_LOG_WARN("[%s]not bound to any graph\n", _tensor_name.c_str());
return false;
}

if (!_buffer) {
QNN_LOG_DEBUG("[%s]unbind to ggml tensor", _tensor_name.c_str());
QNN_LOG_DEBUG("[%s]unbind to ggml tensor\n", _tensor_name.c_str());
return true;
}

if (!read_from_qnn_tensor()) {
QNN_LOG_WARN("[%s]read from qnn tensor failed", _tensor_name.c_str());
QNN_LOG_WARN("[%s]read from qnn tensor failed\n", _tensor_name.c_str());
return false;
}

if (!_can_unbind) {
QNN_LOG_DEBUG("[%s]already has buffer storage, stop unbind", _tensor_name.c_str());
QNN_LOG_DEBUG("[%s]already has buffer storage, stop unbind\n", _tensor_name.c_str());
return true;
}

@ -146,42 +150,46 @@ public:
QNN_TENSOR_SET_MEM_TYPE(_qnn_tensor, QNN_TENSORMEMTYPE_RAW);
Qnn_ClientBuffer_t client_buf = {};
QNN_TENSOR_SET_CLIENT_BUF(_qnn_tensor, client_buf);
QNN_LOG_DEBUG("[%s]clear client buffer", _tensor_name.c_str());
QNN_LOG_DEBUG("[%s]clear client buffer\n", _tensor_name.c_str());
}

QNN_LOG_DEBUG("[%s][%s]unbind from buffer: %p, size: %d", get_backend_name(_device), _tensor_name.c_str(),
_buffer.get(), (int)_buffer->get_size());
QNN_LOG_DEBUG("[%s][%s]unbind from buffer: %p, size: %d\n", get_backend_name(_device), _tensor_name.c_str(),
(void *) _buffer.get(), (int) _buffer->get_size());
_buffer.reset();

if (_ggml_tensor) {
_ggml_tensor->extra = nullptr;
_ggml_tensor = nullptr;
_ggml_tensor = nullptr;
}

return true;
}

const Qnn_Tensor_t &get_qnn_tensor() const { return _qnn_tensor; }
const Qnn_Tensor_t & get_qnn_tensor() const { return _qnn_tensor; }

Qnn_DataType_t get_data_type() const { return QNN_TENSOR_GET_DATA_TYPE(_qnn_tensor); }
const qnn_dimension_array_t &get_dimensions() const { return _dimensions; }

const qnn_dimension_array_t & get_dimensions() const { return _dimensions; }

uint32_t get_rank() const { return QNN_TENSOR_GET_RANK(_qnn_tensor); }

uint32_t get_qnn_tensor_id() const { return QNN_TENSOR_GET_ID(_qnn_tensor); }

private:
private:
bool bind_buffer_impl(qnn_buffer_ptr buffer) {
if (_buffer) {
if (_buffer != buffer) {
QNN_LOG_WARN("[%s]has been bound to another buffer %p", _tensor_name.c_str(), _buffer.get());
QNN_LOG_WARN("[%s]has been bound to another buffer %p\n", _tensor_name.c_str(), (void *) _buffer.get());
return false;
}

QNN_LOG_DEBUG("[%s]already bound to same ggml tensor %p", _tensor_name.c_str(), _buffer.get());
QNN_LOG_DEBUG("[%s]already bound to same ggml tensor %p\n", _tensor_name.c_str(), (void *) _buffer.get());
return true;
}

if (QNN_TENSOR_GET_TYPE(_qnn_tensor) == QNN_TENSOR_TYPE_NATIVE) {
QNN_LOG_DEBUG("[%s]tensor type(%d) not READ/WRITE, skipping", _tensor_name.c_str(),
(int)QNN_TENSOR_TYPE_NATIVE);
QNN_LOG_DEBUG("[%s]tensor type(%d) not READ/WRITE, skipping\n", _tensor_name.c_str(),
(int) QNN_TENSOR_TYPE_NATIVE);
return true;
}

@ -191,7 +199,7 @@ private:
_qnn_instance, buffer->get_size(), QNN_TENSOR_GET_RANK(_qnn_tensor),
QNN_TENSOR_GET_DIMENSIONS(_qnn_tensor), QNN_TENSOR_GET_DATA_TYPE(_qnn_tensor));
if (!rpc_buffer->is_valid()) {
QNN_LOG_WARN("[%s][%s]alloc rpc mem failed", get_backend_name(_device), _tensor_name.c_str());
QNN_LOG_WARN("[%s][%s]alloc rpc mem failed\n", get_backend_name(_device), _tensor_name.c_str());
return false;
}

@ -201,38 +209,38 @@ private:
QNN_TENSOR_SET_MEM_TYPE(_qnn_tensor, QNN_TENSORMEMTYPE_MEMHANDLE);
auto mem_handle = _rpc_buffer->get_mem_handle();
if (!mem_handle) {
QNN_LOG_WARN("[%s][%s]can't find rpcmem from qnn mem handle", get_backend_name(_device),
QNN_LOG_WARN("[%s][%s]can't find rpcmem from qnn mem handle\n", get_backend_name(_device),
_tensor_name.c_str());
return false;
}

QNN_TENSOR_SET_MEM_HANDLE(_qnn_tensor, mem_handle);
QNN_LOG_DEBUG("[%s][%s]use mem handle %p", get_backend_name(_device), _tensor_name.c_str(),
QNN_LOG_DEBUG("[%s][%s]use mem handle %p\n", get_backend_name(_device), _tensor_name.c_str(),
QNN_TENSOR_GET_MEM_HANDLE(_qnn_tensor));
} else {
QNN_TENSOR_SET_MEM_TYPE(_qnn_tensor, QNN_TENSORMEMTYPE_RAW);
Qnn_ClientBuffer_t client_buf = {buffer->get_buffer(), (uint32_t)buffer->get_size()};
Qnn_ClientBuffer_t client_buf = { buffer->get_buffer(), (uint32_t) buffer->get_size() };
QNN_TENSOR_SET_CLIENT_BUF(_qnn_tensor, client_buf);
QNN_LOG_DEBUG("[%s]use client buffer %p size %d", _tensor_name.c_str(), client_buf.data,
(int)client_buf.dataSize);
QNN_LOG_DEBUG("[%s]use client buffer %p size %d\n", _tensor_name.c_str(), client_buf.data,
(int) client_buf.dataSize);
}

_buffer = buffer;

if (!write_to_qnn_tensor()) {
QNN_LOG_WARN("[%s]write to qnn tensor failed", _tensor_name.c_str());
QNN_LOG_WARN("[%s]write to qnn tensor failed\n", _tensor_name.c_str());
return false;
}

QNN_LOG_DEBUG("[%s][%s]bind to buffer: %p, size: %d", get_backend_name(_device), _tensor_name.c_str(),
buffer.get(), (int)buffer->get_size());
QNN_LOG_DEBUG("[%s][%s]bind to buffer: %p, size: %d\n", get_backend_name(_device), _tensor_name.c_str(),
(void *) buffer.get(), (int) buffer->get_size());
return true;
}

bool write_to_qnn_tensor() {
auto tensor_type = QNN_TENSOR_GET_TYPE(_qnn_tensor);
if (tensor_type != QNN_TENSOR_TYPE_APP_WRITE && tensor_type != QNN_TENSOR_TYPE_APP_READWRITE) {
QNN_LOG_DEBUG("[%s]tensor type(%d) not WRITE", _tensor_name.c_str(), (int)tensor_type);
QNN_LOG_DEBUG("[%s]tensor type(%d) not WRITE\n", _tensor_name.c_str(), (int) tensor_type);
return true;
}

@ -241,14 +249,14 @@ private:
}

// For CPU and GPU, the data is already in the tensor.
QNN_LOG_DEBUG("[%s][%s]write tensor to qnn", get_backend_name(_device), _tensor_name.c_str());
QNN_LOG_DEBUG("[%s][%s]write tensor to qnn\n", get_backend_name(_device), _tensor_name.c_str());
return true;
}

bool read_from_qnn_tensor() {
auto tensor_type = QNN_TENSOR_GET_TYPE(_qnn_tensor);
if (tensor_type != QNN_TENSOR_TYPE_APP_READ && tensor_type != QNN_TENSOR_TYPE_APP_READWRITE) {
QNN_LOG_DEBUG("[%s]tensor type(%d) not READ", _tensor_name.c_str(), (int)tensor_type);
QNN_LOG_DEBUG("[%s]tensor type(%d) not READ\n", _tensor_name.c_str(), (int) tensor_type);
return true;
}

@ -257,7 +265,7 @@ private:
}

// For CPU and GPU, the data is already in the tensor.
QNN_LOG_DEBUG("[%s][%s]read tensor from qnn", get_backend_name(_device), _tensor_name.c_str());
QNN_LOG_DEBUG("[%s][%s]read tensor from qnn\n", get_backend_name(_device), _tensor_name.c_str());
return true;
}

@ -265,7 +273,7 @@ private:
QNN_TENSOR_SET_DATA_TYPE(_qnn_tensor, data_type);
// TODO: set the quantizeParams base on the tensor type

QNN_TENSOR_SET_RANK(_qnn_tensor, (uint32_t)rank);
QNN_TENSOR_SET_RANK(_qnn_tensor, (uint32_t) rank);
QNN_TENSOR_SET_MEM_TYPE(_qnn_tensor, QNN_TENSORMEMTYPE_RAW);
Qnn_ClientBuffer_t client_buf = {};
QNN_TENSOR_SET_CLIENT_BUF(_qnn_tensor, client_buf);
@ -290,7 +298,7 @@ private:
break;
}
QNN_TENSOR_SET_TYPE(_qnn_tensor, new_tensor_type);
QNN_LOG_DEBUG("[%s][%s]tensor changed to type %d", get_backend_name(_device), _tensor_name.c_str(),
QNN_LOG_DEBUG("[%s][%s]tensor changed to type %d\n", get_backend_name(_device), _tensor_name.c_str(),
new_tensor_type);
}

@ -299,31 +307,31 @@ private:
return false;
}

std::string _tensor_name;
qnn_buffer_ptr _buffer;
bool _can_unbind = true;
QNNBackend _device;
std::string _tensor_name;
qnn_buffer_ptr _buffer;
bool _can_unbind = true;
QNNBackend _device;
std::shared_ptr<qnn_instance> _qnn_instance;
Qnn_Tensor_t _qnn_tensor = qnn_tensor_init(kDefaultQnnTensorVersion);
qnn_dimension_array_t _dimensions = {};
Qnn_GraphHandle_t _graph_handle = nullptr;
qnn_buffer_ptr _rpc_buffer;
ggml_tensor *_ggml_tensor = nullptr;
Qnn_Tensor_t _qnn_tensor = qnn_tensor_init(kDefaultQnnTensorVersion);
qnn_dimension_array_t _dimensions = {};
Qnn_GraphHandle_t _graph_handle = nullptr;
qnn_buffer_ptr _rpc_buffer;
ggml_tensor * _ggml_tensor = nullptr;

DISABLE_COPY(ggml_qnn_tensor);
DISABLE_MOVE(ggml_qnn_tensor);
};
|
||||
|
||||
using qnn_tensor_ptr_t = std::shared_ptr<ggml_qnn_tensor>;
|
||||
using qnn_tensor_array_t = std::vector<qnn_tensor_ptr_t>;
|
||||
using qnn_tensor_ptr_t = std::shared_ptr<ggml_qnn_tensor>;
|
||||
using qnn_tensor_array_t = std::vector<qnn_tensor_ptr_t>;
|
||||
using ggml_tensor_array_t = std::vector<ggml_tensor *>;
|
||||
|
||||
inline qnn_tensor_ptr_t get_qnn_tensor_ptr(ggml_tensor *ggml_tensor) {
|
||||
return ggml_tensor->extra ? reinterpret_cast<ggml_qnn_tensor *>(ggml_tensor->extra)->shared_from_this()
|
||||
: qnn_tensor_ptr_t();
|
||||
inline qnn_tensor_ptr_t get_qnn_tensor_ptr(ggml_tensor * ggml_tensor) {
|
||||
return ggml_tensor->extra ? reinterpret_cast<ggml_qnn_tensor *>(ggml_tensor->extra)->shared_from_this() :
|
||||
qnn_tensor_ptr_t();
|
||||
}
|
||||
|
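get_qnn_tensor_ptr recovers the backend wrapper that was stashed in ggml_tensor::extra. This only works because ggml_qnn_tensor inherits std::enable_shared_from_this, so the raw back-pointer can be promoted to a shared_ptr that shares ownership with the wrapper's original owner. The pattern, reduced to stand-in types (a sketch, not the backend's real classes):

    #include <memory>

    struct wrapper : std::enable_shared_from_this<wrapper> {};

    struct host {
        void * extra = nullptr;  // plays the role of ggml_tensor::extra
    };

    std::shared_ptr<wrapper> get_wrapper(host & h) {
        // Requires that *h.extra is already owned by some shared_ptr,
        // otherwise shared_from_this() is undefined behavior.
        return h.extra ? static_cast<wrapper *>(h.extra)->shared_from_this() : std::shared_ptr<wrapper>();
    }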
-inline int get_ggml_tensors_max_rank(const qnn::ggml_tensor_array_t &tensors) {
+inline int get_ggml_tensors_max_rank(const qnn::ggml_tensor_array_t & tensors) {
     int max_rank = 0;
     for (auto tensor : tensors) {
         max_rank = std::max(max_rank, ggml_n_dims(tensor));

@@ -332,14 +340,14 @@ inline int get_ggml_tensors_max_rank(const qnn::ggml_tensor_array_t &tensors) {
     return max_rank;
 }

-inline bool bind_tensors(const ggml_tensor_array_t &ggml_tensors, qnn_tensor_array_t &tensor_wrappers,
-                         std::vector<Qnn_Tensor_t> &qnn_tensors) {
+inline bool bind_tensors(const ggml_tensor_array_t & ggml_tensors, qnn_tensor_array_t & tensor_wrappers,
+                         std::vector<Qnn_Tensor_t> & qnn_tensors) {
     GGML_ASSERT(tensor_wrappers.size() == ggml_tensors.size());
     qnn_tensors.resize(ggml_tensors.size());
     for (size_t i = 0; i < ggml_tensors.size(); i++) {
-        auto *ggml_tensor = ggml_tensors[i];
+        auto * ggml_tensor = ggml_tensors[i];
         if (!tensor_wrappers[i]->bind_ggml_tensor(ggml_tensor)) {
-            QNN_LOG_ERROR("bind tensor %s failed", ggml_get_name(ggml_tensor));
+            QNN_LOG_ERROR("bind tensor %s failed\n", ggml_get_name(ggml_tensor));
             return false;
         }

@@ -349,12 +357,12 @@ inline bool bind_tensors(const ggml_tensor_array_t &ggml_tensors, qnn_tensor_array_t &tensor_wrappers,
     return true;
 }

-inline bool bind_tensors(const ggml_tensor_array_t &ggml_tensors, qnn_tensor_array_t &tensor_wrappers) {
+inline bool bind_tensors(const ggml_tensor_array_t & ggml_tensors, qnn_tensor_array_t & tensor_wrappers) {
     GGML_ASSERT(tensor_wrappers.size() == ggml_tensors.size());
     for (size_t i = 0; i < ggml_tensors.size(); i++) {
-        auto *ggml_tensor = ggml_tensors[i];
+        auto * ggml_tensor = ggml_tensors[i];
         if (!tensor_wrappers[i]->bind_ggml_tensor(ggml_tensor)) {
-            QNN_LOG_ERROR("bind tensor %s failed", ggml_get_name(ggml_tensor));
+            QNN_LOG_ERROR("bind tensor %s failed\n", ggml_get_name(ggml_tensor));
             return false;
         }
     }

@@ -362,31 +370,31 @@ inline bool bind_tensors(const ggml_tensor_array_t &ggml_tensors, qnn_tensor_array_t &tensor_wrappers) {
     return true;
 }

-inline void unbind_tensors(qnn_tensor_array_t &tensor_wrappers) {
-    for (auto &tensor : tensor_wrappers) {
+inline void unbind_tensors(qnn_tensor_array_t & tensor_wrappers) {
+    for (auto & tensor : tensor_wrappers) {
         tensor->unbind();
     }
 }
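Together, the two bind_tensors overloads and unbind_tensors bracket one graph execution: every wrapper is attached to its live ggml buffer before compute and detached afterwards, which is what makes the early return on a failed bind safe to retry on the next run. A hypothetical call sequence (the ggml_inputs and input_wrappers names are illustrative, not from this commit):

    std::vector<Qnn_Tensor_t> qnn_inputs;
    if (!qnn::bind_tensors(ggml_inputs, input_wrappers, qnn_inputs)) {
        return false;  // some wrapper could not attach to its ggml buffer
    }
    // ... execute the QNN graph with qnn_inputs ...
    qnn::unbind_tensors(input_wrappers);  // drop buffer references until the next run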
 struct tensor_create_common_params {
-    const char *name_prefix;
-    int tensor_rank;
-    bool is_input;
-    QNNBackend device;
-    Qnn_GraphHandle_t graph_handle;
+    const char *                       name_prefix;
+    int                                tensor_rank;
+    bool                               is_input;
+    QNNBackend                         device;
+    Qnn_GraphHandle_t                  graph_handle;
     std::shared_ptr<qnn::qnn_instance> qnn_instance;
 };

-inline void create_tensors_from_ggml_tensor(const tensor_create_common_params &params,
-                                            const ggml_tensor_array_t &ggml_tensors,
-                                            qnn_tensor_array_t *tensor_wrappers,
-                                            std::vector<Qnn_Tensor_t> *qnn_tensors) {
+inline void create_tensors_from_ggml_tensor(const tensor_create_common_params & params,
+                                            const ggml_tensor_array_t & ggml_tensors,
+                                            qnn_tensor_array_t * tensor_wrappers,
+                                            std::vector<Qnn_Tensor_t> * qnn_tensors) {
     if (qnn_tensors) {
         qnn_tensors->resize(ggml_tensors.size());
     }

     if (!tensor_wrappers->empty()) {
-        QNN_LOG_DEBUG("tensor_wrappers is not empty, skip create tensors");
+        QNN_LOG_DEBUG("tensor_wrappers is not empty, skip create tensors\n");
         GGML_ASSERT(tensor_wrappers->size() == ggml_tensors.size());
         return;
     }

@@ -394,14 +402,14 @@ inline void create_tensors_from_ggml_tensor(const tensor_create_common_params &params,
     tensor_wrappers->resize(ggml_tensors.size());

     char buffer[GGML_MAX_NAME] = {};
-    auto tensor_type = params.is_input ? ggml_qnn_tensor::INPUT : ggml_qnn_tensor::OUTPUT;
+    auto tensor_type           = params.is_input ? ggml_qnn_tensor::INPUT : ggml_qnn_tensor::OUTPUT;
     for (size_t i = 0; i < ggml_tensors.size(); i++) {
-        snprintf(buffer, GGML_MAX_NAME, "%s%d", params.name_prefix, (int)i);
-        auto *ggml_tensor = ggml_tensors[i];
+        snprintf(buffer, GGML_MAX_NAME, "%s%d", params.name_prefix, (int) i);
+        auto * ggml_tensor = ggml_tensors[i];
         (*tensor_wrappers)[i] = std::make_shared<ggml_qnn_tensor>(tensor_type, std::string(buffer), ggml_tensor->ne,
                                                                   ggml_tensor->type, params.tensor_rank, params.device,
                                                                   params.graph_handle, params.qnn_instance);
     }
 }

-} // namespace qnn
+}  // namespace qnn

@@ -4,30 +4,28 @@
 #include <cstdlib>

 #include "ggml-qnn.h"

-#include "QnnGraph.h"
 #include "qnn-types.hpp"
+#include "QnnGraph.h"

 #ifdef _WIN32
-#include <windows.h>
+#    include <windows.h>
 #else
-#include <sys/sysinfo.h>
-#include <unistd.h>
+#    include <sys/sysinfo.h>
+#    include <unistd.h>
 #endif

 namespace {

-template <typename _Ty>
-_Ty align_to_generic(size_t alignment, _Ty offset) {
-    return offset % alignment == 0 ? offset
-                                   : offset + (static_cast<_Ty>(alignment) - (offset % static_cast<_Ty>(alignment)));
+template <typename _Ty> _Ty align_to_generic(size_t alignment, _Ty offset) {
+    return offset % alignment == 0 ? offset :
+                                     offset + (static_cast<_Ty>(alignment) - (offset % static_cast<_Ty>(alignment)));
 }
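The ternary reads: an offset already on an alignment boundary passes through unchanged; anything else is rounded up to the next multiple of the alignment. A few concrete values for a 4096-byte page:

    // align_to_generic<size_t>(4096, 0)    == 0
    // align_to_generic<size_t>(4096, 1)    == 4096
    // align_to_generic<size_t>(4096, 4096) == 4096
    // align_to_generic<size_t>(4096, 5000) == 8192  // 5000 + (4096 - 5000 % 4096)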
-} // namespace
+}  // namespace

 namespace qnn {

-qnn_dimension_array_t get_internal_dimension(const ggml_dimension_array_t &dims, uint32_t rank) {
+qnn_dimension_array_t get_internal_dimension(const ggml_dimension_array_t & dims, uint32_t rank) {
     static_assert(GGML_MAX_DIMS == 4, "GGML_MAX_DIMS should be 4");
     GGML_ASSERT(rank <= GGML_MAX_DIMS && rank > 0);

@@ -43,30 +41,29 @@ qnn_dimension_array_t get_internal_dimension(const ggml_dimension_array_t &dims, uint32_t rank) {
      * The ggml tensor will have dimensions [3, 2], while the qnn tensor will have dimensions [2, 3].
      */
     for (uint32_t i = 0; i < rank; i++) {
-        internal_dims[i] = std::max<uint32_t>((uint32_t)dims[rank - 1 - i], 1);
+        internal_dims[i] = std::max<uint32_t>((uint32_t) dims[rank - 1 - i], 1);
     }

     return internal_dims;
 }
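Because ggml stores ne[] with the fastest-varying dimension first while QNN expects the slowest-varying dimension first, the loop copies the dimensions in reverse order and clamps each to at least 1. Using the shape from the comment above (a sketch; entries beyond the rank keep whatever the array was initialized to in the elided lines):

    // ggml: ne = {3, 2, 1, 1}, rank 2  ->  QNN: internal_dims = {2, 3, ...}
    // auto dims = qnn::get_internal_dimension(tensor->ne, 2);
    // dims[0] == 2, dims[1] == 3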
-qnn_dimension_array_t get_view_internal_dimension(const ggml_tensor *tensor, size_t &element_offset_out) {
+qnn_dimension_array_t get_view_internal_dimension(const ggml_tensor * tensor, size_t & element_offset_out) {
     element_offset_out = 0;

-    auto *parent_tensor = tensor;
+    auto * parent_tensor = tensor;
     while (parent_tensor->view_src) {
         element_offset_out += parent_tensor->view_offs;
         parent_tensor = parent_tensor->view_src;
     }

-    const auto rank = get_ggml_tensor_rank(tensor);
+    const auto rank        = get_ggml_tensor_rank(tensor);
     const auto parent_rank = get_ggml_tensor_rank(parent_tensor);
     GGML_ASSERT(parent_tensor->type == tensor->type);
     GGML_ASSERT(parent_rank == rank);

     const auto block_size = ggml_blck_size(tensor->type);
     element_offset_out =
-        element_offset_out * block_size / tensor->nb[0]; // calculate the element offset in the view tensor
+        element_offset_out * block_size / tensor->nb[0];  // calculate the element offset in the view tensor

     return get_internal_dimension(parent_tensor->ne, parent_rank);
 }
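The accumulated view_offs is a byte offset into the root tensor; dividing by nb[0] (bytes per block) and scaling by the block size converts it into an element offset. For a float32 view (ggml_blck_size == 1, nb[0] == 4) that starts 40 bytes into its parent:

    // element_offset_out = 40 * 1 / 4 = 10  -> the view begins at element 10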
@@ -141,7 +138,7 @@ size_t qnn_datatype_size(Qnn_DataType_t qnn_type) {
     return 0;
 }

-const char *qnn_datatype_to_string(Qnn_DataType_t qnn_type) {
+const char * qnn_datatype_to_string(Qnn_DataType_t qnn_type) {
     switch (qnn_type) {
         case QNN_DATATYPE_FLOAT_32:
             return "QNN_DATATYPE_FLOAT_32";

@@ -166,7 +163,7 @@ const char *qnn_datatype_to_string(Qnn_DataType_t qnn_type) {
     return "QNN_DATATYPE_UNDEFINED";
 }

-uint32_t get_ggml_tensor_rank(const ggml_tensor *tensor) {
+uint32_t get_ggml_tensor_rank(const ggml_tensor * tensor) {
     uint32_t rank = 0;
     for (int i = 0; i < GGML_MAX_DIMS; i++) {
         if ((0 != tensor->ne[i]) && (1 != tensor->ne[i])) {

@@ -176,12 +173,12 @@ uint32_t get_ggml_tensor_rank(const ggml_tensor *tensor) {
     return rank;
 }
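Note the counting rule: every axis whose extent is neither 0 nor 1 contributes to the rank, independent of its position. That differs from ggml_n_dims, which returns the index of the highest non-unit axis plus one:

    // ne = {4, 1, 2, 1}  ->  get_ggml_tensor_rank == 2, but ggml_n_dims == 3
    // ne = {5, 1, 1, 1}  ->  get_ggml_tensor_rank == 1,     ggml_n_dims == 1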
-const char *get_ggml_type_name(ggml_type type) {
-    const auto *traits = ggml_get_type_traits(type);
+const char * get_ggml_type_name(ggml_type type) {
+    const auto * traits = ggml_get_type_traits(type);
     return traits->type_name;
 }

-const char *get_backend_name(QNNBackend device_index) {
+const char * get_backend_name(QNNBackend device_index) {
     switch (device_index) {
         case QNN_BACKEND_CPU:
             return "qnn-cpu";

@@ -195,7 +192,7 @@ const char *get_backend_name(QNNBackend device_index) {
     }
 }

-const char *get_chipset_desc(uint32_t chipset_id) {
+const char * get_chipset_desc(uint32_t chipset_id) {
     switch (chipset_id) {
         case SM8450:
             return "SD 8 Gen 1 (SM8450)";

@@ -212,7 +209,7 @@ const char *get_chipset_desc(uint32_t chipset_id) {
     }
 }

-const char *get_htparch_desc(size_t htp_arch) {
+const char * get_htparch_desc(size_t htp_arch) {
     switch (htp_arch) {
         case V68:
             return "QCOM_HTP_V68";

@@ -229,12 +226,18 @@ const char *get_htparch_desc(size_t htp_arch) {
     }
 }

-intptr_t align_to(size_t alignment, intptr_t offset) { return align_to_generic<intptr_t>(alignment, offset); }
+intptr_t align_to(size_t alignment, intptr_t offset) {
+    return align_to_generic<intptr_t>(alignment, offset);
+}

-uint32_t get_ggml_tensor_data_size(const ggml_tensor *tensor) { return (uint32_t)ggml_nbytes(tensor); }
+uint32_t get_ggml_tensor_data_size(const ggml_tensor * tensor) {
+    return (uint32_t) ggml_nbytes(tensor);
+}

 #ifdef _WIN32
-static void *_align_alloc(size_t alignment, size_t size) { return _aligned_malloc(size, alignment); }
+static void * _align_alloc(size_t alignment, size_t size) {
+    return _aligned_malloc(size, alignment);
+}

 static size_t _get_page_size() {
     SYSTEM_INFO si;

@@ -242,22 +245,31 @@ static size_t _get_page_size() {
     return si.dwPageSize;
 }

-void align_free(void *ptr) { _aligned_free(ptr); }
+void align_free(void * ptr) {
+    _aligned_free(ptr);
+}
 #else
-static void *_align_alloc(size_t alignment, size_t size) { return std::aligned_alloc(alignment, size); }
+static void * _align_alloc(size_t alignment, size_t size) {
+    return std::aligned_alloc(alignment, size);
+}

-static size_t _get_page_size() { return sysconf(_SC_PAGESIZE); }
+static size_t _get_page_size() {
+    return sysconf(_SC_PAGESIZE);
+}

-void align_free(void *ptr) { std::free(ptr); }
+void align_free(void * ptr) {
+    std::free(ptr);
+}
 #endif

-void *page_align_alloc(size_t size) {
-    const size_t alignment = _get_page_size();
-    size_t size_aligned = align_to_generic<size_t>(alignment, size);
-    QNN_LOG_DEBUG("_align_alloc success, alignment: %ld, size: %ld, size_aligned: %ld", alignment, size, size_aligned);
-    void *data = _align_alloc(alignment, size_aligned);
+void * page_align_alloc(size_t size) {
+    const size_t alignment    = _get_page_size();
+    size_t       size_aligned = align_to_generic<size_t>(alignment, size);
+    QNN_LOG_DEBUG("_align_alloc success, alignment: %ld, size: %ld, size_aligned: %ld\n", alignment, size, size_aligned);
+    void * data = _align_alloc(alignment, size_aligned);
     if (!data) {
-        QNN_LOG_WARN("_align_alloc failed, alignment: %ld, size: %ld, size_aligned: %ld", alignment, size, size_aligned);
+        QNN_LOG_WARN("_align_alloc failed, alignment: %ld, size: %ld, size_aligned: %ld\n", alignment, size,
+                     size_aligned);
         return nullptr;
     }
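page_align_alloc rounds the request up to a whole number of pages before calling the platform allocator; besides matching page-granular consumers such as the rpcmem pool, this also satisfies std::aligned_alloc, which requires the size to be a multiple of the alignment. A hypothetical caller (buffer size illustrative):

    void * scratch = qnn::page_align_alloc(1 << 20);  // rounded up to a page multiple
    if (scratch) {
        // ... use the buffer ...
        qnn::align_free(scratch);  // routes to _aligned_free or std::free per platform
    }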
@@ -270,7 +282,7 @@ void *page_align_alloc(size_t size) {
 //
 // =================================================================================================
 // TODO: only support GGML_OP_ADD/GGML_OP_MUL/GGML_OP_MUL_MAT
-const char *opname_from_ggmlop(enum ggml_op ggmlop) {
+const char * opname_from_ggmlop(enum ggml_op ggmlop) {
     switch (ggmlop) {
         case GGML_OP_ADD:
             return QNN_OP_ELEMENT_WISE_ADD;

@@ -284,7 +296,7 @@ const char *opname_from_ggmlop(enum ggml_op ggmlop) {
     return nullptr;
 }

-const char *get_qnn_error_string(Qnn_ErrorHandle_t error) {
+const char * get_qnn_error_string(Qnn_ErrorHandle_t error) {
     // A complete list of error codes can be found at here:
     // https://docs.qualcomm.com/bundle/publicresource/topics/80-63442-50/api_error_codes.html
     thread_local static char error_code[128] = {};

@@ -377,7 +389,7 @@ const char *get_qnn_error_string(Qnn_ErrorHandle_t error) {

 size_t get_system_total_memory_in_bytes() {
     MEMORYSTATUSEX mem = {};
-    mem.dwLength = sizeof(mem);
+    mem.dwLength       = sizeof(mem);
     if (GlobalMemoryStatusEx(&mem)) {
         return mem.ullTotalPhys;
     }

@@ -387,7 +399,7 @@ size_t get_system_total_memory_in_bytes() {

 size_t get_system_free_memory_in_bytes() {
     MEMORYSTATUSEX mem = {};
-    mem.dwLength = sizeof(mem);
+    mem.dwLength       = sizeof(mem);
     if (GlobalMemoryStatusEx(&mem)) {
         return mem.ullAvailPhys;
     }

@@ -403,8 +415,8 @@ size_t get_system_total_memory_in_bytes() {
         return (info.totalram + info.totalswap) * info.mem_unit;
     }

-    auto pages = (size_t)sysconf(_SC_PHYS_PAGES);
-    auto page_size = (size_t)sysconf(_SC_PAGE_SIZE);
+    auto pages     = (size_t) sysconf(_SC_PHYS_PAGES);
+    auto page_size = (size_t) sysconf(_SC_PAGE_SIZE);
     return pages * page_size;
 }

@@ -414,11 +426,11 @@ size_t get_system_free_memory_in_bytes() {
         return (info.freeram + info.freeswap) * info.mem_unit;
     }

-    auto avail_pages = (size_t)sysconf(_SC_AVPHYS_PAGES);
-    auto page_size = (size_t)sysconf(_SC_PAGE_SIZE);
+    auto avail_pages = (size_t) sysconf(_SC_AVPHYS_PAGES);
+    auto page_size   = (size_t) sysconf(_SC_PAGE_SIZE);
     return avail_pages * page_size;
 }

 #endif

-} // namespace qnn
+}  // namespace qnn

@@ -5,38 +5,36 @@
 #include <cstdint>
 #include <string>

-#include "ggml.h"
-
 #include "ggml-qnn.h"
-
-#include "QnnTypes.h"
+#include "ggml.h"
 #include "logger.hpp"
+#include "QnnTypes.h"

 #define QNN_TENSOR_VER(x) ((x).v1)

 namespace qnn {

 using ggml_dimension_array_t = int64_t[GGML_MAX_DIMS];
-using ggml_stride_array_t = size_t[GGML_MAX_DIMS];
-using qnn_dimension_array_t = std::array<uint32_t, GGML_MAX_DIMS>;
+using ggml_stride_array_t    = size_t[GGML_MAX_DIMS];
+using qnn_dimension_array_t  = std::array<uint32_t, GGML_MAX_DIMS>;

-qnn_dimension_array_t get_internal_dimension(const ggml_dimension_array_t &dims, uint32_t rank);
-qnn_dimension_array_t get_view_internal_dimension(const ggml_tensor *tensor, size_t &element_offser_out);
+qnn_dimension_array_t get_internal_dimension(const ggml_dimension_array_t & dims, uint32_t rank);
+qnn_dimension_array_t get_view_internal_dimension(const ggml_tensor * tensor, size_t & element_offser_out);

-uint32_t get_ggml_tensor_rank(const ggml_tensor *tensor);
-const char *get_ggml_type_name(ggml_type type);
-const char *get_backend_name(QNNBackend device_index);
-const char *get_chipset_desc(uint32_t chipset_id);
-const char *get_htparch_desc(size_t htp_arch);
-intptr_t align_to(size_t alignment, intptr_t offset);
-uint32_t get_ggml_tensor_data_size(const ggml_tensor *tensor);
+uint32_t     get_ggml_tensor_rank(const ggml_tensor * tensor);
+const char * get_ggml_type_name(ggml_type type);
+const char * get_backend_name(QNNBackend device_index);
+const char * get_chipset_desc(uint32_t chipset_id);
+const char * get_htparch_desc(size_t htp_arch);
+intptr_t     align_to(size_t alignment, intptr_t offset);
+uint32_t     get_ggml_tensor_data_size(const ggml_tensor * tensor);

-void *page_align_alloc(size_t size);
-void align_free(void *ptr);
+void * page_align_alloc(size_t size);
+void   align_free(void * ptr);

-const char *opname_from_ggmlop(enum ggml_op ggmlop);
+const char * opname_from_ggmlop(enum ggml_op ggmlop);

-const char *get_qnn_error_string(Qnn_ErrorHandle_t error);
+const char * get_qnn_error_string(Qnn_ErrorHandle_t error);

 constexpr const Qnn_TensorVersion_t kDefaultQnnTensorVersion = QNN_TENSOR_VERSION_1;

@@ -51,7 +49,7 @@ inline Qnn_Tensor_t qnn_tensor_init(Qnn_TensorVersion_t version) {
     return tensor;
 }

-inline uint32_t get_qnn_tensorid(const Qnn_Tensor_t &tensor) {
+inline uint32_t get_qnn_tensorid(const Qnn_Tensor_t & tensor) {
     if (tensor.version == kDefaultQnnTensorVersion) {
         return QNN_TENSOR_VER(tensor).id;
     }

@@ -59,156 +57,158 @@ inline uint32_t get_qnn_tensorid(const Qnn_Tensor_t &tensor) {
     return 0u;
 }
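Every accessor below follows the same version-gated pattern: Qnn_Tensor_t is a union of ABI versions, QNN_TENSOR_VER(x) selects the v1 payload, and each getter checks the version tag before touching the union, returning a harmless default otherwise. Reduced to its essentials with stand-in types (a sketch, not the QNN SDK definitions):

    #include <cstdint>

    struct payload_v1 {
        uint32_t id;
    };

    struct versioned_tensor {
        int        version;  // ABI tag, compared against the supported version
        payload_v1 v1;
    };

    inline uint32_t get_id(const versioned_tensor & t) {
        return t.version == 1 ? t.v1.id : 0u;  // safe default for unknown layouts
    }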
-inline const char *get_qnn_tensorname(const Qnn_Tensor_t &tensor) {
+inline const char * get_qnn_tensorname(const Qnn_Tensor_t & tensor) {
     if (tensor.version == kDefaultQnnTensorVersion) {
         return QNN_TENSOR_VER(tensor).name;
     }
     return nullptr;
 }

-inline Qnn_TensorType_t get_qnn_tensortype(const Qnn_Tensor_t &tensor) {
+inline Qnn_TensorType_t get_qnn_tensortype(const Qnn_Tensor_t & tensor) {
     if (tensor.version == kDefaultQnnTensorVersion) {
         return QNN_TENSOR_VER(tensor).type;
     }
     return QNN_TENSOR_TYPE_UNDEFINED;
 }

-inline Qnn_TensorDataFormat_t get_qnn_tensor_dataformat(const Qnn_Tensor_t &tensor) {
+inline Qnn_TensorDataFormat_t get_qnn_tensor_dataformat(const Qnn_Tensor_t & tensor) {
     if (tensor.version == kDefaultQnnTensorVersion) {
         return QNN_TENSOR_VER(tensor).dataFormat;
     }
     return QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER;
 }

-inline Qnn_DataType_t get_qnn_tensor_datatype(const Qnn_Tensor_t &tensor) {
+inline Qnn_DataType_t get_qnn_tensor_datatype(const Qnn_Tensor_t & tensor) {
     if (tensor.version == kDefaultQnnTensorVersion) {
         return QNN_TENSOR_VER(tensor).dataType;
     }
     return QNN_DATATYPE_UNDEFINED;
 }

-inline Qnn_QuantizeParams_t get_qnn_tensor_quantparams(const Qnn_Tensor_t &tensor) {
+inline Qnn_QuantizeParams_t get_qnn_tensor_quantparams(const Qnn_Tensor_t & tensor) {
     if (tensor.version == kDefaultQnnTensorVersion) {
         return QNN_TENSOR_VER(tensor).quantizeParams;
     }
     return QNN_QUANTIZE_PARAMS_INIT;
 }

-inline uint32_t get_qnn_tensor_rank(const Qnn_Tensor_t &tensor) {
+inline uint32_t get_qnn_tensor_rank(const Qnn_Tensor_t & tensor) {
     if (tensor.version == kDefaultQnnTensorVersion) {
         return QNN_TENSOR_VER(tensor).rank;
     }
     return 0u;
 }

-inline uint32_t *get_qnn_tensor_dimensions(const Qnn_Tensor_t &tensor) {
+inline uint32_t * get_qnn_tensor_dimensions(const Qnn_Tensor_t & tensor) {
     if (tensor.version == kDefaultQnnTensorVersion) {
         return QNN_TENSOR_VER(tensor).dimensions;
     }
     return nullptr;
 }

-inline Qnn_TensorMemType_t get_qnn_tensor_memtype(const Qnn_Tensor_t &tensor) {
+inline Qnn_TensorMemType_t get_qnn_tensor_memtype(const Qnn_Tensor_t & tensor) {
     if (tensor.version == kDefaultQnnTensorVersion) {
         return QNN_TENSOR_VER(tensor).memType;
     }
     return QNN_TENSORMEMTYPE_UNDEFINED;
 }

-inline Qnn_MemHandle_t get_qnn_tensor_memhandle(const Qnn_Tensor_t &tensor) {
+inline Qnn_MemHandle_t get_qnn_tensor_memhandle(const Qnn_Tensor_t & tensor) {
     if (tensor.version == kDefaultQnnTensorVersion) {
         return QNN_TENSOR_VER(tensor).memHandle;
     }
     return nullptr;
 }

-inline void set_qnn_tensor_id(Qnn_Tensor_t &tensor, uint32_t id) {
+inline void set_qnn_tensor_id(Qnn_Tensor_t & tensor, uint32_t id) {
     if (tensor.version == kDefaultQnnTensorVersion) {
         QNN_TENSOR_VER(tensor).id = id;
     }
 }

-inline void set_qnn_tensor_name(Qnn_Tensor_t &tensor, const char *name) {
+inline void set_qnn_tensor_name(Qnn_Tensor_t & tensor, const char * name) {
     if (tensor.version == kDefaultQnnTensorVersion) {
         QNN_TENSOR_VER(tensor).name = name;
     }
 }

-inline void set_qnn_tensor_type(Qnn_Tensor_t &tensor, Qnn_TensorType_t type) {
+inline void set_qnn_tensor_type(Qnn_Tensor_t & tensor, Qnn_TensorType_t type) {
     if (tensor.version == kDefaultQnnTensorVersion) {
         QNN_TENSOR_VER(tensor).type = type;
     }
 }

-inline void set_qnn_tensor_dataformat(Qnn_Tensor_t &tensor, Qnn_TensorDataFormat_t format) {
+inline void set_qnn_tensor_dataformat(Qnn_Tensor_t & tensor, Qnn_TensorDataFormat_t format) {
     if (tensor.version == kDefaultQnnTensorVersion) {
         QNN_TENSOR_VER(tensor).dataFormat = format;
     }
 }

-inline void set_qnn_tensor_datatype(Qnn_Tensor_t &tensor, Qnn_DataType_t dataType) {
+inline void set_qnn_tensor_datatype(Qnn_Tensor_t & tensor, Qnn_DataType_t dataType) {
     if (tensor.version == kDefaultQnnTensorVersion) {
         QNN_TENSOR_VER(tensor).dataType = dataType;
     }
 }

-inline void set_qnn_tensor_quantparams(Qnn_Tensor_t &tensor, Qnn_QuantizeParams_t params) {
+inline void set_qnn_tensor_quantparams(Qnn_Tensor_t & tensor, Qnn_QuantizeParams_t params) {
     if (tensor.version == kDefaultQnnTensorVersion) {
         QNN_TENSOR_VER(tensor).quantizeParams = params;
     }
 }

-inline void set_qnn_tensor_rank(Qnn_Tensor_t &tensor, uint32_t rank) {
+inline void set_qnn_tensor_rank(Qnn_Tensor_t & tensor, uint32_t rank) {
     if (tensor.version == kDefaultQnnTensorVersion) {
         QNN_TENSOR_VER(tensor).rank = rank;
     }
 }

-inline void set_qnn_tensor_dimensions(Qnn_Tensor_t &tensor, uint32_t *dims) {
+inline void set_qnn_tensor_dimensions(Qnn_Tensor_t & tensor, uint32_t * dims) {
     if (tensor.version == kDefaultQnnTensorVersion) {
         QNN_TENSOR_VER(tensor).dimensions = dims;
     }
 }

-inline void set_qnn_tensor_memtype(Qnn_Tensor_t &tensor, Qnn_TensorMemType_t mem_type) {
+inline void set_qnn_tensor_memtype(Qnn_Tensor_t & tensor, Qnn_TensorMemType_t mem_type) {
     if (tensor.version == kDefaultQnnTensorVersion) {
         QNN_TENSOR_VER(tensor).memType = mem_type;
     }
 }

-inline void set_qnn_tensor_clientbuf(Qnn_Tensor_t &tensor, Qnn_ClientBuffer_t client_buf) {
+inline void set_qnn_tensor_clientbuf(Qnn_Tensor_t & tensor, Qnn_ClientBuffer_t client_buf) {
     if (tensor.version == kDefaultQnnTensorVersion) {
         QNN_TENSOR_VER(tensor).clientBuf = client_buf;
     }
 }

-inline void set_qnn_tensor_memhandle(Qnn_Tensor_t &tensor, Qnn_MemHandle_t handle) {
+inline void set_qnn_tensor_memhandle(Qnn_Tensor_t & tensor, Qnn_MemHandle_t handle) {
     if (tensor.version == kDefaultQnnTensorVersion) {
         QNN_TENSOR_VER(tensor).memHandle = handle;
     }
 }

-inline void set_qnn_tensor_dyn_dimensions(Qnn_Tensor_t &tensor, uint8_t *isDynamicDimensions) {
+inline void set_qnn_tensor_dyn_dimensions(Qnn_Tensor_t & tensor, uint8_t * isDynamicDimensions) {
     if (tensor.version == QNN_TENSOR_VERSION_2) {
         tensor.v2.isDynamicDimensions = isDynamicDimensions;
     }
 }

 Qnn_DataType_t qnn_datatype_from_ggml_datatype(ggml_type ggml_type);
-ggml_type ggml_datatype_from_qnn_datatype(Qnn_DataType_t qnn_type);
-size_t qnn_datatype_size(Qnn_DataType_t qnn_type);
-const char *qnn_datatype_to_string(Qnn_DataType_t qnn_type);
-size_t get_system_total_memory_in_bytes();
-size_t get_system_free_memory_in_bytes();
+ggml_type      ggml_datatype_from_qnn_datatype(Qnn_DataType_t qnn_type);
+size_t         qnn_datatype_size(Qnn_DataType_t qnn_type);
+const char *   qnn_datatype_to_string(Qnn_DataType_t qnn_type);
+size_t         get_system_total_memory_in_bytes();
+size_t         get_system_free_memory_in_bytes();

 #if ENABLE_QNNBACKEND_PERF
 class qnn_perf {
-public:
-    qnn_perf(const std::string &perf_name) : _perf_name(std::move(perf_name)) {};
+  public:
+    qnn_perf(const std::string & perf_name) : _perf_name(std::move(perf_name)) {};

     ~qnn_perf() { info(); }
-    qnn_perf() = delete;
-    qnn_perf(const qnn_perf &) = delete;
-    qnn_perf &operator=(const qnn_perf &) = delete;
+
+    qnn_perf()                             = delete;
+    qnn_perf(const qnn_perf &)             = delete;
+    qnn_perf & operator=(const qnn_perf &) = delete;

     void start() { _begin_time = ggml_time_us(); }

@@ -218,48 +218,51 @@ public:
         QNN_LOG_INFO("duration of %s : %lld microseconds\n", _perf_name.c_str(), _duration);
     }

-private:
-    int64_t _begin_time = 0LL;
-    int64_t _end_time = 0LL;
-    int64_t _duration = 0LL;
+  private:
+    int64_t     _begin_time = 0LL;
+    int64_t     _end_time   = 0LL;
+    int64_t     _duration   = 0LL;
     std::string _perf_name;
 };
 #else
 class qnn_perf {
-public:
+  public:
     qnn_perf(const std::string &) {}

     ~qnn_perf() { info(); }
-    qnn_perf() = delete;
-    qnn_perf(const qnn_perf &) = delete;
-    qnn_perf &operator=(const qnn_perf &) = delete;
+
+    qnn_perf()                             = delete;
+    qnn_perf(const qnn_perf &)             = delete;
+    qnn_perf & operator=(const qnn_perf &) = delete;

     void start() {}

     void info() {}
 };
 #endif
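qnn_perf is a small RAII timer: start() stamps the begin time and the destructor calls info(), which logs the elapsed duration, so a stack instance times whatever scope it lives in; when ENABLE_QNNBACKEND_PERF is off, the empty variant keeps call sites compiling unchanged. A hypothetical call site:

    {
        qnn::qnn_perf perf("graph_execute");  // the name appears in the log line
        perf.start();
        // ... run the graph ...
    }  // ~qnn_perf() -> info() -> "duration of graph_execute : ... microseconds"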
-} // namespace qnn
+}  // namespace qnn

-#define QNN_TENSOR_GET_ID(tensor) qnn::get_qnn_tensorid(tensor)
-#define QNN_TENSOR_GET_NAME(tensor) qnn::get_qnn_tensorname(tensor)
-#define QNN_TENSOR_GET_TYPE(tensor) qnn::get_qnn_tensortype(tensor)
-#define QNN_TENSOR_GET_DATA_FORMAT(tensor) qnn::get_qnn_tensor_dataformat(tensor)
-#define QNN_TENSOR_GET_DATA_TYPE(tensor) qnn::get_qnn_tensor_datatype(tensor)
+#define QNN_TENSOR_GET_ID(tensor)           qnn::get_qnn_tensorid(tensor)
+#define QNN_TENSOR_GET_NAME(tensor)         qnn::get_qnn_tensorname(tensor)
+#define QNN_TENSOR_GET_TYPE(tensor)         qnn::get_qnn_tensortype(tensor)
+#define QNN_TENSOR_GET_DATA_FORMAT(tensor)  qnn::get_qnn_tensor_dataformat(tensor)
+#define QNN_TENSOR_GET_DATA_TYPE(tensor)    qnn::get_qnn_tensor_datatype(tensor)
 #define QNN_TENSOR_GET_QUANT_PARAMS(tensor) qnn::get_qnn_tensor_quantparams(tensor)
-#define QNN_TENSOR_GET_RANK(tensor) qnn::get_qnn_tensor_rank(tensor)
-#define QNN_TENSOR_GET_DIMENSIONS(tensor) qnn::get_qnn_tensor_dimensions(tensor)
-#define QNN_TENSOR_GET_MEM_TYPE(tensor) qnn::get_qnn_tensor_memtype(tensor)
-#define QNN_TENSOR_GET_MEM_HANDLE(tensor) qnn::get_qnn_tensor_memhandle(tensor)
+#define QNN_TENSOR_GET_RANK(tensor)         qnn::get_qnn_tensor_rank(tensor)
+#define QNN_TENSOR_GET_DIMENSIONS(tensor)   qnn::get_qnn_tensor_dimensions(tensor)
+#define QNN_TENSOR_GET_MEM_TYPE(tensor)     qnn::get_qnn_tensor_memtype(tensor)
+#define QNN_TENSOR_GET_MEM_HANDLE(tensor)   qnn::get_qnn_tensor_memhandle(tensor)

-#define QNN_TENSOR_SET_ID(tensor, value) qnn::set_qnn_tensor_id(tensor, value)
-#define QNN_TENSOR_SET_NAME(tensor, value) qnn::set_qnn_tensor_name(tensor, value)
-#define QNN_TENSOR_SET_TYPE(tensor, value) qnn::set_qnn_tensor_type(tensor, value)
-#define QNN_TENSOR_SET_DATA_FORMAT(tensor, value) qnn::set_qnn_tensor_dataformat(tensor, value)
-#define QNN_TENSOR_SET_DATA_TYPE(tensor, value) qnn::set_qnn_tensor_datatype(tensor, value)
-#define QNN_TENSOR_SET_QUANT_PARAMS(tensor, value) qnn::set_qnn_tensor_quantparams(tensor, value)
-#define QNN_TENSOR_SET_RANK(tensor, value) qnn::set_qnn_tensor_rank(tensor, value)
-#define QNN_TENSOR_SET_DIMENSIONS(tensor, value) qnn::set_qnn_tensor_dimensions(tensor, value)
-#define QNN_TENSOR_SET_MEM_TYPE(tensor, value) qnn::set_qnn_tensor_memtype(tensor, value)
-#define QNN_TENSOR_SET_CLIENT_BUF(tensor, value) qnn::set_qnn_tensor_clientbuf(tensor, value)
-#define QNN_TENSOR_SET_MEM_HANDLE(tensor, value) qnn::set_qnn_tensor_memhandle(tensor, value)
+#define QNN_TENSOR_SET_ID(tensor, value)             qnn::set_qnn_tensor_id(tensor, value)
+#define QNN_TENSOR_SET_NAME(tensor, value)           qnn::set_qnn_tensor_name(tensor, value)
+#define QNN_TENSOR_SET_TYPE(tensor, value)           qnn::set_qnn_tensor_type(tensor, value)
+#define QNN_TENSOR_SET_DATA_FORMAT(tensor, value)    qnn::set_qnn_tensor_dataformat(tensor, value)
+#define QNN_TENSOR_SET_DATA_TYPE(tensor, value)      qnn::set_qnn_tensor_datatype(tensor, value)
+#define QNN_TENSOR_SET_QUANT_PARAMS(tensor, value)   qnn::set_qnn_tensor_quantparams(tensor, value)
+#define QNN_TENSOR_SET_RANK(tensor, value)           qnn::set_qnn_tensor_rank(tensor, value)
+#define QNN_TENSOR_SET_DIMENSIONS(tensor, value)     qnn::set_qnn_tensor_dimensions(tensor, value)
+#define QNN_TENSOR_SET_MEM_TYPE(tensor, value)       qnn::set_qnn_tensor_memtype(tensor, value)
+#define QNN_TENSOR_SET_CLIENT_BUF(tensor, value)     qnn::set_qnn_tensor_clientbuf(tensor, value)
+#define QNN_TENSOR_SET_MEM_HANDLE(tensor, value)     qnn::set_qnn_tensor_memhandle(tensor, value)
 #define QNN_TENSOR_SET_DYN_DIMENSIONS(tensor, value) qnn::set_qnn_tensor_dyn_dimensions(tensor, value)