From a822d0075392defba5a83524f3a3564dc71c7f72 Mon Sep 17 00:00:00 2001 From: nullname Date: Mon, 24 Feb 2025 10:47:47 +0800 Subject: [PATCH] feat: run on win (#24) * move qnn_instance function implementation into cpp * wip * wip * move dl related function into separated file * use cast op for gpu * Revert "use cast op for gpu" This reverts commit 05df7362a15c022d05940d682e84cf480a082c6a. * Reapply "use cast op for gpu" This reverts commit 2520e5922a216faceb6d7efcde23dafe6947a4b3. * fix compiling error in win * fix align_alloc in win * fix compiling error * add get sys free/total mem for win * wip * suppress warning in win * add missing chrono header * set the correct qnn lib name for windows * add flag to control cpu backend * wip * wip * Revert "Reapply "use cast op for gpu"" This reverts commit f56519c374a7d46faac706cf214de48ff5fc5139. * fix compiling error for linux build * fix cdsprpc dynamic library name * wip * skip rpc load fail * fix page_align_alloc * suppress some warning in gcc * wip * reuse align to function * more log * add log and fix warning * wip * fix asan errors and memory leaks * fix the get_io_tensors_from_graph * improve comment * print GGML_QNN_DEFAULT_LIB_SEARCH_PATH * revert some unused changes * move library search path setter into qnn module * fix android library loading * skip qnn_device_get_platform_info for npu emulator --- ggml/src/ggml-qnn/CMakeLists.txt | 18 +- ggml/src/ggml-qnn/backend-ops.cpp | 2 +- ggml/src/ggml-qnn/buffer.hpp | 3 + ggml/src/ggml-qnn/dl_loader.hpp | 71 ++++ ggml/src/ggml-qnn/ggml-qnn.cpp | 72 +--- ggml/src/ggml-qnn/graph.cpp | 73 +++- ggml/src/ggml-qnn/logger.cpp | 15 +- ggml/src/ggml-qnn/op-config-caps.cpp | 15 +- ggml/src/ggml-qnn/op-config-impl.cpp | 2 +- ggml/src/ggml-qnn/op-config.hpp | 3 - ggml/src/ggml-qnn/qnn-lib.cpp | 521 ++++++++++++++++++++++++++- ggml/src/ggml-qnn/qnn-lib.hpp | 469 ++---------------------- ggml/src/ggml-qnn/tensor.hpp | 15 +- ggml/src/ggml-qnn/utils.cpp | 86 +++-- ggml/src/ggml-qnn/utils.hpp | 9 +- 15 files changed, 782 insertions(+), 592 deletions(-) create mode 100644 ggml/src/ggml-qnn/dl_loader.hpp diff --git a/ggml/src/ggml-qnn/CMakeLists.txt b/ggml/src/ggml-qnn/CMakeLists.txt index 7bbb9be76b..ccf51e1a55 100644 --- a/ggml/src/ggml-qnn/CMakeLists.txt +++ b/ggml/src/ggml-qnn/CMakeLists.txt @@ -4,12 +4,15 @@ if(CMAKE_SYSTEM_NAME STREQUAL "Android") find_library(LOG_LIB log) set(QNN_LINK_LIBRARIES ${LOG_LIB}) set(QNN_DEFAULT_LIB_SEARCH_PATH "/data/local/tmp/" CACHE STRING "customized library search path for QNN backend") +elseif(CMAKE_SYSTEM_NAME STREQUAL "Windows" OR CMAKE_SYSTEM_NAME STREQUAL "Linux") + set(QNN_DEFAULT_LIB_SEARCH_PATH "" CACHE STRING "customized library search path for QNN backend") else() - message(FATAL_ERROR "QNN now only available on Android") + message(FATAL_ERROR "QNN now only available on Android, Windows and Linux") endif() if(NOT DEFINED GGML_QNN_SDK_PATH) # try read from environment variable + # TODO: create a function to search for the SDK path if(DEFINED ENV{QNN_SDK_PATH}) set(GGML_QNN_SDK_PATH $ENV{QNN_SDK_PATH}) else() @@ -29,5 +32,14 @@ ggml_add_backend_library(ggml-qnn target_include_directories(ggml-qnn PRIVATE ${GGML_QNN_SDK_PATH}/include/QNN ${CMAKE_CURRENT_LIST_DIR}) target_link_libraries(ggml-qnn PRIVATE ${QNN_LINK_LIBRARIES}) -string(REGEX REPLACE "/$" "" GGML_QNN_DEFAULT_LIB_SEARCH_PATH "${QNN_DEFAULT_LIB_SEARCH_PATH}") -target_compile_definitions(ggml-qnn PRIVATE GGML_QNN_DEFAULT_LIB_SEARCH_PATH="${QNN_DEFAULT_LIB_SEARCH_PATH}/") +if(NOT "${QNN_DEFAULT_LIB_SEARCH_PATH}" STREQUAL "") + string(REGEX REPLACE "/$" "" QNN_DEFAULT_LIB_SEARCH_PATH "${QNN_DEFAULT_LIB_SEARCH_PATH}") +endif() + +message("GGML_QNN_DEFAULT_LIB_SEARCH_PATH: ${QNN_DEFAULT_LIB_SEARCH_PATH}") +target_compile_definitions(ggml-qnn PRIVATE GGML_QNN_DEFAULT_LIB_SEARCH_PATH="${QNN_DEFAULT_LIB_SEARCH_PATH}") + +if(GGML_QNN_ENABLE_CPU_BACKEND) + message("GGML_QNN_ENABLE_CPU_BACKEND is enabled") + target_compile_definitions(ggml-qnn PRIVATE GGML_QNN_ENABLE_CPU_BACKEND) +endif() diff --git a/ggml/src/ggml-qnn/backend-ops.cpp b/ggml/src/ggml-qnn/backend-ops.cpp index 8bbf26da52..f62fc60d5c 100644 --- a/ggml/src/ggml-qnn/backend-ops.cpp +++ b/ggml/src/ggml-qnn/backend-ops.cpp @@ -389,7 +389,7 @@ bool ggml_qnn_supports_tensor(ggml_backend_qnn_device_context *ctx, const ggml_t case GGML_TYPE_F16: case GGML_TYPE_Q8_0: case GGML_TYPE_Q4_0: - if (!(ctx->supported_types & (1 << tensor->type))) { + if (!(ctx->supported_types & (uint64_t(1) << tensor->type))) { QNN_LOG_DEBUG("[%s]unsupported data type %s, supported_types: 0x%x", qnn::get_backend_name(ctx->device), ggml_type_name(tensor->type), ctx->supported_types); return false; diff --git a/ggml/src/ggml-qnn/buffer.hpp b/ggml/src/ggml-qnn/buffer.hpp index af165b394e..ce796cbe4d 100644 --- a/ggml/src/ggml-qnn/buffer.hpp +++ b/ggml/src/ggml-qnn/buffer.hpp @@ -133,11 +133,14 @@ public: if (data) { memcpy(_buffer, data, size); } + + QNN_LOG_DEBUG("alloc buffer: %p, size: %ld", _buffer, size); } explicit qnn_mem_buffer(size_t size) : qnn_mem_buffer(nullptr, size) {} ~qnn_mem_buffer() { + QNN_LOG_DEBUG("free buffer: %p, size: %ld", _buffer, _size); // the free will do nothing if the _buffer is nullptr qnn::align_free(_buffer); } diff --git a/ggml/src/ggml-qnn/dl_loader.hpp b/ggml/src/ggml-qnn/dl_loader.hpp new file mode 100644 index 0000000000..1beec8866b --- /dev/null +++ b/ggml/src/ggml-qnn/dl_loader.hpp @@ -0,0 +1,71 @@ +#pragma once + +#ifdef __linux__ +#include +#include +#elif defined(_WIN32) +#define WIN32_LEAN_AND_MEAN +#ifndef NOMINMAX +#define NOMINMAX +#endif +#include +#endif + +#include + +namespace qnn { + +#ifdef __linux__ +typedef void *dl_handler_t; + +inline qnn::dl_handler_t dl_load(const std::string &lib_path) { + return dlopen(lib_path.c_str(), RTLD_NOW | RTLD_LOCAL); +} + +inline void *dl_sym(qnn::dl_handler_t handle, const std::string &symbol) { return dlsym(handle, symbol.c_str()); } + +inline bool dl_unload(qnn::dl_handler_t handle) { return dlclose(handle) == 0; } + +inline const char *dl_error() { return dlerror(); } +#elif defined(_WIN32) +using dl_handler_t = HMODULE; + +inline qnn::dl_handler_t dl_load(const std::string &lib_path) { + // suppress error dialogs for missing DLLs + auto old_mode = SetErrorMode(SEM_FAILCRITICALERRORS); + SetErrorMode(old_mode | SEM_FAILCRITICALERRORS); + + auto handle = LoadLibraryA(lib_path.c_str()); // TODO: use wstring version for unicode paths + + SetErrorMode(old_mode); + return handle; +} + +inline void *dl_sym(qnn::dl_handler_t handle, const std::string &symbol) { + auto old_mode = SetErrorMode(SEM_FAILCRITICALERRORS); + SetErrorMode(old_mode | SEM_FAILCRITICALERRORS); + + void *p = (void *)GetProcAddress(handle, symbol.c_str()); + + SetErrorMode(old_mode); + return p; +} + +inline bool dl_unload(qnn::dl_handler_t handle) { + FreeLibrary(handle); + return true; +} + +inline const char *dl_error() { + // TODO: implement dl_error for Windows + return nullptr; +} + +#endif + +template +Fn dl_sym_typed(qnn::dl_handler_t handle, const std::string &function_name) { + return reinterpret_cast(dl_sym(handle, function_name)); +} + +} // namespace qnn diff --git a/ggml/src/ggml-qnn/ggml-qnn.cpp b/ggml/src/ggml-qnn/ggml-qnn.cpp index b3673eb35a..8150dcb9ea 100644 --- a/ggml/src/ggml-qnn/ggml-qnn.cpp +++ b/ggml/src/ggml-qnn/ggml-qnn.cpp @@ -1,23 +1,7 @@ #include "ggml-qnn.h" -#include -#include -#include -#include #include -#include -#include #include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include #include #include "ggml-backend-impl.h" @@ -44,6 +28,16 @@ namespace { +#ifdef _WIN32 +constexpr const char *kQnnCpuLibName = "QnnCpu.dll"; +constexpr const char *kQnnGpuLibName = "QnnGpu.dll"; +constexpr const char *kQnnNpuLibName = "QnnHtp.dll"; +#else +constexpr const char *kQnnCpuLibName = "libQnnCpu.so"; +constexpr const char *kQnnGpuLibName = "libQnnGpu.so"; +constexpr const char *kQnnNpuLibName = "libQnnHtp.so"; +#endif + struct qnn_device_caps { const char *name; const char *description; @@ -59,7 +53,7 @@ constexpr const qnn_device_caps kDeviceCaps[] = { // https://docs.qualcomm.com/bundle/publicresource/topics/80-63442-50/CpuOpDefSupplement.html#matmul "qnn-cpu", "Qualcomm Kryo CPU", - "libQnnCpu.so", + kQnnCpuLibName, GGML_BACKEND_DEVICE_TYPE_CPU, (1 << GGML_TYPE_I8) | (1 << GGML_TYPE_F32), }, @@ -67,7 +61,7 @@ constexpr const qnn_device_caps kDeviceCaps[] = { // https://docs.qualcomm.com/bundle/publicresource/topics/80-63442-50/GpuOpDefSupplement.html#matmul "qnn-gpu", "Qualcomm Adreno GPU", - "libQnnGpu.so", + kQnnGpuLibName, GGML_BACKEND_DEVICE_TYPE_GPU, (1 << GGML_TYPE_F32) | (1 << GGML_TYPE_F16), }, @@ -75,7 +69,7 @@ constexpr const qnn_device_caps kDeviceCaps[] = { // https://docs.qualcomm.com/bundle/publicresource/topics/80-63442-50/HtpOpDefSupplement.html#matmul "qnn-npu", "Qualcomm NPU", - "libQnnHtp.so", + kQnnNpuLibName, GGML_BACKEND_DEVICE_TYPE_ACCEL, (1 << GGML_TYPE_F32) | (1 << GGML_TYPE_F16) | (1 << GGML_TYPE_I16) | (1 << GGML_TYPE_I8), }, @@ -214,6 +208,8 @@ void ggml_backend_qnn_free(ggml_backend_t backend) { instance->qnn_finalize(); instance.reset(); } + + delete backend; } bool ggml_backend_qnn_cpy_tensor_async(ggml_backend_t backend_src, ggml_backend_t backend_dst, const ggml_tensor *src, @@ -332,42 +328,10 @@ ggml_backend_t ggml_backend_qnn_init_with_device_context(ggml_backend_dev_t dev, const auto device = dev_ctx->device; QNN_LOG_DEBUG("device %s", qnn::get_backend_name(device)); QNN_LOG_DEBUG("extend_lib_search_path %s", extend_lib_search_path); - std::string path = extend_lib_search_path; - -// TODO: Fix this for other platforms -#if defined(__ANDROID__) || defined(ANDROID) - if (device == QNN_BACKEND_NPU) { - if (setenv("LD_LIBRARY_PATH", - (path + ":/vendor/dsp/cdsp:/vendor/lib64:/vendor/dsp/" - "dsp:/vendor/dsp/images") - .c_str(), - 1) == 0) { - QNN_LOG_DEBUG("QNN NPU backend setenv successfully"); - } else { - QNN_LOG_ERROR("QNN NPU backend setenv failure"); - } - if (setenv("ADSP_LIBRARY_PATH", - (path + ";/vendor/dsp/cdsp;/vendor/lib/rfsa/adsp;/system/lib/" - "rfsa/adsp;/vendor/dsp/dsp;/vendor/dsp/images;/dsp") - .c_str(), - 1) == 0) { - QNN_LOG_DEBUG("QNN NPU backend setenv successfully"); - } else { - QNN_LOG_ERROR("QNN NPU backend setenv failure"); - } - } else { - if (setenv("LD_LIBRARY_PATH", path.c_str(), 1) == 0) { - QNN_LOG_DEBUG("%s backend setenv successfully", qnn::get_backend_name(device)); - } else { - QNN_LOG_ERROR("%s backend setenv failure", qnn::get_backend_name(device)); - } - } -#endif - - auto instance = std::make_shared(path, dev_ctx->lib_name, "ggml"); + auto instance = std::make_shared(extend_lib_search_path, dev_ctx->lib_name); auto result = instance->qnn_init(nullptr); if (result != 0) { - QNN_LOG_WARN("init qnn subsystem failed with qnn backend %s, pls check why", qnn::get_backend_name(device)); + QNN_LOG_WARN("failed to init qnn backend %s", qnn::get_backend_name(device)); return nullptr; } auto qnn_interface = instance->get_qnn_interface(); @@ -466,6 +430,7 @@ struct ggml_backend_qnn_reg_impl : ggml_backend_reg { QNN_LOG_DEBUG("qnn backend registry init"); for (size_t i = 0; i < QNN_BACKEND_COUNT; i++) { const auto device_enum = (QNNBackend)(QNN_BACKEND_COUNT - 1 - i); // init from the last device, i.e. NPU +#ifndef GGML_QNN_ENABLE_CPU_BACKEND if (device_enum == QNN_BACKEND_CPU) { /* * here we skip the initialization of CPU device, @@ -473,6 +438,7 @@ struct ggml_backend_qnn_reg_impl : ggml_backend_reg { */ continue; } +#endif device_contexts.emplace_back(std::make_unique( /* .device = */ device_enum, // init from the last device, i.e. NPU diff --git a/ggml/src/ggml-qnn/graph.cpp b/ggml/src/ggml-qnn/graph.cpp index 680f5e23bd..25ce5b8fb2 100644 --- a/ggml/src/ggml-qnn/graph.cpp +++ b/ggml/src/ggml-qnn/graph.cpp @@ -1,7 +1,7 @@ #include "graph.hpp" -#include +#include #include #include "ggml-impl.h" @@ -106,13 +106,29 @@ bool bind_src_tensors(ggml_tensor *op, qnn::qnn_tensor_array_t &tensor_wrappers, return true; } +/** + * @brief Extracts input and output tensors from a computational graph. + * + * This function identifies the input and output tensors of a computational graph by analyzing the connectivity between + * tensor nodes. It does this by iterating over each node in the graph, using a connectivity map that associates every + * tensor with its number of incoming connections (in_degree), outgoing connections (out_degree), and an insertion index + * that preserves order. The insertion index is used later to sort the tensors in their original discovery order. + * + * TODO: this algorithm is not perfect and may not work for all cases. It assumes that the tensors are + * connected in a way that allows for unambiguous categorization. + * It also assumes that the tensors are connected in a way that allows for unambiguous categorization. + */ int get_io_tensors_from_graph(const ggml_cgraph *cgraph, qnn::ggml_tensor_array_t &inputs, qnn::ggml_tensor_array_t &outputs) { - using ggml_tensor_set_t = std::set; + struct _tensor_connectivity_info { + size_t in_degree = 0; + size_t out_degree = 0; + size_t insert_index = 0; + }; - ggml_tensor_set_t input_set; - ggml_tensor_set_t output_set; - ggml_tensor_set_t visited_set; + using ggml_tensor_connectivity_map_t = std::unordered_map; + + ggml_tensor_connectivity_map_t connectivity_map; int rank = 0; for (int i = 0; i < cgraph->n_nodes; i++) { ggml_tensor *dst = cgraph->nodes[i]; @@ -126,25 +142,50 @@ int get_io_tensors_from_graph(const ggml_cgraph *cgraph, qnn::ggml_tensor_array_ } rank = std::max(rank, ggml_n_dims(dst)); - input_set.erase(dst); - if (!visited_set.count(dst)) { - output_set.insert(dst); - visited_set.insert(dst); + if (connectivity_map.count(dst) == 0) { + connectivity_map[dst] = { + 1, // in-degree, at least 1 + 0, + connectivity_map.size(), + }; + } else { + ++(connectivity_map[dst].in_degree); } for (size_t i = 0; i < GGML_MAX_DIMS && dst->src[i]; ++i) { auto *src = dst->src[i]; rank = std::max(rank, ggml_n_dims(src)); - output_set.erase(src); - if (!visited_set.count(src)) { - input_set.insert(src); - visited_set.insert(src); + + if (connectivity_map.count(src) == 0) { + connectivity_map[src] = { + 0, + 1, // out-degree, at least 1 + connectivity_map.size(), + }; + } else { + ++(connectivity_map[src].out_degree); } } } - inputs.assign(input_set.begin(), input_set.end()); - outputs.assign(output_set.begin(), output_set.end()); + for (const auto &kv : connectivity_map) { + if (kv.second.in_degree == 0) { + inputs.push_back(kv.first); + } + + if (kv.second.out_degree == 0) { + outputs.push_back(kv.first); + } + } + + std::sort(inputs.begin(), inputs.end(), [&connectivity_map](ggml_tensor *lhs, ggml_tensor *rhs) { + return connectivity_map[lhs].insert_index < connectivity_map[rhs].insert_index; + }); + + std::sort(outputs.begin(), outputs.end(), [&connectivity_map](ggml_tensor *lhs, ggml_tensor *rhs) { + return connectivity_map[lhs].insert_index < connectivity_map[rhs].insert_index; + }); + return rank; } @@ -187,7 +228,7 @@ qnn_graph::qnn_graph(const std::string &graph_name, QNNBackend device, std::shar QnnHtpGraph_CustomConfig_t vtcm_config; vtcm_config.option = QNN_HTP_GRAPH_CONFIG_OPTION_VTCM_SIZE; - vtcm_config.vtcmSizeInMB = vtcm_size_in_mb; + vtcm_config.vtcmSizeInMB = (uint32_t)vtcm_size_in_mb; QnnGraph_Config_t graph_vtcm_config; graph_vtcm_config.option = QNN_GRAPH_CONFIG_OPTION_CUSTOM; graph_vtcm_config.customConfig = &vtcm_config; diff --git a/ggml/src/ggml-qnn/logger.cpp b/ggml/src/ggml-qnn/logger.cpp index 1e781721d6..23a3f305c0 100644 --- a/ggml/src/ggml-qnn/logger.cpp +++ b/ggml/src/ggml-qnn/logger.cpp @@ -1,8 +1,7 @@ #include "logger.hpp" -#include - +#include #include #if defined(__ANDROID__) || defined(ANDROID) @@ -23,10 +22,12 @@ void qnn::internal_log(ggml_log_level level, const char * /*file*/, const char * int len = vsnprintf(s_qnn_internal_log_buf + len_prefix, QNN_LOGBUF_LEN - len_prefix, format, args); if (len < (QNN_LOGBUF_LEN - len_prefix)) { #if defined(__ANDROID__) || defined(ANDROID) - // for Android APK + // print to android logcat __android_log_print(level, "ggml-qnn", "%s\n", s_qnn_internal_log_buf); +#else + (void)level; #endif - // for Android command line application or WoA(Windows on ARM) + // print to stdout printf("%s\n", s_qnn_internal_log_buf); } va_end(args); @@ -36,7 +37,7 @@ void qnn::internal_log(ggml_log_level level, const char * /*file*/, const char * #if ENABLE_QNNSDK_LOG void qnn::sdk_logcallback(const char *fmt, QnnLog_Level_t level, uint64_t /*timestamp*/, va_list argp) { static std::mutex log_mutex; - static unsigned char s_ggml_qnn_logbuf[QNN_LOGBUF_LEN]; + static char s_ggml_qnn_logbuf[QNN_LOGBUF_LEN]; const char *log_level_desc = ""; switch (level) { @@ -62,9 +63,7 @@ void qnn::sdk_logcallback(const char *fmt, QnnLog_Level_t level, uint64_t /*time { std::lock_guard lock(log_mutex); - - memset(s_ggml_qnn_logbuf, 0, QNN_LOGBUF_LEN); - vsnprintf(reinterpret_cast(s_ggml_qnn_logbuf), QNN_LOGBUF_LEN, fmt, argp); + vsnprintf(s_ggml_qnn_logbuf, QNN_LOGBUF_LEN, fmt, argp); QNN_LOG_INFO("[%s]%s", log_level_desc, s_ggml_qnn_logbuf); } } diff --git a/ggml/src/ggml-qnn/op-config-caps.cpp b/ggml/src/ggml-qnn/op-config-caps.cpp index 9b28a76dd1..b250c214a3 100644 --- a/ggml/src/ggml-qnn/op-config-caps.cpp +++ b/ggml/src/ggml-qnn/op-config-caps.cpp @@ -5,17 +5,17 @@ namespace { using op_constructor_t = std::shared_ptr (*)(const ggml_tensor *, const std::string &, std::shared_ptr); -using op_dims_calc_func_t = void (*)(const std::vector &input_dims, +using op_dims_calc_func_t = void (*)(const std::vector &input_dims, qnn::ggml_dimension_array_t &output_dims); -void element_wise_op_dims(const std::vector &input_dims, +void element_wise_op_dims(const std::vector &input_dims, qnn::ggml_dimension_array_t &output_dims) { for (size_t i = 1; i < std::size(output_dims); i++) { output_dims[i] = input_dims.front()[i]; } } -void mat_mul_op_dims(const std::vector &input_dims, +void mat_mul_op_dims(const std::vector &input_dims, qnn::ggml_dimension_array_t &output_dims) { GGML_ASSERT(input_dims.size() == 2); output_dims[0] = input_dims.front()[1]; @@ -374,15 +374,6 @@ size_t get_qnn_op_index(const ggml_tensor *tensor) { return tensor->op; } -void get_ggml_op_output_dimensions(const std::vector &input_dims, const ggml_tensor *op, - ggml_dimension_array_t &output_dims) { - auto op_index = get_qnn_op_index(op); - GGML_ASSERT(op_index < std::size(kOpCaps)); - auto get_dims = kOpCaps[op_index].calc_dims_func; - GGML_ASSERT(get_dims); - get_dims(input_dims, output_dims); -} - const char *get_qnn_op_name(const ggml_tensor *op) { auto op_index = get_qnn_op_index(op); GGML_ASSERT(op_index < std::size(kOpCaps)); diff --git a/ggml/src/ggml-qnn/op-config-impl.cpp b/ggml/src/ggml-qnn/op-config-impl.cpp index 19a1bf46ee..934dbadfdc 100644 --- a/ggml/src/ggml-qnn/op-config-impl.cpp +++ b/ggml/src/ggml-qnn/op-config-impl.cpp @@ -276,7 +276,7 @@ qnn_tensor_ptr_t ggml_qnn_matmul_op_config::create_gather_nodes(QNNBackend devic auto index_buffer = std::make_shared(dimensions[axis] * sizeof(uint32_t)); for (uint32_t *curr = reinterpret_cast(index_buffer->get_buffer()), *end = curr + dimensions[axis]; curr < end; curr++) { - *curr = (curr - reinterpret_cast(index_buffer->get_buffer())) / scale; + *curr = uint32_t((curr - reinterpret_cast(index_buffer->get_buffer())) / scale); } auto gather_index = std::make_shared( diff --git a/ggml/src/ggml-qnn/op-config.hpp b/ggml/src/ggml-qnn/op-config.hpp index 075c56fed6..6b8c6946b8 100644 --- a/ggml/src/ggml-qnn/op-config.hpp +++ b/ggml/src/ggml-qnn/op-config.hpp @@ -15,9 +15,6 @@ namespace qnn { constexpr const size_t kGgmlUnaryOpStart = GGML_OP_COUNT; size_t get_qnn_op_index(const ggml_tensor *tensor); -void get_ggml_op_output_dimensions(const std::vector &input_dims, const ggml_tensor *op, - ggml_dimension_array_t &output_dims); - const char *get_qnn_op_name(const ggml_tensor *op); size_t get_qnn_op_input_param_count(const ggml_tensor *op); std::shared_ptr create_op(const ggml_tensor *op, const std::string &name, diff --git a/ggml/src/ggml-qnn/qnn-lib.cpp b/ggml/src/ggml-qnn/qnn-lib.cpp index a7553c4ac2..1f9a68333c 100644 --- a/ggml/src/ggml-qnn/qnn-lib.cpp +++ b/ggml/src/ggml-qnn/qnn-lib.cpp @@ -1,35 +1,536 @@ #include "qnn-lib.hpp" +#include + +#if defined(__linux__) +#include +#endif + +namespace { + +#ifdef _WIN32 +constexpr const char *kQnnSystemLibName = "QnnSystem.dll"; +constexpr const char *kQnnRpcLibName = "libcdsprpc.dll"; +#else +constexpr const char *kQnnSystemLibName = "libQnnSystem.so"; +constexpr const char *kQnnRpcLibName = "libcdsprpc.so"; + +#endif + +void insert_path(std::string &path, std::string insert_path, const char separator = ':') { + if (!insert_path.empty() && !path.empty()) { + insert_path += separator; + } + + path.insert(0, insert_path); +} + +// TODO: Fix this for other platforms, or use a more portable way to set the library search path +bool set_qnn_lib_search_path(const std::string &custom_lib_search_path) { +#if defined(__linux__) + { + auto *original = getenv("LD_LIBRARY_PATH"); + std::string lib_search_path = original ? original : ""; + insert_path(lib_search_path, + "/vendor/dsp/cdsp:/vendor/lib64:" + "/vendor/dsp/dsp:/vendor/dsp/images"); + insert_path(lib_search_path, custom_lib_search_path); + if (setenv("LD_LIBRARY_PATH", lib_search_path.c_str(), 1)) { + return false; + } + } + +#if defined(__ANDROID__) || defined(ANDROID) + { + // See also: https://docs.qualcomm.com/bundle/publicresource/topics/80-63442-2/dsp_runtime.html + std::string adsp_lib_search_path = custom_lib_search_path + + ";/vendor/dsp/cdsp;/vendor/lib/rfsa/adsp;/system/lib/" + "rfsa/adsp;/vendor/dsp/dsp;/vendor/dsp/images;/dsp"; + if (setenv("ADSP_LIBRARY_PATH", adsp_lib_search_path.c_str(), 1)) { + return false; + } + + QNN_LOG_DEBUG("ADSP_LIBRARY_PATH=%s", getenv("ADSP_LIBRARY_PATH")); + } +#endif + + QNN_LOG_DEBUG("LD_LIBRARY_PATH=%s", getenv("LD_LIBRARY_PATH")); +#else + (void)custom_lib_search_path; +#endif + + return true; +} + +qnn::dl_handler_t load_lib_with_fallback(const std::string &lib_path, const std::string &load_directory) { + std::filesystem::path full_path(load_directory); + full_path /= std::filesystem::path(lib_path).filename(); + auto handle = qnn::dl_load(full_path.string()); + if (!handle) { + QNN_LOG_WARN("failed to load %s, fallback to %s", full_path.c_str(), lib_path.c_str()); + handle = qnn::dl_load(lib_path); + } + + return handle; +} + +} // namespace + namespace qnn { -qnn_system_interface::qnn_system_interface(const QnnSystemInterface_t &qnn_sys_interface, dl_handler_t lib_handle) : - _qnn_sys_interface(qnn_sys_interface), _lib_handle(lib_handle) { +qnn_system_interface::qnn_system_interface(const QnnSystemInterface_t &qnn_sys_interface, dl_handler_t lib_handle) + : _qnn_sys_interface(qnn_sys_interface), _lib_handle(lib_handle) { qnn_system_context_create(&_qnn_system_handle); if (_qnn_system_handle) { - QNN_LOG_INFO("initialize qnn system successfully\n"); + QNN_LOG_INFO("initialize qnn system successfully"); } else { - QNN_LOG_WARN("can not create QNN system contenxt\n"); + QNN_LOG_WARN("can not create QNN system contenxt"); } } qnn_system_interface::~qnn_system_interface() { if (_qnn_system_handle) { if (qnn_system_context_free(_qnn_system_handle) != QNN_SUCCESS) { - QNN_LOG_WARN("failed to free QNN system context\n"); + QNN_LOG_WARN("failed to free QNN system context"); } } else { - QNN_LOG_WARN("system handle is null\n"); + QNN_LOG_WARN("system handle is null"); } if (_lib_handle) { - int dlclose_error = dl_unload(_lib_handle); - if (dlclose_error != 0) { - QNN_LOG_WARN("failed to close QnnSystem library, error %s\n", dl_error()); + if (!dl_unload(_lib_handle)) { + QNN_LOG_WARN("failed to close QnnSystem library, error %s", dl_error()); } } else { - QNN_LOG_WARN("system lib handle is null\n"); + QNN_LOG_WARN("system lib handle is null"); } } +qnn_instance::qnn_instance(const std::string &lib_path, const std::string &backend_lib_name) + : _additional_lib_load_path(lib_path), _backend_lib_name(std::move(backend_lib_name)) { + if (set_qnn_lib_search_path(lib_path)) { + QNN_LOG_DEBUG("[%s] set_qnn_lib_search_path succeed", _backend_lib_name.c_str()); + } else { + QNN_LOG_ERROR("[%s] set_qnn_lib_search_path failed", _backend_lib_name.c_str()); + } +} + +int qnn_instance::qnn_init(const QnnSaver_Config_t **saver_config) { + BackendIdType backend_id = QNN_BACKEND_ID_NULL; + QNN_LOG_DEBUG("enter qnn_init"); + + std::lock_guard lock(_init_mutex); + if (load_system() != 0) { + QNN_LOG_WARN("failed to load QNN system lib"); + return 1; + } else { + QNN_LOG_DEBUG("load QNN system lib successfully"); + } + + std::string backend_lib_path = _backend_lib_name; + if (_lib_path_to_backend_id.count(backend_lib_path) == 0) { + if (load_backend(backend_lib_path, saver_config) != 0) { + QNN_LOG_WARN("failed to load QNN backend"); + return 2; + } + } + + backend_id = _lib_path_to_backend_id[backend_lib_path]; + if (_loaded_backend.count(backend_id) == 0 || _loaded_lib_handle.count(backend_id) == 0) { + QNN_LOG_WARN( + "library %s is loaded but loaded backend count=%zu, " + "loaded lib_handle count=%zu", + backend_lib_path.c_str(), _loaded_backend.count(backend_id), _loaded_lib_handle.count(backend_id)); + return 3; + } + + _qnn_interface = std::make_shared(*_loaded_backend[backend_id]); + _qnn_interface->qnn_log_create(qnn::sdk_logcallback, _qnn_log_level, &_qnn_log_handle); + if (!_qnn_log_handle) { + // NPU backend not work on Qualcomm SoC equipped low-end phone + QNN_LOG_WARN("why failed to initialize qnn log"); + return 4; + } else { + QNN_LOG_DEBUG("initialize qnn log successfully"); + } + + std::vector temp_backend_config; + _qnn_interface->qnn_backend_create( + _qnn_log_handle, temp_backend_config.empty() ? nullptr : temp_backend_config.data(), &_qnn_backend_handle); + if (!_qnn_backend_handle) { + QNN_LOG_WARN("why failed to initialize qnn backend"); + return 5; + } else { + QNN_LOG_DEBUG("initialize qnn backend successfully"); + } + + auto qnn_status = _qnn_interface->qnn_property_has_capability(QNN_PROPERTY_GROUP_DEVICE); + if (QNN_PROPERTY_NOT_SUPPORTED == qnn_status) { + QNN_LOG_WARN("device property is not supported"); + } + if (QNN_PROPERTY_ERROR_UNKNOWN_KEY == qnn_status) { + QNN_LOG_WARN("device property is not known to backend"); + } + + qnn_status = QNN_SUCCESS; + if (_backend_lib_name.find("Htp") != _backend_lib_name.npos) { + const QnnDevice_PlatformInfo_t *p_info = nullptr; + qnn_status = _qnn_interface->qnn_device_get_platform_info(nullptr, &p_info); + if (qnn_status == QNN_SUCCESS) { + QNN_LOG_INFO("device counts %d", p_info->v1.numHwDevices); + QnnDevice_HardwareDeviceInfo_t *infos = p_info->v1.hwDevices; + QnnHtpDevice_OnChipDeviceInfoExtension_t chipinfo = {}; + for (uint32_t i = 0; i < p_info->v1.numHwDevices; i++) { + QNN_LOG_INFO("deviceID:%d, deviceType:%d, numCores %d", infos[i].v1.deviceId, infos[i].v1.deviceType, + infos[i].v1.numCores); + QnnDevice_DeviceInfoExtension_t devinfo = infos[i].v1.deviceInfoExtension; + chipinfo = devinfo->onChipDevice; + size_t htp_arch = (size_t)chipinfo.arch; + QNN_LOG_INFO("htp_type:%d(%s)", devinfo->devType, + (devinfo->devType == QNN_HTP_DEVICE_TYPE_ON_CHIP) ? "ON_CHIP" : ""); + QNN_LOG_INFO("qualcomm soc_model:%d(%s), htp_arch:%d(%s), vtcm_size:%d MB", chipinfo.socModel, + qnn::get_chipset_desc(chipinfo.socModel), htp_arch, qnn::get_htparch_desc(htp_arch), + chipinfo.vtcmSize); + _soc_info = {chipinfo.socModel, htp_arch, chipinfo.vtcmSize}; + } + _qnn_interface->qnn_device_free_platform_info(nullptr, p_info); + } else { + // For emulator, we can't get platform info + QNN_LOG_WARN("failed to get platform info, are we in emulator?"); + _soc_info = {NONE, UNKNOWN_SM, 0}; + } + + QnnHtpDevice_CustomConfig_t soc_customconfig; + soc_customconfig.option = QNN_HTP_DEVICE_CONFIG_OPTION_SOC; + soc_customconfig.socModel = _soc_info.soc_model; + QnnDevice_Config_t soc_devconfig; + soc_devconfig.option = QNN_DEVICE_CONFIG_OPTION_CUSTOM; + soc_devconfig.customConfig = &soc_customconfig; + + QnnHtpDevice_CustomConfig_t arch_customconfig; + arch_customconfig.option = QNN_HTP_DEVICE_CONFIG_OPTION_ARCH; + arch_customconfig.arch.arch = (QnnHtpDevice_Arch_t)_soc_info.htp_arch; + arch_customconfig.arch.deviceId = 0; // Id of device to be used. 0 will use by default. + QnnDevice_Config_t arch_devconfig; + arch_devconfig.option = QNN_DEVICE_CONFIG_OPTION_CUSTOM; + arch_devconfig.customConfig = &arch_customconfig; + + const QnnDevice_Config_t *p_deviceconfig[] = {&soc_devconfig, &arch_devconfig, nullptr}; + qnn_status = _qnn_interface->qnn_device_create(_qnn_log_handle, p_deviceconfig, &_qnn_device_handle); + } else { + qnn_status = _qnn_interface->qnn_device_create(_qnn_log_handle, nullptr, &_qnn_device_handle); + } + if (QNN_SUCCESS != qnn_status && QNN_DEVICE_ERROR_UNSUPPORTED_FEATURE != qnn_status) { + QNN_LOG_WARN("failed to create QNN device"); + } else { + QNN_LOG_INFO("create QNN device successfully"); + } + + if (_profile_level != sdk_profile_level::profile_off) { + QNN_LOG_INFO("profiling turned on; level = %d", _profile_level); + auto profile_level = + _profile_level == sdk_profile_level::profile_detail ? QNN_PROFILE_LEVEL_DETAILED : QNN_PROFILE_LEVEL_BASIC; + + if (QNN_PROFILE_NO_ERROR != + _qnn_interface->qnn_profile_create(_qnn_backend_handle, profile_level, &_qnn_profile_handle)) { + QNN_LOG_WARN("unable to create profile handle in the backend"); + return 6; + } else { + QNN_LOG_DEBUG("initialize qnn profile successfully"); + } + } + + _rpc_lib_handle = load_lib_with_fallback(kQnnRpcLibName, _additional_lib_load_path); + if (_rpc_lib_handle) { + _pfn_rpc_mem_alloc = reinterpret_cast(dl_sym(_rpc_lib_handle, "rpcmem_alloc")); + _pfn_rpc_mem_free = reinterpret_cast(dl_sym(_rpc_lib_handle, "rpcmem_free")); + _pfn_rpc_mem_to_fd = reinterpret_cast(dl_sym(_rpc_lib_handle, "rpcmem_to_fd")); + if (!_pfn_rpc_mem_alloc || !_pfn_rpc_mem_free || !_pfn_rpc_mem_to_fd) { + QNN_LOG_WARN("unable to access symbols in QNN RPC lib. error: %s", dl_error()); + dl_unload(_rpc_lib_handle); + return 9; + } + + _pfn_rpc_mem_init = reinterpret_cast(dl_sym(_rpc_lib_handle, "rpcmem_init")); + _pfn_rpc_mem_deinit = reinterpret_cast(dl_sym(_rpc_lib_handle, "rpcmem_deinit")); + if (_pfn_rpc_mem_init) { + _pfn_rpc_mem_init(); + } + + _rpcmem_initialized = true; + QNN_LOG_DEBUG("load rpcmem lib successfully"); + } else { + QNN_LOG_WARN("failed to load qualcomm rpc lib, skipping, error:%s", dl_error()); + } + + /* TODO: not used, keep it for further usage + QnnContext_Config_t qnn_context_config = QNN_CONTEXT_CONFIG_INIT; + qnn_context_config.priority = QNN_PRIORITY_DEFAULT; + const QnnContext_Config_t * context_configs[] = {&qnn_context_config, nullptr}; + */ + _qnn_interface->qnn_context_create(_qnn_backend_handle, _qnn_device_handle, nullptr, &_qnn_context_handle); + if (nullptr == _qnn_context_handle) { + QNN_LOG_WARN("why failed to initialize qnn context"); + return 10; + } else { + QNN_LOG_DEBUG("initialize qnn context successfully"); + } + + if (_backend_lib_name.find("Htp") != _backend_lib_name.npos) { + // TODO: faster approach to probe the accurate capacity of rpc ion memory + size_t candidate_size = 0; + uint8_t *rpc_buffer = nullptr; + const int size_in_mb = (1 << 20); + size_t probe_slots[] = {1024, 1536, 2048 - 48, 2048}; + size_t probe_counts = sizeof(probe_slots) / sizeof(size_t); + for (size_t idx = 0; idx < probe_counts; idx++) { + rpc_buffer = static_cast(alloc_rpcmem(probe_slots[idx] * size_in_mb, sizeof(void *))); + if (!rpc_buffer) { + QNN_LOG_DEBUG("alloc rpcmem %d (MB) failure, %s", probe_slots[idx], strerror(errno)); + break; + } else { + candidate_size = probe_slots[idx]; + free_rpcmem(rpc_buffer); + rpc_buffer = nullptr; + } + } + + _rpcmem_capacity = std::max(candidate_size, _rpcmem_capacity); + QNN_LOG_INFO("capacity of QNN rpc ion memory is about %d MB", _rpcmem_capacity); + + if (init_htp_perfinfra() != 0) { + QNN_LOG_WARN("initialize HTP performance failure"); + } + if (set_rpc_polling() != 0) { + QNN_LOG_WARN("set RPC polling failure"); + } + if (set_high_performance_mode() != 0) { + QNN_LOG_WARN("set HTP high performance mode failure"); + } + } + + QNN_LOG_DEBUG("leave qnn_init"); + + return 0; +} + +int qnn_instance::qnn_finalize() { + int ret_status = 0; + Qnn_ErrorHandle_t error = QNN_SUCCESS; + + if (_rpc_lib_handle) { + if (_pfn_rpc_mem_deinit) { + _pfn_rpc_mem_deinit(); + _pfn_rpc_mem_deinit = nullptr; + } + + if (dl_unload(_rpc_lib_handle)) { + QNN_LOG_DEBUG("succeed to close rpcmem lib"); + } else { + QNN_LOG_WARN("failed to unload qualcomm's rpc lib, error:%s", dl_error()); + } + } + + if (_backend_lib_name.find("Htp") != _backend_lib_name.npos) { + _qnn_htp_perfinfra->destroyPowerConfigId(_qnn_power_configid); + } + + if (_qnn_context_handle) { + error = _qnn_interface->qnn_context_free(_qnn_context_handle, _qnn_profile_handle); + if (error != QNN_SUCCESS) { + QNN_LOG_WARN("failed to free QNN context_handle: ID %u, error %d", _qnn_interface->get_backend_id(), + QNN_GET_ERROR_CODE(error)); + } + _qnn_context_handle = nullptr; + } + + if (_qnn_profile_handle) { + error = _qnn_interface->qnn_profile_free(_qnn_profile_handle); + if (error != QNN_SUCCESS) { + QNN_LOG_WARN("failed to free QNN profile_handle: ID %u, error %d", _qnn_interface->get_backend_id(), + QNN_GET_ERROR_CODE(error)); + } + _qnn_profile_handle = nullptr; + } + + if (_qnn_device_handle) { + error = _qnn_interface->qnn_device_free(_qnn_device_handle); + if (error != QNN_SUCCESS) { + QNN_LOG_WARN("failed to free QNN device_handle: ID %u, error %d", _qnn_interface->get_backend_id(), + QNN_GET_ERROR_CODE(error)); + } + _qnn_device_handle = nullptr; + } + + if (_qnn_backend_handle) { + error = _qnn_interface->qnn_backend_free(_qnn_backend_handle); + if (error != QNN_SUCCESS) { + QNN_LOG_WARN("failed to free QNN backend_handle: ID %u, error %d", _qnn_interface->get_backend_id(), + QNN_GET_ERROR_CODE(error)); + } + _qnn_backend_handle = nullptr; + } + + if (nullptr != _qnn_log_handle) { + error = _qnn_interface->qnn_log_free(_qnn_log_handle); + if (error != QNN_SUCCESS) { + QNN_LOG_WARN("failed to free QNN log_handle: ID %u, error %d", _qnn_interface->get_backend_id(), + QNN_GET_ERROR_CODE(error)); + } + _qnn_log_handle = nullptr; + } + + unload_backend(); + + _qnn_sys_interface.reset(); + + return ret_status; +} + +int qnn_instance::load_system() { + QNN_LOG_DEBUG("[%s]lib: %s", _backend_lib_name.c_str(), kQnnSystemLibName); + auto system_lib_handle = load_lib_with_fallback(kQnnSystemLibName, _additional_lib_load_path); + if (!system_lib_handle) { + QNN_LOG_WARN("can not load QNN library %s, error: %s", kQnnSystemLibName, dl_error()); + return 1; + } + + auto *get_providers = + dl_sym_typed(system_lib_handle, "QnnSystemInterface_getProviders"); + if (!get_providers) { + QNN_LOG_WARN("can not load QNN symbol QnnSystemInterface_getProviders: %s", dl_error()); + return 2; + } + + uint32_t num_providers = 0; + const QnnSystemInterface_t **provider_list = nullptr; + Qnn_ErrorHandle_t error = get_providers(&provider_list, &num_providers); + if (error != QNN_SUCCESS) { + QNN_LOG_WARN("failed to get providers, error %d", QNN_GET_ERROR_CODE(error)); + return 3; + } + + QNN_LOG_DEBUG("num_providers: %d", num_providers); + if (num_providers != _required_num_providers) { + QNN_LOG_WARN("providers is %d instead of required %d", num_providers, _required_num_providers); + return 4; + } + + if (!provider_list) { + QNN_LOG_WARN("can not get providers"); + return 5; + } + + QNN_SYSTEM_INTERFACE_VER_TYPE qnn_system_interface; + bool found_valid_system_interface = false; + for (size_t idx = 0; idx < num_providers; idx++) { + if (QNN_SYSTEM_API_VERSION_MAJOR == provider_list[idx]->systemApiVersion.major && + QNN_SYSTEM_API_VERSION_MINOR <= provider_list[idx]->systemApiVersion.minor) { + found_valid_system_interface = true; + qnn_system_interface = provider_list[idx]->QNN_SYSTEM_INTERFACE_VER_NAME; + break; + } + } + + if (!found_valid_system_interface) { + QNN_LOG_WARN("unable to find a valid qnn system interface"); + return 6; + } else { + QNN_LOG_DEBUG("find a valid qnn system interface"); + } + + auto qnn_sys_interface = std::make_shared(*provider_list[0], system_lib_handle); + if (!qnn_sys_interface->is_valid()) { + QNN_LOG_WARN("failed to create QNN system interface"); + return 7; + } + + _qnn_sys_interface = qnn_sys_interface; + return 0; +} + +int qnn_instance::load_backend(std::string &lib_path, const QnnSaver_Config_t ** /*saver_config*/) { + Qnn_ErrorHandle_t error = QNN_SUCCESS; + QNN_LOG_DEBUG("lib_path:%s", lib_path.c_str()); + + auto lib_handle = load_lib_with_fallback(lib_path, _additional_lib_load_path); + if (!lib_handle) { + QNN_LOG_WARN("can not open QNN library %s, with error: %s", lib_path.c_str(), dl_error()); + return 1; + } + + auto get_providers = dl_sym_typed(lib_handle, "QnnInterface_getProviders"); + if (!get_providers) { + QNN_LOG_WARN("can not load symbol QnnInterface_getProviders : %s", dl_error()); + return 2; + } + + std::uint32_t num_providers = 0; + const QnnInterface_t **provider_list = nullptr; + error = get_providers(&provider_list, &num_providers); + if (error != QNN_SUCCESS) { + QNN_LOG_WARN("failed to get providers, error %d", QNN_GET_ERROR_CODE(error)); + return 3; + } + QNN_LOG_DEBUG("num_providers=%d", num_providers); + if (num_providers != _required_num_providers) { + QNN_LOG_WARN("providers is %d instead of required %d", num_providers, _required_num_providers); + return 4; + } + + if (!provider_list) { + QNN_LOG_WARN("failed to get qnn interface providers"); + return 5; + } + bool found_valid_interface = false; + QNN_INTERFACE_VER_TYPE qnn_interface; + for (size_t idx = 0; idx < num_providers; idx++) { + if (QNN_API_VERSION_MAJOR == provider_list[idx]->apiVersion.coreApiVersion.major && + QNN_API_VERSION_MINOR <= provider_list[idx]->apiVersion.coreApiVersion.minor) { + found_valid_interface = true; + qnn_interface = provider_list[idx]->QNN_INTERFACE_VER_NAME; + break; + } + } + + if (!found_valid_interface) { + QNN_LOG_WARN("unable to find a valid qnn interface"); + return 6; + } else { + QNN_LOG_DEBUG("find a valid qnn interface"); + } + + BackendIdType backend_id = provider_list[0]->backendId; + _lib_path_to_backend_id[lib_path] = backend_id; + if (_loaded_backend.count(backend_id) > 0) { + QNN_LOG_WARN("lib_path %s is loaded, but backend %d already exists", lib_path.c_str(), backend_id); + } + _loaded_backend[backend_id] = provider_list[0]; + if (_loaded_lib_handle.count(backend_id) > 0) { + QNN_LOG_WARN("closing %p", _loaded_lib_handle[backend_id]); + if (!dl_unload(_loaded_lib_handle[backend_id])) { + QNN_LOG_WARN("fail to close %p with error %s", _loaded_lib_handle[backend_id], dl_error()); + } + } + _loaded_lib_handle[backend_id] = lib_handle; + _backend_id = backend_id; + + return 0; +} + +int qnn_instance::unload_backend() { + for (auto &it : _loaded_lib_handle) { + if (!dl_unload(it.second)) { + QNN_LOG_WARN("failed to close QNN backend %d, error %s", it.first, dl_error()); + } + } + + _loaded_lib_handle.clear(); + _lib_path_to_backend_id.clear(); + _loaded_backend.clear(); + + return 0; +} + } // namespace qnn diff --git a/ggml/src/ggml-qnn/qnn-lib.hpp b/ggml/src/ggml-qnn/qnn-lib.hpp index 454c0c6aa3..968df5bcf2 100644 --- a/ggml/src/ggml-qnn/qnn-lib.hpp +++ b/ggml/src/ggml-qnn/qnn-lib.hpp @@ -1,8 +1,10 @@ #pragma once -#include - #include +#include +#include +#include +#include #include #include #include @@ -22,27 +24,12 @@ #include #include +#include "dl_loader.hpp" #include "qnn-types.hpp" #include "utils.hpp" namespace qnn { -// TODO: those function should be moved to a separate file, and have separate implementation for each platform -typedef void *dl_handler_t; - -inline dl_handler_t dl_load(const std::string &lib_path) { return dlopen(lib_path.c_str(), RTLD_NOW | RTLD_LOCAL); } - -inline void *dl_sym(dl_handler_t handle, const std::string &symbol) { return dlsym(handle, symbol.c_str()); } - -inline int dl_unload(dl_handler_t handle) { return dlclose(handle); } - -inline const char *dl_error() { return dlerror(); } - -template -Fn dl_sym_typed(dl_handler_t handle, const std::string &function_name) { - return reinterpret_cast(dl_sym(handle, function_name)); -} - // ================================================================================================= // // wrapper class of Qualcomm QNN(Qualcomm Neural Network, aka Qualcomm AI Engine Direct) SDK @@ -52,6 +39,7 @@ Fn dl_sym_typed(dl_handler_t handle, const std::string &function_name) { // TODO: fix this for other compilers #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Wextra-semi" +#pragma GCC diagnostic ignored "-Wpedantic" class qnn_system_interface { @@ -188,273 +176,10 @@ class qnn_instance { public: using BackendIdType = decltype(QnnInterface_t{}.backendId); - explicit qnn_instance(const std::string &lib_path, const std::string &backend_name, const std::string &model_name) - : _lib_path(std::move(lib_path)), _backend_name(std::move(backend_name)), _model_name(std::move(model_name)) {} - + explicit qnn_instance(const std::string &lib_path, const std::string &backend_lib_name); ~qnn_instance() {} - - int qnn_init(const QnnSaver_Config_t **saver_config) { - BackendIdType backend_id = QNN_BACKEND_ID_NULL; - QNN_LOG_DEBUG("enter qnn_init"); - - std::lock_guard lock(_init_mutex); - if (load_system() != 0) { - QNN_LOG_WARN("can not load QNN system lib, pls check why?"); - return 1; - } else { - QNN_LOG_DEBUG("load QNN system lib successfully"); - } - - std::string backend_lib_path = _lib_path + _backend_name; - if (_lib_path_to_backend_id.count(backend_lib_path) == 0) { - int is_load_ok = load_backend(backend_lib_path, saver_config); - if (is_load_ok != 0) { - QNN_LOG_WARN("failed to load QNN backend"); - return 2; - } - } - - backend_id = _lib_path_to_backend_id[backend_lib_path]; - if (_loaded_backend.count(backend_id) == 0 || _loaded_lib_handle.count(backend_id) == 0) { - QNN_LOG_WARN( - "library %s is loaded but loaded backend count=%zu, " - "loaded lib_handle count=%zu", - backend_lib_path.c_str(), _loaded_backend.count(backend_id), _loaded_lib_handle.count(backend_id)); - return 3; - } - - _qnn_interface = std::make_shared(*_loaded_backend[backend_id]); - _qnn_interface->qnn_log_create(qnn::sdk_logcallback, _qnn_log_level, &_qnn_log_handle); - if (nullptr == _qnn_log_handle) { - // NPU backend not work on Qualcomm SoC equipped low-end phone - QNN_LOG_WARN("why failed to initialize qnn log"); - return 4; - } else { - QNN_LOG_DEBUG("initialize qnn log successfully"); - } - - std::vector temp_backend_config; - _qnn_interface->qnn_backend_create( - _qnn_log_handle, temp_backend_config.empty() ? nullptr : temp_backend_config.data(), &_qnn_backend_handle); - if (nullptr == _qnn_backend_handle) { - QNN_LOG_WARN("why failed to initialize qnn backend"); - return 5; - } else { - QNN_LOG_DEBUG("initialize qnn backend successfully"); - } - - Qnn_ErrorHandle_t qnn_status = _qnn_interface->qnn_property_has_capability(QNN_PROPERTY_GROUP_DEVICE); - if (QNN_PROPERTY_NOT_SUPPORTED == qnn_status) { - QNN_LOG_WARN("device property is not supported"); - } - if (QNN_PROPERTY_ERROR_UNKNOWN_KEY == qnn_status) { - QNN_LOG_WARN("device property is not known to backend"); - } - - qnn_status = QNN_SUCCESS; - if (_backend_name.find("Htp") != _backend_name.npos) { - const QnnDevice_PlatformInfo_t *p_info = nullptr; - _qnn_interface->qnn_device_get_platform_info(nullptr, &p_info); - QNN_LOG_INFO("device counts %d", p_info->v1.numHwDevices); - QnnDevice_HardwareDeviceInfo_t *infos = p_info->v1.hwDevices; - QnnHtpDevice_OnChipDeviceInfoExtension_t chipinfo = {}; - for (uint32_t i = 0; i < p_info->v1.numHwDevices; i++) { - QNN_LOG_INFO("deviceID:%d, deviceType:%d, numCores %d", infos[i].v1.deviceId, infos[i].v1.deviceType, - infos[i].v1.numCores); - QnnDevice_DeviceInfoExtension_t devinfo = infos[i].v1.deviceInfoExtension; - chipinfo = devinfo->onChipDevice; - QnnHtpDevice_Arch_t htp_arch = chipinfo.arch; - QNN_LOG_INFO("htp_type:%d(%s)", devinfo->devType, - (devinfo->devType == QNN_HTP_DEVICE_TYPE_ON_CHIP) ? "ON_CHIP" : ""); - QNN_LOG_INFO("qualcomm soc_model:%d(%s), htp_arch:%d(%s), vtcm_size:%d MB", chipinfo.socModel, - qnn::get_chipset_desc(chipinfo.socModel), htp_arch, qnn::get_htparch_desc(htp_arch), - chipinfo.vtcmSize); - _soc_info = {chipinfo.socModel, htp_arch, chipinfo.vtcmSize}; - } - _qnn_interface->qnn_device_free_platform_info(nullptr, p_info); - - QnnHtpDevice_CustomConfig_t soc_customconfig; - soc_customconfig.option = QNN_HTP_DEVICE_CONFIG_OPTION_SOC; - soc_customconfig.socModel = chipinfo.socModel; - QnnDevice_Config_t soc_devconfig; - soc_devconfig.option = QNN_DEVICE_CONFIG_OPTION_CUSTOM; - soc_devconfig.customConfig = &soc_customconfig; - - QnnHtpDevice_CustomConfig_t arch_customconfig; - arch_customconfig.option = QNN_HTP_DEVICE_CONFIG_OPTION_ARCH; - arch_customconfig.arch.arch = chipinfo.arch; - arch_customconfig.arch.deviceId = 0; // Id of device to be used. If single device is used by default 0. - QnnDevice_Config_t arch_devconfig; - arch_devconfig.option = QNN_DEVICE_CONFIG_OPTION_CUSTOM; - arch_devconfig.customConfig = &arch_customconfig; - - const QnnDevice_Config_t *p_deviceconfig[] = {&soc_devconfig, &arch_devconfig, nullptr}; - qnn_status = _qnn_interface->qnn_device_create(_qnn_log_handle, p_deviceconfig, &_qnn_device_handle); - } else { - qnn_status = _qnn_interface->qnn_device_create(_qnn_log_handle, nullptr, &_qnn_device_handle); - } - if (QNN_SUCCESS != qnn_status && QNN_DEVICE_ERROR_UNSUPPORTED_FEATURE != qnn_status) { - QNN_LOG_WARN("failed to create QNN device"); - } else { - QNN_LOG_INFO("create QNN device successfully"); - } - - if (_profile_level != sdk_profile_level::profile_off) { - QNN_LOG_INFO("profiling turned on; level = %d", _profile_level); - auto profile_level = _profile_level == sdk_profile_level::profile_detail ? QNN_PROFILE_LEVEL_DETAILED - : QNN_PROFILE_LEVEL_BASIC; - - if (QNN_PROFILE_NO_ERROR != - _qnn_interface->qnn_profile_create(_qnn_backend_handle, profile_level, &_qnn_profile_handle)) { - QNN_LOG_WARN("unable to create profile handle in the backend"); - return 6; - } else { - QNN_LOG_DEBUG("initialize qnn profile successfully"); - } - } - - _rpc_lib_handle = dl_load("libcdsprpc.so"); - if (nullptr == _rpc_lib_handle) { - QNN_LOG_WARN("failed to load qualcomm's rpc lib, error:%s", dl_error()); - return 8; - } else { - QNN_LOG_DEBUG("load rpcmem lib successfully"); - set_rpcmem_initialized(true); - } - _pfn_rpc_mem_init = reinterpret_cast(dl_sym(_rpc_lib_handle, "rpcmem_init")); - _pfn_rpc_mem_deinit = reinterpret_cast(dl_sym(_rpc_lib_handle, "rpcmem_deinit")); - _pfn_rpc_mem_alloc = reinterpret_cast(dl_sym(_rpc_lib_handle, "rpcmem_alloc")); - _pfn_rpc_mem_free = reinterpret_cast(dl_sym(_rpc_lib_handle, "rpcmem_free")); - _pfn_rpc_mem_to_fd = reinterpret_cast(dl_sym(_rpc_lib_handle, "rpcmem_to_fd")); - if (nullptr == _pfn_rpc_mem_alloc || nullptr == _pfn_rpc_mem_free || nullptr == _pfn_rpc_mem_to_fd) { - QNN_LOG_WARN("unable to access symbols in QNN RPC lib. error: %s", dl_error()); - dl_unload(_rpc_lib_handle); - return 9; - } - - if (nullptr != _pfn_rpc_mem_init) { // make Qualcomm's SoC equipped low-end phone happy - _pfn_rpc_mem_init(); - } - - /* TODO: not used, keep it for further usage - QnnContext_Config_t qnn_context_config = QNN_CONTEXT_CONFIG_INIT; - qnn_context_config.priority = QNN_PRIORITY_DEFAULT; - const QnnContext_Config_t * context_configs[] = {&qnn_context_config, nullptr}; - */ - _qnn_interface->qnn_context_create(_qnn_backend_handle, _qnn_device_handle, nullptr, &_qnn_context_handle); - if (nullptr == _qnn_context_handle) { - QNN_LOG_WARN("why failed to initialize qnn context"); - return 10; - } else { - QNN_LOG_DEBUG("initialize qnn context successfully"); - } - - if (_backend_name.find("Htp") != _backend_name.npos) { - // TODO: faster approach to probe the accurate capacity of rpc ion memory - size_t candidate_size = 0; - uint8_t *rpc_buffer = nullptr; - const int size_in_mb = (1 << 20); - size_t probe_slots[] = {1024, 1536, 2048 - 48, 2048}; - size_t probe_counts = sizeof(probe_slots) / sizeof(size_t); - for (size_t idx = 0; idx < probe_counts; idx++) { - rpc_buffer = static_cast(alloc_rpcmem(probe_slots[idx] * size_in_mb, sizeof(void *))); - if (!rpc_buffer) { - QNN_LOG_DEBUG("alloc rpcmem %d (MB) failure, %s", probe_slots[idx], strerror(errno)); - break; - } else { - candidate_size = probe_slots[idx]; - free_rpcmem(rpc_buffer); - rpc_buffer = nullptr; - } - } - - _rpcmem_capacity = std::max(candidate_size, _rpcmem_capacity); - QNN_LOG_INFO("capacity of QNN rpc ion memory is about %d MB", _rpcmem_capacity); - - if (0 != init_htp_perfinfra()) { - QNN_LOG_WARN("initialize HTP performance failure"); - } - if (0 != set_rpc_polling()) { - QNN_LOG_WARN("set RPC polling failure"); - } - if (0 != set_high_performance_mode()) { - QNN_LOG_WARN("set HTP high performance mode failure"); - } - } - - QNN_LOG_DEBUG("leave qnn_init"); - - return 0; - } - - int qnn_finalize() { - int ret_status = 0; - Qnn_ErrorHandle_t error = QNN_SUCCESS; - - if (nullptr != _pfn_rpc_mem_deinit) // make Qualcomm's SoC equipped low-end phone happy - _pfn_rpc_mem_deinit(); - - if (dl_unload(_rpc_lib_handle) != 0) { - QNN_LOG_WARN("failed to unload qualcomm's rpc lib, error:%s", dl_error()); - } else { - QNN_LOG_DEBUG("succeed to close rpcmem lib"); - } - - if (_backend_name.find("Htp") != _backend_name.npos) { - _qnn_htp_perfinfra->destroyPowerConfigId(_qnn_power_configid); - } - - if (nullptr != _qnn_context_handle) { - error = _qnn_interface->qnn_context_free(_qnn_context_handle, _qnn_profile_handle); - if (error != QNN_SUCCESS) { - QNN_LOG_WARN("failed to free QNN context_handle: ID %u, error %d", _qnn_interface->get_backend_id(), - QNN_GET_ERROR_CODE(error)); - } - _qnn_context_handle = nullptr; - } - - if (nullptr != _qnn_profile_handle) { - error = _qnn_interface->qnn_profile_free(_qnn_profile_handle); - if (error != QNN_SUCCESS) { - QNN_LOG_WARN("failed to free QNN profile_handle: ID %u, error %d", _qnn_interface->get_backend_id(), - QNN_GET_ERROR_CODE(error)); - } - _qnn_profile_handle = nullptr; - } - - if (nullptr != _qnn_device_handle) { - error = _qnn_interface->qnn_device_free(_qnn_device_handle); - if (error != QNN_SUCCESS) { - QNN_LOG_WARN("failed to free QNN device_handle: ID %u, error %d", _qnn_interface->get_backend_id(), - QNN_GET_ERROR_CODE(error)); - } - _qnn_device_handle = nullptr; - } - - if (nullptr != _qnn_backend_handle) { - error = _qnn_interface->qnn_backend_free(_qnn_backend_handle); - if (error != QNN_SUCCESS) { - QNN_LOG_WARN("failed to free QNN backend_handle: ID %u, error %d", _qnn_interface->get_backend_id(), - QNN_GET_ERROR_CODE(error)); - } - _qnn_backend_handle = nullptr; - } - - if (nullptr != _qnn_log_handle) { - error = _qnn_interface->qnn_log_free(_qnn_log_handle); - if (error != QNN_SUCCESS) { - QNN_LOG_WARN("failed to free QNN log_handle: ID %u, error %d", _qnn_interface->get_backend_id(), - QNN_GET_ERROR_CODE(error)); - } - _qnn_log_handle = nullptr; - } - - unload_backend(); - - _qnn_sys_interface.reset(); - - return ret_status; - } + int qnn_init(const QnnSaver_Config_t **saver_config); + int qnn_finalize(); std::shared_ptr get_qnn_interface() { if (!_qnn_interface) { @@ -477,7 +202,7 @@ public: int init_htp_perfinfra() { QnnDevice_Infrastructure_t device_infra = nullptr; - int error = _qnn_interface->qnn_device_get_infrastructure(&device_infra); + auto error = _qnn_interface->qnn_device_get_infrastructure(&device_infra); if (error != QNN_SUCCESS) { QNN_LOG_WARN("failed to get qnn device infra"); return 1; @@ -578,8 +303,6 @@ public: bool is_rpcmem_initialized() { return _rpcmem_initialized; } - void set_rpcmem_initialized(bool initialized) { _rpcmem_initialized = initialized; } - size_t get_rpcmem_capacity() { return _rpcmem_capacity; } void *alloc_rpcmem(size_t bytes, size_t alignment) { @@ -665,7 +388,7 @@ public: } void unregister_rpcmem(Qnn_MemHandle_t mem_handle) { - Qnn_ErrorHandle_t error = _qnn_interface->qnn_mem_de_register(&mem_handle, 1); + auto error = _qnn_interface->qnn_mem_de_register(&mem_handle, 1); if (error != QNN_SUCCESS) { QNN_LOG_WARN("failed to unregister shared memory, error %d", QNN_GET_ERROR_CODE(error)); } @@ -686,163 +409,15 @@ public: const qnn::qcom_socinfo &get_soc_info() { return _soc_info; } private: - int load_system() { - Qnn_ErrorHandle_t error = QNN_SUCCESS; - - std::string system_lib_path = _lib_path + "libQnnSystem.so"; - QNN_LOG_DEBUG("system_lib_path:%s", system_lib_path.c_str()); - - auto system_lib_handle = dl_load(system_lib_path); - if (!system_lib_handle) { - QNN_LOG_WARN("can not load QNN library %s, error: %s", system_lib_path.c_str(), dl_error()); - return 1; - } - - auto *get_providers = dl_sym_typed( - system_lib_handle, "QnnSystemInterface_getProviders"); - if (!get_providers) { - QNN_LOG_WARN("can not load QNN symbol QnnSystemInterface_getProviders: %s", dl_error()); - return 2; - } - - uint32_t num_providers = 0; - const QnnSystemInterface_t **provider_list = nullptr; - error = get_providers(&provider_list, &num_providers); - if (error != QNN_SUCCESS) { - QNN_LOG_WARN("failed to get providers, error %d", QNN_GET_ERROR_CODE(error)); - return 3; - } - - if (num_providers != _required_num_providers) { - QNN_LOG_WARN("providers is %d instead of required %d", num_providers, _required_num_providers); - return 4; - } - - if (!provider_list) { - QNN_LOG_WARN("can not get providers"); - return 5; - } - - QNN_SYSTEM_INTERFACE_VER_TYPE qnn_system_interface; - bool found_valid_system_interface = false; - for (size_t idx = 0; idx < num_providers; idx++) { - if (QNN_SYSTEM_API_VERSION_MAJOR == provider_list[idx]->systemApiVersion.major && - QNN_SYSTEM_API_VERSION_MINOR <= provider_list[idx]->systemApiVersion.minor) { - found_valid_system_interface = true; - qnn_system_interface = provider_list[idx]->QNN_SYSTEM_INTERFACE_VER_NAME; - break; - } - } - if (!found_valid_system_interface) { - QNN_LOG_WARN("unable to find a valid qnn system interface"); - return 6; - } else { - QNN_LOG_DEBUG("find a valid qnn system interface"); - } - - auto qnn_sys_interface = std::make_shared(*provider_list[0], system_lib_handle); - if (!qnn_sys_interface->is_valid()) { - QNN_LOG_WARN("failed to create QNN system interface"); - return 7; - } - - _qnn_sys_interface = qnn_sys_interface; - return 0; - } - - int load_backend(std::string &lib_path, const QnnSaver_Config_t ** /*saver_config*/) { - Qnn_ErrorHandle_t error = QNN_SUCCESS; - QNN_LOG_DEBUG("lib_path:%s", lib_path.c_str()); - - auto lib_handle = dl_load(lib_path.c_str()); - if (!lib_handle) { - QNN_LOG_WARN("can not open QNN library %s, with error: %s", lib_path.c_str(), dl_error()); - return 1; - } - - auto get_providers = - qnn::dl_sym_typed(lib_handle, "QnnInterface_getProviders"); - if (!get_providers) { - QNN_LOG_WARN("can not load symbol QnnInterface_getProviders : %s", dl_error()); - return 2; - } - - std::uint32_t num_providers = 0; - const QnnInterface_t **provider_list = nullptr; - error = get_providers(&provider_list, &num_providers); - if (error != QNN_SUCCESS) { - QNN_LOG_WARN("failed to get providers, error %d", QNN_GET_ERROR_CODE(error)); - return 3; - } - QNN_LOG_DEBUG("num_providers=%d", num_providers); - if (num_providers != _required_num_providers) { - QNN_LOG_WARN("providers is %d instead of required %d", num_providers, _required_num_providers); - return 4; - } - - if (!provider_list) { - QNN_LOG_WARN("failed to get qnn interface providers"); - return 5; - } - bool found_valid_interface = false; - QNN_INTERFACE_VER_TYPE qnn_interface; - for (size_t idx = 0; idx < num_providers; idx++) { - if (QNN_API_VERSION_MAJOR == provider_list[idx]->apiVersion.coreApiVersion.major && - QNN_API_VERSION_MINOR <= provider_list[idx]->apiVersion.coreApiVersion.minor) { - found_valid_interface = true; - qnn_interface = provider_list[idx]->QNN_INTERFACE_VER_NAME; - break; - } - } - - if (!found_valid_interface) { - QNN_LOG_WARN("unable to find a valid qnn interface"); - return 6; - } else { - QNN_LOG_DEBUG("find a valid qnn interface"); - } - - BackendIdType backend_id = provider_list[0]->backendId; - _lib_path_to_backend_id[lib_path] = backend_id; - if (_loaded_backend.count(backend_id) > 0) { - QNN_LOG_WARN("lib_path %s is loaded, but backend %d already exists", lib_path.c_str(), backend_id); - } - _loaded_backend[backend_id] = provider_list[0]; - if (_loaded_lib_handle.count(backend_id) > 0) { - QNN_LOG_WARN("closing %p", _loaded_lib_handle[backend_id]); - int dlclose_error = dl_unload(_loaded_lib_handle[backend_id]); - if (dlclose_error != 0) { - QNN_LOG_WARN("fail to close %p with error %s", _loaded_lib_handle[backend_id], dl_error()); - } - } - _loaded_lib_handle[backend_id] = lib_handle; - _backend_id = backend_id; - - return 0; - } - - int unload_backend() { - int dlclose_error = 0; - for (auto &it : _loaded_lib_handle) { - dlclose_error = dl_unload(it.second); - if (dlclose_error != 0) { - QNN_LOG_WARN("failed to close QNN backend %d, error %s", it.first, dl_error()); - } - } - - _loaded_lib_handle.clear(); - _lib_path_to_backend_id.clear(); - _loaded_backend.clear(); - - return 0; - } + int load_system(); + int load_backend(std::string &lib_path, const QnnSaver_Config_t ** /*saver_config*/); + int unload_backend(); private: static constexpr const int _required_num_providers = 1; - std::string _lib_path; - std::string _backend_name; - std::string _model_name; // Qualcomm's dedicated prebuilt model name, keep it for further usage + std::string _additional_lib_load_path; + std::string _backend_lib_name; BackendIdType _backend_id; QnnLog_Level_t _qnn_log_level = QNN_LOG_LEVEL_DEBUG; @@ -874,17 +449,17 @@ private: std::unordered_map _qnn_rpc_buffer_to_handles; std::mutex _init_mutex; - std::unordered_map _loaded_lib_handle; + std::unordered_map _loaded_lib_handle; std::unordered_map _lib_path_to_backend_id; std::unordered_map _loaded_backend; dl_handler_t _rpc_lib_handle = nullptr; std::atomic_bool _rpcmem_initialized{false}; - qnn::pfn_rpc_mem_alloc _pfn_rpc_mem_alloc; - qnn::pfn_rpc_mem_free _pfn_rpc_mem_free; - qnn::pfn_rpc_mem_to_fd _pfn_rpc_mem_to_fd; - qnn::pfn_rpc_mem_init _pfn_rpc_mem_init; - qnn::pfn_rpc_mem_deinit _pfn_rpc_mem_deinit; + qnn::pfn_rpc_mem_alloc _pfn_rpc_mem_alloc = nullptr; + qnn::pfn_rpc_mem_free _pfn_rpc_mem_free = nullptr; + qnn::pfn_rpc_mem_to_fd _pfn_rpc_mem_to_fd = nullptr; + qnn::pfn_rpc_mem_init _pfn_rpc_mem_init = nullptr; + qnn::pfn_rpc_mem_deinit _pfn_rpc_mem_deinit = nullptr; std::unordered_map _rpcmem_store_map; size_t _rpcmem_capacity = 512; diff --git a/ggml/src/ggml-qnn/tensor.hpp b/ggml/src/ggml-qnn/tensor.hpp index 9720e682c8..423c3ba7fa 100644 --- a/ggml/src/ggml-qnn/tensor.hpp +++ b/ggml/src/ggml-qnn/tensor.hpp @@ -59,7 +59,7 @@ public: return true; } - can_unbind = false; + _can_unbind = false; return false; } @@ -68,7 +68,7 @@ public: return true; } - can_unbind = false; + _can_unbind = false; return false; } @@ -93,7 +93,7 @@ public: } bool bind_ggml_tensor(ggml_tensor *tensor) { - if (!can_unbind) { + if (!_can_unbind) { QNN_LOG_DEBUG("[%s]already has buffer storage, skip bind", _tensor_name.c_str()); return true; } @@ -137,7 +137,7 @@ public: return false; } - if (!can_unbind) { + if (!_can_unbind) { QNN_LOG_DEBUG("[%s]already has buffer storage, stop unbind", _tensor_name.c_str()); return true; } @@ -294,11 +294,14 @@ private: new_tensor_type); } - bool should_use_mem_handle() const { return false; } + bool should_use_mem_handle() const { + // TODO: figure out how to set rpc mem to multiple tensor + return false; + } std::string _tensor_name; qnn_buffer_ptr _buffer; - bool can_unbind = true; + bool _can_unbind = true; QNNBackend _device; std::shared_ptr _qnn_instance; Qnn_Tensor_t _qnn_tensor = qnn_tensor_init(kDefaultQnnTensorVersion); diff --git a/ggml/src/ggml-qnn/utils.cpp b/ggml/src/ggml-qnn/utils.cpp index 6e77ee5f5f..e9aa4d3737 100644 --- a/ggml/src/ggml-qnn/utils.cpp +++ b/ggml/src/ggml-qnn/utils.cpp @@ -1,8 +1,6 @@ #include "utils.hpp" -#include - #include #include "ggml-qnn.h" @@ -10,11 +8,23 @@ #include "QnnGraph.h" #include "qnn-types.hpp" -#ifdef __linux__ +#ifdef _WIN32 +#include +#else #include #include #endif +namespace { + +template +_Ty align_to_generic(size_t alignment, _Ty offset) { + return offset % alignment == 0 ? offset + : offset + (static_cast<_Ty>(alignment) - (offset % static_cast<_Ty>(alignment))); +} + +} // namespace + namespace qnn { qnn_dimension_array_t get_internal_dimension(const ggml_dimension_array_t &dims, uint32_t rank) { @@ -33,7 +43,7 @@ qnn_dimension_array_t get_internal_dimension(const ggml_dimension_array_t &dims, * The ggml tensor will have dimensions [3, 2], while the qnn tensor will have dimensions [2, 3]. */ for (uint32_t i = 0; i < rank; i++) { - internal_dims[i] = std::max(dims[rank - 1 - i], 1); + internal_dims[i] = std::max((uint32_t)dims[rank - 1 - i], 1); } return internal_dims; @@ -219,37 +229,41 @@ const char *get_htparch_desc(size_t htp_arch) { } } -intptr_t align_to(size_t alignment, intptr_t offset) { - return offset % alignment == 0 - ? offset - : offset + (static_cast(alignment) - (offset % static_cast(alignment))); +intptr_t align_to(size_t alignment, intptr_t offset) { return align_to_generic(alignment, offset); } + +uint32_t get_ggml_tensor_data_size(const ggml_tensor *tensor) { return (uint32_t)ggml_nbytes(tensor); } + +#ifdef _WIN32 +static void *_align_alloc(size_t alignment, size_t size) { return _aligned_malloc(size, alignment); } + +static size_t _get_page_size() { + SYSTEM_INFO si; + GetSystemInfo(&si); + return si.dwPageSize; } -uint32_t get_ggml_tensor_data_size(const ggml_tensor *tensor) { return ggml_nbytes(tensor); } +void align_free(void *ptr) { _aligned_free(ptr); } +#else +static void *_align_alloc(size_t alignment, size_t size) { return std::aligned_alloc(alignment, size); } + +static size_t _get_page_size() { return sysconf(_SC_PAGESIZE); } + +void align_free(void *ptr) { std::free(ptr); } +#endif void *page_align_alloc(size_t size) { - // TODO: fix this for other platforms - const size_t alignment = sysconf(_SC_PAGESIZE); - return align_alloc(alignment, size); -} - -void *align_alloc(size_t alignment, size_t size) { - size_t size_aligned = size; - if ((size_aligned % alignment) != 0) { - size_aligned += (alignment - (size_aligned % alignment)); - } - - void *data = std::aligned_alloc(alignment, size_aligned); + const size_t alignment = _get_page_size(); + size_t size_aligned = align_to_generic(alignment, size); + QNN_LOG_DEBUG("_align_alloc success, alignment: %ld, size: %ld, size_aligned: %ld", alignment, size, size_aligned); + void *data = _align_alloc(alignment, size_aligned); if (!data) { - QNN_LOG_WARN("aligned_alloc failed\n"); + QNN_LOG_WARN("_align_alloc failed, alignment: %ld, size: %ld, size_aligned: %ld", alignment, size, size_aligned); return nullptr; } return data; } -void align_free(void *ptr) { std::free(ptr); } - // ================================================================================================= // // QNN backend internal helper functions @@ -359,7 +373,29 @@ const char *get_qnn_error_string(Qnn_ErrorHandle_t error) { } } -#ifdef __linux__ +#ifdef _WIN32 + +size_t get_system_total_memory_in_bytes() { + MEMORYSTATUSEX mem = {}; + mem.dwLength = sizeof(mem); + if (GlobalMemoryStatusEx(&mem)) { + return mem.ullTotalPhys; + } + + return 0; +} + +size_t get_system_free_memory_in_bytes() { + MEMORYSTATUSEX mem = {}; + mem.dwLength = sizeof(mem); + if (GlobalMemoryStatusEx(&mem)) { + return mem.ullAvailPhys; + } + + return 0; +} + +#else size_t get_system_total_memory_in_bytes() { struct sysinfo info = {}; diff --git a/ggml/src/ggml-qnn/utils.hpp b/ggml/src/ggml-qnn/utils.hpp index 1ec0af4c96..cdff53e773 100644 --- a/ggml/src/ggml-qnn/utils.hpp +++ b/ggml/src/ggml-qnn/utils.hpp @@ -1,12 +1,8 @@ #pragma once -#include -#include -#include -#include -#include - #include +#include +#include #include #include "ggml.h" @@ -36,7 +32,6 @@ intptr_t align_to(size_t alignment, intptr_t offset); uint32_t get_ggml_tensor_data_size(const ggml_tensor *tensor); void *page_align_alloc(size_t size); -void *align_alloc(size_t alignment, size_t size); void align_free(void *ptr); const char *opname_from_ggmlop(enum ggml_op ggmlop);