#include "utils.hpp" #include #include "ggml-qnn.h" #include "qnn-types.hpp" #ifdef __linux__ #include #endif namespace qnn { qnn_dimension_array_t get_internal_dimension(const ggml_dimension_array_t &dims, uint32_t rank) { static_assert(GGML_MAX_DIMS == 4, "GGML_MAX_DIMS should be 4"); GGML_ASSERT(rank <= GGML_MAX_DIMS && rank > 0); qnn_dimension_array_t internal_dims = {}; /* * Both the ggml and qnn tensor in memory are stored as row-major format. * But the dimensions of the tensor are stored in different order. * For example, a 2x3 matrix: * [ * [1, 2, 3], * [4, 5, 6], * ] * The ggml tensor will have dimensions [3, 2], while the qnn tensor will have dimensions [2, 3]. */ for (uint32_t i = 0; i < rank; i++) { internal_dims[i] = std::max(dims[rank - 1 - i], 1); } return internal_dims; } // TODO: mapping more ggml data type to QNN data type // ref:explanation of k-quants, https://github.com/ggerganov/llama.cpp/pull/1684 Qnn_DataType_t qnn_datatype_from_ggml_datatype(ggml_type ggml_type) { switch (ggml_type) { case GGML_TYPE_F32: return QNN_DATATYPE_FLOAT_32; case GGML_TYPE_F16: return QNN_DATATYPE_FLOAT_16; case GGML_TYPE_I32: return QNN_DATATYPE_INT_32; case GGML_TYPE_I16: return QNN_DATATYPE_INT_16; case GGML_TYPE_I8: return QNN_DATATYPE_INT_8; case GGML_TYPE_Q8_0: return QNN_DATATYPE_SFIXED_POINT_8; case GGML_TYPE_Q4_0: return QNN_DATATYPE_SFIXED_POINT_4; default: break; } return QNN_DATATYPE_UNDEFINED; } ggml_type ggml_datatype_from_qnn_datatype(Qnn_DataType_t qnn_type) { switch (qnn_type) { case QNN_DATATYPE_FLOAT_32: return GGML_TYPE_F32; case QNN_DATATYPE_FLOAT_16: return GGML_TYPE_F16; case QNN_DATATYPE_UINT_32: case QNN_DATATYPE_INT_32: return GGML_TYPE_I32; case QNN_DATATYPE_INT_16: return GGML_TYPE_I16; case QNN_DATATYPE_INT_8: return GGML_TYPE_I8; case QNN_DATATYPE_SFIXED_POINT_8: return GGML_TYPE_Q8_0; case QNN_DATATYPE_SFIXED_POINT_4: return GGML_TYPE_Q4_0; default: break; } return GGML_TYPE_COUNT; } size_t qnn_datatype_size(Qnn_DataType_t qnn_type) { switch (qnn_type) { case QNN_DATATYPE_FLOAT_32: return sizeof(float); case QNN_DATATYPE_FLOAT_16: return sizeof(uint16_t); case QNN_DATATYPE_UINT_32: case QNN_DATATYPE_INT_32: return sizeof(int32_t); case QNN_DATATYPE_INT_16: return sizeof(int16_t); case QNN_DATATYPE_INT_8: return sizeof(int8_t); case QNN_DATATYPE_SFIXED_POINT_8: return sizeof(int8_t); case QNN_DATATYPE_SFIXED_POINT_4: return sizeof(int8_t); default: break; } return 0; } const char *qnn_datatype_to_string(Qnn_DataType_t qnn_type) { switch (qnn_type) { case QNN_DATATYPE_FLOAT_32: return "QNN_DATATYPE_FLOAT_32"; case QNN_DATATYPE_FLOAT_16: return "QNN_DATATYPE_FLOAT_16"; case QNN_DATATYPE_UINT_32: return "QNN_DATATYPE_UINT_32"; case QNN_DATATYPE_INT_32: return "QNN_DATATYPE_INT_32"; case QNN_DATATYPE_INT_16: return "QNN_DATATYPE_INT_16"; case QNN_DATATYPE_INT_8: return "QNN_DATATYPE_INT_8"; case QNN_DATATYPE_SFIXED_POINT_8: return "QNN_DATATYPE_SFIXED_POINT_8"; case QNN_DATATYPE_SFIXED_POINT_4: return "QNN_DATATYPE_SFIXED_POINT_4"; default: break; } return "QNN_DATATYPE_UNDEFINED"; } uint32_t get_ggml_tensor_rank(const ggml_tensor *tensor) { uint32_t rank = 0; for (int i = 0; i < GGML_MAX_DIMS; i++) { if ((0 != tensor->ne[i]) && (1 != tensor->ne[i])) { rank++; } } return rank; } const char *get_ggml_type_name(ggml_type type) { const auto *traits = ggml_get_type_traits(type); return traits->type_name; } const char *get_backend_name(QNNBackend device_index) { switch (device_index) { case QNN_BACKEND_CPU: return "QNN-CPU"; case QNN_BACKEND_GPU: return "QNN-GPU"; case QNN_BACKEND_NPU: return "QNN-NPU"; case QNN_BACKEND_COUNT: default: return "unknown"; } } const char *get_chipset_desc(uint32_t chipset_id) { switch (chipset_id) { case SM8450: return "SM8450"; case SM8475: return "SM8475"; case SM8550: return "SM8550"; case SM8650: return "SM8650"; default: return "unknown"; } } const char *get_htparch_desc(size_t htp_arch) { switch (htp_arch) { case V68: return "QCOM_HTP_V68"; case V69: return "QCOM_HTP_V69"; case V73: return "QCOM_HTP_V73"; case V75: return "QCOM_HTP_V75"; default: return "unknown"; } } intptr_t align_to(size_t alignment, intptr_t offset) { return offset % alignment == 0 ? offset : offset + (static_cast(alignment) - (offset % static_cast(alignment))); } uint32_t get_ggml_tensor_data_size(const ggml_tensor *tensor) { /* size_t data_size = ggml_row_size(tensor->type, tensor->ne[0]); size_t n_dims = qnn_get_ggml_tensor_rank(tensor); for (int i = 1; i < n_dims; i++) { data_size *= tensor->ne[i]; } return data_size; */ return ggml_nbytes(tensor); } void *align_alloc(size_t alignment, size_t size) { size_t size_aligned = size; if ((size_aligned % alignment) != 0) { size_aligned += (alignment - (size_aligned % alignment)); } void *data = std::aligned_alloc(alignment, size_aligned); if (!data) { QNN_LOG_WARN("aligned_alloc failed\n"); return nullptr; } return data; } void align_free(void *ptr) { std::free(ptr); } // ================================================================================================= // // QNN backend internal helper functions // // ================================================================================================= // TODO: only support GGML_OP_ADD/GGML_OP_MUL/GGML_OP_MUL_MAT const char *opname_from_ggmlop(enum ggml_op ggmlop) { switch (ggmlop) { case GGML_OP_ADD: return QNN_OP_ELEMENT_WISE_ADD; case GGML_OP_MUL: return QNN_OP_ELEMENT_WISE_MULTIPLY; case GGML_OP_MUL_MAT: return QNN_OP_MAT_MUL; default: break; } return nullptr; } const char *get_qnn_error_string(Qnn_ErrorHandle_t error) { // A complete list of error codes can be found at here: // https://docs.qualcomm.com/bundle/publicresource/topics/80-63442-50/api_error_codes.html switch (error) { case QNN_SUCCESS: return "QNN_SUCCESS"; case QNN_COMMON_ERROR_GENERAL: return "QNN_COMMON_ERROR_GENERAL"; // QnnGraph_Error_t case QNN_GRAPH_ERROR_UNSUPPORTED_FEATURE: return "QNN_GRAPH_ERROR_UNSUPPORTED_FEATURE"; case QNN_GRAPH_ERROR_MEM_ALLOC: return "QNN_GRAPH_ERROR_MEM_ALLOC"; case QNN_GRAPH_ERROR_INVALID_ARGUMENT: return "QNN_GRAPH_ERROR_INVALID_ARGUMENT"; case QNN_GRAPH_ERROR_INVALID_HANDLE: return "QNN_GRAPH_ERROR_INVALID_HANDLE"; case QNN_GRAPH_ERROR_GRAPH_DOES_NOT_EXIST: return "QNN_GRAPH_ERROR_GRAPH_DOES_NOT_EXIST"; case QNN_GRAPH_ERROR_INVALID_NAME: return "QNN_GRAPH_ERROR_INVALID_NAME"; case QNN_GRAPH_ERROR_INVALID_TENSOR: return "QNN_GRAPH_ERROR_INVALID_TENSOR"; case QNN_GRAPH_ERROR_INVALID_OP_CONFIG: return "QNN_GRAPH_ERROR_INVALID_OP_CONFIG"; case QNN_GRAPH_ERROR_SET_PROFILE: return "QNN_GRAPH_ERROR_SET_PROFILE"; case QNN_GRAPH_ERROR_UNCONNECTED_NODE: return "QNN_GRAPH_ERROR_UNCONNECTED_NODE"; case QNN_GRAPH_ERROR_CREATE_FAILED: return "QNN_GRAPH_ERROR_CREATE_FAILED"; // QnnOpPackage_Error_t case QNN_OP_PACKAGE_ERROR_LIBRARY_ALREADY_INITIALIZED: return "QNN_OP_PACKAGE_ERROR_LIBRARY_ALREADY_INITIALIZED"; case QNN_OP_PACKAGE_ERROR_LIBRARY_NOT_INITIALIZED: return "QNN_OP_PACKAGE_ERROR_LIBRARY_NOT_INITIALIZED"; case QNN_OP_PACKAGE_ERROR_INVALID_HANDLE: return "QNN_OP_PACKAGE_ERROR_INVALID_HANDLE"; case QNN_OP_PACKAGE_ERROR_INVALID_INFRASTRUCTURE: return "QNN_OP_PACKAGE_ERROR_INVALID_INFRASTRUCTURE"; case QNN_OP_PACKAGE_ERROR_INVALID_INFO: return "QNN_OP_PACKAGE_ERROR_INVALID_INFO"; case QNN_OP_PACKAGE_ERROR_VALIDATION_FAILURE: return "QNN_OP_PACKAGE_ERROR_VALIDATION_FAILURE"; case QNN_OP_PACKAGE_ERROR_INVALID_ARGUMENT: return "QNN_OP_PACKAGE_ERROR_INVALID_ARGUMENT"; default: return nullptr; } } #ifdef __linux__ size_t get_system_total_memory_in_bytes() { auto pages = (size_t)sysconf(_SC_PHYS_PAGES); auto page_size = (size_t)sysconf(_SC_PAGE_SIZE); return pages * page_size; } size_t get_system_free_memory_in_bytes() { auto avail_pages = (size_t)sysconf(_SC_AVPHYS_PAGES); auto page_size = (size_t)sysconf(_SC_PAGE_SIZE); return avail_pages * page_size; } #endif } // namespace qnn