diff --git a/ggml/include/ggml-qnn.h b/ggml/include/ggml-qnn.h index 48194106cf..6d3e66d3d3 100644 --- a/ggml/include/ggml-qnn.h +++ b/ggml/include/ggml-qnn.h @@ -1,24 +1,11 @@ #pragma once #include "ggml-backend.h" -#include "ggml.h" #ifdef __cplusplus extern "C" { #endif -#define GGML_QNN_NAME "qnn" -#define GGML_QNN_MAX_DEVICES QNN_BACKEND_COUNT - -enum QNNBackend { - QNN_BACKEND_CPU = 0, - QNN_BACKEND_GPU, - QNN_BACKEND_NPU, - QNN_BACKEND_COUNT, -}; - -GGML_API bool ggml_backend_is_qnn(ggml_backend_t backend); - GGML_API ggml_backend_reg_t ggml_backend_qnn_reg(void); #ifdef __cplusplus diff --git a/ggml/src/ggml-qnn/CMakeLists.txt b/ggml/src/ggml-qnn/CMakeLists.txt index b3591f903d..3e8fa3a1b8 100644 --- a/ggml/src/ggml-qnn/CMakeLists.txt +++ b/ggml/src/ggml-qnn/CMakeLists.txt @@ -1,9 +1,13 @@ message(STATUS "Using QNN backend") +option(GGML_HEXAGON_NPU_ONLY "ggml-qnn: Only use Hexagon NPU" OFF) +option(GGML_QNN_ENABLE_HEXAGON_BACKEND "ggml-qnn: Enable Hexagon custom package" ${GGML_HEXAGON_NPU_ONLY}) + if(CMAKE_SYSTEM_NAME STREQUAL "Android") find_library(LOG_LIB log) set(QNN_LINK_LIBRARIES ${LOG_LIB}) set(QNN_DEFAULT_LIB_SEARCH_PATH "/data/local/tmp/" CACHE STRING "customized library search path for QNN backend") + add_compile_options(-g -O0) elseif(CMAKE_SYSTEM_NAME STREQUAL "Windows" OR CMAKE_SYSTEM_NAME STREQUAL "Linux") set(QNN_DEFAULT_LIB_SEARCH_PATH "" CACHE STRING "customized library search path for QNN backend") else() @@ -21,15 +25,22 @@ if(NOT DEFINED GGML_QNN_SDK_PATH) endif() message("CMAKE_BUILD_TYPE: ${CMAKE_BUILD_TYPE}") +message("CMAKE_CXX_FLAGS_DEBUG: ${CMAKE_CXX_FLAGS_DEBUG}") message("CMAKE_CXX_FLAGS_RELEASE: ${CMAKE_CXX_FLAGS_RELEASE}") message("QNN_SDK_PATH: ${GGML_QNN_SDK_PATH}") -file(GLOB QNN_SOURCES "${CMAKE_CURRENT_LIST_DIR}/*.cpp") +file(GLOB QNN_SOURCES "${CMAKE_CURRENT_LIST_DIR}/qnn/*.cpp") +file(GLOB COMMON_SOURCES "${CMAKE_CURRENT_LIST_DIR}/*.cpp") ggml_add_backend_library(ggml-qnn ${QNN_SOURCES} + ${COMMON_SOURCES} ) -target_include_directories(ggml-qnn PRIVATE ${GGML_QNN_SDK_PATH}/include/QNN ${CMAKE_CURRENT_LIST_DIR}) +target_include_directories(ggml-qnn PRIVATE + ${GGML_QNN_SDK_PATH}/include/QNN + ${CMAKE_CURRENT_LIST_DIR}/qnn + ${CMAKE_CURRENT_LIST_DIR} +) target_link_libraries(ggml-qnn PRIVATE ${QNN_LINK_LIBRARIES}) if(NOT "${QNN_DEFAULT_LIB_SEARCH_PATH}" STREQUAL "") @@ -52,3 +63,99 @@ if(GGML_QNN_ENABLE_PERFORMANCE_TRACKING) else() message("GGML_QNN_ENABLE_PERFORMANCE_TRACKING is disabled") endif() + +add_subdirectory(shared) + +if(GGML_HEXAGON_NPU_ONLY) + message("GGML_HEXAGON_NPU_ONLY is enabled") + add_compile_definitions(GGML_HEXAGON_NPU_ONLY) + set(GGML_QNN_ENABLE_HEXAGON_BACKEND ON) +else() + message("GGML_HEXAGON_NPU_ONLY is disabled") +endif() + +if(GGML_QNN_ENABLE_HEXAGON_BACKEND) + message("GGML_QNN_ENABLE_HEXAGON_BACKEND is enabled") + add_subdirectory(npu) + target_link_libraries(hexagon-npu-host runtime-common) + target_link_libraries(ggml-qnn PRIVATE hexagon-npu-host) +else() + message("GGML_QNN_ENABLE_HEXAGON_BACKEND is disabled") + target_link_libraries(ggml-qnn PRIVATE runtime-common) +endif() + +# Copy QNN dynamic libraries +set(QNN_DYNAMIC_LIBS "") + +if(CMAKE_SYSTEM_NAME STREQUAL "Android" OR CMAKE_SYSTEM_NAME STREQUAL "Linux") + if(CMAKE_SYSTEM_NAME STREQUAL "Android") + # Android + set(QNN_SDK_LIB_PATH "${GGML_QNN_SDK_PATH}/lib/aarch64-android") + elseif(CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64") + # Linux x86_64 + set(QNN_SDK_LIB_PATH "${GGML_QNN_SDK_PATH}/lib/x86_64-linux-clang") + else() + # Linux aarch64 + 
set(QNN_SDK_LIB_PATH "${GGML_QNN_SDK_PATH}/lib/aarch64-oe-linux-gcc11.2") + endif() + + list(APPEND QNN_DYNAMIC_LIBS "${QNN_SDK_LIB_PATH}/libQnnSystem.so") + list(APPEND QNN_DYNAMIC_LIBS "${QNN_SDK_LIB_PATH}/libQnnCpu.so") + list(APPEND QNN_DYNAMIC_LIBS "${QNN_SDK_LIB_PATH}/libQnnGpu.so") + list(APPEND QNN_DYNAMIC_LIBS "${QNN_SDK_LIB_PATH}/libQnnHtp.so") + file(GLOB HTP_STUB_LIBS "${QNN_SDK_LIB_PATH}/libQnnHtp*.so") + list(APPEND QNN_DYNAMIC_LIBS ${HTP_STUB_LIBS}) + + if(CMAKE_SYSTEM_NAME STREQUAL "Android") + file(GLOB HTP_SKEL_LIBS "${GGML_QNN_SDK_PATH}/lib/hexagon-*/unsigned/libQnnHtp*Skel.so") + list(APPEND QNN_DYNAMIC_LIBS ${HTP_SKEL_LIBS}) + + if(CMAKE_SYSTEM_PROCESSOR STREQUAL "aarch64") + if(EXISTS "${CMAKE_ANDROID_NDK}/prebuilt/android-arm64/gdbserver/gdbserver") + list(APPEND QNN_DYNAMIC_LIBS "${CMAKE_ANDROID_NDK}/prebuilt/android-arm64/gdbserver/gdbserver") + message("old ndk, copy gdbserver") + else() + file(GLOB LLDB_SERVER "${CMAKE_ANDROID_NDK}/toolchains/llvm/prebuilt/linux-x86_64/lib64/clang/*/lib/linux/aarch64/lldb-server") + list(APPEND QNN_DYNAMIC_LIBS ${LLDB_SERVER}) + message("new ndk, copy lldb-server") + endif() + + file(GLOB OMP_LIBS "${CMAKE_ANDROID_NDK}/toolchains/llvm/prebuilt/linux-x86_64/lib64/clang/*/lib/linux/aarch64/libomp.so") + file(GLOB ASAN_LIBS "${CMAKE_ANDROID_NDK}/toolchains/llvm/prebuilt/linux-x86_64/lib64/clang/*/lib/linux/libclang_rt.asan-aarch64-android.so") + list(APPEND QNN_DYNAMIC_LIBS ${OMP_LIBS}) + list(APPEND QNN_DYNAMIC_LIBS ${ASAN_LIBS}) + endif() + else() + # Linux + list(APPEND QNN_DYNAMIC_LIBS "${QNN_SDK_LIB_PATH}/libHtpPrepare.so") + endif() +elseif(CMAKE_SYSTEM_NAME STREQUAL "Windows") + if(CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64") + # x86_64 + set(QNN_SDK_LIB_PATH "${GGML_QNN_SDK_PATH}/lib/x86_64-windows-msvc") + else() + # aarch64 + set(QNN_SDK_LIB_PATH "${GGML_QNN_SDK_PATH}/lib/aarch64-windows-msvc") + endif() + + list(APPEND QNN_DYNAMIC_LIBS "${QNN_SDK_LIB_PATH}/QnnSystem.dll") + list(APPEND QNN_DYNAMIC_LIBS "${QNN_SDK_LIB_PATH}/QnnCpu.dll") + list(APPEND QNN_DYNAMIC_LIBS "${QNN_SDK_LIB_PATH}/QnnGpu.dll") + list(APPEND QNN_DYNAMIC_LIBS "${QNN_SDK_LIB_PATH}/QnnHtp.dll") + file(GLOB HTP_STUB_LIBS "${QNN_SDK_LIB_PATH}/QnnHtp*.dll") + + if(CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64") + list(APPEND QNN_DYNAMIC_LIBS "${QNN_SDK_LIB_PATH}/HtpPrepare.dll") + endif() + + list(APPEND QNN_DYNAMIC_LIBS ${HTP_STUB_LIBS}) +endif() + +foreach(QNN_DYNAMIC_LIB ${QNN_DYNAMIC_LIBS}) + message("Copy: ${QNN_DYNAMIC_LIB} -> ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}") + add_custom_command( + TARGET ggml-qnn POST_BUILD + COMMAND ${CMAKE_COMMAND} -E copy + ${QNN_DYNAMIC_LIB} + ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}) +endforeach() diff --git a/ggml/src/ggml-qnn/backend-ops.hpp b/ggml/src/ggml-qnn/backend-ops.hpp deleted file mode 100644 index 64fb10f00d..0000000000 --- a/ggml/src/ggml-qnn/backend-ops.hpp +++ /dev/null @@ -1,11 +0,0 @@ -#pragma once - -#include "backend.hpp" -#include "ggml.h" - -namespace qnn { - -bool device_supports_op(ggml_backend_qnn_device_context * ctx, const ggml_tensor * op); -bool device_compute_graph(ggml_backend_qnn_device_context * ctx, ggml_cgraph * cgraph); - -} // namespace qnn diff --git a/ggml/src/ggml-qnn/npu/CMakeLists.txt b/ggml/src/ggml-qnn/npu/CMakeLists.txt new file mode 100644 index 0000000000..4c734bb098 --- /dev/null +++ b/ggml/src/ggml-qnn/npu/CMakeLists.txt @@ -0,0 +1,147 @@ +enable_language(ASM) +cmake_policy(SET CMP0115 OLD) + +if(DEFINED ENV{HEXAGON_SDK_ROOT}) + set(HEXAGON_SDK_ROOT $ENV{HEXAGON_SDK_ROOT}) + 
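+    # HEXAGON_SDK_ROOT must be exported before configuring; a typical setup
+    # (illustrative values only -- adjust to the local SDK install) looks like:
+    #   export HEXAGON_SDK_ROOT=$HOME/Qualcomm/Hexagon_SDK/5.x.x
+    #   export QNN_SDK_ROOT=$HOME/Qualcomm/qairt/2.x.x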
message("HEXAGON_SDK_ROOT: ${HEXAGON_SDK_ROOT}") +else() + message(FATAL_ERROR "HEXAGON_SDK_ROOT not defined") +endif() + +if(HEXAGON_SDK_ROOT) + include(${HEXAGON_SDK_ROOT}/build/cmake/hexagon_fun.cmake) +else() + include(${HEXAGON_CMAKE_ROOT}/hexagon_fun.cmake) +endif() + +# Base Include dirs for the Project +set(common_incs + ${CMAKE_CURRENT_BINARY_DIR}/ + ${HEXAGON_SDK_ROOT}/incs/ + ${HEXAGON_SDK_ROOT}/incs/stddef/ + ${HEXAGON_SDK_ROOT}/incs/HAP/ + ${HEXAGON_SDK_ROOT}/rtos/qurt/ + ${HEXAGON_SDK_ROOT}/utils/examples/ +) + +include_directories(${common_incs}) + +if(${CMAKE_SYSTEM_NAME} MATCHES "Android|Linux|Windows") + # host build + file(GLOB common_srcs "${CMAKE_CURRENT_LIST_DIR}/common/*.cpp") + file(GLOB host_srcs "${CMAKE_CURRENT_LIST_DIR}/host/*.cpp") + set(stub_srcs "${CMAKE_CURRENT_BINARY_DIR}/npu_device_stub.c") + add_library(hexagon-npu-host STATIC + ${common_srcs} + ${host_srcs} + ${stub_srcs} + ) + + # disable warnings for the stub + set_source_files_properties( + ${stub_srcs} + PROPERTIES + COMPILE_FLAGS "-w" + ) + + build_idl(idl/hexagon_npu.idl hexagon-npu-host) + + # Add compile definitions to the target + target_compile_definitions(hexagon-npu-host PUBLIC + VERIFY_PRINT_ERROR + GGML_QNN_ENABLE_HEXAGON_BACKEND + ) + + target_include_directories(hexagon-npu-host PRIVATE + ${HEXAGON_SDK_ROOT}/ipc/fastrpc/rpcmem/inc/ + ${QNN_SDK_ROOT}/include/QNN/ + ${CMAKE_CURRENT_LIST_DIR}/host/ + ${CMAKE_CURRENT_LIST_DIR}/ + ) + + target_include_directories(hexagon-npu-host PUBLIC + ${HEXAGON_SDK_ROOT}/incs/ # TODO: this is for rpc-mem + ) + + if(NOT ${CMAKE_SYSTEM_NAME} MATCHES "Windows") + set_target_properties(hexagon-npu-host PROPERTIES OUTPUT_NAME "hexagon_npu") + endif() + + if(${CMAKE_SYSTEM_NAME} MATCHES "Android|Linux") + target_link_options(hexagon-npu-host PUBLIC -pie) + endif() + + link_options(hexagon-npu-host) + + if(${CMAKE_SYSTEM_NAME} MATCHES "Android") + set(PREBUILT_LIB_DIR "android_aarch64") + elseif(${CMAKE_SYSTEM_NAME} MATCHES "Linux") + set(PREBUILT_LIB_DIR "UbuntuARM_aarch64") + else() + # Windows + set(PREBUILT_LIB_DIR "windows_aarch64") + endif() + + choose_dsprpc("3" dsprpc) # cdsprpc + link_custom_library(hexagon-npu-host ${dsprpc}) +else() + # hexagon npu build + cmake_minimum_required(VERSION 3.14.3) + project(hexagon_npu C CXX ASM) + + # check if QNN_SDK_ROOT is set + if(NOT DEFINED ENV{QNN_SDK_ROOT}) + message(FATAL_ERROR "QNN_SDK_ROOT not defined") + endif() + + set(QNN_SDK_ROOT $ENV{QNN_SDK_ROOT}) + message("QNN_SDK_ROOT: ${QNN_SDK_ROOT}") + include_directories( + ${QNN_SDK_ROOT}/include/QNN/ + ) + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++17") + + file(GLOB common_srcs "${CMAKE_CURRENT_LIST_DIR}/common/*.cpp") + file(GLOB device_srcs "${CMAKE_CURRENT_LIST_DIR}/device/*.cpp") + set(skel_srcs "${CMAKE_CURRENT_BINARY_DIR}/npu_device_skel.c") + add_library(hexagon_npu_skel_OBJS OBJECT + ${common_srcs} + ${device_srcs} + ${skel_srcs} + ) + + if(CMAKE_BUILD_TYPE MATCHES "Debug|Dbg") + message("Debug build, enable all logging") + target_compile_definitions(hexagon_npu_skel_OBJS PUBLIC + _DEBUG + DEBUG_LOGGING + ) + else() + message("Release build, disable debug logging") + target_compile_definitions(hexagon_npu_skel_OBJS PUBLIC + NDEBUG + RELEASE_LOGGING + ) + endif() + + build_idl(idl/hexagon_npu.idl hexagon_npu_skel_OBJS) + + # disable warnings for the skel + set_source_files_properties( + ${skel_srcs} + PROPERTIES + COMPILE_FLAGS "-w" + ) + + add_library(hexagon_npu_skel SHARED $) + + target_link_libraries(hexagon_npu_skel + 
${HEXAGON_LIB_DIR}/${HEXAGON_ARCH}/G0/pic/libc++abi.a + ${HEXAGON_LIB_DIR}/${HEXAGON_ARCH}/G0/pic/libc++.a + ) + set_target_properties(hexagon_npu_skel PROPERTIES OUTPUT_NAME "hexagon_npu_skel_${HEXAGON_ARCH}") + + copy_binaries(hexagon_npu_skel) +endif() + +# vim: set noet fenc=utf-8 ff=unix ft=cmake : diff --git a/ggml/src/ggml-qnn/npu/device/device.cpp b/ggml/src/ggml-qnn/npu/device/device.cpp new file mode 100644 index 0000000000..2368d44f67 --- /dev/null +++ b/ggml/src/ggml-qnn/npu/device/device.cpp @@ -0,0 +1,173 @@ + +#include +#include +#include + +#include + +#include "graph.hpp" +#include "hexagon_npu.h" +#include "op_impl.hpp" +#include "remote.h" +#include "tensor.hpp" +#include "util.hpp" + +#define NPU_UNUSED(x) (void) (x) + +namespace { + +struct npu_device_context { + int unused = 0; + // TODO: should we add tensor context here? +}; + +inline hexagon::tensor * tensor_from_handle(npu_device_graph_handle_t h) { + return reinterpret_cast(h); +} + +inline npu_device_graph_handle_t tensor_to_handle(hexagon::tensor * tensor) { + return reinterpret_cast(tensor); +} + +inline hexagon::graph * graph_from_handle(npu_device_tensor_handle_t h) { + return reinterpret_cast(h); +} + +inline npu_device_tensor_handle_t graph_to_handle(hexagon::graph * graph) { + return reinterpret_cast(graph); +} + +} // namespace + +int npu_device_open(const char * uri, remote_handle64 * h) { + // TODO: should we have a device context here? + auto * context = new (std::nothrow) npu_device_context(); + if (!context) { + DEVICE_LOG_ERROR("Failed to allocate memory for the npu_device_context"); + return AEE_ENOMEMORY; + } + + *h = reinterpret_cast(context); + return AEE_SUCCESS; +} + +int npu_device_close(remote_handle64 h) { + auto * context = reinterpret_cast(h); + if (!context) { + DEVICE_LOG_ERROR("Invalid npu_device_context handle"); + return AEE_EINVHANDLE; + } + + delete context; + return AEE_SUCCESS; +} + +AEEResult npu_device_device_get_alignment(remote_handle64 _h, uint32_t * alignment) { + NPU_UNUSED(_h); + *alignment = sizeof(HVX_Vector); + return AEE_SUCCESS; +} + +AEEResult npu_device_device_support_op(remote_handle64 _h, const npu_device_tensor_spec * src0, + const npu_device_tensor_spec * src1, const npu_device_tensor_spec * dst, + npu_device_tensor_op op, boolean * is_supported) { + NPU_UNUSED(_h); + *is_supported = hexagon::support_op(*src0, *src1, *dst, op); + return AEE_SUCCESS; +} + +AEEResult npu_device_tensor_init(remote_handle64 _h, const npu_device_tensor_config * info, + npu_device_tensor_handle_t * tensor_handle) { + NPU_UNUSED(_h); + auto * tensor = new (std::nothrow) hexagon::tensor(*info); + if (!tensor) { + DEVICE_LOG_ERROR("Failed to allocate memory for the tensor"); + return AEE_ENOMEMORY; + } + + *tensor_handle = tensor_to_handle(tensor); + return AEE_SUCCESS; +} + +AEEResult npu_device_tensor_set_src(remote_handle64 _h, npu_device_tensor_handle_t tensor_handle, uint64_t index, + npu_device_tensor_handle_t src) { + NPU_UNUSED(_h); + auto * tensor = tensor_from_handle(tensor_handle); + if (!tensor) { + return AEE_EINVHANDLE; + } + + auto * src_tensor = tensor_from_handle(src); + tensor->set_src(index, src_tensor); + return AEE_SUCCESS; +} + +AEEResult npu_device_tensor_set_op(remote_handle64 _h, npu_device_tensor_handle_t tensor_handle, + npu_device_tensor_op op) { + NPU_UNUSED(_h); + auto * tensor = tensor_from_handle(tensor_handle); + if (!tensor) { + return AEE_EINVHANDLE; + } + + tensor->set_op(op); + return AEE_SUCCESS; +} + +AEEResult 
npu_device_tensor_free(remote_handle64 _h, npu_device_tensor_handle_t tensor_handle) { + NPU_UNUSED(_h); + auto * tensor = tensor_from_handle(tensor_handle); + if (!tensor) { + return AEE_EINVHANDLE; + } + + delete tensor; + return AEE_SUCCESS; +} + +AEEResult npu_device_graph_init(remote_handle64 _h, npu_device_graph_handle_t * graph_handle) { + NPU_UNUSED(_h); + auto * graph = new (std::nothrow) hexagon::graph(); + if (!graph) { + return AEE_ENOMEMORY; + } + + *graph_handle = graph_to_handle(graph); + return AEE_SUCCESS; +} + +AEEResult npu_device_graph_set_tensor(remote_handle64 _h, npu_device_graph_handle_t graph_handle, + const npu_device_tensor_handle_t * tensor_handles, int tensor_handlesLen) { + NPU_UNUSED(_h); + auto * graph = graph_from_handle(graph_handle); + if (!graph || !tensor_handles || tensor_handlesLen <= 0) { + return AEE_EINVHANDLE; + } + + graph->set_tensor(tensor_handles, tensor_handlesLen); + return AEE_SUCCESS; +} + +AEEResult npu_device_graph_compute(remote_handle64 _h, npu_device_graph_handle_t graph_handle) { + NPU_UNUSED(_h); + auto * graph = graph_from_handle(graph_handle); + if (!graph) { + return AEE_EINVHANDLE; + } + + if (!graph->compute()) { + return AEE_EFAILED; + } + + return AEE_SUCCESS; +} + +AEEResult npu_device_graph_free(remote_handle64 _h, npu_device_graph_handle_t graph_handle) { + NPU_UNUSED(_h); + auto * graph = graph_from_handle(graph_handle); + if (graph) { + delete graph; + } + + return AEE_SUCCESS; +} diff --git a/ggml/src/ggml-qnn/npu/device/graph.cpp b/ggml/src/ggml-qnn/npu/device/graph.cpp new file mode 100644 index 0000000000..b21b8add29 --- /dev/null +++ b/ggml/src/ggml-qnn/npu/device/graph.cpp @@ -0,0 +1,67 @@ + +#include "graph.hpp" + +#include + +#include "op_impl.hpp" +#include "util.hpp" + +namespace hexagon { + +graph::~graph() noexcept { + if (_tensors) { + delete[] _tensors; + } +} + +void graph::set_tensor(const npu_device_tensor_handle_t * tensors, int tensor_count) { + if (_tensor_count > 0) { + delete[] _tensors; + } + + if (tensor_count <= 0) { + _tensors = nullptr; + _tensor_count = 0; + return; + } + + _tensors = new (std::nothrow) tensor *[tensor_count]; + for (int i = 0; i < tensor_count; ++i) { + auto * tensor_obj = reinterpret_cast(tensors[i]); + _tensors[i] = tensor_obj; + DEVICE_LOG_DEBUG("graph(%p) set_tensor[%d]: %p(%p,%p), op: %d\n", (void *) this, i, (void *) tensor_obj, + (void *) tensor_obj->get_src(0), (void *) tensor_obj->get_src(1), tensor_obj->get_op()); + } + + _tensor_count = tensor_count; + DEVICE_LOG_DEBUG("graph(%p) tensor count: %zu\n", (void *) this, _tensor_count); +} + +bool graph::compute() { + if (!_tensors || !_tensor_count) { + DEVICE_LOG_DEBUG("graph(%p) no tensors to compute\n", (void *) this); + return true; // return success if no tensors to compute + } + + DEVICE_LOG_DEBUG("graph(%p) compute\n", (void *) this); + for (size_t i = 0; i < _tensor_count; ++i) { + auto * dst = _tensors[i]; + auto op = dst->get_op(); + auto * func = get_compute_func(op); + if (!func) { + DEVICE_LOG_ERROR("graph(%p) tensor[%zu] op %d not supported\n", (void *) this, i, op); + return false; + } + + if (!func(dst)) { + DEVICE_LOG_ERROR("graph(%p) tensor[%zu] op %d compute failed\n", (void *) this, i, op); + return false; + } + + dst->flush(); // TODO: optimize this + } + + return true; +} + +} // namespace hexagon diff --git a/ggml/src/ggml-qnn/npu/device/graph.hpp b/ggml/src/ggml-qnn/npu/device/graph.hpp new file mode 100644 index 0000000000..22f6615d14 --- /dev/null +++ b/ggml/src/ggml-qnn/npu/device/graph.hpp 
@@ -0,0 +1,29 @@
+#pragma once
+
+#include "hexagon_npu.h"
+#include "tensor.hpp"
+
+namespace hexagon {
+
+class graph {
+  public:
+    // TODO: add execute direction here
+    explicit graph() noexcept {}
+
+    ~graph() noexcept;
+
+    void set_tensor(const npu_device_tensor_handle_t * tensors, int tensor_count);
+
+    bool compute();
+
+  private:
+    tensor ** _tensors      = nullptr;
+    size_t    _tensor_count = 0;
+
+    graph(const graph &)     = delete;
+    void operator=(const graph &) = delete;
+    graph(graph &&)          = delete;
+    void operator=(graph &&) = delete;
+};
+
+} // namespace hexagon
diff --git a/ggml/src/ggml-qnn/npu/device/op_impl.cpp b/ggml/src/ggml-qnn/npu/device/op_impl.cpp
new file mode 100644
index 0000000000..7067a1d52b
--- /dev/null
+++ b/ggml/src/ggml-qnn/npu/device/op_impl.cpp
@@ -0,0 +1,194 @@
+
+
+#include "op_impl.hpp"
+
+#include <hexagon_types.h>
+#include <iterator>
+
+#include "op_mul_mat.hpp"
+
+namespace {
+
+template <HVX_Vector (*_OpIntrinsic)(HVX_Vector, HVX_Vector)>
+inline void vec_op_f32_f32(const float * src0, const float * src1, size_t count, float * dst) {
+    HVX_Vector * iptr0     = ((HVX_Vector *) src0);
+    HVX_Vector * iptr0_end = ((HVX_Vector *) src0) + (count / hexagon::kFloatsPerVector);
+    HVX_Vector * iptr1     = ((HVX_Vector *) src1);
+    HVX_Vector * optr      = ((HVX_Vector *) dst);
+    HVX_Vector   prev0     = *iptr0++;
+    HVX_Vector   prev1     = *iptr1++;
+
+    // TODO: prefetch or just use VTCM?
+    while (iptr0 < iptr0_end) {
+        HVX_Vector curr0 = *iptr0++;
+        HVX_Vector curr1 = *iptr1++;
+        HVX_Vector s0    = Q6_V_valign_VVR(curr0, prev0, (size_t) src0);
+        HVX_Vector s1    = Q6_V_valign_VVR(curr1, prev1, (size_t) src1);
+        *optr++          = Q6_Vsf_equals_Vqf32(_OpIntrinsic(s0, s1));
+        prev0            = curr0;
+        prev1            = curr1;
+    }
+
+    if ((iptr0_end - ((HVX_Vector *) src0)) > 0) {
+        // handle the last vector
+        // see also:
+        //   https://github.com/UbiquitousLearning/mllm/blob/babf4410352ce8730824c87699c025a0d4ce3a6f/src/backends/qnn/LLaMAOpPackageHtp/LLaMAPackage/src/ops/LLaMAMul.cpp#L147
+        //   or qualcomm sdk libs\qhl_hvx\src\qhblas_hvx\qhblas_hvx_aw_vector_add_ah.c
+        bool       iptr0_aligned = hexagon::is_addr_aligned(iptr0);
+        HVX_Vector curr0         = iptr0_aligned ? prev0 : *iptr0;
+        iptr0                    = iptr0_aligned ? iptr0 : iptr0 + 1;
+        bool       iptr1_aligned = hexagon::is_addr_aligned(iptr1);
+        HVX_Vector curr1         = iptr1_aligned ? prev1 : *iptr1;
+        iptr1                    = iptr1_aligned ? iptr1 : iptr1 + 1;
+        HVX_Vector s0            = Q6_V_valign_VVR(curr0, prev0, (size_t) src0);
+        HVX_Vector s1            = Q6_V_valign_VVR(curr1, prev1, (size_t) src1);
+        *optr++                  = Q6_Vsf_equals_Vqf32(_OpIntrinsic(s0, s1));
+        prev0                    = curr0;
+        prev1                    = curr1;
+    }
+
+    const size_t leftover       = count % hexagon::kFloatsPerVector;
+    const size_t leftover_bytes = leftover * sizeof(float);
+    if (leftover > 0) {
+        // handle the leftover elements
+        HVX_Vector curr0 =
+            (leftover_bytes + hexagon::unaligned_bytes(iptr0) > hexagon::kBytesPerVector) ? *iptr0 : prev0;
+        curr0 = Q6_V_valign_VVR(curr0, prev0, (size_t) src0);
+
+        HVX_Vector curr1 =
+            (leftover_bytes + hexagon::unaligned_bytes(iptr1) > hexagon::kBytesPerVector) ? *iptr1 : prev1;
+        curr1 = Q6_V_valign_VVR(curr1, prev1, (size_t) src1);
+
+        q6op_vstu_variable_ARV(optr, leftover_bytes, Q6_Vsf_equals_Vqf32(_OpIntrinsic(curr0, curr1)));
+    }
+}
+
+inline HVX_Vector vadd_f32_f32(HVX_Vector a, HVX_Vector b) {
+    return Q6_Vqf32_vadd_VsfVsf(a, b);
+}
+
+inline HVX_Vector vsub_f32_f32(HVX_Vector a, HVX_Vector b) {
+    return Q6_Vqf32_vsub_VsfVsf(a, b);
+}
+
+inline HVX_Vector vmul_f32_f32(HVX_Vector a, HVX_Vector b) {
+    return Q6_Vqf32_vmpy_VsfVsf(a, b);
+}
+
+template <typename _TyDst, void (*_RowFunc)(const float *, const float *, size_t, _TyDst *)>
+bool element_wise_op(hexagon::tensor * out) {
+    if (!out) {
+        return false;
+    }
+
+    auto * src0 = out->get_src(0);
+    auto * src1 = out->get_src(1);
+    if (!src0 || !src1) {
+        return true;  // skip if no src
+    }
+
+    if (src0->get_ne(0) != src1->get_ne(0)) {
+        // TODO: handle this case
+        DEVICE_LOG_ERROR("src0[0] and src1[0] do not match: %ld vs %ld\n", (long) src0->get_ne(0),
+                         (long) src1->get_ne(0));
+        return false;
+    }
+
+    static_assert(DEVICE_TENSOR_MAX_DIMS == 4, "element_wise_op requires max dims 4");
+
+    const auto * src0_ptr = reinterpret_cast<const uint8_t *>(src0->get_data());
+    const auto * src1_ptr = reinterpret_cast<const uint8_t *>(src1->get_data());
+    auto *       dst_ptr  = reinterpret_cast<uint8_t *>(out->get_data());
+    for (int64_t i3 = 0; i3 < out->get_ne(3); i3++) {
+        const auto * src0_cube = src0_ptr + i3 * src0->get_nb(3);
+        const auto * src1_cube = src1_ptr + (i3 % src1->get_ne(3)) * src1->get_nb(3);
+        auto *       dst_cube  = dst_ptr + i3 * out->get_nb(3);
+        for (int64_t i2 = 0; i2 < out->get_ne(2); i2++) {
+            const auto * src0_plane = src0_cube + i2 * src0->get_nb(2);
+            const auto * src1_plane = src1_cube + (i2 % src1->get_ne(2)) * src1->get_nb(2);
+            auto *       dst_plane  = dst_cube + i2 * out->get_nb(2);
+            for (int64_t i1 = 0; i1 < out->get_ne(1); i1++) {
+                // TODO: prefetch row?
+                auto * src0_row = src0_plane + i1 * src0->get_nb(1);
+                auto * src1_row = src1_plane + (i1 % src1->get_ne(1)) * src1->get_nb(1);
+                auto * dst_row  = reinterpret_cast<_TyDst *>(dst_plane + i1 * out->get_nb(1));
+                _RowFunc(reinterpret_cast<const float *>(src0_row), reinterpret_cast<const float *>(src1_row),
+                         static_cast<size_t>(out->get_ne(0)), dst_row);
+            }
+        }
+    }
+
+    return true;
+}
+
+bool is_element_wise_op_supported(const npu_device_tensor_spec & src0, const npu_device_tensor_spec & src1,
+                                  const npu_device_tensor_spec & dst, npu_device_tensor_op op) {
+    if (op != NPU_OP_ADD && op != NPU_OP_SUB && op != NPU_OP_MUL) {
+        DEVICE_LOG_DEBUG("Unsupported element wise op: %s\n", hexagon::op_get_name(op));
+        return false;
+    }
+
+    if (src0.ne[0] != src1.ne[0]) {
+        DEVICE_LOG_DEBUG("src0.ne[0] and src1.ne[0] do not match: %ld vs %ld\n", (long) src0.ne[0],
+                         (long) src1.ne[0]);
+        return false;
+    }
+
+    for (size_t i = 0; i < DEVICE_TENSOR_MAX_DIMS; ++i) {
+        if (src0.ne[i] != dst.ne[i]) {
+            DEVICE_LOG_DEBUG("src0.ne[%zu] and dst.ne[%zu] do not match: %lld vs %lld\n", i, i,
+                             (long long) src0.ne[i], (long long) dst.ne[i]);
+            return false;
+        }
+    }
+
+    return true;
+}
+
+struct op_capabilities {
+    npu_device_tensor_op               op;
+    hexagon::compute_func_type         compute_func;
+    hexagon::op_is_supported_func_type is_supported;
+};
+
+constexpr const op_capabilities kOpCapabilities[] = {
+    { NPU_OP_MUL_MAT, hexagon::mul_mat_f32,                                  hexagon::is_mul_mat_supported },
+    { NPU_OP_ADD,     element_wise_op<float, vec_op_f32_f32<vadd_f32_f32>>,  is_element_wise_op_supported  },
+    { NPU_OP_SUB,     element_wise_op<float, vec_op_f32_f32<vsub_f32_f32>>,  is_element_wise_op_supported  },
+    { NPU_OP_MUL,     element_wise_op<float, vec_op_f32_f32<vmul_f32_f32>>,  is_element_wise_op_supported  },
+};
+
+static_assert(kOpCapabilities[NPU_OP_MUL_MAT].compute_func == hexagon::mul_mat_f32,
+              "kOpArray[NPU_OP_MUL_MAT] != mul_mat_f32");
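+
+// kOpCapabilities is indexed directly by npu_device_tensor_op, so entries must stay in
+// enum order; the static_asserts around this table guard that invariant. Dispatch then
+// reduces to a plain array lookup (illustrative sketch, not part of the build):
+//
+//   auto func = kOpCapabilities[NPU_OP_ADD].compute_func;  // element_wise_op<...>
+//   func(dst);  // computes dst = src0 + src1 with the HVX row kernel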
+static_assert(std::size(kOpCapabilities) == NPU_OP_COUNT); +static_assert(kOpCapabilities[NPU_OP_MUL_MAT].op == NPU_OP_MUL_MAT, "kOpArray[NPU_OP_MUL_MAT].op != NPU_OP_MUL_MAT"); +static_assert(kOpCapabilities[NPU_OP_MUL].op == NPU_OP_MUL, "kOpArray[NPU_OP_MUL].op != NPU_OP_MUL"); + +} // namespace + +namespace hexagon { + +compute_func_type get_compute_func(npu_device_tensor_op op) { + if (op >= NPU_OP_COUNT) { + return nullptr; + } + + return kOpCapabilities[op].compute_func; +} + +bool support_op(const npu_device_tensor_spec & src0, const npu_device_tensor_spec & src1, + const npu_device_tensor_spec & dst, npu_device_tensor_op op) { + if (get_compute_func(op) == nullptr) { + DEVICE_LOG_ERROR("Unsupported op: %s, get_compute_func failed\n", op_get_name(op)); + return false; + } + + auto is_supported_func = kOpCapabilities[op].is_supported; + if (!is_supported_func || !is_supported_func(src0, src1, dst, op)) { + DEVICE_LOG_ERROR("Unsupported op: %s, is_supported_func failed\n", op_get_name(op)); + return false; + } + + return true; +} + +} // namespace hexagon diff --git a/ggml/src/ggml-qnn/npu/device/op_impl.hpp b/ggml/src/ggml-qnn/npu/device/op_impl.hpp new file mode 100644 index 0000000000..1fee7769ce --- /dev/null +++ b/ggml/src/ggml-qnn/npu/device/op_impl.hpp @@ -0,0 +1,17 @@ +#pragma once + +#include "hexagon_npu.h" +#include "tensor.hpp" + +namespace hexagon { + +typedef bool (*compute_func_type)(tensor * dst); +typedef bool (*op_is_supported_func_type)(const npu_device_tensor_spec & src0, const npu_device_tensor_spec & src1, + const npu_device_tensor_spec & dst, npu_device_tensor_op op); + +compute_func_type get_compute_func(npu_device_tensor_op op); + +bool support_op(const npu_device_tensor_spec & src0, const npu_device_tensor_spec & src1, + const npu_device_tensor_spec & dst, npu_device_tensor_op op); + +} // namespace hexagon diff --git a/ggml/src/ggml-qnn/npu/device/op_mul_mat.cpp b/ggml/src/ggml-qnn/npu/device/op_mul_mat.cpp new file mode 100644 index 0000000000..fbda69d2d7 --- /dev/null +++ b/ggml/src/ggml-qnn/npu/device/op_mul_mat.cpp @@ -0,0 +1,146 @@ +#include "op_mul_mat.hpp" + +#include + +namespace { + +inline float vec_dot_product_f32_f32(const float * src0, const float * src1, size_t count) { + HVX_Vector * iptr0 = ((HVX_Vector *) src0); + HVX_Vector * iptr0_end = ((HVX_Vector *) src0) + (count / hexagon::kFloatsPerVector); + HVX_Vector * iptr1 = ((HVX_Vector *) src1); + HVX_Vector prev0 = *iptr0++; + HVX_Vector prev1 = *iptr1++; + HVX_Vector sum = Q6_V_vzero(); + + // TODO: prefetch or just use VTCM? + while (iptr0 < iptr0_end) { + HVX_Vector curr0 = *iptr0++; + HVX_Vector curr1 = *iptr1++; + HVX_Vector s0 = Q6_V_valign_VVR(curr0, prev0, (size_t) src0); + HVX_Vector s1 = Q6_V_valign_VVR(curr1, prev1, (size_t) src1); + sum = Q6_Vqf32_vadd_Vqf32Vqf32(Q6_Vqf32_vmpy_VsfVsf(s0, s1), sum); + prev0 = curr0; + prev1 = curr1; + } + + if ((iptr0_end - ((HVX_Vector *) src0)) > 0) { + // handle the last vector + // see also: + // https://github.com/UbiquitousLearning/mllm/blob/babf4410352ce8730824c87699c025a0d4ce3a6f/src/backends/qnn/LLaMAOpPackageHtp/LLaMAPackage/src/ops/LLaMAMul.cpp#L147 + // or qualcomm sdk libs\qhl_hvx\src\qhblas_hvx\qhblas_hvx_aw_vector_add_ah.c + bool iptr0_aligned = hexagon::is_addr_aligned(iptr0); + HVX_Vector curr0 = iptr0_aligned ? prev0 : *iptr0; + iptr0 = iptr0_aligned ? iptr0 : iptr0 + 1; + bool iptr1_aligned = hexagon::is_addr_aligned(iptr1); + HVX_Vector curr1 = iptr1_aligned ? prev1 : *iptr1; + iptr1 = iptr1_aligned ? 
iptr1 : iptr1 + 1; + HVX_Vector s0 = Q6_V_valign_VVR(curr0, prev0, (size_t) src0); + HVX_Vector s1 = Q6_V_valign_VVR(curr1, prev1, (size_t) src1); + sum = Q6_Vqf32_vadd_Vqf32Vqf32(Q6_Vqf32_vmpy_VsfVsf(s0, s1), sum); + prev0 = curr0; + prev1 = curr1; + } + + const size_t leftover = count % hexagon::kFloatsPerVector; + const size_t leftover_bytes = leftover * sizeof(float); + if (leftover > 0) { + // handle the leftover elements + HVX_Vector curr0 = + (leftover_bytes + hexagon::unaligned_bytes(iptr0) > hexagon::kBytesPerVector) ? *iptr0 : prev0; + curr0 = Q6_V_valign_VVR(curr0, prev0, (size_t) src0); + + HVX_Vector curr1 = + (leftover_bytes + hexagon::unaligned_bytes(iptr1) > hexagon::kBytesPerVector) ? *iptr1 : prev1; + curr1 = Q6_V_valign_VVR(curr1, prev1, (size_t) src1); + + sum = Q6_Vqf32_vadd_Vqf32Vqf32( + Q6_V_valign_VVR(Q6_Vqf32_vmpy_VsfVsf(curr0, curr1), Q6_V_vzero(), leftover_bytes), sum); + } + + // TODO: do we have a better way to do the reduction? + for (size_t i = hexagon::kFloatsPerVector / 2; i > 0; i /= 2) { + sum = Q6_Vqf32_vadd_Vqf32Vqf32(sum, Q6_V_vror_VR(sum, i * sizeof(float))); + } + + float result; + q6op_vstu_variable_ARV(&result, sizeof(float), Q6_Vsf_equals_Vqf32(sum)); + return result; +} + +} // namespace + +namespace hexagon { + +bool mul_mat_f32(hexagon::tensor * out) { + if (!out) { + return false; + } + + auto * src0 = out->get_src(0); + auto * src1 = out->get_src(1); + if (!src0 || !src1) { + return true; // skip if no src + } + + static_assert(DEVICE_TENSOR_MAX_DIMS == 4, "mul_mat_f32 requires max dims 4"); + + const auto r02 = src1->get_ne(2) / src0->get_ne(2); + const auto r03 = src1->get_ne(3) / src0->get_ne(3); + const auto * src0_ptr = reinterpret_cast(src0->get_data()); + const auto * src1_ptr = reinterpret_cast(src1->get_data()); + auto * dst_ptr = reinterpret_cast(out->get_data()); + for (int64_t i3 = 0; i3 < out->get_ne(3); i3++) { + const auto * src0_cube = src0_ptr + i3 / r03 * src0->get_nb(3); + const auto * src1_cube = src1_ptr + i3 * src1->get_nb(3); + auto * dst_cube = dst_ptr + i3 * out->get_nb(3); + for (int64_t i2 = 0; i2 < out->get_ne(2); i2++) { + const auto * src0_plane = src0_cube + i2 / r02 * src0->get_nb(2); + const auto * src1_plane = src1_cube + i2 * src1->get_nb(2); + auto * dst_plane = dst_cube + i2 * out->get_nb(2); + for (int64_t i1 = 0; i1 < out->get_ne(1); i1++) { + // TODO: prefetch row? 
+ auto * src1_row = src1_plane + i1 * src1->get_nb(1); + auto * dst_row = reinterpret_cast(dst_plane + i1 * out->get_nb(1)); + for (int64_t i0 = 0; i0 < out->get_ne(0); i0++) { + auto * src0_row = src0_plane + i0 * src0->get_nb(1); + // TODO: figure out how to handle a entire row + *dst_row++ = + vec_dot_product_f32_f32(reinterpret_cast(src0_row), + reinterpret_cast(src1_row), (size_t) src0->get_ne(0)); + } + } + } + } + + return true; +} + +bool is_mul_mat_supported(const npu_device_tensor_spec & src0, const npu_device_tensor_spec & src1, + const npu_device_tensor_spec & dst, npu_device_tensor_op op) { + if (op != NPU_OP_MUL_MAT) { + DEVICE_LOG_DEBUG("op is not NPU_OP_MUL_MAT: %d\n", op); + return false; + } + + if (src0.ne[0] != src1.ne[0] || src0.ne[1] != dst.ne[0]) { + DEVICE_LOG_DEBUG("src0 and src1 cannot multiply: %ldx%ld vs %ldx%ld\n", (long) src0.ne[0], (long) src0.ne[1], + (long) src1.ne[0], (long) src1.ne[1]); + return false; + } + + if (src1.ne[1] != dst.ne[1] || src1.ne[2] != dst.ne[2] || src1.ne[3] != dst.ne[3]) { + DEVICE_LOG_DEBUG("src1 and dst dimensions not match: %ldx%ld vs %ldx%ld\n", (long) src1.ne[2], + (long) src1.ne[3], (long) dst.ne[2], (long) dst.ne[3]); + return false; + } + + if (src1.ne[2] % src0.ne[2] || src1.ne[3] % src0.ne[3]) { + DEVICE_LOG_DEBUG("src0 cannot broadcast to src1: %ldx%ld vs %ldx%ld\n", (long) src0.ne[2], (long) src0.ne[3], + (long) src1.ne[2], (long) src1.ne[3]); + return false; + } + + return true; +} + +} // namespace hexagon diff --git a/ggml/src/ggml-qnn/npu/device/op_mul_mat.hpp b/ggml/src/ggml-qnn/npu/device/op_mul_mat.hpp new file mode 100644 index 0000000000..cc57d3d1fe --- /dev/null +++ b/ggml/src/ggml-qnn/npu/device/op_mul_mat.hpp @@ -0,0 +1,27 @@ +#pragma once + +#include + +#include + +#include "tensor.hpp" + +namespace hexagon { + +constexpr const size_t kBytesPerVector = sizeof(HVX_Vector); // 128 for v73 +constexpr const size_t kFloatsPerVector = kBytesPerVector / sizeof(float); +constexpr const size_t kAlignMask = kBytesPerVector - 1; + +inline size_t unaligned_bytes(const void * addr) { + return ((size_t) addr) & kAlignMask; +} + +inline bool is_addr_aligned(void * addr) { + return unaligned_bytes(addr) == 0; +} + +bool mul_mat_f32(tensor * out); +bool is_mul_mat_supported(const npu_device_tensor_spec & src0, const npu_device_tensor_spec & src1, + const npu_device_tensor_spec & dst, npu_device_tensor_op op); + +} // namespace hexagon diff --git a/ggml/src/ggml-qnn/npu/device/tensor.hpp b/ggml/src/ggml-qnn/npu/device/tensor.hpp new file mode 100644 index 0000000000..83aa29a609 --- /dev/null +++ b/ggml/src/ggml-qnn/npu/device/tensor.hpp @@ -0,0 +1,90 @@ +#pragma once + +#include +#include + +#include "hexagon_npu.h" +#include "util.hpp" + +namespace hexagon { + +constexpr const size_t kMaxTensorSrc = DEVICE_TENSOR_MAX_SRC; + +class tensor { + public: + explicit tensor(const npu_device_tensor_config & info) noexcept : _info(info) { + uint64 phy_address = 0; + void * mmap_address = nullptr; + auto ret = HAP_mmap_get(_info.buffer_fd, &mmap_address, &phy_address); + if (ret != AEE_SUCCESS) { + DEVICE_LOG_ERROR("Failed to mmap tensor buffer: %d", (int) ret); + return; + } + + _data = static_cast(mmap_address); + DEVICE_LOG_INFO("tensor(%p[%ldx%ldx%ldx%ld]), fd: %d, offset: %zu, mmap_address: %p, phy_address: 0x%lx\n", + (void *) this, (long) _info.ne[0], (long) _info.ne[1], (long) _info.ne[2], (long) _info.ne[3], + _info.buffer_fd, _info.offset, (void *) mmap_address, phy_address); + } + + ~tensor() noexcept { + auto ret = 
HAP_mmap_put(_info.buffer_fd); + if (ret != AEE_SUCCESS) { + DEVICE_LOG_ERROR("Failed to unmap tensor buffer: %d", (int) ret); + } + + DEVICE_LOG_INFO("~tensor(%p) fd: %d", (void *) this, _info.buffer_fd); + } + + void flush() { + if (_data) { + qurt_mem_cache_clean((qurt_addr_t) (_data + _info.offset), (qurt_size_t) _info.size, + QURT_MEM_CACHE_INVALIDATE, QURT_MEM_DCACHE); + } + } + + bool set_src(size_t index, tensor * src) { + if (index >= kMaxTensorSrc) { + return false; + } + + _src[index] = src; + return true; + } + + void set_op(npu_device_tensor_op op) { _info.op = op; } + + tensor * get_src(size_t index) const { + if (index >= kMaxTensorSrc) { + return nullptr; + } + + return _src[index]; + } + + const npu_device_tensor_config & get_info() const { return _info; } + + const int64_t get_ne(size_t index) const { return _info.ne[index]; } + + const size_t get_nb(size_t index) const { return _info.nb[index]; } + + npu_device_tensor_op get_op() const { return _info.op; } + + npu_device_tensor_data_type get_type() const { return _info.type; } + + uint8_t * get_data() const { return _data + _info.offset; } + + bool is_valid() const { return _data != nullptr; } + + private: + npu_device_tensor_config _info; + tensor * _src[kMaxTensorSrc] = {}; + uint8_t * _data = nullptr; + + tensor(const tensor &) = delete; + void operator=(const tensor &) = delete; + tensor(tensor &&) = delete; + void operator=(tensor &&) = delete; +}; + +} // namespace hexagon diff --git a/ggml/src/ggml-qnn/npu/device/util.hpp b/ggml/src/ggml-qnn/npu/device/util.hpp new file mode 100644 index 0000000000..12b7dde81e --- /dev/null +++ b/ggml/src/ggml-qnn/npu/device/util.hpp @@ -0,0 +1,36 @@ +#pragma once + +#include + +#include "hexagon_npu.h" + +#define DEVICE_LOG_ERROR(...) FARF(FATAL, __VA_ARGS__) +#define DEVICE_LOG_WARN(...) FARF(ERROR, __VA_ARGS__) +#define DEVICE_LOG_INFO(...) FARF(HIGH, __VA_ARGS__) + +#ifdef _DEBUG +# undef FARF_LOW +# define FARF_LOW 1 +# define DEVICE_LOG_DEBUG(...) FARF(LOW, __VA_ARGS__) +#else +# define DEVICE_LOG_DEBUG(...) (void) 0 +#endif + +namespace hexagon { + +constexpr const char * op_get_name(npu_device_tensor_op op) { + switch (op) { + case NPU_OP_MUL_MAT: + return "MUL_MAT"; + case NPU_OP_ADD: + return "ADD"; + case NPU_OP_SUB: + return "SUB"; + case NPU_OP_MUL: + return "MUL"; + default: + return "UNKNOWN"; + } +} + +} // namespace hexagon diff --git a/ggml/src/ggml-qnn/npu/host/buffer.cpp b/ggml/src/ggml-qnn/npu/host/buffer.cpp new file mode 100644 index 0000000000..ff5c8a320c --- /dev/null +++ b/ggml/src/ggml-qnn/npu/host/buffer.cpp @@ -0,0 +1,246 @@ +#include "buffer.hpp" + +#include + +#include "host_device.hpp" +#include "tensor.hpp" + +namespace { + +constexpr const int kRpcMemDefaultHeapId = RPCMEM_HEAP_ID_SYSTEM; +constexpr const uint32_t kRpcMemDefaultFlags = RPCMEM_DEFAULT_FLAGS; // TODO: should we use a different flag? 
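+
+// Shared-buffer flow in a nutshell: the host allocates rpcmem (ION) memory, converts it
+// to a file descriptor, and maps that fd into the DSP address space; the device side then
+// resolves the same fd via HAP_mmap_get() (see device/tensor.hpp). A rough sketch of the
+// raw SDK calls that the common::rpc_mem wrapper used below stands in for:
+//
+//   void * data = rpcmem_alloc(RPCMEM_HEAP_ID_SYSTEM, RPCMEM_DEFAULT_FLAGS, size);
+//   int    fd   = rpcmem_to_fd(data);
+//   fastrpc_mmap(domain_id, fd, data, 0, size, FASTRPC_MAP_FD);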
+ +static hexagon::host_buffer * get_buffer_object(ggml_backend_buffer_t buffer) { + return reinterpret_cast(buffer->context); +} + +static hexagon::host_buffer_type * get_buffer_type_object(ggml_backend_buffer_type_t buft) { + return reinterpret_cast(buft->context); +} + +void backend_buffer_free_buffer(ggml_backend_buffer_t buffer) { + delete get_buffer_object(buffer); +} + +void * backend_buffer_get_base(ggml_backend_buffer_t buffer) { + auto * buffer_obj = get_buffer_object(buffer); + GGML_ASSERT(buffer_obj != nullptr); + return buffer_obj->get_buffer(); +} + +ggml_status backend_buffer_init_tensor(ggml_backend_buffer_t buffer, ggml_tensor * tensor) { + auto * buffer_type_obj = get_buffer_type_object(buffer->buft); + GGML_ASSERT(buffer_type_obj != nullptr); + + auto * device_object = buffer_type_obj->get_device(); + GGML_ASSERT(device_object != nullptr); + + auto * buffer_obj = get_buffer_object(buffer); + GGML_ASSERT(buffer_obj != nullptr); + + auto tensor_object = buffer_obj->init_tensor(tensor, device_object->get_device_handle()); + if (!tensor_object) { + LOG_ERROR("Failed to init tensor\n"); + return GGML_STATUS_ALLOC_FAILED; + } + + return GGML_STATUS_SUCCESS; +} + +void backend_buffer_set_tensor(ggml_backend_buffer_t buffer, ggml_tensor * tensor, const void * data, size_t offset, + size_t size) { + GGML_UNUSED(buffer); + memcpy((char *) tensor->data + offset, data, size); +} + +void backend_buffer_get_tensor(ggml_backend_buffer_t buffer, const ggml_tensor * tensor, void * data, size_t offset, + size_t size) { + GGML_UNUSED(buffer); + memcpy(data, (const char *) tensor->data + offset, size); +} + +bool backend_buffer_cpy_tensor(ggml_backend_buffer_t buffer, const ggml_tensor * src, ggml_tensor * dst) { + GGML_UNUSED(buffer); + if (ggml_backend_buffer_is_host(src->buffer)) { + memcpy(dst->data, src->data, ggml_nbytes(src)); + return true; + } + + return false; +} + +void backend_buffer_clear(ggml_backend_buffer_t buffer, uint8_t value) { + auto * buffer_obj = get_buffer_object(buffer); + GGML_ASSERT(buffer_obj != nullptr); + memset(buffer_obj->get_buffer(), value, buffer_obj->get_size()); +} + +constexpr const ggml_backend_buffer_i backend_buffer_interface = { + /* .free_buffer = */ backend_buffer_free_buffer, + /* .get_base = */ backend_buffer_get_base, + /* .init_tensor = */ backend_buffer_init_tensor, + /* .memset_tensor = */ nullptr, + /* .set_tensor = */ backend_buffer_set_tensor, + /* .get_tensor = */ backend_buffer_get_tensor, + /* .cpy_tensor = */ backend_buffer_cpy_tensor, + /* .clear = */ backend_buffer_clear, + /* .reset = */ nullptr, +}; + +const char * backend_buffer_type_get_name(ggml_backend_buffer_type_t buft) { + auto * buffer_type_obj = get_buffer_type_object(buft); + GGML_ASSERT(buffer_type_obj != nullptr); + return buffer_type_obj->get_name(); +} + +ggml_backend_buffer_t backend_buffer_type_alloc_buffer(ggml_backend_buffer_type_t buft, size_t size) { + auto * buffer_type_obj = get_buffer_type_object(buft); + GGML_ASSERT(buffer_type_obj != nullptr); + return buffer_type_obj->allocate_buffer(size); +} + +size_t backend_buffer_type_get_alignment(ggml_backend_buffer_type_t buft) { + auto * buffer_type_obj = get_buffer_type_object(buft); + GGML_ASSERT(buffer_type_obj != nullptr); + return buffer_type_obj->get_buffer_alignment(); +} + +size_t backend_buffer_type_get_max_size(ggml_backend_buffer_type_t buft) { + auto * buffer_type_obj = get_buffer_type_object(buft); + GGML_ASSERT(buffer_type_obj != nullptr); + return buffer_type_obj->get_max_buffer_size(); +} + +bool 
backend_buffer_is_host(ggml_backend_buffer_type_t buft) { + return buft->iface.get_name == backend_buffer_type_get_name; +} + +} // namespace + +namespace hexagon { + +host_buffer::host_buffer(common::rpc_mem_ptr allocator, size_t size, uint32_t domain_id) : + _allocator(allocator), + _size(size), + _domain_id(domain_id) { + if (!_allocator->is_valid()) { + LOG_ERROR("rpc memory not initialized\n"); + return; + } + + if (size > _allocator->get_max_alloc_size()) { + LOG_ERROR("rpc memory size %zu exceeds max alloc size %zu\n", size, _allocator->get_max_alloc_size()); + return; + } + + _data = _allocator->alloc(kRpcMemDefaultHeapId, kRpcMemDefaultFlags, size); + if (!_data) { + LOG_ERROR("failed to allocate rpc memory, size: %d MB\n", (int) (size / (1 << 20))); + return; + } + + LOG_DEBUG("create host_buffer(%p), size: %zu, domain_id: %d\n", (void *) _data, size, (int) domain_id); +} + +host_buffer::~host_buffer() { + LOG_DEBUG("destroy host_buffer(%p), size: %zu, domain_id: %d\n", (void *) _data, _size, (int) _domain_id); + _tensors.clear(); + if (_buffer_fd != -1) { + auto ret = _allocator->fastrpc_munmap((int) _domain_id, _buffer_fd, nullptr, 0); + if (ret != AEE_SUCCESS) { + LOG_ERROR("failed to munmap rpc memory, fd: %d, ret: %d\n", _buffer_fd, ret); + return; + } + } + + _allocator->free(_data); +} + +std::shared_ptr host_buffer::init_tensor(ggml_tensor * tensor, remote_handle64 device_handle) { + if (!_data) { + LOG_ERROR("failed to init tensor, rpc memory not initialized\n"); + return std::shared_ptr(); + } + + if (_buffer_fd == -1) { + _buffer_fd = _allocator->to_fd(_data); + if (_buffer_fd < 0) { + LOG_ERROR("failed to get fd from rpc memory\n"); + return std::shared_ptr(); + } + + auto ret = _allocator->fastrpc_mmap((int) _domain_id, _buffer_fd, _data, 0, _size, FASTRPC_MAP_FD); + if (ret != AEE_SUCCESS) { + LOG_ERROR("failed to mmap rpc memory, fd: %d, ret: %d\n", _buffer_fd, ret); + return std::shared_ptr(); + } + + LOG_DEBUG("mmap rpc memory(%p), fd: %d, addr: %p, size: %zu\n", (void *) _data, _buffer_fd, _data, _size); + } + + auto tensor_object = std::make_shared( + tensor, _buffer_fd, (uint64_t) (reinterpret_cast(tensor->data) - reinterpret_cast(_data)), + device_handle); + if (!tensor_object->is_valid()) { + LOG_ERROR("failed to init tensor, device handle: %p\n", (void *) device_handle); + return std::shared_ptr(); + } + + _tensors.push_back(tensor_object); + return tensor_object; +} + +host_buffer_type::host_buffer_type(ggml_backend_dev_t dev, const std::string & name, common::rpc_mem_ptr rpc_mem) : + _name(name), + _rpc_mem(rpc_mem) { + iface = { + /* .get_name = */ backend_buffer_type_get_name, + /* .alloc_buffer = */ backend_buffer_type_alloc_buffer, + /* .get_alignment = */ backend_buffer_type_get_alignment, + /* .get_max_size = */ backend_buffer_type_get_max_size, + /* .get_alloc_size = */ nullptr, // defaults to ggml_nbytes + /* .is_host = */ backend_buffer_is_host, + }; + device = dev; + context = this; + + _device = reinterpret_cast(device->context); + LOG_DEBUG("[%s]create host_buffer_type %s\n", _device->get_name(), _name.c_str()); +} + +size_t host_buffer_type::get_buffer_alignment() const { + return _device->is_device_initialized() ? 
_device->get_alignment() : 128; +} + +size_t host_buffer_type::get_max_buffer_size() const { + if (!_rpc_mem) { + LOG_ERROR("rpc memory not initialized\n"); + return 0; + } + + return _rpc_mem->get_max_alloc_size(); +} + +ggml_backend_buffer_t host_buffer_type::allocate_buffer(size_t size) { + if (!_rpc_mem) { + LOG_ERROR("rpc memory not initialized\n"); + return nullptr; + } + + if (!_device->is_device_initialized()) { + LOG_ERROR("device is not initialized\n"); + return nullptr; + } + + auto * buffer = new host_buffer(_rpc_mem, size, _device->get_dsp_domain_id()); + if (!buffer->is_valid()) { + delete buffer; + LOG_ERROR("Failed to allocate buffer of size %zu\n", size); + return nullptr; + } + + LOG_DEBUG("[%s]allocate buffer %p, size: %zu\n", _device->get_name(), buffer->get_buffer(), size); + return ggml_backend_buffer_init(this, backend_buffer_interface, buffer, size); +} + +} // namespace hexagon diff --git a/ggml/src/ggml-qnn/npu/host/buffer.hpp b/ggml/src/ggml-qnn/npu/host/buffer.hpp new file mode 100644 index 0000000000..955944bb98 --- /dev/null +++ b/ggml/src/ggml-qnn/npu/host/buffer.hpp @@ -0,0 +1,66 @@ +#pragma once + +#include +#include + +#include "ggml-backend-impl.h" +#include "hexagon_npu.h" +#include "rpc-mem.hpp" + +namespace hexagon { + +class host_tensor; + +class host_buffer { + public: + explicit host_buffer(common::rpc_mem_ptr allocator, size_t size, uint32_t domain_id); + + ~host_buffer(); + + bool is_valid() const { return _data != nullptr; } + + void * get_buffer() { return _data; } + + size_t get_size() const { return _size; } + + std::shared_ptr init_tensor(ggml_tensor * tensor, remote_handle64 device_handle); + + private: + common::rpc_mem_ptr _allocator; + void * _data = nullptr; + size_t _size = 0; + int _buffer_fd = -1; + uint32_t _domain_id = 0; + + std::list> _tensors; + + DISABLE_COPY(host_buffer); + DISABLE_MOVE(host_buffer); +}; + +class npu_device; + +class host_buffer_type : public ggml_backend_buffer_type { + public: + explicit host_buffer_type(ggml_backend_dev_t dev, const std::string & name, common::rpc_mem_ptr rpc_mem); + + const char * get_name() const { return _name.c_str(); } + + size_t get_buffer_alignment() const; + + size_t get_max_buffer_size() const; + + ggml_backend_buffer_t allocate_buffer(size_t size); + + npu_device * get_device() const { return _device; } + + private: + npu_device * _device = nullptr; + std::string _name; + common::rpc_mem_ptr _rpc_mem; + + DISABLE_COPY(host_buffer_type); + DISABLE_MOVE(host_buffer_type); +}; + +} // namespace hexagon diff --git a/ggml/src/ggml-qnn/npu/host/graph.cpp b/ggml/src/ggml-qnn/npu/host/graph.cpp new file mode 100644 index 0000000000..9e8cf83204 --- /dev/null +++ b/ggml/src/ggml-qnn/npu/host/graph.cpp @@ -0,0 +1,82 @@ +#include "graph.hpp" + +#include "tensor.hpp" + +namespace hexagon { + +host_graph::host_graph(ggml_cgraph * cgraph, remote_handle64 device_handle) : _device_handle(device_handle) { + auto status = npu_device_graph_init(_device_handle, &_graph_handle); + if (status != AEE_SUCCESS) { + LOG_ERROR("Failed to init graph: %d", (int) status); + _graph_handle = 0; + return; + } + + update(cgraph); +} + +host_graph::~host_graph() { + if (_graph_handle) { + npu_device_graph_free(_device_handle, _graph_handle); + _graph_handle = 0; + } +} + +bool host_graph::update(ggml_cgraph * cgraph) { + if (!_graph_handle) { + LOG_ERROR("host_graph not initialized\n"); + return false; + } + + _tensor_handles.clear(); + _tensor_handles.reserve(cgraph->n_nodes); + for (int i = 0; i < cgraph->n_nodes; 
++i) { + auto * node = cgraph->nodes[i]; + if (node->op == GGML_OP_NONE || node->op == GGML_OP_VIEW || node->op == GGML_OP_PERMUTE) { + // skip view liked ops + LOG_DEBUG("node[%d]%s(%s), addr: %p, type: %s, skipped\n", i, ggml_get_name(node), ggml_op_desc(node), + (void *) node, ggml_type_name(node->type)); + continue; + } + + auto * tensor_obj = host_tensor::from_ggml_tensor(node); + if (!tensor_obj) { + LOG_DEBUG("Unable to get host tensor from ggml tensor: %p\n", (void *) node); + continue; + } + + tensor_obj->set_op(node->op); + _tensor_handles.push_back(tensor_obj->get_device_tensor_handle()); + LOG_DEBUG("node[%d]%s(%s), addr: %p, type: %s, tensor_handle: %p\n", i, ggml_get_name(node), ggml_op_desc(node), + (void *) node, ggml_type_name(node->type), (void *) tensor_obj->get_device_tensor_handle()); + for (size_t j = 0; j < GGML_MAX_SRC && node->src[j]; ++j) { + auto * src = host_tensor::from_ggml_tensor(node->src[j]); + tensor_obj->set_src(j, src); + } + } + + LOG_DEBUG("host_graph::update, host_graph(%p), ggml_cgraph(%p), tensor count(%zu)\n", (void *) this, + (void *) cgraph, _tensor_handles.size()); + if (!_tensor_handles.empty()) { + npu_device_graph_set_tensor(_device_handle, _graph_handle, _tensor_handles.data(), + (int) _tensor_handles.size()); + } + return true; +} + +bool host_graph::compute() { + if (!_graph_handle) { + LOG_ERROR("host_graph not initialized\n"); + return false; + } + + auto status = npu_device_graph_compute(_device_handle, _graph_handle); + if (status != AEE_SUCCESS) { + LOG_ERROR("Failed to compute host_graph: 0x%x\n", (int) status); + return false; + } + + return true; +} + +} // namespace hexagon diff --git a/ggml/src/ggml-qnn/npu/host/graph.hpp b/ggml/src/ggml-qnn/npu/host/graph.hpp new file mode 100644 index 0000000000..20c917e120 --- /dev/null +++ b/ggml/src/ggml-qnn/npu/host/graph.hpp @@ -0,0 +1,32 @@ +#pragma once + +#include + +#include "common.hpp" +#include "ggml-backend-impl.h" +#include "hexagon_npu.h" + +namespace hexagon { + +class host_graph { + public: + host_graph(ggml_cgraph * cgraph, remote_handle64 device_handle); + + ~host_graph(); + + bool is_valid() const { return _graph_handle != 0; } + + bool update(ggml_cgraph * cgraph); + + bool compute(); + + private: + remote_handle64 _device_handle = 0; + npu_device_graph_handle_t _graph_handle = 0; + std::vector _tensor_handles; + + DISABLE_COPY(host_graph); + DISABLE_MOVE(host_graph); +}; + +} // namespace hexagon diff --git a/ggml/src/ggml-qnn/npu/host/host.cpp b/ggml/src/ggml-qnn/npu/host/host.cpp new file mode 100644 index 0000000000..90c4cd29e8 --- /dev/null +++ b/ggml/src/ggml-qnn/npu/host/host.cpp @@ -0,0 +1,153 @@ + +#include +#include + +#include "buffer.hpp" +#include "common.hpp" +#include "ggml-backend-impl.h" +#include "ggml-impl.h" +#include "host_device.hpp" + +namespace { + +hexagon::npu_device * get_device_object(ggml_backend_dev_t device) { + return reinterpret_cast(device->context); +} + +hexagon::npu_device * get_device_object(ggml_backend_t backend) { + return get_device_object(backend->device); +} + +const char * backend_dev_get_name(ggml_backend_dev_t dev) { + auto * dev_obj = get_device_object(dev); + GGML_ASSERT(dev_obj != nullptr); + return dev_obj->get_name(); +} + +const char * backend_dev_get_description(ggml_backend_dev_t dev) { + auto * dev_obj = get_device_object(dev); + GGML_ASSERT(dev_obj != nullptr); + return dev_obj->get_description(); +} + +bool backend_dev_is_npu_device(ggml_backend_dev_t dev) { + return dev->iface.get_name == backend_dev_get_name; +} + 
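+
+// Pointer-identity check, the idiom used across ggml backends: a device belongs to this
+// backend iff its vtable's get_name is our function, which avoids string comparisons
+// against devices owned by other backends.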
+void backend_dev_get_memory(ggml_backend_dev_t dev, size_t * free, size_t * total) { + GGML_UNUSED(dev); + *free = common::get_system_free_memory_in_bytes(); + *total = common::get_system_total_memory_in_bytes(); +} + +enum ggml_backend_dev_type backend_dev_get_type(ggml_backend_dev_t dev) { + GGML_UNUSED(dev); + return GGML_BACKEND_DEVICE_TYPE_ACCEL; +} + +void backend_dev_get_props(ggml_backend_dev_t dev, struct ggml_backend_dev_props * props) { + GGML_ASSERT(get_device_object(dev) != nullptr); + props->name = backend_dev_get_name(dev); + props->description = backend_dev_get_description(dev); + props->type = backend_dev_get_type(dev); + backend_dev_get_memory(dev, &props->memory_free, &props->memory_total); + props->caps = {}; +} + +ggml_backend_t backend_dev_init_backend(ggml_backend_dev_t dev, const char * params) { + auto * dev_obj = get_device_object(dev); + GGML_ASSERT(dev_obj != nullptr); + if (!dev_obj->init_device(dev, params)) { + LOG_ERROR("[%s]Failed to init device\n", backend_dev_get_name(dev)); + return nullptr; + } + + return new hexagon::npu_backend(dev); +} + +ggml_backend_buffer_type_t backend_dev_get_buffer_type(ggml_backend_dev_t dev) { + auto * dev_obj = get_device_object(dev); + GGML_ASSERT(dev_obj != nullptr); + return dev_obj->get_default_buffer_type(dev); +} + +ggml_backend_buffer_t backend_dev_buffer_from_host_ptr(ggml_backend_dev_t dev, void * ptr, size_t size, + size_t max_tensor_size) { + // TODO: should we use the device memory here? + GGML_UNUSED(dev); + GGML_UNUSED(max_tensor_size); + return ggml_backend_cpu_buffer_from_ptr(ptr, size); +} + +bool backend_dev_supports_op(ggml_backend_dev_t dev, const struct ggml_tensor * op) { + if (!backend_dev_is_npu_device(dev)) { + return false; + } + + auto * dev_obj = get_device_object(dev); + GGML_ASSERT(dev_obj != nullptr); + return dev_obj->supports_op(op); +} + +bool backend_dev_supports_buft(ggml_backend_dev_t dev, ggml_backend_buffer_type_t buft) { + if (!backend_dev_is_npu_device(dev)) { + return false; + } + + auto * dev_obj = get_device_object(dev); + GGML_ASSERT(dev_obj != nullptr); + return dev_obj->supports_buft(buft); +} + +bool backend_dev_offload_op(ggml_backend_dev_t dev, const struct ggml_tensor * op) { + if (!backend_dev_is_npu_device(dev)) { + return false; + } + + auto * dev_obj = get_device_object(dev); + GGML_ASSERT(dev_obj != nullptr); + return dev_obj->offload_op(op); +} + +constexpr const ggml_backend_device_i npu_device_interface = { + /* .get_name = */ backend_dev_get_name, + /* .get_description = */ backend_dev_get_description, + /* .get_memory = */ backend_dev_get_memory, + /* .get_type = */ backend_dev_get_type, + /* .get_props = */ backend_dev_get_props, + /* .init_backend = */ backend_dev_init_backend, + /* .get_buffer_type = */ backend_dev_get_buffer_type, + /* .get_host_buffer_type = */ nullptr, + /* .buffer_from_host_ptr = */ backend_dev_buffer_from_host_ptr, + /* .supports_op = */ backend_dev_supports_op, + /* .supports_buft = */ backend_dev_supports_buft, + /* .offload_op = */ backend_dev_offload_op, + /* .event_new = */ nullptr, + /* .event_free = */ nullptr, + /* .event_synchronize = */ nullptr, +}; + +class npu_device_proxy : public backend_device_proxy { + public: + explicit npu_device_proxy(backend_index_type device) { _device = std::make_unique(device); } + + const ggml_backend_device_i & get_iface() const { return npu_device_interface; } + + void * get_context() { return _device.get(); } + + private: + std::unique_ptr _device; + + DISABLE_COPY(npu_device_proxy); + 
DISABLE_MOVE(npu_device_proxy); +}; + +} // namespace + +backend_device_proxy_ptr create_hexagon_backend_context(backend_index_type device) { + if (device < QNN_BACKEND_COUNT || device >= TOTAL_BACKEND_COUNT) { + return backend_device_proxy_ptr(); + } + + return std::make_shared(device); +} diff --git a/ggml/src/ggml-qnn/npu/host/host_device.cpp b/ggml/src/ggml-qnn/npu/host/host_device.cpp new file mode 100644 index 0000000000..aa90cfa8bc --- /dev/null +++ b/ggml/src/ggml-qnn/npu/host/host_device.cpp @@ -0,0 +1,305 @@ +#include "host_device.hpp" + +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wmissing-prototypes" +#include +#pragma GCC diagnostic pop + +#include + +#include "graph.hpp" +#include "util.hpp" + +#define SKEL_URI_DEFINE(arch) ("file:///libhexagon_npu_skel_" arch ".so?npu_device_skel_handle_invoke&_modver=1.0") + +namespace { + +struct device_library_info { + hexagon::hexagon_dsp_arch arch; + const char * device_lib_uri; +}; + +constexpr const device_library_info kDeviceLibraryInfo[] = { + { hexagon::NONE, SKEL_URI_DEFINE("") }, + { hexagon::V68, SKEL_URI_DEFINE("v68") }, + { hexagon::V69, SKEL_URI_DEFINE("v69") }, + { hexagon::V73, SKEL_URI_DEFINE("v73") }, + { hexagon::V75, SKEL_URI_DEFINE("v75") }, + { hexagon::V79, SKEL_URI_DEFINE("v79") }, +}; + +const device_library_info & get_device_library_info(hexagon::hexagon_dsp_arch arch) { + for (const auto & info : kDeviceLibraryInfo) { + if (info.arch == arch) { + return info; + } + } + + LOG_ERROR("Unknown DSP arch: %d, using hexagon::NONE\n", arch); + return kDeviceLibraryInfo[0]; +} + +const char * get_domain_param(uint32_t domain_id) { + for (const auto & domain : supported_domains) { + if ((uint32_t) domain.id == domain_id) { + return domain.uri; + } + } + + return ""; +} + +constexpr const ggml_guid kBackendNpuGuid = { 0x7a, 0xd7, 0x59, 0x7d, 0x8f, 0x66, 0x4f, 0x35, + 0x84, 0x8e, 0xf5, 0x9a, 0x9b, 0x83, 0x7d, 0x0a }; + +hexagon::npu_backend * get_backend_object(ggml_backend_t backend) { + return reinterpret_cast(backend); +} + +const char * backend_get_name(ggml_backend_t backend) { + auto * backend_obj = get_backend_object(backend); + GGML_ASSERT(backend_obj != nullptr); + return backend_obj->get_name(); +} + +void backend_free(ggml_backend_t backend) { + delete get_backend_object(backend); +} + +bool backend_cpy_tensor_async(ggml_backend_t backend_src, ggml_backend_t backend_dst, const ggml_tensor * src, + ggml_tensor * dst) { + // TODO: implement this + return false; +} + +ggml_status backend_graph_compute(ggml_backend_t backend, ggml_cgraph * cgraph) { + auto * backend_obj = get_backend_object(backend); + GGML_ASSERT(backend_obj != nullptr); + return backend_obj->graph_compute(cgraph); +} + +} // namespace + +namespace hexagon { + +// TODO: should we use another domain? 
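+
+// CDSP_DOMAIN_ID pins the FastRPC session to the compute DSP; the runtime also exposes
+// ADSP/MDSP/SDSP domains, but matrix workloads normally run on the cDSP. The skel library
+// chosen in init_device() has to match both this domain and the SoC's Hexagon arch
+// (v68..v79), otherwise npu_device_open() fails.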
+npu_device::npu_device(backend_index_type device) : _dsp_domain_id(CDSP_DOMAIN_ID) { + GGML_UNUSED(device); + LOG_DEBUG("[%s]NPU device created\n", _name.c_str()); +} + +npu_device::~npu_device() { + if (_device_handle) { + npu_device_close(_device_handle); + } +} + +size_t npu_device::get_alignment() const { + uint32_t alignment = 0; + npu_device_device_get_alignment(_device_handle, &alignment); + return alignment; +} + +bool npu_device::is_device_initialized() const { + if (!_device_handle) { + LOG_ERROR("[%s]NPU device not opened\n", get_name()); + return false; + } + + if (!_rpc_mem) { + LOG_ERROR("[%s]rpc memory not initialized\n", get_name()); + return false; + } + + return true; +} + +bool npu_device::init_device(ggml_backend_dev_t dev, const char * params) { + if (!init_rpc_mem()) { + return false; + } + + if (!_device_handle) { + auto arch = get_dsp_arch(_rpc_interface, _dsp_domain_id); + const auto & device_lib_info = get_device_library_info(arch); + std::string device_lib_uri = device_lib_info.device_lib_uri; + device_lib_uri += get_domain_param(_dsp_domain_id); + LOG_DEBUG("[%s]NPU device arch: %s, uri: %s\n", get_name(), get_dsp_arch_desc(arch), device_lib_uri.c_str()); + auto err = npu_device_open(device_lib_uri.c_str(), &_device_handle); + if (err != AEE_SUCCESS) { + if (err == AEE_ECONNREFUSED) { + LOG_DEBUG("[%s]NPU device is not available, trying to enable unsigned DSP module and reopen\n", + get_name()); + enable_unsigned_dsp_module(_rpc_interface, _dsp_domain_id); + err = npu_device_open(device_lib_uri.c_str(), &_device_handle); + } + + if (err != AEE_SUCCESS) { + LOG_ERROR("[%s]Unable to open NPU device, err: 0x%x, uri %s\n", get_name(), err, + device_lib_uri.c_str()); + _device_handle = 0; + return false; + } + } + + _description += ' '; + _description += get_dsp_arch_desc(arch); + LOG_DEBUG("[%s]NPU device opened successfully\n", get_name()); + } else { + LOG_DEBUG("[%s]NPU device is already opened\n", get_name()); + } + + return true; +} + +bool npu_device::supports_buft(ggml_backend_buffer_type_t buft) const { + return buft && buft->device && buft->device->context == this; +} + +bool npu_device::supports_op_impl(const ggml_tensor * op) { + if (op->op == GGML_OP_NONE) { + return true; + } + + if (type_to_npu_type(op->type) == NPU_DATA_TYPE_COUNT) { + LOG_DEBUG("[%s]Unsupported op tensor type: %s\n", get_name(), ggml_type_name(op->type)); + return false; + } + + auto * src0 = op->src[0]; + if (!src0) { + LOG_DEBUG("[%s]Unsupported inplace op: %s\n", get_name(), ggml_op_name(op->op)); + return false; + } + + if (type_to_npu_type(src0->type) == NPU_DATA_TYPE_COUNT) { + LOG_DEBUG("[%s]Unsupported src0 tensor type: %s\n", get_name(), ggml_type_name(src0->type)); + return false; + } + + auto * src1 = op->src[1]; + if (src1 && type_to_npu_type(src1->type) == NPU_DATA_TYPE_COUNT) { + LOG_DEBUG("[%s]Unsupported src1 tensor type: %s\n", get_name(), ggml_type_name(src1->type)); + return false; + } + + auto npu_op = op_to_npu_op(op->op); + if (npu_op == NPU_OP_COUNT) { + LOG_DEBUG("[%s]Unsupported op: %s\n", get_name(), ggml_op_name(op->op)); + return false; + } + + constexpr const auto get_spec = [](const ggml_tensor * tensor) -> npu_device_tensor_spec { + if (!tensor) { + return npu_device_tensor_spec{}; + } + + static_assert(DEVICE_TENSOR_MAX_DIMS == GGML_MAX_DIMS, "tensor dimensions mismatch"); + npu_device_tensor_spec spec{}; + spec.ne[0] = tensor->ne[0]; + spec.ne[1] = tensor->ne[1]; + spec.ne[2] = tensor->ne[2]; + spec.ne[3] = tensor->ne[3]; + spec.type = 
type_to_npu_type(tensor->type); + return spec; + }; + + boolean supported = false; + auto src0_spec = get_spec(src0); + auto src1_spec = get_spec(src1); + auto dst_spec = get_spec(op); + auto ret = npu_device_device_support_op(_device_handle, &src0_spec, &src1_spec, &dst_spec, npu_op, &supported); + if (ret != AEE_SUCCESS || !supported) { + LOG_DEBUG("[%s]Unsupported op: %s, ret: 0x%x, supported: %d\n", get_name(), ggml_op_name(op->op), ret, + supported); + return false; + } + + LOG_DEBUG("[%s]Supported op: %s\n", get_name(), ggml_op_name(op->op)); + return true; +} + +bool npu_device::init_rpc_mem() { + if (!_rpc_mem) { + auto rpc_interface = std::make_shared<common::rpc_interface>(); + if (!rpc_interface->is_valid()) { + LOG_ERROR("[%s]Failed to load rpc memory library\n", get_name()); + return false; + } + + auto rpc_mem = std::make_shared<common::rpc_mem>(rpc_interface); + _rpc_interface = rpc_interface; + _rpc_mem = rpc_mem; + LOG_DEBUG("[%s]rpc memory initialized\n", get_name()); + } else { + LOG_DEBUG("[%s]rpc memory already initialized\n", get_name()); + } + + return true; +} + +bool npu_device::offload_op(const ggml_tensor * op) { + // TODO: implement this + return false; +} + +ggml_backend_buffer_type_t npu_device::get_default_buffer_type(ggml_backend_dev_t dev) { + // Note that this function will be called before the npu_device::init_device + if (!init_rpc_mem()) { + return nullptr; + } + + if (!_default_buffer_type) { + LOG_DEBUG("[%s]Creating default buffer type\n", get_name()); + _default_buffer_type = std::make_unique<npu_buffer_type>(dev, _name + "_buffer_type", _rpc_mem); + if (!_default_buffer_type) { + LOG_ERROR("[%s]Default buffer type not initialized\n", get_name()); + return nullptr; + } + } else { + LOG_DEBUG("[%s]Default buffer type already created\n", get_name()); + } + + return _default_buffer_type.get(); +} + +npu_backend::npu_backend(ggml_backend_dev_t dev) : ggml_backend{} { + memcpy(&_guid, &kBackendNpuGuid, sizeof(ggml_guid)); + device = dev; + guid = &_guid; + iface.get_name = backend_get_name; + iface.free = backend_free; + iface.cpy_tensor_async = backend_cpy_tensor_async; + iface.graph_compute = backend_graph_compute; + _device = reinterpret_cast<npu_device *>(dev->context); +} + +ggml_status npu_backend::graph_compute(ggml_cgraph * cgraph) { + if (!cgraph || !cgraph->n_nodes) { + LOG_DEBUG("[%s]Graph is empty, nothing to compute\n", get_name()); + return GGML_STATUS_SUCCESS; + } + + std::shared_ptr<host_graph> graph; + if (_graph_cache.count(cgraph) == 0) { + LOG_DEBUG("[%s]graph(%p) not found in cache, creating new graph\n", get_name(), (void *) cgraph); + graph = std::make_shared<host_graph>(cgraph, _device->get_device_handle()); + if (!graph->is_valid()) { + LOG_ERROR("Failed to create graph\n"); + return GGML_STATUS_FAILED; + } + + _graph_cache[cgraph] = graph; + } else { + graph = _graph_cache[cgraph]; + LOG_DEBUG("[%s]graph(%p) found in cache, using existing graph\n", get_name(), (void *) cgraph); + if (!graph->update(cgraph)) { + LOG_ERROR("[%s]Failed to update graph(%p)\n", get_name(), (void *) cgraph); + return GGML_STATUS_FAILED; + } + } + + return graph->compute() ?
GGML_STATUS_SUCCESS : GGML_STATUS_FAILED; +} + +} // namespace hexagon diff --git a/ggml/src/ggml-qnn/npu/host/host_device.hpp b/ggml/src/ggml-qnn/npu/host/host_device.hpp new file mode 100644 index 0000000000..efc7914f18 --- /dev/null +++ b/ggml/src/ggml-qnn/npu/host/host_device.hpp @@ -0,0 +1,107 @@ +#pragma once + +#include <memory> +#include <unordered_map> +#ifndef NDEBUG +# include <atomic> +#endif + +#include "buffer.hpp" +#include "common.hpp" +#include "ggml-backend-impl.h" +#include "hexagon_npu.h" +#include "rpc-mem.hpp" + +namespace hexagon { + +class npu_device { + public: + explicit npu_device(backend_index_type device); + + ~npu_device(); + + const char * get_name() const { return _name.c_str(); } + + const char * get_description() const { return _description.c_str(); } + + size_t get_alignment() const; + + uint32_t get_dsp_domain_id() const { return _dsp_domain_id; } + + ggml_backend_buffer_type_t get_default_buffer_type(ggml_backend_dev_t dev); + + bool is_device_initialized() const; + bool init_device(ggml_backend_dev_t dev, const char * params); + + bool supports_buft(ggml_backend_buffer_type_t buft) const; + bool offload_op(const ggml_tensor * op); + +#ifndef NDEBUG + bool supports_op(const ggml_tensor * op) { + if (supports_op_impl(op)) { + if (op->op != GGML_OP_NONE) { + _supported_op++; + LOG_DEBUG("[%s]Supported op: %s, supported/unsupported: %u/%u\n", get_name(), ggml_op_name(op->op), + _supported_op.load(), _unsupported_op.load()); + } + + return true; + } + + _unsupported_op++; + LOG_DEBUG("[%s]Unsupported op: %s, supported/unsupported: %u/%u\n", get_name(), ggml_op_name(op->op), + _supported_op.load(), _unsupported_op.load()); + return false; + } +#else + bool supports_op(const ggml_tensor * op) { return supports_op_impl(op); } +#endif + + remote_handle64 get_device_handle() const { return _device_handle; } + + private: + bool supports_op_impl(const ggml_tensor * op); + bool init_rpc_mem(); + + std::string _name = "hexagon-npu"; + std::string _description = "Hexagon NPU"; + common::rpc_interface_ptr _rpc_interface; + common::rpc_mem_ptr _rpc_mem; + remote_handle64 _device_handle = 0; + std::unique_ptr<npu_buffer_type> _default_buffer_type; + uint32_t _dsp_domain_id = 0; + +#ifndef NDEBUG + std::atomic_uint32_t _supported_op = 0; + std::atomic_uint32_t _unsupported_op = 0; +#endif + + DISABLE_COPY(npu_device); + DISABLE_MOVE(npu_device); +}; + +class host_graph; + +class npu_backend : public ggml_backend { + public: + explicit npu_backend(ggml_backend_dev_t dev); + + ~npu_backend() {} + + const char * get_name() const { + // TODO: should we use the device name here? + return _device->get_name(); + } + + ggml_status graph_compute(ggml_cgraph * cgraph); + + private: + ggml_guid _guid = {}; + npu_device * _device = nullptr; + std::unordered_map<ggml_cgraph *, std::shared_ptr<host_graph>> _graph_cache; + + DISABLE_COPY(npu_backend); + DISABLE_MOVE(npu_backend); +}; + +} // namespace hexagon diff --git a/ggml/src/ggml-qnn/npu/host/tensor.hpp b/ggml/src/ggml-qnn/npu/host/tensor.hpp new file mode 100644 index 0000000000..e7d5f7a88a --- /dev/null +++ b/ggml/src/ggml-qnn/npu/host/tensor.hpp @@ -0,0 +1,88 @@ +#pragma once + +#include "common.hpp" +#include "ggml-impl.h" +#include "hexagon_npu.h" +#include "util.hpp" + +namespace hexagon { + +// TODO: merge this with device tensor?
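Editor's note: the `host_tensor` class below installs itself into `ggml_tensor::extra` and mirrors the tensor on the DSP. A hedged sketch of the intended round trip; the rpcmem-backed buffer that would normally supply `fd` and `offset` is not part of this section, so both are placeholders:

```cpp
// Sketch only: create a device-side mirror for a ggml tensor, then recover
// it later through tensor->extra. Real code gets fd/offset from the buffer
// that owns the tensor's data.
void mirror_tensor_sketch(ggml_tensor * t, remote_handle64 dev) {
    const int fd = 42;          // placeholder: rpcmem buffer file descriptor
    const uint64_t offset = 0;  // placeholder: tensor offset inside that buffer
    auto * ht = new hexagon::host_tensor(t, fd, offset, dev);  // sets t->extra on success
    if (!ht->is_valid()) {
        delete ht;  // device-side init failed; extra was never set
        return;
    }
    // later, anywhere the ggml_tensor is visible:
    auto * again = hexagon::host_tensor::from_ggml_tensor(t);
    GGML_ASSERT(again == ht && again->get_device_tensor_handle() != 0);
}
```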
+class host_tensor { + public: + static host_tensor * from_ggml_tensor(ggml_tensor * tensor) { + if (!tensor || !tensor->extra) { + return nullptr; + } + return static_cast(tensor->extra); + } + + explicit host_tensor(ggml_tensor * tensor, int buffer_fd, uint64_t offset, remote_handle64 device_handle) : + _device_handle(device_handle) { + _info.buffer_fd = buffer_fd; + _info.offset = offset; + _info.type = type_to_npu_type(tensor->type); + _info.op = op_to_npu_op(tensor->op); + _info.size = ggml_nbytes(tensor); + + static_assert(DEVICE_TENSOR_MAX_DIMS == GGML_MAX_DIMS, "tensor dimensions mismatch"); + static_assert(sizeof(_info.ne) == sizeof(tensor->ne), "tensor ne size mismatch"); + static_assert(sizeof(_info.nb) == sizeof(tensor->nb), "tensor nb size mismatch"); + memcpy(_info.ne, tensor->ne, sizeof(_info.ne)); + memcpy(_info.nb, tensor->nb, sizeof(_info.nb)); + + auto status = npu_device_tensor_init(_device_handle, &_info, &_device_tensor_handle); + if (status != AEE_SUCCESS) { + LOG_ERROR("Failed to init tensor: %d", (int) status); + _device_tensor_handle = 0; + return; + } + + tensor->extra = this; + _ggml_tensor = tensor; + LOG_DEBUG( + "host_tensor(%p) created, ggml_tensor(%p[%ldx%ldx%ldx%ld], nb[%ld][%ld][%ld][%ld]), " + "device_tensor_handle(%p)\n", + (void *) this, (void *) tensor, (long) tensor->ne[0], (long) tensor->ne[1], (long) tensor->ne[2], + (long) tensor->ne[3], (long) tensor->nb[0], (long) tensor->nb[1], (long) tensor->nb[2], + (long) tensor->nb[3], (void *) _device_tensor_handle); + } + + ~host_tensor() { + LOG_DEBUG("host_tensor(%p) destroy, device_tensor_handle: %p\n", (void *) this, (void *) _device_tensor_handle); + if (_device_tensor_handle) { + npu_device_tensor_free(_device_handle, _device_tensor_handle); + _ggml_tensor->extra = nullptr; + } + } + + npu_device_tensor_handle_t get_device_tensor_handle() const { return _device_tensor_handle; } + + void set_src(size_t index, host_tensor * src) { + if (index >= DEVICE_TENSOR_MAX_SRC) { + LOG_ERROR("host_tensor(%p) set_src[%zu] out of range\n", (void *) this, index); + return; + } + + LOG_DEBUG("host_tensor(%p) set_src[%zu]: %p\n", (void *) this, index, (void *) src); + npu_device_tensor_set_src(_device_handle, _device_tensor_handle, index, src->get_device_tensor_handle()); + } + + void set_op(ggml_op op) { + _info.op = op_to_npu_op(op); + npu_device_tensor_set_op(_device_handle, _device_tensor_handle, _info.op); + } + + bool is_valid() const { return _device_tensor_handle != 0; } + + private: + remote_handle64 _device_handle = 0; + npu_device_tensor_handle_t _device_tensor_handle = 0; + npu_device_tensor_config _info = {}; + ggml_tensor * _ggml_tensor = nullptr; + + DISABLE_COPY(host_tensor); + DISABLE_MOVE(host_tensor); +}; + +} // namespace hexagon diff --git a/ggml/src/ggml-qnn/npu/host/util.cpp b/ggml/src/ggml-qnn/npu/host/util.cpp new file mode 100644 index 0000000000..5db54b661e --- /dev/null +++ b/ggml/src/ggml-qnn/npu/host/util.cpp @@ -0,0 +1,96 @@ +#include "util.hpp" + +#include + +namespace hexagon { + +enum npu_device_tensor_op op_to_npu_op(ggml_op op) { + switch (op) { + case GGML_OP_MUL_MAT: + return NPU_OP_MUL_MAT; + case GGML_OP_ADD: + return NPU_OP_ADD; + case GGML_OP_SUB: + return NPU_OP_SUB; + case GGML_OP_MUL: + return NPU_OP_MUL; + default: + return NPU_OP_COUNT; + } +} + +enum npu_device_tensor_data_type type_to_npu_type(ggml_type type) { + switch (type) { + case GGML_TYPE_F32: + return NPU_DATA_TYPE_F32; + default: + return NPU_DATA_TYPE_COUNT; + } +} + +hexagon_dsp_arch 
get_dsp_arch(common::rpc_interface_ptr rpc_interface, uint32_t domain_id) { + if (!rpc_interface || !rpc_interface->is_valid()) { + return NONE; + } + + remote_dsp_capability dsp_caps = {}; + dsp_caps.domain = domain_id; + dsp_caps.attribute_ID = ARCH_VER; + auto ret = rpc_interface->remote_handle_control(DSPRPC_GET_DSP_INFO, &dsp_caps, sizeof(dsp_caps)); + if (ret != AEE_SUCCESS) { + LOG_ERROR("failed to get DSP arch: %d\n", ret); + return NONE; + } + + LOG_DEBUG("get DSP arch: 0x%x\n", (int) dsp_caps.capability); + auto arch = dsp_caps.capability & 0xFF; + switch (arch) { + case 0x68: + return V68; + case 0x69: + return V69; + case 0x73: + return V73; + case 0x75: + return V75; + case 0x79: + return V79; + default: + LOG_ERROR("unknown DSP arch: %x\n", arch); + return NONE; + } +} + +const char * get_dsp_arch_desc(hexagon_dsp_arch arch) { + switch (arch) { + case V68: + return "V68"; + case V69: + return "V69"; + case V73: + return "V73"; + case V75: + return "V75"; + case V79: + return "V79"; + case NONE: + default: + return "UnknownArch"; + } +} + +void enable_unsigned_dsp_module(common::rpc_interface_ptr rpc_interface, uint32_t domain_id) { + if (!rpc_interface || !rpc_interface->is_valid()) { + return; + } + + remote_rpc_control_unsigned_module data = {}; + data.domain = domain_id; + data.enable = 1; + auto ret = rpc_interface->remote_session_control(DSPRPC_CONTROL_UNSIGNED_MODULE, &data, sizeof(data)); + if (ret != AEE_SUCCESS) { + LOG_ERROR("failed to enable unsigned DSP module: 0x%x\n", ret); + } +} + +} // namespace hexagon diff --git a/ggml/src/ggml-qnn/npu/host/util.hpp b/ggml/src/ggml-qnn/npu/host/util.hpp new file mode 100644 index 0000000000..c001272d4c --- /dev/null +++ b/ggml/src/ggml-qnn/npu/host/util.hpp @@ -0,0 +1,26 @@ +#include "ggml-impl.h" +#include "hexagon_npu.h" +#include "rpc-interface.hpp" + +namespace hexagon { + +enum npu_device_tensor_op op_to_npu_op(ggml_op op); +enum npu_device_tensor_data_type type_to_npu_type(ggml_type type); + +// TODO: merge with qcom_htp_arch +enum hexagon_dsp_arch { + NONE = 0, + V68, + V69, + V73, + V75, + V79, // SD 8 Gen 4 (SM8750) +}; + +hexagon_dsp_arch get_dsp_arch(common::rpc_interface_ptr rpc_interface, uint32_t domain_id); + +const char * get_dsp_arch_desc(hexagon_dsp_arch arch); + +void enable_unsigned_dsp_module(common::rpc_interface_ptr rpc_interface, uint32_t domain_id); + +} // namespace hexagon diff --git a/ggml/src/ggml-qnn/npu/idl/hexagon_npu.idl b/ggml/src/ggml-qnn/npu/idl/hexagon_npu.idl new file mode 100644 index 0000000000..d62e65b3bd --- /dev/null +++ b/ggml/src/ggml-qnn/npu/idl/hexagon_npu.idl @@ -0,0 +1,90 @@ +#include "AEEStdDef.idl" +#include "AEEStdErr.idl" +#include "remote.idl" + +const uint32_t DEVICE_TENSOR_MAX_DIMS = 4; +const uint32_t DEVICE_TENSOR_MAX_SRC = 2; + +interface npu_device : remote_handle64{ + + typedef int64_t ne_type[DEVICE_TENSOR_MAX_DIMS]; + typedef uint64_t tensor_handle_t; + typedef uint64_t graph_handle_t; + + enum tensor_op { + NPU_OP_MUL_MAT, + NPU_OP_ADD, + NPU_OP_SUB, + NPU_OP_MUL, + NPU_OP_COUNT + }; + + enum tensor_data_type { + NPU_DATA_TYPE_F32, + NPU_DATA_TYPE_COUNT + }; + + struct tensor_spec { + ne_type ne; + tensor_data_type type; + }; + + struct tensor_config { + ne_type ne; + uint64_t nb[DEVICE_TENSOR_MAX_DIMS]; + long buffer_fd; + uint64_t offset; + uint64_t size; + tensor_data_type type; + tensor_op op; + }; + + AEEResult device_get_alignment( + rout uint32_t alignment + ); + + AEEResult device_support_op( + in tensor_spec src0, + in tensor_spec src1, + in 
tensor_spec dst, + in tensor_op op, + rout boolean is_supported + ); + + AEEResult tensor_init( + in tensor_config info, + rout tensor_handle_t tensor_handle + ); + + AEEResult tensor_set_src( + in tensor_handle_t tensor_handle, + in uint64_t index, + in tensor_handle_t src + ); + + AEEResult tensor_set_op( + in tensor_handle_t tensor_handle, + in tensor_op op + ); + + AEEResult tensor_free( + in tensor_handle_t tensor_handle + ); + + AEEResult graph_init( + rout graph_handle_t graph_handle + ); + + AEEResult graph_set_tensor( + in graph_handle_t graph_handle, + in sequence tensor_handles + ); + + AEEResult graph_compute( + in graph_handle_t graph_handle + ); + + AEEResult graph_free( + in graph_handle_t graph_handle + ); +}; diff --git a/ggml/src/ggml-qnn/qnn-types.hpp b/ggml/src/ggml-qnn/qnn-types.hpp deleted file mode 100644 index 957f8b681f..0000000000 --- a/ggml/src/ggml-qnn/qnn-types.hpp +++ /dev/null @@ -1,61 +0,0 @@ - -#pragma once - -#include "QnnCommon.h" -#include "QnnInterface.h" -#include "QnnTypes.h" -#include "Saver/QnnSaver.h" -#include "System/QnnSystemInterface.h" - -namespace qnn { - -enum qcom_htp_arch { - NONE = 0, - V68 = 68, - V69 = 69, - V73 = 73, - V75 = 75, - V79 = 79, // SD 8 Gen 4 (SM8750) -}; - -enum qcom_chipset { - UNKNOWN_SM = 0, - SM8350 = 30, // v68, SD 888/888+ - SM8450 = 36, // v69, SD 8 Gen 1 - SA8295 = 39, // v68 - SM8475 = 42, // v69, SD 8+ Gen 1 - SM8550 = 43, // v73, SD 8 Gen 2 - SSG2115P = 46, // v73 - SM7675 = 70, // V73, SD 7+ Gen 3 - SM8635 = 68, // v73, SD 8s Gen 3 - SM8650 = 57, // v75, SD 8 Gen 3 - SM8750 = 69, // v79, SD 8 Gen 4 -}; - -struct qcom_socinfo { - uint32_t soc_model; - size_t htp_arch; - size_t vtcm_size_in_mb; -}; - -using pfn_rpc_mem_init = void (*)(void); -using pfn_rpc_mem_deinit = void (*)(void); -using pfn_rpc_mem_alloc = void * (*) (int, uint32_t, int); -using pfn_rpc_mem_free = void (*)(void *); -using pfn_rpc_mem_to_fd = int (*)(void *); - -using pfn_qnnsaver_initialize = decltype(QnnSaver_initialize); -using pfn_qnninterface_getproviders = decltype(QnnInterface_getProviders); -using pfn_qnnsysteminterface_getproviders = decltype(QnnSystemInterface_getProviders); -} // namespace qnn - -#define RPCMEM_DEFAULT_FLAGS 1 -#define RPCMEM_HEAP_ID_SYSTEM 25 - -#define DISABLE_COPY(class_name) \ - class_name(const class_name &) = delete; \ - void operator=(const class_name &) = delete - -#define DISABLE_MOVE(class_name) \ - class_name(class_name &&) = delete; \ - void operator=(class_name &&) = delete diff --git a/ggml/src/ggml-qnn/backend-ops.cpp b/ggml/src/ggml-qnn/qnn/backend-ops.cpp similarity index 94% rename from ggml/src/ggml-qnn/backend-ops.cpp rename to ggml/src/ggml-qnn/qnn/backend-ops.cpp index 857278bdaa..d4d2c57cbf 100644 --- a/ggml/src/ggml-qnn/backend-ops.cpp +++ b/ggml/src/ggml-qnn/qnn/backend-ops.cpp @@ -12,7 +12,7 @@ namespace { -qnn::qnn_graph * get_qnn_graph_from_cache(ggml_backend_qnn_device_context * ctx, const ggml_cgraph * cgraph) { +qnn::qnn_graph * get_qnn_graph_from_cache(qnn::ggml_backend_qnn_device_context * ctx, const ggml_cgraph * cgraph) { auto & graph_cache = ctx->qnn_graph_cache; std::string graph_key; auto op_data_type = qnn::qnn_graph::get_graph_key_from_cgraph(cgraph, graph_key); @@ -178,7 +178,7 @@ inline bool is_type_bit_enabled(uint64_t bits, ggml_type type) { return bits & (uint64_t(1) << type); } -inline bool is_tensor_size_valid(ggml_backend_qnn_device_context * ctx, const ggml_tensor * tensor) { +inline bool is_tensor_size_valid(qnn::ggml_backend_qnn_device_context * ctx, const 
ggml_tensor * tensor) { constexpr const auto get_tensor_size_in_bytes = [](const ggml_tensor * tensor, ggml_type type) -> size_t { return tensor->ne[0] * tensor->ne[1] * tensor->ne[2] * tensor->ne[3] * ggml_type_size(type); }; @@ -200,7 +200,7 @@ inline bool is_tensor_size_valid(ggml_backend_qnn_device_context * ctx, const gg return true; } -bool is_tensor_type_valid(ggml_backend_qnn_device_context * ctx, const ggml_tensor * tensor) { +bool is_tensor_type_valid(qnn::ggml_backend_qnn_device_context * ctx, const ggml_tensor * tensor) { if (!tensor) { QNN_LOG_DEBUG("tensor is nullptr\n"); return false; @@ -239,7 +239,7 @@ bool is_data_reinterpretation_op(ggml_op op) { return op == GGML_OP_VIEW || op == GGML_OP_PERMUTE; } -bool ggnl_qnn_supports_op_tensor(ggml_backend_qnn_device_context * ctx, const ggml_tensor * op) { +bool ggnl_qnn_supports_op_tensor(qnn::ggml_backend_qnn_device_context * ctx, const ggml_tensor * op) { if (op->op == GGML_OP_NONE) { return true; } @@ -265,7 +265,7 @@ bool ggnl_qnn_supports_op_tensor(ggml_backend_qnn_device_context * ctx, const gg return true; } -bool ggml_qnn_have_same_tensor_types(ggml_backend_qnn_device_context * ctx, const ggml_tensor * op) { +bool ggml_qnn_have_same_tensor_types(qnn::ggml_backend_qnn_device_context * ctx, const ggml_tensor * op) { auto * src0 = op->src[0]; auto * src1 = op->src[1]; if (src1) { @@ -291,7 +291,7 @@ bool ggml_qnn_have_same_tensor_types(ggml_backend_qnn_device_context * ctx, cons } // TODO: move to caps array? -bool ggml_qnn_supports_matmul_op(ggml_backend_qnn_device_context * ctx, const ggml_tensor * op) { +bool ggml_qnn_supports_matmul_op(qnn::ggml_backend_qnn_device_context * ctx, const ggml_tensor * op) { auto * src0 = op->src[0]; auto * src1 = op->src[1]; if (is_data_reinterpretation_op(src0->op) || is_data_reinterpretation_op(src1->op)) { @@ -343,7 +343,7 @@ bool ggml_qnn_supports_matmul_op(ggml_backend_qnn_device_context * ctx, const gg #ifndef NDEBUG -void print_tensor_info(ggml_backend_qnn_device_context * ctx, const ggml_tensor * op, bool is_supported) { +void print_tensor_info(qnn::ggml_backend_qnn_device_context * ctx, const ggml_tensor * op, bool is_supported) { const char * supported = is_supported ? 
"supported" : "unsupported"; std::string op_key; qnn::get_qnn_op_desc(op, true, GGML_TYPE_COUNT, op_key); @@ -358,7 +358,7 @@ void print_tensor_info(ggml_backend_qnn_device_context * ctx, const ggml_tensor namespace qnn { -bool device_supports_op(ggml_backend_qnn_device_context * ctx, const ggml_tensor * op) { +bool device_supports_op(qnn::ggml_backend_qnn_device_context * ctx, const ggml_tensor * op) { // Note that this function could be called before the device context is initialized if (op->op == GGML_OP_NONE) { return true; @@ -435,7 +435,7 @@ bool device_supports_op(ggml_backend_qnn_device_context * ctx, const ggml_tensor return is_op_supported; } -bool device_compute_graph(ggml_backend_qnn_device_context * ctx, ggml_cgraph * cgraph) { +bool device_compute_graph(qnn::ggml_backend_qnn_device_context * ctx, ggml_cgraph * cgraph) { QNN_LOG_DEBUG("[%s]compute graph start, nodes count: %d\n", qnn::get_backend_name(ctx->device), (int) cgraph->n_nodes); diff --git a/ggml/src/ggml-qnn/backend.hpp b/ggml/src/ggml-qnn/qnn/backend-ops.hpp similarity index 76% rename from ggml/src/ggml-qnn/backend.hpp rename to ggml/src/ggml-qnn/qnn/backend-ops.hpp index f2484a7a97..564a64a40e 100644 --- a/ggml/src/ggml-qnn/backend.hpp +++ b/ggml/src/ggml-qnn/qnn/backend-ops.hpp @@ -1,4 +1,3 @@ - #pragma once #ifndef NDEBUG @@ -18,15 +17,15 @@ #include "qnn-lib.hpp" namespace qnn { + typedef std::unordered_map> qnn_graph_cache_t; -} // namespace qnn struct ggml_backend_qnn_device_context { // initialize in constructor - QNNBackend device; - size_t threads; - std::string name; - std::string description; + backend_index_type device; + size_t threads; + std::string name; + std::string description; // initialize in qnn init qnn::qcom_socinfo socinfo = {}; @@ -46,10 +45,15 @@ struct ggml_backend_qnn_device_context { uint64_t supported_types; uint64_t cpu_preprocess_types; - explicit ggml_backend_qnn_device_context(QNNBackend device, size_t threads, const char * name, + explicit ggml_backend_qnn_device_context(backend_index_type device, size_t threads, const char * name, uint64_t supported_types) : device(device), threads(threads), name(name), supported_types(supported_types) {} }; + +bool device_supports_op(ggml_backend_qnn_device_context * ctx, const ggml_tensor * op); +bool device_compute_graph(ggml_backend_qnn_device_context * ctx, ggml_cgraph * cgraph); + +} // namespace qnn diff --git a/ggml/src/ggml-qnn/buffer.hpp b/ggml/src/ggml-qnn/qnn/buffer.hpp similarity index 100% rename from ggml/src/ggml-qnn/buffer.hpp rename to ggml/src/ggml-qnn/qnn/buffer.hpp diff --git a/ggml/src/ggml-qnn/convert.cpp b/ggml/src/ggml-qnn/qnn/convert.cpp similarity index 100% rename from ggml/src/ggml-qnn/convert.cpp rename to ggml/src/ggml-qnn/qnn/convert.cpp diff --git a/ggml/src/ggml-qnn/convert.hpp b/ggml/src/ggml-qnn/qnn/convert.hpp similarity index 100% rename from ggml/src/ggml-qnn/convert.hpp rename to ggml/src/ggml-qnn/qnn/convert.hpp diff --git a/ggml/src/ggml-qnn/ggml-qnn.cpp b/ggml/src/ggml-qnn/qnn/ggml-qnn.cpp similarity index 81% rename from ggml/src/ggml-qnn/ggml-qnn.cpp rename to ggml/src/ggml-qnn/qnn/ggml-qnn.cpp index 1d3e45562c..e559cfdb28 100644 --- a/ggml/src/ggml-qnn/ggml-qnn.cpp +++ b/ggml/src/ggml-qnn/qnn/ggml-qnn.cpp @@ -1,11 +1,9 @@ -#include "ggml-qnn.h" - #include #include #include #include "backend-ops.hpp" -#include "backend.hpp" +#include "common.hpp" #include "ggml-backend-impl.h" #include "ggml-impl.h" #include "logger.hpp" @@ -14,8 +12,8 @@ namespace { -ggml_backend_qnn_device_context * 
get_device_context(ggml_backend_dev_t dev) { - return reinterpret_cast(dev->context); +qnn::ggml_backend_qnn_device_context * get_device_context(ggml_backend_dev_t dev) { + return reinterpret_cast(dev->context); } qnn::qnn_buffer_interface * get_buffer_context(ggml_backend_buffer_t buffer) { @@ -141,6 +139,16 @@ void ggml_backend_qnn_free(ggml_backend_t backend) { delete backend; } +ggml_guid_t ggml_backend_qnn_guid() { + static ggml_guid guid = { 0x1a, 0x2b, 0x3c, 0x4d, 0x5e, 0x6f, 0x70, 0x81, + 0x92, 0xa3, 0xb4, 0xc5, 0xd6, 0xe7, 0xf8, 0x09 }; + return &guid; +} + +bool ggml_backend_is_qnn(ggml_backend_t backend) { + return ggml_guid_matches(backend->guid, ggml_backend_qnn_guid()); +} + bool ggml_backend_qnn_cpy_tensor_async(ggml_backend_t backend_src, ggml_backend_t backend_dst, const ggml_tensor * src, ggml_tensor * dst) { GGML_UNUSED(backend_src); @@ -154,7 +162,7 @@ bool ggml_backend_qnn_cpy_tensor_async(ggml_backend_t backend_src, ggml_backend_ } ggml_backend_buffer_type_t ggml_backend_qnn_buffer_type(ggml_backend_dev_t dev) { - static ggml_backend_buffer_type ggml_backend_qnn_buffer_types[GGML_QNN_MAX_DEVICES]; + static ggml_backend_buffer_type ggml_backend_qnn_buffer_types[QNN_BACKEND_COUNT]; auto * dev_ctx = get_device_context(dev); if (!ggml_backend_qnn_buffer_types[dev_ctx->device].device) { ggml_backend_qnn_buffer_types[dev_ctx->device] = { @@ -215,8 +223,8 @@ const char * ggml_backend_qnn_device_get_description(ggml_backend_dev_t dev) { void ggml_backend_qnn_device_get_memory(ggml_backend_dev_t dev, size_t * free, size_t * total) { GGML_UNUSED(dev); - *free = qnn::get_system_free_memory_in_bytes(); - *total = qnn::get_system_total_memory_in_bytes(); + *free = common::get_system_free_memory_in_bytes(); + *total = common::get_system_total_memory_in_bytes(); QNN_LOG_DEBUG("free memory: %ldMB, total memory: %ldMB\n", (*free / 1048576), (*total) / 1048576); } @@ -237,12 +245,6 @@ void ggml_backend_qnn_device_get_props(ggml_backend_dev_t dev, ggml_backend_dev_ }; } -ggml_guid_t ggml_backend_qnn_guid() { - static ggml_guid guid = { 0x1a, 0x2b, 0x3c, 0x4d, 0x5e, 0x6f, 0x70, 0x81, - 0x92, 0xa3, 0xb4, 0xc5, 0xd6, 0xe7, 0xf8, 0x09 }; - return &guid; -} - ggml_backend_t ggml_backend_qnn_init_with_device_context(ggml_backend_dev_t dev, const char * extend_lib_search_path) { if (!extend_lib_search_path) { extend_lib_search_path = GGML_QNN_DEFAULT_LIB_SEARCH_PATH; @@ -256,8 +258,7 @@ ggml_backend_t ggml_backend_qnn_init_with_device_context(ggml_backend_dev_t dev, QNN_LOG_DEBUG("device %s\n", qnn::get_backend_name(device)); QNN_LOG_DEBUG("extend_lib_search_path %s\n", extend_lib_search_path); auto instance = std::make_shared(extend_lib_search_path, device); - auto result = instance->qnn_init(nullptr); - if (result != 0) { + if (!instance->qnn_init(nullptr)) { QNN_LOG_WARN("failed to init qnn backend %s\n", qnn::get_backend_name(device)); return nullptr; } @@ -351,80 +352,43 @@ constexpr const ggml_backend_device_i ggml_backend_qnn_device_interface = { /* .event_synchronize = */ nullptr, }; -/* - * ----------------------------------------------------------------------------------------------- - * qnn backend registry object - * ----------------------------------------------------------------------------------------------- - */ - -struct ggml_backend_qnn_reg_impl : ggml_backend_reg { - std::vector> device_contexts; - std::vector devices; - - explicit ggml_backend_qnn_reg_impl(ggml_backend_reg_i interface) { - context = this; - iface = interface; - - QNN_LOG_DEBUG("qnn backend registry 
init\n"); - for (size_t i = 0; i < QNN_BACKEND_COUNT; i++) { - const auto device_enum = (QNNBackend) (QNN_BACKEND_COUNT - 1 - i); // init from the last device, i.e. NPU -#ifndef GGML_QNN_ENABLE_CPU_BACKEND - if (device_enum == QNN_BACKEND_CPU) { - /* - * here we skip the initialization of CPU device, - * cause it'll block unsupported ops fallback to ggml cpu backend - */ - QNN_LOG_DEBUG("qnn backend registry skip CPU device\n"); - continue; - } -#endif - - const auto & device_caps = qnn::get_device_caps(device_enum); - device_contexts.emplace_back(std::make_unique( - /* .device = */ device_enum, // init from the last device, i.e. NPU - /* .threads = */ 1, - /* .name = */ qnn::get_backend_name(device_enum), - /* .supported_types = */ device_caps.supported_types)); - - devices.emplace_back(ggml_backend_device{ - /* iface = */ ggml_backend_qnn_device_interface, - /* reg = */ this, - /* context = */ device_contexts.back().get(), - }); - } +class qnn_device_proxy : public backend_device_proxy { + public: + explicit qnn_device_proxy(backend_index_type device) { + const auto & device_caps = qnn::get_device_caps(device); + _device_context = std::make_unique( + /* .device = */ device, // init from the last device, i.e. NPU + /* .threads = */ 1, // TODO: fix this + /* .name = */ qnn::get_backend_name(device), + /* .supported_types = */ device_caps.supported_types); } -}; -const char * ggml_backend_qnn_reg_get_name(ggml_backend_reg_t reg) { - GGML_UNUSED(reg); - return GGML_QNN_NAME; -} + const ggml_backend_device_i & get_iface() const { return ggml_backend_qnn_device_interface; } -size_t ggml_backend_qnn_reg_get_device_count(ggml_backend_reg_t reg) { - auto * ctx = (ggml_backend_qnn_reg_impl *) reg->context; - return ctx->devices.size(); -} + void * get_context() { return _device_context.get(); } -ggml_backend_dev_t ggml_backend_qnn_reg_get_device(ggml_backend_reg_t reg, size_t index) { - auto * ctx = (ggml_backend_qnn_reg_impl *) reg->context; - GGML_ASSERT(index < ctx->devices.size()); - return &(ctx->devices[index]); -} - -const ggml_backend_reg_i ggml_backend_qnn_reg_interface = { - /* .get_name = */ ggml_backend_qnn_reg_get_name, - /* .get_device_count = */ ggml_backend_qnn_reg_get_device_count, - /* .get_device_get = */ ggml_backend_qnn_reg_get_device, - /* .get_proc_address = */ nullptr, + private: + std::unique_ptr _device_context; }; } // namespace -bool ggml_backend_is_qnn(ggml_backend_t backend) { - return ggml_guid_matches(backend->guid, ggml_backend_qnn_guid()); -} +backend_device_proxy_ptr create_qnn_backend_context(backend_index_type device) { + if (device >= QNN_BACKEND_COUNT) { + QNN_LOG_ERROR("[qnn]invalid device %d\n", device); + return backend_device_proxy_ptr(); + } -ggml_backend_reg_t ggml_backend_qnn_reg() { - static ggml_backend_qnn_reg_impl reg{ ggml_backend_qnn_reg_interface }; - return ® +#ifndef GGML_QNN_ENABLE_CPU_BACKEND + if (device == QNN_BACKEND_CPU) { + /* + * here we skip the initialization of CPU device, + * cause it'll block unsupported ops fallback to ggml cpu backend + */ + GGML_LOG_DEBUG("qnn backend registry skip CPU device\n"); + return backend_device_proxy_ptr(); + } +#endif + + return std::make_unique(device); } diff --git a/ggml/src/ggml-qnn/graph.cpp b/ggml/src/ggml-qnn/qnn/graph.cpp similarity index 98% rename from ggml/src/ggml-qnn/graph.cpp rename to ggml/src/ggml-qnn/qnn/graph.cpp index 3021a6f0a2..70fc71c211 100644 --- a/ggml/src/ggml-qnn/graph.cpp +++ b/ggml/src/ggml-qnn/qnn/graph.cpp @@ -35,7 +35,7 @@ int get_op_max_rank(const ggml_tensor * op) { 
} qnn::qnn_tensor_ptr_t create_tensor_with_cache(ggml_tensor * tensor, qnn::ggml_qnn_tensor::tensor_type_t type, int rank, - ggml_type override_data_type, QNNBackend device, + ggml_type override_data_type, backend_index_type device, Qnn_GraphHandle_t graph_handle, std::shared_ptr qnn_instance, qnn_tensor_cache_t & tensor_cache) { @@ -60,7 +60,7 @@ qnn::qnn_tensor_ptr_t create_tensor_with_cache(ggml_tensor * tensor, qnn::ggml_q qnn::qnn_tensor_array_t create_tensors_with_cache(const qnn::ggml_tensor_array_t & ggml_tensors, qnn::ggml_qnn_tensor::tensor_type_t type, int rank, - ggml_type override_data_type, QNNBackend device, + ggml_type override_data_type, backend_index_type device, Qnn_GraphHandle_t graph_handle, std::shared_ptr qnn_instance, qnn_tensor_cache_t & tensor_cache) { @@ -74,7 +74,7 @@ qnn::qnn_tensor_array_t create_tensors_with_cache(const qnn::ggml_tensor_array_t } qnn::qnn_op_config_ptr_t create_operation_from_op_tensor(ggml_tensor * dst, const std::string & name, int rank, - QNNBackend device, Qnn_GraphHandle_t graph_handle, + backend_index_type device, Qnn_GraphHandle_t graph_handle, std::shared_ptr qnn_instance, qnn_tensor_cache_t & tensor_cache) { auto operation = qnn::create_op(dst, name, qnn_instance); @@ -335,7 +335,7 @@ ggml_type qnn_graph::get_graph_key_from_cgraph(const ggml_cgraph * cgraph, std:: return min_op_type; } -qnn_graph::qnn_graph(const std::string & graph_name, QNNBackend device, qnn_instance_ptr qnn_instance, +qnn_graph::qnn_graph(const std::string & graph_name, backend_index_type device, qnn_instance_ptr qnn_instance, htp_precision precision, size_t vtcm_size_in_mb) : _graph_name(graph_name), _device(device), diff --git a/ggml/src/ggml-qnn/graph.hpp b/ggml/src/ggml-qnn/qnn/graph.hpp similarity index 85% rename from ggml/src/ggml-qnn/graph.hpp rename to ggml/src/ggml-qnn/qnn/graph.hpp index a913b8bba3..5e862112fb 100644 --- a/ggml/src/ggml-qnn/graph.hpp +++ b/ggml/src/ggml-qnn/qnn/graph.hpp @@ -45,7 +45,7 @@ class qnn_graph { */ static ggml_type get_graph_key_from_cgraph(const ggml_cgraph * cgraph, std::string & output); - explicit qnn_graph(const std::string & graph_name, QNNBackend device, qnn_instance_ptr qnn_instance, + explicit qnn_graph(const std::string & graph_name, backend_index_type device, qnn_instance_ptr qnn_instance, htp_precision precision, size_t vtcm_size_in_mb); ~qnn_graph(); @@ -62,17 +62,17 @@ class qnn_graph { const std::string & get_name() const { return _graph_name; } - QNNBackend get_device() const { return _device; } + backend_index_type get_device() const { return _device; } private: bool finalize(); - const std::string _graph_name; - const QNNBackend _device; - Qnn_GraphHandle_t _graph_handle = nullptr; - qnn_instance_ptr _qnn_instance; - qnn_interface_ptr _qnn_interface; - qnn_op_config_array_t _operations; + const std::string _graph_name; + const backend_index_type _device; + Qnn_GraphHandle_t _graph_handle = nullptr; + qnn_instance_ptr _qnn_instance; + qnn_interface_ptr _qnn_interface; + qnn_op_config_array_t _operations; qnn_tensor_array_t _tensor_inputs; qnn_tensor_array_t _tensor_outputs; diff --git a/ggml/src/ggml-qnn/qnn/hexagon/GgmlOpPackage.xml b/ggml/src/ggml-qnn/qnn/hexagon/GgmlOpPackage.xml new file mode 100644 index 0000000000..f4c6575902 --- /dev/null +++ b/ggml/src/ggml-qnn/qnn/hexagon/GgmlOpPackage.xml @@ -0,0 +1,88 @@ + + + + + GgmlMulMat + + + GGML MulMat operator + + + + + in[0] + + src0 + + true + BACKEND_SPECIFIC + + 4D + NHWC + [N, C, H , W] + + + + + in[1] + + src1 + + true + BACKEND_SPECIFIC + + 4D + 
NHWC + [N, C, H , W] + + + + + out[0] + + dst + + true + BACKEND_SPECIFIC + + 4D + [N, C, H , W] + + + + + HTP + + + + + + + GgmlMulMat + + + + + GgmlMulMat + + + in[0] + QNN_DATATYPE_FLOAT_16 + QNN_DATATYPE_FLOAT_32 + + + in[1] + QNN_DATATYPE_FLOAT_16 + QNN_DATATYPE_FLOAT_32 + + + + out[0] + QNN_DATATYPE_FLOAT_16 + QNN_DATATYPE_FLOAT_32 + + + + + + diff --git a/ggml/src/ggml-qnn/qnn/hexagon/GgmlOpPackage/Makefile b/ggml/src/ggml-qnn/qnn/hexagon/GgmlOpPackage/Makefile new file mode 100644 index 0000000000..f177822d35 --- /dev/null +++ b/ggml/src/ggml-qnn/qnn/hexagon/GgmlOpPackage/Makefile @@ -0,0 +1,357 @@ +# check all setup prerequisites if the command goal is not clean +ifneq ($(MAKECMDGOALS),clean) +ifndef QNN_INCLUDE +$(info "INFO: Qnn include not explicitly defined, attempting to use QNN_SDK_ROOT if it is valid") +QNN_INCLUDE := $(QNN_SDK_ROOT)/include/QNN +endif +ifeq ($(wildcard $(QNN_INCLUDE)),) +$(error "ERROR: QNN_INCLUDE path is not set. QNN include paths must be set to obtain BE headers necessary to compile the package") +endif +ifndef QNN_TARGET_LIB +$(info "INFO: Qnn target not explicitly defined, attempting to use QNN_SDK_ROOT if it is valid") +QNN_TARGET_LIB := $(QNN_SDK_ROOT)/lib/aarch64-android +endif +ifeq ($(wildcard $(QNN_TARGET_LIB)),) +ifeq ($(MAKECMDGOALS),htp_aarch64) +$(error "ERROR: QNN_TARGET_LIB is needed to compile package for aarch64") +else ifeq ($(MAKECMDGOALS),all) +$(info "WARNING:QNN_TARGET_LIB may need to be defined to compile packages") +endif +endif + +ifndef HEXAGON_SDK_ROOT +$(error "ERROR: HEXAGON_SDK_ROOT is not set. Hexagon-SDK path must be set to the latest hexagon-sdk-x.y.z") +endif + +ifeq ($(wildcard $(HEXAGON_SDK_ROOT)),) +$(error "ERROR: HEXAGON_SDK_ROOT is not set correctly. Please set HEXAGON_SDK_ROOT to latest hexagon-sdk-X.Y.Z path") +endif + +HEXAGON_SDK_BASE := $(dir $(HEXAGON_SDK_ROOT)) + +$(info "HEXAGON_SDK_ROOT is [${HEXAGON_SDK_ROOT}]") +# Users should note that the tools version may change between hexagon sdk versions +# Following combination of SDK and Tool version is supported +# fix the sdk root for new versions +HEXAGON_SDK_ROOT_V68 := $(HEXAGON_SDK_ROOT) +HEXAGON_SDK_ROOT_V69 := $(HEXAGON_SDK_ROOT) +HEXAGON_SDK_ROOT_V73 := $(HEXAGON_SDK_ROOT) +HEXAGON_SDK_ROOT_V75 := $(HEXAGON_SDK_ROOT) +HEXAGON_SDK_ROOT_V79 := $(HEXAGON_SDK_ROOT) + +#Updated to point to latest sdk to match with libQnnHtp.so +HEXAGON_SDK_ROOT_X86 := $(HEXAGON_SDK_ROOT) +HEXAGON_TOOLS_VERSION_V68 := 8.7.06 +HEXAGON_TOOLS_VERSION_V69 := 8.7.06 +HEXAGON_TOOLS_VERSION_V73 := 8.7.06 +HEXAGON_TOOLS_VERSION_V75 := 8.7.06 +HEXAGON_TOOLS_VERSION_V79 := 8.7.06 + +#Updated to point to latest sdk to match with libQnnHtp.so +HEXAGON_TOOLS_VERSION_X86 := 8.7.06 + +ifndef ANDROID_NDK_ROOT +ifeq ($(MAKECMDGOALS),htp_aarch64) +$(error "ERROR: ANDROID_NDK_ROOT is not set. Android NDK path must be set to compile package for aarch64") +else ifeq ($(MAKECMDGOALS),all) +$(info "WARNING: ANDROID_NDK_ROOT is not set. Android NDK path must be set to compile package for aarch64") +endif +endif + +ifndef PACKAGE_NAME +export +PACKAGE_NAME := $(notdir $(shell pwd)) +$(info "INFO: No package name defined. 
Using current directory name: $(PACKAGE_NAME) as the package name") +endif + +WORK := build +SRC_DIR := src +OP_SRC_DIR := src/ops +OP_INCLUDE_DIR := ./include +OP_INCLUDES = #$(wildcard $(OP_INCLUDE_DIR)/*.h) user defined if any op specific headers are needed, add -I to common flags +LIBRARY_NAME := libQnn$(PACKAGE_NAME).so +SUPPORTED_TARGETS = x86_64-linux-clang hexagon-v68 hexagon-v69 hexagon-v73 hexagon-v75 hexagon-v79 aarch64-android + + +COMMON_CXX_FLAGS = -std=c++17 -I$(QNN_INCLUDE) -fPIC -Wall -Wreorder -Wno-missing-braces -Wno-unused-function +COMMON_CXX_FLAGS += -Werror -Wno-format -Wno-unused-command-line-argument -fvisibility=default -stdlib=libc++ +COMMON_CXX_FLAGS += -DQNN_API="__attribute__((visibility(\"default\")))" -D__QAIC_HEADER_EXPORT="__attribute__((visibility(\"default\")))" + +X86_LIBNATIVE_RELEASE_DIR := $(HEXAGON_SDK_ROOT_X86)/tools/HEXAGON_Tools/$(HEXAGON_TOOLS_VERSION_X86)/Tools + +# Ensure hexagon sdk tool version can be retrieved +ifeq ($(wildcard $(X86_LIBNATIVE_RELEASE_DIR)/.),) +$(error "Cannot retrieve hexagon tools from: $(X86_LIBNATIVE_RELEASE_DIR). \ + \ + Please check that hexagon tools version is correct. Expected: $(HEXAGON_TOOLS_VERSION_X86)") +endif + +#Check tools for hexagon_v68 are present. +ifeq ($(MAKECMDGOALS),htp_v68) +ifeq ($(wildcard $(HEXAGON_SDK_ROOT_V68)),) +$(error "ERROR: HEXAGON_SDK_ROOT_V68 is set incorrectly. Cannot retrieve $(HEXAGON_SDK_ROOT_V68)") +endif +endif + +ifeq ($(MAKECMDGOALS),htp_v69) +ifeq ($(wildcard $(HEXAGON_SDK_ROOT_V69)),) +$(error "ERROR: HEXAGON_SDK_ROOT_V69 is set incorrectly. Cannot retrieve $(HEXAGON_SDK_ROOT_V69)") +endif +endif + +ifeq ($(MAKECMDGOALS),htp_v73) +ifeq ($(wildcard $(HEXAGON_SDK_ROOT_V73)),) +$(error "ERROR: HEXAGON_SDK_ROOT_V73 is set incorrectly. Cannot retrieve $(HEXAGON_SDK_ROOT_V73)") +endif +endif + +ifeq ($(MAKECMDGOALS),htp_v75) +ifeq ($(wildcard $(HEXAGON_SDK_ROOT_V75)),) +$(error "ERROR: HEXAGON_SDK_ROOT_V75 is set incorrectly. Cannot retrieve $(HEXAGON_SDK_ROOT_V75)") +endif +endif + +#Check tools for hexagon_v79 are present. +ifeq ($(MAKECMDGOALS),htp_v79) +ifeq ($(wildcard $(HEXAGON_SDK_ROOT_V79)),) +$(error "ERROR: HEXAGON_SDK_ROOT_V79 is set incorrectly. 
Cannot retrieve $(HEXAGON_SDK_ROOT_V79)") +endif +endif + + + +endif +OP_SOURCES = $(wildcard $(OP_SRC_DIR)/*.cpp) +OTHER_SOURCES = $(wildcard $(SRC_DIR)/*.cpp) +HFILES = $(wildcard $(QNN_INCLUDE)/*.h) +HFILES += $(wildcard $(QNN_INCLUDE)/HTP/*.h) +HFILES += $(wildcard $(QNN_INCLUDE)/HTP/core/*.h) +OP_OBJS = $(patsubst $(SRC_DIR)/%,%,$(patsubst %.cpp,%.o,$(OP_SOURCES))) +OTHER_OBJS = $(patsubst $(SRC_DIR)/%,%,$(patsubst %.cpp,%.o,$(OTHER_SOURCES))) + +#======= Assembly ======== +OP_SOURCES_ASM_X86 += $(wildcard $(OP_SRC_DIR)/x86_asm/*.S) +OP_OBJS_ASM_X86 += $(subst /x86_asm/,/,$(patsubst $(SRC_DIR)/%,%,$(patsubst %.S,%.o,$(OP_SOURCES_ASM_X86)))) +OP_SOURCES_ASM_V68 += $(wildcard $(OP_SRC_DIR)/v68_asm/*.S) +OP_OBJS_ASM_V68 += $(subst /v68_asm/,/,$(patsubst $(SRC_DIR)/%,%,$(patsubst %.S,%.o,$(OP_SOURCES_ASM_V68)))) +OP_SOURCES_ASM_V69 += $(wildcard $(OP_SRC_DIR)/v69_asm/*.S) +OP_OBJS_ASM_V69 += $(subst /v69_asm/,/,$(patsubst $(SRC_DIR)/%,%,$(patsubst %.S,%.o,$(OP_SOURCES_ASM_V69)))) +OP_SOURCES_ASM_V73 += $(wildcard $(OP_SRC_DIR)/v73_asm/*.S) +OP_OBJS_ASM_V73 += $(subst /v73_asm/,/,$(patsubst $(SRC_DIR)/%,%,$(patsubst %.S,%.o,$(OP_SOURCES_ASM_V73)))) +OP_SOURCES_ASM_V75 += $(wildcard $(OP_SRC_DIR)/v75_asm/*.S) +OP_OBJS_ASM_V75 += $(subst /v75_asm/,/,$(patsubst $(SRC_DIR)/%,%,$(patsubst %.S,%.o,$(OP_SOURCES_ASM_V75)))) +OP_SOURCES_ASM_V79 += $(wildcard $(OP_SRC_DIR)/v79_asm/*.S) +OP_OBJS_ASM_V79 += $(subst /v79_asm/,/,$(patsubst $(SRC_DIR)/%,%,$(patsubst %.S,%.o,$(OP_SOURCES_ASM_V79)))) + +OP_SOURCES_ASM_ANDROID += $(wildcard $(OP_SRC_DIR)/android_asm/*.S) +OP_OBJS_ASM_ANDROID += $(subst /android_asm/,/,$(patsubst $(SRC_DIR)/%,%,$(patsubst %.S,%.o,$(OP_SOURCES_ASM_ANDROID)))) + + +all: htp_v68 htp_x86 htp_aarch64 + +#============================================================================================================ +# Setup compiler, compiler instructions and linker for x86 +X86_CXX ?= clang++-9 +# Checking if clang++-9 is present. 
If not switch to clang++ +ifeq ($(shell $(X86_CXX) -v 2>&1 | grep -c "clang version"), 0) + X86_CXX := clang++ +endif +X86_LDFLAGS:= -Wl,--whole-archive -L$(X86_LIBNATIVE_RELEASE_DIR)/libnative/lib -lnative -Wl,--no-whole-archive -lpthread -L$(QNN_SDK_ROOT)/lib/x86_64-linux-clang -lHtpPrepare +X86_C_FLAGS := -D__HVXDBL__ -I$(X86_LIBNATIVE_RELEASE_DIR)/libnative/include -ffast-math -DUSE_OS_LINUX +X86_CXX_FLAGS = $(COMMON_CXX_FLAGS) $(X86_C_FLAGS) -fomit-frame-pointer -Wno-invalid-offsetof +linux_objs = +#============================================================================================================ +# Setup compiler, compiler instructions and linker for hexagon +HEXAGON_CXX_FLAGS := $(COMMON_CXX_FLAGS) -mhvx -mhvx-length=128B -mhmx -DUSE_OS_QURT -O2 -Wno-reorder -DPREPARE_DISABLED + +HEXAGON_CXX_FLAGS_V68 := $(HEXAGON_CXX_FLAGS) -mv68 -I$(HEXAGON_SDK_ROOT_V68)/rtos/qurt/computev68/include/qurt -I$(HEXAGON_SDK_ROOT_V68)/rtos/qurt/computev68/include/posix -I$(HEXAGON_SDK_ROOT_V68)/incs -I$(HEXAGON_SDK_ROOT_V68)/incs/stddef +HEXAGON_CXX_FLAGS_V69 := $(HEXAGON_CXX_FLAGS) -mv69 -I$(HEXAGON_SDK_ROOT_V69)/rtos/qurt/computev69/include/qurt -I$(HEXAGON_SDK_ROOT_V69)/rtos/qurt/computev69/include/posix -I$(HEXAGON_SDK_ROOT_V69)/incs -I$(HEXAGON_SDK_ROOT_V69)/incs/stddef +HEXAGON_CXX_FLAGS_V73 := $(HEXAGON_CXX_FLAGS) -mv73 -I$(HEXAGON_SDK_ROOT_V73)/rtos/qurt/computev73/include/qurt -I$(HEXAGON_SDK_ROOT_V73)/rtos/qurt/computev73/include/posix -I$(HEXAGON_SDK_ROOT_V73)/incs -I$(HEXAGON_SDK_ROOT_V73)/incs/stddef +HEXAGON_CXX_FLAGS_V75 := $(HEXAGON_CXX_FLAGS) -mv75 -I$(HEXAGON_SDK_ROOT_V75)/rtos/qurt/computev75/include/qurt -I$(HEXAGON_SDK_ROOT_V75)/rtos/qurt/computev75/include/posix -I$(HEXAGON_SDK_ROOT_V75)/incs -I$(HEXAGON_SDK_ROOT_V75)/incs/stddef +HEXAGON_CXX_FLAGS_V79 := $(HEXAGON_CXX_FLAGS) -mv79 -I$(HEXAGON_SDK_ROOT_V79)/rtos/qurt/computev79/include/qurt -I$(HEXAGON_SDK_ROOT_V79)/rtos/qurt/computev79/include/posix -I$(HEXAGON_SDK_ROOT_V79)/incs -I$(HEXAGON_SDK_ROOT_V79)/incs/stddef + + +HEXAGON_CXX_V68 := $(HEXAGON_SDK_ROOT_V68)/tools/HEXAGON_Tools/$(HEXAGON_TOOLS_VERSION_V68)/Tools/bin/hexagon-clang++ +HEXAGON_CXX_V69 := $(HEXAGON_SDK_ROOT_V69)/tools/HEXAGON_Tools/$(HEXAGON_TOOLS_VERSION_V69)/Tools/bin/hexagon-clang++ +HEXAGON_CXX_V73 := $(HEXAGON_SDK_ROOT_V73)/tools/HEXAGON_Tools/$(HEXAGON_TOOLS_VERSION_V73)/Tools/bin/hexagon-clang++ +HEXAGON_CXX_V75 := $(HEXAGON_SDK_ROOT_V75)/tools/HEXAGON_Tools/$(HEXAGON_TOOLS_VERSION_V75)/Tools/bin/hexagon-clang++ +HEXAGON_CXX_V79 := $(HEXAGON_SDK_ROOT_V79)/tools/HEXAGON_Tools/$(HEXAGON_TOOLS_VERSION_V79)/Tools/bin/hexagon-clang++ + + +HEX_LDFLAGS = +hexagon_objs = +#============================================================================================================ +# Setup compiler, compiler instructions and linker for aarch64 +AARCH64_C__FLAGS = -D__HVXDBL__ -I$(X86_LIBNATIVE_RELEASE_DIR)/libnative/include -ffast-math -DUSE_OS_LINUX -DANDROID +AARCH64_CXX_FLAGS = $(COMMON_CXX_FLAGS) $(AARCH64_C__FLAGS) -fomit-frame-pointer -Wno-invalid-offsetof -Wno-unused-variable -Wno-unused-parameter -Wno-missing-braces -Wno-sign-compare -Wno-unused-private-field -Wno-unused-variable -Wno-ignored-qualifiers -Wno-missing-field-initializers +ARM_CLANG_OPTS =--target=aarch64-none-linux-android21 --sysroot=$(ANDROID_NDK_ROOT)/toolchains/llvm/prebuilt/linux-x86_64/sysroot -stdlib=libc++ -static-libstdc++ +AARCH64_CXX = $(ANDROID_NDK_ROOT)/toolchains/llvm/prebuilt/linux-x86_64/bin/clang++ $(ARM_CLANG_OPTS) +AARCH64_LDFLAGS = -L$(QNN_TARGET_LIB) -lQnnHtp 
-lQnnHtpPrepare +aarch64_objs = +#============================================================================================================ +# Setup targets and goals + +htp_x86: X86_BUILD + +htp_v68: HEXAGON_BUILD_V68 + +htp_v69: HEXAGON_BUILD_V69 + +htp_v73: HEXAGON_BUILD_V73 + +htp_v75: HEXAGON_BUILD_V75 + +htp_v79: HEXAGON_BUILD_V79 + + + +htp_aarch64: AARCH64_BUILD + +AARCH64_BUILD: $(WORK)/aarch64-android/$(LIBRARY_NAME) + +HEXAGON_BUILD_V68: $(WORK)/hexagon-v68/$(LIBRARY_NAME) + +HEXAGON_BUILD_V69: $(WORK)/hexagon-v69/$(LIBRARY_NAME) + +HEXAGON_BUILD_V73: $(WORK)/hexagon-v73/$(LIBRARY_NAME) + +HEXAGON_BUILD_V75: $(WORK)/hexagon-v75/$(LIBRARY_NAME) + +HEXAGON_BUILD_V79: $(WORK)/hexagon-v79/$(LIBRARY_NAME) + + + +X86_BUILD: $(WORK)/x86_64-linux-clang/$(LIBRARY_NAME) + + +define build_objs = +ifneq ($(filter $(2),$(SUPPORTED_TARGETS)),) +$(2)_objs += $(foreach x,$(1),$(WORK)/$(2)/$(x)) +else +$$(error "Unknown target option provided: $(2): Supported targets are: $(SUPPORTED_TARGETS)") +endif +endef + +$(eval $(call build_objs,$(OTHER_OBJS),x86_64-linux-clang)) +$(eval $(call build_objs,$(OP_OBJS),x86_64-linux-clang)) +$(eval $(call build_objs,$(OP_OBJS_ASM_X86),x86_64-linux-clang)) +$(eval $(call build_objs,$(OTHER_OBJS),hexagon-v68)) +$(eval $(call build_objs,$(OP_OBJS),hexagon-v68)) +$(eval $(call build_objs,$(OP_OBJS_ASM_V68),hexagon-v68)) +$(eval $(call build_objs,$(OTHER_OBJS),hexagon-v69)) +$(eval $(call build_objs,$(OP_OBJS),hexagon-v69)) +$(eval $(call build_objs,$(OP_OBJS_ASM_V69),hexagon-v69)) +$(eval $(call build_objs,$(OTHER_OBJS),hexagon-v73)) +$(eval $(call build_objs,$(OP_OBJS),hexagon-v73)) +$(eval $(call build_objs,$(OP_OBJS_ASM_V73),hexagon-v73)) +$(eval $(call build_objs,$(OTHER_OBJS),hexagon-v75)) +$(eval $(call build_objs,$(OP_OBJS),hexagon-v75)) +$(eval $(call build_objs,$(OP_OBJS_ASM_V75),hexagon-v75)) +$(eval $(call build_objs,$(OTHER_OBJS),hexagon-v79)) +$(eval $(call build_objs,$(OP_OBJS),hexagon-v79)) +$(eval $(call build_objs,$(OP_OBJS_ASM_V75),hexagon-v79)) + +$(eval $(call build_objs,$(OTHER_OBJS),aarch64-android)) +$(eval $(call build_objs,$(OP_OBJS),aarch64-android)) +$(eval $(call build_objs,$(OP_OBJS_ASM_ANDROID),aarch64-android)) + +# x86 +$(WORK)/x86_64-linux-clang $(WORK)/hexagon-v68 $(WORK)/hexagon-v69 $(WORK)/hexagon-v73 $(WORK)/hexagon-v75 $(WORK)/hexagon-v79 $(WORK)/aarch64-android: + @mkdir -p $@/ops + +$(WORK)/x86_64-linux-clang/%.o: $(SRC_DIR)/%.cpp | $(WORK)/x86_64-linux-clang + $(X86_CXX) $(X86_CXX_FLAGS) -DTHIS_PKG_NAME=$(PACKAGE_NAME) -MMD -c $< -o $@ + +$(WORK)/x86_64-linux-clang/ops/%.o: $(OP_SRC_DIR)/%.cpp | $(WORK)/x86_64-linux-clang + $(X86_CXX) $(X86_CXX_FLAGS) -DTHIS_PKG_NAME=$(PACKAGE_NAME) -MMD -c $< -o $@ + +$(WORK)/x86_64-linux-clang/ops/%.o: $(OP_SRC_DIR)/x86_asm/%.S | $(WORK)/x86_64-linux-clang + $(X86_CXX) $(X86_CXX_FLAGS) -DTHIS_PKG_NAME=$(PACKAGE_NAME) -MMD -c $< -o $@ + +$(WORK)/x86_64-linux-clang/$(LIBRARY_NAME): $(x86_64-linux-clang_objs) | $(HFILES) + $(X86_CXX) -fPIC -std=c++17 -g -shared -o $@ $^ $(X86_LDFLAGS) + +# v68 +$(WORK)/hexagon-v68/%.o: $(SRC_DIR)/%.cpp | $(WORK)/hexagon-v68 + $(HEXAGON_CXX_V68) $(HEXAGON_CXX_FLAGS_V68) -DTHIS_PKG_NAME=$(PACKAGE_NAME) -MMD -c $< -o $@ + +$(WORK)/hexagon-v68/ops/%.o: $(OP_SRC_DIR)/%.cpp | $(WORK)/hexagon-v68 + $(HEXAGON_CXX_V68) $(HEXAGON_CXX_FLAGS_V68) -DTHIS_PKG_NAME=$(PACKAGE_NAME) -MMD -c $< -o $@ + +$(WORK)/hexagon-v68/ops/%.o: $(OP_SRC_DIR)/v68_asm/%.S | $(WORK)/hexagon-v68 + $(HEXAGON_CXX_V68) $(HEXAGON_CXX_FLAGS_V68) -DTHIS_PKG_NAME=$(PACKAGE_NAME) -MMD -c $< -o $@ 
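Editor's note: the per-target goals above are driven entirely by `QNN_SDK_ROOT`, `HEXAGON_SDK_ROOT`, and `ANDROID_NDK_ROOT`; a typical op-package build for a v75 device plus the aarch64 host stub might look like the following (version numbers in the paths are placeholders):

```sh
# Hypothetical invocation; adjust the SDK paths to your local installs.
export HEXAGON_SDK_ROOT="$HOME/Qualcomm/Hexagon_SDK/5.x.y"
export QNN_SDK_ROOT="$HOME/Qualcomm/qairt/2.x.y"
export ANDROID_NDK_ROOT="$HOME/Android/ndk/r26"
make htp_v75 htp_aarch64   # emits build/hexagon-v75/libQnnGgmlOpPackage.so and the aarch64 counterpart
```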
+ +$(WORK)/hexagon-v68/$(LIBRARY_NAME): $(hexagon-v68_objs) | $(HFILES) + $(HEXAGON_CXX_V68) -fPIC -std=c++17 -g -shared -o $@ $^ $(HEX_LDFLAGS) + +# v69 +$(WORK)/hexagon-v69/%.o: $(SRC_DIR)/%.cpp | $(WORK)/hexagon-v69 + $(HEXAGON_CXX_V69) $(HEXAGON_CXX_FLAGS_V69) -DTHIS_PKG_NAME=$(PACKAGE_NAME) -MMD -c $< -o $@ + +$(WORK)/hexagon-v69/ops/%.o: $(OP_SRC_DIR)/%.cpp | $(WORK)/hexagon-v69 + $(HEXAGON_CXX_V69) $(HEXAGON_CXX_FLAGS_V69) -DTHIS_PKG_NAME=$(PACKAGE_NAME) -MMD -c $< -o $@ + +$(WORK)/hexagon-v69/ops/%.o: $(OP_SRC_DIR)/v69_asm/%.S | $(WORK)/hexagon-v69 + $(HEXAGON_CXX_V69) $(HEXAGON_CXX_FLAGS_V69) -DTHIS_PKG_NAME=$(PACKAGE_NAME) -MMD -c $< -o $@ + +$(WORK)/hexagon-v69/$(LIBRARY_NAME): $(hexagon-v69_objs) | $(HFILES) + $(HEXAGON_CXX_V69) -fPIC -std=c++17 -g -shared -o $@ $^ $(HEX_LDFLAGS) + +# v73 +$(WORK)/hexagon-v73/%.o: $(SRC_DIR)/%.cpp | $(WORK)/hexagon-v73 + $(HEXAGON_CXX_V73) $(HEXAGON_CXX_FLAGS_V73) -DTHIS_PKG_NAME=$(PACKAGE_NAME) -MMD -c $< -o $@ + +$(WORK)/hexagon-v73/ops/%.o: $(OP_SRC_DIR)/%.cpp | $(WORK)/hexagon-v73 + $(HEXAGON_CXX_V73) $(HEXAGON_CXX_FLAGS_V73) -DTHIS_PKG_NAME=$(PACKAGE_NAME) -MMD -c $< -o $@ + +$(WORK)/hexagon-v73/ops/%.o: $(OP_SRC_DIR)/v73_asm/%.S | $(WORK)/hexagon-v73 + $(HEXAGON_CXX_V73) $(HEXAGON_CXX_FLAGS_V73) -DTHIS_PKG_NAME=$(PACKAGE_NAME) -MMD -c $< -o $@ + +$(WORK)/hexagon-v73/$(LIBRARY_NAME): $(hexagon-v73_objs) | $(HFILES) + $(HEXAGON_CXX_V73) -fPIC -std=c++17 -g -shared -o $@ $^ $(HEX_LDFLAGS) + +#v75 +$(WORK)/hexagon-v75/%.o: $(SRC_DIR)/%.cpp | $(WORK)/hexagon-v75 + $(HEXAGON_CXX_V75) $(HEXAGON_CXX_FLAGS_V75) -DTHIS_PKG_NAME=$(PACKAGE_NAME) -MMD -c $< -o $@ + +$(WORK)/hexagon-v75/ops/%.o: $(OP_SRC_DIR)/%.cpp | $(WORK)/hexagon-v75 + $(HEXAGON_CXX_V75) $(HEXAGON_CXX_FLAGS_V75) -DTHIS_PKG_NAME=$(PACKAGE_NAME) -MMD -c $< -o $@ + +$(WORK)/hexagon-v75/ops/%.o: $(OP_SRC_DIR)/v75_asm/%.S | $(WORK)/hexagon-v75 + $(HEXAGON_CXX_V75) $(HEXAGON_CXX_FLAGS_V75) -DTHIS_PKG_NAME=$(PACKAGE_NAME) -MMD -c $< -o $@ + +$(WORK)/hexagon-v75/$(LIBRARY_NAME): $(hexagon-v75_objs) | $(HFILES) + $(HEXAGON_CXX_V75) -fPIC -std=c++17 -g -shared -o $@ $^ $(HEX_LDFLAGS) + +#v79 +$(WORK)/hexagon-v79/%.o: $(SRC_DIR)/%.cpp | $(WORK)/hexagon-v79 + $(HEXAGON_CXX_V79) $(HEXAGON_CXX_FLAGS_V79) -DTHIS_PKG_NAME=$(PACKAGE_NAME) -MMD -c $< -o $@ + +$(WORK)/hexagon-v79/ops/%.o: $(OP_SRC_DIR)/%.cpp | $(WORK)/hexagon-v79 + $(HEXAGON_CXX_V79) $(HEXAGON_CXX_FLAGS_V79) -DTHIS_PKG_NAME=$(PACKAGE_NAME) -MMD -c $< -o $@ + +$(WORK)/hexagon-v79/ops/%.o: $(OP_SRC_DIR)/v79_asm/%.S | $(WORK)/hexagon-v79 + $(HEXAGON_CXX_V79) $(HEXAGON_CXX_FLAGS_V79) -DTHIS_PKG_NAME=$(PACKAGE_NAME) -MMD -c $< -o $@ + +$(WORK)/hexagon-v79/$(LIBRARY_NAME): $(hexagon-v79_objs) | $(HFILES) + $(HEXAGON_CXX_V79) -fPIC -std=c++17 -g -shared -o $@ $^ $(HEX_LDFLAGS) + + + +# aarch64 +$(WORK)/aarch64-android/%.o: $(SRC_DIR)/%.cpp | $(WORK)/aarch64-android + $(AARCH64_CXX) $(AARCH64_CXX_FLAGS) -DTHIS_PKG_NAME=$(PACKAGE_NAME) -MMD -c $< -o $@ + +$(WORK)/aarch64-android/ops/%.o: $(OP_SRC_DIR)/%.cpp | $(WORK)/aarch64-android + $(AARCH64_CXX) $(AARCH64_CXX_FLAGS) -DTHIS_PKG_NAME=$(PACKAGE_NAME) -MMD -c $< -o $@ + +$(WORK)/aarch64-android/ops/%.o: $(OP_SRC_DIR)/android_asm/%.S | $(WORK)/aarch64-android + $(AARCH64_CXX) $(AARCH64_CXX_FLAGS) -DTHIS_PKG_NAME=$(PACKAGE_NAME) -MMD -c $< -o $@ + +$(WORK)/aarch64-android/$(LIBRARY_NAME): $(aarch64-android_objs) | $(HFILES) + $(AARCH64_CXX) -fPIC -std=c++17 -g -shared -o $@ $^ $(AARCH64_LDFLAGS) + +clean: + -rm -rf $(WORK) + +.PHONY: all clean diff --git 
a/ggml/src/ggml-qnn/qnn/hexagon/GgmlOpPackage/config/GgmlOpPackage.xml b/ggml/src/ggml-qnn/qnn/hexagon/GgmlOpPackage/config/GgmlOpPackage.xml new file mode 100644 index 0000000000..f4c6575902 --- /dev/null +++ b/ggml/src/ggml-qnn/qnn/hexagon/GgmlOpPackage/config/GgmlOpPackage.xml @@ -0,0 +1,88 @@ + + + + + GgmlMulMat + + + GGML MulMat operator + + + + + in[0] + + src0 + + true + BACKEND_SPECIFIC + + 4D + NHWC + [N, C, H , W] + + + + + in[1] + + src1 + + true + BACKEND_SPECIFIC + + 4D + NHWC + [N, C, H , W] + + + + + out[0] + + dst + + true + BACKEND_SPECIFIC + + 4D + [N, C, H , W] + + + + + HTP + + + + + + + GgmlMulMat + + + + + GgmlMulMat + + + in[0] + QNN_DATATYPE_FLOAT_16 + QNN_DATATYPE_FLOAT_32 + + + in[1] + QNN_DATATYPE_FLOAT_16 + QNN_DATATYPE_FLOAT_32 + + + + out[0] + QNN_DATATYPE_FLOAT_16 + QNN_DATATYPE_FLOAT_32 + + + + + + diff --git a/ggml/src/ggml-qnn/qnn/hexagon/GgmlOpPackage/src/GgmlOpPackageInterface.cpp b/ggml/src/ggml-qnn/qnn/hexagon/GgmlOpPackage/src/GgmlOpPackageInterface.cpp new file mode 100644 index 0000000000..df9ab36420 --- /dev/null +++ b/ggml/src/ggml-qnn/qnn/hexagon/GgmlOpPackage/src/GgmlOpPackageInterface.cpp @@ -0,0 +1,274 @@ +//============================================================================== +// Auto Generated Code for GgmlOpPackage +//============================================================================== + +#include "HTP/QnnHtpCommon.h" +#include "HTP/core/constraints.h" +#include "HTP/core/op_package_feature_support.h" +#include "HTP/core/op_register_ext.h" +#include "HTP/core/optimize.h" +#include "HTP/core/simple_reg.h" +#include "HTP/core/unique_types.h" +#include "QnnOpPackage.h" +#include "QnnSdkBuildId.h" + +DEFINE_UNIQ_TY() +BEGIN_PKG_OPS_OPTS_LIST() + +/** Note that the order of declarations given here defines the order in which ops and graph optimizations are + * registered to the HTP Core. + * Append the latest OpName at the bottom + */ +DECLARE_PKG_OPS_OPTS_LIST(PKG_GgmlMulMat) + +END_PKG_OPS_OPTS_LIST() + +// op package info +static constexpr auto sg_packageName = THIS_PKG_NAME_STR; // package name passed in as compile flag + +static std::array sg_opNames{{"GgmlMulMat"}}; + +static Qnn_ApiVersion_t sg_sdkApiVersion = QNN_HTP_API_VERSION_INIT; +static QnnOpPackage_Info_t sg_packageInfo = QNN_OP_PACKAGE_INFO_INIT; + +// global data +static QnnOpPackage_GlobalInfrastructure_t sg_globalInfra = +nullptr; // global infrastructure not in use for now +static bool sg_packageInitialized = false; + +/* + * user provided logging call back function + * currently only supported on linux x86-64 and nonrpc versions + * typedef void (*QnnLog_Callback_t)(const char* fmt, + * QnnLog_Level_t level, + * uint64_t timestamp, + * va_list args); + * usage: if(sg_logInitialized && level <= sg_maxLogLevel) + * sg_logCallback(fmt, level, timestamp, args); + * + * for cross rpc versions, skel side user provided logging call back function + * can be defined as part of op packages. 
maximal log level sg_maxLogLevel + * can be set by Qnn_ErrorHandle_t GgmlOpPackageLogSetLevel(QnnLog_Level_t maxLogLevel) + */ +/* + * for alternative logging method provided by HTP core, please refer to log.h + */ +static QnnLog_Callback_t sg_logCallback = + nullptr; // user provided call back function pointer for logging +static QnnLog_Level_t sg_maxLogLevel = + (QnnLog_Level_t)0; // maximal log level used in user provided logging +static bool sg_logInitialized = + false; // tracks whether user provided logging method has been initialized + + +/* +* op initialization +* needs to be global in the package +* one initialization per package before any op definitions +* syntax: INIT_PACKAGE_OP_DEF() +*/ +INIT_PACKAGE_OP_DEF() + +/* +* optimization initialization +* needs to be global in the package +* one initialization per package before any optimization definitions +* syntax: INIT_PACKAGE_OPTIMIZATION_DEF() +*/ +INIT_PACKAGE_OPTIMIZATION_DEF() + +/* + * op parameter order initialization + * needs to be global in the package + * one initialization per package before any op parameter order definitions + * syntax: INIT_PACKAGE_PARAM_ORDER_DEF() + */ +INIT_PACKAGE_PARAM_ORDER_DEF() + +/* + * axis parameter name list + * optional + * needs to be global in the package + * one list per package + * for listing axis parameter names passed into Qnn_AddNode API + * HTP backend auto-adjusts values in axis parameters based on HTP backfilling + * note: HTP backend backfills tensor dimensions to 4 dimensions + * syntax: LIST_PACKAGE_AXIS_PARAMS(...) + * e.g. LIST_PACKAGE_AXIS_PARAMS("Axis", "AXIS", "axis") + */ +// LIST_PACKAGE_AXIS_PARAMS() + +/* + * per-channel quantized op name list + * optional + * needs to be global in the package + * one list per package + * for listing op names which support per-channel quantization + * per-axis quantization info of an op is embeded in axisScaleOffsetEncoding + * inside Qnn_Tensor_t types + * HTP backend only supports per-channel scale ops + * i.e. along last dimension, offset is always zero + * if an op name is marked as having per-channel scale support, and in + * QNN_AddNode, at least one input, parameter, or output has + * QNN_QUANTIZATION_ENCODING_AXIS_SCALE_OFFSET type: + * then: + * HTP backend will pass to op implementation function the following: + * output(s), input(s), parameter(s), + * outputPerChannelScale(s), inputPerChannelScale(s), paramPerChannelScale(s) + * + * optimization rules can be used to remove extra perChannelScale tensors + * + * syntax: LIST_PACKAGE_PER_CHANNEL_QUANTIZED_OPS(...) + * e.g. LIST_PACKAGE_PER_CHANNEL_QUANTIZED_OPS(sg_op1Name, sg_op2Name) + */ + +// LIST_PACKAGE_PER_CHANNEL_QUANTIZED_OPS() + +/* +* Declare and define the special intialize function for HTP Backend to load +*/ +INIT_PKG_CORE_INIT_FUNC() + +/* op package API's */ + +Qnn_ErrorHandle_t GgmlOpPackageInit(QnnOpPackage_GlobalInfrastructure_t infrastructure) { + if (sg_packageInitialized) return QNN_OP_PACKAGE_ERROR_LIBRARY_ALREADY_INITIALIZED; + + /* + * op parameter order registration + * registers all defined op parameter orders in the package + * syntax: REGISTER_PACKAGE_PARAM_ORDERS() + */ + REGISTER_PACKAGE_PARAM_ORDERS() + + /* + * op axis parameter name registration + * registers all axis parameter names in the package + * used with LIST_PACKAGE_AXIS_PARAMS(...) 
+ * syntax: REGISTER_PACKAGE_AXIS_PARAMS() + */ + REGISTER_PACKAGE_AXIS_PARAMS() + + /* + * per-channel scale op name registration + * registers all per-channel scale op names in the package + * used with LIST_PACKAGE_PER_CHANNEL_QUANTIZED_OPS(...) + * syntax: REGISTER_PACKAGE_PER_CHANNEL_QUANTIZED_OPS() + */ + REGISTER_PACKAGE_PER_CHANNEL_QUANTIZED_OPS() + + sg_globalInfra = infrastructure; + sg_packageInitialized = true; + return QNN_SUCCESS; +} + +Qnn_ErrorHandle_t GgmlOpPackageGetInfo(const QnnOpPackage_Info_t** info) { + if (!sg_packageInitialized) return QNN_OP_PACKAGE_ERROR_LIBRARY_NOT_INITIALIZED; + if (!info) return QNN_OP_PACKAGE_ERROR_INVALID_INFO; + + sg_packageInfo = QNN_OP_PACKAGE_INFO_INIT; + sg_packageInfo.packageName = sg_packageName; + sg_packageInfo.operationNames = sg_opNames.data(); + sg_packageInfo.numOperations = sg_opNames.size(); + sg_packageInfo.sdkBuildId = QNN_SDK_BUILD_ID; + sg_packageInfo.sdkApiVersion = &sg_sdkApiVersion; + + *info = &sg_packageInfo; + return QNN_SUCCESS; +} + +Qnn_ErrorHandle_t GgmlOpPackageLogInitialize(QnnLog_Callback_t callback, QnnLog_Level_t maxLogLevel) { + if (sg_logInitialized) return QNN_OP_PACKAGE_ERROR_LIBRARY_ALREADY_INITIALIZED; + if (!callback) return QNN_LOG_ERROR_INVALID_ARGUMENT; + if (maxLogLevel < QNN_LOG_LEVEL_ERROR) return QNN_LOG_ERROR_INVALID_ARGUMENT; + sg_logCallback = callback; + sg_maxLogLevel = maxLogLevel; + sg_logInitialized = true; + return QNN_SUCCESS; +} + +Qnn_ErrorHandle_t GgmlOpPackageLogSetLevel(QnnLog_Level_t maxLogLevel) { + if (maxLogLevel < QNN_LOG_LEVEL_ERROR) return QNN_LOG_ERROR_INVALID_ARGUMENT; + sg_maxLogLevel = maxLogLevel; + return QNN_SUCCESS; +} + +Qnn_ErrorHandle_t GgmlOpPackageLogTerminate() { + if (!sg_logInitialized) return QNN_OP_PACKAGE_ERROR_LIBRARY_NOT_INITIALIZED; + sg_logCallback = nullptr; + sg_maxLogLevel = (QnnLog_Level_t)0; + sg_logInitialized = false; + return QNN_SUCCESS; +} + +Qnn_ErrorHandle_t GgmlOpPackageValidateOpConfig (Qnn_OpConfig_t opConfig){ + if (std::string(sg_packageName) != opConfig.v1.packageName) { + return QNN_OP_PACKAGE_ERROR_VALIDATION_FAILURE; + } + + /* auto-generated validation code below + * Check if op config type matches any registered ops + * If a match is found, check number of inputs, outputs and params + */ + if (std::string(opConfig.v1.typeName) == "GgmlMulMat"){ + if (opConfig.v1.numOfParams != 0 || opConfig.v1.numOfInputs != 2 || opConfig.v1.numOfOutputs != 1){ + return QNN_OP_PACKAGE_ERROR_VALIDATION_FAILURE; + } + } + else{ + return QNN_OP_PACKAGE_ERROR_VALIDATION_FAILURE; + } + + /* + * additional validation code here + * */ + + return QNN_SUCCESS; +} + +/* The following three functions in this comment are not called by HTP backend for now, + * no auto-generated implementations are created. Users should see example for full function signatures. 
+ * (version 1.3.0) Qnn_ErrorHandle_t GgmlOpPackageCreateKernels (QnnOpPackage_GraphInfrastructure_t + * graphInfrastructure, QnnOpPackage_Node_t node, QnnOpPackage_Kernel_t** kernels, uint32_t* + * numKernels) + * (version 1.3.0) Qnn_ErrorHandle_t GgmlOpPackageFreeKernels (QnnOpPackage_Kernel_t* kernels) + * + * (version 1.4.0) Qnn_ErrorHandle_t GgmlOpPackageCreateOpImpl (QnnOpPackage_GraphInfrastructure_t + * graphInfrastructure, QnnOpPackage_Node_t node, QnnOpPackage_OpImpl_t* opImpl) + *(version 1.4.0) Qnn_ErrorHandle_t GgmlOpPackageFreeOpImpl (QnnOpPackage_OpImpl_t opImpl) + */ + +Qnn_ErrorHandle_t GgmlOpPackageTerminate() { +if (!sg_packageInitialized) return QNN_OP_PACKAGE_ERROR_LIBRARY_NOT_INITIALIZED; + +sg_globalInfra = nullptr; +sg_packageInitialized = false; +return QNN_SUCCESS; +} + +#ifdef __cplusplus +extern "C" { +#endif + + +/* latest version */ +Qnn_ErrorHandle_t GgmlOpPackageInterfaceProvider(QnnOpPackage_Interface_t* interface) { + if (!interface) return QNN_OP_PACKAGE_ERROR_INVALID_ARGUMENT; + interface->interfaceVersion = {1, 4, 0}; + interface->v1_4.init = GgmlOpPackageInit; + interface->v1_4.terminate = GgmlOpPackageTerminate; + interface->v1_4.getInfo = GgmlOpPackageGetInfo; + interface->v1_4.validateOpConfig = GgmlOpPackageValidateOpConfig; + interface->v1_4.createOpImpl = nullptr; + interface->v1_4.freeOpImpl = nullptr; + interface->v1_4.logInitialize = GgmlOpPackageLogInitialize; + interface->v1_4.logSetLevel = GgmlOpPackageLogSetLevel; + interface->v1_4.logTerminate = GgmlOpPackageLogTerminate; + return QNN_SUCCESS; +} + +#ifdef __cplusplus +} +#endif + + diff --git a/ggml/src/ggml-qnn/qnn/hexagon/GgmlOpPackage/src/ops/GgmlMulMat.cpp b/ggml/src/ggml-qnn/qnn/hexagon/GgmlOpPackage/src/ops/GgmlMulMat.cpp new file mode 100644 index 0000000000..137522cc80 --- /dev/null +++ b/ggml/src/ggml-qnn/qnn/hexagon/GgmlOpPackage/src/ops/GgmlMulMat.cpp @@ -0,0 +1,213 @@ +//============================================================================== +// Auto Generated Code for GgmlOpPackage +//============================================================================== + +#include "HTP/core/constraints.h" +#include "HTP/core/op_package_feature_support.h" +#include "HTP/core/op_register_ext.h" +#include "HTP/core/optimize.h" +#include "HTP/core/simple_reg.h" +#include "QnnOpPackage.h" + +BEGIN_PKG_OP_DEFINITION(PKG_GgmlMulMat); + +// op execute function declarations +template +GraphStatus ggmlmulmatImpl(TensorType & out_0, const TensorType & in_0, const TensorType & in_1); + +// forward declaration of sample cost function +static float ggmlmulmatCostFunc(const Op * op); + +/* + * method 1 for defining op, using default cost value (i.e. GLACIAL) and default flag (Flags::RESOURCE_HVX) + * syntax: DEF_PACKAGE_OP(F,OP) + * e.g. DEF_PACKAGE_OP((ggmlmulmatImpl), "GgmlMulMat") + */ +DEF_PACKAGE_OP((ggmlmulmatImpl), "GgmlMulMat") + +/* + * method 2 for defining op with specified cost value (one of GLACIAL, SNAIL, FAST, FREE) + * and provided flags + * syntax: DEF_PACKAGE_OP_AND_COST_AND_FLAGS(F,OP,COST,...) + * can use zero or more flags, FLAG options are IS_CONST, INHIBIT_CONST_PROP, + * RESOURCE_HVX, RESOURCE_HMX(not supported in external op packages) + * e.g. DEF_PACKAGE_OP_AND_COST_AND_FLAGS((ggmlmulmatImpl), "GgmlMulMat", SNAIL) + */ + +/* + * method 3 for defining op with cost function pointer and provided flags + * cost function pointer type: typedef float (*cost_function) (const Op * op); + * syntax: DEF_PACKAGE_OP_AND_COST_F_AND_FLAGS(F,OP,COST_F,...) + * e.g. 
DEF_PACKAGE_OP_AND_COST_F_AND_FLAGS((ggmlmulmatImpl), + * "GgmlMulMat", ggmlmulmatCostFunc, Flags::RESOURCE_HVX) + */ + +/* + * optimization definitions + * need to be global in the package + * one definition per optimization + * syntax: DEF_PACKAGE_OPTIMIZATION(PRIORITY,MATCHCODE,CONSTRAINTCODE,REPLACECODE) + * PRIORITY predefined values include EARLY(2000), MIDDLE(3000), LATE(4000) + * HTP core provides some replacement functions for op package to use + * for more information about optimization rules, please refer to HTP core documentations + */ + +/* + * op parameter order definitions + * need to be global in the package + * one definition per op, and this is optional + * syntax: DEF_PACKAGE_PARAM_ORDER(OP,PARAM1,MANDATORY1,DEFAULT1,PARAM2,MANDATORY2,DEFAULT2...) + * one or more parameters can be specified for each op + * order of parameters listed determines the order of parameters passed into op execution functions + * if an op does not have a parameter order definition, parameter order passed into Qnn_addNode + * will be passed into op execution functions + * if an op has a parameter order definition, any parameter passed into Qnn_addNode with unlisted + * name will be abandoned + * if two or more op packages with the same package name will be registered, they cannot list + * conflicting parameter orders + * PARAM refers to parameter name as a string literal + * MANDATORY refers to whether this parameter is required to be provided at Qnn_addNode + * DEFAULT is used when MANDATORY is false + * if provided as Qnn_Param_t*, + * DEFAULT will be used for graph construction when this parameter is not provided at + * Qnn_addNode + * if provided as nullptr, + * graph construction will skip this parameter when this parameter is not provided at + * Qnn_addNode + */ + +namespace { + +constexpr const size_t kBytesPerVector = sizeof(HVX_Vector); // 128 for v73 +constexpr const size_t kFloatsPerVector = kBytesPerVector / sizeof(float); +constexpr const size_t kAlignMask = kBytesPerVector - 1; + +inline size_t unaligned_bytes(const void * addr) { + return ((size_t) addr) & kAlignMask; +} + +inline bool is_addr_aligned(void * addr) { + return unaligned_bytes(addr) == 0; +} + +inline float vec_dot_product_f32(const float * src0, const float * src1, size_t count) { + HVX_Vector * iptr0 = ((HVX_Vector *) src0); + HVX_Vector * iptr0_end = ((HVX_Vector *) src0) + (count / kFloatsPerVector); + HVX_Vector * iptr1 = ((HVX_Vector *) src1); + HVX_Vector prev0 = *iptr0++; + HVX_Vector prev1 = *iptr1++; + HVX_Vector sum = Q6_V_vzero(); + + // TODO: prefetch? + while (iptr0 < iptr0_end) { + HVX_Vector curr0 = *iptr0++; + HVX_Vector curr1 = *iptr1++; + HVX_Vector s0 = Q6_V_valign_VVR(curr0, prev0, (size_t) src0); + HVX_Vector s1 = Q6_V_valign_VVR(curr1, prev1, (size_t) src1); + sum = Q6_Vqf32_vadd_Vqf32Vqf32(Q6_Vqf32_vmpy_VsfVsf(s0, s1), sum); + prev0 = curr0; + prev1 = curr1; + } + + if ((iptr0_end - ((HVX_Vector *) src0)) > 0) { + // handle the last vector + // see also: https://github.com/UbiquitousLearning/mllm/blob/babf4410352ce8730824c87699c025a0d4ce3a6f/src/backends/qnn/LLaMAOpPackageHtp/LLaMAPackage/src/ops/LLaMAMul.cpp#L147 + HVX_Vector curr0 = is_addr_aligned(iptr0) ? prev0 : *iptr0++; + HVX_Vector curr1 = is_addr_aligned(iptr1) ? 
prev1 : *iptr1++; + HVX_Vector s0 = Q6_V_valign_VVR(curr0, prev0, (size_t) src0); + HVX_Vector s1 = Q6_V_valign_VVR(curr1, prev1, (size_t) src1); + sum = Q6_Vqf32_vadd_Vqf32Vqf32(Q6_Vqf32_vmpy_VsfVsf(s0, s1), sum); + prev0 = curr0; + prev1 = curr1; + } + + const size_t leftover = count % kFloatsPerVector; + const size_t leftover_bytes = leftover * sizeof(float); + if (leftover > 0) { + // handle the leftover elements + HVX_Vector curr0 = (leftover_bytes + unaligned_bytes(iptr0) > kBytesPerVector) ? *iptr0 : prev0; + curr0 = Q6_V_valign_VVR(curr0, prev0, (size_t) src0); + + HVX_Vector curr1 = (leftover_bytes + unaligned_bytes(iptr1) > kBytesPerVector) ? *iptr1 : prev1; + curr1 = Q6_V_valign_VVR(curr1, prev1, (size_t) src1); + + sum = Q6_Vqf32_vadd_Vqf32Vqf32( + Q6_V_valign_VVR(Q6_Vqf32_vmpy_VsfVsf(curr0, curr1), Q6_V_vzero(), leftover_bytes), sum); + } + + // TODO: do we have a better way to do the reduction? + for (size_t i = kFloatsPerVector / 2; i > 0; i /= 2) { + sum = Q6_Vqf32_vadd_Vqf32Vqf32(sum, Q6_V_vror_VR(sum, i * sizeof(float))); + } + + float result; + q6op_vstu_variable_ARV(&result, sizeof(float), Q6_Vsf_equals_Vqf32(sum)); + return result; +} + +template +inline GraphStatus mul_mat_2d_f32(TensorType & out_0, const TensorType & in_0, const TensorType & in_1) { + // TODO: handle strides? + if (in_1.dim(1) != in_0.dim(1)) { + return GraphStatus::ErrorDimensions; + } + + size_t dims[4] = { in_1.dim(0), in_0.dim(0) }; + out_0.set_dims(dims); + + auto in0_ptr = (float *) in_0.raw_data_const(); + auto in1_ptr = (float *) in_1.raw_data_const(); + auto out_ptr = (float *) out_0.raw_data(); + + for (size_t i = 0; i < dims[0]; i++) { + // TODO: prefetch? + auto * in1_row = in1_ptr + i * in_1.dim(1); + auto * out_row = out_ptr + i * dims[1]; + for (size_t j = 0; j < dims[1]; j++) { + *out_row++ = vec_dot_product_f32(in0_ptr + j * in_0.dim(1), in1_row, in_0.dim(1)); + } + } + + return GraphStatus::Success; +} + +} // namespace + +/* execute functions for ops */ + +template +GraphStatus ggmlmulmatImpl(TensorType & out_0, const TensorType & in_0, const TensorType & in_1) { + if (!in_0.raw_data_const() || !in_1.raw_data_const() || !out_0.raw_data()) { + return GraphStatus::ErrorBadInput; + } + + if (in_0.rank() != in_1.rank()) { + return GraphStatus::ErrorRank; + } + + auto rank = in_0.rank(); + switch (rank) { + case 4: + case 3: + // TODO: add implementation + return GraphStatus::ErrorUnsupported; + case 2: + return mul_mat_2d_f32(out_0, in_0, in_1); + } + + return GraphStatus::ErrorRank; +} + +__attribute__((unused)) static float ggmlmulmatCostFunc(const Op * op) { + /* + * add code here + * */ + + float cost = 0.0; // add cost computation here + return cost; +} + +/* At the bottom of the op file, call END_PKG_OP_DEFINITION(), + where is as BEGIN_PKG_OP_DEFINITION +*/ +END_PKG_OP_DEFINITION(PKG_GgmlMulMat); diff --git a/ggml/src/ggml-qnn/logger.cpp b/ggml/src/ggml-qnn/qnn/logger.cpp similarity index 100% rename from ggml/src/ggml-qnn/logger.cpp rename to ggml/src/ggml-qnn/qnn/logger.cpp diff --git a/ggml/src/ggml-qnn/logger.hpp b/ggml/src/ggml-qnn/qnn/logger.hpp similarity index 100% rename from ggml/src/ggml-qnn/logger.hpp rename to ggml/src/ggml-qnn/qnn/logger.hpp diff --git a/ggml/src/ggml-qnn/op-config-base.hpp b/ggml/src/ggml-qnn/qnn/op-config-base.hpp similarity index 98% rename from ggml/src/ggml-qnn/op-config-base.hpp rename to ggml/src/ggml-qnn/qnn/op-config-base.hpp index 87ca798272..c2370000b2 100644 --- a/ggml/src/ggml-qnn/op-config-base.hpp +++ 
b/ggml/src/ggml-qnn/qnn/op-config-base.hpp @@ -3,6 +3,7 @@ #include #include +#include "common.hpp" #include "ggml-qnn.h" #include "qnn-types.hpp" #include "tensor.hpp" @@ -60,7 +61,7 @@ class ggml_qnn_op_config { * @param graph_handle * @return true if tensors and nodes are successfully created, false otherwise. */ - virtual bool initialize_op_nodes(QNNBackend device, Qnn_GraphHandle_t graph_handle) = 0; + virtual bool initialize_op_nodes(backend_index_type device, Qnn_GraphHandle_t graph_handle) = 0; /** * @brief Pure virtual function to retrieve the input tensors. diff --git a/ggml/src/ggml-qnn/op-config-caps.cpp b/ggml/src/ggml-qnn/qnn/op-config-caps.cpp similarity index 95% rename from ggml/src/ggml-qnn/op-config-caps.cpp rename to ggml/src/ggml-qnn/qnn/op-config-caps.cpp index 6fd65aec08..d5b55eff97 100644 --- a/ggml/src/ggml-qnn/op-config-caps.cpp +++ b/ggml/src/ggml-qnn/qnn/op-config-caps.cpp @@ -224,18 +224,23 @@ static_assert(kOpCaps[GGML_OP_COUNT + GGML_UNARY_OP_GELU].qnn_op_name, static_assert(std::size(kOpCaps) == (GGML_OP_COUNT + GGML_UNARY_OP_COUNT), "GGML_OP_COUNT does not match the size of the kOpCaps table"); -std::shared_ptr mat_mul_op_constructor(const ggml_tensor * op, - const std::string & instance_name, - std::shared_ptr qnn_instance) { - GGML_UNUSED(op); +std::shared_ptr mat_mul_op_constructor(const ggml_tensor * op, + const std::string & instance_name, + qnn::qnn_instance_ptr qnn_instance) { + if (qnn_instance->has_custom_op_package() && ggml_n_dims(op) == 2) { + QNN_LOG_DEBUG("create GgmlMulMat, name %s, use GgmlOpPackage\n", instance_name.c_str()); + return std::make_shared(instance_name, "GgmlOpPackage", "GgmlMulMat", + qnn_instance); + } + QNN_LOG_DEBUG("create QNN_OP_MAT_MUL, name %s\n", instance_name.c_str()); return std::make_shared(instance_name, qnn_instance); } template -std::shared_ptr generic_op_constructor(const ggml_tensor * op, - const std::string & instance_name, - std::shared_ptr qnn_instance) { +std::shared_ptr generic_op_constructor(const ggml_tensor * op, + const std::string & instance_name, + qnn::qnn_instance_ptr qnn_instance) { GGML_UNUSED(op); static_assert(_op < std::size(kOpCaps)); static_assert(kOpCaps[_op].qnn_op_name != nullptr); @@ -251,8 +256,9 @@ void add_type_parameters(std::shared_ptr op, const } template -std::shared_ptr op_constructor_with_type_param( - const ggml_tensor * op, const std::string & instance_name, std::shared_ptr qnn_instance) { +std::shared_ptr op_constructor_with_type_param(const ggml_tensor * op, + const std::string & instance_name, + qnn::qnn_instance_ptr qnn_instance) { static_assert(std::is_base_of::value); static_assert(_op < std::size(kOpCaps)); diff --git a/ggml/src/ggml-qnn/op-config-impl.cpp b/ggml/src/ggml-qnn/qnn/op-config-impl.cpp similarity index 94% rename from ggml/src/ggml-qnn/op-config-impl.cpp rename to ggml/src/ggml-qnn/qnn/op-config-impl.cpp index b85f145045..e546da4929 100644 --- a/ggml/src/ggml-qnn/op-config-impl.cpp +++ b/ggml/src/ggml-qnn/qnn/op-config-impl.cpp @@ -48,7 +48,7 @@ void ggml_qnn_op_config_base::add_scalar_param(const std::string & name, const Q bool ggml_qnn_op_config_base::add_tensor_param(const std::string & name, const qnn_dimension_array_t & dimensions, int rank, const uint8_t * data, const Qnn_DataType_t data_type, - QNNBackend device, Qnn_GraphHandle_t graph_handle) { + backend_index_type device, Qnn_GraphHandle_t graph_handle) { std::string tensor_name = _name + name + std::to_string(_tensor_parameters.size()); auto param_tensor = 
std::make_shared(ggml_qnn_tensor::PARAMETER, tensor_name, dimensions, data_type, rank, device, graph_handle, _qnn_instance); @@ -131,7 +131,8 @@ bool ggml_qnn_op_config_base::add_op_to_graph(Qnn_GraphHandle_t graph_handle) { auto qnn_interface = _qnn_instance->get_qnn_interface(); auto error = qnn_interface->qnn_graph_add_node(graph_handle, get_op_config()); if (error != QNN_SUCCESS) { - QNN_LOG_ERROR("[%s]qnn_graph_add_node.error: %s\n", _name.c_str(), get_qnn_error_string(error)); + QNN_LOG_ERROR("[%s][%s][%s]qnn_graph_add_node.error: %s\n", _name.c_str(), _package_name.c_str(), + _op_type.c_str(), get_qnn_error_string(error)); return false; } @@ -183,13 +184,13 @@ Qnn_OpConfig_t ggml_qnn_op_config_base::get_op_config() { return config; } -bool ggml_qnn_single_op_config::initialize_op_nodes(QNNBackend device, Qnn_GraphHandle_t graph_handle) { +bool ggml_qnn_single_op_config::initialize_op_nodes(backend_index_type device, Qnn_GraphHandle_t graph_handle) { GGML_UNUSED(device); GGML_UNUSED(graph_handle); return true; } -bool ggml_qnn_rmsnorm_op_config::initialize_op_nodes(QNNBackend device, Qnn_GraphHandle_t graph_handle) { +bool ggml_qnn_rmsnorm_op_config::initialize_op_nodes(backend_index_type device, Qnn_GraphHandle_t graph_handle) { constexpr const uint32_t kAxes[] = { 0 }; add_tensor_param(QNN_OP_RMS_NORM_PARAM_AXES, { 1 }, 1, reinterpret_cast(kAxes), QNN_DATATYPE_UINT_32, device, graph_handle); @@ -220,7 +221,7 @@ bool ggml_qnn_aggregate_op_config::bind_output_tensors(const ggml_tensor_array_t return qnn::bind_tensors(tensor_outputs, _tensor_outputs); } -bool ggml_qnn_matmul_op_config::initialize_op_nodes(QNNBackend device, Qnn_GraphHandle_t graph_handle) { +bool ggml_qnn_matmul_op_config::initialize_op_nodes(backend_index_type device, Qnn_GraphHandle_t graph_handle) { GGML_ASSERT(_tensor_inputs.size() == 2); GGML_ASSERT(_tensor_outputs.size() == 1); @@ -251,8 +252,9 @@ bool ggml_qnn_matmul_op_config::initialize_op_nodes(QNNBackend device, Qnn_Graph return true; } -qnn_tensor_ptr_t ggml_qnn_matmul_op_config::create_gather_nodes(QNNBackend device, Qnn_GraphHandle_t graph_handle, - const int rank, qnn_tensor_ptr_t tensor_input, +qnn_tensor_ptr_t ggml_qnn_matmul_op_config::create_gather_nodes(backend_index_type device, + Qnn_GraphHandle_t graph_handle, const int rank, + qnn_tensor_ptr_t tensor_input, qnn_dimension_array_t output_dimensions) { if (rank <= 2) { return tensor_input; @@ -270,7 +272,7 @@ qnn_tensor_ptr_t ggml_qnn_matmul_op_config::create_gather_nodes(QNNBackend devic // create concat nodes, to convert tensor shape from [ne03, ne02, n, k] to [ne03 * x, ne02 * y, n, k] constexpr const auto create_node = [](const std::string & name, const int rank, const int axis, const qnn_dimension_array_t & dimensions, - qnn_tensor_ptr_t tensor_input, QNNBackend device, Qnn_GraphHandle_t graph_handle, + qnn_tensor_ptr_t tensor_input, backend_index_type device, Qnn_GraphHandle_t graph_handle, qnn_instance_ptr qnn_instance, qnn_tensor_ptr_t & tensor_output) -> qnn_op_config_ptr_t { auto gather_out = std::make_shared(ggml_qnn_tensor::INTERMEDIATE, name + "_out", dimensions, @@ -318,8 +320,8 @@ qnn_tensor_ptr_t ggml_qnn_matmul_op_config::create_gather_nodes(QNNBackend devic return gather1_out; } -Qnn_DataType_t ggml_qnn_matmul_op_config::create_input_convert_nodes(QNNBackend device, Qnn_GraphHandle_t graph_handle, - const int rank, +Qnn_DataType_t ggml_qnn_matmul_op_config::create_input_convert_nodes(backend_index_type device, + Qnn_GraphHandle_t graph_handle, const int rank, qnn_tensor_array_t & 
tensor_inputs) { if (device == QNN_BACKEND_GPU) { // there's no convert op for GPU, so we should create matmul nodes directly. @@ -352,8 +354,8 @@ Qnn_DataType_t ggml_qnn_matmul_op_config::create_input_convert_nodes(QNNBackend return tensor_type; } -qnn_op_config_ptr_t ggml_qnn_matmul_op_config::create_output_convert_nodes(QNNBackend device, - Qnn_GraphHandle_t graph_handle, +qnn_op_config_ptr_t ggml_qnn_matmul_op_config::create_output_convert_nodes(backend_index_type device, + Qnn_GraphHandle_t graph_handle, const int rank, Qnn_DataType_t tensor_type, qnn_tensor_array_t & tensor_outputs) { GGML_ASSERT(tensor_outputs.size() == 1); diff --git a/ggml/src/ggml-qnn/op-config-impl.hpp b/ggml/src/ggml-qnn/qnn/op-config-impl.hpp similarity index 83% rename from ggml/src/ggml-qnn/op-config-impl.hpp rename to ggml/src/ggml-qnn/qnn/op-config-impl.hpp index 558b5cafbe..36de66858a 100644 --- a/ggml/src/ggml-qnn/op-config-impl.hpp +++ b/ggml/src/ggml-qnn/qnn/op-config-impl.hpp @@ -23,7 +23,7 @@ class ggml_qnn_op_config_base : public ggml_qnn_op_config { void add_scalar_param(const std::string & name, const Qnn_Scalar_t scalar); bool add_tensor_param(const std::string & name, const qnn_dimension_array_t & dimensions, int rank, - const uint8_t * data, const Qnn_DataType_t data_type, QNNBackend device, + const uint8_t * data, const Qnn_DataType_t data_type, backend_index_type device, Qnn_GraphHandle_t graph_handle); void set_input_tensors(qnn::qnn_tensor_array_t & tensor_inputs) override; @@ -65,7 +65,7 @@ class ggml_qnn_single_op_config : public ggml_qnn_op_config_base { const std::string & op_type, qnn_instance_ptr qnn_instance) : ggml_qnn_op_config_base(name, package_name, op_type, qnn_instance) {} - bool initialize_op_nodes(QNNBackend device, Qnn_GraphHandle_t graph_handle) override; + bool initialize_op_nodes(backend_index_type device, Qnn_GraphHandle_t graph_handle) override; private: DISABLE_COPY(ggml_qnn_single_op_config); @@ -78,7 +78,7 @@ class ggml_qnn_rmsnorm_op_config : public ggml_qnn_op_config_base { const std::string & op_type, qnn_instance_ptr qnn_instance) : ggml_qnn_op_config_base(name, package_name, op_type, qnn_instance) {} - bool initialize_op_nodes(QNNBackend device, Qnn_GraphHandle_t graph_handle) override; + bool initialize_op_nodes(backend_index_type device, Qnn_GraphHandle_t graph_handle) override; private: DISABLE_COPY(ggml_qnn_rmsnorm_op_config); @@ -143,15 +143,16 @@ class ggml_qnn_matmul_op_config : public ggml_qnn_aggregate_op_config { ggml_qnn_matmul_op_config(const std::string & name, qnn_instance_ptr qnn_instance) : ggml_qnn_aggregate_op_config(name, qnn_instance) {} - bool initialize_op_nodes(QNNBackend device, Qnn_GraphHandle_t graph_handle) override; + bool initialize_op_nodes(backend_index_type device, Qnn_GraphHandle_t graph_handle) override; private: - qnn_tensor_ptr_t create_gather_nodes(QNNBackend device, Qnn_GraphHandle_t graph_handle, const int rank, - qnn_tensor_ptr_t tensor_input, qnn_dimension_array_t output_dimensions); - Qnn_DataType_t create_input_convert_nodes(QNNBackend device, Qnn_GraphHandle_t graph_handle, const int rank, - qnn_tensor_array_t & tensor_inputs); - qnn_op_config_ptr_t create_output_convert_nodes(QNNBackend device, Qnn_GraphHandle_t graph_handle, const int rank, - Qnn_DataType_t tensor_type, qnn_tensor_array_t & tensor_outputs); + qnn_tensor_ptr_t create_gather_nodes(backend_index_type device, Qnn_GraphHandle_t graph_handle, const int rank, + qnn_tensor_ptr_t tensor_input, qnn_dimension_array_t output_dimensions); + Qnn_DataType_t 
create_input_convert_nodes(backend_index_type device, Qnn_GraphHandle_t graph_handle, const int rank, + qnn_tensor_array_t & tensor_inputs); + qnn_op_config_ptr_t create_output_convert_nodes(backend_index_type device, Qnn_GraphHandle_t graph_handle, + const int rank, Qnn_DataType_t tensor_type, + qnn_tensor_array_t & tensor_outputs); bool create_mat_mul_nodes(qnn_tensor_array_t & tensor_inputs, qnn_tensor_array_t & tensor_outputs); DISABLE_COPY(ggml_qnn_matmul_op_config); diff --git a/ggml/src/ggml-qnn/op-config.hpp b/ggml/src/ggml-qnn/qnn/op-config.hpp similarity index 100% rename from ggml/src/ggml-qnn/op-config.hpp rename to ggml/src/ggml-qnn/qnn/op-config.hpp diff --git a/ggml/src/ggml-qnn/profiler.cpp b/ggml/src/ggml-qnn/qnn/profiler.cpp similarity index 100% rename from ggml/src/ggml-qnn/profiler.cpp rename to ggml/src/ggml-qnn/qnn/profiler.cpp diff --git a/ggml/src/ggml-qnn/profiler.hpp b/ggml/src/ggml-qnn/qnn/profiler.hpp similarity index 100% rename from ggml/src/ggml-qnn/profiler.hpp rename to ggml/src/ggml-qnn/qnn/profiler.hpp diff --git a/ggml/src/ggml-qnn/qnn-lib.cpp b/ggml/src/ggml-qnn/qnn/qnn-lib.cpp similarity index 69% rename from ggml/src/ggml-qnn/qnn-lib.cpp rename to ggml/src/ggml-qnn/qnn/qnn-lib.cpp index 2ec76939c9..12e94aaac7 100644 --- a/ggml/src/ggml-qnn/qnn-lib.cpp +++ b/ggml/src/ggml-qnn/qnn/qnn-lib.cpp @@ -3,6 +3,9 @@ #include +#include "common.hpp" +#include "rpc-mem.hpp" + #if defined(__linux__) # include #endif @@ -10,19 +13,23 @@ namespace { #ifdef _WIN32 -constexpr const char * kQnnSystemLibName = "QnnSystem.dll"; -constexpr const char * kQnnRpcLibName = "libcdsprpc.dll"; -constexpr const char * kQnnCpuLibName = "QnnCpu.dll"; -constexpr const char * kQnnGpuLibName = "QnnGpu.dll"; -constexpr const char * kQnnNpuLibName = "QnnHtp.dll"; +# define PLATFORM_LIB_FILENAME(name) (name ".dll") #else -constexpr const char * kQnnSystemLibName = "libQnnSystem.so"; -constexpr const char * kQnnRpcLibName = "libcdsprpc.so"; -constexpr const char * kQnnCpuLibName = "libQnnCpu.so"; -constexpr const char * kQnnGpuLibName = "libQnnGpu.so"; -constexpr const char * kQnnNpuLibName = "libQnnHtp.so"; +# define PLATFORM_LIB_FILENAME(name) ("lib" name ".so") #endif +#if defined(__aarch64__) || defined(_M_ARM64) // TODO: check for other platforms +# define PLATFORM_LIB_POSFIX "_aarch64" +#else +# define PLATFORM_LIB_POSFIX "_x64" +#endif + +constexpr const char * kQnnSystemLibName = PLATFORM_LIB_FILENAME("QnnSystem"); +constexpr const char * kQnnCpuLibName = PLATFORM_LIB_FILENAME("QnnCpu"); +constexpr const char * kQnnGpuLibName = PLATFORM_LIB_FILENAME("QnnGpu"); +constexpr const char * kQnnNpuLibName = PLATFORM_LIB_FILENAME("QnnHtp"); +constexpr const char * kQnnCpuPackageLibName = PLATFORM_LIB_FILENAME("QnnGgmlOpPackage" PLATFORM_LIB_POSFIX); + constexpr const qnn::device_caps kDeviceCaps[] = { { // https://docs.qualcomm.com/bundle/publicresource/topics/80-63442-50/CpuOpDefSupplement.html#matmul @@ -46,8 +53,8 @@ constexpr const qnn::device_caps kDeviceCaps[] = { }, }; -static_assert(sizeof(kDeviceCaps) / sizeof(kDeviceCaps[0]) == GGML_QNN_MAX_DEVICES, - "The number of qnn devices should be equal to GGML_QNN_MAX_DEVICES"); +static_assert(sizeof(kDeviceCaps) / sizeof(kDeviceCaps[0]) == QNN_BACKEND_COUNT, + "The number of qnn devices should be equal to QNN_BACKEND_COUNT"); static_assert(kDeviceCaps[QNN_BACKEND_NPU].type == GGML_BACKEND_DEVICE_TYPE_ACCEL, "The NPU device should be an accelerator device"); static_assert(kDeviceCaps[QNN_BACKEND_GPU].type == GGML_BACKEND_DEVICE_TYPE_GPU, 
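
The PLATFORM_LIB_FILENAME and PLATFORM_LIB_POSFIX macros introduced in the hunk above assemble every per-platform library name at compile time through adjacent string-literal concatenation, so constants such as kQnnCpuPackageLibName need no runtime string formatting. A minimal, self-contained sketch of the expansion follows; the macro bodies are copied from this hunk (including the patch's "POSFIX" spelling), while the main() harness and the printed name are illustrative only, not part of the patch:

    #include <cstdio>

    // Per-platform shared-library naming, mirroring qnn-lib.cpp.
    #ifdef _WIN32
    # define PLATFORM_LIB_FILENAME(name) (name ".dll")
    #else
    # define PLATFORM_LIB_FILENAME(name) ("lib" name ".so")
    #endif

    // Architecture suffix, mirroring the patch ("POSFIX" follows its spelling).
    #if defined(__aarch64__) || defined(_M_ARM64)
    # define PLATFORM_LIB_POSFIX "_aarch64"
    #else
    # define PLATFORM_LIB_POSFIX "_x64"
    #endif

    int main() {
        // Adjacent string literals concatenate at compile time, so on aarch64
        // Linux/Android this prints "libQnnGgmlOpPackage_aarch64.so".
        std::puts(PLATFORM_LIB_FILENAME("QnnGgmlOpPackage" PLATFORM_LIB_POSFIX));
        return 0;
    }

On an x86_64 Windows build the same expression yields "QnnGgmlOpPackage_x64.dll", which is exactly the name get_op_package_lib_info() later hands to qnn_backend_register_op_package.
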
@@ -102,23 +109,67 @@ bool set_qnn_lib_search_path(const std::string & custom_lib_search_path) { return true; } -qnn::dl_handler_t load_lib_with_fallback(const std::string & lib_path, const std::string & load_directory) { +common::dl_handler_t load_lib_with_fallback(const std::string & lib_path, const std::string & load_directory) { std::filesystem::path full_path(load_directory); full_path /= std::filesystem::path(lib_path).filename(); - auto handle = qnn::dl_load(full_path.string()); + auto handle = common::dl_load(full_path.string()); if (!handle) { QNN_LOG_WARN("failed to load %s, fallback to %s\n", full_path.c_str(), lib_path.c_str()); - handle = qnn::dl_load(lib_path); + handle = common::dl_load(lib_path); } return handle; } +struct op_package_lib_info { + const char * lib_name; + const char * interface; + const char * type; + size_t htp_arch; + const char * extra_lib_name = nullptr; +}; + +const op_package_lib_info & get_op_package_lib_info(uint32_t soc_model, size_t htp_arch) { + constexpr static const op_package_lib_info kOpPackageLibInfo[] = { + { kQnnCpuPackageLibName, "GgmlOpPackageInterfaceProvider", "CPU", qnn::NONE, + PLATFORM_LIB_FILENAME("HtpPrepare") }, + { PLATFORM_LIB_FILENAME("QnnGgmlOpPackage_v68"), "GgmlOpPackageInterfaceProvider", "HTP", qnn::V68 }, + { PLATFORM_LIB_FILENAME("QnnGgmlOpPackage_v69"), "GgmlOpPackageInterfaceProvider", "HTP", qnn::V69 }, + { PLATFORM_LIB_FILENAME("QnnGgmlOpPackage_v73"), "GgmlOpPackageInterfaceProvider", "HTP", qnn::V73 }, + { PLATFORM_LIB_FILENAME("QnnGgmlOpPackage_v75"), "GgmlOpPackageInterfaceProvider", "HTP", qnn::V75 }, + { PLATFORM_LIB_FILENAME("QnnGgmlOpPackage_v79"), "GgmlOpPackageInterfaceProvider", "HTP", qnn::V79 }, + }; + + if (soc_model == qnn::UNKNOWN || soc_model == qnn::EMULATOR_X64 || soc_model == qnn::EMULATOR_AARCH64) { + return kOpPackageLibInfo[0]; + } + + switch (htp_arch) { + case qnn::V68: + static_assert(kOpPackageLibInfo[1].htp_arch == qnn::V68); + return kOpPackageLibInfo[1]; + case qnn::V69: + static_assert(kOpPackageLibInfo[2].htp_arch == qnn::V69); + return kOpPackageLibInfo[2]; + case qnn::V73: + static_assert(kOpPackageLibInfo[3].htp_arch == qnn::V73); + return kOpPackageLibInfo[3]; + case qnn::V75: + static_assert(kOpPackageLibInfo[4].htp_arch == qnn::V75); + return kOpPackageLibInfo[4]; + case qnn::V79: + default: + static_assert(kOpPackageLibInfo[5].htp_arch == qnn::V79); + return kOpPackageLibInfo[5]; + } +} + } // namespace namespace qnn { -qnn_system_interface::qnn_system_interface(const QnnSystemInterface_t & qnn_sys_interface, dl_handler_t lib_handle) : +qnn_system_interface::qnn_system_interface(const QnnSystemInterface_t & qnn_sys_interface, + common::dl_handler_t lib_handle) : _qnn_sys_interface(qnn_sys_interface), _lib_handle(lib_handle) { qnn_system_context_create(&_qnn_system_handle); @@ -139,15 +190,16 @@ qnn_system_interface::~qnn_system_interface() { } if (_lib_handle) { - if (!dl_unload(_lib_handle)) { - QNN_LOG_WARN("failed to close QnnSystem library, error %s\n", dl_error()); + if (!common::dl_unload(_lib_handle)) { + QNN_LOG_WARN("failed to close QnnSystem library, error %s\n", common::dl_error()); } } else { QNN_LOG_WARN("system lib handle is null\n"); } } -qnn_instance::qnn_instance(const std::string & lib_path, QNNBackend device) : _additional_lib_load_path(lib_path) { +qnn_instance::qnn_instance(const std::string & lib_path, backend_index_type device) : + _additional_lib_load_path(lib_path) { _backend_lib_name = kDeviceCaps[device].lib_name; if (set_qnn_lib_search_path(lib_path)) 
{ QNN_LOG_DEBUG("[%s] set_qnn_lib_search_path succeed\n", _backend_lib_name.c_str()); @@ -156,23 +208,23 @@ qnn_instance::qnn_instance(const std::string & lib_path, QNNBackend device) : _a } } -int qnn_instance::qnn_init(const QnnSaver_Config_t ** saver_config) { +bool qnn_instance::qnn_init(const QnnSaver_Config_t ** saver_config) { BackendIdType backend_id = QNN_BACKEND_ID_NULL; QNN_LOG_DEBUG("enter qnn_init\n"); std::lock_guard lock(_init_mutex); if (load_system() != 0) { QNN_LOG_WARN("failed to load QNN system lib\n"); - return 1; + return false; } else { QNN_LOG_DEBUG("load QNN system lib successfully\n"); } std::string backend_lib_path = _backend_lib_name; if (_lib_path_to_backend_id.count(backend_lib_path) == 0) { - if (load_backend(backend_lib_path, saver_config) != 0) { + if (!load_backend(backend_lib_path, saver_config)) { QNN_LOG_WARN("failed to load QNN backend\n"); - return 2; + return false; } } @@ -182,15 +234,15 @@ int qnn_instance::qnn_init(const QnnSaver_Config_t ** saver_config) { "library %s is loaded but loaded backend count=%zu, " "loaded lib_handle count=%zu", backend_lib_path.c_str(), _loaded_backend.count(backend_id), _loaded_lib_handle.count(backend_id)); - return 3; + return false; } _qnn_interface = std::make_shared(*_loaded_backend[backend_id]); _qnn_interface->qnn_log_create(qnn::sdk_logcallback, _qnn_log_level, &_qnn_log_handle); if (!_qnn_log_handle) { // NPU backend not work on Qualcomm SoC equipped low-end phone - QNN_LOG_WARN("why failed to initialize qnn log\n"); - return 4; + QNN_LOG_WARN("failed to initialize qnn log\n"); + return false; } else { QNN_LOG_DEBUG("initialize qnn log successfully\n"); } @@ -199,22 +251,23 @@ int qnn_instance::qnn_init(const QnnSaver_Config_t ** saver_config) { _qnn_interface->qnn_backend_create( _qnn_log_handle, temp_backend_config.empty() ? 
nullptr : temp_backend_config.data(), &_qnn_backend_handle); if (!_qnn_backend_handle) { - QNN_LOG_WARN("why failed to initialize qnn backend\n"); - return 5; + QNN_LOG_WARN("failed to initialize qnn backend\n"); + return false; } else { QNN_LOG_DEBUG("initialize qnn backend successfully\n"); } auto qnn_status = _qnn_interface->qnn_property_has_capability(QNN_PROPERTY_GROUP_DEVICE); - if (QNN_PROPERTY_NOT_SUPPORTED == qnn_status) { - QNN_LOG_WARN("device property is not supported\n"); - } - if (QNN_PROPERTY_ERROR_UNKNOWN_KEY == qnn_status) { - QNN_LOG_WARN("device property is not known to backend\n"); + switch (qnn_status) { + case QNN_PROPERTY_NOT_SUPPORTED: + QNN_LOG_WARN("device property is not supported\n"); + break; + case QNN_PROPERTY_ERROR_UNKNOWN_KEY: + QNN_LOG_WARN("device property is unknown\n"); + break; } - qnn_status = QNN_SUCCESS; - if (_backend_lib_name.find("Htp") != _backend_lib_name.npos) { + { const QnnDevice_PlatformInfo_t * p_info = nullptr; qnn_status = _qnn_interface->qnn_device_get_platform_info(nullptr, &p_info); if (qnn_status == QNN_SUCCESS) { @@ -243,57 +296,50 @@ int qnn_instance::qnn_init(const QnnSaver_Config_t ** saver_config) { _qnn_interface->qnn_device_free_platform_info(nullptr, p_info); } else { // For emulator, we can't get platform info - QNN_LOG_WARN("failed to get platform info, are we in emulator?\n"); - _soc_info = { NONE, UNKNOWN_SM, 0 }; + QNN_LOG_INFO("failed to get platform info, emulator or cpu backend?\n"); +#if defined(__aarch64__) || defined(_M_ARM64) + _soc_info = { EMULATOR_AARCH64, NONE, 0 }; +#elif defined(__x86_64__) || defined(__amd64__) || defined(_M_X64) + _soc_info = { EMULATOR_X64, NONE, 0 }; +#else + _soc_info = { UNKNOWN, NONE, 0 }; +#endif } + } - QnnHtpDevice_CustomConfig_t soc_customconfig; - soc_customconfig.option = QNN_HTP_DEVICE_CONFIG_OPTION_SOC; - soc_customconfig.socModel = _soc_info.soc_model; - QnnDevice_Config_t soc_devconfig; - soc_devconfig.option = QNN_DEVICE_CONFIG_OPTION_CUSTOM; - soc_devconfig.customConfig = &soc_customconfig; - - QnnHtpDevice_CustomConfig_t arch_customconfig; - arch_customconfig.option = QNN_HTP_DEVICE_CONFIG_OPTION_ARCH; - arch_customconfig.arch.arch = (QnnHtpDevice_Arch_t) _soc_info.htp_arch; - arch_customconfig.arch.deviceId = 0; // Id of device to be used. 0 will use by default. 
- QnnDevice_Config_t arch_devconfig; - arch_devconfig.option = QNN_DEVICE_CONFIG_OPTION_CUSTOM; - arch_devconfig.customConfig = &arch_customconfig; - - const QnnDevice_Config_t * p_deviceconfig[] = { &soc_devconfig, &arch_devconfig, nullptr }; - qnn_status = _qnn_interface->qnn_device_create(_qnn_log_handle, p_deviceconfig, &_qnn_device_handle); - } else { + { qnn_status = _qnn_interface->qnn_device_create(_qnn_log_handle, nullptr, &_qnn_device_handle); - } - if (QNN_SUCCESS != qnn_status && QNN_DEVICE_ERROR_UNSUPPORTED_FEATURE != qnn_status) { - QNN_LOG_WARN("failed to create QNN device\n"); - } else { - QNN_LOG_INFO("create QNN device successfully\n"); + if (QNN_SUCCESS != qnn_status && QNN_DEVICE_ERROR_UNSUPPORTED_FEATURE != qnn_status) { + QNN_LOG_WARN("failed to create QNN device\n"); + } else { + QNN_LOG_INFO("create QNN device successfully\n"); + } } - _rpc_lib_handle = load_lib_with_fallback(kQnnRpcLibName, _additional_lib_load_path); - if (_rpc_lib_handle) { - _pfn_rpc_mem_alloc = reinterpret_cast(dl_sym(_rpc_lib_handle, "rpcmem_alloc")); - _pfn_rpc_mem_free = reinterpret_cast(dl_sym(_rpc_lib_handle, "rpcmem_free")); - _pfn_rpc_mem_to_fd = reinterpret_cast(dl_sym(_rpc_lib_handle, "rpcmem_to_fd")); - if (!_pfn_rpc_mem_alloc || !_pfn_rpc_mem_free || !_pfn_rpc_mem_to_fd) { - QNN_LOG_WARN("unable to access symbols in QNN RPC lib. error: %s\n", dl_error()); - dl_unload(_rpc_lib_handle); - return 9; + { + auto rpc_mem = std::make_unique(); + if (rpc_mem->is_valid()) { + _rpc_mem = std::move(rpc_mem); + } + } + + { + auto & op_package_info = get_op_package_lib_info(_soc_info.soc_model, _soc_info.htp_arch); + if (op_package_info.extra_lib_name) { + _custom_op_extra_lib_handle = + load_lib_with_fallback(op_package_info.extra_lib_name, _additional_lib_load_path); } - _pfn_rpc_mem_init = reinterpret_cast(dl_sym(_rpc_lib_handle, "rpcmem_init")); - _pfn_rpc_mem_deinit = reinterpret_cast(dl_sym(_rpc_lib_handle, "rpcmem_deinit")); - if (_pfn_rpc_mem_init) { - _pfn_rpc_mem_init(); + qnn_status = _qnn_interface->qnn_backend_register_op_package(_qnn_backend_handle, op_package_info.lib_name, + op_package_info.interface, op_package_info.type); + if (qnn_status != QNN_SUCCESS) { + QNN_LOG_WARN("failed to register op package %s, interface: %s, error: %s\n", op_package_info.lib_name, + op_package_info.interface, qnn::get_qnn_error_string(qnn_status)); + } else { + QNN_LOG_DEBUG("register op package %s successfully, ID %u\n", op_package_info.lib_name, + _qnn_interface->get_backend_id()); + _has_custom_op_package = true; } - - _rpcmem_initialized = true; - QNN_LOG_DEBUG("load rpcmem lib successfully\n"); - } else { - QNN_LOG_WARN("failed to load qualcomm rpc lib, skipping, error:%s\n", dl_error()); } /* TODO: not used, keep it for further usage @@ -302,35 +348,14 @@ int qnn_instance::qnn_init(const QnnSaver_Config_t ** saver_config) { const QnnContext_Config_t * context_configs[] = {&qnn_context_config, nullptr}; */ _qnn_interface->qnn_context_create(_qnn_backend_handle, _qnn_device_handle, nullptr, &_qnn_context_handle); - if (nullptr == _qnn_context_handle) { - QNN_LOG_WARN("why failed to initialize qnn context\n"); - return 10; + if (!_qnn_context_handle) { + QNN_LOG_WARN("failed to initialize qnn context\n"); + return false; } else { QNN_LOG_DEBUG("initialize qnn context successfully\n"); } if (_backend_lib_name.find("Htp") != _backend_lib_name.npos) { - // TODO: faster approach to probe the accurate capacity of rpc ion memory - size_t candidate_size = 0; - uint8_t * rpc_buffer = nullptr; - const int 
size_in_mb = (1 << 20); - size_t probe_slots[] = { 1024, 1536, 2048 - 48, 2048 }; - size_t probe_counts = sizeof(probe_slots) / sizeof(size_t); - for (size_t idx = 0; idx < probe_counts; idx++) { - rpc_buffer = static_cast(alloc_rpcmem(probe_slots[idx] * size_in_mb, sizeof(void *))); - if (!rpc_buffer) { - QNN_LOG_DEBUG("alloc rpcmem %d (MB) failure, %s\n", (int) probe_slots[idx], strerror(errno)); - break; - } else { - candidate_size = probe_slots[idx]; - free_rpcmem(rpc_buffer); - rpc_buffer = nullptr; - } - } - - _rpcmem_capacity = std::max(candidate_size, _rpcmem_capacity); - QNN_LOG_INFO("capacity of QNN rpc ion memory is about %d MB\n", (int) _rpcmem_capacity); - if (init_htp_perfinfra() != 0) { QNN_LOG_WARN("initialize HTP performance failure\n"); } @@ -343,33 +368,16 @@ int qnn_instance::qnn_init(const QnnSaver_Config_t ** saver_config) { } QNN_LOG_DEBUG("leave qnn_init\n"); - - return 0; + return true; } -int qnn_instance::qnn_finalize() { - int ret_status = 0; - Qnn_ErrorHandle_t error = QNN_SUCCESS; - - if (_rpc_lib_handle) { - if (_pfn_rpc_mem_deinit) { - _pfn_rpc_mem_deinit(); - _pfn_rpc_mem_deinit = nullptr; - } - - if (dl_unload(_rpc_lib_handle)) { - QNN_LOG_DEBUG("succeed to close rpcmem lib\n"); - } else { - QNN_LOG_WARN("failed to unload qualcomm's rpc lib, error:%s\n", dl_error()); - } - } - +bool qnn_instance::qnn_finalize() { if (_backend_lib_name.find("Htp") != _backend_lib_name.npos) { _qnn_htp_perfinfra->destroyPowerConfigId(_qnn_power_configid); } if (_qnn_context_handle) { - error = _qnn_interface->qnn_context_free(_qnn_context_handle, nullptr); + auto error = _qnn_interface->qnn_context_free(_qnn_context_handle, nullptr); if (error != QNN_SUCCESS) { QNN_LOG_WARN("failed to free QNN context_handle: ID %u, error %d\n", _qnn_interface->get_backend_id(), (int) QNN_GET_ERROR_CODE(error)); @@ -378,7 +386,7 @@ int qnn_instance::qnn_finalize() { } if (_qnn_device_handle) { - error = _qnn_interface->qnn_device_free(_qnn_device_handle); + auto error = _qnn_interface->qnn_device_free(_qnn_device_handle); if (error != QNN_SUCCESS) { QNN_LOG_WARN("failed to free QNN device_handle: ID %u, error %d\n", _qnn_interface->get_backend_id(), (int) QNN_GET_ERROR_CODE(error)); @@ -387,7 +395,7 @@ int qnn_instance::qnn_finalize() { } if (_qnn_backend_handle) { - error = _qnn_interface->qnn_backend_free(_qnn_backend_handle); + auto error = _qnn_interface->qnn_backend_free(_qnn_backend_handle); if (error != QNN_SUCCESS) { QNN_LOG_WARN("failed to free QNN backend_handle: ID %u, error %d\n", _qnn_interface->get_backend_id(), (int) QNN_GET_ERROR_CODE(error)); @@ -396,7 +404,7 @@ int qnn_instance::qnn_finalize() { } if (_qnn_log_handle) { - error = _qnn_interface->qnn_log_free(_qnn_log_handle); + auto error = _qnn_interface->qnn_log_free(_qnn_log_handle); if (error != QNN_SUCCESS) { QNN_LOG_WARN("failed to free QNN log_handle: ID %u, error %d\n", _qnn_interface->get_backend_id(), (int) QNN_GET_ERROR_CODE(error)); @@ -404,25 +412,31 @@ int qnn_instance::qnn_finalize() { _qnn_log_handle = nullptr; } + if (_custom_op_extra_lib_handle) { + common::dl_unload(_custom_op_extra_lib_handle); + } + unload_backend(); _qnn_sys_interface.reset(); - return ret_status; + _rpc_mem.reset(); + + return true; } int qnn_instance::load_system() { QNN_LOG_DEBUG("[%s]lib: %s\n", _backend_lib_name.c_str(), kQnnSystemLibName); auto system_lib_handle = load_lib_with_fallback(kQnnSystemLibName, _additional_lib_load_path); if (!system_lib_handle) { - QNN_LOG_WARN("can not load QNN library %s, error: %s\n", 
kQnnSystemLibName, dl_error()); + QNN_LOG_WARN("can not load QNN library %s, error: %s\n", kQnnSystemLibName, common::dl_error()); return 1; } - auto * get_providers = - dl_sym_typed(system_lib_handle, "QnnSystemInterface_getProviders"); + auto * get_providers = common::dl_sym_typed( + system_lib_handle, "QnnSystemInterface_getProviders"); if (!get_providers) { - QNN_LOG_WARN("can not load QNN symbol QnnSystemInterface_getProviders: %s\n", dl_error()); + QNN_LOG_WARN("can not load QNN symbol QnnSystemInterface_getProviders: %s\n", common::dl_error()); return 2; } @@ -473,38 +487,42 @@ int qnn_instance::load_system() { return 0; } -int qnn_instance::load_backend(std::string & lib_path, const QnnSaver_Config_t ** /*saver_config*/) { - Qnn_ErrorHandle_t error = QNN_SUCCESS; +bool qnn_instance::load_backend(std::string & lib_path, const QnnSaver_Config_t ** /*saver_config*/) { QNN_LOG_DEBUG("lib_path:%s\n", lib_path.c_str()); auto lib_handle = load_lib_with_fallback(lib_path, _additional_lib_load_path); if (!lib_handle) { - QNN_LOG_WARN("can not open QNN library %s, with error: %s\n", lib_path.c_str(), dl_error()); - return 1; + QNN_LOG_WARN("can not open QNN library %s, with error: %s\n", lib_path.c_str(), common::dl_error()); + return false; } - auto get_providers = dl_sym_typed(lib_handle, "QnnInterface_getProviders"); + auto get_providers = + common::dl_sym_typed(lib_handle, "QnnInterface_getProviders"); if (!get_providers) { - QNN_LOG_WARN("can not load symbol QnnInterface_getProviders : %s\n", dl_error()); - return 2; + QNN_LOG_WARN("can not load symbol QnnInterface_getProviders : %s\n", common::dl_error()); + common::dl_unload(lib_handle); + return false; } std::uint32_t num_providers = 0; const QnnInterface_t ** provider_list = nullptr; - error = get_providers(&provider_list, &num_providers); + auto error = get_providers(&provider_list, &num_providers); if (error != QNN_SUCCESS) { QNN_LOG_WARN("failed to get providers, error %d\n", (int) QNN_GET_ERROR_CODE(error)); - return 3; + common::dl_unload(lib_handle); + return false; } QNN_LOG_DEBUG("num_providers=%d\n", num_providers); if (num_providers != _required_num_providers) { QNN_LOG_WARN("providers is %d instead of required %d\n", num_providers, _required_num_providers); - return 4; + common::dl_unload(lib_handle); + return false; } if (!provider_list) { QNN_LOG_WARN("failed to get qnn interface providers\n"); - return 5; + common::dl_unload(lib_handle); + return false; } bool found_valid_interface = false; QNN_INTERFACE_VER_TYPE qnn_interface; @@ -519,7 +537,8 @@ int qnn_instance::load_backend(std::string & lib_path, const QnnSaver_Config_t * if (!found_valid_interface) { QNN_LOG_WARN("unable to find a valid qnn interface\n"); - return 6; + common::dl_unload(lib_handle); + return false; } else { QNN_LOG_DEBUG("find a valid qnn interface\n"); } @@ -532,31 +551,29 @@ int qnn_instance::load_backend(std::string & lib_path, const QnnSaver_Config_t * _loaded_backend[backend_id] = provider_list[0]; if (_loaded_lib_handle.count(backend_id) > 0) { QNN_LOG_WARN("closing %p\n", _loaded_lib_handle[backend_id]); - if (!dl_unload(_loaded_lib_handle[backend_id])) { - QNN_LOG_WARN("fail to close %p with error %s\n", _loaded_lib_handle[backend_id], dl_error()); + if (!common::dl_unload(_loaded_lib_handle[backend_id])) { + QNN_LOG_WARN("fail to close %p with error %s\n", _loaded_lib_handle[backend_id], common::dl_error()); } } _loaded_lib_handle[backend_id] = lib_handle; _backend_id = backend_id; - return 0; + return true; } -int 
qnn_instance::unload_backend() { +void qnn_instance::unload_backend() { for (auto & it : _loaded_lib_handle) { - if (!dl_unload(it.second)) { - QNN_LOG_WARN("failed to close QNN backend %d, error %s\n", it.first, dl_error()); + if (!common::dl_unload(it.second)) { + QNN_LOG_WARN("failed to close QNN backend %d, error %s\n", it.first, common::dl_error()); } } _loaded_lib_handle.clear(); _lib_path_to_backend_id.clear(); _loaded_backend.clear(); - - return 0; } -const device_caps & get_device_caps(QNNBackend device) { +const device_caps & get_device_caps(backend_index_type device) { return kDeviceCaps[device]; } diff --git a/ggml/src/ggml-qnn/qnn-lib.hpp b/ggml/src/ggml-qnn/qnn/qnn-lib.hpp similarity index 91% rename from ggml/src/ggml-qnn/qnn-lib.hpp rename to ggml/src/ggml-qnn/qnn/qnn-lib.hpp index 3d0084b868..2e7c9339aa 100644 --- a/ggml/src/ggml-qnn/qnn-lib.hpp +++ b/ggml/src/ggml-qnn/qnn/qnn-lib.hpp @@ -24,8 +24,9 @@ #include #include -#include "dl-loader.hpp" +#include "dyn-lib-loader.hpp" #include "qnn-types.hpp" +#include "rpc-mem.hpp" #include "utils.hpp" namespace qnn { @@ -48,7 +49,7 @@ class qnn_system_interface { } public: - qnn_system_interface(const QnnSystemInterface_t & qnn_sys_interface, dl_handler_t lib_handle); + qnn_system_interface(const QnnSystemInterface_t & qnn_sys_interface, common::dl_handler_t lib_handle); ~qnn_system_interface(); bool is_valid() const { return _qnn_system_handle != nullptr; } @@ -67,7 +68,7 @@ class qnn_system_interface { void operator=(qnn_system_interface &&) = delete; const QnnSystemInterface_t _qnn_sys_interface = {}; - dl_handler_t _lib_handle = nullptr; + common::dl_handler_t _lib_handle = nullptr; QnnSystemContext_Handle_t _qnn_system_handle = nullptr; }; @@ -152,12 +153,12 @@ class qnn_instance { public: using BackendIdType = decltype(QnnInterface_t{}.backendId); - explicit qnn_instance(const std::string & lib_path, QNNBackend device); + explicit qnn_instance(const std::string & lib_path, backend_index_type device); ~qnn_instance() {} - int qnn_init(const QnnSaver_Config_t ** saver_config); - int qnn_finalize(); + bool qnn_init(const QnnSaver_Config_t ** saver_config); + bool qnn_finalize(); qnn_interface_ptr get_qnn_interface() { if (!_qnn_interface) { @@ -277,18 +278,14 @@ class qnn_instance { std::string & get_qnn_graph_name() { return _graph_name; } - bool is_rpcmem_initialized() { return _rpcmem_initialized; } - - size_t get_rpcmem_capacity() { return _rpcmem_capacity; } - void * alloc_rpcmem(size_t bytes, size_t alignment) { - if (!_rpcmem_initialized) { + if (!_rpc_mem) { QNN_LOG_WARN("rpc memory not initialized\n"); return nullptr; } auto allocate_bytes = static_cast(bytes + alignment); - void * buf = _pfn_rpc_mem_alloc(RPCMEM_HEAP_ID_SYSTEM, RPCMEM_DEFAULT_FLAGS, (int) allocate_bytes); + void * buf = _rpc_mem->alloc(RPCMEM_HEAP_ID_SYSTEM, RPCMEM_DEFAULT_FLAGS, (int) allocate_bytes); if (!buf) { QNN_LOG_WARN("failed to allocate rpc memory, size: %d MB\n", (int) (allocate_bytes / (1 << 20))); return nullptr; @@ -298,32 +295,34 @@ class qnn_instance { bool status = _rpcmem_store_map.insert(std::pair(aligned_buf, buf)).second; if (!status) { QNN_LOG_WARN("failed to allocate rpc memory\n"); - _pfn_rpc_mem_free(buf); + _rpc_mem->free(buf); } return aligned_buf; } void free_rpcmem(void * buf) { - if (!_rpcmem_initialized) { + if (!_rpc_mem) { QNN_LOG_WARN("rpc memory not initialized\n"); } else if (_rpcmem_store_map.count(buf) == 0) { QNN_LOG_WARN("no allocated tensor\n"); } else { - _pfn_rpc_mem_free(_rpcmem_store_map[buf]); + 
_rpc_mem->free(_rpcmem_store_map[buf]); _rpcmem_store_map.erase(buf); } } - int32_t rpcmem_to_fd(void * buf) { - int32_t mem_fd = -1; - if (!is_rpcmem_initialized()) { + int rpcmem_to_fd(void * buf) { + int fd = -1; + if (!_rpc_mem) { QNN_LOG_WARN("rpc memory not initialized\n"); + } else if (_rpcmem_store_map.count(buf) == 0) { + QNN_LOG_WARN("no allocated tensor\n"); } else { - mem_fd = _pfn_rpc_mem_to_fd(buf); + buf = _rpcmem_store_map[buf]; + fd = _rpc_mem->to_fd(buf); } - - return mem_fd; + return fd; } Qnn_MemHandle_t register_rpcmem(void * p_data, const uint32_t rank, uint32_t * dimensions, @@ -333,7 +332,7 @@ class qnn_instance { return nullptr; } - if (!is_rpcmem_initialized()) { + if (!_rpc_mem) { QNN_LOG_WARN("rpc memory not initialized\n"); return nullptr; } @@ -390,10 +389,12 @@ class qnn_instance { const qnn::qcom_socinfo & get_soc_info() { return _soc_info; } + bool has_custom_op_package() const { return _has_custom_op_package; } + private: - int load_system(); - int load_backend(std::string & lib_path, const QnnSaver_Config_t ** /*saver_config*/); - int unload_backend(); + int load_system(); + bool load_backend(std::string & lib_path, const QnnSaver_Config_t ** /*saver_config*/); + void unload_backend(); private: static constexpr const int _required_num_providers = 1; @@ -422,23 +423,19 @@ class qnn_instance { std::unordered_map _qnn_rpc_buffer_to_handles; std::mutex _init_mutex; - std::unordered_map _loaded_lib_handle; + std::unordered_map _loaded_lib_handle; std::unordered_map _lib_path_to_backend_id; std::unordered_map _loaded_backend; - dl_handler_t _rpc_lib_handle = nullptr; - std::atomic_bool _rpcmem_initialized{ false }; - qnn::pfn_rpc_mem_alloc _pfn_rpc_mem_alloc = nullptr; - qnn::pfn_rpc_mem_free _pfn_rpc_mem_free = nullptr; - qnn::pfn_rpc_mem_to_fd _pfn_rpc_mem_to_fd = nullptr; - qnn::pfn_rpc_mem_init _pfn_rpc_mem_init = nullptr; - qnn::pfn_rpc_mem_deinit _pfn_rpc_mem_deinit = nullptr; + std::unique_ptr _rpc_mem; std::unordered_map _rpcmem_store_map; - size_t _rpcmem_capacity = 512; std::string _graph_name; qnn::qcom_socinfo _soc_info = {}; + + bool _has_custom_op_package = false; + common::dl_handler_t _custom_op_extra_lib_handle = nullptr; }; using qnn_instance_ptr = std::shared_ptr; @@ -457,6 +454,6 @@ struct device_caps { size_t max_tensor_size_in_bytes; }; -const device_caps & get_device_caps(QNNBackend device); +const device_caps & get_device_caps(backend_index_type device); } // namespace qnn diff --git a/ggml/src/ggml-qnn/qnn/qnn-types.hpp b/ggml/src/ggml-qnn/qnn/qnn-types.hpp new file mode 100644 index 0000000000..4fe3e9155b --- /dev/null +++ b/ggml/src/ggml-qnn/qnn/qnn-types.hpp @@ -0,0 +1,51 @@ + +#pragma once + +#include +#include +#include +#include +#include + +#include "common.hpp" + +namespace qnn { + +enum qcom_htp_arch { + NONE = 0, + V68 = 68, + V69 = 69, + V73 = 73, + V75 = 75, + V79 = 79, // SD 8 Gen 4 (SM8750) +}; + +enum qcom_chipset { + UNKNOWN = 0, + EMULATOR_X64 = 0xFF00, // x86_64 emulator + EMULATOR_AARCH64 = 0xFF01, // ARM64 emulator + SM8350 = 30, // v68, SD 888/888+ + SM8450 = 36, // v69, SD 8 Gen 1 + SA8295 = 39, // v68 + SM8475 = 42, // v69, SD 8+ Gen 1 + SM8550 = 43, // v73, SD 8 Gen 2 + SSG2115P = 46, // v73 + SM7675 = 70, // V73, SD 7+ Gen 3 + SM8635 = 68, // v73, SD 8s Gen 3 + SM8650 = 57, // v75, SD 8 Gen 3 + SM8750 = 69, // v79, SD 8 Gen 4 +}; + +struct qcom_socinfo { + uint32_t soc_model; + size_t htp_arch; + size_t vtcm_size_in_mb; +}; + +using pfn_qnnsaver_initialize = decltype(QnnSaver_initialize); +using 
pfn_qnninterface_getproviders = decltype(QnnInterface_getProviders); +using pfn_qnnsysteminterface_getproviders = decltype(QnnSystemInterface_getProviders); +} // namespace qnn + +#define RPCMEM_DEFAULT_FLAGS 1 +#define RPCMEM_HEAP_ID_SYSTEM 25 diff --git a/ggml/src/ggml-qnn/tensor.hpp b/ggml/src/ggml-qnn/qnn/tensor.hpp similarity index 98% rename from ggml/src/ggml-qnn/tensor.hpp rename to ggml/src/ggml-qnn/qnn/tensor.hpp index 608a80fcf5..ef501135b5 100644 --- a/ggml/src/ggml-qnn/tensor.hpp +++ b/ggml/src/ggml-qnn/qnn/tensor.hpp @@ -25,7 +25,7 @@ class ggml_qnn_tensor : public std::enable_shared_from_this { explicit ggml_qnn_tensor(tensor_type_t tensor_type, const std::string & name, const qnn_dimension_array_t & dimensions, Qnn_DataType_t data_type, int rank, - QNNBackend device, Qnn_GraphHandle_t graph_handle, qnn_instance_ptr qnn_instance) : + backend_index_type device, Qnn_GraphHandle_t graph_handle, qnn_instance_ptr qnn_instance) : _tensor_name(name), _device(device), _qnn_instance(qnn_instance), @@ -45,7 +45,7 @@ class ggml_qnn_tensor : public std::enable_shared_from_this { explicit ggml_qnn_tensor(tensor_type_t tensor_type, const std::string & name, const ggml_dimension_array_t & dimensions, ggml_type data_type, int rank, - QNNBackend device, Qnn_GraphHandle_t graph_handle, qnn_instance_ptr qnn_instance) : + backend_index_type device, Qnn_GraphHandle_t graph_handle, qnn_instance_ptr qnn_instance) : ggml_qnn_tensor(tensor_type, name, get_internal_dimension(dimensions, rank), qnn_datatype_from_ggml_datatype(data_type), rank, device, graph_handle, qnn_instance) {} @@ -318,7 +318,7 @@ class ggml_qnn_tensor : public std::enable_shared_from_this { std::string _tensor_name; qnn_buffer_ptr _buffer; bool _can_unbind = true; - QNNBackend _device; + backend_index_type _device; qnn_instance_ptr _qnn_instance; Qnn_Tensor_t _qnn_tensor = qnn_tensor_init(kDefaultQnnTensorVersion); qnn_dimension_array_t _dimensions = {}; @@ -408,7 +408,7 @@ struct tensor_create_common_params { const char * name_prefix; int tensor_rank; bool is_input; - QNNBackend device; + backend_index_type device; Qnn_GraphHandle_t graph_handle; std::shared_ptr qnn_instance; }; diff --git a/ggml/src/ggml-qnn/utils.cpp b/ggml/src/ggml-qnn/qnn/utils.cpp similarity index 92% rename from ggml/src/ggml-qnn/utils.cpp rename to ggml/src/ggml-qnn/qnn/utils.cpp index 9696101b8b..8f3878aa03 100644 --- a/ggml/src/ggml-qnn/utils.cpp +++ b/ggml/src/ggml-qnn/qnn/utils.cpp @@ -178,7 +178,7 @@ const char * get_ggml_type_name(ggml_type type) { return traits->type_name; } -const char * get_backend_name(QNNBackend device) { +const char * get_backend_name(backend_index_type device) { switch (device) { case QNN_BACKEND_CPU: return "qnn-cpu"; @@ -192,7 +192,7 @@ const char * get_backend_name(QNNBackend device) { } } -const char * get_backend_desc(QNNBackend device) { +const char * get_backend_desc(backend_index_type device) { switch (device) { case QNN_BACKEND_CPU: return "CPU"; @@ -224,6 +224,10 @@ const char * get_chipset_desc(uint32_t soc_model) { return "Snapdragon 8 Gen 3"; case SM8750: return "Snapdragon 8 Elite"; + case EMULATOR_AARCH64: + return "AArch64 Emulator"; + case EMULATOR_X64: + return "x86_64 Emulator"; default: return "unknown"; } @@ -251,6 +255,10 @@ const char * get_chipset_model(uint32_t soc_model) { return "SM8650"; case SM8750: return "SM8750"; + case EMULATOR_AARCH64: + return "AARCH64EMU"; + case EMULATOR_X64: + return "X64EMU"; default: return "unknown"; } @@ -456,52 +464,4 @@ const char * 
get_qnn_error_string(Qnn_ErrorHandle_t error) { } } -#ifdef _WIN32 - -size_t get_system_total_memory_in_bytes() { - MEMORYSTATUSEX mem = {}; - mem.dwLength = sizeof(mem); - if (GlobalMemoryStatusEx(&mem)) { - return mem.ullTotalPhys; - } - - return 0; -} - -size_t get_system_free_memory_in_bytes() { - MEMORYSTATUSEX mem = {}; - mem.dwLength = sizeof(mem); - if (GlobalMemoryStatusEx(&mem)) { - return mem.ullAvailPhys; - } - - return 0; -} - -#else - -size_t get_system_total_memory_in_bytes() { - struct sysinfo info = {}; - if (sysinfo(&info) == 0) { - return (info.totalram + info.totalswap) * info.mem_unit; - } - - auto pages = (size_t) sysconf(_SC_PHYS_PAGES); - auto page_size = (size_t) sysconf(_SC_PAGE_SIZE); - return pages * page_size; -} - -size_t get_system_free_memory_in_bytes() { - struct sysinfo info = {}; - if (sysinfo(&info) == 0) { - return (info.freeram + info.freeswap) * info.mem_unit; - } - - auto avail_pages = (size_t) sysconf(_SC_AVPHYS_PAGES); - auto page_size = (size_t) sysconf(_SC_PAGE_SIZE); - return avail_pages * page_size; -} - -#endif - } // namespace qnn diff --git a/ggml/src/ggml-qnn/utils.hpp b/ggml/src/ggml-qnn/qnn/utils.hpp similarity index 97% rename from ggml/src/ggml-qnn/utils.hpp rename to ggml/src/ggml-qnn/qnn/utils.hpp index 2e55e2f2d8..09596c4e6f 100644 --- a/ggml/src/ggml-qnn/utils.hpp +++ b/ggml/src/ggml-qnn/qnn/utils.hpp @@ -5,6 +5,7 @@ #include #include +#include "common.hpp" #include "ggml-qnn.h" #include "ggml.h" #include "logger.hpp" @@ -23,8 +24,8 @@ qnn_dimension_array_t get_view_internal_dimension(const ggml_tensor * tensor, si uint32_t get_ggml_tensor_rank(const ggml_tensor * tensor); const char * get_ggml_type_name(ggml_type type); -const char * get_backend_name(QNNBackend device); -const char * get_backend_desc(QNNBackend device); +const char * get_backend_name(backend_index_type device); +const char * get_backend_desc(backend_index_type device); const char * get_chipset_desc(uint32_t soc_model); const char * get_chipset_model(uint32_t soc_model); const char * get_htparch_desc(size_t htp_arch); @@ -199,8 +200,6 @@ Qnn_DataType_t qnn_datatype_from_ggml_datatype(ggml_type ggml_type); ggml_type ggml_datatype_from_qnn_datatype(Qnn_DataType_t qnn_type); size_t qnn_datatype_size(Qnn_DataType_t qnn_type); const char * qnn_datatype_to_string(Qnn_DataType_t qnn_type); -size_t get_system_total_memory_in_bytes(); -size_t get_system_free_memory_in_bytes(); } // namespace qnn diff --git a/ggml/src/ggml-qnn/shared/CMakeLists.txt b/ggml/src/ggml-qnn/shared/CMakeLists.txt new file mode 100644 index 0000000000..b901e656b9 --- /dev/null +++ b/ggml/src/ggml-qnn/shared/CMakeLists.txt @@ -0,0 +1,35 @@ + +file(GLOB common_srcs "${CMAKE_CURRENT_LIST_DIR}/*.cpp") + +add_library(runtime-common STATIC + ${common_srcs} +) + +target_include_directories(runtime-common PUBLIC + ${CMAKE_CURRENT_LIST_DIR}/ + ${CMAKE_CURRENT_LIST_DIR}/../ + ${CMAKE_CURRENT_LIST_DIR}/../../ + ${CMAKE_CURRENT_LIST_DIR}/../../../include/ # TODO: figure out how to remove this +) + +if(GGML_QNN_ENABLE_HEXAGON_BACKEND) + if(DEFINED ENV{QNN_SDK_PATH}) + set(HEXAGON_SDK_ROOT $ENV{HEXAGON_SDK_ROOT}) + message("found HEXAGON_SDK_ROOT, setting to ${HEXAGON_SDK_ROOT}") + else() + message(FATAL_ERROR "HEXAGON_SDK_ROOT not defined") + endif() + + target_include_directories(runtime-common PUBLIC + ${HEXAGON_SDK_ROOT}/incs/ + ${HEXAGON_SDK_ROOT}/incs/stddef/ + ${HEXAGON_SDK_ROOT}/incs/HAP/ + ${HEXAGON_SDK_ROOT}/rtos/qurt/ + ${HEXAGON_SDK_ROOT}/utils/examples/ + ) + target_compile_definitions(runtime-common 
diff --git a/ggml/src/ggml-qnn/shared/common.cpp b/ggml/src/ggml-qnn/shared/common.cpp
new file mode 100644
index 0000000000..d89a31c20e
--- /dev/null
+++ b/ggml/src/ggml-qnn/shared/common.cpp
@@ -0,0 +1,146 @@
+
+#include "common.hpp"
+
+#include <vector>
+
+#include "ggml-backend-impl.h"
+#include "ggml-impl.h"
+#include "ggml-qnn.h"
+
+#ifdef _WIN32
+#    include <windows.h>
+#else
+#    include <sys/sysinfo.h>
+#    include <unistd.h>
+#endif
+
+namespace {
+
+struct ggml_backend_qnn_reg_impl : ggml_backend_reg {
+    std::vector<backend_device_proxy_ptr> device_proxies;
+    std::vector<ggml_backend_device>      devices;
+
+    explicit ggml_backend_qnn_reg_impl(ggml_backend_reg_i backend_iface) {
+        context = this;
+        iface   = backend_iface;
+
+        LOG_INFO("backend registry init\n");
+        for (size_t i = 0; i < TOTAL_BACKEND_COUNT; i++) {
+            const auto device_enum =
+                (backend_index_type) (TOTAL_BACKEND_COUNT - 1 - i);  // init from the last device, i.e. NPU
+
+            backend_device_proxy_ptr device_proxy;
+            if (device_enum < QNN_BACKEND_COUNT) {
+#ifndef GGML_HEXAGON_NPU_ONLY
+                device_proxy = create_qnn_backend_context(device_enum);
+#else
+                LOG_DEBUG("skip qnn device %d\n", (int) device_enum);
+                continue;
+#endif
+            } else {
+#ifdef GGML_QNN_ENABLE_HEXAGON_BACKEND
+                device_proxy = create_hexagon_backend_context(device_enum);
+#else
+                LOG_DEBUG("skip hexagon device %d\n", (int) device_enum);
+                continue;
+#endif
+            }
+
+            if (!device_proxy) {
+                LOG_DEBUG("skip device %d\n", (int) device_enum);
+                continue;
+            }
+
+            devices.emplace_back(ggml_backend_device{
+                /* iface   = */ device_proxy->get_iface(),
+                /* reg     = */ this,
+                /* context = */ device_proxy->get_context(),
+            });
+
+            device_proxies.emplace_back(device_proxy);
+        }
+    }
+};
+
+const char * ggml_backend_qnn_reg_get_name(ggml_backend_reg_t reg) {
+    GGML_UNUSED(reg);
+    // TODO: should we use a different name?
+    return "qualcomm";
+}
+
+size_t ggml_backend_qnn_reg_get_device_count(ggml_backend_reg_t reg) {
+    auto * ctx = (ggml_backend_qnn_reg_impl *) reg->context;
+    return ctx->devices.size();
+}
+
+ggml_backend_dev_t ggml_backend_qnn_reg_get_device(ggml_backend_reg_t reg, size_t index) {
+    auto * ctx = (ggml_backend_qnn_reg_impl *) reg->context;
+    GGML_ASSERT(index < ctx->devices.size());
+    return &(ctx->devices[index]);
+}
+
+const ggml_backend_reg_i ggml_backend_qnn_reg_interface = {
+    /* .get_name         = */ ggml_backend_qnn_reg_get_name,
+    /* .get_device_count = */ ggml_backend_qnn_reg_get_device_count,
+    /* .get_device       = */ ggml_backend_qnn_reg_get_device,
+    /* .get_proc_address = */ nullptr,
+};
+
+}  // namespace
+
+ggml_backend_reg_t ggml_backend_qnn_reg() {
+    static ggml_backend_qnn_reg_impl reg{ ggml_backend_qnn_reg_interface };
+    return &reg;
+}
+
+namespace common {
+
+#ifdef _WIN32
+
+size_t get_system_total_memory_in_bytes() {
+    MEMORYSTATUSEX mem = {};
+    mem.dwLength       = sizeof(mem);
+    if (GlobalMemoryStatusEx(&mem)) {
+        return mem.ullTotalPhys;
+    }
+
+    return 0;
+}
+
+size_t get_system_free_memory_in_bytes() {
+    MEMORYSTATUSEX mem = {};
+    mem.dwLength       = sizeof(mem);
+    if (GlobalMemoryStatusEx(&mem)) {
+        return mem.ullAvailPhys;
+    }
+
+    return 0;
+}
+
+#else
+
+size_t get_system_total_memory_in_bytes() {
+    struct sysinfo info = {};
+    if (sysinfo(&info) == 0) {
+        return (info.totalram + info.totalswap) * info.mem_unit;
+    }
+
+    auto pages     = (size_t) sysconf(_SC_PHYS_PAGES);
+    auto page_size = (size_t) sysconf(_SC_PAGE_SIZE);
+    return pages * page_size;
+}
+
+size_t get_system_free_memory_in_bytes() {
+    struct sysinfo info = {};
+    if (sysinfo(&info) == 0) {
+        return (info.freeram + info.freeswap) * info.mem_unit;
+    }
+
+    auto avail_pages = (size_t) sysconf(_SC_AVPHYS_PAGES);
+    auto page_size   = (size_t) sysconf(_SC_PAGE_SIZE);
+    return avail_pages * page_size;
+}
+
+#endif
+
+}  // namespace common
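The registry above builds its device list once, walking backend_index_type from the last entry (so the NPU is registered first) and keeping each proxy alive in device_proxies for as long as the registry exists. A usage sketch, assuming the standard ggml-backend registry accessors from ggml-backend.h; list_qualcomm_devices is a hypothetical caller, not part of this patch:

#include <cstdio>

#include "ggml-backend.h"
#include "ggml-qnn.h"

void list_qualcomm_devices() {
    ggml_backend_reg_t reg = ggml_backend_qnn_reg();
    for (size_t i = 0; i < ggml_backend_reg_dev_count(reg); ++i) {
        ggml_backend_dev_t dev = ggml_backend_reg_dev_get(reg, i);
        printf("device %zu: %s - %s\n", i, ggml_backend_dev_name(dev), ggml_backend_dev_description(dev));
    }
}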
+ return "qualcomm"; +} + +size_t ggml_backend_qnn_reg_get_device_count(ggml_backend_reg_t reg) { + auto * ctx = (ggml_backend_qnn_reg_impl *) reg->context; + return ctx->devices.size(); +} + +ggml_backend_dev_t ggml_backend_qnn_reg_get_device(ggml_backend_reg_t reg, size_t index) { + auto * ctx = (ggml_backend_qnn_reg_impl *) reg->context; + GGML_ASSERT(index < ctx->devices.size()); + return &(ctx->devices[index]); +} + +const ggml_backend_reg_i ggml_backend_qnn_reg_interface = { + /* .get_name = */ ggml_backend_qnn_reg_get_name, + /* .get_device_count = */ ggml_backend_qnn_reg_get_device_count, + /* .get_device_get = */ ggml_backend_qnn_reg_get_device, + /* .get_proc_address = */ nullptr, +}; + +} // namespace + +ggml_backend_reg_t ggml_backend_qnn_reg() { + static ggml_backend_qnn_reg_impl reg{ ggml_backend_qnn_reg_interface }; + return ® +} + +namespace common { + +#ifdef _WIN32 + +size_t get_system_total_memory_in_bytes() { + MEMORYSTATUSEX mem = {}; + mem.dwLength = sizeof(mem); + if (GlobalMemoryStatusEx(&mem)) { + return mem.ullTotalPhys; + } + + return 0; +} + +size_t get_system_free_memory_in_bytes() { + MEMORYSTATUSEX mem = {}; + mem.dwLength = sizeof(mem); + if (GlobalMemoryStatusEx(&mem)) { + return mem.ullAvailPhys; + } + + return 0; +} + +#else + +size_t get_system_total_memory_in_bytes() { + struct sysinfo info = {}; + if (sysinfo(&info) == 0) { + return (info.totalram + info.totalswap) * info.mem_unit; + } + + auto pages = (size_t) sysconf(_SC_PHYS_PAGES); + auto page_size = (size_t) sysconf(_SC_PAGE_SIZE); + return pages * page_size; +} + +size_t get_system_free_memory_in_bytes() { + struct sysinfo info = {}; + if (sysinfo(&info) == 0) { + return (info.freeram + info.freeswap) * info.mem_unit; + } + + auto avail_pages = (size_t) sysconf(_SC_AVPHYS_PAGES); + auto page_size = (size_t) sysconf(_SC_PAGE_SIZE); + return avail_pages * page_size; +} + +#endif + +} // namespace common diff --git a/ggml/src/ggml-qnn/shared/common.hpp b/ggml/src/ggml-qnn/shared/common.hpp new file mode 100644 index 0000000000..4feb3365ce --- /dev/null +++ b/ggml/src/ggml-qnn/shared/common.hpp @@ -0,0 +1,56 @@ +#pragma once + +#include +#include + +#include "ggml-backend-impl.h" +#include "ggml-impl.h" + +enum backend_index_type { + QNN_BACKEND_CPU = 0, + QNN_BACKEND_GPU, + QNN_BACKEND_NPU, + + HEXAGON_BACKEND, + + TOTAL_BACKEND_COUNT, + QNN_BACKEND_COUNT = HEXAGON_BACKEND, +}; + +class backend_device_proxy { + public: + virtual ~backend_device_proxy() = default; + + virtual const ggml_backend_device_i & get_iface() const = 0; + virtual void * get_context() = 0; +}; + +using backend_device_proxy_ptr = std::shared_ptr; + +backend_device_proxy_ptr create_qnn_backend_context(backend_index_type device); +backend_device_proxy_ptr create_hexagon_backend_context(backend_index_type device); + +namespace common { + +size_t get_system_total_memory_in_bytes(); +size_t get_system_free_memory_in_bytes(); + +} // namespace common + +#define DISABLE_COPY(class_name) \ + class_name(const class_name &) = delete; \ + void operator=(const class_name &) = delete + +#define DISABLE_MOVE(class_name) \ + class_name(class_name &&) = delete; \ + void operator=(class_name &&) = delete + +#define LOG_ERROR(...) (GGML_LOG_ERROR(__VA_ARGS__)) +#define LOG_WARN(...) (GGML_LOG_WARN(__VA_ARGS__)) +#define LOG_INFO(...) (GGML_LOG_INFO(__VA_ARGS__)) + +#ifndef NDEBUG +# define LOG_DEBUG(...) (GGML_LOG_DEBUG(__VA_ARGS__)) +#else +# define LOG_DEBUG(...) 
diff --git a/ggml/src/ggml-qnn/dl-loader.hpp b/ggml/src/ggml-qnn/shared/dyn-lib-loader.hpp
similarity index 67%
rename from ggml/src/ggml-qnn/dl-loader.hpp
rename to ggml/src/ggml-qnn/shared/dyn-lib-loader.hpp
index e183d190ce..22cf8901f3 100644
--- a/ggml/src/ggml-qnn/dl-loader.hpp
+++ b/ggml/src/ggml-qnn/shared/dyn-lib-loader.hpp
@@ -13,20 +13,20 @@
 
 #include <string>
 
-namespace qnn {
+namespace common {
 
 #ifdef __linux__
 typedef void * dl_handler_t;
 
-inline qnn::dl_handler_t dl_load(const std::string & lib_path) {
-    return dlopen(lib_path.c_str(), RTLD_NOW | RTLD_LOCAL);
+inline dl_handler_t dl_load(const std::string & lib_path) {
+    return dlopen(lib_path.c_str(), RTLD_NOW | RTLD_GLOBAL);
 }
 
-inline void * dl_sym(qnn::dl_handler_t handle, const std::string & symbol) {
+inline void * dl_sym(dl_handler_t handle, const std::string & symbol) {
     return dlsym(handle, symbol.c_str());
 }
 
-inline bool dl_unload(qnn::dl_handler_t handle) {
+inline bool dl_unload(dl_handler_t handle) {
     return dlclose(handle) == 0;
 }
 
@@ -36,7 +36,7 @@ inline const char * dl_error() {
 #elif defined(_WIN32)
 using dl_handler_t = HMODULE;
 
-inline qnn::dl_handler_t dl_load(const std::string & lib_path) {
+inline dl_handler_t dl_load(const std::string & lib_path) {
     // suppress error dialogs for missing DLLs
     auto old_mode = SetErrorMode(SEM_FAILCRITICALERRORS);
     SetErrorMode(old_mode | SEM_FAILCRITICALERRORS);
@@ -47,7 +47,7 @@ inline qnn::dl_handler_t dl_load(const std::string & lib_path) {
     return handle;
 }
 
-inline void * dl_sym(qnn::dl_handler_t handle, const std::string & symbol) {
+inline void * dl_sym(dl_handler_t handle, const std::string & symbol) {
     auto old_mode = SetErrorMode(SEM_FAILCRITICALERRORS);
     SetErrorMode(old_mode | SEM_FAILCRITICALERRORS);
@@ -57,7 +57,7 @@ inline void * dl_sym(qnn::dl_handler_t handle, const std::string & symbol) {
     return p;
 }
 
-inline bool dl_unload(qnn::dl_handler_t handle) {
+inline bool dl_unload(dl_handler_t handle) {
     FreeLibrary(handle);
     return true;
 }
@@ -69,8 +69,8 @@ inline const char * dl_error() {
 
 #endif
 
-template <typename Fn> Fn dl_sym_typed(qnn::dl_handler_t handle, const std::string & function_name) {
+template <typename Fn> Fn dl_sym_typed(dl_handler_t handle, const std::string & function_name) {
     return reinterpret_cast<Fn>(dl_sym(handle, function_name));
 }
 
-}  // namespace qnn
+}  // namespace common
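The loader keeps the old dlopen/LoadLibrary shims but now lives in the common namespace and, on Linux/Android, switches to RTLD_GLOBAL so resolved symbols are visible to subsequently loaded QNN and FastRPC libraries. A usage sketch; the library and symbol names are made up for illustration:

#include "dyn-lib-loader.hpp"

using init_fn_t = int (*)(int);

int call_into_plugin() {
    common::dl_handler_t handle = common::dl_load("libexample_plugin.so");
    if (!handle) {
        return -1;  // common::dl_error() describes the failure
    }

    // dl_sym_typed hides the reinterpret_cast to the function-pointer type
    auto init = common::dl_sym_typed<init_fn_t>(handle, "example_init");
    int  ret  = init ? init(42) : -1;

    common::dl_unload(handle);
    return ret;
}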
diff --git a/ggml/src/ggml-qnn/shared/rpc-interface.hpp b/ggml/src/ggml-qnn/shared/rpc-interface.hpp
new file mode 100644
index 0000000000..5a64a03646
--- /dev/null
+++ b/ggml/src/ggml-qnn/shared/rpc-interface.hpp
@@ -0,0 +1,223 @@
+#pragma once
+
+#include <memory>
+
+#include "common.hpp"
+#include "dyn-lib-loader.hpp"
+
+#ifdef GGML_QNN_ENABLE_HEXAGON_BACKEND
+#    include <remote.h>
+#else
+// TODO: remove this when not needed
+
+/**
+ * @enum fastrpc_map_flags for fastrpc_mmap and fastrpc_munmap
+ * @brief Types of maps with cache maintenance
+ */
+enum fastrpc_map_flags {
+    /**
+     * Map memory pages with RW- permission and CACHE WRITEBACK.
+     * Driver will clean cache when buffer passed in a FastRPC call.
+     * Same remote virtual address will be assigned for subsequent
+     * FastRPC calls.
+     */
+    FASTRPC_MAP_STATIC,
+
+    /** Reserved for compatibility with deprecated flag */
+    FASTRPC_MAP_RESERVED,
+
+    /**
+     * Map memory pages with RW- permission and CACHE WRITEBACK.
+     * Mapping tagged with a file descriptor. User is responsible for
+     * maintenance of CPU and DSP caches for the buffer. Get virtual address
+     * of buffer on DSP using HAP_mmap_get() and HAP_mmap_put() functions.
+     */
+    FASTRPC_MAP_FD,
+
+    /**
+     * Mapping delayed until user calls HAP_mmap() and HAP_munmap()
+     * functions on DSP. User is responsible for maintenance of CPU and DSP
+     * caches for the buffer. Delayed mapping is useful for users to map
+     * buffer on DSP with other than default permissions and cache modes
+     * using HAP_mmap() and HAP_munmap() functions.
+     */
+    FASTRPC_MAP_FD_DELAYED,
+
+    /** Reserved for compatibility **/
+    FASTRPC_MAP_RESERVED_4,
+    FASTRPC_MAP_RESERVED_5,
+    FASTRPC_MAP_RESERVED_6,
+    FASTRPC_MAP_RESERVED_7,
+    FASTRPC_MAP_RESERVED_8,
+    FASTRPC_MAP_RESERVED_9,
+    FASTRPC_MAP_RESERVED_10,
+    FASTRPC_MAP_RESERVED_11,
+    FASTRPC_MAP_RESERVED_12,
+    FASTRPC_MAP_RESERVED_13,
+    FASTRPC_MAP_RESERVED_14,
+    FASTRPC_MAP_RESERVED_15,
+
+    /**
+     * This flag is used to skip CPU mapping,
+     * otherwise behaves similar to FASTRPC_MAP_FD_DELAYED flag.
+     */
+    FASTRPC_MAP_FD_NOMAP,
+
+    /** Update FASTRPC_MAP_MAX when adding new value to this enum **/
+};
+
+#endif
+
+namespace common {
+
+#ifdef _WIN32
+constexpr const char * kQnnRpcLibName = "libcdsprpc.dll";
+#else
+constexpr const char * kQnnRpcLibName = "libcdsprpc.so";
+#endif
+
+class rpc_interface {
+    using rpc_mem_init_t           = void (*)();
+    using rpc_mem_deinit_t         = void (*)();
+    using rpc_mem_alloc_t          = void * (*) (int heapid, uint32_t flags, int size);
+    using rpc_mem_alloc2_t         = void * (*) (int heapid, uint32_t flags, size_t size);
+    using rpc_mem_free_t           = void (*)(void * po);
+    using rpc_mem_to_fd_t          = int (*)(void * po);
+    using rpc_mem_fastrpc_mmap_t   = int (*)(int domain, int fd, void * addr, int offset, size_t length,
+                                             enum fastrpc_map_flags flags);
+    using rpc_mem_fastrpc_munmap_t = int (*)(int domain, int fd, void * addr, size_t length);
+    using remote_handle_control_t  = int (*)(uint32_t req, void * data, uint32_t datalen);
+    using remote_session_control_t = int (*)(uint32_t req, void * data, uint32_t datalen);
+
+  public:
+    rpc_interface(const std::string & rpc_lib_path = kQnnRpcLibName) {
+        _rpc_lib_handle = dl_load(rpc_lib_path);
+        if (!_rpc_lib_handle) {
+            LOG_ERROR("failed to load %s, error: %s\n", rpc_lib_path.c_str(), dl_error());
+            return;
+        }
+
+        _rpc_mem_init           = reinterpret_cast<rpc_mem_init_t>(dl_sym(_rpc_lib_handle, "rpcmem_init"));
+        _rpc_mem_deinit         = reinterpret_cast<rpc_mem_deinit_t>(dl_sym(_rpc_lib_handle, "rpcmem_deinit"));
+        _rpc_mem_alloc          = reinterpret_cast<rpc_mem_alloc_t>(dl_sym(_rpc_lib_handle, "rpcmem_alloc"));
+        _rpc_mem_alloc2         = reinterpret_cast<rpc_mem_alloc2_t>(dl_sym(_rpc_lib_handle, "rpcmem_alloc2"));
+        _rpc_mem_free           = reinterpret_cast<rpc_mem_free_t>(dl_sym(_rpc_lib_handle, "rpcmem_free"));
+        _rpc_mem_to_fd          = reinterpret_cast<rpc_mem_to_fd_t>(dl_sym(_rpc_lib_handle, "rpcmem_to_fd"));
+        _rpc_mem_fastrpc_mmap   = reinterpret_cast<rpc_mem_fastrpc_mmap_t>(dl_sym(_rpc_lib_handle, "fastrpc_mmap"));
+        _rpc_mem_fastrpc_munmap = reinterpret_cast<rpc_mem_fastrpc_munmap_t>(dl_sym(_rpc_lib_handle, "fastrpc_munmap"));
+        _remote_handle_control =
+            reinterpret_cast<remote_handle_control_t>(dl_sym(_rpc_lib_handle, "remote_handle_control"));
+        _remote_session_control =
+            reinterpret_cast<remote_session_control_t>(dl_sym(_rpc_lib_handle, "remote_session_control"));
+    }
+
+    bool is_valid() const { return _rpc_lib_handle != nullptr; }
+
+    bool is_alloc2_available() const { return _rpc_mem_alloc2 != nullptr; }
+
+    void rpcmem_init() {
+        if (_rpc_mem_init) {
+            _rpc_mem_init();
+        }
+    }
+
+    void rpcmem_deinit() {
+        if (_rpc_mem_deinit) {
+            _rpc_mem_deinit();
+        }
+    }
+
+    void * rpcmem_alloc(int heapid, uint32_t flags, int size) {
+        if (!is_valid()) {
+            return nullptr;
+        }
+
+        return _rpc_mem_alloc(heapid, flags, size);
+    }
+
+    void * rpcmem_alloc2(int heapid, uint32_t flags, size_t size) {
+        if (!is_valid()) {
+            return nullptr;
+        }
+
+        return _rpc_mem_alloc2(heapid, flags, size);
+    }
+
+    void rpcmem_free(void * buf) {
+        if (is_valid()) {
+            _rpc_mem_free(buf);
+        }
+    }
+
+    int rpcmem_to_fd(void * buf) {
+        int mem_fd = -1;
+        if (is_valid()) {
+            mem_fd = _rpc_mem_to_fd(buf);
+        }
+
+        return mem_fd;
+    }
+
+    int fastrpc_mmap(int domain, int fd, void * addr, int offset, size_t length, enum fastrpc_map_flags flags) {
+        if (!is_valid()) {
+            return -1;
+        }
+
+        return _rpc_mem_fastrpc_mmap(domain, fd, addr, offset, length, flags);
+    }
+
+    int fastrpc_munmap(int domain, int fd, void * addr, size_t length) {
+        if (!is_valid()) {
+            return -1;
+        }
+
+        return _rpc_mem_fastrpc_munmap(domain, fd, addr, length);
+    }
+
+    int remote_handle_control(uint32_t req, void * data, uint32_t datalen) {
+        if (!is_valid()) {
+            return -1;
+        }
+
+        return _remote_handle_control(req, data, datalen);
+    }
+
+    int remote_session_control(uint32_t req, void * data, uint32_t datalen) {
+        if (!is_valid()) {
+            return -1;
+        }
+
+        return _remote_session_control(req, data, datalen);
+    }
+
+    ~rpc_interface() {
+        if (_rpc_lib_handle) {
+            if (_rpc_mem_deinit) {
+                _rpc_mem_deinit();
+            }
+
+            dl_unload(_rpc_lib_handle);
+        }
+    }
+
+  private:
+    dl_handler_t             _rpc_lib_handle         = nullptr;
+    rpc_mem_init_t           _rpc_mem_init           = nullptr;
+    rpc_mem_deinit_t         _rpc_mem_deinit         = nullptr;
+    rpc_mem_alloc_t          _rpc_mem_alloc          = nullptr;
+    rpc_mem_alloc2_t         _rpc_mem_alloc2         = nullptr;
+    rpc_mem_free_t           _rpc_mem_free           = nullptr;
+    rpc_mem_to_fd_t          _rpc_mem_to_fd          = nullptr;
+    rpc_mem_fastrpc_mmap_t   _rpc_mem_fastrpc_mmap   = nullptr;
+    rpc_mem_fastrpc_munmap_t _rpc_mem_fastrpc_munmap = nullptr;
+    remote_handle_control_t  _remote_handle_control  = nullptr;
+    remote_session_control_t _remote_session_control = nullptr;
+
+    rpc_interface(const rpc_interface &)             = delete;
+    rpc_interface & operator=(const rpc_interface &) = delete;
+    rpc_interface(rpc_interface &&)                  = delete;
+    rpc_interface & operator=(rpc_interface &&)      = delete;
+};
+
+using rpc_interface_ptr = std::shared_ptr<rpc_interface>;
+
+}  // namespace common
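rpc_interface resolves every rpcmem/fastrpc entry point lazily from libcdsprpc and degrades to no-ops when the library or a symbol is missing, so callers only gate on is_valid(). A probe sketch; the heap id 25 and flag 1 mirror the conventional RPCMEM_HEAP_ID_SYSTEM / RPCMEM_DEFAULT_FLAGS values from the Hexagon SDK and are illustrative assumptions here:

#include "rpc-interface.hpp"

void probe_rpcmem() {
    common::rpc_interface iface;  // dl_load's libcdsprpc at construction
    if (!iface.is_valid()) {
        LOG_WARN("FastRPC runtime not available\n");
        return;
    }

    iface.rpcmem_init();
    void * buf = iface.is_alloc2_available() ? iface.rpcmem_alloc2(25, 1, 4096)
                                             : iface.rpcmem_alloc(25, 1, 4096);
    if (buf) {
        int fd = iface.rpcmem_to_fd(buf);  // fd is shareable with the DSP
        LOG_INFO("rpc buffer fd: %d\n", fd);
        iface.rpcmem_free(buf);
    }
}  // destructor rpcmem_deinit()s and unloads the library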
diff --git a/ggml/src/ggml-qnn/shared/rpc-mem.hpp b/ggml/src/ggml-qnn/shared/rpc-mem.hpp
new file mode 100644
index 0000000000..ba8449192b
--- /dev/null
+++ b/ggml/src/ggml-qnn/shared/rpc-mem.hpp
@@ -0,0 +1,129 @@
+
+#pragma once
+
+#include <limits>
+#include <memory>
+
+#include "common.hpp"
+#include "dyn-lib-loader.hpp"
+#include "rpc-interface.hpp"
+
+namespace common {
+
+class rpc_mem {
+  public:
+    rpc_mem() {
+        auto interface = std::make_shared<rpc_interface>();
+        if (!interface->is_valid()) {
+            LOG_ERROR("failed to load rpcmem lib\n");
+            return;
+        }
+
+        interface->rpcmem_init();
+        _rpc_interface = interface;
+        LOG_DEBUG("load rpcmem lib successfully\n");
+    }
+
+    explicit rpc_mem(rpc_interface_ptr interface) {
+        if (!interface->is_valid()) {
+            LOG_ERROR("failed to load rpcmem lib\n");
+            return;
+        }
+
+        interface->rpcmem_init();
+        _rpc_interface = interface;
+        LOG_DEBUG("load rpcmem lib successfully\n");
+    }
+
+    ~rpc_mem() {
+        if (!is_valid()) {
+            LOG_DEBUG("rpc memory not initialized\n");
+            return;
+        }
+
+        if (_rpc_interface) {
+            _rpc_interface->rpcmem_deinit();
+            _rpc_interface.reset();
+        }
+
+        LOG_DEBUG("unload rpcmem lib successfully\n");
+    }
+
+    bool is_valid() const { return (bool) _rpc_interface; }
+
+    void * alloc(int heapid, uint32_t flags, size_t size) {
+        if (!is_valid()) {
+            LOG_ERROR("rpc memory not initialized\n");
+            return nullptr;
+        }
+
+        if (size > get_max_alloc_size()) {
+            LOG_ERROR("rpc memory size %zu exceeds max alloc size %zu\n", size, get_max_alloc_size());
+            return nullptr;
+        }
+
+        void * buf = nullptr;
+        if (_rpc_interface->is_alloc2_available()) {
+            buf = _rpc_interface->rpcmem_alloc2(heapid, flags, size);
+        } else {
+            buf = _rpc_interface->rpcmem_alloc(heapid, flags, size);
+        }
+
+        if (!buf) {
+            LOG_ERROR("failed to allocate rpc memory, size: %d MB\n", (int) (size / (1 << 20)));
+            return nullptr;
+        }
+
+        LOG_DEBUG("rpc buffer allocated, heapid: %d, flags: 0x%x, size: %zu\n", heapid, flags, size);
+        return buf;
+    }
+
+    void free(void * buf) {
+        if (!is_valid()) {
+            LOG_ERROR("rpc memory not initialized\n");
+        } else {
+            _rpc_interface->rpcmem_free(buf);
+        }
+    }
+
+    int to_fd(void * buf) {
+        int mem_fd = -1;
+        if (!is_valid()) {
+            LOG_ERROR("rpc memory not initialized\n");
+        } else {
+            mem_fd = _rpc_interface->rpcmem_to_fd(buf);
+        }
+
+        return mem_fd;
+    }
+
+    size_t get_max_alloc_size() {
+        return _rpc_interface->is_alloc2_available() ? std::numeric_limits<size_t>::max() :
+                                                       std::numeric_limits<int>::max();
+    }
+
+    int fastrpc_mmap(int domain, int fd, void * addr, int offset, size_t length, enum fastrpc_map_flags flags) {
+        if (!is_valid()) {
+            LOG_ERROR("rpc memory not initialized\n");
+            return -1;
+        }
+
+        return _rpc_interface->fastrpc_mmap(domain, fd, addr, offset, length, flags);
+    }
+
+    int fastrpc_munmap(int domain, int fd, void * addr, size_t length) {
+        if (!is_valid()) {
+            LOG_ERROR("rpc memory not initialized\n");
+            return -1;
+        }
+
+        return _rpc_interface->fastrpc_munmap(domain, fd, addr, length);
+    }
+
+  private:
+    rpc_interface_ptr _rpc_interface;
+};
+
+using rpc_mem_ptr = std::shared_ptr<rpc_mem>;
+
+}  // namespace common
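rpc_mem layers RAII on top of rpc_interface: the constructor rpcmem_init()s, the destructor rpcmem_deinit()s, and alloc() prefers rpcmem_alloc2 when present so allocations can exceed INT_MAX. An end-to-end sketch; the domain, heap id, and flags are illustrative placeholders, not values prescribed by this patch:

#include "rpc-mem.hpp"

void share_buffer_with_dsp(int domain) {
    common::rpc_mem mem;
    if (!mem.is_valid()) {
        return;
    }

    const size_t size = 1 << 20;
    void *       buf  = mem.alloc(/* heapid = */ 25, /* flags = */ 1, size);
    if (!buf) {
        return;
    }

    int fd = mem.to_fd(buf);
    if (fd >= 0 && mem.fastrpc_mmap(domain, fd, buf, 0, size, FASTRPC_MAP_FD) == 0) {
        // ... launch DSP work that reads/writes the mapped buffer ...
        mem.fastrpc_munmap(domain, fd, buf, size);
    }

    mem.free(buf);
}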