From fcb013847c2c983967e9d8c9a13b16829fb799e6 Mon Sep 17 00:00:00 2001 From: "M. Mediouni" Date: Mon, 24 Nov 2025 01:54:49 +0100 Subject: [PATCH] ggml-hexagon: Initial Hexagon v68/v69 support (#17394) * ggml-hexagon: fix build error with GCC Add stdexcept include to fix GCC build errors Signed-off-by: Mohamed Mediouni * ggml-hexagon: check VTCM acquire failures Signed-off-by: Mohamed Mediouni * ggml-hexagon: disable destination bypass on older than v73 v68 errors out if having bypass enabled when the VTCM is the destination. At least on v68 this made things actually work... not a proper fix though, so to look at later... Signed-off-by: Mohamed Mediouni * ggml-hexagon: add initial v68/v69 support v68 is the Hexagon revision notably used on the Snapdragon 8cx Gen 3 and the QCM6490. Also add support for v69. 8MB isn't a supported page size, so relax asked for page size constraint for HAP_compute_res_attr_set_vtcm_param_v2 to optimal. Signed-off-by: Mohamed Mediouni --------- Signed-off-by: Mohamed Mediouni --- ggml/src/ggml-hexagon/CMakeLists.txt | 10 ++++++++++ ggml/src/ggml-hexagon/ggml-hexagon.cpp | 1 + ggml/src/ggml-hexagon/htp-utils.c | 6 ++++++ ggml/src/ggml-hexagon/htp/htp-dma.h | 7 +++++++ ggml/src/ggml-hexagon/htp/hvx-utils.h | 20 ++++++++++++++++++++ ggml/src/ggml-hexagon/htp/main.c | 15 ++++++++++++--- 6 files changed, 56 insertions(+), 3 deletions(-) diff --git a/ggml/src/ggml-hexagon/CMakeLists.txt b/ggml/src/ggml-hexagon/CMakeLists.txt index 166825c2c5..ac422027b9 100644 --- a/ggml/src/ggml-hexagon/CMakeLists.txt +++ b/ggml/src/ggml-hexagon/CMakeLists.txt @@ -43,6 +43,14 @@ set(HTP_CMAKE_ARGS -DHEXAGON_TOOLS_ROOT=$ENV{HEXAGON_TOOLS_ROOT} -DHEXAGON_HTP_DEBUG=${GGML_HEXAGON_HTP_DEBUG}) +ExternalProject_Add(htp-v68 + SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/htp BUILD_ALWAYS ON + CMAKE_ARGS ${HTP_CMAKE_ARGS} -DDSP_VERSION=v68 -DPREBUILT_LIB_DIR="toolv19_v68") + +ExternalProject_Add(htp-v69 + SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/htp BUILD_ALWAYS ON + CMAKE_ARGS ${HTP_CMAKE_ARGS} -DDSP_VERSION=v69 -DPREBUILT_LIB_DIR="toolv19_v69") + ExternalProject_Add(htp-v73 SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/htp BUILD_ALWAYS ON CMAKE_ARGS ${HTP_CMAKE_ARGS} -DDSP_VERSION=v73 -DPREBUILT_LIB_DIR="toolv19_v73") @@ -61,6 +69,8 @@ ExternalProject_Add(htp-v81 # Install Hexagon skels required at runtime install(FILES + ${CMAKE_CURRENT_BINARY_DIR}/libggml-htp-v68.so + ${CMAKE_CURRENT_BINARY_DIR}/libggml-htp-v69.so ${CMAKE_CURRENT_BINARY_DIR}/libggml-htp-v73.so ${CMAKE_CURRENT_BINARY_DIR}/libggml-htp-v75.so ${CMAKE_CURRENT_BINARY_DIR}/libggml-htp-v79.so diff --git a/ggml/src/ggml-hexagon/ggml-hexagon.cpp b/ggml/src/ggml-hexagon/ggml-hexagon.cpp index 0b4e2c3d4d..881ed39ae3 100644 --- a/ggml/src/ggml-hexagon/ggml-hexagon.cpp +++ b/ggml/src/ggml-hexagon/ggml-hexagon.cpp @@ -9,6 +9,7 @@ #include #include #include +#include #ifdef _WIN32 # include diff --git a/ggml/src/ggml-hexagon/htp-utils.c b/ggml/src/ggml-hexagon/htp-utils.c index e8a035af8c..3f335bf71c 100644 --- a/ggml/src/ggml-hexagon/htp-utils.c +++ b/ggml/src/ggml-hexagon/htp-utils.c @@ -390,6 +390,12 @@ int get_hex_arch_ver(int domain, int * arch) { } switch (arch_ver.capability & 0xff) { + case 0x68: + *arch = 68; + return 0; + case 0x69: + *arch = 69; + return 0; case 0x73: *arch = 73; return 0; diff --git a/ggml/src/ggml-hexagon/htp/htp-dma.h b/ggml/src/ggml-hexagon/htp/htp-dma.h index 4d0d54ce85..7d3fc4078c 100644 --- a/ggml/src/ggml-hexagon/htp/htp-dma.h +++ b/ggml/src/ggml-hexagon/htp/htp-dma.h @@ -66,6 +66,13 @@ static inline bool dma_queue_push(dma_queue * q, desc->desctype = HEXAGON_UDMA_DESC_DESCTYPE_TYPE1; desc->dstbypass = 1; desc->srcbypass = 1; +#if __HVX_ARCH__ >= 73 + desc->dstbypass = 1; + desc->srcbypass = 1; +#else + desc->dstbypass = 0; + desc->srcbypass = 1; +#endif desc->order = 0; desc->dstate = HEXAGON_UDMA_DESC_DSTATE_INCOMPLETE; desc->src = (void *) src; diff --git a/ggml/src/ggml-hexagon/htp/hvx-utils.h b/ggml/src/ggml-hexagon/htp/hvx-utils.h index 28b0014fb5..80658105c5 100644 --- a/ggml/src/ggml-hexagon/htp/hvx-utils.h +++ b/ggml/src/ggml-hexagon/htp/hvx-utils.h @@ -21,6 +21,26 @@ typedef union { float fp32[VLEN_FP32]; } __attribute__((aligned(VLEN), packed)) HVX_VectorAlias; +/* Q6_Vsf_equals_Vw is only available on v73+.*/ +#if __HVX_ARCH__ < 73 +static inline HVX_Vector int32_to_qfloat(HVX_Vector const in) +{ + HVX_Vector const vzero = Q6_V_vzero(); + HVX_VectorPred is_zero = Q6_Q_vcmp_eq_VwVw(in, vzero); + HVX_Vector lshift = Q6_Vw_vnormamt_Vw(in); + HVX_Vector normalized = Q6_Vw_vasl_VwVw(in, lshift); + HVX_Vector vexp = Q6_Vw_vsub_VwVw(Q6_V_vsplat_R(0x7f + 30), lshift); + HVX_Vector mant = Q6_V_vand_VV(Q6_V_vsplat_R(0xFFFFFF00), normalized); + HVX_Vector ret = Q6_V_vmux_QVV(is_zero, vzero, Q6_Vw_vadd_VwVw(mant, vexp)); + return ret; +} + +static inline HVX_Vector Q6_Vsf_equals_Vw(HVX_Vector const in) +{ + return Q6_Vsf_equals_Vqf32(int32_to_qfloat(in)); +} +#endif + static inline HVX_Vector hvx_vec_splat_fp32(float i) { union { float f; diff --git a/ggml/src/ggml-hexagon/htp/main.c b/ggml/src/ggml-hexagon/htp/main.c index 10e2733324..b60b352a7b 100644 --- a/ggml/src/ggml-hexagon/htp/main.c +++ b/ggml/src/ggml-hexagon/htp/main.c @@ -143,16 +143,25 @@ AEEResult htp_iface_disable_etm(remote_handle64 handle) { } static int vtcm_acquire(struct htp_context * ctx) { + int err; if (!ctx->vtcm_valid) { // Temporarily bump thread priority to make sure it's higher than other sessions. // This way the resource manager will notify the other thread to release VTCM. // Note that we need to reaquire VTCM at normal priority for this to work next time. qurt_thread_set_priority(qurt_thread_get_id(), ctx->thread_prio - 10); - HAP_compute_res_acquire_cached(ctx->vtcm_rctx, 1000000); + err = HAP_compute_res_acquire_cached(ctx->vtcm_rctx, 1000000); + if (err != 0) { + FARF(ERROR, "Failed to acquire VTCM: 0x%08x", (unsigned)err); + abort(); + } HAP_compute_res_release_cached(ctx->vtcm_rctx); qurt_thread_set_priority(qurt_thread_get_id(), ctx->thread_prio); - HAP_compute_res_acquire_cached(ctx->vtcm_rctx, 1000000); + err = HAP_compute_res_acquire_cached(ctx->vtcm_rctx, 1000000); + if (err != 0) { + FARF(ERROR, "Failed to acquire VTCM: 0x%08x", (unsigned)err); + abort(); + } ctx->vtcm_valid = true; } @@ -201,7 +210,7 @@ static int vtcm_alloc(struct htp_context * ctx) { HAP_compute_res_attr_init(&attr); HAP_compute_res_attr_set_serialize(&attr, 0); HAP_compute_res_attr_set_cache_mode(&attr, 1); - HAP_compute_res_attr_set_vtcm_param_v2(&attr, vtcm_size, vtcm_size, vtcm_size); + HAP_compute_res_attr_set_vtcm_param_v2(&attr, vtcm_size, 0, vtcm_size); HAP_compute_res_attr_set_release_callback(&attr, vtcm_release_callback, (void *) ctx); HAP_compute_res_attr_set_hmx_param(&attr, 1);