add itrace

This commit is contained in:
kevin 2025-12-06 22:25:36 +08:00
parent a81a569577
commit fff5417f54
2 changed files with 33 additions and 0 deletions

View File

@ -11,6 +11,7 @@ target_include_directories(htp_iface PUBLIC
${HEXAGON_SDK_ROOT}/incs
${HEXAGON_SDK_ROOT}/incs/stddef
${HEXAGON_SDK_ROOT}/utils/examples
${HEXAGON_SDK_ROOT}/libs/itrace/inc
${CMAKE_CURRENT_SOURCE_DIR}/htp
${CMAKE_CURRENT_BINARY_DIR})
@ -32,6 +33,7 @@ ggml_add_backend_library(${TARGET_NAME}
ggml-hexagon.cpp htp-utils.c htp-utils.h ../../include/ggml-hexagon.h)
target_link_libraries(${TARGET_NAME} PRIVATE htp_iface)
# Link the prebuilt itrace shared library located under HEXAGON_SDK_ROOT.
# NOTE(review): the path is hard-coded to the android_aarch64 prebuilt and the
# library is linked unconditionally -- confirm that no other host platform is
# expected to build this target, or guard this line accordingly.
target_link_libraries(${TARGET_NAME} PRIVATE ${HEXAGON_SDK_ROOT}/libs/itrace/prebuilt/android_aarch64/libitrace.so)
target_include_directories(${TARGET_NAME} PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/htp ${CMAKE_CURRENT_BINARY_DIR})
# Build HTP bits

View File

@ -11,6 +11,9 @@
#include <string>
#include <stdexcept>
#include "itrace.h"
#include "itrace_types.h"
#include "itrace_cpu_events.h"
#ifdef _WIN32
# include <sal.h>
# ifndef _WINDOWS
@ -45,6 +48,9 @@ static int opt_arch = 0; // autodetect
static int opt_etm = 0;
static int opt_verbose = 0;
static int opt_profile = 0;
// Gates every itrace call visible in this file: opening the logger/profiler,
// starting/ending trace sections, and flushing/closing the logger.
// NOTE(review): defaults to 1 and no environment override is visible in this
// view, so tracing appears to be always on -- confirm this is intended.
static int opt_trace = 1;
// itrace logger handle. Passed to itrace_open_logger() in ggml_hexagon_init()
// and to itrace_flush_logs()/itrace_close_logger() in ~ggml_hexagon_registry().
// Stays NULL when opt_trace is 0 because the open call is skipped.
static itrace_logger_handle_t g_itrace_logger_handle = NULL;
// itrace profiler handle opened with CPU_DOMAIN_ID in ggml_hexagon_init();
// passed to itrace_start_section()/itrace_end_section() around traced ops.
// Stays NULL when opt_trace is 0 because the open call is skipped.
static itrace_profiler_handle_t g_itrace_cpu_profiler_handle = NULL;
static int opt_hostbuf = 1;
static int opt_experimental = 0;
@ -694,6 +700,9 @@ static void init_row_q4x4x2(block_q4_0 * x, int64_t k) {
// repack q4_0 data into q4x4x2 tensor
static void repack_q4_0_q4x4x2(ggml_tensor * t, const void * data, size_t size) {
if (opt_trace) {
itrace_start_section(g_itrace_cpu_profiler_handle, (std::string("ggml-hex-repack-q4_0-q4x4x2-") + t->name).c_str(), NULL);
}
int64_t nrows = ggml_nrows(t);
size_t row_size = ggml_row_size(t->type, t->ne[0]);
@ -751,6 +760,9 @@ static void repack_q4_0_q4x4x2(ggml_tensor * t, const void * data, size_t size)
ggml_aligned_free(buf_pd, row_size_pd);
ggml_aligned_free(buf_rp, row_size_rp);
if (opt_trace) {
itrace_end_section(g_itrace_cpu_profiler_handle, NULL);
}
}
// repack q4x4x2 tensor into q4_0 data
@ -2323,6 +2335,9 @@ static void hex_dump_dspbuf(const struct ggml_tensor * t, const dspqueue_buffer
}
static void ggml_hexagon_mul_mat(const struct ggml_tensor * op, uint32_t flags) {
if (opt_trace) {
itrace_start_section(g_itrace_cpu_profiler_handle, (std::string("ggml-hex-mul-mat-") + op->name).c_str(), NULL);
}
const struct ggml_tensor * src0 = op->src[0];
const struct ggml_tensor * src1 = op->src[1];
const struct ggml_tensor * dst = op;
@ -2390,6 +2405,10 @@ static void ggml_hexagon_mul_mat(const struct ggml_tensor * op, uint32_t flags)
(uint32_t) src1->ne[2], (uint32_t) src1->ne[3], dst->name, (uint32_t) dst->ne[0], (uint32_t) dst->ne[1],
(uint32_t) dst->ne[2], (uint32_t) dst->ne[3], sess->prof_usecs, sess->prof_cycles, sess->prof_pkts,
(float) sess->prof_cycles / sess->prof_pkts, (unsigned long long) t2 - t1);
if (opt_trace) {
itrace_end_section(g_itrace_cpu_profiler_handle, NULL);
}
}
static void ggml_hexagon_mul_mat_id(const struct ggml_tensor * op, uint32_t flags) {
@ -3429,6 +3448,12 @@ ggml_hexagon_registry::~ggml_hexagon_registry() {
auto sess = static_cast<ggml_hexagon_session *>(devices[i].context);
delete sess;
}
// Flush and close the itrace logger if tracing (opt_trace) was enabled
if (opt_trace) {
itrace_flush_logs(g_itrace_logger_handle);
itrace_close_logger(g_itrace_logger_handle);
}
}
static const char * ggml_backend_hexagon_reg_get_name(ggml_backend_reg_t reg) {
@ -3477,6 +3502,12 @@ static void ggml_hexagon_init(ggml_backend_reg * reg) {
opt_etm = getenv("GGML_HEXAGON_ETM") != nullptr;
opt_experimental = getenv("GGML_HEXAGON_EXPERIMENTAL") != nullptr;
// Open the itrace logger and CPU profiler if tracing (opt_trace) is enabled
if (opt_trace) {
itrace_open_logger(CPU_DOMAIN_ID, &g_itrace_logger_handle);
itrace_open_profiler(g_itrace_logger_handle, CPU_DOMAIN_ID, 0, &g_itrace_cpu_profiler_handle);
}
const char * str_opmask = getenv("GGML_HEXAGON_OPMASK");
if (str_opmask != nullptr) {
opt_opmask = strtoul(str_opmask, NULL, 0);