From f1fa387c1ad9676a9bca8a696271b8c1635b9e25 Mon Sep 17 00:00:00 2001 From: chraac Date: Tue, 25 Nov 2025 00:41:07 +0800 Subject: [PATCH] add comment --- ggml/src/ggml-hexagon/ggml-hexagon.cpp | 44 +++++++++++++++----------- 1 file changed, 26 insertions(+), 18 deletions(-) diff --git a/ggml/src/ggml-hexagon/ggml-hexagon.cpp b/ggml/src/ggml-hexagon/ggml-hexagon.cpp index 54f99d634d..5c7837a478 100644 --- a/ggml/src/ggml-hexagon/ggml-hexagon.cpp +++ b/ggml/src/ggml-hexagon/ggml-hexagon.cpp @@ -2383,24 +2383,21 @@ template static void ggml_hexagon_binary(const struct ggm init_htp_tensor(&req.dst, dst); dspqueue_buffer bufs[3]; - // First buffer = First Operand of Binary op - // This is a buffer that the CPU writes and the DSP reads, so we'll - // need to flush CPU caches and invalidate DSP ones. On platforms - // with I/O coherency support the framework will automatically skip - // cache operations where possible. + + // Buffer 0 (src0): Weights (mulmat) or First Operand (binary op). + // If constant (e.g. weights), no cache management is needed. + // Otherwise (CPU writes, DSP reads), we flush CPU caches and invalidate DSP caches. + // Note: On platforms with I/O coherency, the framework skips cache ops automatically. dspqueue_buffers_init(bufs, src0, _IsSrc0Constant ? DSP_BUFFER_TYPE_CONSTANT : DSP_BUFFER_TYPE_CPU_WRITE_DSP_READ); - // Second buffer = Second Operand of Binary op - // This is a buffer that the CPU writes and the DSP reads, so we'll - // need to flush CPU caches and invalidate DSP ones. On platforms - // with I/O coherency support the framework will automatically skip - // cache operations where possible. + // Buffer 1 (src1): Input Activations (mulmat) or Second Operand (binary op). + // CPU writes, DSP reads: flush CPU caches and invalidate DSP caches. dspqueue_buffers_init(&bufs[1], src1, DSP_BUFFER_TYPE_CPU_WRITE_DSP_READ); - // Third buffer = Output Activations. We'll handle DSP - // cache maintenance in the response message but need to flush - // CPU caches to ensure any previously written dirty lines are - // written out before writes from the DSP start. + // Buffer 2 (dst): Output Activations. + // DSP writes, CPU reads. + // We flush CPU caches to ensure consistency before DSP writes. + // DSP cache maintenance is handled in the response message. dspqueue_buffers_init(&bufs[2], dst, DSP_BUFFER_TYPE_DSP_WRITE_CPU_READ); auto * sess = get_session_from_tensor(src0); @@ -2470,13 +2467,24 @@ template static void ggml_hexagon_binary_id(const struct init_htp_tensor(&req.dst, dst); dspqueue_buffer bufs[4]; - // First buffer = input activations + + // Buffer 0 (src0): Weights (mulmat) or Input Activations (other op). + // If constant, no cache management is needed. + // Otherwise (CPU writes, DSP reads), we flush CPU caches and invalidate DSP caches. dspqueue_buffers_init(bufs, src0, _IsSrc0Constant ? DSP_BUFFER_TYPE_CONSTANT : DSP_BUFFER_TYPE_CPU_WRITE_DSP_READ); - // Second buffer = experts bias + + // Buffer 1 (src1): Input Activations (mulmat) or Experts Bias (other op). + // CPU writes, DSP reads: flush CPU caches and invalidate DSP caches. dspqueue_buffers_init(&bufs[1], src1, DSP_BUFFER_TYPE_CPU_WRITE_DSP_READ); - // Third buffer = activated experts + + // Buffer 2 (src2): Expert IDs (mulmat) or Activated Experts (other op). + // CPU writes, DSP reads: flush CPU caches and invalidate DSP caches. dspqueue_buffers_init(&bufs[2], src2, DSP_BUFFER_TYPE_CPU_WRITE_DSP_READ); - // Forth buffer = output activations + + // Buffer 3 (dst): Output Activations. + // DSP writes, CPU reads. + // We flush CPU caches to ensure consistency before DSP writes. + // DSP cache maintenance is handled in the response message. dspqueue_buffers_init(&bufs[3], dst, DSP_BUFFER_TYPE_DSP_WRITE_CPU_READ); auto * sess = get_session_from_tensor(src0);