chore: reformat code with clang-format to pass CI test

shouyud 2025-12-16 08:41:54 -05:00
parent 05693357c8
commit 952877ec24
6 changed files with 221 additions and 260 deletions

View File

@@ -8,8 +8,8 @@
#include <atomic>
#include <chrono>
#include <mutex>
#include <string>
#include <stdexcept>
#include <string>
#ifdef _WIN32
# include <sal.h>
@@ -53,10 +53,12 @@ static int opt_opmask = HTP_OPMASK_QUEUE | HTP_OPMASK_QUANTIZE | HTP_OPMASK_COMP
static int opt_opsync = 0; // synchronous ops
#define HEX_VERBOSE(...) \
if (opt_verbose) GGML_LOG_DEBUG(__VA_ARGS__)
if (opt_verbose) \
GGML_LOG_DEBUG(__VA_ARGS__)
#define HEX_PROFILE(...) \
if (opt_profile) GGML_LOG_INFO(__VA_ARGS__)
if (opt_profile) \
GGML_LOG_INFO(__VA_ARGS__)
static inline uint64_t hex_is_aligned(void * addr, uint32_t align) {
return ((size_t) addr & (align - 1)) == 0;
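
Note on the two macro hunks above: clang-format only moved the line continuations; each macro still expands to a single brace-less if statement, so behavior is unchanged (the usual caveat about using such a macro next to an else applies equally before and after). A minimal standalone sketch, with opt_verbose and the macro shape taken from the hunk, and the printf stand-in plus main purely hypothetical:

#include <cstdio>

static int opt_verbose = 1;                           // flag from the diff
#define GGML_LOG_DEBUG(...) std::printf(__VA_ARGS__)  // stand-in for ggml's logger

// New formatting from the diff: only the continuation moved, the expansion
// is still one brace-less if statement.
#define HEX_VERBOSE(...) \
    if (opt_verbose)     \
    GGML_LOG_DEBUG(__VA_ARGS__)

int main() {
    HEX_VERBOSE("verbose: x = %d\n", 42);  // expands to: if (opt_verbose) std::printf(...);
    return 0;
}
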
@@ -258,7 +260,10 @@ static inline void hex_print_op_info(const ggml_tensor * op, ggml_hexagon_sessio
names, dims, types, strides, buffs, req_flags);
}
void ggml_hexagon_session::enqueue(struct htp_general_req &req, struct dspqueue_buffer *bufs, uint32_t n_bufs, bool sync) {
void ggml_hexagon_session::enqueue(struct htp_general_req & req,
struct dspqueue_buffer * bufs,
uint32_t n_bufs,
bool sync) {
// Bump pending flag (cleared in the session::flush once we get the responce)
this->op_pending++; // atomic inc
@@ -354,8 +359,8 @@ struct ggml_backend_hexagon_buffer_context {
int err = fastrpc_mmap(s->domain_id, this->fd, (void *) this->base, 0, this->size, FASTRPC_MAP_FD);
if (err != 0) {
GGML_LOG_ERROR("ggml-hex: buffer mapping failed : domain_id %d size %zu fd %d error 0x%08x\n",
s->domain_id, this->size, this->fd, (unsigned) err);
GGML_LOG_ERROR("ggml-hex: buffer mapping failed : domain_id %d size %zu fd %d error 0x%08x\n", s->domain_id,
this->size, this->fd, (unsigned) err);
return false;
}
@@ -386,10 +391,12 @@ struct ggml_backend_hexagon_buffer_context {
size += 4 * 1024; // extra page for padding
if (rpcmem_alloc2) {
this->base = (uint8_t *) rpcmem_alloc2(RPCMEM_HEAP_ID_SYSTEM, RPCMEM_DEFAULT_FLAGS | RPCMEM_HEAP_NOREG, size);
this->base =
(uint8_t *) rpcmem_alloc2(RPCMEM_HEAP_ID_SYSTEM, RPCMEM_DEFAULT_FLAGS | RPCMEM_HEAP_NOREG, size);
} else {
GGML_LOG_INFO("ggml-hex: %s rpcmem_alloc2 not found, falling back to rpcmem_alloc\n", sess->name.c_str());
this->base = (uint8_t *) rpcmem_alloc(RPCMEM_HEAP_ID_SYSTEM, RPCMEM_DEFAULT_FLAGS | RPCMEM_HEAP_NOREG, size);
this->base =
(uint8_t *) rpcmem_alloc(RPCMEM_HEAP_ID_SYSTEM, RPCMEM_DEFAULT_FLAGS | RPCMEM_HEAP_NOREG, size);
}
if (!this->base) {
@@ -1592,25 +1599,28 @@ static const char * ggml_backend_hexagon_buffer_type_name(ggml_backend_buffer_ty
return static_cast<ggml_backend_hexagon_buffer_type_context *>(buffer_type->context)->name.c_str();
}
static ggml_backend_buffer_t ggml_backend_hexagon_buffer_type_alloc_buffer(
ggml_backend_buffer_type_t buffer_type, size_t size) {
static ggml_backend_buffer_t ggml_backend_hexagon_buffer_type_alloc_buffer(ggml_backend_buffer_type_t buffer_type,
size_t size) {
auto sess = static_cast<ggml_backend_hexagon_buffer_type_context *>(buffer_type->context)->sess;
try {
ggml_backend_hexagon_buffer_context * ctx = new ggml_backend_hexagon_buffer_context(sess, size, false /*repack*/);
ggml_backend_hexagon_buffer_context * ctx =
new ggml_backend_hexagon_buffer_context(sess, size, false /*repack*/);
return ggml_backend_buffer_init(buffer_type, ggml_backend_hexagon_buffer_interface, ctx, size);
} catch (std::exception const &exc) {
} catch (const std::exception & exc) {
GGML_LOG_ERROR("ggml-hex: %s failed to allocate buffer context: %s\n", sess->name.c_str(), exc.what());
return nullptr;
}
}
static ggml_backend_buffer_t ggml_backend_hexagon_repack_buffer_type_alloc_buffer(
ggml_backend_buffer_type_t buffer_type, size_t size) {
ggml_backend_buffer_type_t buffer_type,
size_t size) {
auto sess = static_cast<ggml_backend_hexagon_buffer_type_context *>(buffer_type->context)->sess;
try {
ggml_backend_hexagon_buffer_context * ctx = new ggml_backend_hexagon_buffer_context(sess, size, true /*repack*/);
ggml_backend_hexagon_buffer_context * ctx =
new ggml_backend_hexagon_buffer_context(sess, size, true /*repack*/);
return ggml_backend_buffer_init(buffer_type, ggml_backend_hexagon_buffer_interface, ctx, size);
} catch (std::exception const &exc) {
} catch (const std::exception & exc) {
GGML_LOG_ERROR("ggml-hex: %s failed to allocate buffer context: %s\n", sess->name.c_str(), exc.what());
return nullptr;
}
@@ -1621,7 +1631,8 @@ static size_t ggml_backend_hexagon_buffer_type_get_alignment(ggml_backend_buffer
GGML_UNUSED(buffer_type);
}
static size_t ggml_backend_hexagon_buffer_type_get_alloc_size(ggml_backend_buffer_type_t buft, const struct ggml_tensor * t) {
static size_t ggml_backend_hexagon_buffer_type_get_alloc_size(ggml_backend_buffer_type_t buft,
const struct ggml_tensor * t) {
return ggml_nbytes(t);
}
@@ -1707,7 +1718,8 @@ void ggml_hexagon_session::allocate(int dev_id) noexcept(false) {
char session_uri[256];
{
char htp_uri[256];
snprintf(htp_uri, sizeof(htp_uri), "file:///libggml-htp-v%u.so?htp_iface_skel_handle_invoke&_modver=1.0", opt_arch);
snprintf(htp_uri, sizeof(htp_uri), "file:///libggml-htp-v%u.so?htp_iface_skel_handle_invoke&_modver=1.0",
opt_arch);
struct remote_rpc_get_uri u = {};
u.session_id = this->session_id;
@@ -1725,7 +1737,9 @@ void ggml_hexagon_session::allocate(int dev_id) noexcept(false) {
snprintf(session_uri, htp_URI_domain_len, "%s%s", htp_uri, my_domain->uri);
GGML_LOG_WARN("ggml-hex: failed to get URI for session %d : error 0x%x. Falling back to single session URI: %s\n", dev_id, err, session_uri);
GGML_LOG_WARN(
"ggml-hex: failed to get URI for session %d : error 0x%x. Falling back to single session URI: %s\n",
dev_id, err, session_uri);
}
}
@@ -1852,7 +1866,7 @@ ggml_hexagon_session::ggml_hexagon_session(int dev_id, ggml_backend_dev_t dev) n
repack_buffer_type.iface = ggml_backend_hexagon_repack_buffer_type_interface;
repack_buffer_type.context = new ggml_backend_hexagon_buffer_type_context(this->name + "-REPACK", this);
} catch (std::exception const &exc) {
} catch (const std::exception & exc) {
release();
throw;
}
@@ -2670,8 +2684,7 @@ static void ggml_hexagon_unary(const struct ggml_tensor * op, uint32_t flags) {
if (ggml_get_unary_op(dst) == GGML_UNARY_OP_SILU) {
req.op = HTP_OP_UNARY_SILU;
supported = true;
}
else if (ggml_get_unary_op(dst) == GGML_UNARY_OP_GELU){
} else if (ggml_get_unary_op(dst) == GGML_UNARY_OP_GELU) {
req.op = HTP_OP_UNARY_GELU;
supported = true;
}
@@ -2902,8 +2915,7 @@ static inline bool op_reuse_src1(const ggml_tensor * op1, const ggml_tensor * op
return (op0 && op0->src[1] == op1->src[1]);
}
static inline bool is_compute_op(ggml_tensor *node)
{
static inline bool is_compute_op(ggml_tensor * node) {
return !(ggml_op_is_empty(node->op) || ggml_is_empty(node));
}
@@ -3013,29 +3025,17 @@ struct node_info {
std::vector<ggml_tensor *> fused;
ggml_op op() const {
return node->op;
}
ggml_op op() const { return node->op; }
const ggml_tensor * dst() const {
return fused.empty() ? node : fused.back();
}
const ggml_tensor * dst() const { return fused.empty() ? node : fused.back(); }
const ggml_tensor * src0() const {
return node->src[0];
}
const ggml_tensor * src0() const { return node->src[0]; }
const ggml_tensor * src1() const {
return node->src[1];
}
const ggml_tensor * src1() const { return node->src[1]; }
bool is_empty() const {
return ggml_op_is_empty(node->op);
}
bool is_empty() const { return ggml_op_is_empty(node->op); }
void add_fused(ggml_tensor * t) {
fused.push_back(t);
}
void add_fused(ggml_tensor * t) { fused.push_back(t); }
bool stackable() const {
switch (this->op()) {
@@ -3047,9 +3047,7 @@
}
}
bool same_input(const node_info& n) const {
return n.src1() == this->src1();
}
bool same_input(const node_info & n) const { return n.src1() == this->src1(); }
};
static std::vector<int> ggml_hexagon_graph_optimize_reorder(const std::vector<node_info> & nodes) {
@@ -3120,19 +3118,15 @@ static void ggml_backend_hexagon_graph_optimize(ggml_backend_t backend, ggml_cgr
// fuse only ops that start with these operations
// can be expanded when needed
if (node.op() == GGML_OP_ADD ||
node.op() == GGML_OP_NORM ||
node.op() == GGML_OP_RMS_NORM) {
if (node.op() == GGML_OP_ADD || node.op() == GGML_OP_NORM || node.op() == GGML_OP_RMS_NORM) {
ops[0] = node.op();
int f = i + 1;
while (f < n && f < i + MAX_FUSE) {
// conservatively allow fusing only these ops
// can be expanded when needed
if (gf->nodes[f]->op != GGML_OP_ADD &&
gf->nodes[f]->op != GGML_OP_MUL &&
gf->nodes[f]->op != GGML_OP_NORM &&
gf->nodes[f]->op != GGML_OP_RMS_NORM) {
if (gf->nodes[f]->op != GGML_OP_ADD && gf->nodes[f]->op != GGML_OP_MUL &&
gf->nodes[f]->op != GGML_OP_NORM && gf->nodes[f]->op != GGML_OP_RMS_NORM) {
break;
}
ops[f - i] = gf->nodes[f]->op;
@@ -3308,8 +3302,7 @@ static bool ggml_backend_hexagon_device_supports_op(ggml_backend_dev_t dev, cons
case GGML_OP_UNARY:
if (ggml_get_unary_op(op) == GGML_UNARY_OP_SILU) {
supp = ggml_hexagon_supported_activations(sess, op);
}
else if (ggml_get_unary_op(op) == GGML_UNARY_OP_GELU){
} else if (ggml_get_unary_op(op) == GGML_UNARY_OP_GELU) {
supp = ggml_hexagon_supported_activations(sess, op);
}
break;
@@ -3429,7 +3422,7 @@ ggml_hexagon_registry::ggml_hexagon_registry(ggml_backend_reg_t reg) {
devices[i].reg = reg;
try {
devices[i].context = new ggml_hexagon_session(i, &devices[i]);
} catch (std::exception const &exc) {
} catch (const std::exception & exc) {
GGML_LOG_ERROR("ggml-hex: failed to create device/session %zu\n", i);
devices[i].context = nullptr;
}
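
Note on a change repeated throughout this file: rewriting catch (std::exception const & exc) as catch (const std::exception & exc) is purely a const-placement ("east const" vs. "west const") style change; the two declarations are identical to the compiler. A minimal standalone sketch, with the scaffolding around the catch clause hypothetical:

#include <cstdio>
#include <stdexcept>

int main() {
    try {
        throw std::runtime_error("boom");
    } catch (const std::exception & exc) {  // same meaning as: std::exception const & exc
        std::printf("caught: %s\n", exc.what());
    }
    return 0;
}
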

View File

@@ -255,7 +255,6 @@ static void glu_swiglu_oai_fp32_per_thread(const struct htp_tensor * src0,
src1->ne[3], dst->ne[0], dst->ne[1], dst->ne[2], dst->ne[3], (unsigned) HAP_perf_qtimer_count_to_us(t2 - t1));
}
static void unary_gelu_fp32_per_thread(const struct htp_tensor * src0,
struct htp_tensor * dst,
const int32_t * op_params,
@@ -318,8 +317,7 @@ static void unary_gelu_fp32_per_thread(const struct htp_tensor * src0,
hvx_mul_scalar_f32((const uint8_t *) src0, (float) 1.702, (uint8_t *) src0_spad_data, ne0);
hvx_fast_sigmoid_f32((const uint8_t *) src0_spad_data, (uint8_t *) src0_spad_data, ne0);
hvx_mul_f32_opt((const uint8_t *) src0, src0_spad_data, (uint8_t *) dst, ne0);
}
else {
} else {
hvx_mul_scalar_f32((const uint8_t *) src0, (float) 1.702, (uint8_t *) src0_spad_data, ne0);
hvx_sigmoid_f32((const uint8_t *) src0_spad_data, (uint8_t *) src0_spad_data, ne0);
hvx_mul_f32((const uint8_t *) src0, src0_spad_data, (uint8_t *) dst, ne0);
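
Note: both branches of the hunk above compute the same thing (scale by 1.702, apply a sigmoid, multiply by the input), which is the standard sigmoid approximation of GELU:

$$ \mathrm{GELU}(x) \;\approx\; x \cdot \sigma(1.702\,x), \qquad \sigma(t) = \frac{1}{1 + e^{-t}} $$

The fast path only swaps hvx_sigmoid_f32 for hvx_fast_sigmoid_f32; the reformat merely joins the else onto the closing brace without touching the math.
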
@@ -339,8 +337,6 @@ static void unary_gelu_fp32(unsigned int n, unsigned int i, void * data) {
octx->src0_nrows_per_thread);
}
static void unary_silu_fp32_per_thread(const struct htp_tensor * src0,
struct htp_tensor * dst,
const int32_t * op_params,

View File

@@ -49,7 +49,6 @@ void hvx_mul_f32(const uint8_t * restrict src0,
FARF(HIGH, "hvx_mul_f32: unaligned loop in hvx op, possibly slower execution\n");
}
bool handled_leftover = false;
if (0 == unaligned_loop) {
HVX_Vector * restrict vec_in1 = (HVX_Vector *) src0;
@@ -65,12 +64,10 @@ void hvx_mul_f32(const uint8_t * restrict src0,
int step_of_1 = num_elems_whole >> 5; // divby 32, because 32 float = 128 bytes per HVX vector
int leftover_size = left_over * sizeof(float);
HVX_Vector * restrict vec_in1 = (HVX_Vector *) src0;
HVX_Vector * restrict vec_in2 = (HVX_Vector *) src1;
HVX_UVector * restrict vec_out = (HVX_UVector *) dst;
HVX_Vector slinep;
HVX_Vector slinec;
HVX_Vector sline;
@@ -102,15 +99,10 @@ void hvx_mul_f32(const uint8_t * restrict src0,
sline2p = sline2c;
}
if (left_over > 0) {
slinec = (is_in_one_chunk(vec_in1, leftover_size, VLEN)
? slinep
: *vec_in1++);
slinec = (is_in_one_chunk(vec_in1, leftover_size, VLEN) ? slinep : *vec_in1++);
sline = Q6_V_valign_VVR(slinec, slinep, (size_t) src0);
sline2c = (is_in_one_chunk(vec_in2, leftover_size, VLEN)
? sline2p
: *vec_in2++);
sline2c = (is_in_one_chunk(vec_in2, leftover_size, VLEN) ? sline2p : *vec_in2++);
sline2 = Q6_V_valign_VVR(sline2c, sline2p, (size_t) src1);
HVX_Vector out = Q6_Vqf32_vmpy_VsfVsf(sline, sline2);
@@ -119,7 +111,6 @@ void hvx_mul_f32(const uint8_t * restrict src0,
}
}
if (left_over > 0 && !handled_leftover) {
const float * src0f = (const float *) src0 + num_elems_whole;
const float * src1f = (const float *) src1 + num_elems_whole;
@@ -527,12 +518,9 @@ void hvx_mul_scalar_f32(const uint8_t * restrict src, const float val, uint8_t *
int step_of_1 = num_elems >> 5; // divby 32, because 32 float = 128 bytes per HVX vector
int leftover_size = left_over * sizeof(float);
HVX_Vector * input_v_ptr = (HVX_Vector *) src;
HVX_UVector * output_v_ptr = (HVX_UVector *) dst;
HVX_Vector slinep;
HVX_Vector slinec;
HVX_Vector sline;
@@ -549,7 +537,6 @@ void hvx_mul_scalar_f32(const uint8_t * restrict src, const float val, uint8_t *
}
if (step_of_1 > 0) {
slinec = htp_is_aligned(input_v_ptr, VLEN) && left_over == 0 ? slinep : *input_v_ptr++;
sline = Q6_V_valign_VVR(slinec, slinep, (size_t) src);
*((HVX_UVector *) (output_v_ptr++)) = Q6_Vsf_equals_Vqf32(Q6_Vqf32_vmpy_VsfVsf(sline, val_vec));
@@ -558,9 +545,7 @@ void hvx_mul_scalar_f32(const uint8_t * restrict src, const float val, uint8_t *
}
if (leftover_size > 0) {
slinec = (is_in_one_chunk(input_v_ptr, leftover_size, VLEN)
? slinep
: *input_v_ptr++);
slinec = (is_in_one_chunk(input_v_ptr, leftover_size, VLEN) ? slinep : *input_v_ptr++);
sline = Q6_V_valign_VVR(slinec, slinep, (size_t) src);

View File

@@ -23,9 +23,8 @@ typedef union {
/* Q6_Vsf_equals_Vw is only available on v73+.*/
#if __HVX_ARCH__ < 73
static inline HVX_Vector int32_to_qfloat(HVX_Vector const in)
{
HVX_Vector const vzero = Q6_V_vzero();
static inline HVX_Vector int32_to_qfloat(const HVX_Vector in) {
const HVX_Vector vzero = Q6_V_vzero();
HVX_VectorPred is_zero = Q6_Q_vcmp_eq_VwVw(in, vzero);
HVX_Vector lshift = Q6_Vw_vnormamt_Vw(in);
HVX_Vector normalized = Q6_Vw_vasl_VwVw(in, lshift);
@@ -35,8 +34,7 @@ static inline HVX_Vector int32_to_qfloat(HVX_Vector const in)
return ret;
}
static inline HVX_Vector Q6_Vsf_equals_Vw(HVX_Vector const in)
{
static inline HVX_Vector Q6_Vsf_equals_Vw(const HVX_Vector in) {
return Q6_Vsf_equals_Vqf32(int32_to_qfloat(in));
}
#endif
@@ -265,7 +263,6 @@ static inline void hvx_bcast_fp32_a(uint8_t * restrict dst, float elem, uint32_t
}
}
/* Return whether 'n' elements from vector are in the one chunk of 'chunk_size'. */
static __attribute__((always_inline)) int32_t is_in_one_chunk(void * addr, uint32_t n, uint32_t chunk_size) {
uint32_t left_off = (size_t) addr & (chunk_size - 1);
@@ -273,8 +270,6 @@ static __attribute__((always_inline)) int32_t is_in_one_chunk(void * addr, uint3
return right_off <= chunk_size;
}
static void hvx_vec_dump_fp16_n(char * pref, HVX_Vector v, uint32_t n) {
HVX_VectorAlias u = { .v = v };
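
Note: is_in_one_chunk, shown a few lines above, is what the leftover branches in the .c file use to decide whether the bytes still needed already sit in the previously loaded vector (slinep) or whether one more vector load is required. A standalone sketch; the middle line computing right_off falls between the two hunks above and is reconstructed here, and the demo addresses are hypothetical:

#include <cstddef>
#include <cstdint>
#include <cstdio>

// True when the n bytes starting at addr fall inside a single
// chunk_size-aligned chunk (chunk_size must be a power of two).
static inline int32_t is_in_one_chunk(void * addr, uint32_t n, uint32_t chunk_size) {
    uint32_t left_off  = (size_t) addr & (chunk_size - 1);  // offset of addr within its chunk
    uint32_t right_off = left_off + n;                      // reconstructed: where the access ends
    return right_off <= chunk_size;
}

int main() {
    // 8 bytes at offset 120 of a 128-byte chunk end exactly on the boundary: still inside.
    std::printf("%d\n", (int) is_in_one_chunk((void *) (uintptr_t) 120, 8, 128));   // prints 1
    // 16 bytes at offset 120 spill into the next chunk: a second load is needed.
    std::printf("%d\n", (int) is_in_one_chunk((void *) (uintptr_t) 120, 16, 128));  // prints 0
    return 0;
}
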
@@ -998,7 +993,6 @@ static inline void hvx_fast_sigmoid_f32(const uint8_t * restrict src, uint8_t *
}
}
static inline void hvx_sigmoid_f32(const uint8_t * restrict src, uint8_t * restrict dst, const int num_elems) {
int step_of_1 = num_elems >> 5; // divby 32, because 32 float = 128 bytes per HVX vector
int leftover = num_elems - (step_of_1 * VLEN_FP32);
@@ -1018,12 +1012,10 @@ static inline void hvx_sigmoid_f32(const uint8_t * restrict src, uint8_t * restr
HVX_Vector * input_v_ptr = (HVX_Vector *) input;
HVX_UVector * output_v_ptr = (HVX_UVector *) output;
HVX_Vector slinep;
HVX_Vector slinec;
HVX_Vector sline;
slinep = *input_v_ptr++;
#pragma unroll(4)
for (uint32_t i = step_of_1 - 1; i > 0; i--) {
@@ -1035,28 +1027,23 @@ static inline void hvx_sigmoid_f32(const uint8_t * restrict src, uint8_t * restr
}
if (step_of_1 > 0) {
slinec = htp_is_aligned(input_v_ptr, 128) && leftover == 0 ? slinep : *input_v_ptr++;
sline = Q6_V_valign_VVR(slinec, slinep, (size_t) input);
*((HVX_UVector *)(output_v_ptr++)) = hvx_vec_fast_sigmoid_fp32_guard(sline, one, max_exp, min_exp);;
*((HVX_UVector *) (output_v_ptr++)) = hvx_vec_fast_sigmoid_fp32_guard(sline, one, max_exp, min_exp);
;
slinep = slinec;
}
if (leftover > 0) {
slinec = (is_in_one_chunk(input_v_ptr, leftover_size, 128)
? slinep
: *input_v_ptr++);
slinec = (is_in_one_chunk(input_v_ptr, leftover_size, 128) ? slinep : *input_v_ptr++);
sline = Q6_V_valign_VVR(slinec, slinep, (size_t) input);
HVX_Vector sout = hvx_vec_fast_sigmoid_fp32_guard(sline, one, max_exp, min_exp);
hvx_vec_store_u(output_v_ptr, leftover_size, sout);
}
}
float hvx_sum_of_squares_f32(const uint8_t * restrict src, const int num_elems);
void hvx_mul_f32(const uint8_t * restrict src0,
const uint8_t * restrict src1,