From 2c96a5cd8be58dc7a59995b1dd7230342e2e321a Mon Sep 17 00:00:00 2001
From: "zhou.weiguo" <zhouwg2000@gmail.com>
Date: Mon, 10 Jun 2024 11:43:03 +0800
Subject: [PATCH] examples: refine tensor dump

---
 examples/benchmark/benchmark-matmult.cpp | 53 +++++++++++++++++++++++-
 1 file changed, 51 insertions(+), 2 deletions(-)

diff --git a/examples/benchmark/benchmark-matmult.cpp b/examples/benchmark/benchmark-matmult.cpp
index 47cb16c69d..7446568eef 100644
--- a/examples/benchmark/benchmark-matmult.cpp
+++ b/examples/benchmark/benchmark-matmult.cpp
@@ -31,15 +31,64 @@ static void ggml_graph_compute_helper(std::vector<uint8_t> & buf, ggml_cgraph *
     ggml_graph_compute(graph, &plan);
 }
 
+#define QK8_0 32 // number of quantized values held by one Q8_0 block
+// Local view of one Q8_0 block; NOTE(review): presumably mirrors ggml's internal block_q8_0 layout - confirm.
+typedef struct {
+    uint16_t d;       // delta: per-block scale, raw 16 bits decoded via ggml_compute_fp16_to_fp32()
+    int8_t qs[QK8_0]; // quants: QK8_0 signed 8-bit values, each multiplied by the decoded d
+} block_q8_0;
+
+static inline float ggml_compute_fp16_to_fp32(uint16_t h) {
+    ggml_fp16_t half; // h carries raw IEEE 754 binary16 bits, not an integer quantity
+    memcpy(&half, &h, sizeof(half)); // bit-copy, so it works whatever ggml_fp16_t is typedef'd to
+    return ggml_fp16_to_fp32(half); // decode the bits; a plain (float) cast would return e.g. 15360 for 1.0
+}
+
 static float tensor_sum_elements(const ggml_tensor * tensor) {
-    double sum = 0;
+    double sum                  = 0; // running total, accumulated in double
+    float  floatvalue           = 0; // decoded F16 element, or decoded scale of the current Q8_0 block
+    unsigned short shortvalue   = 0; // raw 16-bit storage of one F16 element
+    // Sums ne[1] rows of ne[0] elements read from tensor->data for F32, I8, F16 or Q8_0; any other type returns 0.
     if (tensor->type == GGML_TYPE_F32) {
         for (int j = 0; j < tensor->ne[1]; j++) {
             for (int k = 0; k < tensor->ne[0]; k++) {
-                sum += ((float *) tensor->data)[j*tensor->ne[0] + k];
+                sum += ((float *) tensor->data)[j * tensor->ne[0] + k];
             }
         }
     }
+    // I8: elements are read as int8_t and added to the total directly.
+    if (tensor->type == GGML_TYPE_I8) {
+        for (int j = 0; j < tensor->ne[1]; j++) {
+            for (int k = 0; k < tensor->ne[0]; k++) {
+                sum += ((int8_t *) tensor->data)[j * tensor->ne[0] + k];
+            }
+        }
+    }
+    // F16: each element is read as a 16-bit pattern and converted to float before being added.
+    if (tensor->type == GGML_TYPE_F16) {
+        for (int j = 0; j < tensor->ne[1]; j++) {
+            for (int k = 0; k < tensor->ne[0]; k++) {
+                shortvalue = ((unsigned short *) tensor->data)[j * tensor->ne[0] + k];
+                floatvalue = ggml_compute_fp16_to_fp32(shortvalue);
+                sum        += floatvalue;
+            }
+        }
+    }
+    // Q8_0: data is walked as block_q8_0 records; every quant is multiplied by its block's scale.
+    if (tensor->type == GGML_TYPE_Q8_0) {
+        int blocks = 0;
+        block_q8_0 * quant_datas = (block_q8_0 *)tensor->data; // tensor->data viewed as an array of blocks
+        for (int j = 0; j < tensor->ne[1]; j++) {
+            blocks = tensor->ne[0] / QK8_0; // blocks per row; NOTE(review): integer division, assumes ne[0] is a multiple of QK8_0 - confirm
+            for (int i = 0; i < blocks; i++) {
+                floatvalue = ggml_compute_fp16_to_fp32(quant_datas[j * blocks + i].d); // scale of block i in row j
+                for (int k = 0; k < QK8_0; k++) {
+                    sum += (quant_datas[j * blocks + i].qs[k] * floatvalue); // quant * block scale
+                }
+            }
+        }
+    }
+    // NOTE: the double accumulator is narrowed to float by the return below.
     return sum;
 }