From 072f3d858a3d70f41b069b47ddf032bc86eb7eed Mon Sep 17 00:00:00 2001 From: Rohan Date: Wed, 24 Sep 2025 19:35:06 +0000 Subject: [PATCH 1/4] Test for nrc=2 as well | i8mm kernels --- tests/test-quantize-fns.cpp | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/tests/test-quantize-fns.cpp b/tests/test-quantize-fns.cpp index 037c0582bb..392ee64991 100644 --- a/tests/test-quantize-fns.cpp +++ b/tests/test-quantize-fns.cpp @@ -79,7 +79,7 @@ static float dot_product(const float * a1, const float * a2, size_t test_size) { } // Total dot product error -static float dot_product_error(const ggml_type_traits * qfns, const ggml_type_traits_cpu * qfns_cpu, size_t test_size, const float * test_data1, const float * test_data2) { +static float dot_product_error(const ggml_type_traits * qfns, const ggml_type_traits_cpu * qfns_cpu, size_t test_size, const float * test_data1, const float * test_data2, const int nrc) { GGML_UNUSED(qfns); std::vector tmp_q1(2*test_size); @@ -91,7 +91,7 @@ static float dot_product_error(const ggml_type_traits * qfns, const ggml_type_tr vdot->from_float(test_data2, tmp_q2.data(), test_size); float result = INFINITY; - qfns_cpu->vec_dot(test_size, &result, 0, tmp_q1.data(), 0, tmp_q2.data(), 0, 1); + qfns_cpu->vec_dot(test_size, &result, 0, tmp_q1.data(), 0, tmp_q2.data(), 0, nrc); const float dot_ref = dot_product(test_data1, test_data2, test_size); @@ -163,7 +163,7 @@ int main(int argc, char * argv[]) { printf("%5s reference implementation error: %s (%f)\n", ggml_type_name(type), RESULT_STR[failed], reference_error); } - const float vec_dot_error = dot_product_error(qfns, qfns_cpu, test_size, test_data.data(), test_data2.data()); + const float vec_dot_error = dot_product_error(qfns, qfns_cpu, test_size, test_data.data(), test_data2.data(), 1); const float max_allowed_error = type == GGML_TYPE_Q2_K || type == GGML_TYPE_IQ2_XS || type == GGML_TYPE_IQ2_XXS || type == GGML_TYPE_IQ3_XXS || type == GGML_TYPE_IQ3_S || type == GGML_TYPE_IQ2_S ? MAX_DOT_PRODUCT_ERROR_LOWBIT @@ -175,6 +175,17 @@ int main(int argc, char * argv[]) { if (failed || verbose) { printf("%5s dot product error: %s (%f)\n", ggml_type_name(type), RESULT_STR[failed], vec_dot_error); } + + // Test i8mm path (nrc=2) for supported types + if (type == GGML_TYPE_Q4_0 || type == GGML_TYPE_Q4_1 || type == GGML_TYPE_Q8_0 || + type == GGML_TYPE_Q4_K || type == GGML_TYPE_Q6_K) { + const float vec_dot_error_i8mm = dot_product_error(qfns, qfns_cpu, test_size, test_data.data(), test_data2.data(), 2); + failed = !(vec_dot_error_i8mm < max_allowed_error); + num_failed += failed; + if (failed || verbose) { + printf("%5s dot product error (i8mm): %s (%f)\n", ggml_type_name(type), RESULT_STR[failed], vec_dot_error_i8mm); + } + } } } From e592bb06a2fb14e26bf04fee1d70de6c262cc6ce Mon Sep 17 00:00:00 2001 From: Rohan Date: Wed, 24 Sep 2025 19:38:04 +0000 Subject: [PATCH 2/4] Trigger only on supported HW --- tests/test-quantize-fns.cpp | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/tests/test-quantize-fns.cpp b/tests/test-quantize-fns.cpp index 392ee64991..03674cb91c 100644 --- a/tests/test-quantize-fns.cpp +++ b/tests/test-quantize-fns.cpp @@ -179,12 +179,18 @@ int main(int argc, char * argv[]) { // Test i8mm path (nrc=2) for supported types if (type == GGML_TYPE_Q4_0 || type == GGML_TYPE_Q4_1 || type == GGML_TYPE_Q8_0 || type == GGML_TYPE_Q4_K || type == GGML_TYPE_Q6_K) { +#if defined(__ARM_FEATURE_MATMUL_INT8) const float vec_dot_error_i8mm = dot_product_error(qfns, qfns_cpu, test_size, test_data.data(), test_data2.data(), 2); failed = !(vec_dot_error_i8mm < max_allowed_error); num_failed += failed; if (failed || verbose) { printf("%5s dot product error (i8mm): %s (%f)\n", ggml_type_name(type), RESULT_STR[failed], vec_dot_error_i8mm); } +#else + if (verbose) { + printf("%5s dot product (i8mm): SKIPPED (not supported)\n", ggml_type_name(type)); + } +#endif } } } From 10955f24d1dfee7b20925690e37aa9a4cdd5ec3e Mon Sep 17 00:00:00 2001 From: Rohan Date: Thu, 25 Sep 2025 15:08:20 +0000 Subject: [PATCH 3/4] Remove trailing whitespace --- tests/test-quantize-fns.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test-quantize-fns.cpp b/tests/test-quantize-fns.cpp index 03674cb91c..521fabade1 100644 --- a/tests/test-quantize-fns.cpp +++ b/tests/test-quantize-fns.cpp @@ -177,7 +177,7 @@ int main(int argc, char * argv[]) { } // Test i8mm path (nrc=2) for supported types - if (type == GGML_TYPE_Q4_0 || type == GGML_TYPE_Q4_1 || type == GGML_TYPE_Q8_0 || + if (type == GGML_TYPE_Q4_0 || type == GGML_TYPE_Q4_1 || type == GGML_TYPE_Q8_0 || type == GGML_TYPE_Q4_K || type == GGML_TYPE_Q6_K) { #if defined(__ARM_FEATURE_MATMUL_INT8) const float vec_dot_error_i8mm = dot_product_error(qfns, qfns_cpu, test_size, test_data.data(), test_data2.data(), 2); From e211822fcb88423264ec2f00900f8e81bf20adbf Mon Sep 17 00:00:00 2001 From: Rohan Date: Mon, 29 Sep 2025 16:53:39 +0000 Subject: [PATCH 4/4] Address review comment --- tests/test-quantize-fns.cpp | 17 +++++------------ 1 file changed, 5 insertions(+), 12 deletions(-) diff --git a/tests/test-quantize-fns.cpp b/tests/test-quantize-fns.cpp index 521fabade1..c2a4f4db43 100644 --- a/tests/test-quantize-fns.cpp +++ b/tests/test-quantize-fns.cpp @@ -176,21 +176,14 @@ int main(int argc, char * argv[]) { printf("%5s dot product error: %s (%f)\n", ggml_type_name(type), RESULT_STR[failed], vec_dot_error); } - // Test i8mm path (nrc=2) for supported types - if (type == GGML_TYPE_Q4_0 || type == GGML_TYPE_Q4_1 || type == GGML_TYPE_Q8_0 || - type == GGML_TYPE_Q4_K || type == GGML_TYPE_Q6_K) { -#if defined(__ARM_FEATURE_MATMUL_INT8) - const float vec_dot_error_i8mm = dot_product_error(qfns, qfns_cpu, test_size, test_data.data(), test_data2.data(), 2); - failed = !(vec_dot_error_i8mm < max_allowed_error); + // Test nrc=2 path for types that support it + if (qfns_cpu->nrows == 2) { + const float vec_dot_error_nrc2 = dot_product_error(qfns, qfns_cpu, test_size, test_data.data(), test_data2.data(), 2); + failed = !(vec_dot_error_nrc2 < max_allowed_error); num_failed += failed; if (failed || verbose) { - printf("%5s dot product error (i8mm): %s (%f)\n", ggml_type_name(type), RESULT_STR[failed], vec_dot_error_i8mm); + printf("%5s dot product error (nrc=2): %s (%f)\n", ggml_type_name(type), RESULT_STR[failed], vec_dot_error_nrc2); } -#else - if (verbose) { - printf("%5s dot product (i8mm): SKIPPED (not supported)\n", ggml_type_name(type)); - } -#endif } } }