From d216b62435cd21b36db74f86784466d1a7a6d746 Mon Sep 17 00:00:00 2001
From: Aaron Teo
Date: Sun, 21 Dec 2025 16:01:01 +0800
Subject: [PATCH] ggml-blas: refactor min_batch to graph_compute

Signed-off-by: Aaron Teo
---
 ggml/src/ggml-blas/ggml-blas.cpp | 21 +++++++++++++--------
 1 file changed, 13 insertions(+), 8 deletions(-)

diff --git a/ggml/src/ggml-blas/ggml-blas.cpp b/ggml/src/ggml-blas/ggml-blas.cpp
index 85df7cd140..69950f2bbd 100644
--- a/ggml/src/ggml-blas/ggml-blas.cpp
+++ b/ggml/src/ggml-blas/ggml-blas.cpp
@@ -326,6 +326,19 @@ static ggml_status ggml_backend_blas_graph_compute(
         switch (node->op) {
             case GGML_OP_MUL_MAT:
                 {
+                    const ggml_tensor * src1 = node->src[1];
+
+                    const int64_t ne10 = src1->ne[0];
+                    const int64_t ne0 = node->ne[0];
+                    const int64_t ne1 = node->ne[1];
+
+                    // TODO: find the optimal value
+                    const int64_t min_batch = 32;
+
+                    if (ne0 < min_batch || ne1 < min_batch || ne10 < min_batch) {
+                        return GGML_STATUS_FAILED;
+                    }
+
                     ggml_blas_compute_forward_mul_mat(ctx, node);
                 } break;
             case GGML_OP_OUT_PROD:
@@ -495,20 +508,12 @@ static bool ggml_backend_blas_device_supports_op(ggml_backend_dev_t dev, const g
     switch (dst->op) {
         case GGML_OP_MUL_MAT:
             {
-                const int64_t ne10 = src1->ne[0];
-                const int64_t ne0 = dst->ne[0];
-                const int64_t ne1 = dst->ne[1];
-
-                // TODO: find the optimal value
-                const int64_t min_batch = 32;
-
                 return ggml_is_contiguous(src0)
                     && ggml_is_contiguous(src1)
                     && src1->type == GGML_TYPE_F32
                     // NOTE: llama-bench creates views that somehow does not go through init_tensor
                     // this prevents the uninitialized views from being used in BLAS
                     && src0->view_src == nullptr
                     && src1->view_src == nullptr
-                    && (ne0 >= min_batch && ne1 >= min_batch && ne10 >= min_batch)
                     && (src0->type == GGML_TYPE_F32 || ggml_get_type_traits(src0->type)->to_float != NULL);
             }