From afe65aa28239906dfda63f70a52f60d3a7527fdc Mon Sep 17 00:00:00 2001
From: Neo Zhang <zhang.jianyu@outlook.com>
Date: Sun, 29 Mar 2026 09:02:45 +0800
Subject: [PATCH] [SYCL] Enhance build script to use half cores to build, avoid
 OS hang (#21093)

* use half cores to build, avoid OS hang

* reduce the output text num to short test time

* avoid to return 0
---
 examples/sycl/build.sh      | 2 +-
 examples/sycl/run-llama2.sh | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/examples/sycl/build.sh b/examples/sycl/build.sh
index 635e74fe64..bf7d6b53bf 100755
--- a/examples/sycl/build.sh
+++ b/examples/sycl/build.sh
@@ -20,4 +20,4 @@ cmake .. -DGGML_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DLLAMA
 #cmake --build . --config Release --target llama-bench
 
 #build all binary
-cmake --build . --config Release -j -v
+cmake --build . --config Release -j$((($(nproc)+1)/2)) -v
diff --git a/examples/sycl/run-llama2.sh b/examples/sycl/run-llama2.sh
index d33f82f339..6ed2535bbb 100755
--- a/examples/sycl/run-llama2.sh
+++ b/examples/sycl/run-llama2.sh
@@ -23,9 +23,9 @@ if [ $# -gt 0 ]; then
     GGML_SYCL_DEVICE=$1
     echo "use $GGML_SYCL_DEVICE as main GPU"
     #use signle GPU only
-    ZES_ENABLE_SYSMAN=1 ./build/bin/llama-completion -m ${MODEL_FILE} -no-cnv -p "${INPUT_PROMPT}" -n 400 -e -ngl ${NGL} -s 0 -c ${CONTEXT} -mg $GGML_SYCL_DEVICE -sm none ${LOAD_MODE}
+    ZES_ENABLE_SYSMAN=1 ./build/bin/llama-completion -m ${MODEL_FILE} -no-cnv -p "${INPUT_PROMPT}" -n 200 -e -ngl ${NGL} -s 0 -c ${CONTEXT} -mg $GGML_SYCL_DEVICE -sm none ${LOAD_MODE}
 
 else
     #use multiple GPUs with same max compute units
-    ZES_ENABLE_SYSMAN=1 ./build/bin/llama-completion -m ${MODEL_FILE} -no-cnv -p "${INPUT_PROMPT}" -n 400 -e -ngl ${NGL} -s 0 -c ${CONTEXT} ${LOAD_MODE}
+    ZES_ENABLE_SYSMAN=1 ./build/bin/llama-completion -m ${MODEL_FILE} -no-cnv -p "${INPUT_PROMPT}" -n 200 -e -ngl ${NGL} -s 0 -c ${CONTEXT} ${LOAD_MODE}
 fi