[WIP] llama: enable KleidiAI (tiers t2 and above) and disable tier 4 to work around a bug in `ggml_add_cpu_backend_variant_impl`, which appends `+sve+sve2` to the architecture flags as shown below

```CMake
if (NOT SME_ENABLED MATCHES -1)
...
    set(PRIVATE_ARCH_FLAGS "-fno-tree-vectorize;${PRIVATE_ARCH_FLAGS}+sve+sve2")
...
```
This commit is contained in:
Han Yin 2025-05-14 11:50:02 -07:00
parent ead41ff655
commit 98c8f5e59e
2 changed files with 11 additions and 3 deletions

View File

@ -31,6 +31,13 @@ function(build_llama_tier tier march)
# ---------- 2.1 configure & build core code in an external project -----
set(build_dir ${CMAKE_BINARY_DIR}/llama_build_${tier})
# KleidiAI requires dotprod and i8mm; tiers t0 and t1 do not provide both,
# so the feature is switched off for them and on for every other tier.
# The expansions are quoted so that if() compares the literal tier name:
# an unquoted ${tier} would expand to a bare word (e.g. t0) that if() may
# dereference as a variable, and an empty value would be a syntax error.
if("${tier}" STREQUAL "t0" OR "${tier}" STREQUAL "t1")
    set(kleidi OFF)
else()
    set(kleidi ON)
endif()
ExternalProject_Add(llama_build_${tier}
SOURCE_DIR ${LLAMA_SRC}
BINARY_DIR ${build_dir}
@ -42,6 +49,7 @@ function(build_llama_tier tier march)
-DANDROID_STL=${ANDROID_STL}
-DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE}
# ---- llama / ggml feature switches ----------------------------
-DGGML_CPU_KLEIDIAI=${kleidi}
-DGGML_LLAMAFILE=OFF
-DGGML_OPENMP=OFF
-DLLAMA_BUILD_COMMON=ON
@ -128,12 +136,12 @@ build_llama_tier(t0 "armv8-a+simd")
build_llama_tier(t1 "armv8.2-a+dotprod")
build_llama_tier(t2 "armv8.6-a+dotprod+i8mm")
build_llama_tier(t3 "armv9-a+dotprod+i8mm+sve+sve2")
build_llama_tier(t4 "armv9.2-a+dotprod+i8mm+sve+sve2+sme")
#build_llama_tier(t4 "armv9.2-a+dotprod+i8mm+sve+sve2+sme")
add_dependencies(llama_build_t1 llama_build_t0)
add_dependencies(llama_build_t2 llama_build_t1)
add_dependencies(llama_build_t3 llama_build_t2)
add_dependencies(llama_build_t4 llama_build_t3)
#add_dependencies(llama_build_t4 llama_build_t3)
# --------------------------------------------------------------------------
# 4. Default variant when Gradle hasn't told us (keeps IDE happy)

View File

@ -240,7 +240,7 @@ class LLamaAndroid private constructor() : InferenceEngine {
private val TAG = LLamaAndroid::class.simpleName
// TODO-han.yin: replace with dynamic loader
private const val LIB_LLAMA_ANDROID = "llama_android_t4"
private const val LIB_LLAMA_ANDROID = "llama_android_t3"
// Enforce only one instance of Llm.
private val _instance: LLamaAndroid = LLamaAndroid()