core: support GGML_CPU_ALL_VARIANTS on Android!

This commit is contained in:
Han Yin 2025-09-03 13:53:47 -07:00
parent 0c6ce7b9a3
commit 6cde2fe1bd
9 changed files with 72 additions and 197 deletions

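For context on the mechanism this commit relies on: with GGML_BACKEND_DL=ON each CPU variant is built as its own DSO, ggml dlopen()s the candidates at runtime, scores each against the CPU it is running on, and registers only the best match. The sketch below illustrates that flow from plain C++; it is a minimal illustration under those assumptions, not code from this commit, and the directory path is a placeholder.

    #include "ggml-backend.h"
    #include "llama.h"
    #include <cstdio>

    int main() {
        // Load every ggml backend DSO found in the directory; on Android the
        // JNI init() below passes the app's nativeLibraryDir here instead.
        ggml_backend_load_all_from_path("/path/to/native/libs"); // placeholder path

        // ggml keeps only the CPU variant that scored highest for this CPU;
        // list whatever ended up registered.
        for (size_t i = 0; i < ggml_backend_reg_count(); ++i) {
            printf("backend: %s\n", ggml_backend_reg_name(ggml_backend_reg_get(i)));
        }

        llama_backend_init();
        // ... create a model / context as usual ...
        llama_backend_free();
        return 0;
    }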
View File

@ -1,28 +0,0 @@
# Resolve the repository root no matter where this script is executed.
get_filename_component(LLAMA_ROOT "${CMAKE_CURRENT_LIST_DIR}/../.." ABSOLUTE)
# Load the helper macros that fill @BUILD_*@ variables
include(${LLAMA_ROOT}/cmake/build-info.cmake)
set(TEMPLATE_FILE "${LLAMA_ROOT}/common/build-info.cpp.in")
set(OUTPUT_FILE "${LLAMA_ROOT}/common/build-info.cpp")
# Only write the build info if it changed
if(EXISTS ${OUTPUT_FILE})
file(READ ${OUTPUT_FILE} CONTENTS)
string(REGEX MATCH "LLAMA_COMMIT = \"([^\"]*)\";" _ ${CONTENTS})
set(OLD_COMMIT ${CMAKE_MATCH_1})
string(REGEX MATCH "LLAMA_COMPILER = \"([^\"]*)\";" _ ${CONTENTS})
set(OLD_COMPILER ${CMAKE_MATCH_1})
string(REGEX MATCH "LLAMA_BUILD_TARGET = \"([^\"]*)\";" _ ${CONTENTS})
set(OLD_TARGET ${CMAKE_MATCH_1})
if (
NOT OLD_COMMIT STREQUAL BUILD_COMMIT OR
NOT OLD_COMPILER STREQUAL BUILD_COMPILER OR
NOT OLD_TARGET STREQUAL BUILD_TARGET
)
configure_file(${TEMPLATE_FILE} ${OUTPUT_FILE})
endif()
else()
configure_file(${TEMPLATE_FILE} ${OUTPUT_FILE})
endif()

View File

@ -6,6 +6,7 @@
<application
android:name=".KleidiLlamaApplication"
android:extractNativeLibs="true"
android:allowBackup="true"
android:dataExtractionRules="@xml/data_extraction_rules"
android:fullBackupContent="@xml/backup_rules"

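The android:extractNativeLibs="true" line above matters because ggml's dynamic loader opens the variant DSOs by absolute path: the libraries must exist as real files under nativeLibraryDir instead of being mapped straight out of the APK. A tiny standalone probe (an illustration, not part of this commit) that mimics what the loader needs:

    #include <dlfcn.h>
    #include <cstdio>

    // Usage: probe /full/path/to/libggml-cpu-xyz.so
    int main(int argc, char ** argv) {
        if (argc < 2) { fprintf(stderr, "usage: %s <path-to-so>\n", argv[0]); return 2; }
        // ggml_backend_load_all_from_path() ultimately needs this dlopen to work.
        void * handle = dlopen(argv[1], RTLD_NOW | RTLD_LOCAL);
        printf("%s: %s\n", argv[1], handle ? "loadable by path" : dlerror());
        return handle ? 0 : 1;
    }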
View File

@ -8,7 +8,7 @@ android {
namespace = "android.llama.cpp"
compileSdk = 36
ndkVersion = "29.0.13113456 rc1"
ndkVersion = "29.0.13113456"
defaultConfig {
minSdk = 33
@ -24,6 +24,14 @@ android {
arguments += "-DCMAKE_BUILD_TYPE=Release"
arguments += "-DCMAKE_MESSAGE_LOG_LEVEL=DEBUG"
arguments += "-DCMAKE_VERBOSE_MAKEFILE=ON"
arguments += "-DGGML_SYSTEM_ARCH=ARM" // Undocumented before 3.21
arguments += "-DGGML_NATIVE=OFF"
arguments += "-DGGML_BACKEND_DL=ON"
arguments += "-DGGML_CPU_ALL_VARIANTS=ON"
arguments += "-DGGML_OPENMP=ON"
}
}
aarMetadata {

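With GGML_NATIVE=OFF, GGML_BACKEND_DL=ON and GGML_CPU_ALL_VARIANTS=ON, the build emits one CPU backend DSO per variant declared in ggml/src/CMakeLists.txt (see further down) rather than a single -march-tuned libggml-cpu. Should you need to load one variant explicitly instead of scanning a directory, ggml_backend_load() takes a path; note the file name below is an assumption derived from the android_armv8.6_1 variant name added later in this commit.

    #include "ggml-backend.h"
    #include <cstdio>

    int main() {
        // Assumed DSO name for the android_armv8.6_1 variant; verify against
        // the actual build output.
        ggml_backend_reg_t reg = ggml_backend_load("libggml-cpu-android_armv8.6_1.so");
        if (!reg) {
            fprintf(stderr, "variant not loadable (wrong ABI, or CPU lacks its features?)\n");
            return 1;
        }
        printf("loaded backend: %s\n", ggml_backend_reg_name(reg));
        return 0;
    }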
View File

@ -1,23 +1,18 @@
# ============================================================================
# Multi-tier Android build for llama.cpp
# --------------------------------------
# Produces five DSOs, each compiled with an increasingly aggressive
# -march string. At runtime you pick the highest tier the device
# supports and call `System.loadLibrary("llama_android_tX")`.
# ============================================================================
cmake_minimum_required(VERSION 3.31.6)
cmake_minimum_required(VERSION 3.22.1)
project("llama_android" LANGUAGES C CXX)
project("kleidi-llama" VERSION 1.0.0 LANGUAGES C CXX)
set(CMAKE_C_STANDARD 11)
set(CMAKE_C_STANDARD_REQUIRED true)
set(CMAKE_CXX_STANDARD 17)
set(CMAKE_CXX_STANDARD_REQUIRED true)
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS}" CACHE STRING "" FORCE)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}" CACHE STRING "" FORCE)
# --------------------------------------------------------------------------
# 0. Language / toolchain defaults
# --------------------------------------------------------------------------
set(CMAKE_C_STANDARD 11 CACHE STRING "" FORCE)
set(CMAKE_CXX_STANDARD 17 CACHE STRING "" FORCE)
set(CMAKE_POSITION_INDEPENDENT_CODE ON)
# --------------------------------------------------------------------------
# 1.a CPU feature detection library
# 1. CPU feature detection library
# --------------------------------------------------------------------------
add_subdirectory(
${CMAKE_CURRENT_LIST_DIR}/../../../../../../include/cpu_features
@ -29,141 +24,31 @@ target_link_libraries(llama_cpu_detector
log)
# --------------------------------------------------------------------------
# 1.b Make the LLVM OpenMP runtime available
# 2. Kleidi Llama library
# --------------------------------------------------------------------------
find_package(OpenMP REQUIRED)
# --------------------------------------------------------------------------
# 2. Locate the root of the llama.cpp source tree
# (six levels up from this CMakeLists.txt).
# --------------------------------------------------------------------------
set(LLAMA_BUILD_COMMON ON CACHE BOOL "" FORCE)
set(LLAMA_CURL OFF CACHE BOOL "" FORCE)
set(GGML_LLAMAFILE OFF CACHE BOOL "" FORCE)
set(GGML_CPU_KLEIDIAI ON CACHE BOOL "" FORCE)
set(GGML_OPENMP ON CACHE BOOL "" FORCE)
set(LLAMA_SRC ${CMAKE_CURRENT_LIST_DIR}/../../../../../../)
add_subdirectory(${LLAMA_SRC} build-llama)
# --------------------------------------------------------------------------
# 3. Build helper: one invocation = one hardware tier
# --------------------------------------------------------------------------
include(ExternalProject)
add_library(${CMAKE_PROJECT_NAME} SHARED
kleidi-llama.cpp)
function(build_llama_tier tier march)
# ---------- 3.1 configure & build core code in an external project -----
set(build_dir ${CMAKE_BINARY_DIR}/llama_build_${tier})
# KleidiAI requires dotprod and i8mm
if(${tier} STREQUAL "t0" OR ${tier} STREQUAL "t1")
set(kleidi OFF)
else()
set(kleidi ON)
endif()
ExternalProject_Add(llama_build_${tier}
SOURCE_DIR ${LLAMA_SRC}
BINARY_DIR ${build_dir}
# ---- pass Android cross-compile context straight through ----------
CMAKE_ARGS
-DCMAKE_TOOLCHAIN_FILE=${CMAKE_TOOLCHAIN_FILE}
-DANDROID_ABI=${ANDROID_ABI}
-DANDROID_PLATFORM=${ANDROID_PLATFORM}
-DANDROID_STL=${ANDROID_STL}
-DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE}
# ---- llama / ggml feature switches ----------------------------
-DGGML_CPU_KLEIDIAI=${kleidi}
-DGGML_LLAMAFILE=OFF
-DGGML_OPENMP=ON
-DLLAMA_BUILD_COMMON=ON
-DLLAMA_CURL=OFF
-DBUILD_SHARED_LIBS=OFF # we want static libs to embed
# ---- tier-specific ISA flags ----------------------------------
-DCMAKE_C_FLAGS=-march=${march}
-DCMAKE_CXX_FLAGS=-march=${march}
# ---- put the .a files right in ${build_dir} for easy pick-up --
-DCMAKE_ARCHIVE_OUTPUT_DIRECTORY=${build_dir}
INSTALL_COMMAND "" # nothing to install
BUILD_BYPRODUCTS
${build_dir}/libllama.a
${build_dir}/libcommon.a
${build_dir}/libggml.a
${build_dir}/libggml-base.a
${build_dir}/libggml-cpu.a
)
# ---------- 3.2 make the static libs produced above visible ------------
set(llama_a ${build_dir}/libllama.a)
set(common_a ${build_dir}/libcommon.a)
set(ggml_a ${build_dir}/libggml.a)
set(ggml_base_a ${build_dir}/libggml-base.a)
set(ggml_cpu_a ${build_dir}/libggml-cpu.a)
add_library(llama_core_${tier} STATIC IMPORTED GLOBAL)
set_target_properties(llama_core_${tier} PROPERTIES
IMPORTED_LOCATION ${llama_a})
add_dependencies(llama_core_${tier} llama_build_${tier})
add_library(common_core_${tier} STATIC IMPORTED GLOBAL)
set_target_properties(common_core_${tier} PROPERTIES
IMPORTED_LOCATION ${common_a})
add_dependencies(common_core_${tier} llama_build_${tier})
add_library(ggml_core_${tier} STATIC IMPORTED GLOBAL)
set_target_properties(ggml_core_${tier} PROPERTIES
IMPORTED_LOCATION ${ggml_a})
add_dependencies(ggml_core_${tier} llama_build_${tier})
add_library(ggml_base_core_${tier} STATIC IMPORTED GLOBAL)
set_target_properties(ggml_base_core_${tier} PROPERTIES
IMPORTED_LOCATION ${ggml_base_a})
add_dependencies(ggml_base_core_${tier} llama_build_${tier})
add_library(ggml_cpu_core_${tier} STATIC IMPORTED GLOBAL)
set_target_properties(ggml_cpu_core_${tier} PROPERTIES
IMPORTED_LOCATION ${ggml_cpu_a})
add_dependencies(ggml_cpu_core_${tier} llama_build_${tier})
# ---------- 3.3 JNI wrapper DSO ---------------------------------------
add_library(llama_android_${tier} SHARED llama-android.cpp)
target_compile_options(llama_android_${tier} PRIVATE "-march=${march}")
target_include_directories(llama_android_${tier} PRIVATE
target_include_directories(${CMAKE_PROJECT_NAME} PRIVATE
${LLAMA_SRC}
${LLAMA_SRC}/common
${LLAMA_SRC}/include
${LLAMA_SRC}/ggml/include
${LLAMA_SRC}/ggml/src)
target_link_libraries(llama_android_${tier} PRIVATE
llama_core_${tier}
common_core_${tier}
ggml_core_${tier} # umbrella (brings in a few weak deps)
ggml_cpu_core_${tier} # back-end & scheduler
ggml_base_core_${tier} # core math
OpenMP::OpenMP_CXX # OpenMP
target_link_libraries(${CMAKE_PROJECT_NAME}
llama
common
android
log)
# ---------- 3.4 nice SONAME & filename -------------------------------
set_target_properties(llama_android_${tier} PROPERTIES
OUTPUT_NAME "llama_android_${tier}")
endfunction()
# --------------------------------------------------------------------------
# 4. Build all five tiers
# --------------------------------------------------------------------------
build_llama_tier(t0 "armv8-a+simd")
build_llama_tier(t1 "armv8.2-a+dotprod")
build_llama_tier(t2 "armv8.6-a+dotprod+i8mm")
build_llama_tier(t3 "armv9-a+dotprod+i8mm+sve+sve2")
#build_llama_tier(t4 "armv9.2-a+dotprod+i8mm+sve+sve2+sme")
add_dependencies(llama_build_t1 llama_build_t0)
add_dependencies(llama_build_t2 llama_build_t1)
add_dependencies(llama_build_t3 llama_build_t2)
#add_dependencies(llama_build_t4 llama_build_t3)
# --------------------------------------------------------------------------
# 5. Default variant when Gradle hasn't told us (keeps IDE happy)
# --------------------------------------------------------------------------
if(NOT CMAKE_BUILD_TYPE)
set(CMAKE_BUILD_TYPE Release CACHE STRING "" FORCE)
endif()

View File

@ -72,10 +72,16 @@ static void log_callback(ggml_log_level level, const char *fmt, void *data) {
extern "C"
JNIEXPORT void JNICALL
Java_android_llama_cpp_internal_InferenceEngineImpl_init(JNIEnv *env, jobject /*unused*/) {
Java_android_llama_cpp_internal_InferenceEngineImpl_init(JNIEnv *env, jobject /*unused*/, jstring nativeLibDir) {
// Set llama log handler to Android
llama_log_set(log_callback, nullptr);
// Loading all CPU backend variants
const auto *path_to_backend = env->GetStringUTFChars(nativeLibDir, 0);
LOGi("Loading backends from %s", path_to_backend);
ggml_backend_load_all_from_path(path_to_backend);
env->ReleaseStringUTFChars(nativeLibDir, path_to_backend);
// Initialize backends
llama_backend_init();
LOGi("Backends initialized; log handler set.");

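To confirm which variant won the scoring, the device registry can be dumped right after the load. A hedged sketch of a helper that could be called at the end of init() above (log_loaded_devices is hypothetical; LOGi and the ggml headers are already available in this file):

    // Hypothetical helper: log every device ggml registered after
    // ggml_backend_load_all_from_path() picked the best CPU variant.
    static void log_loaded_devices() {
        for (size_t i = 0; i < ggml_backend_dev_count(); ++i) {
            ggml_backend_dev_t dev = ggml_backend_dev_get(i);
            LOGi("device %zu: %s (%s)", i,
                 ggml_backend_dev_name(dev),
                 ggml_backend_dev_description(dev));
        }
    }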
View File

@ -1,7 +1,7 @@
package android.llama.cpp.internal
import android.content.Context
import android.llama.cpp.InferenceEngine
import android.llama.cpp.LLamaTier
import android.llama.cpp.UnsupportedArchitectureException
import android.util.Log
import kotlinx.coroutines.CancellationException
@ -40,7 +40,7 @@ import java.io.IOException
* @see llama-android.cpp for the native implementation details
*/
internal class InferenceEngineImpl private constructor(
private val tier: LLamaTier
private val nativeLibDir: String
) : InferenceEngine {
companion object {
@ -49,22 +49,24 @@ internal class InferenceEngineImpl private constructor(
private var initialized = false
/**
* Create [InferenceEngineImpl] instance with specific tier
* Create an [InferenceEngineImpl] instance at runtime
*
* @throws IllegalArgumentException if tier's library name is invalid
* @param context [Context] used to obtain the native library directory
* @throws IllegalArgumentException if native library path is invalid
* @throws UnsatisfiedLinkError if library failed to load
*/
internal fun createWithTier(tier: LLamaTier): InferenceEngineImpl {
internal fun create(context: Context): InferenceEngineImpl {
assert(!initialized) { "Inference Engine has already been initialized!" }
require(tier.libraryName.isNotBlank()) { "Unexpected library: ${tier.libraryName}" }
val nativeLibDir = context.applicationInfo.nativeLibraryDir
require(nativeLibDir.isNotBlank()) { "Expected a valid native library directory" }
return try {
Log.i(TAG, "Instantiating InferenceEngineImpl w/ ${tier.libraryName}")
InferenceEngineImpl(tier).also { initialized = true }
Log.i(TAG, "Instantiating InferenceEngineImpl...")
InferenceEngineImpl(nativeLibDir).also { initialized = true }
} catch (e: UnsatisfiedLinkError) {
Log.e(TAG, "Failed to load ${tier.libraryName}", e)
Log.e(TAG, "Failed to load native library from $nativeLibDir", e)
throw e
}
}
@ -74,7 +76,7 @@ internal class InferenceEngineImpl private constructor(
* JNI methods
* @see llama-android.cpp
*/
private external fun init()
private external fun init(nativeLibDir: String)
private external fun load(modelPath: String): Int
private external fun prepare(): Int
@ -108,10 +110,9 @@ internal class InferenceEngineImpl private constructor(
"Cannot load native library in ${_state.value.javaClass.simpleName}!"
}
_state.value = InferenceEngine.State.Initializing
Log.i(TAG, "Loading native library for $tier")
System.loadLibrary(tier.libraryName)
init()
Log.i(TAG, "Loading native library...")
System.load(File(nativeLibDir, "libkleidi-llama.so").absolutePath)
init(nativeLibDir)
_state.value = InferenceEngine.State.Initialized
Log.i(TAG, "Native library loaded! System info: \n${systemInfo()}")

View File

@ -54,19 +54,11 @@ internal object InferenceEngineLoader {
_cachedInstance?.let { return it }
return runBlocking {
// Obtain the optimal tier from cache if available
val tier = obtainTier(context)
if (tier == null || tier == LLamaTier.NONE) {
Log.e(TAG, "Aborted instantiating Inference Engine due to invalid tier")
return@runBlocking null
}
try {
// Create and cache the inference engine instance
Log.i(TAG, "Using tier: ${tier.name} (${tier.description})")
InferenceEngineImpl.createWithTier(tier).also {
InferenceEngineImpl.create(context).also {
_cachedInstance = it
Log.i(TAG, "Successfully instantiated Inference Engine w/ ${tier.name}")
Log.i(TAG, "Successfully instantiated Inference Engine")
}
} catch (e: Exception) {

View File

@ -355,6 +355,9 @@ if (GGML_CPU_ALL_VARIANTS)
ggml_add_cpu_backend_variant(android_armv8.2_1 DOTPROD)
ggml_add_cpu_backend_variant(android_armv8.2_2 DOTPROD FP16_VECTOR_ARITHMETIC)
ggml_add_cpu_backend_variant(android_armv8.6_1 DOTPROD FP16_VECTOR_ARITHMETIC MATMUL_INT8)
ggml_add_cpu_backend_variant(android_armv9.0_1 DOTPROD MATMUL_INT8 FP16_VECTOR_ARITHMETIC SVE2)
ggml_add_cpu_backend_variant(android_armv9.2_1 DOTPROD MATMUL_INT8 FP16_VECTOR_ARITHMETIC SME)
ggml_add_cpu_backend_variant(android_armv9.2_2 DOTPROD MATMUL_INT8 FP16_VECTOR_ARITHMETIC SVE SME)
elseif (APPLE)
ggml_add_cpu_backend_variant(apple_m1 DOTPROD)
ggml_add_cpu_backend_variant(apple_m2_m3 DOTPROD MATMUL_INT8)

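The feature lists above (DOTPROD, FP16_VECTOR_ARITHMETIC, MATMUL_INT8, SVE/SVE2, SME) are exactly what the loaded variants are scored on at runtime. On aarch64 Android/Linux each feature is visible in the auxiliary vector; below is a minimal probe, assuming an aarch64 target and kernel headers new enough to define HWCAP2_SME:

    #include <sys/auxv.h>
    #include <asm/hwcap.h>
    #include <cstdio>

    int main() {
        const unsigned long hwcap  = getauxval(AT_HWCAP);
        const unsigned long hwcap2 = getauxval(AT_HWCAP2);
        printf("dotprod: %d\n", !!(hwcap  & HWCAP_ASIMDDP)); // DOTPROD
        printf("fp16:    %d\n", !!(hwcap  & HWCAP_ASIMDHP)); // FP16_VECTOR_ARITHMETIC
        printf("i8mm:    %d\n", !!(hwcap2 & HWCAP2_I8MM));   // MATMUL_INT8
        printf("sve:     %d\n", !!(hwcap  & HWCAP_SVE));     // SVE
        printf("sve2:    %d\n", !!(hwcap2 & HWCAP2_SVE2));   // SVE2
    #ifdef HWCAP2_SME
        printf("sme:     %d\n", !!(hwcap2 & HWCAP2_SME));    // SME
    #endif
        return 0;
    }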
View File

@ -212,8 +212,15 @@ function(ggml_add_cpu_backend_variant_impl tag_name)
set(FEAT_INPUT_FILE "/dev/null")
endif()
# specify the Android cross-compile target
if("${GGML_CPU_NAME}" MATCHES ".*android.*")
set(ANDROID_TARGET_FLAG "--target=aarch64-linux-android${ANDROID_API_LEVEL}")
else()
set(ANDROID_TARGET_FLAG "")
endif()
execute_process(
COMMAND ${CMAKE_C_COMPILER} ${ARCH_FLAGS} -dM -E -
COMMAND ${CMAKE_C_COMPILER} ${ARCH_FLAGS} ${ANDROID_TARGET_FLAG} -dM -E -
INPUT_FILE ${FEAT_INPUT_FILE}
OUTPUT_VARIABLE ARM_FEATURE
RESULT_VARIABLE ARM_FEATURE_RESULT