core: support GGML_CPU_ALL_VARIANTS on Android!

This commit is contained in:
Han Yin 2025-09-03 13:53:47 -07:00
parent 0c6ce7b9a3
commit 6cde2fe1bd
9 changed files with 72 additions and 197 deletions

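For context on the mechanism this commit relies on: with GGML_BACKEND_DL=ON each CPU variant is built as its own DSO, ggml dlopen()s the candidates at runtime, scores each against the CPU it is running on, and registers only the best match. The sketch below illustrates that flow from plain C++; it is a minimal illustration under those assumptions, not code from this commit, and the directory path is a placeholder.

    #include "ggml-backend.h"
    #include "llama.h"
    #include <cstdio>

    int main() {
        // Load every ggml backend DSO found in the directory; on Android the
        // JNI init() below passes the app's nativeLibraryDir here instead.
        ggml_backend_load_all_from_path("/path/to/native/libs"); // placeholder path

        // ggml keeps only the CPU variant that scored highest for this CPU;
        // list whatever ended up registered.
        for (size_t i = 0; i < ggml_backend_reg_count(); ++i) {
            printf("backend: %s\n", ggml_backend_reg_name(ggml_backend_reg_get(i)));
        }

        llama_backend_init();
        // ... create a model / context as usual ...
        llama_backend_free();
        return 0;
    }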
View File

@ -1,28 +0,0 @@
# Resolve the repository root no matter where this script is executed.
get_filename_component(LLAMA_ROOT "${CMAKE_CURRENT_LIST_DIR}/../.." ABSOLUTE)
# Load the helper macros that fill @BUILD_*@ variables
include(${LLAMA_ROOT}/cmake/build-info.cmake)
set(TEMPLATE_FILE "${LLAMA_ROOT}/common/build-info.cpp.in")
set(OUTPUT_FILE "${LLAMA_ROOT}/common/build-info.cpp")
# Only write the build info if it changed
if(EXISTS ${OUTPUT_FILE})
file(READ ${OUTPUT_FILE} CONTENTS)
string(REGEX MATCH "LLAMA_COMMIT = \"([^\"]*)\";" _ ${CONTENTS})
set(OLD_COMMIT ${CMAKE_MATCH_1})
string(REGEX MATCH "LLAMA_COMPILER = \"([^\"]*)\";" _ ${CONTENTS})
set(OLD_COMPILER ${CMAKE_MATCH_1})
string(REGEX MATCH "LLAMA_BUILD_TARGET = \"([^\"]*)\";" _ ${CONTENTS})
set(OLD_TARGET ${CMAKE_MATCH_1})
if (
NOT OLD_COMMIT STREQUAL BUILD_COMMIT OR
NOT OLD_COMPILER STREQUAL BUILD_COMPILER OR
NOT OLD_TARGET STREQUAL BUILD_TARGET
)
configure_file(${TEMPLATE_FILE} ${OUTPUT_FILE})
endif()
else()
configure_file(${TEMPLATE_FILE} ${OUTPUT_FILE})
endif()

View File

@ -6,6 +6,7 @@
<application
android:name=".KleidiLlamaApplication"
android:extractNativeLibs="true"
android:allowBackup="true"
android:dataExtractionRules="@xml/data_extraction_rules"
android:fullBackupContent="@xml/backup_rules"

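The android:extractNativeLibs="true" line above matters because ggml's dynamic loader opens the variant DSOs by absolute path: the libraries must exist as real files under nativeLibraryDir instead of being mapped straight out of the APK. A tiny standalone probe (an illustration, not part of this commit) that mimics what the loader needs:

    #include <dlfcn.h>
    #include <cstdio>

    // Usage: probe /full/path/to/libggml-cpu-xyz.so
    int main(int argc, char ** argv) {
        if (argc < 2) { fprintf(stderr, "usage: %s <path-to-so>\n", argv[0]); return 2; }
        // ggml_backend_load_all_from_path() ultimately needs this dlopen to work.
        void * handle = dlopen(argv[1], RTLD_NOW | RTLD_LOCAL);
        printf("%s: %s\n", argv[1], handle ? "loadable by path" : dlerror());
        return handle ? 0 : 1;
    }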
View File

@ -8,7 +8,7 @@ android {
namespace = "android.llama.cpp"
compileSdk = 36
ndkVersion = "29.0.13113456 rc1"
ndkVersion = "29.0.13113456"
defaultConfig {
minSdk = 33
@ -24,6 +24,14 @@ android {
arguments += "-DCMAKE_BUILD_TYPE=Release"
arguments += "-DCMAKE_MESSAGE_LOG_LEVEL=DEBUG"
arguments += "-DCMAKE_VERBOSE_MAKEFILE=ON"
arguments += "-DGGML_SYSTEM_ARCH=ARM" // Undocumented before 3.21
arguments += "-DGGML_NATIVE=OFF"
arguments += "-DGGML_BACKEND_DL=ON"
arguments += "-DGGML_CPU_ALL_VARIANTS=ON"
arguments += "-DGGML_OPENMP=ON"
}
}
aarMetadata {

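With GGML_NATIVE=OFF, GGML_BACKEND_DL=ON and GGML_CPU_ALL_VARIANTS=ON, the build emits one CPU backend DSO per variant declared in ggml/src/CMakeLists.txt (see further down) rather than a single -march-tuned libggml-cpu. Should you need to load one variant explicitly instead of scanning a directory, ggml_backend_load() takes a path; note the file name below is an assumption derived from the android_armv8.6_1 variant name added later in this commit.

    #include "ggml-backend.h"
    #include <cstdio>

    int main() {
        // Assumed DSO name for the android_armv8.6_1 variant; verify against
        // the actual build output.
        ggml_backend_reg_t reg = ggml_backend_load("libggml-cpu-android_armv8.6_1.so");
        if (!reg) {
            fprintf(stderr, "variant not loadable (wrong ABI, or CPU lacks its features?)\n");
            return 1;
        }
        printf("loaded backend: %s\n", ggml_backend_reg_name(reg));
        return 0;
    }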
View File

@ -1,23 +1,18 @@
# ============================================================================
# Multi-tier Android build for llama.cpp
# --------------------------------------
# Produces five DSOs, each compiled with an increasingly aggressive
# -march string. At runtime you pick the highest tier the device
# supports and call `System.loadLibrary("llama_android_tX")`.
# ============================================================================
cmake_minimum_required(VERSION 3.31.6)
cmake_minimum_required(VERSION 3.22.1)
project("llama_android" LANGUAGES C CXX)
project("kleidi-llama" VERSION 1.0.0 LANGUAGES C CXX)
set(CMAKE_C_STANDARD 11)
set(CMAKE_C_STANDARD_REQUIRED true)
set(CMAKE_CXX_STANDARD 17)
set(CMAKE_CXX_STANDARD_REQUIRED true)
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS}" CACHE STRING "" FORCE)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}" CACHE STRING "" FORCE)
# --------------------------------------------------------------------------
# 0. Language / toolchain defaults
# --------------------------------------------------------------------------
set(CMAKE_C_STANDARD 11 CACHE STRING "" FORCE)
set(CMAKE_CXX_STANDARD 17 CACHE STRING "" FORCE)
set(CMAKE_POSITION_INDEPENDENT_CODE ON)
# --------------------------------------------------------------------------
# 1.a CPU feature detection library
# 1. CPU feature detection library
# --------------------------------------------------------------------------
add_subdirectory(
${CMAKE_CURRENT_LIST_DIR}/../../../../../../include/cpu_features
@ -29,141 +24,31 @@ target_link_libraries(llama_cpu_detector
log)
# --------------------------------------------------------------------------
# 1.b Make the LLVM OpenMP runtime available
# 2. Kleidi Llama library
# --------------------------------------------------------------------------
find_package(OpenMP REQUIRED)
# --------------------------------------------------------------------------
# 2. Locate the root of the llama.cpp source tree
# (six levels up from this CMakeLists.txt).
# --------------------------------------------------------------------------
set(LLAMA_BUILD_COMMON ON CACHE BOOL "" FORCE)
set(LLAMA_CURL OFF CACHE BOOL "" FORCE)
set(GGML_LLAMAFILE OFF CACHE BOOL "" FORCE)
set(GGML_CPU_KLEIDIAI ON CACHE BOOL "" FORCE)
set(GGML_OPENMP ON CACHE BOOL "" FORCE)
set(LLAMA_SRC ${CMAKE_CURRENT_LIST_DIR}/../../../../../../)
add_subdirectory(${LLAMA_SRC} build-llama)
# --------------------------------------------------------------------------
# 3. Build helper: one invocation = one hardware tier
# --------------------------------------------------------------------------
include(ExternalProject)
add_library(${CMAKE_PROJECT_NAME} SHARED
kleidi-llama.cpp)
function(build_llama_tier tier march)
# ---------- 3.1 configure & build core code in an external project -----
set(build_dir ${CMAKE_BINARY_DIR}/llama_build_${tier})
# KleidiAI requires dotprod and i8mm
if(${tier} STREQUAL "t0" OR ${tier} STREQUAL "t1")
set(kleidi OFF)
else()
set(kleidi ON)
endif()
ExternalProject_Add(llama_build_${tier}
SOURCE_DIR ${LLAMA_SRC}
BINARY_DIR ${build_dir}
# ---- pass Android cross-compile context straight through ----------
CMAKE_ARGS
-DCMAKE_TOOLCHAIN_FILE=${CMAKE_TOOLCHAIN_FILE}
-DANDROID_ABI=${ANDROID_ABI}
-DANDROID_PLATFORM=${ANDROID_PLATFORM}
-DANDROID_STL=${ANDROID_STL}
-DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE}
# ---- llama / ggml feature switches ----------------------------
-DGGML_CPU_KLEIDIAI=${kleidi}
-DGGML_LLAMAFILE=OFF
-DGGML_OPENMP=ON
-DLLAMA_BUILD_COMMON=ON
-DLLAMA_CURL=OFF
-DBUILD_SHARED_LIBS=OFF # we want static libs to embed
# ---- tier-specific ISA flags ----------------------------------
-DCMAKE_C_FLAGS=-march=${march}
-DCMAKE_CXX_FLAGS=-march=${march}
# ---- put the .a files right in ${build_dir} for easy pick-up --
-DCMAKE_ARCHIVE_OUTPUT_DIRECTORY=${build_dir}
INSTALL_COMMAND "" # nothing to install
BUILD_BYPRODUCTS
${build_dir}/libllama.a
${build_dir}/libcommon.a
${build_dir}/libggml.a
${build_dir}/libggml-base.a
${build_dir}/libggml-cpu.a
)
# ---------- 3.2 make the static libs produced above visible ------------
set(llama_a ${build_dir}/libllama.a)
set(common_a ${build_dir}/libcommon.a)
set(ggml_a ${build_dir}/libggml.a)
set(ggml_base_a ${build_dir}/libggml-base.a)
set(ggml_cpu_a ${build_dir}/libggml-cpu.a)
add_library(llama_core_${tier} STATIC IMPORTED GLOBAL)
set_target_properties(llama_core_${tier} PROPERTIES
IMPORTED_LOCATION ${llama_a})
add_dependencies(llama_core_${tier} llama_build_${tier})
add_library(common_core_${tier} STATIC IMPORTED GLOBAL)
set_target_properties(common_core_${tier} PROPERTIES
IMPORTED_LOCATION ${common_a})
add_dependencies(common_core_${tier} llama_build_${tier})
add_library(ggml_core_${tier} STATIC IMPORTED GLOBAL)
set_target_properties(ggml_core_${tier} PROPERTIES
IMPORTED_LOCATION ${ggml_a})
add_dependencies(ggml_core_${tier} llama_build_${tier})
add_library(ggml_base_core_${tier} STATIC IMPORTED GLOBAL)
set_target_properties(ggml_base_core_${tier} PROPERTIES
IMPORTED_LOCATION ${ggml_base_a})
add_dependencies(ggml_base_core_${tier} llama_build_${tier})
add_library(ggml_cpu_core_${tier} STATIC IMPORTED GLOBAL)
set_target_properties(ggml_cpu_core_${tier} PROPERTIES
IMPORTED_LOCATION ${ggml_cpu_a})
add_dependencies(ggml_cpu_core_${tier} llama_build_${tier})
# ---------- 3.3 JNI wrapper DSO ---------------------------------------
add_library(llama_android_${tier} SHARED llama-android.cpp)
target_compile_options(llama_android_${tier} PRIVATE "-march=${march}")
target_include_directories(llama_android_${tier} PRIVATE
target_include_directories(${CMAKE_PROJECT_NAME} PRIVATE
${LLAMA_SRC}
${LLAMA_SRC}/common
${LLAMA_SRC}/include
${LLAMA_SRC}/ggml/include
${LLAMA_SRC}/ggml/src)
target_link_libraries(llama_android_${tier} PRIVATE
llama_core_${tier}
common_core_${tier}
ggml_core_${tier} # umbrella (brings in a few weak deps)
ggml_cpu_core_${tier} # back-end & scheduler
ggml_base_core_${tier} # core math
OpenMP::OpenMP_CXX # OpenMP
target_link_libraries(${CMAKE_PROJECT_NAME}
llama
common
android
log)
# ---------- 3.4 nice SONAME & filename -------------------------------
set_target_properties(llama_android_${tier} PROPERTIES
OUTPUT_NAME "llama_android_${tier}")
endfunction()
# --------------------------------------------------------------------------
# 4. Build all five tiers
# --------------------------------------------------------------------------
build_llama_tier(t0 "armv8-a+simd")
build_llama_tier(t1 "armv8.2-a+dotprod")
build_llama_tier(t2 "armv8.6-a+dotprod+i8mm")
build_llama_tier(t3 "armv9-a+dotprod+i8mm+sve+sve2")
#build_llama_tier(t4 "armv9.2-a+dotprod+i8mm+sve+sve2+sme")
add_dependencies(llama_build_t1 llama_build_t0)
add_dependencies(llama_build_t2 llama_build_t1)
add_dependencies(llama_build_t3 llama_build_t2)
#add_dependencies(llama_build_t4 llama_build_t3)
# --------------------------------------------------------------------------
# 5. Default variant when Gradle hasn't told us (keeps IDE happy)
# --------------------------------------------------------------------------
if(NOT CMAKE_BUILD_TYPE)
set(CMAKE_BUILD_TYPE Release CACHE STRING "" FORCE)
endif()

View File

@ -72,10 +72,16 @@ static void log_callback(ggml_log_level level, const char *fmt, void *data) {
extern "C"
JNIEXPORT void JNICALL
Java_android_llama_cpp_internal_InferenceEngineImpl_init(JNIEnv *env, jobject /*unused*/) {
Java_android_llama_cpp_internal_InferenceEngineImpl_init(JNIEnv *env, jobject /*unused*/, jstring nativeLibDir) {
// Set llama log handler to Android
llama_log_set(log_callback, nullptr);
// Loading all CPU backend variants
const auto *path_to_backend = env->GetStringUTFChars(nativeLibDir, 0);
LOGi("Loading backends from %s", path_to_backend);
ggml_backend_load_all_from_path(path_to_backend);
env->ReleaseStringUTFChars(nativeLibDir, path_to_backend);
// Initialize backends
llama_backend_init();
LOGi("Backends initialized; log handler set.");

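To confirm which variant won the scoring, the device registry can be dumped right after the load. A hedged sketch of a helper that could be called at the end of init() above (log_loaded_devices is hypothetical; LOGi and the ggml headers are already available in this file):

    // Hypothetical helper: log every device ggml registered after
    // ggml_backend_load_all_from_path() picked the best CPU variant.
    static void log_loaded_devices() {
        for (size_t i = 0; i < ggml_backend_dev_count(); ++i) {
            ggml_backend_dev_t dev = ggml_backend_dev_get(i);
            LOGi("device %zu: %s (%s)", i,
                 ggml_backend_dev_name(dev),
                 ggml_backend_dev_description(dev));
        }
    }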
View File

@ -1,7 +1,7 @@
package android.llama.cpp.internal
import android.content.Context
import android.llama.cpp.InferenceEngine
import android.llama.cpp.LLamaTier
import android.llama.cpp.UnsupportedArchitectureException
import android.util.Log
import kotlinx.coroutines.CancellationException
@ -40,7 +40,7 @@ import java.io.IOException
* @see llama-android.cpp for the native implementation details
*/
internal class InferenceEngineImpl private constructor(
private val tier: LLamaTier
private val nativeLibDir: String
) : InferenceEngine {
companion object {
@ -49,22 +49,24 @@ internal class InferenceEngineImpl private constructor(
private var initialized = false
/**
* Create [InferenceEngineImpl] instance with specific tier
* Create an [InferenceEngineImpl] instance at runtime
*
* @throws IllegalArgumentException if tier's library name is invalid
* @param context [Context] used to obtain the native library directory
* @throws IllegalArgumentException if native library path is invalid
* @throws UnsatisfiedLinkError if library failed to load
*/
internal fun createWithTier(tier: LLamaTier): InferenceEngineImpl {
internal fun create(context: Context): InferenceEngineImpl {
assert(!initialized) { "Inference Engine has already been initialized!" }
require(tier.libraryName.isNotBlank()) { "Unexpected library: ${tier.libraryName}" }
val nativeLibDir = context.applicationInfo.nativeLibraryDir
require(nativeLibDir.isNotBlank()) { "Expected a valid native library directory" }
return try {
Log.i(TAG, "Instantiating InferenceEngineImpl w/ ${tier.libraryName}")
InferenceEngineImpl(tier).also { initialized = true }
Log.i(TAG, "Instantiating InferenceEngineImpl...")
InferenceEngineImpl(nativeLibDir).also { initialized = true }
} catch (e: UnsatisfiedLinkError) {
Log.e(TAG, "Failed to load ${tier.libraryName}", e)
Log.e(TAG, "Failed to load native library from $nativeLibDir", e)
throw e
}
}
@ -74,7 +76,7 @@ internal class InferenceEngineImpl private constructor(
* JNI methods
* @see llama-android.cpp
*/
private external fun init()
private external fun init(nativeLibDir: String)
private external fun load(modelPath: String): Int
private external fun prepare(): Int
@ -108,10 +110,9 @@ internal class InferenceEngineImpl private constructor(
"Cannot load native library in ${_state.value.javaClass.simpleName}!"
}
_state.value = InferenceEngine.State.Initializing
Log.i(TAG, "Loading native library for $tier")
System.loadLibrary(tier.libraryName)
init()
Log.i(TAG, "Loading native library...")
System.load(File(nativeLibDir, "libkleidi-llama.so").absolutePath)
init(nativeLibDir)
_state.value = InferenceEngine.State.Initialized
Log.i(TAG, "Native library loaded! System info: \n${systemInfo()}")

View File

@ -54,19 +54,11 @@ internal object InferenceEngineLoader {
_cachedInstance?.let { return it }
return runBlocking {
// Obtain the optimal tier from cache if available
val tier = obtainTier(context)
if (tier == null || tier == LLamaTier.NONE) {
Log.e(TAG, "Aborted instantiating Inference Engine due to invalid tier")
return@runBlocking null
}
try {
// Create and cache the inference engine instance
Log.i(TAG, "Using tier: ${tier.name} (${tier.description})")
InferenceEngineImpl.createWithTier(tier).also {
InferenceEngineImpl.create(context).also {
_cachedInstance = it
Log.i(TAG, "Successfully instantiated Inference Engine w/ ${tier.name}")
Log.i(TAG, "Successfully instantiated Inference Engine")
}
} catch (e: Exception) {

View File

@ -355,6 +355,9 @@ if (GGML_CPU_ALL_VARIANTS)
ggml_add_cpu_backend_variant(android_armv8.2_1 DOTPROD)
ggml_add_cpu_backend_variant(android_armv8.2_2 DOTPROD FP16_VECTOR_ARITHMETIC)
ggml_add_cpu_backend_variant(android_armv8.6_1 DOTPROD FP16_VECTOR_ARITHMETIC MATMUL_INT8)
ggml_add_cpu_backend_variant(android_armv9.0_1 DOTPROD MATMUL_INT8 FP16_VECTOR_ARITHMETIC SVE2)
ggml_add_cpu_backend_variant(android_armv9.2_1 DOTPROD MATMUL_INT8 FP16_VECTOR_ARITHMETIC SME)
ggml_add_cpu_backend_variant(android_armv9.2_2 DOTPROD MATMUL_INT8 FP16_VECTOR_ARITHMETIC SVE SME)
elseif (APPLE)
ggml_add_cpu_backend_variant(apple_m1 DOTPROD)
ggml_add_cpu_backend_variant(apple_m2_m3 DOTPROD MATMUL_INT8)

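The feature lists above (DOTPROD, FP16_VECTOR_ARITHMETIC, MATMUL_INT8, SVE/SVE2, SME) are exactly what the loaded variants are scored on at runtime. On aarch64 Android/Linux each feature is visible in the auxiliary vector; below is a minimal probe, assuming an aarch64 target and kernel headers new enough to define HWCAP2_SME:

    #include <sys/auxv.h>
    #include <asm/hwcap.h>
    #include <cstdio>

    int main() {
        const unsigned long hwcap  = getauxval(AT_HWCAP);
        const unsigned long hwcap2 = getauxval(AT_HWCAP2);
        printf("dotprod: %d\n", !!(hwcap  & HWCAP_ASIMDDP)); // DOTPROD
        printf("fp16:    %d\n", !!(hwcap  & HWCAP_ASIMDHP)); // FP16_VECTOR_ARITHMETIC
        printf("i8mm:    %d\n", !!(hwcap2 & HWCAP2_I8MM));   // MATMUL_INT8
        printf("sve:     %d\n", !!(hwcap  & HWCAP_SVE));     // SVE
        printf("sve2:    %d\n", !!(hwcap2 & HWCAP2_SVE2));   // SVE2
    #ifdef HWCAP2_SME
        printf("sme:     %d\n", !!(hwcap2 & HWCAP2_SME));    // SME
    #endif
        return 0;
    }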
View File

@ -212,8 +212,15 @@ function(ggml_add_cpu_backend_variant_impl tag_name)
set(FEAT_INPUT_FILE "/dev/null")
endif()
# specify the Android cross-compile target
if("${GGML_CPU_NAME}" MATCHES ".*android.*")
set(ANDROID_TARGET_FLAG "--target=aarch64-linux-android${ANDROID_API_LEVEL}")
else()
set(ANDROID_TARGET_FLAG "")
endif()
execute_process(
COMMAND ${CMAKE_C_COMPILER} ${ARCH_FLAGS} -dM -E -
COMMAND ${CMAKE_C_COMPILER} ${ARCH_FLAGS} ${ANDROID_TARGET_FLAG} -dM -E -
INPUT_FILE ${FEAT_INPUT_FILE}
OUTPUT_VARIABLE ARM_FEATURE
RESULT_VARIABLE ARM_FEATURE_RESULT