core: support GGML_CPU_ALL_VARIANTS on Android!
parent 0c6ce7b9a3
commit 6cde2fe1bd
@@ -1,28 +0,0 @@
-# Resolve the repository root no matter where this script is executed.
-get_filename_component(LLAMA_ROOT "${CMAKE_CURRENT_LIST_DIR}/../.." ABSOLUTE)
-
-# Load the helper macros that fill @BUILD_*@ variables
-include(${LLAMA_ROOT}/cmake/build-info.cmake)
-
-set(TEMPLATE_FILE "${LLAMA_ROOT}/common/build-info.cpp.in")
-set(OUTPUT_FILE   "${LLAMA_ROOT}/common/build-info.cpp")
-
-# Only write the build info if it changed
-if(EXISTS ${OUTPUT_FILE})
-    file(READ ${OUTPUT_FILE} CONTENTS)
-    string(REGEX MATCH "LLAMA_COMMIT = \"([^\"]*)\";" _ ${CONTENTS})
-    set(OLD_COMMIT ${CMAKE_MATCH_1})
-    string(REGEX MATCH "LLAMA_COMPILER = \"([^\"]*)\";" _ ${CONTENTS})
-    set(OLD_COMPILER ${CMAKE_MATCH_1})
-    string(REGEX MATCH "LLAMA_BUILD_TARGET = \"([^\"]*)\";" _ ${CONTENTS})
-    set(OLD_TARGET ${CMAKE_MATCH_1})
-    if (
-        NOT OLD_COMMIT STREQUAL BUILD_COMMIT OR
-        NOT OLD_COMPILER STREQUAL BUILD_COMPILER OR
-        NOT OLD_TARGET STREQUAL BUILD_TARGET
-    )
-        configure_file(${TEMPLATE_FILE} ${OUTPUT_FILE})
-    endif()
-else()
-    configure_file(${TEMPLATE_FILE} ${OUTPUT_FILE})
-endif()
@@ -6,6 +6,7 @@

     <application
         android:name=".KleidiLlamaApplication"
+        android:extractNativeLibs="true"
        android:allowBackup="true"
        android:dataExtractionRules="@xml/data_extraction_rules"
        android:fullBackupContent="@xml/backup_rules"
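Note: `android:extractNativeLibs="true"` is what makes the dynamic-backend scheme below work at all: the per-variant `.so` files must exist as plain files under the app's native library directory so they can be opened by path. A minimal sketch of the failure mode, assuming the variant naming used later in this commit and a hypothetical install path:

    // C++ sketch: why extractNativeLibs="true" matters. The path and the
    // library name are illustrative, not taken from a real device.
    #include <dlfcn.h>
    #include <cstdio>

    int main() {
        // With extractNativeLibs="false" the .so stays compressed inside
        // the APK, so a plain filesystem dlopen like this one fails.
        void *h = dlopen("/data/app/<pkg>/lib/arm64/libggml-cpu-android_armv8.2_1.so", RTLD_NOW);
        if (h == nullptr) {
            printf("dlopen failed: %s\n", dlerror());
            return 1;
        }
        dlclose(h);
        return 0;
    }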
@@ -8,7 +8,7 @@ android {
     namespace = "android.llama.cpp"
     compileSdk = 36

-    ndkVersion = "29.0.13113456 rc1"
+    ndkVersion = "29.0.13113456"

     defaultConfig {
         minSdk = 33
@@ -24,6 +24,14 @@ android {
                 arguments += "-DCMAKE_BUILD_TYPE=Release"
+                arguments += "-DCMAKE_MESSAGE_LOG_LEVEL=DEBUG"
+                arguments += "-DCMAKE_VERBOSE_MAKEFILE=ON"
+
+                arguments += "-DGGML_SYSTEM_ARCH=ARM" // Undocumented before 3.21
+                arguments += "-DGGML_NATIVE=OFF"
+
+                arguments += "-DGGML_BACKEND_DL=ON"
+                arguments += "-DGGML_CPU_ALL_VARIANTS=ON"

                 arguments += "-DGGML_OPENMP=ON"
             }
         }
         aarMetadata {
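Note: `-DGGML_BACKEND_DL=ON` builds each CPU backend as a separately loadable module, and `-DGGML_CPU_ALL_VARIANTS=ON` produces one module per feature tier; at runtime ggml registers the best variant the running CPU supports. A sketch of loading and listing them with the public ggml-backend registry API (the directory path is illustrative):

    // C++ sketch: load backend modules from a directory and list what
    // actually registered.
    #include "ggml-backend.h"
    #include <cstdio>

    int main() {
        // Scans the directory for ggml backend .so files; for the CPU
        // backend, the highest-scoring supported variant is registered.
        ggml_backend_load_all_from_path("/path/to/native/libs"); // illustrative

        for (size_t i = 0; i < ggml_backend_reg_count(); ++i) {
            printf("backend %zu: %s\n", i,
                   ggml_backend_reg_name(ggml_backend_reg_get(i)));
        }
        return 0;
    }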
@@ -1,23 +1,18 @@
-# ============================================================================
-# Multi-tier Android build for llama.cpp
-# --------------------------------------
-# Produces five DSOs, each compiled with an increasingly aggressive
-# -march string. At runtime you pick the highest tier the device
-# supports and call `System.loadLibrary("llama_android_tX")`.
-# ============================================================================
-cmake_minimum_required(VERSION 3.31.6)
-
-project("llama_android" LANGUAGES C CXX)
-
-# --------------------------------------------------------------------------
-# 0. Language / toolchain defaults
-# --------------------------------------------------------------------------
-set(CMAKE_C_STANDARD 11 CACHE STRING "" FORCE)
-set(CMAKE_CXX_STANDARD 17 CACHE STRING "" FORCE)
-set(CMAKE_POSITION_INDEPENDENT_CODE ON)
+cmake_minimum_required(VERSION 3.22.1)
+project("kleidi-llama" VERSION 1.0.0 LANGUAGES C CXX)
+
+set(CMAKE_C_STANDARD 11)
+set(CMAKE_C_STANDARD_REQUIRED true)
+
+set(CMAKE_CXX_STANDARD 17)
+set(CMAKE_CXX_STANDARD_REQUIRED true)
+
+set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS}" CACHE STRING "" FORCE)
+set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}" CACHE STRING "" FORCE)

 # --------------------------------------------------------------------------
-# 1.a CPU feature detection library
+# 1. CPU feature detection library
 # --------------------------------------------------------------------------
 add_subdirectory(
     ${CMAKE_CURRENT_LIST_DIR}/../../../../../../include/cpu_features
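Note: the cpu_features subproject exists to answer "which variant can this device run?". On Android the underlying data ultimately comes from the ELF auxiliary vector; a minimal aarch64-only sketch using the standard Linux hwcap bits (recent kernel headers assumed, and this is not the project's own detection code):

    // C++ sketch (aarch64 Linux/Android only): raw CPU capability bits.
    #include <sys/auxv.h>
    #include <asm/hwcap.h>
    #include <cstdio>

    int main() {
        unsigned long hwcap  = getauxval(AT_HWCAP);
        unsigned long hwcap2 = getauxval(AT_HWCAP2);
        printf("dotprod: %d\n", (hwcap  & HWCAP_ASIMDDP) != 0);
        printf("i8mm:    %d\n", (hwcap2 & HWCAP2_I8MM)   != 0);
        printf("sve:     %d\n", (hwcap  & HWCAP_SVE)     != 0);
        printf("sve2:    %d\n", (hwcap2 & HWCAP2_SVE2)   != 0);
        return 0;
    }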
@@ -29,141 +24,31 @@ target_link_libraries(llama_cpu_detector
     log)

 # --------------------------------------------------------------------------
-# 1.b Make the LLVM OpenMP runtime available
+# 2. Kleidi Llama library
 # --------------------------------------------------------------------------
-find_package(OpenMP REQUIRED)
-
-# --------------------------------------------------------------------------
-# 2. Locate the root of the llama.cpp source tree
-#    (six levels up from this CMakeLists.txt).
-# --------------------------------------------------------------------------
+set(LLAMA_BUILD_COMMON ON  CACHE BOOL "" FORCE)
+set(LLAMA_CURL         OFF CACHE BOOL "" FORCE)
+set(GGML_LLAMAFILE     OFF CACHE BOOL "" FORCE)
+set(GGML_CPU_KLEIDIAI  ON  CACHE BOOL "" FORCE)
+set(GGML_OPENMP        ON  CACHE BOOL "" FORCE)
+
 set(LLAMA_SRC ${CMAKE_CURRENT_LIST_DIR}/../../../../../../)
+add_subdirectory(${LLAMA_SRC} build-llama)

-# --------------------------------------------------------------------------
-# 3. Build helper – one invocation = one hardware tier
-# --------------------------------------------------------------------------
-include(ExternalProject)
+add_library(${CMAKE_PROJECT_NAME} SHARED
+    kleidi-llama.cpp)

-function(build_llama_tier tier march)
-    # ---------- 3.1 configure & build core code in an external project -----
-    set(build_dir ${CMAKE_BINARY_DIR}/llama_build_${tier})
-
-    # KleidiAI requires dotprod and i8mm
-    if(${tier} STREQUAL "t0" OR ${tier} STREQUAL "t1")
-        set(kleidi OFF)
-    else()
-        set(kleidi ON)
-    endif()
-
-    ExternalProject_Add(llama_build_${tier}
-        SOURCE_DIR ${LLAMA_SRC}
-        BINARY_DIR ${build_dir}
-        # ---- pass Android cross-compile context straight through ----------
-        CMAKE_ARGS
-            -DCMAKE_TOOLCHAIN_FILE=${CMAKE_TOOLCHAIN_FILE}
-            -DANDROID_ABI=${ANDROID_ABI}
-            -DANDROID_PLATFORM=${ANDROID_PLATFORM}
-            -DANDROID_STL=${ANDROID_STL}
-            -DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE}
-            # ---- llama / ggml feature switches ----------------------------
-            -DGGML_CPU_KLEIDIAI=${kleidi}
-            -DGGML_LLAMAFILE=OFF
-            -DGGML_OPENMP=ON
-            -DLLAMA_BUILD_COMMON=ON
-            -DLLAMA_CURL=OFF
-            -DBUILD_SHARED_LIBS=OFF            # we want static libs to embed
-            # ---- tier-specific ISA flags ----------------------------------
-            -DCMAKE_C_FLAGS=-march=${march}
-            -DCMAKE_CXX_FLAGS=-march=${march}
-            # ---- put the .a files right in ${build_dir} for easy pick-up --
-            -DCMAKE_ARCHIVE_OUTPUT_DIRECTORY=${build_dir}
-
-        INSTALL_COMMAND ""                     # nothing to install
-
-        BUILD_BYPRODUCTS
-            ${build_dir}/libllama.a
-            ${build_dir}/libcommon.a
-            ${build_dir}/libggml.a
-            ${build_dir}/libggml-base.a
-            ${build_dir}/libggml-cpu.a
-    )
-
-    # ---------- 3.2 make the static libs produced above visible ------------
-    set(llama_a     ${build_dir}/libllama.a)
-    set(common_a    ${build_dir}/libcommon.a)
-    set(ggml_a      ${build_dir}/libggml.a)
-    set(ggml_base_a ${build_dir}/libggml-base.a)
-    set(ggml_cpu_a  ${build_dir}/libggml-cpu.a)
-
-    add_library(llama_core_${tier} STATIC IMPORTED GLOBAL)
-    set_target_properties(llama_core_${tier} PROPERTIES
-        IMPORTED_LOCATION ${llama_a})
-    add_dependencies(llama_core_${tier} llama_build_${tier})
-
-    add_library(common_core_${tier} STATIC IMPORTED GLOBAL)
-    set_target_properties(common_core_${tier} PROPERTIES
-        IMPORTED_LOCATION ${common_a})
-    add_dependencies(common_core_${tier} llama_build_${tier})
-
-    add_library(ggml_core_${tier} STATIC IMPORTED GLOBAL)
-    set_target_properties(ggml_core_${tier} PROPERTIES
-        IMPORTED_LOCATION ${ggml_a})
-    add_dependencies(ggml_core_${tier} llama_build_${tier})
-
-    add_library(ggml_base_core_${tier} STATIC IMPORTED GLOBAL)
-    set_target_properties(ggml_base_core_${tier} PROPERTIES
-        IMPORTED_LOCATION ${ggml_base_a})
-    add_dependencies(ggml_base_core_${tier} llama_build_${tier})
-
-    add_library(ggml_cpu_core_${tier} STATIC IMPORTED GLOBAL)
-    set_target_properties(ggml_cpu_core_${tier} PROPERTIES
-        IMPORTED_LOCATION ${ggml_cpu_a})
-    add_dependencies(ggml_cpu_core_${tier} llama_build_${tier})
-
-    # ---------- 3.3 JNI wrapper DSO ---------------------------------------
-    add_library(llama_android_${tier} SHARED llama-android.cpp)
-
-    target_compile_options(llama_android_${tier} PRIVATE "-march=${march}")
-
-    target_include_directories(llama_android_${tier} PRIVATE
+target_include_directories(${CMAKE_PROJECT_NAME} PRIVATE
     ${LLAMA_SRC}
     ${LLAMA_SRC}/common
     ${LLAMA_SRC}/include
     ${LLAMA_SRC}/ggml/include
     ${LLAMA_SRC}/ggml/src)

-    target_link_libraries(llama_android_${tier} PRIVATE
-        llama_core_${tier}
-        common_core_${tier}
-        ggml_core_${tier}      # umbrella (brings in few weak deps)
-        ggml_cpu_core_${tier}  # back-end & scheduler
-        ggml_base_core_${tier} # core math
-        OpenMP::OpenMP_CXX     # OpenMP
+target_link_libraries(${CMAKE_PROJECT_NAME}
+    llama
+    common
     android
     log)
-
-    # ---------- 3.4 nice SONAME & filename -------------------------------
-    set_target_properties(llama_android_${tier} PROPERTIES
-        OUTPUT_NAME "llama_android_${tier}")
-endfunction()
-
-# --------------------------------------------------------------------------
-# 4. Build all five tiers
-# --------------------------------------------------------------------------
-build_llama_tier(t0 "armv8-a+simd")
-build_llama_tier(t1 "armv8.2-a+dotprod")
-build_llama_tier(t2 "armv8.6-a+dotprod+i8mm")
-build_llama_tier(t3 "armv9-a+dotprod+i8mm+sve+sve2")
-#build_llama_tier(t4 "armv9.2-a+dotprod+i8mm+sve+sve2+sme")
-
-add_dependencies(llama_build_t1 llama_build_t0)
-add_dependencies(llama_build_t2 llama_build_t1)
-add_dependencies(llama_build_t3 llama_build_t2)
-#add_dependencies(llama_build_t4 llama_build_t3)
-
-# --------------------------------------------------------------------------
-# 5. Default variant when Gradle hasn’t told us (keeps IDE happy)
-# --------------------------------------------------------------------------
-if(NOT CMAKE_BUILD_TYPE)
-    set(CMAKE_BUILD_TYPE Release CACHE STRING "" FORCE)
-endif()
@@ -72,10 +72,16 @@ static void log_callback(ggml_log_level level, const char *fmt, void *data) {

 extern "C"
 JNIEXPORT void JNICALL
-Java_android_llama_cpp_internal_InferenceEngineImpl_init(JNIEnv *env, jobject /*unused*/) {
+Java_android_llama_cpp_internal_InferenceEngineImpl_init(JNIEnv *env, jobject /*unused*/, jstring nativeLibDir) {
     // Set llama log handler to Android
     llama_log_set(log_callback, nullptr);

+    // Loading all CPU backend variants
+    const auto *path_to_backend = env->GetStringUTFChars(nativeLibDir, 0);
+    LOGi("Loading backends from %s", path_to_backend);
+    ggml_backend_load_all_from_path(path_to_backend);
+    env->ReleaseStringUTFChars(nativeLibDir, path_to_backend);
+
     // Initialize backends
     llama_backend_init();
     LOGi("Backend initiated; Log handler set.");
@@ -1,7 +1,7 @@
 package android.llama.cpp.internal

+import android.content.Context
 import android.llama.cpp.InferenceEngine
-import android.llama.cpp.LLamaTier
 import android.llama.cpp.UnsupportedArchitectureException
 import android.util.Log
 import kotlinx.coroutines.CancellationException
@@ -40,7 +40,7 @@ import java.io.IOException
  * @see llama-android.cpp for the native implementation details
  */
 internal class InferenceEngineImpl private constructor(
-    private val tier: LLamaTier
+    private val nativeLibDir: String
 ) : InferenceEngine {

     companion object {
@@ -49,22 +49,24 @@ internal class InferenceEngineImpl private constructor(
         private var initialized = false

         /**
-         * Create [InferenceEngineImpl] instance with specific tier
+         * Create [InferenceEngineImpl] instance at runtime
          *
-         * @throws IllegalArgumentException if tier's library name is invalid
+         * @param context for obtaining the native library directory
+         * @throws IllegalArgumentException if native library path is invalid
          * @throws UnsatisfiedLinkError if library failed to load
         */
-        internal fun createWithTier(tier: LLamaTier): InferenceEngineImpl {
+        internal fun create(context: Context): InferenceEngineImpl {
            assert(!initialized) { "Inference Engine has already been initialized!" }

-            require(tier.libraryName.isNotBlank()) { "Unexpected library: ${tier.libraryName}" }
+            val nativeLibDir = context.applicationInfo.nativeLibraryDir
+            require(nativeLibDir.isNotBlank()) { "Expected native library directory" }

             return try {
-                Log.i(TAG, "Instantiating InferenceEngineImpl w/ ${tier.libraryName}")
-                InferenceEngineImpl(tier).also { initialized = true }
+                Log.i(TAG, "Instantiating InferenceEngineImpl...")
+                InferenceEngineImpl(nativeLibDir).also { initialized = true }
             } catch (e: UnsatisfiedLinkError) {
-                Log.e(TAG, "Failed to load ${tier.libraryName}", e)
+                Log.e(TAG, "Failed to load native library from $nativeLibDir", e)
                 throw e
             }
         }
@@ -74,7 +76,7 @@ internal class InferenceEngineImpl private constructor(
      * JNI methods
      * @see llama-android.cpp
      */
-    private external fun init()
+    private external fun init(nativeLibDir: String)
     private external fun load(modelPath: String): Int
     private external fun prepare(): Int
@@ -108,10 +110,9 @@ internal class InferenceEngineImpl private constructor(
             "Cannot load native library in ${_state.value.javaClass.simpleName}!"
         }
         _state.value = InferenceEngine.State.Initializing
-        Log.i(TAG, "Loading native library for $tier")
-
-        System.loadLibrary(tier.libraryName)
-        init()
+        Log.i(TAG, "Loading native library...")
+        System.load(File(nativeLibDir, "libkleidi-llama.so").absolutePath)
+        init(nativeLibDir)
         _state.value = InferenceEngine.State.Initialized
         Log.i(TAG, "Native library loaded! System info: \n${systemInfo()}")
@@ -54,19 +54,11 @@ internal object InferenceEngineLoader {
         _cachedInstance?.let { return it }

         return runBlocking {
-            // Obtain the optimal tier from cache if available
-            val tier = obtainTier(context)
-            if (tier == null || tier == LLamaTier.NONE) {
-                Log.e(TAG, "Aborted instantiating Inference Engine due to invalid tier")
-                return@runBlocking null
-            }
-
             try {
                 // Create and cache the inference engine instance
-                Log.i(TAG, "Using tier: ${tier.name} (${tier.description})")
-                InferenceEngineImpl.createWithTier(tier).also {
+                InferenceEngineImpl.create(context).also {
                     _cachedInstance = it
-                    Log.i(TAG, "Successfully instantiated Inference Engine w/ ${tier.name}")
+                    Log.i(TAG, "Successfully instantiated Inference Engine")
                 }
             } catch (e: Exception) {
@@ -355,6 +355,9 @@ if (GGML_CPU_ALL_VARIANTS)
         ggml_add_cpu_backend_variant(android_armv8.2_1 DOTPROD)
         ggml_add_cpu_backend_variant(android_armv8.2_2 DOTPROD FP16_VECTOR_ARITHMETIC)
         ggml_add_cpu_backend_variant(android_armv8.6_1 DOTPROD FP16_VECTOR_ARITHMETIC MATMUL_INT8)
+        ggml_add_cpu_backend_variant(android_armv9.0_1 DOTPROD MATMUL_INT8 FP16_VECTOR_ARITHMETIC SVE2)
+        ggml_add_cpu_backend_variant(android_armv9.2_1 DOTPROD MATMUL_INT8 FP16_VECTOR_ARITHMETIC SME)
+        ggml_add_cpu_backend_variant(android_armv9.2_2 DOTPROD MATMUL_INT8 FP16_VECTOR_ARITHMETIC SVE SME)
     elseif (APPLE)
         ggml_add_cpu_backend_variant(apple_m1 DOTPROD)
         ggml_add_cpu_backend_variant(apple_m2_m3 DOTPROD MATMUL_INT8)
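Note: each `ggml_add_cpu_backend_variant(...)` line compiles the same CPU backend source with a different feature set; the feature tokens correspond to the standard ACLE predefined macros, which is also what the detection hunk below greps out of the compiler. A compile-time sketch:

    // C++ sketch: the ACLE macros behind the variant feature tokens.
    // Build with different -march strings and the output changes.
    #include <cstdio>

    int main() {
    #ifdef __ARM_FEATURE_DOTPROD
        printf("DOTPROD\n");
    #endif
    #ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
        printf("FP16_VECTOR_ARITHMETIC\n");
    #endif
    #ifdef __ARM_FEATURE_MATMUL_INT8
        printf("MATMUL_INT8\n");
    #endif
    #ifdef __ARM_FEATURE_SVE
        printf("SVE\n");
    #endif
    #ifdef __ARM_FEATURE_SVE2
        printf("SVE2\n");
    #endif
    #ifdef __ARM_FEATURE_SME
        printf("SME\n");
    #endif
        return 0;
    }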
@@ -212,8 +212,15 @@ function(ggml_add_cpu_backend_variant_impl tag_name)
         set(FEAT_INPUT_FILE "/dev/null")
     endif()

+    # specify Android cross compile target
+    if("${GGML_CPU_NAME}" MATCHES ".*android.*")
+        set(ANDROID_TARGET_FLAG "--target=aarch64-linux-android${ANDROID_API_LEVEL}")
+    else()
+        set(ANDROID_TARGET_FLAG "")
+    endif()
+
     execute_process(
-        COMMAND ${CMAKE_C_COMPILER} ${ARCH_FLAGS} -dM -E -
+        COMMAND ${CMAKE_C_COMPILER} ${ARCH_FLAGS} ${ANDROID_TARGET_FLAG} -dM -E -
         INPUT_FILE ${FEAT_INPUT_FILE}
         OUTPUT_VARIABLE ARM_FEATURE
         RESULT_VARIABLE ARM_FEATURE_RESULT