core: implement cpu_detector native lib
This commit is contained in:
parent
bff98a68e6
commit
1b79db877d
|
|
@ -17,18 +17,32 @@ set(CMAKE_CXX_STANDARD 17 CACHE STRING "" FORCE)
|
||||||
set(CMAKE_POSITION_INDEPENDENT_CODE ON)
|
set(CMAKE_POSITION_INDEPENDENT_CODE ON)
|
||||||
|
|
||||||
# --------------------------------------------------------------------------
|
# --------------------------------------------------------------------------
|
||||||
# 1. Locate the root of the llama.cpp source tree
|
# 1. CPU feature detection library
|
||||||
|
# --------------------------------------------------------------------------
|
||||||
|
# Bring in the cpu_features project that lives six directories above this
# file; because it is outside this source tree, an explicit binary directory
# is required for its build outputs.
add_subdirectory(
    "${CMAKE_CURRENT_LIST_DIR}/../../../../../../include/cpu_features"
    "${CMAKE_BINARY_DIR}/cpu_features_build"
)

# Small JNI shared library that reports CPU features; it is built from a
# single translation unit and links cpu_features plus the Android NDK
# `android` and `log` system libraries.
add_library(llama_cpu_detector SHARED
    cpu_detector.cpp
)
target_link_libraries(llama_cpu_detector
    PRIVATE
        CpuFeatures::cpu_features
        android
        log
)
|
||||||
|
|
||||||
|
# --------------------------------------------------------------------------
|
||||||
|
# 2. Locate the root of the llama.cpp source tree
|
||||||
# (six levels up from this CMakeLists.txt).
|
# (six levels up from this CMakeLists.txt).
|
||||||
# --------------------------------------------------------------------------
|
# --------------------------------------------------------------------------
|
||||||
set(LLAMA_SRC ${CMAKE_CURRENT_LIST_DIR}/../../../../../../)
|
set(LLAMA_SRC ${CMAKE_CURRENT_LIST_DIR}/../../../../../../)
|
||||||
|
|
||||||
# --------------------------------------------------------------------------
|
# --------------------------------------------------------------------------
|
||||||
# 2. Build helper – one invocation = one hardware tier
|
# 3. Build helper – one invocation = one hardware tier
|
||||||
# --------------------------------------------------------------------------
|
# --------------------------------------------------------------------------
|
||||||
include(ExternalProject)
|
include(ExternalProject)
|
||||||
|
|
||||||
function(build_llama_tier tier march)
|
function(build_llama_tier tier march)
|
||||||
# ---------- 2.1 configure & build core code in an external project -----
|
# ---------- 3.1 configure & build core code in an external project -----
|
||||||
set(build_dir ${CMAKE_BINARY_DIR}/llama_build_${tier})
|
set(build_dir ${CMAKE_BINARY_DIR}/llama_build_${tier})
|
||||||
|
|
||||||
# KleidiAI requires dotprod and i8mm
|
# KleidiAI requires dotprod and i8mm
|
||||||
|
|
@ -71,7 +85,7 @@ function(build_llama_tier tier march)
|
||||||
${build_dir}/libggml-cpu.a
|
${build_dir}/libggml-cpu.a
|
||||||
)
|
)
|
||||||
|
|
||||||
# ---------- 2.2 make the static libs produced above visible ------------
|
# ---------- 3.2 make the static libs produced above visible ------------
|
||||||
set(llama_a ${build_dir}/libllama.a)
|
set(llama_a ${build_dir}/libllama.a)
|
||||||
set(common_a ${build_dir}/libcommon.a)
|
set(common_a ${build_dir}/libcommon.a)
|
||||||
set(ggml_a ${build_dir}/libggml.a)
|
set(ggml_a ${build_dir}/libggml.a)
|
||||||
|
|
@ -103,7 +117,7 @@ function(build_llama_tier tier march)
|
||||||
IMPORTED_LOCATION ${ggml_cpu_a})
|
IMPORTED_LOCATION ${ggml_cpu_a})
|
||||||
add_dependencies(ggml_cpu_core_${tier} llama_build_${tier})
|
add_dependencies(ggml_cpu_core_${tier} llama_build_${tier})
|
||||||
|
|
||||||
# ---------- 2.3 JNI wrapper DSO ---------------------------------------
|
# ---------- 3.3 JNI wrapper DSO ---------------------------------------
|
||||||
add_library(llama_android_${tier} SHARED llama-android.cpp)
|
add_library(llama_android_${tier} SHARED llama-android.cpp)
|
||||||
|
|
||||||
target_compile_options(llama_android_${tier} PRIVATE "-march=${march}")
|
target_compile_options(llama_android_${tier} PRIVATE "-march=${march}")
|
||||||
|
|
@ -124,13 +138,13 @@ function(build_llama_tier tier march)
|
||||||
android
|
android
|
||||||
log)
|
log)
|
||||||
|
|
||||||
# ---------- 2.4 nice SONAME & filename -------------------------------
|
# ---------- 3.4 nice SONAME & filename -------------------------------
|
||||||
set_target_properties(llama_android_${tier} PROPERTIES
|
set_target_properties(llama_android_${tier} PROPERTIES
|
||||||
OUTPUT_NAME "llama_android_${tier}")
|
OUTPUT_NAME "llama_android_${tier}")
|
||||||
endfunction()
|
endfunction()
|
||||||
|
|
||||||
# --------------------------------------------------------------------------
|
# --------------------------------------------------------------------------
|
||||||
# 3. Build all five tiers
|
# 4. Build all five tiers
|
||||||
# --------------------------------------------------------------------------
|
# --------------------------------------------------------------------------
|
||||||
build_llama_tier(t0 "armv8-a+simd")
|
build_llama_tier(t0 "armv8-a+simd")
|
||||||
build_llama_tier(t1 "armv8.2-a+dotprod")
|
build_llama_tier(t1 "armv8.2-a+dotprod")
|
||||||
|
|
@ -144,7 +158,7 @@ add_dependencies(llama_build_t3 llama_build_t2)
|
||||||
#add_dependencies(llama_build_t4 llama_build_t3)
|
#add_dependencies(llama_build_t4 llama_build_t3)
|
||||||
|
|
||||||
# --------------------------------------------------------------------------
|
# --------------------------------------------------------------------------
|
||||||
# 4. Default variant when Gradle hasn’t told us (keeps IDE happy)
|
# 5. Default variant when Gradle hasn’t told us (keeps IDE happy)
|
||||||
# --------------------------------------------------------------------------
|
# --------------------------------------------------------------------------
|
||||||
if(NOT CMAKE_BUILD_TYPE)
|
if(NOT CMAKE_BUILD_TYPE)
|
||||||
set(CMAKE_BUILD_TYPE Release CACHE STRING "" FORCE)
|
set(CMAKE_BUILD_TYPE Release CACHE STRING "" FORCE)
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,62 @@
|
||||||
|
#include <jni.h>
|
||||||
|
#include <cpuinfo_aarch64.h>
|
||||||
|
#include <android/log.h>
|
||||||
|
#include <string>
|
||||||
|
|
||||||
|
using namespace cpu_features;
|
||||||
|
|
||||||
|
#define LOG_TAG "CpuDetector"
|
||||||
|
#define LOGI(...) __android_log_print(ANDROID_LOG_INFO, LOG_TAG, __VA_ARGS__)
|
||||||
|
|
||||||
|
static const Aarch64Info info = GetAarch64Info();
|
||||||
|
static const Aarch64Features features = info.features;
|
||||||
|
|
||||||
|
/**
 * JNI implementation of android.llama.cpp.LLamaLibraryLoader.getOptimalTier().
 *
 * Maps the CPU feature flags captured in the file-scope `features` object to
 * the highest library tier this CPU can run:
 *
 *   T0 - baseline ASIMD
 *   T1 - T0 + dotprod
 *   T2 - T1 + i8mm
 *   T3 - T2 + SVE and SVE2
 *
 * Tiers are cumulative: a tier is only selected when every feature required
 * by the tiers below it is present as well. Selecting T3 on SVE/SVE2 alone
 * (or T2 on i8mm alone) could hand the caller a library that uses dotprod or
 * i8mm instructions the CPU does not implement.
 *
 * @param env   JNI environment (unused).
 * @param clazz Java class the static native method belongs to (unused).
 * @return      Tier index in the range 0..3; 0 is also the fallback when no
 *              expected feature is reported.
 */
extern "C" JNIEXPORT jint JNICALL
Java_android_llama_cpp_LLamaLibraryLoader_getOptimalTier(
        JNIEnv* env,
        jclass clazz) {
    (void) env;
    (void) clazz;

    // Build the capability chain bottom-up so each tier implies the previous.
    const bool has_dotprod = features.asimddp != 0;
    const bool has_i8mm    = has_dotprod && features.i8mm != 0;
    const bool has_sve2    = has_i8mm && features.sve != 0 && features.sve2 != 0;

    // Check tiers from highest to lowest and return on the first match.
    // TODO-han.yin: implement T4 once obtaining an Android device with SME!
    if (has_sve2) {
        LOGI("Detected SVE/SVE2 support - selecting T3");
        return 3;  // T3: ARMv9-a with dotprod + i8mm + SVE/SVE2
    }
    if (has_i8mm) {
        LOGI("Detected i8mm support - selecting T2");
        return 2;  // T2: ARMv8.6-a with dotprod + i8mm
    }
    if (has_dotprod) {
        LOGI("Detected dotprod support - selecting T1");
        return 1;  // T1: ARMv8.2-a with dotprod
    }

    // Both remaining cases resolve to the baseline tier; only the log differs.
    if (features.asimd) {
        LOGI("Detected basic ASIMD support - selecting T0");
    } else {
        // Fallback - this shouldn't happen on arm64-v8a devices
        LOGI("No expected features detected - falling back to T0");
    }
    return 0;  // T0: baseline ARMv8-a with SIMD
}
|
||||||
|
|
||||||
|
// Optional: Keep a feature string function for debugging
|
||||||
|
/**
 * JNI implementation of
 * android.llama.cpp.LLamaLibraryLoader.getCpuFeaturesString().
 *
 * Produces a debugging string listing, in a fixed order, the name of every
 * feature flag that is set in the file-scope `features` object. Each name is
 * followed by a single space; the string is empty when no flag is set.
 *
 * @param env   JNI environment used to allocate the Java string.
 * @param clazz Java class the static native method belongs to (unused).
 * @return      Result of NewStringUTF() on the assembled text.
 */
extern "C" JNIEXPORT jstring JNICALL
Java_android_llama_cpp_LLamaLibraryLoader_getCpuFeaturesString(
        JNIEnv* env,
        jclass clazz) {
    // Flag/label pairs in output order. Flags are copied into plain bools
    // here rather than referenced, since the source fields may be bit-fields.
    const struct {
        bool        present;
        const char* label;
    } probes[] = {
        {features.asimd   != 0, "ASIMD "},
        {features.asimddp != 0, "ASIMDDP "},
        {features.i8mm    != 0, "I8MM "},
        {features.sve     != 0, "SVE "},
        {features.sve2    != 0, "SVE2 "},
        {features.sme     != 0, "SME "},
    };

    std::string summary;
    for (const auto& probe : probes) {
        if (probe.present) {
            summary.append(probe.label);
        }
    }

    return env->NewStringUTF(summary.c_str());
}
|
||||||
|
|
@ -0,0 +1,170 @@
|
||||||
|
package android.llama.cpp
|
||||||
|
|
||||||
|
import android.content.Context
|
||||||
|
import android.content.SharedPreferences
|
||||||
|
import android.util.Log
|
||||||
|
import androidx.core.content.edit
|
||||||
|
|
||||||
|
/**
 * Hardware tiers for which a dedicated native library is shipped.
 *
 * @property rawValue    Integer identifier of the tier, as exchanged with native code
 *                       and persisted in preferences.
 * @property libraryName Name of the native library built for this tier.
 * @property description Human-readable summary of the CPU features the tier targets.
 */
enum class LLamaTier(
    val rawValue: Int,
    val libraryName: String,
    val description: String
) {
    T0(0, "llama_android_t0", "ARMv8-a baseline with SIMD"),
    T1(1, "llama_android_t1", "ARMv8.2-a with DotProd"),
    T2(2, "llama_android_t2", "ARMv8.6-a with DotProd + I8MM"),
    T3(3, "llama_android_t3", "ARMv9-a with DotProd + I8MM + SVE/SVE2");
    // TODO-han.yin: implement T4 once obtaining an Android device with SME!

    companion object {
        /**
         * Looks up the tier whose [rawValue] equals [value].
         *
         * @return the matching tier, or `null` when no tier uses that value.
         */
        fun fromRawValue(value: Int): LLamaTier? =
            entries.firstOrNull { candidate -> candidate.rawValue == value }

        /** Highest tier currently implemented. */
        fun getMaxSupportedTier(): LLamaTier = T3
    }
}
|
||||||
|
|
||||||
|
/**
 * Entry point for obtaining a [LLamaAndroid] instance backed by the native
 * library tier that best matches the current CPU.
 *
 * The class itself cannot be instantiated; all functionality lives in the
 * companion object.
 */
class LLamaLibraryLoader private constructor() {

    companion object {
        private val TAG = LLamaLibraryLoader::class.simpleName

        // A cached tier is only trusted when it was stored under this exact
        // version, so changing the value invalidates previously stored results.
        private const val DETECTION_VERSION = 1
        private const val PREFS_NAME = "llama_cpu_detection"
        private const val KEY_DETECTED_TIER = "detected_tier"
        private const val KEY_DETECTION_VERSION = "detection_version"

        /** Native: raw tier index chosen by the CPU detector library. */
        @JvmStatic
        private external fun getOptimalTier(): Int

        /** Native: debugging string describing the detected CPU features. */
        @JvmStatic
        private external fun getCpuFeaturesString(): String

        private var _cachedInstance: LLamaAndroid? = null
        private var _detectedTier: LLamaTier? = null

        /** Tier selected by the most recent [createInstance] call, or `null` if none. */
        val detectedTier: LLamaTier? get() = _detectedTier

        /**
         * Factory method to get a configured LLamaAndroid instance.
         * Handles tier detection, caching, and library loading automatically.
         *
         * @param context used to access the preferences holding the cached tier.
         * @return the (possibly previously cached) instance, or `null` when the
         *         tier could not be determined, instantiation failed, or an
         *         exception was thrown along the way.
         */
        @Synchronized
        fun createInstance(context: Context): LLamaAndroid? {
            // Return cached instance if available
            _cachedInstance?.let { return it }

            try {
                // Obtain the optimal tier from cache if available
                val tier = getOrDetectOptimalTier(context) ?: run {
                    Log.e(TAG, "Failed to determine optimal tier")
                    return null
                }
                _detectedTier = tier
                Log.i(TAG, "Using tier: ${tier.name} (${tier.description})")

                // Create and cache LLamaAndroid instance
                val instance = LLamaAndroid.createWithTier(tier) ?: run {
                    Log.e(TAG, "Failed to instantiate LLamaAndroid")
                    return null
                }
                _cachedInstance = instance
                Log.i(TAG, "Successfully created LLamaAndroid instance with ${tier.name}")

                return instance
            } catch (e: Exception) {
                Log.e(TAG, "Error creating LLamaAndroid instance", e)
                return null
            }
        }

        /**
         * Clear cached detection results (for testing/debugging).
         *
         * Synchronized on the same monitor as [createInstance] because both
         * functions mutate the cached instance and detected tier.
         *
         * @param context used to access the preferences holding the cached tier.
         */
        @Synchronized
        fun clearCache(context: Context) {
            getSharedPrefs(context).edit { clear() }
            _cachedInstance = null
            _detectedTier = null
            Log.i(TAG, "Cleared detection results and cached instance")
        }

        /**
         * Get optimal tier from cache or detect it fresh.
         *
         * A stored tier is used only if it was saved under the current
         * [DETECTION_VERSION] and still maps to a known [LLamaTier].
         */
        private fun getOrDetectOptimalTier(context: Context): LLamaTier? {
            val prefs = getSharedPrefs(context)

            // Check if we have a cached result with the current detection version
            val cachedVersion = prefs.getInt(KEY_DETECTION_VERSION, -1)
            val cachedTierValue = prefs.getInt(KEY_DETECTED_TIER, -1)
            if (cachedVersion == DETECTION_VERSION && cachedTierValue >= 0) {
                val cachedTier = LLamaTier.fromRawValue(cachedTierValue)
                if (cachedTier != null) {
                    Log.i(TAG, "Using cached tier detection: ${cachedTier.name}")
                    return cachedTier
                }
            }

            // No valid cache, detect fresh
            Log.i(TAG, "Performing fresh tier detection")
            return detectAndCacheOptimalTier(context)
        }

        /**
         * Detect optimal tier and save to cache.
         *
         * @return the detected tier; [LLamaTier.T0] (also cached) when the
         *         detector library or its native methods cannot be linked;
         *         `null` for an unknown raw tier value or any other exception.
         */
        private fun detectAndCacheOptimalTier(context: Context): LLamaTier? {
            try {
                // Load CPU detection library
                System.loadLibrary("llama_cpu_detector")
                Log.i(TAG, "CPU feature detector loaded successfully")

                // Detect optimal tier
                val tierValue = getOptimalTier()
                val features = getCpuFeaturesString()
                Log.i(TAG, "Raw tier $tierValue w/ CPU features: $features")

                // Convert to enum and validate
                val tier = LLamaTier.fromRawValue(tierValue) ?: run {
                    Log.w(TAG, "Invalid tier value $tierValue")
                    return null
                }

                // Ensure we don't exceed maximum supported tier
                val maxTier = LLamaTier.getMaxSupportedTier()
                val finalTier = if (tier.rawValue > maxTier.rawValue) {
                    Log.w(TAG, "Detected tier ${tier.name} exceeds max supported, using ${maxTier.name}")
                    maxTier
                } else {
                    tier
                }

                // Cache the result
                cacheTier(context, finalTier)

                Log.i(TAG, "Detected and cached optimal tier: ${finalTier.name}")
                return finalTier
            } catch (e: UnsatisfiedLinkError) {
                Log.e(TAG, "Failed to load CPU detection library", e)

                // Fallback to T0 and cache it
                val fallbackTier = LLamaTier.T0
                cacheTier(context, fallbackTier)

                Log.i(TAG, "Using fallback tier: ${fallbackTier.name}")
                return fallbackTier
            } catch (e: Exception) {
                Log.e(TAG, "Unexpected error during tier detection", e)
                return null
            }
        }

        /** Persists [tier] together with the current [DETECTION_VERSION]. */
        private fun cacheTier(context: Context, tier: LLamaTier) {
            getSharedPrefs(context).edit {
                putInt(KEY_DETECTED_TIER, tier.rawValue)
                putInt(KEY_DETECTION_VERSION, DETECTION_VERSION)
            }
        }

        /** Opens the private preferences file used for the detection cache. */
        private fun getSharedPrefs(context: Context): SharedPreferences {
            return context.getSharedPreferences(PREFS_NAME, Context.MODE_PRIVATE)
        }
    }
}
|
||||||
Loading…
Reference in New Issue