misc: rename LlamaAndroid-related classes to InferenceEngine prefixes

Han Yin 2025-06-26 13:21:17 -07:00
parent 72822f0236
commit 4b3f6ef8d7
8 changed files with 41 additions and 38 deletions

View File

@@ -13,14 +13,14 @@
         android:label="@string/app_name"
         android:roundIcon="@mipmap/ic_launcher_round"
         android:supportsRtl="true"
-        android:theme="@style/Theme.LlamaAndroid"
+        android:theme="@style/Theme.KleidiLlama"
         >
         <activity
             android:name=".MainActivity"
             android:exported="true"
             android:screenOrientation="portrait"
-            android:theme="@style/Theme.LlamaAndroid">
+            android:theme="@style/Theme.KleidiLlama">
             <intent-filter>
                 <action android:name="android.intent.action.MAIN" />

View File

@@ -2,7 +2,7 @@ package com.example.llama.di

 import android.content.Context
 import android.llama.cpp.InferenceEngine
-import android.llama.cpp.LLamaLibraryLoader
+import android.llama.cpp.InferenceEngineLoader
 import com.example.llama.data.local.AppDatabase
 import com.example.llama.data.remote.HuggingFaceApiService
 import com.example.llama.data.remote.HuggingFaceRemoteDataSource
@@ -58,14 +58,15 @@ internal abstract class AppModule {
     ): HuggingFaceRemoteDataSource

     companion object {
-        private const val USE_REAL_ENGINE = true
+        private const val USE_STUB_ENGINE = false

         @Provides
         fun provideInferenceEngine(@ApplicationContext context: Context): InferenceEngine {
-            return if (USE_REAL_ENGINE) {
-                LLamaLibraryLoader.createInstance(context) ?: throw InstantiationException("Cannot instantiate LlamaAndroid!")
-            } else {
+            return if (USE_STUB_ENGINE) {
                 StubInferenceEngine()
+            } else {
+                InferenceEngineLoader.createInstance(context)
+                    ?: throw InstantiationException("Cannot instantiate InferenceEngine!")
             }
         }
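With the flag inverted, the provider reads as "use the stub only when asked to", and consumers keep depending on the InferenceEngine interface alone. A minimal sketch of an injection site, assuming Hilt constructor injection (the ChatViewModel name is hypothetical, not part of this commit):

import android.llama.cpp.InferenceEngine
import androidx.lifecycle.ViewModel
import dagger.hilt.android.lifecycle.HiltViewModel
import javax.inject.Inject

// Hypothetical consumer: Hilt resolves the InferenceEngine parameter via
// provideInferenceEngine() above, so the ViewModel never sees the concrete
// InferenceEngineImpl or the stub.
@HiltViewModel
class ChatViewModel @Inject constructor(
    private val engine: InferenceEngine
) : ViewModel()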

View File

@@ -1,3 +1,3 @@
 <resources>
-    <string name="app_name">LlamaAndroid</string>
+    <string name="app_name">Kleidi Llama</string>
 </resources>

View File

@@ -1,5 +1,5 @@
 <?xml version="1.0" encoding="utf-8"?>
 <resources>
-    <style name="Theme.LlamaAndroid" parent="android:Theme.Material.Light.NoActionBar" />
+    <style name="Theme.KleidiLlama" parent="android:Theme.Material.Light.NoActionBar" />
 </resources>

View File

@@ -12,7 +12,7 @@ static const Aarch64Info info = GetAarch64Info();
 static const Aarch64Features features = info.features;

 extern "C" JNIEXPORT jint JNICALL
-Java_android_llama_cpp_LLamaLibraryLoader_getOptimalTier(
+Java_android_llama_cpp_InferenceEngineLoader_getOptimalTier(
         JNIEnv* env,
         jclass clazz) {
     int tier = 0; // Default to T0 (baseline)
@@ -46,7 +46,7 @@ Java_android_llama_cpp_LLamaLibraryLoader_getOptimalTier(
 // Optional: Keep a feature string function for debugging
 extern "C" JNIEXPORT jstring JNICALL
-Java_android_llama_cpp_LLamaLibraryLoader_getCpuFeaturesString(
+Java_android_llama_cpp_InferenceEngineLoader_getCpuFeaturesString(
         JNIEnv* env,
         jclass clazz) {
     std::string text;
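These C symbol renames are forced by JNI name mangling: a native method declared on class C in package p binds to a symbol named Java_p_C_methodName, with dots replaced by underscores. A sketch of the Kotlin declarations these two symbols bind to, assuming @JvmStatic companion externals as in the loader diff further below (getOptimalTier's Kotlin declaration is not visible in this commit):

package android.llama.cpp

class InferenceEngineLoader private constructor() {
    companion object {
        // @JvmStatic hoists the method onto InferenceEngineLoader itself, so the
        // native symbol is Java_android_llama_cpp_InferenceEngineLoader_getOptimalTier
        // and the JNI function receives a jclass, matching the C++ signatures above.
        @JvmStatic
        private external fun getOptimalTier(): Int

        @JvmStatic
        private external fun getCpuFeaturesString(): String
    }
}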

View File

@@ -72,7 +72,7 @@ static void log_callback(ggml_log_level level, const char *fmt, void *data) {

 extern "C"
 JNIEXPORT void JNICALL
-Java_android_llama_cpp_LLamaAndroid_init(JNIEnv *env, jobject /*unused*/) {
+Java_android_llama_cpp_InferenceEngineImpl_init(JNIEnv *env, jobject /*unused*/) {
     // Set llama log handler to Android
     llama_log_set(log_callback, nullptr);
@@ -83,7 +83,7 @@ Java_android_llama_cpp_LLamaAndroid_init(JNIEnv *env, jobject /*unused*/) {

 extern "C"
 JNIEXPORT jint JNICALL
-Java_android_llama_cpp_LLamaAndroid_load(JNIEnv *env, jobject, jstring jmodel_path) {
+Java_android_llama_cpp_InferenceEngineImpl_load(JNIEnv *env, jobject, jstring jmodel_path) {
     llama_model_params model_params = llama_model_default_params();

     const auto *model_path = env->GetStringUTFChars(jmodel_path, 0);
@@ -137,7 +137,7 @@ static common_sampler *new_sampler(float temp) {

 extern "C"
 JNIEXPORT jint JNICALL
-Java_android_llama_cpp_LLamaAndroid_prepare(JNIEnv * /*env*/, jobject /*unused*/) {
+Java_android_llama_cpp_InferenceEngineImpl_prepare(JNIEnv * /*env*/, jobject /*unused*/) {
     auto *context = init_context(g_model);
     if (!context) { return 1; }
     g_context = context;
@@ -161,13 +161,13 @@ static std::string get_backend() {

 extern "C"
 JNIEXPORT jstring JNICALL
-Java_android_llama_cpp_LLamaAndroid_systemInfo(JNIEnv *env, jobject /*unused*/) {
+Java_android_llama_cpp_InferenceEngineImpl_systemInfo(JNIEnv *env, jobject /*unused*/) {
     return env->NewStringUTF(llama_print_system_info());
 }

 extern "C"
 JNIEXPORT jstring JNICALL
-Java_android_llama_cpp_LLamaAndroid_benchModel(JNIEnv *env, jobject /*unused*/, jint pp, jint tg,
+Java_android_llama_cpp_InferenceEngineImpl_benchModel(JNIEnv *env, jobject /*unused*/, jint pp, jint tg,
                                                jint pl, jint nr) {
     auto *context = init_context(g_model, pp);
     if (!context) {
@@ -377,7 +377,7 @@ static int decode_tokens_in_batches(

 extern "C"
 JNIEXPORT jint JNICALL
-Java_android_llama_cpp_LLamaAndroid_processSystemPrompt(
+Java_android_llama_cpp_InferenceEngineImpl_processSystemPrompt(
         JNIEnv *env,
         jobject /*unused*/,
         jstring jsystem_prompt
@@ -426,7 +426,7 @@ Java_android_llama_cpp_LLamaAndroid_processSystemPrompt(

 extern "C"
 JNIEXPORT jint JNICALL
-Java_android_llama_cpp_LLamaAndroid_processUserPrompt(
+Java_android_llama_cpp_InferenceEngineImpl_processUserPrompt(
         JNIEnv *env,
         jobject /*unused*/,
         jstring juser_prompt,
@@ -510,7 +510,7 @@ static bool is_valid_utf8(const char *string) {

 extern "C"
 JNIEXPORT jstring JNICALL
-Java_android_llama_cpp_LLamaAndroid_generateNextToken(
+Java_android_llama_cpp_InferenceEngineImpl_generateNextToken(
         JNIEnv *env,
         jobject /*unused*/
 ) {
@@ -570,7 +570,7 @@ Java_android_llama_cpp_LLamaAndroid_generateNextToken(

 extern "C"
 JNIEXPORT void JNICALL
-Java_android_llama_cpp_LLamaAndroid_unload(JNIEnv * /*unused*/, jobject /*unused*/) {
+Java_android_llama_cpp_InferenceEngineImpl_unload(JNIEnv * /*unused*/, jobject /*unused*/) {
     // Reset long-term & short-term states
     reset_long_term_states();
     reset_short_term_states();
@@ -585,6 +585,6 @@ Java_android_llama_cpp_LLamaAndroid_unload(JNIEnv * /*unused*/, jobject /*unused

 extern "C"
 JNIEXPORT void JNICALL
-Java_android_llama_cpp_LLamaAndroid_shutdown(JNIEnv *env, jobject /*unused*/) {
+Java_android_llama_cpp_InferenceEngineImpl_shutdown(JNIEnv *env, jobject /*unused*/) {
     llama_backend_free();
 }
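Every entry point in llama-android.cpp follows the same mangling rule, so renaming the Kotlin class from LLamaAndroid to InferenceEngineImpl means renaming all of its Java_android_llama_cpp_* symbols in lockstep; a missed one surfaces only at runtime as an UnsatisfiedLinkError. A plausible sketch of the member external declarations these symbols resolve to (the Kotlin signatures are not shown in this diff; parameter and return types are inferred from the C++ side, and the InferenceEngine supertype is omitted here):

package android.llama.cpp

internal class InferenceEngineImpl private constructor(private val tier: LLamaTier) {
    // Member (non-static) native methods: the JVM passes the receiver as the
    // jobject parameter seen in the C++ signatures above.
    private external fun init()
    private external fun load(modelPath: String): Int
    private external fun prepare(): Int
    private external fun systemInfo(): String
    private external fun benchModel(pp: Int, tg: Int, pl: Int, nr: Int): String
    private external fun processSystemPrompt(systemPrompt: String): Int
    private external fun processUserPrompt(userPrompt: String /* further params elided in the diff */): Int
    private external fun generateNextToken(): String
    private external fun unload()
    private external fun shutdown()
}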

View File

@@ -36,25 +36,27 @@ import java.io.File
  *
  * @see llama-android.cpp for the native implementation details
  */
-class LLamaAndroid private constructor(private val tier: LLamaTier) : InferenceEngine {
+internal class InferenceEngineImpl private constructor(
+    private val tier: LLamaTier
+) : InferenceEngine {

     companion object {
-        private val TAG = LLamaAndroid::class.java.simpleName
+        private val TAG = InferenceEngineImpl::class.java.simpleName

         private var initialized = false

         /**
-         * Create LLamaAndroid instance with specific tier
+         * Create [InferenceEngineImpl] instance with specific tier
         */
-        internal fun createWithTier(tier: LLamaTier): LLamaAndroid? {
+        internal fun createWithTier(tier: LLamaTier): InferenceEngineImpl? {
            if (initialized) {
                Log.w(TAG, "LLamaAndroid already initialized")
                return null
            }

            try {
-               Log.i(TAG, "Instantiating LLamaAndroid w/ ${tier.libraryName}")
-               val instance = LLamaAndroid(tier)
+               Log.i(TAG, "Instantiating InferenceEngineImpl w/ ${tier.libraryName}")
+               val instance = InferenceEngineImpl(tier)
                initialized = true
                return instance
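The initialized flag makes a second createWithTier call fail fast rather than load a second engine over the process-wide native state (g_model, g_context in llama-android.cpp). A hypothetical illustration of that guard, callable from within the library module:

// Hypothetical: only the first creation succeeds for the lifetime of the process.
fun demoSingletonGuard(tier: LLamaTier) {
    val first = InferenceEngineImpl.createWithTier(tier)   // returns an instance
    val second = InferenceEngineImpl.createWithTier(tier)  // logs a warning, returns null
}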

View File

@@ -21,10 +21,10 @@ enum class LLamaTier(val rawValue: Int, val libraryName: String, val description
     }
 }

-class LLamaLibraryLoader private constructor() {
+class InferenceEngineLoader private constructor() {
     companion object {
-        private val TAG = LLamaLibraryLoader::class.simpleName
+        private val TAG = InferenceEngineLoader::class.simpleName

         private const val DETECTION_VERSION = 1
         private const val PREFS_NAME = "llama_cpu_detection"
@@ -37,16 +37,16 @@ class LLamaLibraryLoader private constructor() {
         @JvmStatic
         private external fun getCpuFeaturesString(): String

-        private var _cachedInstance: LLamaAndroid? = null
+        private var _cachedInstance: InferenceEngineImpl? = null
         private var _detectedTier: LLamaTier? = null
         val detectedTier: LLamaTier? get() = _detectedTier

         /**
-         * Factory method to get a configured LLamaAndroid instance.
+         * Factory method to get a configured [InferenceEngineImpl] instance.
          * Handles tier detection, caching, and library loading automatically.
          */
         @Synchronized
-        fun createInstance(context: Context): LLamaAndroid? {
+        fun createInstance(context: Context): InferenceEngine? {
             // Return cached instance if available
             _cachedInstance?.let { return it }
@@ -59,18 +59,18 @@ class LLamaLibraryLoader private constructor() {
             _detectedTier = tier
             Log.i(TAG, "Using tier: ${tier.name} (${tier.description})")

-            // Create and cache LLamaAndroid instance
-            val instance = LLamaAndroid.createWithTier(tier) ?: run {
-                Log.e(TAG, "Failed to instantiate LLamaAndroid")
+            // Create and cache the inference engine instance
+            val instance = InferenceEngineImpl.createWithTier(tier) ?: run {
+                Log.e(TAG, "Failed to instantiate InferenceEngineImpl")
                 return null
             }
             _cachedInstance = instance

-            Log.i(TAG, "Successfully created LLamaAndroid instance with ${tier.name}")
+            Log.i(TAG, "Successfully created InferenceEngineImpl instance with ${tier.name}")
             return instance
         } catch (e: Exception) {
-            Log.e(TAG, "Error creating LLamaAndroid instance", e)
+            Log.e(TAG, "Error creating InferenceEngineImpl instance", e)
             return null
         }
     }
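Note that createInstance's return type also widens from LLamaAndroid? to InferenceEngine?, which is what lets the concrete implementation become internal. A hedged usage sketch mirroring the DI module above (the obtainEngine wrapper is illustrative, not part of this commit):

import android.content.Context
import android.llama.cpp.InferenceEngine
import android.llama.cpp.InferenceEngineLoader

// Sketch: resolve the engine once; per its KDoc, the loader handles tier
// detection, caching, and library loading internally and returns null on failure.
fun obtainEngine(context: Context): InferenceEngine =
    InferenceEngineLoader.createInstance(context)
        ?: throw InstantiationException("Cannot instantiate InferenceEngine!")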