misc: rename LlamaAndroid-related classes to InferenceEngine prefixes

Han Yin 2025-06-26 13:21:17 -07:00
parent 72822f0236
commit 4b3f6ef8d7
8 changed files with 41 additions and 38 deletions

View File

@@ -13,14 +13,14 @@
         android:label="@string/app_name"
         android:roundIcon="@mipmap/ic_launcher_round"
         android:supportsRtl="true"
-        android:theme="@style/Theme.LlamaAndroid"
+        android:theme="@style/Theme.KleidiLlama"
         >
         <activity
             android:name=".MainActivity"
             android:exported="true"
             android:screenOrientation="portrait"
-            android:theme="@style/Theme.LlamaAndroid">
+            android:theme="@style/Theme.KleidiLlama">
             <intent-filter>
                 <action android:name="android.intent.action.MAIN" />

View File

@@ -2,7 +2,7 @@ package com.example.llama.di

 import android.content.Context
 import android.llama.cpp.InferenceEngine
-import android.llama.cpp.LLamaLibraryLoader
+import android.llama.cpp.InferenceEngineLoader
 import com.example.llama.data.local.AppDatabase
 import com.example.llama.data.remote.HuggingFaceApiService
 import com.example.llama.data.remote.HuggingFaceRemoteDataSource
@@ -58,14 +58,15 @@ internal abstract class AppModule {
     ): HuggingFaceRemoteDataSource

     companion object {
-        private const val USE_REAL_ENGINE = true
+        private const val USE_STUB_ENGINE = false

        @Provides
        fun provideInferenceEngine(@ApplicationContext context: Context): InferenceEngine {
-            return if (USE_REAL_ENGINE) {
-                LLamaLibraryLoader.createInstance(context) ?: throw InstantiationException("Cannot instantiate LlamaAndroid!")
-            } else {
+            return if (USE_STUB_ENGINE) {
                 StubInferenceEngine()
+            } else {
+                InferenceEngineLoader.createInstance(context)
+                    ?: throw InstantiationException("Cannot instantiate InferenceEngine!")
             }
         }
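For context, this binding is what the rest of the app sees after the rename: Hilt satisfies any injected InferenceEngine through provideInferenceEngine(), so call sites never name InferenceEngineImpl or the loader. A minimal sketch of a consumer, assuming a standard Hilt ViewModel setup; the ChatViewModel name is hypothetical and not part of this commit:

    import android.llama.cpp.InferenceEngine
    import androidx.lifecycle.ViewModel
    import dagger.hilt.android.lifecycle.HiltViewModel
    import javax.inject.Inject

    // Hypothetical consumer: Hilt resolves the InferenceEngine parameter
    // via AppModule.provideInferenceEngine(), so this class depends only on
    // the interface, never on InferenceEngineImpl or InferenceEngineLoader.
    @HiltViewModel
    class ChatViewModel @Inject constructor(
        private val engine: InferenceEngine
    ) : ViewModel()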

View File

@@ -1,3 +1,3 @@
 <resources>
-    <string name="app_name">LlamaAndroid</string>
+    <string name="app_name">Kleidi Llama</string>
 </resources>

View File

@@ -1,5 +1,5 @@
 <?xml version="1.0" encoding="utf-8"?>
 <resources>
-    <style name="Theme.LlamaAndroid" parent="android:Theme.Material.Light.NoActionBar" />
+    <style name="Theme.KleidiLlama" parent="android:Theme.Material.Light.NoActionBar" />
 </resources>

View File

@@ -12,7 +12,7 @@ static const Aarch64Info info = GetAarch64Info();
 static const Aarch64Features features = info.features;

 extern "C" JNIEXPORT jint JNICALL
-Java_android_llama_cpp_LLamaLibraryLoader_getOptimalTier(
+Java_android_llama_cpp_InferenceEngineLoader_getOptimalTier(
         JNIEnv* env,
         jclass clazz) {
     int tier = 0; // Default to T0 (baseline)
@@ -46,7 +46,7 @@ Java_android_llama_cpp_LLamaLibraryLoader_getOptimalTier(
 // Optional: Keep a feature string function for debugging
 extern "C" JNIEXPORT jstring JNICALL
-Java_android_llama_cpp_LLamaLibraryLoader_getCpuFeaturesString(
+Java_android_llama_cpp_InferenceEngineLoader_getCpuFeaturesString(
         JNIEnv* env,
         jclass clazz) {
     std::string text;
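These C++ renames are forced by JNI's name-based binding: a symbol of the form Java_<package>_<Class>_<method> can only resolve against an external function on that exact Kotlin class, so renaming LLamaLibraryLoader to InferenceEngineLoader without renaming the symbols would fail at runtime with UnsatisfiedLinkError. A sketch of the matching Kotlin side — getCpuFeaturesString appears verbatim in the loader diff further down; getOptimalTier's exact signature is an assumption inferred from the jint return above:

    package android.llama.cpp

    class InferenceEngineLoader private constructor() {
        companion object {
            // Binds to Java_android_llama_cpp_InferenceEngineLoader_getOptimalTier;
            // Int mirrors the jint in the C++ signature above.
            @JvmStatic
            private external fun getOptimalTier(): Int

            // Binds to Java_android_llama_cpp_InferenceEngineLoader_getCpuFeaturesString.
            @JvmStatic
            private external fun getCpuFeaturesString(): String
        }
    }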

View File

@@ -72,7 +72,7 @@ static void log_callback(ggml_log_level level, const char *fmt, void *data) {
 extern "C"
 JNIEXPORT void JNICALL
-Java_android_llama_cpp_LLamaAndroid_init(JNIEnv *env, jobject /*unused*/) {
+Java_android_llama_cpp_InferenceEngineImpl_init(JNIEnv *env, jobject /*unused*/) {
     // Set llama log handler to Android
     llama_log_set(log_callback, nullptr);
@@ -83,7 +83,7 @@ Java_android_llama_cpp_LLamaAndroid_init(JNIEnv *env, jobject /*unused*/) {
 extern "C"
 JNIEXPORT jint JNICALL
-Java_android_llama_cpp_LLamaAndroid_load(JNIEnv *env, jobject, jstring jmodel_path) {
+Java_android_llama_cpp_InferenceEngineImpl_load(JNIEnv *env, jobject, jstring jmodel_path) {
     llama_model_params model_params = llama_model_default_params();

     const auto *model_path = env->GetStringUTFChars(jmodel_path, 0);
@@ -137,7 +137,7 @@ static common_sampler *new_sampler(float temp) {
 extern "C"
 JNIEXPORT jint JNICALL
-Java_android_llama_cpp_LLamaAndroid_prepare(JNIEnv * /*env*/, jobject /*unused*/) {
+Java_android_llama_cpp_InferenceEngineImpl_prepare(JNIEnv * /*env*/, jobject /*unused*/) {
     auto *context = init_context(g_model);
     if (!context) { return 1; }
     g_context = context;
@@ -161,14 +161,14 @@ static std::string get_backend() {
 extern "C"
 JNIEXPORT jstring JNICALL
-Java_android_llama_cpp_LLamaAndroid_systemInfo(JNIEnv *env, jobject /*unused*/) {
+Java_android_llama_cpp_InferenceEngineImpl_systemInfo(JNIEnv *env, jobject /*unused*/) {
     return env->NewStringUTF(llama_print_system_info());
 }

 extern "C"
 JNIEXPORT jstring JNICALL
-Java_android_llama_cpp_LLamaAndroid_benchModel(JNIEnv *env, jobject /*unused*/, jint pp, jint tg,
-                                               jint pl, jint nr) {
+Java_android_llama_cpp_InferenceEngineImpl_benchModel(JNIEnv *env, jobject /*unused*/, jint pp, jint tg,
+                                                      jint pl, jint nr) {
     auto *context = init_context(g_model, pp);
     if (!context) {
         const auto err_msg = "Fail to init_context! Bench aborted.";
@@ -377,7 +377,7 @@ static int decode_tokens_in_batches(
 extern "C"
 JNIEXPORT jint JNICALL
-Java_android_llama_cpp_LLamaAndroid_processSystemPrompt(
+Java_android_llama_cpp_InferenceEngineImpl_processSystemPrompt(
         JNIEnv *env,
         jobject /*unused*/,
         jstring jsystem_prompt
@@ -426,7 +426,7 @@ Java_android_llama_cpp_LLamaAndroid_processSystemPrompt(
 extern "C"
 JNIEXPORT jint JNICALL
-Java_android_llama_cpp_LLamaAndroid_processUserPrompt(
+Java_android_llama_cpp_InferenceEngineImpl_processUserPrompt(
         JNIEnv *env,
         jobject /*unused*/,
         jstring juser_prompt,
@@ -510,7 +510,7 @@ static bool is_valid_utf8(const char *string) {
 extern "C"
 JNIEXPORT jstring JNICALL
-Java_android_llama_cpp_LLamaAndroid_generateNextToken(
+Java_android_llama_cpp_InferenceEngineImpl_generateNextToken(
         JNIEnv *env,
         jobject /*unused*/
 ) {
@@ -570,7 +570,7 @@ Java_android_llama_cpp_LLamaAndroid_generateNextToken(
 extern "C"
 JNIEXPORT void JNICALL
-Java_android_llama_cpp_LLamaAndroid_unload(JNIEnv * /*unused*/, jobject /*unused*/) {
+Java_android_llama_cpp_InferenceEngineImpl_unload(JNIEnv * /*unused*/, jobject /*unused*/) {
     // Reset long-term & short-term states
     reset_long_term_states();
     reset_short_term_states();
@@ -585,6 +585,6 @@ Java_android_llama_cpp_LLamaAndroid_unload(JNIEnv * /*unused*/, jobject /*unused
 extern "C"
 JNIEXPORT void JNICALL
-Java_android_llama_cpp_LLamaAndroid_shutdown(JNIEnv *env, jobject /*unused*/) {
+Java_android_llama_cpp_InferenceEngineImpl_shutdown(JNIEnv *env, jobject /*unused*/) {
     llama_backend_free();
 }
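The same JNI naming rule drives every rename in this file: each Java_android_llama_cpp_InferenceEngineImpl_* symbol must line up with an external member of android.llama.cpp.InferenceEngineImpl. A rough sketch of the Kotlin declarations these symbols imply — return types follow the jint/jstring types above, while parameter names and processUserPrompt's trailing parameters (truncated by the hunk) are assumptions:

    package android.llama.cpp

    internal class InferenceEngineImpl /* private constructor(...) as in the diff below */ {
        external fun init()
        external fun load(modelPath: String): Int
        external fun prepare(): Int
        external fun systemInfo(): String
        external fun benchModel(pp: Int, tg: Int, pl: Int, nr: Int): String
        external fun processSystemPrompt(systemPrompt: String): Int
        external fun processUserPrompt(userPrompt: String /*, further params not shown */): Int
        external fun generateNextToken(): String
        external fun unload()
        external fun shutdown()
    }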

View File

@@ -36,25 +36,27 @@ import java.io.File
  *
  * @see llama-android.cpp for the native implementation details
  */
-class LLamaAndroid private constructor(private val tier: LLamaTier) : InferenceEngine {
+internal class InferenceEngineImpl private constructor(
+    private val tier: LLamaTier
+) : InferenceEngine {

     companion object {
-        private val TAG = LLamaAndroid::class.java.simpleName
+        private val TAG = InferenceEngineImpl::class.java.simpleName

         private var initialized = false

         /**
-         * Create LLamaAndroid instance with specific tier
+         * Create [InferenceEngineImpl] instance with specific tier
          */
-        internal fun createWithTier(tier: LLamaTier): LLamaAndroid? {
+        internal fun createWithTier(tier: LLamaTier): InferenceEngineImpl? {
             if (initialized) {
                 Log.w(TAG, "LLamaAndroid already initialized")
                 return null
             }

             try {
-                Log.i(TAG, "Instantiating LLamaAndroid w/ ${tier.libraryName}")
-                val instance = LLamaAndroid(tier)
+                Log.i(TAG, "Instantiating InferenceEngineImpl w/ ${tier.libraryName}")
+                val instance = InferenceEngineImpl(tier)
                 initialized = true
                 return instance
View File

@@ -21,10 +21,10 @@ enum class LLamaTier(val rawValue: Int, val libraryName: String, val description
     }
 }

-class LLamaLibraryLoader private constructor() {
+class InferenceEngineLoader private constructor() {

     companion object {
-        private val TAG = LLamaLibraryLoader::class.simpleName
+        private val TAG = InferenceEngineLoader::class.simpleName

         private const val DETECTION_VERSION = 1
         private const val PREFS_NAME = "llama_cpu_detection"
@@ -37,16 +37,16 @@ class LLamaLibraryLoader private constructor() {
         @JvmStatic
         private external fun getCpuFeaturesString(): String

-        private var _cachedInstance: LLamaAndroid? = null
+        private var _cachedInstance: InferenceEngineImpl? = null
         private var _detectedTier: LLamaTier? = null

         val detectedTier: LLamaTier? get() = _detectedTier

         /**
-         * Factory method to get a configured LLamaAndroid instance.
+         * Factory method to get a configured [InferenceEngineImpl] instance.
          * Handles tier detection, caching, and library loading automatically.
          */
        @Synchronized
-        fun createInstance(context: Context): LLamaAndroid? {
+        fun createInstance(context: Context): InferenceEngine? {
            // Return cached instance if available
            _cachedInstance?.let { return it }
@@ -59,18 +59,18 @@ class LLamaLibraryLoader private constructor() {
                _detectedTier = tier
                Log.i(TAG, "Using tier: ${tier.name} (${tier.description})")

-                // Create and cache LLamaAndroid instance
-                val instance = LLamaAndroid.createWithTier(tier) ?: run {
-                    Log.e(TAG, "Failed to instantiate LLamaAndroid")
+                // Create and cache the inference engine instance
+                val instance = InferenceEngineImpl.createWithTier(tier) ?: run {
+                    Log.e(TAG, "Failed to instantiate InferenceEngineImpl")
                     return null
                 }
                 _cachedInstance = instance

-                Log.i(TAG, "Successfully created LLamaAndroid instance with ${tier.name}")
+                Log.i(TAG, "Successfully created InferenceEngineImpl instance with ${tier.name}")
                 return instance
             } catch (e: Exception) {
-                Log.e(TAG, "Error creating LLamaAndroid instance", e)
+                Log.e(TAG, "Error creating InferenceEngineImpl instance", e)
                 return null
             }
         }
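Note that createInstance now returns the InferenceEngine interface rather than the concrete class, so the implementation stays internal to the library module. A hypothetical call site mirroring the AppModule change earlier in this commit; repeated calls hit the cached instance:

    import android.content.Context
    import android.llama.cpp.InferenceEngine
    import android.llama.cpp.InferenceEngineLoader

    // Callers receive only the interface; the loader handles tier
    // detection, library loading, and caching behind createInstance().
    fun obtainEngine(context: Context): InferenceEngine =
        InferenceEngineLoader.createInstance(context)
            ?: throw InstantiationException("Cannot instantiate InferenceEngine!")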