core: further improve the performance of native methods

This commit is contained in:
Han Yin 2025-09-11 10:30:10 -07:00
parent d5220549b6
commit 2223c54cc6
2 changed files with 21 additions and 4 deletions

View File

@ -23,7 +23,7 @@ static std::string join(const std::vector<T> &values, const std::string &delim)
/** /**
* Logging utils * Logging utils
*/ */
#define TAG "llama-android.cpp" #define TAG "kleidi-llama"
#define LOGv(...) __android_log_print(ANDROID_LOG_VERBOSE, TAG, __VA_ARGS__) #define LOGv(...) __android_log_print(ANDROID_LOG_VERBOSE, TAG, __VA_ARGS__)
#define LOGd(...) __android_log_print(ANDROID_LOG_DEBUG, TAG, __VA_ARGS__) #define LOGd(...) __android_log_print(ANDROID_LOG_DEBUG, TAG, __VA_ARGS__)
#define LOGi(...) __android_log_print(ANDROID_LOG_INFO, TAG, __VA_ARGS__) #define LOGi(...) __android_log_print(ANDROID_LOG_INFO, TAG, __VA_ARGS__)
@ -177,7 +177,7 @@ Java_android_llama_cpp_internal_InferenceEngineImpl_benchModel(JNIEnv *env, jobj
jint pl, jint nr) { jint pl, jint nr) {
auto *context = init_context(g_model, pp); auto *context = init_context(g_model, pp);
if (!context) { if (!context) {
const auto err_msg = "Fail to init_context! Bench aborted."; const auto *const err_msg = "Fail to init_context! Bench aborted.";
LOGe(err_msg); LOGe(err_msg);
return env->NewStringUTF(err_msg); return env->NewStringUTF(err_msg);
} }

View File

@ -4,6 +4,7 @@ import android.content.Context
import android.llama.cpp.InferenceEngine import android.llama.cpp.InferenceEngine
import android.llama.cpp.UnsupportedArchitectureException import android.llama.cpp.UnsupportedArchitectureException
import android.util.Log import android.util.Log
import dalvik.annotation.optimization.FastNative
import kotlinx.coroutines.CancellationException import kotlinx.coroutines.CancellationException
import kotlinx.coroutines.CoroutineScope import kotlinx.coroutines.CoroutineScope
import kotlinx.coroutines.Dispatchers import kotlinx.coroutines.Dispatchers
@ -37,7 +38,7 @@ import java.io.IOException
* *
* State transitions are managed automatically and validated at each operation. * State transitions are managed automatically and validated at each operation.
* *
* @see llama-android.cpp for the native implementation details * @see kleidi-llama.cpp for the native implementation details
*/ */
internal class InferenceEngineImpl private constructor( internal class InferenceEngineImpl private constructor(
private val nativeLibDir: String private val nativeLibDir: String
@ -74,20 +75,36 @@ internal class InferenceEngineImpl private constructor(
/** /**
* JNI methods * JNI methods
* @see llama-android.cpp * @see kleidi-llama.cpp
*/ */
@FastNative
private external fun init(nativeLibDir: String) private external fun init(nativeLibDir: String)
@FastNative
private external fun load(modelPath: String): Int private external fun load(modelPath: String): Int
@FastNative
private external fun prepare(): Int private external fun prepare(): Int
@FastNative
private external fun systemInfo(): String private external fun systemInfo(): String
@FastNative
private external fun benchModel(pp: Int, tg: Int, pl: Int, nr: Int): String private external fun benchModel(pp: Int, tg: Int, pl: Int, nr: Int): String
@FastNative
private external fun processSystemPrompt(systemPrompt: String): Int private external fun processSystemPrompt(systemPrompt: String): Int
@FastNative
private external fun processUserPrompt(userPrompt: String, predictLength: Int): Int private external fun processUserPrompt(userPrompt: String, predictLength: Int): Int
@FastNative
private external fun generateNextToken(): String? private external fun generateNextToken(): String?
@FastNative
private external fun unload() private external fun unload()
@FastNative
private external fun shutdown() private external fun shutdown()
private val _state = private val _state =