diff --git a/common/cmake/build-info-gen-cpp.cmake b/common/cmake/build-info-gen-cpp.cmake
deleted file mode 100644
index d7ba2624d8..0000000000
--- a/common/cmake/build-info-gen-cpp.cmake
+++ /dev/null
@@ -1,28 +0,0 @@
-# Resolve the repository root no matter where this script is executed.
-get_filename_component(LLAMA_ROOT "${CMAKE_CURRENT_LIST_DIR}/../.." ABSOLUTE)
-
-# Load the helper macros that fill @BUILD_*@ variables
-include(${LLAMA_ROOT}/cmake/build-info.cmake)
-
-set(TEMPLATE_FILE "${LLAMA_ROOT}/common/build-info.cpp.in")
-set(OUTPUT_FILE "${LLAMA_ROOT}/common/build-info.cpp")
-
-# Only write the build info if it changed
-if(EXISTS ${OUTPUT_FILE})
-    file(READ ${OUTPUT_FILE} CONTENTS)
-    string(REGEX MATCH "LLAMA_COMMIT = \"([^\"]*)\";" _ ${CONTENTS})
-    set(OLD_COMMIT ${CMAKE_MATCH_1})
-    string(REGEX MATCH "LLAMA_COMPILER = \"([^\"]*)\";" _ ${CONTENTS})
-    set(OLD_COMPILER ${CMAKE_MATCH_1})
-    string(REGEX MATCH "LLAMA_BUILD_TARGET = \"([^\"]*)\";" _ ${CONTENTS})
-    set(OLD_TARGET ${CMAKE_MATCH_1})
-    if (
-        NOT OLD_COMMIT STREQUAL BUILD_COMMIT OR
-        NOT OLD_COMPILER STREQUAL BUILD_COMPILER OR
-        NOT OLD_TARGET STREQUAL BUILD_TARGET
-    )
-        configure_file(${TEMPLATE_FILE} ${OUTPUT_FILE})
-    endif()
-else()
-    configure_file(${TEMPLATE_FILE} ${OUTPUT_FILE})
-endif()
diff --git a/examples/llama.android/app/src/main/AndroidManifest.xml b/examples/llama.android/app/src/main/AndroidManifest.xml
index 616cf384e5..40f2f1cbe6 100644
--- a/examples/llama.android/app/src/main/AndroidManifest.xml
+++ b/examples/llama.android/app/src/main/AndroidManifest.xml
@@ -6,6 +6,7 @@
diff --git a/examples/llama.android/llama/src/main/cpp/llama-android.cpp b/examples/llama.android/llama/src/main/cpp/llama-android.cpp
--- a/examples/llama.android/llama/src/main/cpp/llama-android.cpp
+++ b/examples/llama.android/llama/src/main/cpp/llama-android.cpp
@@ ... @@
+    const char * path_to_backend = env->GetStringUTFChars(nativeLibDir, 0);
+    LOGi("Loading backends from %s", path_to_backend);
+    ggml_backend_load_all_from_path(path_to_backend);
+    env->ReleaseStringUTFChars(nativeLibDir, path_to_backend);
+
     // Initialize backends
     llama_backend_init();
     LOGi("Backend initiated; Log handler set.");
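The hunk above relies on ggml's dynamic backend loading: `ggml_backend_load_all_from_path()` scans a directory, dlopens every backend shared library found there, and for the CPU backend registers the highest-scoring variant the device supports. As a minimal sketch, this is how the full JNI entry point might look after the change; the function name and signature are assumptions inferred from the Kotlin `external fun init(nativeLibDir: String)` declaration below, and `LOGi` is the example app's logging macro:

```cpp
#include <jni.h>
#include "ggml-backend.h"
#include "llama.h"

extern "C" JNIEXPORT void JNICALL
Java_android_llama_cpp_internal_InferenceEngineImpl_init(JNIEnv * env, jobject /*thiz*/, jstring nativeLibDir) {
    // The app's nativeLibraryDir is where the packaged libggml-cpu-*.so
    // variants are unpacked at install time.
    const char * path_to_backend = env->GetStringUTFChars(nativeLibDir, 0);
    LOGi("Loading backends from %s", path_to_backend);

    // dlopen and register every backend found in the directory; for CPU
    // variants ggml keeps the best-scoring one for the running device.
    ggml_backend_load_all_from_path(path_to_backend);
    env->ReleaseStringUTFChars(nativeLibDir, path_to_backend);

    // Initialize backends
    llama_backend_init();
    LOGi("Backend initiated; Log handler set.");
}
```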
diff --git a/examples/llama.android/llama/src/main/java/android/llama/cpp/internal/InferenceEngineImpl.kt b/examples/llama.android/llama/src/main/java/android/llama/cpp/internal/InferenceEngineImpl.kt
index 008ce9ebdd..114f7e4d19 100644
--- a/examples/llama.android/llama/src/main/java/android/llama/cpp/internal/InferenceEngineImpl.kt
+++ b/examples/llama.android/llama/src/main/java/android/llama/cpp/internal/InferenceEngineImpl.kt
@@ -1,7 +1,8 @@
 package android.llama.cpp.internal
 
+import android.content.Context
 import android.llama.cpp.InferenceEngine
-import android.llama.cpp.LLamaTier
 import android.llama.cpp.UnsupportedArchitectureException
 import android.util.Log
+import java.io.File
 import kotlinx.coroutines.CancellationException
@@ -40,7 +41,7 @@ import java.io.IOException
  * @see llama-android.cpp for the native implementation details
  */
 internal class InferenceEngineImpl private constructor(
-    private val tier: LLamaTier
+    private val nativeLibDir: String
 ) : InferenceEngine {
 
     companion object {
@@ -49,22 +50,24 @@ internal class InferenceEngineImpl private constructor(
         private var initialized = false
 
         /**
-         * Create [InferenceEngineImpl] instance with specific tier
+         * Create [InferenceEngineImpl] instance at runtime
          *
-         * @throws IllegalArgumentException if tier's library name is invalid
+         * @param context Context for obtaining the native library directory
+         * @throws IllegalArgumentException if native library path is invalid
          * @throws UnsatisfiedLinkError if library failed to load
          */
-        internal fun createWithTier(tier: LLamaTier): InferenceEngineImpl {
+        internal fun create(context: Context): InferenceEngineImpl {
             assert(!initialized) { "Inference Engine has already been initialized!" }
-            require(tier.libraryName.isNotBlank()) { "Unexpected library: ${tier.libraryName}" }
+            val nativeLibDir = context.applicationInfo.nativeLibraryDir
+            require(nativeLibDir.isNotBlank()) { "Expected a valid native library directory" }
             return try {
-                Log.i(TAG, "Instantiating InferenceEngineImpl w/ ${tier.libraryName}")
-                InferenceEngineImpl(tier).also { initialized = true }
+                Log.i(TAG, "Instantiating InferenceEngineImpl...")
+                InferenceEngineImpl(nativeLibDir).also { initialized = true }
             } catch (e: UnsatisfiedLinkError) {
-                Log.e(TAG, "Failed to load ${tier.libraryName}", e)
+                Log.e(TAG, "Failed to load native library from $nativeLibDir", e)
                 throw e
             }
         }
     }
@@ -74,7 +77,7 @@ internal class InferenceEngineImpl private constructor(
     /**
     * JNI methods
     * @see llama-android.cpp
     */
-    private external fun init()
+    private external fun init(nativeLibDir: String)
     private external fun load(modelPath: String): Int
     private external fun prepare(): Int
@@ -108,10 +111,9 @@
             "Cannot load native library in ${_state.value.javaClass.simpleName}!"
         }
         _state.value = InferenceEngine.State.Initializing
-        Log.i(TAG, "Loading native library for $tier")
-
-        System.loadLibrary(tier.libraryName)
-        init()
+        Log.i(TAG, "Loading native library...")
+        System.load(File(nativeLibDir, "libkleidi-llama.so").absolutePath)
+        init(nativeLibDir)
         _state.value = InferenceEngine.State.Initialized
 
         Log.i(TAG, "Native library loaded! System info: \n${systemInfo()}")
diff --git a/examples/llama.android/llama/src/main/java/android/llama/cpp/internal/InferenceEngineLoader.kt b/examples/llama.android/llama/src/main/java/android/llama/cpp/internal/InferenceEngineLoader.kt
index e01edf2e5f..e3c2174e82 100644
--- a/examples/llama.android/llama/src/main/java/android/llama/cpp/internal/InferenceEngineLoader.kt
+++ b/examples/llama.android/llama/src/main/java/android/llama/cpp/internal/InferenceEngineLoader.kt
@@ -54,19 +54,11 @@ internal object InferenceEngineLoader {
         _cachedInstance?.let { return it }
 
         return runBlocking {
-            // Obtain the optimal tier from cache if available
-            val tier = obtainTier(context)
-            if (tier == null || tier == LLamaTier.NONE) {
-                Log.e(TAG, "Aborted instantiating Inference Engine due to invalid tier")
-                return@runBlocking null
-            }
-
             try {
                 // Create and cache the inference engine instance
-                Log.i(TAG, "Using tier: ${tier.name} (${tier.description})")
-                InferenceEngineImpl.createWithTier(tier).also {
+                InferenceEngineImpl.create(context).also {
                     _cachedInstance = it
-                    Log.i(TAG, "Successfully instantiated Inference Engine w/ ${tier.name}")
+                    Log.i(TAG, "Successfully instantiated Inference Engine")
                 }
             } catch (e: Exception) {
diff --git a/ggml/src/CMakeLists.txt b/ggml/src/CMakeLists.txt
index ba281b8e6d..2a18847725 100644
--- a/ggml/src/CMakeLists.txt
+++ b/ggml/src/CMakeLists.txt
@@ -355,6 +355,9 @@ if (GGML_CPU_ALL_VARIANTS)
         ggml_add_cpu_backend_variant(android_armv8.2_1 DOTPROD)
         ggml_add_cpu_backend_variant(android_armv8.2_2 DOTPROD FP16_VECTOR_ARITHMETIC)
         ggml_add_cpu_backend_variant(android_armv8.6_1 DOTPROD FP16_VECTOR_ARITHMETIC MATMUL_INT8)
+        ggml_add_cpu_backend_variant(android_armv9.0_1 DOTPROD MATMUL_INT8 FP16_VECTOR_ARITHMETIC SVE2)
+        ggml_add_cpu_backend_variant(android_armv9.2_1 DOTPROD MATMUL_INT8 FP16_VECTOR_ARITHMETIC SME)
+        ggml_add_cpu_backend_variant(android_armv9.2_2 DOTPROD MATMUL_INT8 FP16_VECTOR_ARITHMETIC SVE SME)
     elseif (APPLE)
         ggml_add_cpu_backend_variant(apple_m1 DOTPROD)
         ggml_add_cpu_backend_variant(apple_m2_m3 DOTPROD MATMUL_INT8)
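Each `ggml_add_cpu_backend_variant(...)` line above produces a separate `libggml-cpu-<tag>.so`, and the `ggml_backend_load_all_from_path()` call shown earlier scores the packaged variants against the device's CPU features and keeps the best match. A sketch for sanity-checking what got registered, using ggml's public device registry (the helper function and output format are illustrative, not part of the patch):

```cpp
#include "ggml-backend.h"
#include <cstdio>

// Load the packaged backends (ggml itself logs the path of the CPU
// variant it picks), then list the registered devices as a sanity check.
static void dump_loaded_backends(const char * native_lib_dir) {
    ggml_backend_load_all_from_path(native_lib_dir);
    for (size_t i = 0; i < ggml_backend_dev_count(); ++i) {
        ggml_backend_dev_t dev = ggml_backend_dev_get(i);
        std::printf("device %zu: %s (%s)\n", i,
                    ggml_backend_dev_name(dev),
                    ggml_backend_dev_description(dev));
    }
}
```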
diff --git a/ggml/src/ggml-cpu/CMakeLists.txt b/ggml/src/ggml-cpu/CMakeLists.txt
index 34323afa07..5fe1d3a29e 100644
--- a/ggml/src/ggml-cpu/CMakeLists.txt
+++ b/ggml/src/ggml-cpu/CMakeLists.txt
@@ -212,8 +212,15 @@ function(ggml_add_cpu_backend_variant_impl tag_name)
             set(FEAT_INPUT_FILE "/dev/null")
         endif()
 
+        # specify Android cross compile target
+        if("${GGML_CPU_NAME}" MATCHES ".*android.*")
+            set(ANDROID_TARGET_FLAG "--target=aarch64-linux-android${ANDROID_API_LEVEL}")
+        else()
+            set(ANDROID_TARGET_FLAG "")
+        endif()
+
         execute_process(
-            COMMAND ${CMAKE_C_COMPILER} ${ARCH_FLAGS} -dM -E -
+            COMMAND ${CMAKE_C_COMPILER} ${ARCH_FLAGS} ${ANDROID_TARGET_FLAG} -dM -E -
             INPUT_FILE ${FEAT_INPUT_FILE}
             OUTPUT_VARIABLE ARM_FEATURE
             RESULT_VARIABLE ARM_FEATURE_RESULT
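For background on this hunk: the `execute_process` call runs the C compiler's preprocessor with `-dM -E` over an empty input, which dumps every macro predefined for the given `${ARCH_FLAGS}`; the build then searches `ARM_FEATURE` for ARM feature defines. When CMake invokes the NDK's bare `clang` this way, the usual `--target` argument supplied by the toolchain file is absent, so without the `ANDROID_TARGET_FLAG` added above the probe would report macros for the compiler's default target and the new armv9 variants would mis-detect. The standard ACLE macros involved look roughly like this (a sketch; the exact list the build script checks sits outside this hunk):

```cpp
// Predefined ACLE feature macros surfaced by the -dM -E probe, roughly
// corresponding to the variant feature flags used above.
#if defined(__ARM_FEATURE_DOTPROD)
// -> DOTPROD
#endif
#if defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC)
// -> FP16_VECTOR_ARITHMETIC
#endif
#if defined(__ARM_FEATURE_MATMUL_INT8)
// -> MATMUL_INT8 (i8mm)
#endif
#if defined(__ARM_FEATURE_SVE) && defined(__ARM_FEATURE_SVE2)
// -> SVE, SVE2
#endif
#if defined(__ARM_FEATURE_SME)
// -> SME
#endif
```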