diff --git a/common/cmake/build-info-gen-cpp.cmake b/common/cmake/build-info-gen-cpp.cmake
deleted file mode 100644
index d7ba2624d8..0000000000
--- a/common/cmake/build-info-gen-cpp.cmake
+++ /dev/null
@@ -1,28 +0,0 @@
-# Resolve the repository root no matter where this script is executed.
-get_filename_component(LLAMA_ROOT "${CMAKE_CURRENT_LIST_DIR}/../.." ABSOLUTE)
-
-# Load the helper macros that fill @BUILD_*@ variables
-include(${LLAMA_ROOT}/cmake/build-info.cmake)
-
-set(TEMPLATE_FILE "${LLAMA_ROOT}/common/build-info.cpp.in")
-set(OUTPUT_FILE "${LLAMA_ROOT}/common/build-info.cpp")
-
-# Only write the build info if it changed
-if(EXISTS ${OUTPUT_FILE})
-    file(READ ${OUTPUT_FILE} CONTENTS)
-    string(REGEX MATCH "LLAMA_COMMIT = \"([^\"]*)\";" _ ${CONTENTS})
-    set(OLD_COMMIT ${CMAKE_MATCH_1})
-    string(REGEX MATCH "LLAMA_COMPILER = \"([^\"]*)\";" _ ${CONTENTS})
-    set(OLD_COMPILER ${CMAKE_MATCH_1})
-    string(REGEX MATCH "LLAMA_BUILD_TARGET = \"([^\"]*)\";" _ ${CONTENTS})
-    set(OLD_TARGET ${CMAKE_MATCH_1})
-    if (
-        NOT OLD_COMMIT STREQUAL BUILD_COMMIT OR
-        NOT OLD_COMPILER STREQUAL BUILD_COMPILER OR
-        NOT OLD_TARGET STREQUAL BUILD_TARGET
-    )
-        configure_file(${TEMPLATE_FILE} ${OUTPUT_FILE})
-    endif()
-else()
-    configure_file(${TEMPLATE_FILE} ${OUTPUT_FILE})
-endif()
diff --git a/examples/llama.android/app/src/main/AndroidManifest.xml b/examples/llama.android/app/src/main/AndroidManifest.xml
index 616cf384e5..40f2f1cbe6 100644
--- a/examples/llama.android/app/src/main/AndroidManifest.xml
+++ b/examples/llama.android/app/src/main/AndroidManifest.xml
@@ -6,6 +6,7 @@
diff --git a/examples/llama.android/llama/src/main/cpp/llama-android.cpp b/examples/llama.android/llama/src/main/cpp/llama-android.cpp
--- a/examples/llama.android/llama/src/main/cpp/llama-android.cpp
+++ b/examples/llama.android/llama/src/main/cpp/llama-android.cpp
@@ ... @@
+    const char * path_to_backend = env->GetStringUTFChars(nativeLibDir, 0);
+    LOGi("Loading backends from %s", path_to_backend);
+    ggml_backend_load_all_from_path(path_to_backend);
+    env->ReleaseStringUTFChars(nativeLibDir, path_to_backend);
+
     // Initialize backends
     llama_backend_init();
     LOGi("Backend initiated; Log handler set.");
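The hunk above relies on ggml's dynamic backend loading: `ggml_backend_load_all_from_path()` scans a directory, dlopens every backend shared library found there, and for the CPU backend registers the highest-scoring variant the device supports. As a minimal sketch, this is how the full JNI entry point might look after the change; the function name and signature are assumptions inferred from the Kotlin `external fun init(nativeLibDir: String)` declaration below, and `LOGi` is the example app's logging macro:

```cpp
#include <jni.h>
#include "ggml-backend.h"
#include "llama.h"

extern "C" JNIEXPORT void JNICALL
Java_android_llama_cpp_internal_InferenceEngineImpl_init(JNIEnv * env, jobject /*thiz*/, jstring nativeLibDir) {
    // The app's nativeLibraryDir is where the packaged libggml-cpu-*.so
    // variants are unpacked at install time.
    const char * path_to_backend = env->GetStringUTFChars(nativeLibDir, 0);
    LOGi("Loading backends from %s", path_to_backend);

    // dlopen and register every backend found in the directory; for CPU
    // variants ggml keeps the best-scoring one for the running device.
    ggml_backend_load_all_from_path(path_to_backend);
    env->ReleaseStringUTFChars(nativeLibDir, path_to_backend);

    // Initialize backends
    llama_backend_init();
    LOGi("Backend initiated; Log handler set.");
}
```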
diff --git a/examples/llama.android/llama/src/main/java/android/llama/cpp/internal/InferenceEngineImpl.kt b/examples/llama.android/llama/src/main/java/android/llama/cpp/internal/InferenceEngineImpl.kt
index 008ce9ebdd..114f7e4d19 100644
--- a/examples/llama.android/llama/src/main/java/android/llama/cpp/internal/InferenceEngineImpl.kt
+++ b/examples/llama.android/llama/src/main/java/android/llama/cpp/internal/InferenceEngineImpl.kt
@@ -1,7 +1,8 @@
 package android.llama.cpp.internal
 
+import android.content.Context
 import android.llama.cpp.InferenceEngine
-import android.llama.cpp.LLamaTier
 import android.llama.cpp.UnsupportedArchitectureException
 import android.util.Log
+import java.io.File
 import kotlinx.coroutines.CancellationException
@@ -40,7 +41,7 @@ import java.io.IOException
  * @see llama-android.cpp for the native implementation details
  */
 internal class InferenceEngineImpl private constructor(
-    private val tier: LLamaTier
+    private val nativeLibDir: String
 ) : InferenceEngine {
 
     companion object {
@@ -49,22 +50,24 @@ internal class InferenceEngineImpl private constructor(
         private var initialized = false
 
         /**
-         * Create [InferenceEngineImpl] instance with specific tier
+         * Create [InferenceEngineImpl] instance at runtime
          *
-         * @throws IllegalArgumentException if tier's library name is invalid
+         * @param context Context for obtaining the native library directory
+         * @throws IllegalArgumentException if native library path is invalid
          * @throws UnsatisfiedLinkError if library failed to load
          */
-        internal fun createWithTier(tier: LLamaTier): InferenceEngineImpl {
+        internal fun create(context: Context): InferenceEngineImpl {
             assert(!initialized) { "Inference Engine has already been initialized!" }
-            require(tier.libraryName.isNotBlank()) { "Unexpected library: ${tier.libraryName}" }
+            val nativeLibDir = context.applicationInfo.nativeLibraryDir
+            require(nativeLibDir.isNotBlank()) { "Expected a valid native library directory" }
             return try {
-                Log.i(TAG, "Instantiating InferenceEngineImpl w/ ${tier.libraryName}")
-                InferenceEngineImpl(tier).also { initialized = true }
+                Log.i(TAG, "Instantiating InferenceEngineImpl...")
+                InferenceEngineImpl(nativeLibDir).also { initialized = true }
             } catch (e: UnsatisfiedLinkError) {
-                Log.e(TAG, "Failed to load ${tier.libraryName}", e)
+                Log.e(TAG, "Failed to load native library from $nativeLibDir", e)
                 throw e
             }
         }
     }
@@ -74,7 +77,7 @@ internal class InferenceEngineImpl private constructor(
     /**
     * JNI methods
     * @see llama-android.cpp
     */
-    private external fun init()
+    private external fun init(nativeLibDir: String)
     private external fun load(modelPath: String): Int
     private external fun prepare(): Int
@@ -108,10 +111,9 @@
             "Cannot load native library in ${_state.value.javaClass.simpleName}!"
         }
         _state.value = InferenceEngine.State.Initializing
-        Log.i(TAG, "Loading native library for $tier")
-
-        System.loadLibrary(tier.libraryName)
-        init()
+        Log.i(TAG, "Loading native library...")
+        System.load(File(nativeLibDir, "libkleidi-llama.so").absolutePath)
+        init(nativeLibDir)
         _state.value = InferenceEngine.State.Initialized
 
         Log.i(TAG, "Native library loaded! System info: \n${systemInfo()}")
diff --git a/examples/llama.android/llama/src/main/java/android/llama/cpp/internal/InferenceEngineLoader.kt b/examples/llama.android/llama/src/main/java/android/llama/cpp/internal/InferenceEngineLoader.kt
index e01edf2e5f..e3c2174e82 100644
--- a/examples/llama.android/llama/src/main/java/android/llama/cpp/internal/InferenceEngineLoader.kt
+++ b/examples/llama.android/llama/src/main/java/android/llama/cpp/internal/InferenceEngineLoader.kt
@@ -54,19 +54,11 @@ internal object InferenceEngineLoader {
         _cachedInstance?.let { return it }
 
         return runBlocking {
-            // Obtain the optimal tier from cache if available
-            val tier = obtainTier(context)
-            if (tier == null || tier == LLamaTier.NONE) {
-                Log.e(TAG, "Aborted instantiating Inference Engine due to invalid tier")
-                return@runBlocking null
-            }
-
             try {
                 // Create and cache the inference engine instance
-                Log.i(TAG, "Using tier: ${tier.name} (${tier.description})")
-                InferenceEngineImpl.createWithTier(tier).also {
+                InferenceEngineImpl.create(context).also {
                     _cachedInstance = it
-                    Log.i(TAG, "Successfully instantiated Inference Engine w/ ${tier.name}")
+                    Log.i(TAG, "Successfully instantiated Inference Engine")
                 }
             } catch (e: Exception) {
diff --git a/ggml/src/CMakeLists.txt b/ggml/src/CMakeLists.txt
index ba281b8e6d..2a18847725 100644
--- a/ggml/src/CMakeLists.txt
+++ b/ggml/src/CMakeLists.txt
@@ -355,6 +355,9 @@ if (GGML_CPU_ALL_VARIANTS)
         ggml_add_cpu_backend_variant(android_armv8.2_1 DOTPROD)
         ggml_add_cpu_backend_variant(android_armv8.2_2 DOTPROD FP16_VECTOR_ARITHMETIC)
         ggml_add_cpu_backend_variant(android_armv8.6_1 DOTPROD FP16_VECTOR_ARITHMETIC MATMUL_INT8)
+        ggml_add_cpu_backend_variant(android_armv9.0_1 DOTPROD MATMUL_INT8 FP16_VECTOR_ARITHMETIC SVE2)
+        ggml_add_cpu_backend_variant(android_armv9.2_1 DOTPROD MATMUL_INT8 FP16_VECTOR_ARITHMETIC SME)
+        ggml_add_cpu_backend_variant(android_armv9.2_2 DOTPROD MATMUL_INT8 FP16_VECTOR_ARITHMETIC SVE SME)
     elseif (APPLE)
         ggml_add_cpu_backend_variant(apple_m1 DOTPROD)
         ggml_add_cpu_backend_variant(apple_m2_m3 DOTPROD MATMUL_INT8)
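Each `ggml_add_cpu_backend_variant(...)` line above produces a separate `libggml-cpu-<tag>.so`, and the `ggml_backend_load_all_from_path()` call shown earlier scores the packaged variants against the device's CPU features and keeps the best match. A sketch for sanity-checking what got registered, using ggml's public device registry (the helper function and output format are illustrative, not part of the patch):

```cpp
#include "ggml-backend.h"
#include <cstdio>

// Load the packaged backends (ggml itself logs the path of the CPU
// variant it picks), then list the registered devices as a sanity check.
static void dump_loaded_backends(const char * native_lib_dir) {
    ggml_backend_load_all_from_path(native_lib_dir);
    for (size_t i = 0; i < ggml_backend_dev_count(); ++i) {
        ggml_backend_dev_t dev = ggml_backend_dev_get(i);
        std::printf("device %zu: %s (%s)\n", i,
                    ggml_backend_dev_name(dev),
                    ggml_backend_dev_description(dev));
    }
}
```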
diff --git a/ggml/src/ggml-cpu/CMakeLists.txt b/ggml/src/ggml-cpu/CMakeLists.txt
index 34323afa07..5fe1d3a29e 100644
--- a/ggml/src/ggml-cpu/CMakeLists.txt
+++ b/ggml/src/ggml-cpu/CMakeLists.txt
@@ -212,8 +212,15 @@ function(ggml_add_cpu_backend_variant_impl tag_name)
             set(FEAT_INPUT_FILE "/dev/null")
         endif()
 
+        # specify Android cross compile target
+        if("${GGML_CPU_NAME}" MATCHES ".*android.*")
+            set(ANDROID_TARGET_FLAG "--target=aarch64-linux-android${ANDROID_API_LEVEL}")
+        else()
+            set(ANDROID_TARGET_FLAG "")
+        endif()
+
         execute_process(
-            COMMAND ${CMAKE_C_COMPILER} ${ARCH_FLAGS} -dM -E -
+            COMMAND ${CMAKE_C_COMPILER} ${ARCH_FLAGS} ${ANDROID_TARGET_FLAG} -dM -E -
             INPUT_FILE ${FEAT_INPUT_FILE}
             OUTPUT_VARIABLE ARM_FEATURE
             RESULT_VARIABLE ARM_FEATURE_RESULT
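For background on this hunk: the `execute_process` call runs the C compiler's preprocessor with `-dM -E` over an empty input, which dumps every macro predefined for the given `${ARCH_FLAGS}`; the build then searches `ARM_FEATURE` for ARM feature defines. When CMake invokes the NDK's bare `clang` this way, the usual `--target` argument supplied by the toolchain file is absent, so without the `ANDROID_TARGET_FLAG` added above the probe would report macros for the compiler's default target and the new armv9 variants would mis-detect. The standard ACLE macros involved look roughly like this (a sketch; the exact list the build script checks sits outside this hunk):

```cpp
// Predefined ACLE feature macros surfaced by the -dM -E probe, roughly
// corresponding to the variant feature flags used above.
#if defined(__ARM_FEATURE_DOTPROD)
// -> DOTPROD
#endif
#if defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC)
// -> FP16_VECTOR_ARITHMETIC
#endif
#if defined(__ARM_FEATURE_MATMUL_INT8)
// -> MATMUL_INT8 (i8mm)
#endif
#if defined(__ARM_FEATURE_SVE) && defined(__ARM_FEATURE_SVE2)
// -> SVE, SVE2
#endif
#if defined(__ARM_FEATURE_SME)
// -> SME
#endif
```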