From 58062860afb88e555857c1266d3a17e1b65b5eb9 Mon Sep 17 00:00:00 2001 From: Aadeshveer Singh Date: Wed, 17 Dec 2025 09:17:01 +0530 Subject: [PATCH 01/21] ggml : use WARP_SIZE/2 for argmax reduction offset (#18092) --- ggml/src/ggml-cuda/argmax.cu | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ggml/src/ggml-cuda/argmax.cu b/ggml/src/ggml-cuda/argmax.cu index 5340eedc08..51967c667c 100644 --- a/ggml/src/ggml-cuda/argmax.cu +++ b/ggml/src/ggml-cuda/argmax.cu @@ -21,7 +21,7 @@ static __global__ void argmax_f32(const float * __restrict__ x, int32_t * __rest } #pragma unroll - for (int offset = 16; offset > 0; offset >>= 1) { + for (int offset = WARP_SIZE/2; offset > 0; offset >>= 1) { const float val = __shfl_xor_sync(0xFFFFFFFF, maxval, offset, WARP_SIZE); const int col = __shfl_xor_sync(0xFFFFFFFF, argmax, offset, WARP_SIZE); if (val > maxval) { @@ -50,7 +50,7 @@ static __global__ void argmax_f32(const float * __restrict__ x, int32_t * __rest argmax = shared_argmax[lane_id]; } #pragma unroll - for (int offset = 16; offset > 0; offset >>= 1) { + for (int offset = WARP_SIZE/2; offset > 0; offset >>= 1) { const float val = __shfl_xor_sync(0xFFFFFFFF, maxval, offset, WARP_SIZE); const int col = __shfl_xor_sync(0xFFFFFFFF, argmax, offset, WARP_SIZE); if (val > maxval) { From 4b2a4778f81f222c12271ce3b1997990b3071faf Mon Sep 17 00:00:00 2001 From: TrevorS Date: Tue, 16 Dec 2025 22:33:02 -0800 Subject: [PATCH 02/21] arg: allow -kvu flag for llama-perplexity (#18117) The -kvu (--kv-unified) flag is required for hellaswag and winogrande benchmarks which use coupled sequences. Without unified KV cache, these benchmarks fail with: split_equal: sequential split is not supported when there are coupled sequences in the input batch (you may need to use the -kvu flag) This change adds LLAMA_EXAMPLE_PERPLEXITY to the allowed examples for the -kvu argument, enabling its use with llama-perplexity. 
--- common/arg.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/common/arg.cpp b/common/arg.cpp index f2aec895ba..a4ffd5586c 100644 --- a/common/arg.cpp +++ b/common/arg.cpp @@ -1140,7 +1140,7 @@ common_params_context common_params_parser_init(common_params & params, llama_ex [](common_params & params) { params.kv_unified = true; } - ).set_env("LLAMA_ARG_KV_UNIFIED").set_examples({LLAMA_EXAMPLE_SERVER})); + ).set_env("LLAMA_ARG_KV_UNIFIED").set_examples({LLAMA_EXAMPLE_SERVER, LLAMA_EXAMPLE_PERPLEXITY})); add_opt(common_arg( {"--context-shift"}, {"--no-context-shift"}, From 5c0d18881e0e9794c96b2602736b758bac9d9388 Mon Sep 17 00:00:00 2001 From: Naco Siren Date: Wed, 17 Dec 2025 00:14:47 -0800 Subject: [PATCH 03/21] llama.android : Rewrite Android binding (w/o cpu_features dep) (#17413) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * UI: implement basic UI components * util: implement performance monitor; wrap it with a viewmodel * util: implement user preferences utility * UI: implement core flow's screens * UI: add a new MainActivity; update manifest * [WIP] DI: implement simple local vm factory provider * UI: disable triggering drawer via gesture; enable alert dialog on back navigation inside conversation and benchmark * UI: allow drawer's gesture control only on Home and Settings screens; enable alert dialog on back navigation inside conversation and benchmark * UI: split a nested parent settings screen into separate child settings screens * UI: polish system prompt setup UI * Deps: bump Kotlin plugin; introduce KSP; apply in :app subproject * DB: setup Room database * data: introduce repo for System Prompt; flow data from Room to VM * bugfix: properly handle user's quitting conversation screen while tokens in generation * UI: rename `ModeSelection` to `ModelLoading` for better clarity * UI: update app name to be more Arm * UI: polish conversation screen * data: code polish * UI: code polish * 
bugfix: handle user quitting on model loading * UI: locks user in alert dialog when model is unloading * vm: replace token metrics stubs with actual implementation * UI: refactor top app bars * nit: combine temperatureMetrics and useFahrenheit * DI: introduce Hilt plugin + processor + lib dependencies * DI: make app Hilt injectable * DI: make viewmodels Hilt injectable * DI: replace manual DI with Hilt DI * UI: optimize AppContent's composing * bugfix: wait for model to load before navigating to benchmark screen; use NavigationActions instead of raw navController * UI: navigation with more natural animated transitions * DI: Optimize AppModule * Feature: Introduce ModelRepository and ModelsManagementViewModel; update AppModule * UI: polish UI for ModelsManagementScreen; inject ModelsManagementViewModel * DI: abstract the protocol of SystemPromptRepository; update AppModule * data: [WIP] prepare for ModelRepository refactor & impl * data: introduce Model entity and DAO; update DI module * UI: replace Models Management screen's stubbing with instrumentation * UI: polish sort order menu * data: import local model with file picker * bugfix: use List instead of Collection for ModelDao's deletion * data: add a util file for extracting file name & size and model metadata * UI: enrich ModelManagementState; extract filename to show correct importing UI * UI: implement multiple models deletion; update Models Management screen * UI: handle back navigation when user is in multi-selection mode * util: extract file size formatting into ModelUtils * UI: add a confirmation step when user picks a file; refactor model import overlay into AlertDialog * UI: extract a shared ModelCard component * UI: replace model selection screen's data stubbing; add empty view * nit: tidy SystemPromptViewModel * Util: split FileUtils from ModelUtils; extract copy methods into FileUtils * data: pass through getModelById from ModelDao into ModelRepository * core: extract conversation and benchmark logics 
into InferenceManager; add logs and missing state updates in stub InferenceEngine * vm: split mono MainViewModel into separate individual ViewModels * vm: merge SystemPromptViewModel into ModelLoadingViewModel * core: break down InferenceManager due to Interface Segregation Principle * UI: show model card in Model Loading screen * UI: show model card in Conversation screen * UI: unify Model Card components * core: swap in LLamaAndroid and mark stub engine for testing only * data: allow canceling the ongoing model import * UI: update UI ongoing model import's cancellation * LLama: update engine state after handling the cancellation of sendUserPrompt * VM: handle the cancellation of ongoing token generation * LLama: refactor loadModel by splitting the system prompt setting into a separate method * feature: check for available space before copying local model * UI: centralize the AppScaffold and modularize its configs * UI: refactor BottomBarConfig.ModelsManagement APIs * UI: combine TopBarConfig and BottomBarConfig into each route's ScaffoldConfig * UI: replace ugly optional as casts in AppScaffold with extension functions * UI: fix the typo `totalGb` in `StorageMetrics` * UI: remove code duplication in sort menu * LLama: add ModelUnloadingState to engine State; add missing state checks in stub engine; fix instrumentation engine's error messages * UI: refactor back handling by removing centralized BackHandlerSetup and UnloadModelConfirmationDialog from AppContent * UI: implement BenchmarkScreen's individual back handling * LLama: add a new Initializing state; ; add two extension properties; rename LibraryLoaded state to Initialized * UI: Introduce an abstract ViewModel to handle additional model unloading logics * UI: expose a single facade ModelUnloadDialogHandler; move UnloadModelState into ModelUnloadingViewModel.kt * UI: migrate ModelLoadingScreen onto ModelLoadingViewModel; update & refine ModelLoadingScreen * UI: migrate ConversationViewModel onto 
ModelLoadingViewModel; update & refine ConversationScreen * nit: extract app name into a constant value; remove unused onBackPressed callbacks * UI: update AppContent to pass in correct navigation callbacks * nit: polish ModelLoadingScreen UI * core: throw Exception instead of returning null if model fails to load * navigation: sink model loading state management from AppContent down into ModelLoadingScreen; pass ModelLoadingMetrics to Benchmark and Conversation screens * gguf: add GGUF metadata data holder and its corresponding extractor implementation * DB: introduce Kotlin serialization extension's library and plugin; add Room runtime library * GGUF: make GgufMetadata serializable in order to be compatible with Room * nit: refactor data.local package structure * nit: rename lastUsed field to dateLastUsed; add dateAdded field * UI: refactor ModelCard UI to show GGUF metadata * UI: update ModelSelectionScreen with a preselect mechanism * UI: polish model card * nit: allow deselect model on Model Selection screen * nit: revert accidental committing of debug code * UI: polish ModelLoading screen * util: extract formatting helper functions from FileUtils into a new FormatUtils * UI: polish model cards on Benchmark and Conversation screens to show model loading metrics * UI: show a Snack bar to warn user that system prompt is not always supported * UI: handle back press on Model Selection screen * UI: finally support theme modes; remove hardcoded color schemes, default to dynamic color scheme implementation * feature: support searching on Model Selection screen * nit: move scaffold related UI components into a separate package * UI: extract InfoView out into a separate file for reusability * data: move Model related actions (query, filter, sort) into ModelInfo file * UI: animate FAB on model preselection states * feature: support filtering in Model Management screen * ui: show empty models info in Model Management screen * ui: add filter off icon to "Clear filters" 
menu item * [WIP] ui: polish Benchmark screen; implement its bottom app bar * ui: polish Benchmark screen; implement its bottom app bar's rerun and share * nit: disable mode selection's radio buttons when loading model * feature: implement Conversation screen's bottom app bar * pkg: restructure BottomAppBars into separate files in a child package * pkg: restructure TopBarApps into separate files in a child package * pkg: restructure system metrics into a separate file * UI: polish Conversation screen * data: update system prompt presets * UI: allow hide or show model card on Conversation & Benchmark screens; fix message arrangement * data: update & enhance system prompt presets * deps: introduce Retrofit2 * data: implement HuggingFace data model, data source with Retrofit API * data: update Model data repository to support fetching HuggingFace models * [WIP] UI: replace the HuggingFace stub in Model Management screen with actual API call * UI: map language codes into country Emojis * ui: add "clear results" action to Benchmark screen * nit: print current pp & tg in llama-bench * UI: disable landscape mode; prevent duplicated benchmark running * llama: migrate C/CXX flags into CMakeList * [WIP] llama: ABI split builds five .so artifacts. However, all .so are performing on SVE level * [WIP] llama: ABI split where five tiers are built sequentially. * [WIP] llama: disable OpenMP in ABI split since most SoCs are big.LITTLE * [WIP] llama: enable KleidiAI and disable tier 4 due to `+sve+sve2` bug caused by `ggml_add_cpu_backend_variant_impl` as explained below ```CMake if (NOT SME_ENABLED MATCHES -1) ... set(PRIVATE_ARCH_FLAGS "-fno-tree-vectorize;${PRIVATE_ARCH_FLAGS}+sve+sve2") ... 
``` * core: add Google's cpu_features as a submodule * core: implement cpu_detector native lib * core: swap out hardcoded LlamaAndroid library loading * core: add back OpenMP due to huge perf loss on TG128 * misc: reorg the pkg structure * misc: rename LlamaAndroid related class to InferenceEngine prefixes * [WIP] lib: move GgufMetadata into the lib submodule * lib: expose GgufMetadataReader as interface only * lib: replace the naive & plain SharedPreferences with DataStore implementation * lib: hide the internal implementations, only expose a facade and interfaces * lib: expose Arm features * di: add a stub TierDetection; provide both actual impl and stub in AppModule * UI: add visualizer UI for Arm features * misc: UI polish * lib: refactored InferenceEngineLoader; added a `NONE` Llama Tier * UI: support `NONE` Llama Tier in general settings * lib: optimize engine loader; always perform a fresh detection when cache is null * remote: add HuggingFaceModelDetails data class * remote: refine HuggingFaceModel data class * nit: remove `trendingScore` field from HuggingFace model entities, weird... * remote: refactor HuggingFaceApiService; implement download feature in HuggingFaceRemoteDataSource * remote: fix the incorrect parse of HuggingFace's inconsistent & weird JSON response * UI: scaffold Models Management screen and view model * UI: implement a dialog UI to show fetched HuggingFace models. * UI: use a broadcast receiver to listen for download complete events and show local import dialog. * data: handle network exceptions elegantly * pkg: restructure `data`'s packages * data: extract local file info, copy and cleanup logics into LocalFileDataSource * nit: minor UI patch; add missing comments * bugfix: tapping "Home" in navigation drawer should simply close it without any navigation action. 
* UI: improve autoscroll during token generation * lib: tested on JFrog Artifactory for Maven publishing * UI: show RAM warning if model too large * UI: polish model management screen's error dialog * util: add more items into the mapping table of ISO 639-1 language code to ISO 3166-1 country code * llm: properly propagate error to UI upon failing to load selected model * UI: avoid duplicated calculation of token metrics * lib: read & validate the magic number from the picked source file before executing the import * UI: add "Learn More" hyperlinks to Error dialog upon model import failures * lib: refactor the GgufMetadataReader to take InputStream instead of absolute path as argument * lib: fix the `SIMD` typo in Tier description * core: verify model file path is readable * lib: add UnsupportedArchitectureException for triaged error message * util: split FormatUtils into multiple utils for better readability * UI: change benchmark screen from raw markdown to table view * bugfix: reset preselection upon running the preselected model * misc: linter issue * bugfix: fix the malfunctioning monitoring switch * UI: update Arm features indicator; fix the broken hyperlinks * UI: add quick action buttons to benchmark screen's result card * UI: hide share fab after clearing all benchmark results * UI: fix the model unload dialog message; elevate the model card and hide it by default on Conversation screen; * UI: hide the stubbing actions in Conversation screen * UI: add show/hide stats control to conversation screen's assistant message bubble; fix placeholder * UI: add a info button to explain token metrics * misc: remove the redundant `Companion` added due to refactoring * UI: show corresponding system metrics detailed info upon tapping RAM / storage / temperature indicator * UI: add info button to System Prompt switch; expand the model card by default * UI: disable tag & language chips; add section headers to explain what they are * misc: replace top bar indicator's spacer 
with padding * UI: merge the Model Selection and Model Management into a unified Models screen * UI: split the ModelsManagementViewModel from a unified ModelsViewModel due to huge complexity * UI: add model loading in progress view; polish the empty model info view * UI: polish the bottom bars and info view when no models found; show loading in progress while fetching models * build: [BREAKING] bump the versions of libraries and plugins * UI: fix the breaking build * UI: add Tooltip on Import FAB for user onboarding * UI: adds AppPreferences to track user onboarding status * UI: tracks user's first success on importing a model * data: add hand crafted rules to filter the models fetched from HuggingFace API * UI: update app name & about; polish top bars' indicators & buttons * UI: polish Hugging Face download dialog UI * UX: implement onboarding tooltips for model import and onboarding * misc: use sentence case for CTA button labels * [WIP] UI: add Arm color palette from Philip.Watson3 * UI: address Rojin's UX feedbacks * UI: address Rojin's UX feedbacks - part 2 * UI: update Arm color palette from Philip.Watson3 * data: make sure fetch preselected models in the same order of their IDs * UI: fix UI issues in the generic settings screen and navigation drawer * nit: address Rojin's feedbacks on model import message again * nit: append `®` to all `Arm` labels * UI: extract a reusable InfoAlertDialog * core: support GGML_CPU_ALL_VARIANTS on Android! 
* core: restructure Kleidi-Llama library * core: organizing cmake arguments * data: sort preselected models according to device's available RAM * app: update adaptive + themed + legacy icons and app name * UI: fix the font size auto scaling for ArmFeaturesVisualizer * core: further improve the performance on native methods * UI: minor color palette changes; emphasize the bottom bar FABs; fix Settings Screen menu item label * UI: make more room for assistant message bubble's width * UI: better usage of tertiary colors to highlight model cards but not for warnings * UI: fix the layout issue on large font sizes * lib: support x86-64 by dynamically set Arm related definitions * lib: replace the factory pattern for deprecated tiered lib loading with single instance pattern * llama: update the library name in JNI and CMake project * llama: update the library's package name and namespace * llama: update the app's package name and namespace * app: bump ksp version * app: remove deprecated SystemUIController from accompanist by migrating to EdgeToEdge * app: extract AppContent from MainActivity to a separate file in ui package * lib: add File version for GGUF Magic number verification * lib: perform engine state check inclusively instead of exclusively * lib: change `LlamaTier` to `ArmCpuTier` * lib: remove kleidi-llama related namings * cleanup: remove Arm AI Chat/Playground app source code; replace with the basic sample app from https://github.com/hanyin-arm/Arm-AI-Chat-Sample Note: the full Google Play version of AI Chat app will be open sourced in another repo soon, therefore didn't go through the trouble of pruning the history using `git filter-repo` here. 
* [WIP] doc: update main and Android README docs; add self to code owners * lib: revert System.load back to System.loadLibrary * jni: introduce a logging util to filter different logging levels on different build types * lib: enable app optimization * doc: replace stub Google Play app URL with the actual link add screenshots; add my GitHub ID to maintainer list * Remove cpu_features * Fix linters issues in editorconfig-checker job https://github.com/ggml-org/llama.cpp/actions/runs/19548770247/job/55974800633?pr=17413 * Remove unnecessary Android CMake flag * purge include/cpu_features directory --------- Co-authored-by: Han Yin --- CODEOWNERS | 2 +- README.md | 1 + docs/android.md | 20 + examples/llama.android/app/build.gradle.kts | 52 +- examples/llama.android/app/proguard-rules.pro | 8 + .../app/src/main/AndroidManifest.xml | 13 +- .../java/com/example/llama/Downloadable.kt | 119 ---- .../java/com/example/llama/MainActivity.kt | 359 +++++++---- .../java/com/example/llama/MainViewModel.kt | 105 ---- .../java/com/example/llama/MessageAdapter.kt | 51 ++ .../java/com/example/llama/ui/theme/Color.kt | 11 - .../java/com/example/llama/ui/theme/Theme.kt | 70 --- .../java/com/example/llama/ui/theme/Type.kt | 34 - .../res/drawable/bg_assistant_message.xml | 4 + .../src/main/res/drawable/bg_user_message.xml | 4 + .../res/drawable/outline_folder_open_24.xml | 10 + .../src/main/res/drawable/outline_send_24.xml | 11 + .../app/src/main/res/layout/activity_main.xml | 76 +++ .../res/layout/item_message_assistant.xml | 15 + .../src/main/res/layout/item_message_user.xml | 15 + .../app/src/main/res/values/strings.xml | 2 +- .../app/src/main/res/values/themes.xml | 7 +- examples/llama.android/build.gradle.kts | 6 +- examples/llama.android/gradle.properties | 1 + .../llama.android/gradle/libs.versions.toml | 53 ++ .../gradle/wrapper/gradle-wrapper.properties | 4 +- .../llama.android/{llama => lib}/.gitignore | 0 examples/llama.android/lib/build.gradle.kts | 78 +++ 
examples/llama.android/lib/consumer-rules.pro | 8 + .../{llama => lib}/proguard-rules.pro | 0 .../llama/cpp/ExampleInstrumentedTest.kt | 0 .../src/main/AndroidManifest.xml | 0 .../lib/src/main/cpp/CMakeLists.txt | 56 ++ .../lib/src/main/cpp/ai_chat.cpp | 565 +++++++++++++++++ .../llama.android/lib/src/main/cpp/logging.h | 61 ++ .../src/main/java/com/arm/aichat/AiChat.kt | 14 + .../java/com/arm/aichat/InferenceEngine.kt | 89 +++ .../main/java/com/arm/aichat/gguf/FileType.kt | 61 ++ .../java/com/arm/aichat/gguf/GgufMetadata.kt | 132 ++++ .../com/arm/aichat/gguf/GgufMetadataReader.kt | 77 +++ .../aichat/internal/InferenceEngineImpl.kt | 309 +++++++++ .../internal/gguf/GgufMetadataReaderImpl.kt | 590 ++++++++++++++++++ .../java/android/llama/cpp/ExampleUnitTest.kt | 0 examples/llama.android/llama/build.gradle.kts | 71 --- .../llama.android/llama/consumer-rules.pro | 0 .../llama/src/main/cpp/CMakeLists.txt | 53 -- .../llama/src/main/cpp/llama-android.cpp | 452 -------------- .../java/android/llama/cpp/LLamaAndroid.kt | 180 ------ examples/llama.android/settings.gradle.kts | 6 +- ggml/src/CMakeLists.txt | 3 + 50 files changed, 2588 insertions(+), 1270 deletions(-) delete mode 100644 examples/llama.android/app/src/main/java/com/example/llama/Downloadable.kt delete mode 100644 examples/llama.android/app/src/main/java/com/example/llama/MainViewModel.kt create mode 100644 examples/llama.android/app/src/main/java/com/example/llama/MessageAdapter.kt delete mode 100644 examples/llama.android/app/src/main/java/com/example/llama/ui/theme/Color.kt delete mode 100644 examples/llama.android/app/src/main/java/com/example/llama/ui/theme/Theme.kt delete mode 100644 examples/llama.android/app/src/main/java/com/example/llama/ui/theme/Type.kt create mode 100644 examples/llama.android/app/src/main/res/drawable/bg_assistant_message.xml create mode 100644 examples/llama.android/app/src/main/res/drawable/bg_user_message.xml create mode 100644 
examples/llama.android/app/src/main/res/drawable/outline_folder_open_24.xml create mode 100644 examples/llama.android/app/src/main/res/drawable/outline_send_24.xml create mode 100644 examples/llama.android/app/src/main/res/layout/activity_main.xml create mode 100644 examples/llama.android/app/src/main/res/layout/item_message_assistant.xml create mode 100644 examples/llama.android/app/src/main/res/layout/item_message_user.xml create mode 100644 examples/llama.android/gradle/libs.versions.toml rename examples/llama.android/{llama => lib}/.gitignore (100%) create mode 100644 examples/llama.android/lib/build.gradle.kts create mode 100644 examples/llama.android/lib/consumer-rules.pro rename examples/llama.android/{llama => lib}/proguard-rules.pro (100%) rename examples/llama.android/{llama => lib}/src/androidTest/java/android/llama/cpp/ExampleInstrumentedTest.kt (100%) rename examples/llama.android/{llama => lib}/src/main/AndroidManifest.xml (100%) create mode 100644 examples/llama.android/lib/src/main/cpp/CMakeLists.txt create mode 100644 examples/llama.android/lib/src/main/cpp/ai_chat.cpp create mode 100644 examples/llama.android/lib/src/main/cpp/logging.h create mode 100644 examples/llama.android/lib/src/main/java/com/arm/aichat/AiChat.kt create mode 100644 examples/llama.android/lib/src/main/java/com/arm/aichat/InferenceEngine.kt create mode 100644 examples/llama.android/lib/src/main/java/com/arm/aichat/gguf/FileType.kt create mode 100644 examples/llama.android/lib/src/main/java/com/arm/aichat/gguf/GgufMetadata.kt create mode 100644 examples/llama.android/lib/src/main/java/com/arm/aichat/gguf/GgufMetadataReader.kt create mode 100644 examples/llama.android/lib/src/main/java/com/arm/aichat/internal/InferenceEngineImpl.kt create mode 100644 examples/llama.android/lib/src/main/java/com/arm/aichat/internal/gguf/GgufMetadataReaderImpl.kt rename examples/llama.android/{llama => lib}/src/test/java/android/llama/cpp/ExampleUnitTest.kt (100%) delete mode 100644 
examples/llama.android/llama/build.gradle.kts delete mode 100644 examples/llama.android/llama/consumer-rules.pro delete mode 100644 examples/llama.android/llama/src/main/cpp/CMakeLists.txt delete mode 100644 examples/llama.android/llama/src/main/cpp/llama-android.cpp delete mode 100644 examples/llama.android/llama/src/main/java/android/llama/cpp/LLamaAndroid.kt diff --git a/CODEOWNERS b/CODEOWNERS index 8a0c98c968..750096d9a1 100644 --- a/CODEOWNERS +++ b/CODEOWNERS @@ -32,7 +32,7 @@ /examples/export-docs/ @ggerganov /examples/gen-docs/ @ggerganov /examples/gguf/ @ggerganov -/examples/llama.android/ @ggerganov +/examples/llama.android/ @ggerganov @hanyin-arm @naco-siren /examples/llama.swiftui/ @ggerganov /examples/llama.vim @ggerganov /examples/lookahead/ @ggerganov diff --git a/README.md b/README.md index 5f2076d0a3..ed956bb02e 100644 --- a/README.md +++ b/README.md @@ -190,6 +190,7 @@ Instructions for adding support for new models: [HOWTO-add-model.md](docs/develo - Swift [ShenghaiWang/SwiftLlama](https://github.com/ShenghaiWang/SwiftLlama) - Delphi [Embarcadero/llama-cpp-delphi](https://github.com/Embarcadero/llama-cpp-delphi) - Go (no CGo needed): [hybridgroup/yzma](https://github.com/hybridgroup/yzma) +- Android: [llama.android](/examples/llama.android) diff --git a/docs/android.md b/docs/android.md index d2a835653f..28b966ffc7 100644 --- a/docs/android.md +++ b/docs/android.md @@ -1,6 +1,26 @@ # Android +## Build with Android Studio + +Import the `examples/llama.android` directory into Android Studio, then perform a Gradle sync and build the project. +![Project imported into Android Studio](./android/imported-into-android-studio.png) + +This Android binding supports hardware acceleration up to `SME2` for **Arm** and `AMX` for **x86-64** CPUs on Android and ChromeOS devices. +It automatically detects the host's hardware to load compatible kernels. 
As a result, it runs seamlessly on both the latest premium devices and older devices that may lack modern CPU features or have limited RAM, without requiring any manual configuration. + +A minimal Android app frontend is included to showcase the binding’s core functionalities: +1. **Parse GGUF metadata** via `GgufMetadataReader` from either a `ContentResolver` provided `Uri` or a local `File`. +2. **Obtain a `TierDetection` or `InferenceEngine`** instance through the high-level facade APIs. +3. **Send a raw user prompt** for automatic template formatting, prefill, and decoding. Then collect the generated tokens in a Kotlin `Flow`. + +For a production-ready experience that leverages advanced features such as system prompts and benchmarks, check out [Arm AI Chat](https://play.google.com/store/apps/details?id=com.arm.aichat) on Google Play. +This project is made possible through a collaborative effort by Arm's **CT-ML**, **CE-ML** and **STE** groups: + +| ![Home screen](./android/arm-ai-chat-home-screen.png) | ![System prompt](./android/system-prompt-setup.png) | !["Haiku"](./android/chat-with-system-prompt-haiku.png) | +|:------------------------------------------------------:|:----------------------------------------------------:|:--------------------------------------------------------:| +| Home screen | System prompt | "Haiku" | + ## Build on Android using Termux [Termux](https://termux.dev/en/) is an Android terminal emulator and Linux environment app (no root required). As of writing, Termux is available experimentally in the Google Play Store; otherwise, it may be obtained directly from the project repo or on F-Droid. 
diff --git a/examples/llama.android/app/build.gradle.kts b/examples/llama.android/app/build.gradle.kts index 8d1b37195e..3524fe39c4 100644 --- a/examples/llama.android/app/build.gradle.kts +++ b/examples/llama.android/app/build.gradle.kts @@ -1,16 +1,18 @@ plugins { - id("com.android.application") - id("org.jetbrains.kotlin.android") + alias(libs.plugins.android.application) + alias(libs.plugins.jetbrains.kotlin.android) } android { namespace = "com.example.llama" - compileSdk = 34 + compileSdk = 36 defaultConfig { - applicationId = "com.example.llama" + applicationId = "com.example.llama.aichat" + minSdk = 33 - targetSdk = 34 + targetSdk = 36 + versionCode = 1 versionName = "1.0" @@ -21,8 +23,17 @@ android { } buildTypes { + debug { + isMinifyEnabled = true + isShrinkResources = true + proguardFiles( + getDefaultProguardFile("proguard-android.txt"), + "proguard-rules.pro" + ) + } release { - isMinifyEnabled = false + isMinifyEnabled = true + isShrinkResources = true proguardFiles( getDefaultProguardFile("proguard-android-optimize.txt"), "proguard-rules.pro" @@ -36,30 +47,15 @@ android { kotlinOptions { jvmTarget = "1.8" } - buildFeatures { - compose = true - } - composeOptions { - kotlinCompilerExtensionVersion = "1.5.1" - } } dependencies { + implementation(libs.bundles.androidx) + implementation(libs.material) - implementation("androidx.core:core-ktx:1.12.0") - implementation("androidx.lifecycle:lifecycle-runtime-ktx:2.6.2") - implementation("androidx.activity:activity-compose:1.8.2") - implementation(platform("androidx.compose:compose-bom:2023.08.00")) - implementation("androidx.compose.ui:ui") - implementation("androidx.compose.ui:ui-graphics") - implementation("androidx.compose.ui:ui-tooling-preview") - implementation("androidx.compose.material3:material3") - implementation(project(":llama")) - testImplementation("junit:junit:4.13.2") - androidTestImplementation("androidx.test.ext:junit:1.1.5") - 
androidTestImplementation("androidx.test.espresso:espresso-core:3.5.1") - androidTestImplementation(platform("androidx.compose:compose-bom:2023.08.00")) - androidTestImplementation("androidx.compose.ui:ui-test-junit4") - debugImplementation("androidx.compose.ui:ui-tooling") - debugImplementation("androidx.compose.ui:ui-test-manifest") + implementation(project(":lib")) + + testImplementation(libs.junit) + androidTestImplementation(libs.androidx.junit) + androidTestImplementation(libs.androidx.espresso.core) } diff --git a/examples/llama.android/app/proguard-rules.pro b/examples/llama.android/app/proguard-rules.pro index f1b424510d..358020d2d2 100644 --- a/examples/llama.android/app/proguard-rules.pro +++ b/examples/llama.android/app/proguard-rules.pro @@ -19,3 +19,11 @@ # If you keep the line number information, uncomment this to # hide the original source file name. #-renamesourcefileattribute SourceFile + +-keep class com.arm.aichat.* { *; } +-keep class com.arm.aichat.gguf.* { *; } + +-assumenosideeffects class android.util.Log { + public static int v(...); + public static int d(...); +} diff --git a/examples/llama.android/app/src/main/AndroidManifest.xml b/examples/llama.android/app/src/main/AndroidManifest.xml index 41a358a299..8f7c606b41 100644 --- a/examples/llama.android/app/src/main/AndroidManifest.xml +++ b/examples/llama.android/app/src/main/AndroidManifest.xml @@ -1,24 +1,21 @@ - - - + + android:exported="true"> diff --git a/examples/llama.android/app/src/main/java/com/example/llama/Downloadable.kt b/examples/llama.android/app/src/main/java/com/example/llama/Downloadable.kt deleted file mode 100644 index 78c231ae55..0000000000 --- a/examples/llama.android/app/src/main/java/com/example/llama/Downloadable.kt +++ /dev/null @@ -1,119 +0,0 @@ -package com.example.llama - -import android.app.DownloadManager -import android.net.Uri -import android.util.Log -import androidx.compose.material3.Button -import androidx.compose.material3.Text -import 
androidx.compose.runtime.Composable -import androidx.compose.runtime.getValue -import androidx.compose.runtime.mutableDoubleStateOf -import androidx.compose.runtime.mutableStateOf -import androidx.compose.runtime.remember -import androidx.compose.runtime.rememberCoroutineScope -import androidx.compose.runtime.setValue -import androidx.core.database.getLongOrNull -import androidx.core.net.toUri -import kotlinx.coroutines.delay -import kotlinx.coroutines.launch -import java.io.File - -data class Downloadable(val name: String, val source: Uri, val destination: File) { - companion object { - @JvmStatic - private val tag: String? = this::class.qualifiedName - - sealed interface State - data object Ready: State - data class Downloading(val id: Long): State - data class Downloaded(val downloadable: Downloadable): State - data class Error(val message: String): State - - @JvmStatic - @Composable - fun Button(viewModel: MainViewModel, dm: DownloadManager, item: Downloadable) { - var status: State by remember { - mutableStateOf( - if (item.destination.exists()) Downloaded(item) - else Ready - ) - } - var progress by remember { mutableDoubleStateOf(0.0) } - - val coroutineScope = rememberCoroutineScope() - - suspend fun waitForDownload(result: Downloading, item: Downloadable): State { - while (true) { - val cursor = dm.query(DownloadManager.Query().setFilterById(result.id)) - - if (cursor == null) { - Log.e(tag, "dm.query() returned null") - return Error("dm.query() returned null") - } - - if (!cursor.moveToFirst() || cursor.count < 1) { - cursor.close() - Log.i(tag, "cursor.moveToFirst() returned false or cursor.count < 1, download canceled?") - return Ready - } - - val pix = cursor.getColumnIndex(DownloadManager.COLUMN_BYTES_DOWNLOADED_SO_FAR) - val tix = cursor.getColumnIndex(DownloadManager.COLUMN_TOTAL_SIZE_BYTES) - val sofar = cursor.getLongOrNull(pix) ?: 0 - val total = cursor.getLongOrNull(tix) ?: 1 - cursor.close() - - if (sofar == total) { - return Downloaded(item) - 
} - - progress = (sofar * 1.0) / total - - delay(1000L) - } - } - - fun onClick() { - when (val s = status) { - is Downloaded -> { - viewModel.load(item.destination.path) - } - - is Downloading -> { - coroutineScope.launch { - status = waitForDownload(s, item) - } - } - - else -> { - item.destination.delete() - - val request = DownloadManager.Request(item.source).apply { - setTitle("Downloading model") - setDescription("Downloading model: ${item.name}") - setAllowedNetworkTypes(DownloadManager.Request.NETWORK_WIFI) - setDestinationUri(item.destination.toUri()) - } - - viewModel.log("Saving ${item.name} to ${item.destination.path}") - Log.i(tag, "Saving ${item.name} to ${item.destination.path}") - - val id = dm.enqueue(request) - status = Downloading(id) - onClick() - } - } - } - - Button(onClick = { onClick() }, enabled = status !is Downloading) { - when (status) { - is Downloading -> Text(text = "Downloading ${(progress * 100).toInt()}%") - is Downloaded -> Text("Load ${item.name}") - is Ready -> Text("Download ${item.name}") - is Error -> Text("Download ${item.name}") - } - } - } - - } -} diff --git a/examples/llama.android/app/src/main/java/com/example/llama/MainActivity.kt b/examples/llama.android/app/src/main/java/com/example/llama/MainActivity.kt index 9da04f7d3c..52c5dc2154 100644 --- a/examples/llama.android/app/src/main/java/com/example/llama/MainActivity.kt +++ b/examples/llama.android/app/src/main/java/com/example/llama/MainActivity.kt @@ -1,154 +1,257 @@ package com.example.llama -import android.app.ActivityManager -import android.app.DownloadManager -import android.content.ClipData -import android.content.ClipboardManager import android.net.Uri import android.os.Bundle -import android.os.StrictMode -import android.os.StrictMode.VmPolicy -import android.text.format.Formatter -import androidx.activity.ComponentActivity -import androidx.activity.compose.setContent -import androidx.activity.viewModels -import androidx.compose.foundation.layout.Box -import 
androidx.compose.foundation.layout.Column -import androidx.compose.foundation.layout.Row -import androidx.compose.foundation.layout.fillMaxSize -import androidx.compose.foundation.layout.padding -import androidx.compose.foundation.lazy.LazyColumn -import androidx.compose.foundation.lazy.items -import androidx.compose.foundation.lazy.rememberLazyListState -import androidx.compose.material3.Button -import androidx.compose.material3.LocalContentColor -import androidx.compose.material3.MaterialTheme -import androidx.compose.material3.OutlinedTextField -import androidx.compose.material3.Surface -import androidx.compose.material3.Text -import androidx.compose.runtime.Composable -import androidx.compose.ui.Modifier -import androidx.compose.ui.unit.dp -import androidx.core.content.getSystemService -import com.example.llama.ui.theme.LlamaAndroidTheme +import android.util.Log +import android.widget.EditText +import android.widget.TextView +import android.widget.Toast +import androidx.activity.enableEdgeToEdge +import androidx.activity.result.contract.ActivityResultContracts +import androidx.appcompat.app.AppCompatActivity +import androidx.lifecycle.lifecycleScope +import androidx.recyclerview.widget.LinearLayoutManager +import androidx.recyclerview.widget.RecyclerView +import com.arm.aichat.AiChat +import com.arm.aichat.InferenceEngine +import com.arm.aichat.gguf.GgufMetadata +import com.arm.aichat.gguf.GgufMetadataReader +import com.google.android.material.floatingactionbutton.FloatingActionButton +import kotlinx.coroutines.Dispatchers +import kotlinx.coroutines.flow.onCompletion +import kotlinx.coroutines.launch +import kotlinx.coroutines.withContext import java.io.File +import java.io.FileOutputStream +import java.io.InputStream +import java.util.UUID -class MainActivity( - activityManager: ActivityManager? = null, - downloadManager: DownloadManager? = null, - clipboardManager: ClipboardManager? = null, -): ComponentActivity() { - private val tag: String? 
= this::class.simpleName +class MainActivity : AppCompatActivity() { - private val activityManager by lazy { activityManager ?: getSystemService()!! } - private val downloadManager by lazy { downloadManager ?: getSystemService()!! } - private val clipboardManager by lazy { clipboardManager ?: getSystemService()!! } + // Android views + private lateinit var ggufTv: TextView + private lateinit var messagesRv: RecyclerView + private lateinit var userInputEt: EditText + private lateinit var userActionFab: FloatingActionButton - private val viewModel: MainViewModel by viewModels() + // Arm AI Chat inference engine + private lateinit var engine: InferenceEngine - // Get a MemoryInfo object for the device's current memory status. - private fun availableMemory(): ActivityManager.MemoryInfo { - return ActivityManager.MemoryInfo().also { memoryInfo -> - activityManager.getMemoryInfo(memoryInfo) - } - } + // Conversation states + private var isModelReady = false + private val messages = mutableListOf() + private val lastAssistantMsg = StringBuilder() + private val messageAdapter = MessageAdapter(messages) override fun onCreate(savedInstanceState: Bundle?) 
{ super.onCreate(savedInstanceState) + enableEdgeToEdge() + setContentView(R.layout.activity_main) - StrictMode.setVmPolicy( - VmPolicy.Builder(StrictMode.getVmPolicy()) - .detectLeakedClosableObjects() - .build() - ) + // Find views + ggufTv = findViewById(R.id.gguf) + messagesRv = findViewById(R.id.messages) + messagesRv.layoutManager = LinearLayoutManager(this) + messagesRv.adapter = messageAdapter + userInputEt = findViewById(R.id.user_input) + userActionFab = findViewById(R.id.fab) - val free = Formatter.formatFileSize(this, availableMemory().availMem) - val total = Formatter.formatFileSize(this, availableMemory().totalMem) - - viewModel.log("Current memory: $free / $total") - viewModel.log("Downloads directory: ${getExternalFilesDir(null)}") - - val extFilesDir = getExternalFilesDir(null) - - val models = listOf( - Downloadable( - "Phi-2 7B (Q4_0, 1.6 GiB)", - Uri.parse("https://huggingface.co/ggml-org/models/resolve/main/phi-2/ggml-model-q4_0.gguf?download=true"), - File(extFilesDir, "phi-2-q4_0.gguf"), - ), - Downloadable( - "TinyLlama 1.1B (f16, 2.2 GiB)", - Uri.parse("https://huggingface.co/ggml-org/models/resolve/main/tinyllama-1.1b/ggml-model-f16.gguf?download=true"), - File(extFilesDir, "tinyllama-1.1-f16.gguf"), - ), - Downloadable( - "Phi 2 DPO (Q3_K_M, 1.48 GiB)", - Uri.parse("https://huggingface.co/TheBloke/phi-2-dpo-GGUF/resolve/main/phi-2-dpo.Q3_K_M.gguf?download=true"), - File(extFilesDir, "phi-2-dpo.Q3_K_M.gguf") - ), - ) - - setContent { - LlamaAndroidTheme { - // A surface container using the 'background' color from the theme - Surface( - modifier = Modifier.fillMaxSize(), - color = MaterialTheme.colorScheme.background - ) { - MainCompose( - viewModel, - clipboardManager, - downloadManager, - models, - ) - } + // Arm AI Chat initialization + lifecycleScope.launch(Dispatchers.Default) { + engine = AiChat.getInferenceEngine(applicationContext) + } + // Upon CTA button tapped + userActionFab.setOnClickListener { + if (isModelReady) { + // If 
model is ready, validate input and send to engine + handleUserInput() + } else { + // Otherwise, prompt user to select a GGUF metadata on the device + getContent.launch(arrayOf("*/*")) } } } -} -@Composable -fun MainCompose( - viewModel: MainViewModel, - clipboard: ClipboardManager, - dm: DownloadManager, - models: List -) { - Column { - val scrollState = rememberLazyListState() + private val getContent = registerForActivityResult( + ActivityResultContracts.OpenDocument() + ) { uri -> + Log.i(TAG, "Selected file uri:\n $uri") + uri?.let { handleSelectedModel(it) } + } - Box(modifier = Modifier.weight(1f)) { - LazyColumn(state = scrollState) { - items(viewModel.messages) { - Text( - it, - style = MaterialTheme.typography.bodyLarge.copy(color = LocalContentColor.current), - modifier = Modifier.padding(16.dp) - ) + /** + * Handles the file Uri from [getContent] result + */ + private fun handleSelectedModel(uri: Uri) { + // Update UI states + userActionFab.isEnabled = false + userInputEt.hint = "Parsing GGUF..." 
+ ggufTv.text = "Parsing metadata from selected file \n$uri" + + lifecycleScope.launch(Dispatchers.IO) { + // Parse GGUF metadata + Log.i(TAG, "Parsing GGUF metadata...") + contentResolver.openInputStream(uri)?.use { + GgufMetadataReader.create().readStructuredMetadata(it) + }?.let { metadata -> + // Update UI to show GGUF metadata to user + Log.i(TAG, "GGUF parsed: \n$metadata") + withContext(Dispatchers.Main) { + ggufTv.text = metadata.toString() } - } - } - OutlinedTextField( - value = viewModel.message, - onValueChange = { viewModel.updateMessage(it) }, - label = { Text("Message") }, - ) - Row { - Button({ viewModel.send() }) { Text("Send") } - Button({ viewModel.bench(8, 4, 1) }) { Text("Bench") } - Button({ viewModel.clear() }) { Text("Clear") } - Button({ - viewModel.messages.joinToString("\n").let { - clipboard.setPrimaryClip(ClipData.newPlainText("", it)) - } - }) { Text("Copy") } - } - Column { - for (model in models) { - Downloadable.Button(viewModel, dm, model) + // Ensure the model file is available + val modelName = metadata.filename() + FILE_EXTENSION_GGUF + contentResolver.openInputStream(uri)?.use { input -> + ensureModelFile(modelName, input) + }?.let { modelFile -> + loadModel(modelName, modelFile) + + withContext(Dispatchers.Main) { + isModelReady = true + userInputEt.hint = "Type and send a message!" + userInputEt.isEnabled = true + userActionFab.setImageResource(R.drawable.outline_send_24) + userActionFab.isEnabled = true + } + } } } } + + /** + * Prepare the model file within app's private storage + */ + private suspend fun ensureModelFile(modelName: String, input: InputStream) = + withContext(Dispatchers.IO) { + File(ensureModelsDirectory(), modelName).also { file -> + // Copy the file into local storage if not yet done + if (!file.exists()) { + Log.i(TAG, "Start copying file to $modelName") + withContext(Dispatchers.Main) { + userInputEt.hint = "Copying file..." 
+ } + + FileOutputStream(file).use { input.copyTo(it) } + Log.i(TAG, "Finished copying file to $modelName") + } else { + Log.i(TAG, "File already exists $modelName") + } + } + } + + /** + * Load the model file from the app private storage + */ + private suspend fun loadModel(modelName: String, modelFile: File) = + withContext(Dispatchers.IO) { + Log.i(TAG, "Loading model $modelName") + withContext(Dispatchers.Main) { + userInputEt.hint = "Loading model..." + } + engine.loadModel(modelFile.path) + } + + /** + * Validate and send the user message into [InferenceEngine] + */ + private fun handleUserInput() { + userInputEt.text.toString().also { userSsg -> + if (userSsg.isEmpty()) { + Toast.makeText(this, "Input message is empty!", Toast.LENGTH_SHORT).show() + } else { + userInputEt.text = null + userActionFab.isEnabled = false + + // Update message states + messages.add(Message(UUID.randomUUID().toString(), userSsg, true)) + lastAssistantMsg.clear() + messages.add(Message(UUID.randomUUID().toString(), lastAssistantMsg.toString(), false)) + + lifecycleScope.launch(Dispatchers.Default) { + engine.sendUserPrompt(userSsg) + .onCompletion { + withContext(Dispatchers.Main) { + userActionFab.isEnabled = true + } + }.collect { token -> + val messageCount = messages.size + check(messageCount > 0 && !messages[messageCount - 1].isUser) + + messages.removeAt(messageCount - 1).copy( + content = lastAssistantMsg.append(token).toString() + ).let { messages.add(it) } + + withContext(Dispatchers.Main) { + messageAdapter.notifyItemChanged(messages.size - 1) + } + } + } + } + } + } + + /** + * Run a benchmark with the model file + */ + private suspend fun runBenchmark(modelName: String, modelFile: File) = + withContext(Dispatchers.Default) { + Log.i(TAG, "Starts benchmarking $modelName") + withContext(Dispatchers.Main) { + userInputEt.hint = "Running benchmark..." 
+ } + engine.bench( + pp=BENCH_PROMPT_PROCESSING_TOKENS, + tg=BENCH_TOKEN_GENERATION_TOKENS, + pl=BENCH_SEQUENCE, + nr=BENCH_REPETITION + ).let { result -> + messages.add(Message(UUID.randomUUID().toString(), result, false)) + withContext(Dispatchers.Main) { + messageAdapter.notifyItemChanged(messages.size - 1) + } + } + } + + /** + * Create the `models` directory if not exist. + */ + private fun ensureModelsDirectory() = + File(filesDir, DIRECTORY_MODELS).also { + if (it.exists() && !it.isDirectory) { it.delete() } + if (!it.exists()) { it.mkdir() } + } + + companion object { + private val TAG = MainActivity::class.java.simpleName + + private const val DIRECTORY_MODELS = "models" + private const val FILE_EXTENSION_GGUF = ".gguf" + + private const val BENCH_PROMPT_PROCESSING_TOKENS = 512 + private const val BENCH_TOKEN_GENERATION_TOKENS = 128 + private const val BENCH_SEQUENCE = 1 + private const val BENCH_REPETITION = 3 + } +} + +fun GgufMetadata.filename() = when { + basic.name != null -> { + basic.name?.let { name -> + basic.sizeLabel?.let { size -> + "$name-$size" + } ?: name + } + } + architecture?.architecture != null -> { + architecture?.architecture?.let { arch -> + basic.uuid?.let { uuid -> + "$arch-$uuid" + } ?: "$arch-${System.currentTimeMillis()}" + } + } + else -> { + "model-${System.currentTimeMillis().toHexString()}" + } } diff --git a/examples/llama.android/app/src/main/java/com/example/llama/MainViewModel.kt b/examples/llama.android/app/src/main/java/com/example/llama/MainViewModel.kt deleted file mode 100644 index 45ac29938f..0000000000 --- a/examples/llama.android/app/src/main/java/com/example/llama/MainViewModel.kt +++ /dev/null @@ -1,105 +0,0 @@ -package com.example.llama - -import android.llama.cpp.LLamaAndroid -import android.util.Log -import androidx.compose.runtime.getValue -import androidx.compose.runtime.mutableStateOf -import androidx.compose.runtime.setValue -import androidx.lifecycle.ViewModel -import androidx.lifecycle.viewModelScope 
-import kotlinx.coroutines.flow.catch -import kotlinx.coroutines.launch - -class MainViewModel(private val llamaAndroid: LLamaAndroid = LLamaAndroid.instance()): ViewModel() { - companion object { - @JvmStatic - private val NanosPerSecond = 1_000_000_000.0 - } - - private val tag: String? = this::class.simpleName - - var messages by mutableStateOf(listOf("Initializing...")) - private set - - var message by mutableStateOf("") - private set - - override fun onCleared() { - super.onCleared() - - viewModelScope.launch { - try { - llamaAndroid.unload() - } catch (exc: IllegalStateException) { - messages += exc.message!! - } - } - } - - fun send() { - val text = message - message = "" - - // Add to messages console. - messages += text - messages += "" - - viewModelScope.launch { - llamaAndroid.send(text) - .catch { - Log.e(tag, "send() failed", it) - messages += it.message!! - } - .collect { messages = messages.dropLast(1) + (messages.last() + it) } - } - } - - fun bench(pp: Int, tg: Int, pl: Int, nr: Int = 1) { - viewModelScope.launch { - try { - val start = System.nanoTime() - val warmupResult = llamaAndroid.bench(pp, tg, pl, nr) - val end = System.nanoTime() - - messages += warmupResult - - val warmup = (end - start).toDouble() / NanosPerSecond - messages += "Warm up time: $warmup seconds, please wait..." - - if (warmup > 5.0) { - messages += "Warm up took too long, aborting benchmark" - return@launch - } - - messages += llamaAndroid.bench(512, 128, 1, 3) - } catch (exc: IllegalStateException) { - Log.e(tag, "bench() failed", exc) - messages += exc.message!! - } - } - } - - fun load(pathToModel: String) { - viewModelScope.launch { - try { - llamaAndroid.load(pathToModel) - messages += "Loaded $pathToModel" - } catch (exc: IllegalStateException) { - Log.e(tag, "load() failed", exc) - messages += exc.message!! 
- } - } - } - - fun updateMessage(newMessage: String) { - message = newMessage - } - - fun clear() { - messages = listOf() - } - - fun log(message: String) { - messages += message - } -} diff --git a/examples/llama.android/app/src/main/java/com/example/llama/MessageAdapter.kt b/examples/llama.android/app/src/main/java/com/example/llama/MessageAdapter.kt new file mode 100644 index 0000000000..0439f96441 --- /dev/null +++ b/examples/llama.android/app/src/main/java/com/example/llama/MessageAdapter.kt @@ -0,0 +1,51 @@ +package com.example.llama + +import android.view.LayoutInflater +import android.view.View +import android.view.ViewGroup +import android.widget.TextView +import androidx.recyclerview.widget.RecyclerView + +data class Message( + val id: String, + val content: String, + val isUser: Boolean +) + +class MessageAdapter( + private val messages: List +) : RecyclerView.Adapter() { + + companion object { + private const val VIEW_TYPE_USER = 1 + private const val VIEW_TYPE_ASSISTANT = 2 + } + + override fun getItemViewType(position: Int): Int { + return if (messages[position].isUser) VIEW_TYPE_USER else VIEW_TYPE_ASSISTANT + } + + override fun onCreateViewHolder(parent: ViewGroup, viewType: Int): RecyclerView.ViewHolder { + val layoutInflater = LayoutInflater.from(parent.context) + return if (viewType == VIEW_TYPE_USER) { + val view = layoutInflater.inflate(R.layout.item_message_user, parent, false) + UserMessageViewHolder(view) + } else { + val view = layoutInflater.inflate(R.layout.item_message_assistant, parent, false) + AssistantMessageViewHolder(view) + } + } + + override fun onBindViewHolder(holder: RecyclerView.ViewHolder, position: Int) { + val message = messages[position] + if (holder is UserMessageViewHolder || holder is AssistantMessageViewHolder) { + val textView = holder.itemView.findViewById(R.id.msg_content) + textView.text = message.content + } + } + + override fun getItemCount(): Int = messages.size + + class UserMessageViewHolder(view: View) : 
RecyclerView.ViewHolder(view) + class AssistantMessageViewHolder(view: View) : RecyclerView.ViewHolder(view) +} diff --git a/examples/llama.android/app/src/main/java/com/example/llama/ui/theme/Color.kt b/examples/llama.android/app/src/main/java/com/example/llama/ui/theme/Color.kt deleted file mode 100644 index 40c30e8d97..0000000000 --- a/examples/llama.android/app/src/main/java/com/example/llama/ui/theme/Color.kt +++ /dev/null @@ -1,11 +0,0 @@ -package com.example.llama.ui.theme - -import androidx.compose.ui.graphics.Color - -val Purple80 = Color(0xFFD0BCFF) -val PurpleGrey80 = Color(0xFFCCC2DC) -val Pink80 = Color(0xFFEFB8C8) - -val Purple40 = Color(0xFF6650a4) -val PurpleGrey40 = Color(0xFF625b71) -val Pink40 = Color(0xFF7D5260) diff --git a/examples/llama.android/app/src/main/java/com/example/llama/ui/theme/Theme.kt b/examples/llama.android/app/src/main/java/com/example/llama/ui/theme/Theme.kt deleted file mode 100644 index e742220a8d..0000000000 --- a/examples/llama.android/app/src/main/java/com/example/llama/ui/theme/Theme.kt +++ /dev/null @@ -1,70 +0,0 @@ -package com.example.llama.ui.theme - -import android.app.Activity -import android.os.Build -import androidx.compose.foundation.isSystemInDarkTheme -import androidx.compose.material3.MaterialTheme -import androidx.compose.material3.darkColorScheme -import androidx.compose.material3.dynamicDarkColorScheme -import androidx.compose.material3.dynamicLightColorScheme -import androidx.compose.material3.lightColorScheme -import androidx.compose.runtime.Composable -import androidx.compose.runtime.SideEffect -import androidx.compose.ui.graphics.toArgb -import androidx.compose.ui.platform.LocalContext -import androidx.compose.ui.platform.LocalView -import androidx.core.view.WindowCompat - -private val DarkColorScheme = darkColorScheme( - primary = Purple80, - secondary = PurpleGrey80, - tertiary = Pink80 -) - -private val LightColorScheme = lightColorScheme( - primary = Purple40, - secondary = PurpleGrey40, - 
tertiary = Pink40 - - /* Other default colors to override - background = Color(0xFFFFFBFE), - surface = Color(0xFFFFFBFE), - onPrimary = Color.White, - onSecondary = Color.White, - onTertiary = Color.White, - onBackground = Color(0xFF1C1B1F), - onSurface = Color(0xFF1C1B1F), - */ -) - -@Composable -fun LlamaAndroidTheme( - darkTheme: Boolean = isSystemInDarkTheme(), - // Dynamic color is available on Android 12+ - dynamicColor: Boolean = true, - content: @Composable () -> Unit -) { - val colorScheme = when { - dynamicColor && Build.VERSION.SDK_INT >= Build.VERSION_CODES.S -> { - val context = LocalContext.current - if (darkTheme) dynamicDarkColorScheme(context) else dynamicLightColorScheme(context) - } - - darkTheme -> DarkColorScheme - else -> LightColorScheme - } - val view = LocalView.current - if (!view.isInEditMode) { - SideEffect { - val window = (view.context as Activity).window - window.statusBarColor = colorScheme.primary.toArgb() - WindowCompat.getInsetsController(window, view).isAppearanceLightStatusBars = darkTheme - } - } - - MaterialTheme( - colorScheme = colorScheme, - typography = Typography, - content = content - ) -} diff --git a/examples/llama.android/app/src/main/java/com/example/llama/ui/theme/Type.kt b/examples/llama.android/app/src/main/java/com/example/llama/ui/theme/Type.kt deleted file mode 100644 index 0b87946ca3..0000000000 --- a/examples/llama.android/app/src/main/java/com/example/llama/ui/theme/Type.kt +++ /dev/null @@ -1,34 +0,0 @@ -package com.example.llama.ui.theme - -import androidx.compose.material3.Typography -import androidx.compose.ui.text.TextStyle -import androidx.compose.ui.text.font.FontFamily -import androidx.compose.ui.text.font.FontWeight -import androidx.compose.ui.unit.sp - -// Set of Material typography styles to start with -val Typography = Typography( - bodyLarge = TextStyle( - fontFamily = FontFamily.Default, - fontWeight = FontWeight.Normal, - fontSize = 16.sp, - lineHeight = 24.sp, - letterSpacing = 0.5.sp - ) - 
/* Other default text styles to override - titleLarge = TextStyle( - fontFamily = FontFamily.Default, - fontWeight = FontWeight.Normal, - fontSize = 22.sp, - lineHeight = 28.sp, - letterSpacing = 0.sp - ), - labelSmall = TextStyle( - fontFamily = FontFamily.Default, - fontWeight = FontWeight.Medium, - fontSize = 11.sp, - lineHeight = 16.sp, - letterSpacing = 0.5.sp - ) - */ -) diff --git a/examples/llama.android/app/src/main/res/drawable/bg_assistant_message.xml b/examples/llama.android/app/src/main/res/drawable/bg_assistant_message.xml new file mode 100644 index 0000000000..f90c3db458 --- /dev/null +++ b/examples/llama.android/app/src/main/res/drawable/bg_assistant_message.xml @@ -0,0 +1,4 @@ + + + + diff --git a/examples/llama.android/app/src/main/res/drawable/bg_user_message.xml b/examples/llama.android/app/src/main/res/drawable/bg_user_message.xml new file mode 100644 index 0000000000..3ca7daefec --- /dev/null +++ b/examples/llama.android/app/src/main/res/drawable/bg_user_message.xml @@ -0,0 +1,4 @@ + + + + diff --git a/examples/llama.android/app/src/main/res/drawable/outline_folder_open_24.xml b/examples/llama.android/app/src/main/res/drawable/outline_folder_open_24.xml new file mode 100644 index 0000000000..f58b501e3b --- /dev/null +++ b/examples/llama.android/app/src/main/res/drawable/outline_folder_open_24.xml @@ -0,0 +1,10 @@ + + + diff --git a/examples/llama.android/app/src/main/res/drawable/outline_send_24.xml b/examples/llama.android/app/src/main/res/drawable/outline_send_24.xml new file mode 100644 index 0000000000..712adc00c4 --- /dev/null +++ b/examples/llama.android/app/src/main/res/drawable/outline_send_24.xml @@ -0,0 +1,11 @@ + + + diff --git a/examples/llama.android/app/src/main/res/layout/activity_main.xml b/examples/llama.android/app/src/main/res/layout/activity_main.xml new file mode 100644 index 0000000000..bf6ef35925 --- /dev/null +++ b/examples/llama.android/app/src/main/res/layout/activity_main.xml @@ -0,0 +1,76 @@ + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + diff --git a/examples/llama.android/app/src/main/res/layout/item_message_assistant.xml b/examples/llama.android/app/src/main/res/layout/item_message_assistant.xml new file mode 100644 index 0000000000..b7fb500393 --- /dev/null +++ b/examples/llama.android/app/src/main/res/layout/item_message_assistant.xml @@ -0,0 +1,15 @@ + + + + + diff --git a/examples/llama.android/app/src/main/res/layout/item_message_user.xml b/examples/llama.android/app/src/main/res/layout/item_message_user.xml new file mode 100644 index 0000000000..fe871f12fa --- /dev/null +++ b/examples/llama.android/app/src/main/res/layout/item_message_user.xml @@ -0,0 +1,15 @@ + + + + + diff --git a/examples/llama.android/app/src/main/res/values/strings.xml b/examples/llama.android/app/src/main/res/values/strings.xml index 7a9d314e29..36059fc799 100644 --- a/examples/llama.android/app/src/main/res/values/strings.xml +++ b/examples/llama.android/app/src/main/res/values/strings.xml @@ -1,3 +1,3 @@ - LlamaAndroid + AI Chat basic sample diff --git a/examples/llama.android/app/src/main/res/values/themes.xml b/examples/llama.android/app/src/main/res/values/themes.xml index 8a24fda566..2e4fdad72e 100644 --- a/examples/llama.android/app/src/main/res/values/themes.xml +++ b/examples/llama.android/app/src/main/res/values/themes.xml @@ -1,5 +1,10 @@ - + +