From 81069a808a29ae3048231e8b8c69758239f8e050 Mon Sep 17 00:00:00 2001 From: Todor Boinovski Date: Fri, 10 Apr 2026 15:57:23 -0700 Subject: [PATCH] hexagon: add support for linux on snapdragon (#21707) * hexagon: add support for debian on ex2 * hexagon: add -fvectotize to c/c++ cmake flags * hexagon: remove trailing white space * update onboarding steps * hexagon: update linux setup documentation * hexagon: update intallation scripts * Hexagon: update docs * hexagon: update onboarding scripts --------- Co-authored-by: Zack Li --- cmake/arm64-linux-clang.cmake | 17 ++++++ docs/backend/snapdragon/CMakeUserPresets.json | 31 +++++++++- docs/backend/snapdragon/linux.md | 58 +++++++++++++++++++ 3 files changed, 105 insertions(+), 1 deletion(-) create mode 100644 cmake/arm64-linux-clang.cmake create mode 100644 docs/backend/snapdragon/linux.md diff --git a/cmake/arm64-linux-clang.cmake b/cmake/arm64-linux-clang.cmake new file mode 100644 index 0000000000..f16e280ec6 --- /dev/null +++ b/cmake/arm64-linux-clang.cmake @@ -0,0 +1,17 @@ +set( CMAKE_SYSTEM_NAME Linux ) +set( CMAKE_SYSTEM_PROCESSOR arm64 ) + +set( target aarch64-linux-gnu ) + +set( CMAKE_C_COMPILER clang ) +set( CMAKE_CXX_COMPILER clang++ ) + +set( CMAKE_C_COMPILER_TARGET ${target} ) +set( CMAKE_CXX_COMPILER_TARGET ${target} ) + +set( arch_c_flags "-march=armv8.7-a -fvectorize -ffp-model=fast -fno-finite-math-only" ) +set( warn_c_flags "-Wno-format -Wno-unused-variable -Wno-unused-function -Wno-gnu-zero-variadic-macro-arguments" ) + +set( CMAKE_C_FLAGS_INIT "${arch_c_flags} ${warn_c_flags}" ) +set( CMAKE_CXX_FLAGS_INIT "${arch_c_flags} ${warn_c_flags}" ) + diff --git a/docs/backend/snapdragon/CMakeUserPresets.json b/docs/backend/snapdragon/CMakeUserPresets.json index 1faae2f3db..c07bf5ca0c 100644 --- a/docs/backend/snapdragon/CMakeUserPresets.json +++ b/docs/backend/snapdragon/CMakeUserPresets.json @@ -52,10 +52,39 @@ } }, + { + "name": "arm64-linux-snapdragon", + "hidden": true, + "architecture": { "value": 
"arm64", "strategy": "external" }, + "toolset": { "value": "host=x86_64", "strategy": "external" }, + "cacheVariables": { + "CMAKE_TOOLCHAIN_FILE": "cmake/arm64-linux-clang.cmake", + "CMAKE_C_FLAGS": "-march=armv8 -fno-finite-math-only -flto -D_GNU_SOURCE", + "CMAKE_CXX_FLAGS": "-march=armv8 -fno-finite-math-only -flto -D_GNU_SOURCE", + "CMAKE_C_FLAGS_RELEASE": "-O3 -DNDEBUG", + "CMAKE_CXX_FLAGS_RELEASE": "-O3 -DNDEBUG", + "CMAKE_C_FLAGS_RELWITHDEBINFO": "-O3 -DNDEBUG -g", + "CMAKE_CXX_FLAGS_RELWITHDEBINFO": "-O3 -DNDEBUG -g", + "CMAKE_PREFIX_PATH": "$env{OPENCL_SDK_ROOT}", + "HEXAGON_SDK_ROOT": "$env{HEXAGON_SDK_ROOT}", + "HEXAGON_TOOLS_ROOT": "$env{HEXAGON_TOOLS_ROOT}", + "PREBUILT_LIB_DIR": "linux_aarch64", + "GGML_OPENMP": "OFF", + "GGML_LLAMAFILE": "OFF", + "GGML_OPENCL": "OFF", + "GGML_HEXAGON": "ON", + "GGML_HEXAGON_FP32_QUANTIZE_GROUP_SIZE": "128", + "LLAMA_OPENSSL": "OFF" + } + }, + { "name": "arm64-android-snapdragon-debug" , "inherits": [ "base", "arm64-android-snapdragon", "debug" ] }, { "name": "arm64-android-snapdragon-release", "inherits": [ "base", "arm64-android-snapdragon", "release" ] }, { "name": "arm64-windows-snapdragon-debug" , "inherits": [ "base", "arm64-windows-snapdragon", "debug" ] }, - { "name": "arm64-windows-snapdragon-release", "inherits": [ "base", "arm64-windows-snapdragon", "release" ] } + { "name": "arm64-windows-snapdragon-release", "inherits": [ "base", "arm64-windows-snapdragon", "release" ] }, + + { "name": "arm64-linux-snapdragon-debug" , "inherits": [ "base", "arm64-linux-snapdragon", "debug" ] }, + { "name": "arm64-linux-snapdragon-release", "inherits": [ "base", "arm64-linux-snapdragon", "release" ] } ] } diff --git a/docs/backend/snapdragon/linux.md b/docs/backend/snapdragon/linux.md new file mode 100644 index 0000000000..90fdadb6c9 --- /dev/null +++ b/docs/backend/snapdragon/linux.md @@ -0,0 +1,58 @@ +# Snapdragon-based Linux devices + +## Docker Setup + +The easiest way to build llama.cpp for a Snapdragon-based Linux 
device is using the toolchain Docker image (see [github.com/snapdragon-toolchain](https://github.com/snapdragon-toolchain)). +This image includes the OpenCL SDK, the Hexagon SDK, CMake, and the ARM64 Linux cross-compilation toolchain. + +Cross-compilation is supported on **Linux x86_64** hosts. The resulting binaries are deployed to and run on the target **Qualcomm Snapdragon ARM64 Linux** device. + +``` +~/src/llama.cpp$ docker run -it -u $(id -u):$(id -g) --volume $(pwd):/workspace --platform linux/amd64 ghcr.io/snapdragon-toolchain/arm64-linux:v0.1 +[d]/> cd /workspace +``` + +Note: The rest of the **Linux** build process assumes that you're running inside the toolchain container. + + +## How to Build + +Let's build llama.cpp with the CPU and Hexagon backends via CMake presets (the `arm64-linux-snapdragon` preset sets `GGML_OPENCL` to `OFF`): + +``` +[d]/workspace> cp docs/backend/snapdragon/CMakeUserPresets.json . + +[d]/workspace> cmake --preset arm64-linux-snapdragon-release -B build-snapdragon + +[d]/workspace> cmake --build build-snapdragon -j $(nproc) +``` + +To generate an installable "package", simply use `cmake --install`, then zip it: + +``` +[d]/workspace> cmake --install build-snapdragon --prefix pkg-snapdragon +[d]/workspace> zip -r pkg-snapdragon.zip pkg-snapdragon +``` + +## How to Install + +For this step, you will deploy the built binaries and libraries to the target Linux device. 
Transfer `pkg-snapdragon.zip` to the target device, then unzip it and set up the environment variables: + +``` +$ unzip pkg-snapdragon.zip +$ cd pkg-snapdragon +$ export LD_LIBRARY_PATH=./lib +$ export ADSP_LIBRARY_PATH=./lib +``` + +At this point, you should also download some models onto the device: + +``` +$ wget https://huggingface.co/bartowski/Llama-3.2-3B-Instruct-GGUF/resolve/main/Llama-3.2-3B-Instruct-Q4_0.gguf +``` + +## How to Run +Next, since we have set up the environment variables, we can run llama-cli with the Hexagon backend: +``` +$ ./bin/llama-cli -m Llama-3.2-3B-Instruct-Q4_0.gguf --device HTP0 -ngl 99 -p "what is the most popular cookie in the world?" +```