hexagon: enable offloading to Hexagon on Windows on Snapdragon (#19150)
* hexagon: updates to enable offloading to HTP on WoS * Update windows.md * Update windows.md * hexagon: enable -O3 optimizations * hexagon: move all _WINDOWS conditional compilation to _WIN32 * hexagon: updates to enable offloading to HTP on WoS * hexagon: use run-time vs load-time dynamic linking for cdsp driver interface * refactor htp-drv * hexagon: add run-bench.ps1 script * hexagon: htdrv refactor * hexagon: unify Android and Windows build readmes * hexagon: update README.md * hexagon: refactor htpdrv * hexagon: drv refactor * hexagon: more drv refactor * hexagon: fixes for android builds * hexagon: factor out dl into ggml-backend-dl * hexagon: add run-tool.ps1 script * hexagon: merge htp-utils in htp-drv and remove unused code * wos: no need for getopt_custom.h * wos: add missing CR in htpdrv * hexagon: ndev enforecement applies only to the Android devices * hexagon: add support for generating and signing .cat file * hexagon: add .inf file * hexagon: working auto-signing and improved windows builds * hexagon: futher improve skel build * hexagon: add rough WoS guide * hexagon: updated windows guide * hexagon: improve cmake handling of certs and logging * hexagon: improve windows setup/build doc * hexagon: more windows readme updates * hexagon: windows readme updates * hexagon: windows readme updates * hexagon: windows readme updates * hexagon: windows readme updates * Update windows.md * Update windows.md * snapdragon: rename docs/backend/hexagon to docs/backends/snapdragon Also added a power shell script to simplify build env setup. * hexagon: remove trailing whitespace and move cmake requirement to user-presets * hexagon: fix CMakeUserPresets path in workflow yaml * hexagon: introduce local version of libdl.h * hexagon: fix src1 reuse logic gpt-oss needs a bigger lookahead window. The check for src[1] itself being quantized was wrong. --------- Co-authored-by: Max Krasnyansky <maxk@qti.qualcomm.com>
This commit is contained in:
parent
4fdbc1e4db
commit
ce38a4db47
|
|
@ -1371,7 +1371,7 @@ jobs:
|
||||||
id: update_presets
|
id: update_presets
|
||||||
if: ${{ matrix.build == 'arm64-snapdragon' }}
|
if: ${{ matrix.build == 'arm64-snapdragon' }}
|
||||||
run: |
|
run: |
|
||||||
cp docs/backend/hexagon/CMakeUserPresets.json .
|
cp docs/backend/snapdragon/CMakeUserPresets.json .
|
||||||
|
|
||||||
- name: Build
|
- name: Build
|
||||||
id: ndk_build
|
id: ndk_build
|
||||||
|
|
|
||||||
|
|
@ -1,5 +1,10 @@
|
||||||
{
|
{
|
||||||
"version": 4,
|
"version": 5,
|
||||||
|
"cmakeMinimumRequired": {
|
||||||
|
"major": 3,
|
||||||
|
"minor": 28,
|
||||||
|
"patch": 0
|
||||||
|
},
|
||||||
"configurePresets": [
|
"configurePresets": [
|
||||||
{
|
{
|
||||||
"name": "arm64-android-snapdragon",
|
"name": "arm64-android-snapdragon",
|
||||||
|
|
@ -16,7 +21,9 @@
|
||||||
"CMAKE_CXX_FLAGS_RELEASE": "-O3 -DNDEBUG",
|
"CMAKE_CXX_FLAGS_RELEASE": "-O3 -DNDEBUG",
|
||||||
"CMAKE_C_FLAGS_RELWITHDEBINFO": "-O3 -DNDEBUG -g",
|
"CMAKE_C_FLAGS_RELWITHDEBINFO": "-O3 -DNDEBUG -g",
|
||||||
"CMAKE_CXX_FLAGS_RELWITHDEBINFO": "-O3 -DNDEBUG -g",
|
"CMAKE_CXX_FLAGS_RELWITHDEBINFO": "-O3 -DNDEBUG -g",
|
||||||
"HEXAGON_SDK_ROOT": "$env{HEXAGON_SDK_ROOT}",
|
"CMAKE_PREFIX_PATH": "$env{OPENCL_SDK_ROOT}",
|
||||||
|
"HEXAGON_SDK_ROOT": "$env{HEXAGON_SDK_ROOT}",
|
||||||
|
"HEXAGON_TOOLS_ROOT": "$env{HEXAGON_TOOLS_ROOT}",
|
||||||
"PREBUILT_LIB_DIR": "android_aarch64",
|
"PREBUILT_LIB_DIR": "android_aarch64",
|
||||||
"GGML_OPENMP": "OFF",
|
"GGML_OPENMP": "OFF",
|
||||||
"GGML_LLAMAFILE": "OFF",
|
"GGML_LLAMAFILE": "OFF",
|
||||||
|
|
@ -31,7 +38,15 @@
|
||||||
"name": "arm64-windows-snapdragon",
|
"name": "arm64-windows-snapdragon",
|
||||||
"inherits": [ "base", "arm64-windows-llvm" ],
|
"inherits": [ "base", "arm64-windows-llvm" ],
|
||||||
"cacheVariables": {
|
"cacheVariables": {
|
||||||
"HEXAGON_SDK_ROOT": "$env{HEXAGON_SDK_ROOT}",
|
"CMAKE_C_FLAGS": "-march=armv8.7a+fp16 -fvectorize -ffp-model=fast -flto -D_GNU_SOURCE",
|
||||||
|
"CMAKE_CXX_FLAGS": "-march=armv8.7a+fp16 -fvectorize -ffp-model=fast -flto -D_GNU_SOURCE",
|
||||||
|
"CMAKE_C_FLAGS_RELEASE": "-O3 -DNDEBUG",
|
||||||
|
"CMAKE_CXX_FLAGS_RELEASE": "-O3 -DNDEBUG",
|
||||||
|
"CMAKE_C_FLAGS_RELWITHDEBINFO": "-O3 -DNDEBUG -g",
|
||||||
|
"CMAKE_CXX_FLAGS_RELWITHDEBINFO": "-O3 -DNDEBUG -g",
|
||||||
|
"CMAKE_PREFIX_PATH": "$env{OPENCL_SDK_ROOT}",
|
||||||
|
"HEXAGON_SDK_ROOT": "$env{HEXAGON_SDK_ROOT}",
|
||||||
|
"HEXAGON_TOOLS_ROOT": "$env{HEXAGON_TOOLS_ROOT}",
|
||||||
"PREBUILT_LIB_DIR": "windows_aarch64",
|
"PREBUILT_LIB_DIR": "windows_aarch64",
|
||||||
"GGML_OPENMP": "OFF",
|
"GGML_OPENMP": "OFF",
|
||||||
"GGML_LLAMAFILE": "OFF",
|
"GGML_LLAMAFILE": "OFF",
|
||||||
|
|
@ -1,6 +1,8 @@
|
||||||
# Snapdragon-based Android devices
|
# Snapdragon-based devices
|
||||||
|
|
||||||
## How to Build
|
## Setup
|
||||||
|
|
||||||
|
### Android
|
||||||
|
|
||||||
The easiest way to build llama.cpp for a Snapdragon-based Android device is using the toolchain Docker image (see github.com/snapdragon-toolchain).
|
The easiest way to build llama.cpp for a Snapdragon-based Android device is using the toolchain Docker image (see github.com/snapdragon-toolchain).
|
||||||
This image includes Android NDK, OpenCL SDK, Hexagon SDK, CMake, etc.
|
This image includes Android NDK, OpenCL SDK, Hexagon SDK, CMake, etc.
|
||||||
|
|
@ -12,7 +14,24 @@ This method works on Linux, macOS, and Windows. macOS and Windows users should i
|
||||||
[d]/> cd /workspace
|
[d]/> cd /workspace
|
||||||
```
|
```
|
||||||
|
|
||||||
The rest of the Android build process assumes that you're running inside the toolchain container.
|
Note: The rest of the **Android** build process assumes that you're running inside the toolchain container.
|
||||||
|
|
||||||
|
### Windows On Snapdragon
|
||||||
|
|
||||||
|
Native Windows 11 arm64 builds has the following tools dependencies:
|
||||||
|
- MS Visual Studio 2026 (Community Edition or Pro)
|
||||||
|
- MSVC arm64 standard and runtime libraries
|
||||||
|
- UCRT and Driver Kit
|
||||||
|
- LLVM core libraries and Clang compiler (winget)
|
||||||
|
- CMake, Git, Python (winget)
|
||||||
|
- Hexagon SDK Community Edition 6.4 or later (see windows.md)
|
||||||
|
- OpenCL SDK 2.3 or later (see windows.md)
|
||||||
|
|
||||||
|
Note: The rest of the **Windows** build process assumes that you're running natively in Powershell.
|
||||||
|
Adapt below build commands accordingly.
|
||||||
|
|
||||||
|
## How to Build
|
||||||
|
|
||||||
Let's build llama.cpp with CPU, OpenCL, and Hexagon backends via CMake presets:
|
Let's build llama.cpp with CPU, OpenCL, and Hexagon backends via CMake presets:
|
||||||
|
|
||||||
```
|
```
|
||||||
|
|
@ -49,24 +68,26 @@ Preset CMake variables:
|
||||||
To generate an installable "package" simply use cmake --install:
|
To generate an installable "package" simply use cmake --install:
|
||||||
|
|
||||||
```
|
```
|
||||||
[d]/workspace> cmake --install build-snapdragon --prefix pkg-adb/llama.cpp
|
[d]/workspace> cmake --install build-snapdragon --prefix pkg-snapdragon/llama.cpp
|
||||||
-- Install configuration: "Release"
|
-- Install configuration: "Release"
|
||||||
-- Installing: /workspace/pkg-adb/llama.cpp/lib/libggml-cpu.so
|
-- Installing: /workspace/pkg-snapdragon/llama.cpp/lib/libggml-cpu.so
|
||||||
-- Installing: /workspace/pkg-adb/llama.cpp/lib/libggml-opencl.so
|
-- Installing: /workspace/pkg-snapdragon/llama.cpp/lib/libggml-opencl.so
|
||||||
-- Installing: /workspace/pkg-adb/llama.cpp/lib/libggml-hexagon.so
|
-- Installing: /workspace/pkg-snapdragon/llama.cpp/lib/libggml-hexagon.so
|
||||||
-- Installing: /workspace/pkg-adb/llama.cpp/lib/libggml-htp-v73.so
|
-- Installing: /workspace/pkg-snapdragon/llama.cpp/lib/libggml-htp-v73.so
|
||||||
-- Installing: /workspace/pkg-adb/llama.cpp/lib/libggml-htp-v75.so
|
-- Installing: /workspace/pkg-snapdragon/llama.cpp/lib/libggml-htp-v75.so
|
||||||
-- Installing: /workspace/pkg-adb/llama.cpp/lib/libggml-htp-v79.so
|
-- Installing: /workspace/pkg-snapdragon/llama.cpp/lib/libggml-htp-v79.so
|
||||||
-- Installing: /workspace/pkg-adb/llama.cpp/lib/libggml-htp-v81.so
|
-- Installing: /workspace/pkg-snapdragon/llama.cpp/lib/libggml-htp-v81.so
|
||||||
-- Installing: /workspace/pkg-adb/llama.cpp/lib/libggml.so
|
-- Installing: /workspace/pkg-snapdragon/llama.cpp/lib/libggml.so
|
||||||
...
|
...
|
||||||
-- Installing: /workspace/pkg-adb/llama.cpp/bin/llama-bench
|
-- Installing: /workspace/pkg-snapdragon/llama.cpp/bin/llama-bench
|
||||||
-- Installing: /workspace/pkg-adb/llama.cpp/bin/llama-cli
|
-- Installing: /workspace/pkg-snapdragon/llama.cpp/bin/llama-cli
|
||||||
...
|
...
|
||||||
```
|
```
|
||||||
|
|
||||||
## How to Install
|
## How to Install
|
||||||
|
|
||||||
|
### Android
|
||||||
|
|
||||||
For this step, your device needs to be configured for on-device development.
|
For this step, your device needs to be configured for on-device development.
|
||||||
Please see https://developer.android.com/studio/debug/dev-options for details.
|
Please see https://developer.android.com/studio/debug/dev-options for details.
|
||||||
|
|
||||||
|
|
@ -74,10 +95,10 @@ Once ADB is enabled, use `adb push` to install `pkg-snapdragon` on the device.
|
||||||
**Note that the toolchain Docker image doesn't have ADB and doesn't set up the ADB bridge. Please use native ADB on the host.**
|
**Note that the toolchain Docker image doesn't have ADB and doesn't set up the ADB bridge. Please use native ADB on the host.**
|
||||||
|
|
||||||
```
|
```
|
||||||
~/src/llama.cpp$ adb push pkg-adb/llama.cpp /data/local/tmp/
|
~/src/llama.cpp$ adb push pkg-snapdragon/llama.cpp /data/local/tmp/
|
||||||
pkg-adb/llama.cpp/bin/: 67 files pushed, 0 skipped. 190.2 MB/s (919095042 bytes in 4.607s)
|
pkg-snapdragon/llama.cpp/bin/: 67 files pushed, 0 skipped. 190.2 MB/s (919095042 bytes in 4.607s)
|
||||||
pkg-adb/llama.cpp/include/: 19 files pushed, 0 skipped. 20.5 MB/s (255173 bytes in 0.012s)
|
pkg-snapdragon/llama.cpp/include/: 19 files pushed, 0 skipped. 20.5 MB/s (255173 bytes in 0.012s)
|
||||||
pkg-adb/llama.cpp/lib/: 16 files pushed, 0 skipped. 144.4 MB/s (43801382 bytes in 0.289s)
|
pkg-snapdragon/llama.cpp/lib/: 16 files pushed, 0 skipped. 144.4 MB/s (43801382 bytes in 0.289s)
|
||||||
102 files pushed, 0 skipped. 186.9 MB/s (963151597 bytes in 4.914s)
|
102 files pushed, 0 skipped. 186.9 MB/s (963151597 bytes in 4.914s)
|
||||||
```
|
```
|
||||||
|
|
||||||
|
|
@ -92,6 +113,11 @@ At this point, you should also install some models:
|
||||||
Llama-3.2-1B-Instruct-Q4_0.gguf: 1 file pushed, 0 skipped. 38.3 MB/s (773025920 bytes in 19.250s)
|
Llama-3.2-1B-Instruct-Q4_0.gguf: 1 file pushed, 0 skipped. 38.3 MB/s (773025920 bytes in 19.250s)
|
||||||
```
|
```
|
||||||
|
|
||||||
|
### Windows
|
||||||
|
|
||||||
|
All artifacts are already installed in the `pkg-snapdragon` folder.
|
||||||
|
To run, adapt below instructions to use Powershell scrits in `scripts/snapdragon/windows`.
|
||||||
|
|
||||||
## How to Run
|
## How to Run
|
||||||
|
|
||||||
The easiest way to run llama.cpp cli tools is using provided wrapper scripts that properly set up all required environment variables.
|
The easiest way to run llama.cpp cli tools is using provided wrapper scripts that properly set up all required environment variables.
|
||||||
|
|
@ -0,0 +1,161 @@
|
||||||
|
## Overview
|
||||||
|
|
||||||
|
The document covers procedures for installing the latest GPU and NPU drivers, and OpenCL and Hexagon SDKs.
|
||||||
|
|
||||||
|
|
||||||
|
In order to use Hexagon NPU on Snapdragon Windows devices the underlying HTP Ops libraries (e.g libggml-htp-v73.so)
|
||||||
|
must be included in the .cat file digitally signed with a trusted certificate.
|
||||||
|
|
||||||
|
This document covers details on how to generate personal certificate files (.pfx) and how to configure the system
|
||||||
|
to allow for test signatures (aka test-signing).
|
||||||
|
|
||||||
|
## Install the latest Adreno OpenCL SDK
|
||||||
|
|
||||||
|
Either use the trimmed down version (optimized for CI) from
|
||||||
|
|
||||||
|
https://github.com/snapdragon-toolchain/opencl-sdk/releases/download/v2.3.2/adreno-opencl-sdk-v2.3.2-arm64-wos.tar.xz
|
||||||
|
|
||||||
|
Or download the complete official version from
|
||||||
|
|
||||||
|
https://softwarecenter.qualcomm.com/catalog/item/Adreno_OpenCL_SDK?version=2.3.2
|
||||||
|
|
||||||
|
Unzip/untar the archive into
|
||||||
|
```
|
||||||
|
c:\Qualcomm\OpenCL_SDK\2.3.2
|
||||||
|
```
|
||||||
|
|
||||||
|
## Install the latest Hexagon SDK Community Edition
|
||||||
|
|
||||||
|
Either use the trimmed down version (optimized for CI) from
|
||||||
|
|
||||||
|
https://github.com/snapdragon-toolchain/hexagon-sdk/releases/download/v6.4.0.2/hexagon-sdk-v6.4.0.2-arm64-wos.tar.xz
|
||||||
|
|
||||||
|
Or download the complete official version from
|
||||||
|
|
||||||
|
https://softwarecenter.qualcomm.com/catalog/item/Hexagon_SDK?version=6.4.0.2
|
||||||
|
|
||||||
|
Unzip/untar the archive into
|
||||||
|
```
|
||||||
|
c:\Qualcomm\Hexagon_SDK\6.4.0.2
|
||||||
|
```
|
||||||
|
|
||||||
|
## Install the latest Adreno GPU driver
|
||||||
|
|
||||||
|
Download the driver from
|
||||||
|
|
||||||
|
https://softwarecenter.qualcomm.com/catalog/item/Windows_Graphics_Driver
|
||||||
|
|
||||||
|
After the automated installation and reboot please make sure that the GPU device shows up in the `Device Manager` (under 'Display Adapters`)
|
||||||
|
|
||||||
|
## Install the latest Qualcomm NPU driver
|
||||||
|
|
||||||
|
Download the driver from
|
||||||
|
|
||||||
|
https://softwarecenter.qualcomm.com/catalog/item/Qualcomm_HND
|
||||||
|
|
||||||
|
After the automated installation and reboot please make sure that the Hexagon NPU device shows up in the `Device Manager` (under `Neural Processors`).
|
||||||
|
|
||||||
|
If the device is not available you can try installing all components (`qcnspmcdm8380`, `qcnspmcdm8380_ext`) manually.
|
||||||
|
The components are extracted into
|
||||||
|
```
|
||||||
|
c:\QCDrivers\qcnspmcdm...
|
||||||
|
```
|
||||||
|
|
||||||
|
## Enable NPU driver test signatures
|
||||||
|
|
||||||
|
Please note that the following steps are required only for the Hexagon NPU.
|
||||||
|
Adreno GPU backend does not require test signatures.
|
||||||
|
|
||||||
|
### Enable testsigning
|
||||||
|
|
||||||
|
Use `bcdedit` to enable test-signing
|
||||||
|
```
|
||||||
|
> bcdedit /set TESTSIGNING ON
|
||||||
|
```
|
||||||
|
(Secure Boot may need to be disabled for this to work)
|
||||||
|
|
||||||
|
Make sure test-signing is enabled after reboot
|
||||||
|
```
|
||||||
|
> bcdedit /enum
|
||||||
|
...
|
||||||
|
testsigning Yes
|
||||||
|
...
|
||||||
|
```
|
||||||
|
For additional details see Microsoft guide at
|
||||||
|
|
||||||
|
https://learn.microsoft.com/en-us/windows-hardware/drivers/install/the-testsigning-boot-configuration-option
|
||||||
|
|
||||||
|
### Create personal certificate
|
||||||
|
|
||||||
|
The tools required for this procedure are available as part of Windows SDK and Windows Driver Kit which should be
|
||||||
|
installed as part of the MS Visual Studio.
|
||||||
|
They are typically located at
|
||||||
|
```
|
||||||
|
c:\Program Files (x86)\Windows Kits\10\bin\10.0.26100.0
|
||||||
|
```
|
||||||
|
(replace 10.0.26100.0 with correct version).
|
||||||
|
|
||||||
|
To create personal self-signed certificate run the following commands (either from cmd or power-shell):
|
||||||
|
```
|
||||||
|
> cd c:\Users\MyUser
|
||||||
|
> mkdir Certs
|
||||||
|
> cd Certs
|
||||||
|
> makecert -r -pe -ss PrivateCertStore -n CN=GGML.HTP.v1 -eku 1.3.6.1.5.5.7.3.3 -sv ggml-htp-v1.pvk ggml-htp-v1.cer
|
||||||
|
> pvk2pfx.exe -pvk ggml-htp-v1.pvk -spc ggml-htp-v1.cer -pfx ggml-htp-v1.pfx
|
||||||
|
```
|
||||||
|
(replace `MyUser` with your username).
|
||||||
|
|
||||||
|
Add this certificate to `Trusted Root Certification Authorities` and `Trusted Publishers` stores.
|
||||||
|
This can be done using `certlm` Certificate Manager tool.
|
||||||
|
Right click on the certificate store, select `All Tasks -> Import` and follow the prompts to import the certificate from the
|
||||||
|
PFX file you created above.
|
||||||
|
|
||||||
|
For additional details see Microsoft guide at
|
||||||
|
|
||||||
|
https://learn.microsoft.com/en-us/windows-hardware/drivers/install/introduction-to-test-signing
|
||||||
|
|
||||||
|
Make sure to save the PFX file, you will need it for the build procedures.
|
||||||
|
Please note that the same certificate can be used for signing any number of builds.
|
||||||
|
|
||||||
|
## Build Hexagon backend with signed HTP ops libraries
|
||||||
|
|
||||||
|
The overall Hexagon backend build procedure for Windows on Snapdragon is the same as for other platforms.
|
||||||
|
However, additional settings are required for generating and signing HTP Ops libraries.
|
||||||
|
```
|
||||||
|
> $env:OPENCL_SDK_ROOT="C:\Qualcomm\OpenCL_SDK\2.3.2"
|
||||||
|
> $env:HEXAGON_SDK_ROOT="C:\Qualcomm\Hexagon_SDK\6.4.0.2"
|
||||||
|
> $env:HEXAGON_TOOLS_ROOT="C:\Qualcomm\Hexagon_SDK\6.4.0.2\tools\HEXAGON_Tools\19.0.04"
|
||||||
|
> $env:HEXAGON_HTP_CERT="c:\Users\MyUsers\Certs\ggml-htp-v1.pfx"
|
||||||
|
> $env:WINDOWS_SDK_BIN="C:\Program Files (x86)\Windows Kits\10\bin\10.0.26100.0\arm64"
|
||||||
|
|
||||||
|
> cmake --preset arm64-windows-snapdragon -B build-wos
|
||||||
|
...
|
||||||
|
> cmake --install build-wos --prefix pkg-snapdragon
|
||||||
|
```
|
||||||
|
|
||||||
|
Once the build is complete HTP ops libraries will be installed like this
|
||||||
|
```
|
||||||
|
> dir pkg-snapdragon/lib
|
||||||
|
...
|
||||||
|
-a---- 1/22/2026 6:01 PM 187656 libggml-htp-v73.so
|
||||||
|
-a---- 1/22/2026 6:01 PM 191752 libggml-htp-v75.so
|
||||||
|
-a---- 1/22/2026 6:01 PM 187656 libggml-htp-v79.so
|
||||||
|
-a---- 1/22/2026 6:01 PM 187656 libggml-htp-v81.so
|
||||||
|
-a---- 1/22/2026 6:01 PM 4139 libggml-htp.cat
|
||||||
|
```
|
||||||
|
|
||||||
|
The .cat file, the signature and proper certicate installation can be verified with
|
||||||
|
|
||||||
|
```
|
||||||
|
> signtool.exe verify /v /pa .\pkg-snapdragon\lib\libggml-htp.cat
|
||||||
|
Verifying: .\pkg-snapdragon\lib\libggml-htp.cat
|
||||||
|
|
||||||
|
Signature Index: 0 (Primary Signature)
|
||||||
|
Hash of file (sha256): 9820C664DA59D5EAE31DBB664127FCDAEF59CDC31502496BC567544EC2F401CF
|
||||||
|
|
||||||
|
Signing Certificate Chain:
|
||||||
|
Issued to: GGML.HTP.v1
|
||||||
|
...
|
||||||
|
Successfully verified: .\pkg-snapdragon\lib\libggml-htp.cat
|
||||||
|
...
|
||||||
|
```
|
||||||
|
|
@ -222,6 +222,7 @@ if (GGML_SCHED_NO_REALLOC)
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
add_library(ggml
|
add_library(ggml
|
||||||
|
ggml-backend-dl.cpp
|
||||||
ggml-backend-reg.cpp)
|
ggml-backend-reg.cpp)
|
||||||
add_library(ggml::ggml ALIAS ggml)
|
add_library(ggml::ggml ALIAS ggml)
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,48 @@
|
||||||
|
#include "ggml-backend-dl.h"
|
||||||
|
|
||||||
|
#ifdef _WIN32
|
||||||
|
|
||||||
|
dl_handle * dl_load_library(const fs::path & path) {
|
||||||
|
// suppress error dialogs for missing DLLs
|
||||||
|
DWORD old_mode = SetErrorMode(SEM_FAILCRITICALERRORS);
|
||||||
|
SetErrorMode(old_mode | SEM_FAILCRITICALERRORS);
|
||||||
|
|
||||||
|
HMODULE handle = LoadLibraryW(path.wstring().c_str());
|
||||||
|
|
||||||
|
SetErrorMode(old_mode);
|
||||||
|
|
||||||
|
return handle;
|
||||||
|
}
|
||||||
|
|
||||||
|
void * dl_get_sym(dl_handle * handle, const char * name) {
|
||||||
|
DWORD old_mode = SetErrorMode(SEM_FAILCRITICALERRORS);
|
||||||
|
SetErrorMode(old_mode | SEM_FAILCRITICALERRORS);
|
||||||
|
|
||||||
|
void * p = (void *) GetProcAddress(handle, name);
|
||||||
|
|
||||||
|
SetErrorMode(old_mode);
|
||||||
|
|
||||||
|
return p;
|
||||||
|
}
|
||||||
|
|
||||||
|
const char * dl_error() {
|
||||||
|
return "";
|
||||||
|
}
|
||||||
|
|
||||||
|
#else
|
||||||
|
|
||||||
|
dl_handle * dl_load_library(const fs::path & path) {
|
||||||
|
dl_handle * handle = dlopen(path.string().c_str(), RTLD_NOW | RTLD_LOCAL);
|
||||||
|
return handle;
|
||||||
|
}
|
||||||
|
|
||||||
|
void * dl_get_sym(dl_handle * handle, const char * name) {
|
||||||
|
return dlsym(handle, name);
|
||||||
|
}
|
||||||
|
|
||||||
|
const char * dl_error() {
|
||||||
|
const char *rslt = dlerror();
|
||||||
|
return rslt != nullptr ? rslt : "";
|
||||||
|
}
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
@ -0,0 +1,45 @@
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#ifdef _WIN32
|
||||||
|
# define WIN32_LEAN_AND_MEAN
|
||||||
|
# ifndef NOMINMAX
|
||||||
|
# define NOMINMAX
|
||||||
|
# endif
|
||||||
|
# include <windows.h>
|
||||||
|
# include <winevt.h>
|
||||||
|
#else
|
||||||
|
# include <dlfcn.h>
|
||||||
|
# include <unistd.h>
|
||||||
|
#endif
|
||||||
|
#include <filesystem>
|
||||||
|
|
||||||
|
namespace fs = std::filesystem;
|
||||||
|
|
||||||
|
#ifdef _WIN32
|
||||||
|
|
||||||
|
using dl_handle = std::remove_pointer_t<HMODULE>;
|
||||||
|
|
||||||
|
struct dl_handle_deleter {
|
||||||
|
void operator()(HMODULE handle) {
|
||||||
|
FreeLibrary(handle);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
#else
|
||||||
|
|
||||||
|
using dl_handle = void;
|
||||||
|
|
||||||
|
struct dl_handle_deleter {
|
||||||
|
void operator()(void * handle) {
|
||||||
|
dlclose(handle);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
||||||
|
using dl_handle_ptr = std::unique_ptr<dl_handle, dl_handle_deleter>;
|
||||||
|
|
||||||
|
dl_handle * dl_load_library(const fs::path & path);
|
||||||
|
void * dl_get_sym(dl_handle * handle, const char * name);
|
||||||
|
const char * dl_error();
|
||||||
|
|
||||||
|
|
@ -1,5 +1,6 @@
|
||||||
#include "ggml-backend-impl.h"
|
#include "ggml-backend-impl.h"
|
||||||
#include "ggml-backend.h"
|
#include "ggml-backend.h"
|
||||||
|
#include "ggml-backend-dl.h"
|
||||||
#include "ggml-impl.h"
|
#include "ggml-impl.h"
|
||||||
#include <algorithm>
|
#include <algorithm>
|
||||||
#include <cstring>
|
#include <cstring>
|
||||||
|
|
@ -98,72 +99,6 @@ static std::string path_str(const fs::path & path) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#ifdef _WIN32
|
|
||||||
|
|
||||||
using dl_handle = std::remove_pointer_t<HMODULE>;
|
|
||||||
|
|
||||||
struct dl_handle_deleter {
|
|
||||||
void operator()(HMODULE handle) {
|
|
||||||
FreeLibrary(handle);
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
static dl_handle * dl_load_library(const fs::path & path) {
|
|
||||||
// suppress error dialogs for missing DLLs
|
|
||||||
DWORD old_mode = SetErrorMode(SEM_FAILCRITICALERRORS);
|
|
||||||
SetErrorMode(old_mode | SEM_FAILCRITICALERRORS);
|
|
||||||
|
|
||||||
HMODULE handle = LoadLibraryW(path.wstring().c_str());
|
|
||||||
|
|
||||||
SetErrorMode(old_mode);
|
|
||||||
|
|
||||||
return handle;
|
|
||||||
}
|
|
||||||
|
|
||||||
static void * dl_get_sym(dl_handle * handle, const char * name) {
|
|
||||||
DWORD old_mode = SetErrorMode(SEM_FAILCRITICALERRORS);
|
|
||||||
SetErrorMode(old_mode | SEM_FAILCRITICALERRORS);
|
|
||||||
|
|
||||||
void * p = (void *) GetProcAddress(handle, name);
|
|
||||||
|
|
||||||
SetErrorMode(old_mode);
|
|
||||||
|
|
||||||
return p;
|
|
||||||
}
|
|
||||||
|
|
||||||
static const char * dl_error() {
|
|
||||||
return "";
|
|
||||||
}
|
|
||||||
|
|
||||||
#else
|
|
||||||
|
|
||||||
using dl_handle = void;
|
|
||||||
|
|
||||||
struct dl_handle_deleter {
|
|
||||||
void operator()(void * handle) {
|
|
||||||
dlclose(handle);
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
static void * dl_load_library(const fs::path & path) {
|
|
||||||
dl_handle * handle = dlopen(path.string().c_str(), RTLD_NOW | RTLD_LOCAL);
|
|
||||||
|
|
||||||
return handle;
|
|
||||||
}
|
|
||||||
|
|
||||||
static void * dl_get_sym(dl_handle * handle, const char * name) {
|
|
||||||
return dlsym(handle, name);
|
|
||||||
}
|
|
||||||
|
|
||||||
static const char * dl_error() {
|
|
||||||
const char *rslt = dlerror();
|
|
||||||
return rslt != nullptr ? rslt : "";
|
|
||||||
}
|
|
||||||
|
|
||||||
#endif
|
|
||||||
|
|
||||||
using dl_handle_ptr = std::unique_ptr<dl_handle, dl_handle_deleter>;
|
|
||||||
|
|
||||||
struct ggml_backend_reg_entry {
|
struct ggml_backend_reg_entry {
|
||||||
ggml_backend_reg_t reg;
|
ggml_backend_reg_t reg;
|
||||||
dl_handle_ptr handle;
|
dl_handle_ptr handle;
|
||||||
|
|
|
||||||
|
|
@ -1,7 +1,17 @@
|
||||||
|
file(TO_CMAKE_PATH "${HEXAGON_SDK_ROOT}" HEXAGON_SDK_ROOT)
|
||||||
|
file(TO_CMAKE_PATH "${HEXAGON_TOOLS_ROOT}" HEXAGON_TOOLS_ROOT)
|
||||||
|
|
||||||
|
if (NOT IS_DIRECTORY "${HEXAGON_SDK_ROOT}" OR NOT IS_DIRECTORY "${HEXAGON_TOOLS_ROOT}")
|
||||||
|
message(FATAL_ERROR "Make sure HEXAGON_SDK_ROOT and HEXAGON_TOOLS_ROOT point to the correct Hexagon SDK installation.")
|
||||||
|
endif()
|
||||||
|
|
||||||
|
message(STATUS "hexagon: using ${HEXAGON_SDK_ROOT} and ${HEXAGON_TOOLS_ROOT} for building libggml-htp skels")
|
||||||
|
|
||||||
include(${HEXAGON_SDK_ROOT}/build/cmake/hexagon_fun.cmake)
|
include(${HEXAGON_SDK_ROOT}/build/cmake/hexagon_fun.cmake)
|
||||||
include(ExternalProject)
|
include(ExternalProject)
|
||||||
|
|
||||||
option(GGML_HEXAGON_HTP_DEBUG "ggml-hexagon: enable HTP debug output" OFF)
|
option(GGML_HEXAGON_HTP_DEBUG "ggml-hexagon: enable HTP debug output" OFF)
|
||||||
|
set(GGML_HEXAGON_HTP_CERT "$ENV{HEXAGON_HTP_CERT}" CACHE PATH "ggml-hexagon: enable HTP library signing using certificate")
|
||||||
set(GGML_HEXAGON_FP32_QUANTIZE_GROUP_SIZE 128 CACHE STRING "ggml-hexagon: quantize group size (32, 64, or 128)")
|
set(GGML_HEXAGON_FP32_QUANTIZE_GROUP_SIZE 128 CACHE STRING "ggml-hexagon: quantize group size (32, 64, or 128)")
|
||||||
|
|
||||||
add_library(htp_iface OBJECT
|
add_library(htp_iface OBJECT
|
||||||
|
|
@ -25,56 +35,71 @@ else()
|
||||||
target_link_options(htp_iface PUBLIC -ldl)
|
target_link_options(htp_iface PUBLIC -ldl)
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
link_custom_library(htp_iface cdsprpc)
|
|
||||||
link_custom_library(htp_iface rpcmem)
|
|
||||||
|
|
||||||
set(TARGET_NAME ggml-hexagon)
|
set(TARGET_NAME ggml-hexagon)
|
||||||
ggml_add_backend_library(${TARGET_NAME}
|
ggml_add_backend_library(${TARGET_NAME}
|
||||||
ggml-hexagon.cpp htp-utils.c htp-utils.h ../../include/ggml-hexagon.h)
|
ggml-hexagon.cpp
|
||||||
|
htp-drv.cpp
|
||||||
|
htp-drv.h
|
||||||
|
libdl.h
|
||||||
|
../../include/ggml-hexagon.h)
|
||||||
|
|
||||||
target_link_libraries(${TARGET_NAME} PRIVATE htp_iface)
|
target_link_libraries(${TARGET_NAME} PRIVATE htp_iface)
|
||||||
target_include_directories(${TARGET_NAME} PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/htp ${CMAKE_CURRENT_BINARY_DIR})
|
target_include_directories(${TARGET_NAME} PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/htp ${CMAKE_CURRENT_BINARY_DIR})
|
||||||
|
|
||||||
# Build HTP bits
|
# Build HTP skels
|
||||||
set(HTP_CMAKE_ARGS
|
set(HTP_SKELS)
|
||||||
-DCMAKE_TOOLCHAIN_FILE=${CMAKE_CURRENT_SOURCE_DIR}/htp/cmake-toolchain.cmake
|
function(build_htp_skel V)
|
||||||
-DCMAKE_BUILD_TYPE=Release
|
ExternalProject_Add(htp-${V}
|
||||||
-DCMAKE_INSTALL_LIBDIR=${CMAKE_CURRENT_BINARY_DIR}
|
SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/htp BUILD_ALWAYS ON
|
||||||
-DHEXAGON_SDK_ROOT=$ENV{HEXAGON_SDK_ROOT}
|
BUILD_BYPRODUCTS ${CMAKE_CURRENT_BINARY_DIR}/libggml-htp-${V}.so
|
||||||
-DHEXAGON_TOOLS_ROOT=$ENV{HEXAGON_TOOLS_ROOT}
|
CMAKE_ARGS
|
||||||
-DHEXAGON_HTP_DEBUG=${GGML_HEXAGON_HTP_DEBUG}
|
-DCMAKE_BUILD_TYPE=Release
|
||||||
-DGGML_HEXAGON_FP32_QUANTIZE_GROUP_SIZE=${GGML_HEXAGON_FP32_QUANTIZE_GROUP_SIZE})
|
-DCMAKE_TOOLCHAIN_FILE=${CMAKE_CURRENT_SOURCE_DIR}/htp/cmake-toolchain.cmake
|
||||||
|
-DCMAKE_INSTALL_LIBDIR=${CMAKE_CURRENT_BINARY_DIR}
|
||||||
|
-DHEXAGON_SDK_ROOT=${HEXAGON_SDK_ROOT}
|
||||||
|
-DHEXAGON_TOOLS_ROOT=${HEXAGON_TOOLS_ROOT}
|
||||||
|
-DHEXAGON_HTP_DEBUG=${GGML_HEXAGON_HTP_DEBUG}
|
||||||
|
-DGGML_HEXAGON_FP32_QUANTIZE_GROUP_SIZE=${GGML_HEXAGON_FP32_QUANTIZE_GROUP_SIZE}
|
||||||
|
-DDSP_VERSION=${V}
|
||||||
|
-DPREBUILT_LIB_DIR="toolv19_${V}")
|
||||||
|
list(APPEND HTP_SKELS ${CMAKE_CURRENT_BINARY_DIR}/libggml-htp-${V}.so)
|
||||||
|
set(HTP_SKELS ${HTP_SKELS} PARENT_SCOPE)
|
||||||
|
endfunction()
|
||||||
|
|
||||||
ExternalProject_Add(htp-v68
|
build_htp_skel(v68)
|
||||||
SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/htp BUILD_ALWAYS ON
|
build_htp_skel(v69)
|
||||||
CMAKE_ARGS ${HTP_CMAKE_ARGS} -DDSP_VERSION=v68 -DPREBUILT_LIB_DIR="toolv19_v68")
|
build_htp_skel(v73)
|
||||||
|
build_htp_skel(v75)
|
||||||
ExternalProject_Add(htp-v69
|
build_htp_skel(v79)
|
||||||
SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/htp BUILD_ALWAYS ON
|
build_htp_skel(v81)
|
||||||
CMAKE_ARGS ${HTP_CMAKE_ARGS} -DDSP_VERSION=v69 -DPREBUILT_LIB_DIR="toolv19_v69")
|
|
||||||
|
|
||||||
ExternalProject_Add(htp-v73
|
|
||||||
SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/htp BUILD_ALWAYS ON
|
|
||||||
CMAKE_ARGS ${HTP_CMAKE_ARGS} -DDSP_VERSION=v73 -DPREBUILT_LIB_DIR="toolv19_v73")
|
|
||||||
|
|
||||||
ExternalProject_Add(htp-v75
|
|
||||||
SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/htp BUILD_ALWAYS ON
|
|
||||||
CMAKE_ARGS ${HTP_CMAKE_ARGS} -DDSP_VERSION=v75 -DPREBUILT_LIB_DIR="toolv19_v75")
|
|
||||||
|
|
||||||
ExternalProject_Add(htp-v79
|
|
||||||
SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/htp BUILD_ALWAYS ON
|
|
||||||
CMAKE_ARGS ${HTP_CMAKE_ARGS} -DDSP_VERSION=v79 -DPREBUILT_LIB_DIR="toolv19_v79")
|
|
||||||
|
|
||||||
ExternalProject_Add(htp-v81
|
|
||||||
SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/htp BUILD_ALWAYS ON
|
|
||||||
CMAKE_ARGS ${HTP_CMAKE_ARGS} -DDSP_VERSION=v81 -DPREBUILT_LIB_DIR="toolv19_v81")
|
|
||||||
|
|
||||||
# Install Hexagon skels required at runtime
|
# Install Hexagon skels required at runtime
|
||||||
install(FILES
|
install(FILES ${HTP_SKELS} TYPE LIB)
|
||||||
${CMAKE_CURRENT_BINARY_DIR}/libggml-htp-v68.so
|
|
||||||
${CMAKE_CURRENT_BINARY_DIR}/libggml-htp-v69.so
|
if (CMAKE_SYSTEM_NAME MATCHES Windows AND GGML_HEXAGON_HTP_CERT)
|
||||||
${CMAKE_CURRENT_BINARY_DIR}/libggml-htp-v73.so
|
file(TO_CMAKE_PATH "$ENV{WINDOWS_SDK_BIN}/arm64" WINSDK_BIN0_ARM64)
|
||||||
${CMAKE_CURRENT_BINARY_DIR}/libggml-htp-v75.so
|
file(TO_CMAKE_PATH "$ENV{WINDOWS_SDK_BIN}/x86" WINSDK_BIN0_X86)
|
||||||
${CMAKE_CURRENT_BINARY_DIR}/libggml-htp-v79.so
|
file(TO_CMAKE_PATH "$ENV{WindowsSdkVerBinPath}/arm64" WINSDK_BIN1_ARM64)
|
||||||
${CMAKE_CURRENT_BINARY_DIR}/libggml-htp-v81.so
|
file(TO_CMAKE_PATH "$ENV{WindowsSdkVerBinPath}/x86" WINSDK_BIN1_X86)
|
||||||
TYPE LIB)
|
|
||||||
|
set(WINSDK_PATHS ${WINSDK_BIN0_ARM64} ${WINSDK_BIN0_X86} ${WINSDK_BIN1_ARM64} ${WINSDK_BIN1_X86})
|
||||||
|
|
||||||
|
find_program(INF2CAT NAMES inf2cat.exe PATHS ${WINSDK_PATHS} REQUIRED)
|
||||||
|
find_program(SIGNTOOL NAMES signtool.exe PATHS ${WINSDK_PATHS} REQUIRED)
|
||||||
|
|
||||||
|
message(STATUS "hexagon: using ${GGML_HEXAGON_HTP_CERT} to sign libggml-htp skels")
|
||||||
|
|
||||||
|
set(LIBGGML_HTP_CAT ${CMAKE_CURRENT_BINARY_DIR}/libggml-htp.cat)
|
||||||
|
add_custom_target(libggml-htp-cat
|
||||||
|
BYPRODUCTS ${LIBGGML_HTP_CAT}
|
||||||
|
DEPENDS libggml-htp.inf ${HTP_SKELS}
|
||||||
|
COMMAND ${CMAKE_COMMAND} -E copy ${CMAKE_CURRENT_SOURCE_DIR}/libggml-htp.inf ${CMAKE_CURRENT_BINARY_DIR}
|
||||||
|
COMMAND ${INF2CAT} /driver:${CMAKE_CURRENT_BINARY_DIR} /os:10_25H2_ARM64
|
||||||
|
COMMAND ${SIGNTOOL} sign /fd sha256 /f ${GGML_HEXAGON_HTP_CERT} ${LIBGGML_HTP_CAT}
|
||||||
|
COMMENT "generating and signing libggml-htp.cat file"
|
||||||
|
VERBATIM
|
||||||
|
)
|
||||||
|
|
||||||
|
add_dependencies(${TARGET_NAME} libggml-htp-cat)
|
||||||
|
install(FILES ${LIBGGML_HTP_CAT} TYPE LIB)
|
||||||
|
endif()
|
||||||
|
|
|
||||||
|
|
@ -14,9 +14,6 @@
|
||||||
|
|
||||||
#ifdef _WIN32
|
#ifdef _WIN32
|
||||||
# include <sal.h>
|
# include <sal.h>
|
||||||
# ifndef _WINDOWS
|
|
||||||
# define _WINDOWS
|
|
||||||
# endif
|
|
||||||
#else
|
#else
|
||||||
# include <semaphore.h>
|
# include <semaphore.h>
|
||||||
# include <unistd.h>
|
# include <unistd.h>
|
||||||
|
|
@ -25,8 +22,6 @@
|
||||||
#pragma clang diagnostic ignored "-Wnested-anon-types"
|
#pragma clang diagnostic ignored "-Wnested-anon-types"
|
||||||
#pragma clang diagnostic ignored "-Wgnu-anonymous-struct"
|
#pragma clang diagnostic ignored "-Wgnu-anonymous-struct"
|
||||||
|
|
||||||
#include "htp-utils.h"
|
|
||||||
|
|
||||||
#include <AEEStdErr.h>
|
#include <AEEStdErr.h>
|
||||||
#include <dspqueue.h>
|
#include <dspqueue.h>
|
||||||
#include <rpcmem.h>
|
#include <rpcmem.h>
|
||||||
|
|
@ -40,6 +35,7 @@
|
||||||
#include "op-desc.h"
|
#include "op-desc.h"
|
||||||
#include "htp-msg.h"
|
#include "htp-msg.h"
|
||||||
#include "htp_iface.h"
|
#include "htp_iface.h"
|
||||||
|
#include "htp-drv.h"
|
||||||
|
|
||||||
static size_t opt_ndev = 1;
|
static size_t opt_ndev = 1;
|
||||||
static size_t opt_nhvx = 0; // use all
|
static size_t opt_nhvx = 0; // use all
|
||||||
|
|
@ -150,9 +146,9 @@ void ggml_hexagon_session::enqueue(struct htp_general_req &req, struct dspqueue_
|
||||||
0, // flags - the framework will autoset this
|
0, // flags - the framework will autoset this
|
||||||
n_bufs, // number of buffers
|
n_bufs, // number of buffers
|
||||||
bufs, // buffer references
|
bufs, // buffer references
|
||||||
sizeof(req),
|
sizeof(req), // Message length
|
||||||
(const uint8_t *) &req, // Message
|
(const uint8_t *) &req, // Message
|
||||||
1000000 // Timeout
|
DSPQUEUE_TIMEOUT // Timeout
|
||||||
);
|
);
|
||||||
|
|
||||||
if (err != 0) {
|
if (err != 0) {
|
||||||
|
|
@ -182,13 +178,13 @@ void ggml_hexagon_session::flush() {
|
||||||
|
|
||||||
// Read response packet from queue
|
// Read response packet from queue
|
||||||
int err = dspqueue_read(q, &flags,
|
int err = dspqueue_read(q, &flags,
|
||||||
HTP_MAX_PACKET_BUFFERS, // Maximum number of buffer references
|
HTP_MAX_PACKET_BUFFERS, // Maximum number of buffer references
|
||||||
&n_bufs, // Number of buffer references
|
&n_bufs, // Number of buffer references
|
||||||
bufs, // Buffer references
|
bufs, // Buffer references
|
||||||
sizeof(rsp), // Max message length
|
sizeof(rsp), // Max message length
|
||||||
&rsp_size, // Message length
|
&rsp_size, // Message length
|
||||||
(uint8_t *) &rsp,
|
(uint8_t *) &rsp, // Message
|
||||||
1000000); // Timeout
|
DSPQUEUE_TIMEOUT); // Timeout
|
||||||
|
|
||||||
if (err == AEE_EEXPIRED) {
|
if (err == AEE_EEXPIRED) {
|
||||||
// TODO: might need to bail out if the HTP is stuck on something
|
// TODO: might need to bail out if the HTP is stuck on something
|
||||||
|
|
@ -269,13 +265,7 @@ struct ggml_backend_hexagon_buffer_context {
|
||||||
ggml_backend_hexagon_buffer_context(ggml_hexagon_session * sess, size_t size, bool repack) {
|
ggml_backend_hexagon_buffer_context(ggml_hexagon_session * sess, size_t size, bool repack) {
|
||||||
size += 4 * 1024; // extra page for padding
|
size += 4 * 1024; // extra page for padding
|
||||||
|
|
||||||
if (rpcmem_alloc2) {
|
this->base = (uint8_t *) rpcmem_alloc2(RPCMEM_HEAP_ID_SYSTEM, RPCMEM_DEFAULT_FLAGS | RPCMEM_HEAP_NOREG, size);
|
||||||
this->base = (uint8_t *) rpcmem_alloc2(RPCMEM_HEAP_ID_SYSTEM, RPCMEM_DEFAULT_FLAGS | RPCMEM_HEAP_NOREG, size);
|
|
||||||
} else {
|
|
||||||
GGML_LOG_INFO("ggml-hex: %s rpcmem_alloc2 not found, falling back to rpcmem_alloc\n", sess->name.c_str());
|
|
||||||
this->base = (uint8_t *) rpcmem_alloc(RPCMEM_HEAP_ID_SYSTEM, RPCMEM_DEFAULT_FLAGS | RPCMEM_HEAP_NOREG, size);
|
|
||||||
}
|
|
||||||
|
|
||||||
if (!this->base) {
|
if (!this->base) {
|
||||||
GGML_LOG_ERROR("ggml-hex: %s failed to allocate buffer : size %zu\n", sess->name.c_str(), size);
|
GGML_LOG_ERROR("ggml-hex: %s failed to allocate buffer : size %zu\n", sess->name.c_str(), size);
|
||||||
throw std::runtime_error("ggml-hex: rpcmem_alloc failed (see log for details)");
|
throw std::runtime_error("ggml-hex: rpcmem_alloc failed (see log for details)");
|
||||||
|
|
@ -2461,12 +2451,12 @@ static void ggml_backend_hexagon_free(ggml_backend_t backend) {
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline bool op_reuse_src1(const ggml_tensor * op1, const ggml_tensor * op0) {
|
static inline bool op_reuse_src1(const ggml_tensor * op1, const ggml_tensor * op0) {
|
||||||
return (op0 && op0->src[1] == op1->src[1] && ggml_is_quantized(op0->src[0]->type) && ggml_is_quantized(op1->src[1]->type));
|
return (op0 && op0->src[1] == op1->src[1] && ggml_is_quantized(op0->src[0]->type));
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline bool is_compute_op(ggml_tensor *node)
|
static inline bool is_compute_op(ggml_tensor *node)
|
||||||
{
|
{
|
||||||
return !(ggml_op_is_empty(node->op) || ggml_is_empty(node));
|
return !ggml_op_is_empty(node->op) && !ggml_is_empty(node) && (node->flags & GGML_TENSOR_FLAG_COMPUTE);
|
||||||
}
|
}
|
||||||
|
|
||||||
// scan the graph and figure out last compute op index
|
// scan the graph and figure out last compute op index
|
||||||
|
|
@ -2488,7 +2478,7 @@ static ggml_status ggml_backend_hexagon_graph_compute(ggml_backend_t backend, gg
|
||||||
|
|
||||||
const int last = last_compute_op(graph);
|
const int last = last_compute_op(graph);
|
||||||
|
|
||||||
const struct ggml_tensor * prev_quant_op = nullptr; // prev executed op with quantizer
|
const struct ggml_tensor * prev_op = nullptr; // prev executed op
|
||||||
|
|
||||||
for (int i = 0; i < graph->n_nodes; ++i) {
|
for (int i = 0; i < graph->n_nodes; ++i) {
|
||||||
ggml_tensor * node = graph->nodes[i];
|
ggml_tensor * node = graph->nodes[i];
|
||||||
|
|
@ -2497,17 +2487,15 @@ static ggml_status ggml_backend_hexagon_graph_compute(ggml_backend_t backend, gg
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
if ((node->flags & GGML_TENSOR_FLAG_COMPUTE) == 0) {
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
uint32_t flags = 0;
|
uint32_t flags = 0;
|
||||||
|
|
||||||
// skip quantizer if src1 is reused
|
// skip quantizer if src1 is reused
|
||||||
if (op_reuse_src1(node, prev_quant_op)) {
|
if (op_reuse_src1(node, prev_op)) {
|
||||||
flags |= HTP_OPFLAGS_SKIP_QUANTIZE;
|
flags |= HTP_OPFLAGS_SKIP_QUANTIZE;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
prev_op = node;
|
||||||
|
|
||||||
// ask for early notification for the last Op
|
// ask for early notification for the last Op
|
||||||
if (i == last) {
|
if (i == last) {
|
||||||
flags |= HTP_OPFLAGS_EARLY_WAKEUP;
|
flags |= HTP_OPFLAGS_EARLY_WAKEUP;
|
||||||
|
|
@ -2520,7 +2508,6 @@ static ggml_status ggml_backend_hexagon_graph_compute(ggml_backend_t backend, gg
|
||||||
} else {
|
} else {
|
||||||
ggml_hexagon_dispatch_op<init_binary_req<false>>(sess, node, flags);
|
ggml_hexagon_dispatch_op<init_binary_req<false>>(sess, node, flags);
|
||||||
}
|
}
|
||||||
prev_quant_op = node;
|
|
||||||
break;
|
break;
|
||||||
case GGML_OP_MUL_MAT_ID:
|
case GGML_OP_MUL_MAT_ID:
|
||||||
if (ggml_is_quantized(node->src[0]->type)) {
|
if (ggml_is_quantized(node->src[0]->type)) {
|
||||||
|
|
@ -2528,7 +2515,6 @@ static ggml_status ggml_backend_hexagon_graph_compute(ggml_backend_t backend, gg
|
||||||
} else {
|
} else {
|
||||||
ggml_hexagon_dispatch_op<init_binary_id_req<false>>(sess, node, flags);
|
ggml_hexagon_dispatch_op<init_binary_id_req<false>>(sess, node, flags);
|
||||||
}
|
}
|
||||||
prev_quant_op = node;
|
|
||||||
break;
|
break;
|
||||||
case GGML_OP_MUL:
|
case GGML_OP_MUL:
|
||||||
case GGML_OP_ADD:
|
case GGML_OP_ADD:
|
||||||
|
|
@ -2670,7 +2656,7 @@ static std::vector<int> ggml_hexagon_graph_optimize_reorder(const std::vector<no
|
||||||
}
|
}
|
||||||
|
|
||||||
// that many nodes forward to search for stackable nodes that can reuse VTCM
|
// that many nodes forward to search for stackable nodes that can reuse VTCM
|
||||||
constexpr int N_FORWARD = 8;
|
constexpr int N_FORWARD = 16;
|
||||||
|
|
||||||
for (int i1 = i0 + 1; i1 < i0 + N_FORWARD && i1 < n; i1++) {
|
for (int i1 = i0 + 1; i1 < i0 + N_FORWARD && i1 < n; i1++) {
|
||||||
if (used[i1]) {
|
if (used[i1]) {
|
||||||
|
|
@ -3056,10 +3042,12 @@ ggml_hexagon_registry::ggml_hexagon_registry(ggml_backend_reg_t reg) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#if defined(__ANDROID__)
|
||||||
if (opt_arch < 75) {
|
if (opt_arch < 75) {
|
||||||
opt_ndev = 1;
|
opt_ndev = 1;
|
||||||
GGML_LOG_WARN("ggml-hex: forcing ndev to 1 for SoCs archs lower than v75.\n");
|
GGML_LOG_WARN("ggml-hex: forcing ndev to 1 for SoCs archs lower than v75.\n");
|
||||||
}
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
GGML_LOG_INFO("ggml-hex: Hexagon Arch version v%d\n", opt_arch);
|
GGML_LOG_INFO("ggml-hex: Hexagon Arch version v%d\n", opt_arch);
|
||||||
|
|
||||||
|
|
@ -3156,6 +3144,8 @@ static void ggml_hexagon_init(ggml_backend_reg * reg) {
|
||||||
opt_arch = strtoul(str_arch, NULL, 0);
|
opt_arch = strtoul(str_arch, NULL, 0);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
opt_hostbuf = str_hostbuf ? atoi(str_hostbuf) : 1;
|
||||||
|
|
||||||
reg->context = new ggml_hexagon_registry(reg);
|
reg->context = new ggml_hexagon_registry(reg);
|
||||||
|
|
||||||
HEX_VERBOSE("ggml-hex: size-of-general-req %zu size-of-general-rsp %zu\n", sizeof(struct htp_general_req),
|
HEX_VERBOSE("ggml-hex: size-of-general-req %zu size-of-general-rsp %zu\n", sizeof(struct htp_general_req),
|
||||||
|
|
@ -3180,6 +3170,11 @@ ggml_backend_reg_t ggml_backend_hexagon_reg(void) {
|
||||||
static std::mutex mutex;
|
static std::mutex mutex;
|
||||||
std::lock_guard<std::mutex> lock(mutex);
|
std::lock_guard<std::mutex> lock(mutex);
|
||||||
if (!initialized) {
|
if (!initialized) {
|
||||||
|
auto nErr = htpdrv_init();
|
||||||
|
if (nErr != AEE_SUCCESS) {
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
|
||||||
ggml_hexagon_init(®);
|
ggml_hexagon_init(®);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,418 @@
|
||||||
|
// sample drv interface
|
||||||
|
|
||||||
|
#pragma clang diagnostic ignored "-Wgnu-anonymous-struct"
|
||||||
|
#pragma clang diagnostic ignored "-Wmissing-prototypes"
|
||||||
|
#pragma clang diagnostic ignored "-Wsign-compare"
|
||||||
|
|
||||||
|
#include <filesystem>
|
||||||
|
#include <set>
|
||||||
|
#include <sstream>
|
||||||
|
#include <string>
|
||||||
|
#ifdef _WIN32
|
||||||
|
# define WIN32_LEAN_AND_MEAN
|
||||||
|
# ifndef NOMINMAX
|
||||||
|
# define NOMINMAX
|
||||||
|
# endif
|
||||||
|
# include <windows.h>
|
||||||
|
# include <winevt.h>
|
||||||
|
#else
|
||||||
|
# include <dlfcn.h>
|
||||||
|
# include <unistd.h>
|
||||||
|
#endif
|
||||||
|
#include "ggml-impl.h"
|
||||||
|
#include "htp-drv.h"
|
||||||
|
#include "libdl.h"
|
||||||
|
|
||||||
|
#include <domain.h>
|
||||||
|
|
||||||
|
//
|
||||||
|
// Driver API types
|
||||||
|
//
|
||||||
|
|
||||||
|
typedef void * (*rpcmem_alloc_pfn_t)(int heapid, uint32_t flags, int size);
|
||||||
|
typedef void * (*rpcmem_alloc2_pfn_t)(int heapid, uint32_t flags, size_t size);
|
||||||
|
typedef void (*rpcmem_free_pfn_t)(void * po);
|
||||||
|
typedef int (*rpcmem_to_fd_pfn_t)(void * po);
|
||||||
|
|
||||||
|
typedef AEEResult (*dspqueue_create_pfn_t)(int domain,
|
||||||
|
uint32_t flags,
|
||||||
|
uint32_t req_queue_size,
|
||||||
|
uint32_t resp_queue_size,
|
||||||
|
dspqueue_callback_t packet_callback,
|
||||||
|
dspqueue_callback_t error_callback,
|
||||||
|
void * callback_context,
|
||||||
|
dspqueue_t * queue);
|
||||||
|
typedef AEEResult (*dspqueue_close_pfn_t)(dspqueue_t queue);
|
||||||
|
typedef AEEResult (*dspqueue_export_pfn_t)(dspqueue_t queue, uint64_t *queue_id);
|
||||||
|
typedef AEEResult (*dspqueue_write_pfn_t)(dspqueue_t queue, uint32_t flags,
|
||||||
|
uint32_t num_buffers,
|
||||||
|
struct dspqueue_buffer *buffers,
|
||||||
|
uint32_t message_length,
|
||||||
|
const uint8_t *message,
|
||||||
|
uint32_t timeout_us);
|
||||||
|
typedef AEEResult (*dspqueue_read_pfn_t)(dspqueue_t queue, uint32_t *flags,
|
||||||
|
uint32_t max_buffers, uint32_t *num_buffers,
|
||||||
|
struct dspqueue_buffer *buffers,
|
||||||
|
uint32_t max_message_length,
|
||||||
|
uint32_t *message_length, uint8_t *message,
|
||||||
|
uint32_t timeout_us);
|
||||||
|
|
||||||
|
typedef int (*fastrpc_mmap_pfn_t)(int domain, int fd, void *addr, int offset, size_t length, enum fastrpc_map_flags flags);
|
||||||
|
typedef int (*fastrpc_munmap_pfn_t)(int domain, int fd, void *addr, size_t length);
|
||||||
|
|
||||||
|
typedef int (*remote_handle64_open_pfn_t)(const char* name, remote_handle64 *ph);
|
||||||
|
typedef int (*remote_handle64_invoke_pfn_t)(remote_handle64 h, uint32_t dwScalars, remote_arg *pra);
|
||||||
|
typedef int (*remote_handle64_close_pfn_t)(remote_handle h);
|
||||||
|
typedef int (*remote_handle_control_pfn_t)(uint32_t req, void* data, uint32_t datalen);
|
||||||
|
typedef int (*remote_handle64_control_pfn_t)(remote_handle64 h, uint32_t req, void* data, uint32_t datalen);
|
||||||
|
typedef int (*remote_session_control_pfn_t)(uint32_t req, void *data, uint32_t datalen);
|
||||||
|
|
||||||
|
//
|
||||||
|
// Driver API pfns
|
||||||
|
//
|
||||||
|
|
||||||
|
rpcmem_alloc_pfn_t rpcmem_alloc_pfn = nullptr;
|
||||||
|
rpcmem_alloc2_pfn_t rpcmem_alloc2_pfn = nullptr;
|
||||||
|
rpcmem_free_pfn_t rpcmem_free_pfn = nullptr;
|
||||||
|
rpcmem_to_fd_pfn_t rpcmem_to_fd_pfn = nullptr;
|
||||||
|
|
||||||
|
fastrpc_mmap_pfn_t fastrpc_mmap_pfn = nullptr;
|
||||||
|
fastrpc_munmap_pfn_t fastrpc_munmap_pfn = nullptr;
|
||||||
|
|
||||||
|
dspqueue_create_pfn_t dspqueue_create_pfn = nullptr;
|
||||||
|
dspqueue_close_pfn_t dspqueue_close_pfn = nullptr;
|
||||||
|
dspqueue_export_pfn_t dspqueue_export_pfn = nullptr;
|
||||||
|
dspqueue_write_pfn_t dspqueue_write_pfn = nullptr;
|
||||||
|
dspqueue_read_pfn_t dspqueue_read_pfn = nullptr;
|
||||||
|
|
||||||
|
remote_handle64_open_pfn_t remote_handle64_open_pfn = nullptr;
|
||||||
|
remote_handle64_invoke_pfn_t remote_handle64_invoke_pfn = nullptr;
|
||||||
|
remote_handle64_close_pfn_t remote_handle64_close_pfn = nullptr;
|
||||||
|
remote_handle_control_pfn_t remote_handle_control_pfn = nullptr;
|
||||||
|
remote_handle64_control_pfn_t remote_handle64_control_pfn = nullptr;
|
||||||
|
remote_session_control_pfn_t remote_session_control_pfn = nullptr;
|
||||||
|
|
||||||
|
//
|
||||||
|
// Driver API
|
||||||
|
//
|
||||||
|
|
||||||
|
void * rpcmem_alloc(int heapid, uint32_t flags, int size) {
|
||||||
|
return rpcmem_alloc_pfn(heapid, flags, size);
|
||||||
|
}
|
||||||
|
|
||||||
|
void * rpcmem_alloc2(int heapid, uint32_t flags, size_t size) {
|
||||||
|
if (rpcmem_alloc2_pfn) {
|
||||||
|
return rpcmem_alloc2_pfn(heapid, flags, size);
|
||||||
|
} else {
|
||||||
|
GGML_LOG_INFO("ggml-hex: rpcmem_alloc2 not found, falling back to rpcmem_alloc\n");
|
||||||
|
return rpcmem_alloc_pfn(heapid, flags, size);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void rpcmem_free(void * po) {
|
||||||
|
return rpcmem_free_pfn(po);
|
||||||
|
}
|
||||||
|
|
||||||
|
int rpcmem_to_fd(void * po) {
|
||||||
|
return rpcmem_to_fd_pfn(po);
|
||||||
|
}
|
||||||
|
|
||||||
|
HTPDRV_API int fastrpc_mmap(int domain, int fd, void * addr, int offset, size_t length, enum fastrpc_map_flags flags) {
|
||||||
|
return fastrpc_mmap_pfn(domain, fd, addr, offset, length, flags);
|
||||||
|
}
|
||||||
|
|
||||||
|
HTPDRV_API int fastrpc_munmap(int domain, int fd, void * addr, size_t length) {
|
||||||
|
return fastrpc_munmap_pfn(domain, fd, addr, length);
|
||||||
|
}
|
||||||
|
|
||||||
|
AEEResult dspqueue_create(int domain,
|
||||||
|
uint32_t flags,
|
||||||
|
uint32_t req_queue_size,
|
||||||
|
uint32_t resp_queue_size,
|
||||||
|
dspqueue_callback_t packet_callback,
|
||||||
|
dspqueue_callback_t error_callback,
|
||||||
|
void * callback_context,
|
||||||
|
dspqueue_t * queue) {
|
||||||
|
return dspqueue_create_pfn(domain, flags, req_queue_size, resp_queue_size, packet_callback, error_callback,
|
||||||
|
callback_context, queue);
|
||||||
|
}
|
||||||
|
|
||||||
|
AEEResult dspqueue_close(dspqueue_t queue) {
|
||||||
|
return dspqueue_close_pfn(queue);
|
||||||
|
}
|
||||||
|
|
||||||
|
AEEResult dspqueue_export(dspqueue_t queue, uint64_t * queue_id) {
|
||||||
|
return dspqueue_export_pfn(queue, queue_id);
|
||||||
|
}
|
||||||
|
|
||||||
|
AEEResult dspqueue_write(dspqueue_t queue,
|
||||||
|
uint32_t flags,
|
||||||
|
uint32_t num_buffers,
|
||||||
|
struct dspqueue_buffer * buffers,
|
||||||
|
uint32_t message_length,
|
||||||
|
const uint8_t * message,
|
||||||
|
uint32_t timeout_us) {
|
||||||
|
return dspqueue_write_pfn(queue, flags, num_buffers, buffers, message_length, message, timeout_us);
|
||||||
|
}
|
||||||
|
|
||||||
|
AEEResult dspqueue_read(dspqueue_t queue,
|
||||||
|
uint32_t * flags,
|
||||||
|
uint32_t max_buffers,
|
||||||
|
uint32_t * num_buffers,
|
||||||
|
struct dspqueue_buffer * buffers,
|
||||||
|
uint32_t max_message_length,
|
||||||
|
uint32_t * message_length,
|
||||||
|
uint8_t * message,
|
||||||
|
uint32_t timeout_us) {
|
||||||
|
return dspqueue_read_pfn(queue, flags, max_buffers, num_buffers, buffers, max_message_length, message_length,
|
||||||
|
message, timeout_us);
|
||||||
|
}
|
||||||
|
|
||||||
|
HTPDRV_API int remote_handle64_open(const char * name, remote_handle64 * ph) {
|
||||||
|
return remote_handle64_open_pfn(name, ph);
|
||||||
|
}
|
||||||
|
|
||||||
|
HTPDRV_API int remote_handle64_invoke(remote_handle64 h, uint32_t dwScalars, remote_arg * pra) {
|
||||||
|
return remote_handle64_invoke_pfn(h, dwScalars, pra);
|
||||||
|
}
|
||||||
|
|
||||||
|
HTPDRV_API int remote_handle64_close(remote_handle64 h) {
|
||||||
|
return remote_handle64_close_pfn(h);
|
||||||
|
}
|
||||||
|
|
||||||
|
HTPDRV_API int remote_handle_control(uint32_t req, void * data, uint32_t datalen) {
|
||||||
|
return remote_handle_control_pfn(req, data, datalen);
|
||||||
|
}
|
||||||
|
|
||||||
|
HTPDRV_API int remote_handle64_control(remote_handle64 h, uint32_t req, void * data, uint32_t datalen) {
|
||||||
|
return remote_handle64_control_pfn(h, req, data, datalen);
|
||||||
|
}
|
||||||
|
|
||||||
|
HTPDRV_API int remote_session_control(uint32_t req, void * data, uint32_t datalen) {
|
||||||
|
return remote_session_control_pfn(req, data, datalen);
|
||||||
|
}
|
||||||
|
|
||||||
|
#ifdef _WIN32
|
||||||
|
|
||||||
|
static std::string wstr_to_str(std::wstring_view wstr) {
|
||||||
|
std::string result;
|
||||||
|
if (wstr.empty()) {
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
auto bytes_needed = WideCharToMultiByte(CP_UTF8, WC_ERR_INVALID_CHARS,
|
||||||
|
wstr.data(), (int) wstr.size(),
|
||||||
|
nullptr, 0, nullptr, nullptr);
|
||||||
|
if (bytes_needed == 0) {
|
||||||
|
GGML_LOG_ERROR("ggml-hex: WideCharToMultiByte failed. Error %lu\n", GetLastError());
|
||||||
|
throw std::runtime_error("Invalid wstring input");
|
||||||
|
}
|
||||||
|
|
||||||
|
result.resize(bytes_needed, '\0');
|
||||||
|
int bytes_written = WideCharToMultiByte(CP_UTF8, WC_ERR_INVALID_CHARS,
|
||||||
|
wstr.data(), (int) wstr.size(),
|
||||||
|
result.data(), bytes_needed,
|
||||||
|
nullptr, nullptr);
|
||||||
|
if (bytes_written == 0) {
|
||||||
|
GGML_LOG_ERROR("ggml-hex: WideCharToMultiByte failed. Error %lu\n", GetLastError());
|
||||||
|
throw std::runtime_error("Wstring conversion failed");
|
||||||
|
}
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
static std::string get_driver_path() {
|
||||||
|
std::wstring serviceName = L"qcnspmcdm";
|
||||||
|
std::string result;
|
||||||
|
|
||||||
|
// Get a handle to the SCM database.
|
||||||
|
SC_HANDLE schSCManager = OpenSCManagerW(NULL, NULL, STANDARD_RIGHTS_READ);
|
||||||
|
if (nullptr == schSCManager) {
|
||||||
|
GGML_LOG_ERROR("ggml-hex: Failed to open SCManager. Error: %lu\n", GetLastError());
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Get a handle to the service.
|
||||||
|
SC_HANDLE schService = OpenServiceW(schSCManager, // SCM database
|
||||||
|
serviceName.c_str(), // name of service
|
||||||
|
SERVICE_QUERY_CONFIG); // need query config access
|
||||||
|
|
||||||
|
if (nullptr == schService) {
|
||||||
|
GGML_LOG_ERROR("ggml-hex: Failed to open qcnspmcdm service. Error: %lu\n", GetLastError());
|
||||||
|
CloseServiceHandle(schSCManager);
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Store the size of buffer used as an output.
|
||||||
|
DWORD bufferSize;
|
||||||
|
if (!QueryServiceConfigW(schService, NULL, 0, &bufferSize) &&
|
||||||
|
(GetLastError() != ERROR_INSUFFICIENT_BUFFER)) {
|
||||||
|
GGML_LOG_ERROR("ggml-hex: Failed to query service config. Error: %lu\n", GetLastError());
|
||||||
|
CloseServiceHandle(schService);
|
||||||
|
CloseServiceHandle(schSCManager);
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
// Get the configuration of the service.
|
||||||
|
LPQUERY_SERVICE_CONFIGW serviceConfig =
|
||||||
|
static_cast<LPQUERY_SERVICE_CONFIGW>(LocalAlloc(LMEM_FIXED, bufferSize));
|
||||||
|
if (!QueryServiceConfigW(schService, serviceConfig, bufferSize, &bufferSize)) {
|
||||||
|
fprintf(stderr, "ggml-hex: Failed to query service config. Error: %lu\n", GetLastError());
|
||||||
|
LocalFree(serviceConfig);
|
||||||
|
CloseServiceHandle(schService);
|
||||||
|
CloseServiceHandle(schSCManager);
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Read the driver file path get its parent directory
|
||||||
|
std::wstring driverPath = std::wstring(serviceConfig->lpBinaryPathName);
|
||||||
|
driverPath = driverPath.substr(0, driverPath.find_last_of(L"\\"));
|
||||||
|
|
||||||
|
// Clean up resources
|
||||||
|
LocalFree(serviceConfig);
|
||||||
|
CloseServiceHandle(schService);
|
||||||
|
CloseServiceHandle(schSCManager);
|
||||||
|
|
||||||
|
// Driver path would contain invalid path string, like:
|
||||||
|
// \SystemRoot\System32\DriverStore\FileRepository\qcadsprpc8280.inf_arm64_c2b9460c9a072f37
|
||||||
|
// "\SystemRoot" should be replace with a correct one (e.g. C:\Windows)
|
||||||
|
const std::wstring systemRootPlaceholder = L"\\SystemRoot";
|
||||||
|
if (0 != driverPath.compare(0, systemRootPlaceholder.length(), systemRootPlaceholder)) {
|
||||||
|
GGML_LOG_ERROR("ggml-hex: String pattern not found in driver path.\n");
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Replace \SystemRoot with an absolute path from system ENV windir
|
||||||
|
const std::wstring systemRootEnv = L"windir";
|
||||||
|
|
||||||
|
// Query the number of wide charactors this variable requires
|
||||||
|
DWORD numWords = GetEnvironmentVariableW(systemRootEnv.c_str(), NULL, 0);
|
||||||
|
if (numWords == 0) {
|
||||||
|
GGML_LOG_ERROR("ggml-hex: Failed get systemRoot environment variable\n");
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Query the actual system root name from environment variable
|
||||||
|
std::vector<wchar_t> systemRoot(numWords + 1);
|
||||||
|
numWords = GetEnvironmentVariableW(systemRootEnv.c_str(), systemRoot.data(), numWords + 1);
|
||||||
|
if (numWords == 0) {
|
||||||
|
GGML_LOG_ERROR("ggml-hex: Failed to read windir environment variable\n");
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
driverPath.replace(0, systemRootPlaceholder.length(), std::wstring(systemRoot.data()));
|
||||||
|
|
||||||
|
return wstr_to_str(driverPath);
|
||||||
|
}
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
||||||
|
using dl_handle_ptr = std::unique_ptr<dl_handle, dl_handle_deleter>;
|
||||||
|
|
||||||
|
int htpdrv_init() {
|
||||||
|
static dl_handle_ptr lib_cdsp_rpc_handle = nullptr;
|
||||||
|
static bool initialized = false;
|
||||||
|
#ifdef _WIN32
|
||||||
|
std::string drv_path = get_driver_path() + "\\" + "libcdsprpc.dll";
|
||||||
|
#else
|
||||||
|
std::string drv_path = "libcdsprpc.so";
|
||||||
|
#endif
|
||||||
|
if (initialized) {
|
||||||
|
GGML_LOG_INFO("ggml-hex: Driver already loaded\n");
|
||||||
|
return AEE_SUCCESS;
|
||||||
|
}
|
||||||
|
GGML_LOG_INFO("ggml-hex: Loading driver %s\n", drv_path.c_str());
|
||||||
|
|
||||||
|
fs::path path{ drv_path.c_str() };
|
||||||
|
dl_handle_ptr handle { dl_load_library(path) };
|
||||||
|
if (!handle) {
|
||||||
|
GGML_LOG_ERROR("ggml-hex: failed to load %s: %s\n", path.u8string().c_str(), dl_error());
|
||||||
|
return AEE_EUNABLETOLOAD;
|
||||||
|
}
|
||||||
|
|
||||||
|
#define dlsym(drv, type, pfn, symbol, ignore) \
|
||||||
|
do { \
|
||||||
|
pfn = (type) dl_get_sym(drv, #symbol); \
|
||||||
|
if (!ignore && nullptr == pfn) { \
|
||||||
|
GGML_LOG_ERROR("ggml-hex: failed to dlsym %s\n", #symbol); \
|
||||||
|
return AEE_EUNABLETOLOAD; \
|
||||||
|
} \
|
||||||
|
} while (0)
|
||||||
|
|
||||||
|
dlsym(handle.get(), rpcmem_alloc_pfn_t, rpcmem_alloc_pfn, rpcmem_alloc, false);
|
||||||
|
dlsym(handle.get(), rpcmem_alloc2_pfn_t, rpcmem_alloc2_pfn, rpcmem_alloc2, true);
|
||||||
|
dlsym(handle.get(), rpcmem_free_pfn_t, rpcmem_free_pfn, rpcmem_free, false);
|
||||||
|
dlsym(handle.get(), rpcmem_to_fd_pfn_t, rpcmem_to_fd_pfn, rpcmem_to_fd, false);
|
||||||
|
dlsym(handle.get(), fastrpc_mmap_pfn_t, fastrpc_mmap_pfn, fastrpc_mmap, false);
|
||||||
|
dlsym(handle.get(), fastrpc_munmap_pfn_t, fastrpc_munmap_pfn, fastrpc_munmap, false);
|
||||||
|
dlsym(handle.get(), dspqueue_create_pfn_t, dspqueue_create_pfn, dspqueue_create, false);
|
||||||
|
dlsym(handle.get(), dspqueue_close_pfn_t, dspqueue_close_pfn, dspqueue_close, false);
|
||||||
|
dlsym(handle.get(), dspqueue_export_pfn_t, dspqueue_export_pfn, dspqueue_export, false);
|
||||||
|
dlsym(handle.get(), dspqueue_write_pfn_t, dspqueue_write_pfn, dspqueue_write, false);
|
||||||
|
dlsym(handle.get(), dspqueue_read_pfn_t, dspqueue_read_pfn, dspqueue_read, false);
|
||||||
|
dlsym(handle.get(), remote_handle64_open_pfn_t, remote_handle64_open_pfn, remote_handle64_open, false);
|
||||||
|
dlsym(handle.get(), remote_handle64_invoke_pfn_t, remote_handle64_invoke_pfn, remote_handle64_invoke, false);
|
||||||
|
dlsym(handle.get(), remote_handle_control_pfn_t, remote_handle_control_pfn, remote_handle_control, false);
|
||||||
|
dlsym(handle.get(), remote_handle64_control_pfn_t, remote_handle64_control_pfn, remote_handle64_control, false);
|
||||||
|
dlsym(handle.get(), remote_session_control_pfn_t, remote_session_control_pfn, remote_session_control, false);
|
||||||
|
dlsym(handle.get(), remote_handle64_close_pfn_t, remote_handle64_close_pfn, remote_handle64_close, false);
|
||||||
|
|
||||||
|
lib_cdsp_rpc_handle = std::move(handle);
|
||||||
|
initialized = true;
|
||||||
|
|
||||||
|
return AEE_SUCCESS;
|
||||||
|
}
|
||||||
|
|
||||||
|
domain * get_domain(int domain_id) {
|
||||||
|
int i = 0;
|
||||||
|
int size = sizeof(supported_domains) / sizeof(domain);
|
||||||
|
|
||||||
|
for (i = 0; i < size; i++) {
|
||||||
|
if (supported_domains[i].id == domain_id) {
|
||||||
|
return &supported_domains[i];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
int get_hex_arch_ver(int domain, int * arch) {
|
||||||
|
if (!remote_handle_control_pfn) {
|
||||||
|
GGML_LOG_ERROR("ggml-hex: remote_handle_control is not supported on this device\n");
|
||||||
|
return AEE_EUNSUPPORTEDAPI;
|
||||||
|
}
|
||||||
|
|
||||||
|
struct remote_dsp_capability arch_ver;
|
||||||
|
arch_ver.domain = (uint32_t) domain;
|
||||||
|
arch_ver.attribute_ID = ARCH_VER;
|
||||||
|
arch_ver.capability = (uint32_t) 0;
|
||||||
|
|
||||||
|
int err = remote_handle_control(DSPRPC_GET_DSP_INFO, &arch_ver, sizeof(arch_ver));
|
||||||
|
if ((err & 0xff) == (AEE_EUNSUPPORTEDAPI & 0xff)) {
|
||||||
|
GGML_LOG_ERROR("ggml-hex: FastRPC capability API is not supported on this device\n");
|
||||||
|
return AEE_EUNSUPPORTEDAPI;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (err != AEE_SUCCESS) {
|
||||||
|
GGML_LOG_ERROR("ggml-hex: FastRPC capability query failed (err %d)\n", err);
|
||||||
|
return err;
|
||||||
|
}
|
||||||
|
|
||||||
|
switch (arch_ver.capability & 0xff) {
|
||||||
|
case 0x68:
|
||||||
|
*arch = 68;
|
||||||
|
return 0;
|
||||||
|
case 0x69:
|
||||||
|
*arch = 69;
|
||||||
|
return 0;
|
||||||
|
case 0x73:
|
||||||
|
*arch = 73;
|
||||||
|
return 0;
|
||||||
|
case 0x75:
|
||||||
|
*arch = 75;
|
||||||
|
return 0;
|
||||||
|
case 0x79:
|
||||||
|
*arch = 79;
|
||||||
|
return 0;
|
||||||
|
case 0x81:
|
||||||
|
*arch = 81;
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
@ -0,0 +1,121 @@
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
extern "C" {
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifdef _WIN32
|
||||||
|
# pragma clang diagnostic ignored "-Wignored-attributes"
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#include <AEEStdErr.h>
|
||||||
|
#include <rpcmem.h>
|
||||||
|
#include <remote.h>
|
||||||
|
#include <dspqueue.h>
|
||||||
|
|
||||||
|
#if defined(_WIN32) && !defined(__MINGW32__)
|
||||||
|
# ifdef GGML_BACKEND_BUILD
|
||||||
|
# define HTPDRV_API __declspec(dllexport) extern
|
||||||
|
# else
|
||||||
|
# define HTPDRV_API __declspec(dllimport) extern
|
||||||
|
# endif
|
||||||
|
#else
|
||||||
|
# define HTPDRV_API __attribute__ ((visibility ("default"))) extern
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/* Offset to differentiate HLOS and Hexagon error codes.
|
||||||
|
Stores the value of AEE_EOFFSET for Hexagon. */
|
||||||
|
#ifndef DSP_OFFSET
|
||||||
|
# define DSP_OFFSET 0x80000400
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/* Errno for connection reset by peer. */
|
||||||
|
#ifndef ECONNRESET
|
||||||
|
# ifdef __hexagon__
|
||||||
|
# define ECONNRESET 104
|
||||||
|
# endif
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/* Abstraction of different OS specific sleep APIs.
|
||||||
|
SLEEP accepts input in seconds. */
|
||||||
|
#ifndef SLEEP
|
||||||
|
# ifdef __hexagon__
|
||||||
|
# define SLEEP(x) \
|
||||||
|
{ /* Do nothing for simulator. */ \
|
||||||
|
}
|
||||||
|
# else
|
||||||
|
# ifdef _WIN32
|
||||||
|
# define SLEEP(x) Sleep(1000 * x) /* Sleep accepts input in milliseconds. */
|
||||||
|
# else
|
||||||
|
# define SLEEP(x) sleep(x) /* sleep accepts input in seconds. */
|
||||||
|
# endif
|
||||||
|
# endif
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/* Include windows specific header files. */
|
||||||
|
#ifdef _WIN32
|
||||||
|
# include <windows.h>
|
||||||
|
# include <sysinfoapi.h>
|
||||||
|
# define _CRT_SECURE_NO_WARNINGS 1
|
||||||
|
# define _WINSOCK_DEPRECATED_NO_WARNINGS 1
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/* Includes and defines for all HLOS except windows */
|
||||||
|
#if !defined(__hexagon__) && !defined(_WIN32)
|
||||||
|
# include "unistd.h"
|
||||||
|
|
||||||
|
# include <sys/time.h>
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/* Includes and defines for Hexagon and all HLOS except Windows. */
|
||||||
|
#if !defined(_WIN32)
|
||||||
|
/* Weak reference to remote symbol for compilation. */
|
||||||
|
# pragma weak remote_session_control
|
||||||
|
# pragma weak remote_handle_control
|
||||||
|
# pragma weak remote_handle64_control
|
||||||
|
# pragma weak fastrpc_mmap
|
||||||
|
# pragma weak fastrpc_munmap
|
||||||
|
# pragma weak rpcmem_alloc2
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#if !defined(_WIN32)
|
||||||
|
# pragma weak remote_system_request
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifdef _WIN32
|
||||||
|
# define DSPQUEUE_TIMEOUT DSPQUEUE_TIMEOUT_NONE
|
||||||
|
#else
|
||||||
|
# define DSPQUEUE_TIMEOUT 1000000
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/**
|
||||||
|
* htpdrv_init API: driver interface entry point
|
||||||
|
*
|
||||||
|
* @return Return AEE error codes as defined in Hexagon SDK.
|
||||||
|
*/
|
||||||
|
HTPDRV_API int htpdrv_init(void);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* get_domain API: get domain struct from domain value.
|
||||||
|
*
|
||||||
|
* @param[in] domain value of a domain
|
||||||
|
* @return Returns domain struct of the domain if it is supported or else
|
||||||
|
* returns NULL.
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
HTPDRV_API domain * get_domain(int domain_id);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* get_hex_arch_ver API: query the Hexagon processor architecture version information
|
||||||
|
*
|
||||||
|
* @param[in] domain_id value of a domain
|
||||||
|
* @param[out] Arch version (73, 75, ...)
|
||||||
|
* @return 0 if query is successful.
|
||||||
|
* non-zero if error, return value points to the error.
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
HTPDRV_API int get_hex_arch_ver(int domain, int * arch);
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
@ -1,454 +0,0 @@
|
||||||
|
|
||||||
#pragma clang diagnostic ignored "-Wgnu-anonymous-struct"
|
|
||||||
#pragma clang diagnostic ignored "-Wmissing-prototypes"
|
|
||||||
#pragma clang diagnostic ignored "-Wsign-compare"
|
|
||||||
|
|
||||||
#define GGML_COMMON_IMPL_C
|
|
||||||
#include "ggml-backend-impl.h"
|
|
||||||
#include "ggml-common.h"
|
|
||||||
#include "ggml-hexagon.h"
|
|
||||||
#include "ggml-impl.h"
|
|
||||||
|
|
||||||
#include "htp-utils.h"
|
|
||||||
|
|
||||||
#include <domain.h>
|
|
||||||
#include <remote.h>
|
|
||||||
#include <stdbool.h>
|
|
||||||
#include <stdint.h>
|
|
||||||
#include <stdio.h>
|
|
||||||
#include <stdlib.h>
|
|
||||||
#include <string.h>
|
|
||||||
|
|
||||||
domain * get_domain(int domain_id) {
|
|
||||||
int i = 0;
|
|
||||||
int size = sizeof(supported_domains) / sizeof(domain);
|
|
||||||
|
|
||||||
for (i = 0; i < size; i++) {
|
|
||||||
if (supported_domains[i].id == domain_id) {
|
|
||||||
return &supported_domains[i];
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return NULL;
|
|
||||||
}
|
|
||||||
|
|
||||||
bool is_valid_domain_id(int domain_id, int compute_only) {
|
|
||||||
int i = 0;
|
|
||||||
int size = sizeof(supported_domains) / sizeof(domain);
|
|
||||||
|
|
||||||
if (compute_only) {
|
|
||||||
return is_CDSP(domain_id);
|
|
||||||
}
|
|
||||||
|
|
||||||
for (i = 0; i < size; i++) {
|
|
||||||
if (supported_domains[i].id == domain_id) {
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
int get_domains_info(char * domain_type, int * num_domains, fastrpc_domain ** domains_info) {
|
|
||||||
int nErr = AEE_SUCCESS;
|
|
||||||
int ss_info = 0;
|
|
||||||
if (domain_type != NULL) {
|
|
||||||
if (strcmp(domain_type, "LPASS") == 0) {
|
|
||||||
ss_info = FASTRPC_LPASS;
|
|
||||||
} else if (strcmp(domain_type, "HPASS") == 0) {
|
|
||||||
ss_info = FASTRPC_HPASS;
|
|
||||||
} else {
|
|
||||||
ss_info = FASTRPC_NSP;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
system_req_payload req = { 0 };
|
|
||||||
req.id = FASTRPC_GET_DOMAINS;
|
|
||||||
req.sys.domains = NULL;
|
|
||||||
fastrpc_domain * domain = NULL;
|
|
||||||
if (ss_info != 0) {
|
|
||||||
req.sys.flags = DOMAINS_LIST_FLAGS_SET_TYPE(req.sys.flags, ss_info);
|
|
||||||
} else {
|
|
||||||
req.sys.flags = 0;
|
|
||||||
}
|
|
||||||
#ifdef _WIN32
|
|
||||||
nErr = AEE_EUNSUPPORTED;
|
|
||||||
goto bail;
|
|
||||||
#endif
|
|
||||||
if (remote_system_request) {
|
|
||||||
nErr = remote_system_request(&req);
|
|
||||||
if (nErr != AEE_SUCCESS) {
|
|
||||||
GGML_LOG_ERROR("Failure in remote_system_request call: %d.\n", nErr);
|
|
||||||
goto bail;
|
|
||||||
}
|
|
||||||
// Allocate memory for domain-info array
|
|
||||||
req.sys.max_domains = req.sys.num_domains;
|
|
||||||
if ((req.sys.domains = calloc(req.sys.num_domains, sizeof(fastrpc_domain))) == NULL) {
|
|
||||||
nErr = AEE_ENOMEMORY;
|
|
||||||
GGML_LOG_ERROR("Unable to allocate memory for req.sys.domains");
|
|
||||||
goto bail;
|
|
||||||
}
|
|
||||||
|
|
||||||
nErr = remote_system_request(&req);
|
|
||||||
if (nErr != AEE_SUCCESS) {
|
|
||||||
GGML_LOG_ERROR("Failure in remote_system_request call: %d.\n", nErr);
|
|
||||||
goto bail;
|
|
||||||
}
|
|
||||||
|
|
||||||
for (int i = 0; i < req.sys.num_domains; i++) {
|
|
||||||
// Verify that only requested type domains were returned
|
|
||||||
domain = &req.sys.domains[i];
|
|
||||||
if (domain->type != ss_info && domain_type != NULL) {
|
|
||||||
nErr = -1;
|
|
||||||
GGML_LOG_ERROR("Incorrect data received from remote_system_request.\n");
|
|
||||||
goto bail;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
*domains_info = req.sys.domains;
|
|
||||||
*num_domains = req.sys.num_domains;
|
|
||||||
} else {
|
|
||||||
nErr = AEE_EUNSUPPORTED;
|
|
||||||
goto bail;
|
|
||||||
}
|
|
||||||
bail:
|
|
||||||
if (nErr && !req.sys.domains) {
|
|
||||||
free(req.sys.domains);
|
|
||||||
}
|
|
||||||
return nErr;
|
|
||||||
}
|
|
||||||
|
|
||||||
int get_effective_domain_id(char * domain_name, int session_id, int * effec_domain_id) {
|
|
||||||
int err = 0;
|
|
||||||
remote_rpc_effective_domain_id_t sess = { 0 };
|
|
||||||
|
|
||||||
sess.domain_name = domain_name;
|
|
||||||
sess.domain_name_len = strlen(domain_name);
|
|
||||||
sess.session_id = session_id;
|
|
||||||
|
|
||||||
err = remote_session_control(FASTRPC_GET_EFFECTIVE_DOMAIN_ID, &sess, sizeof(sess));
|
|
||||||
if (err) {
|
|
||||||
GGML_LOG_ERROR("Error 0x%x: failed to get effective domain id for %s, session id %d\n", err, sess.domain_name,
|
|
||||||
session_id);
|
|
||||||
return err;
|
|
||||||
}
|
|
||||||
|
|
||||||
*effec_domain_id = sess.effective_domain_id;
|
|
||||||
return err;
|
|
||||||
}
|
|
||||||
|
|
||||||
int get_dsp_support(int * domain) {
|
|
||||||
int nErr = AEE_SUCCESS;
|
|
||||||
*domain = CDSP_DOMAIN_ID; // DSP domain default value is CDSP_DOMAIN_ID
|
|
||||||
|
|
||||||
if (remote_handle_control) {
|
|
||||||
struct remote_dsp_capability dsp_capability_domain = { CDSP_DOMAIN_ID, DOMAIN_SUPPORT, 0 };
|
|
||||||
nErr = remote_handle_control(DSPRPC_GET_DSP_INFO, &dsp_capability_domain, sizeof(struct remote_dsp_capability));
|
|
||||||
if ((nErr & 0xFF) == (AEE_EUNSUPPORTEDAPI & 0xFF)) {
|
|
||||||
GGML_LOG_ERROR("\nFastRPC Capability API is not supported on this device\n");
|
|
||||||
goto bail;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (dsp_capability_domain.capability == 0) {
|
|
||||||
dsp_capability_domain.domain = ADSP_DOMAIN_ID; // Check for ADSP support.
|
|
||||||
dsp_capability_domain.attribute_ID = DOMAIN_SUPPORT;
|
|
||||||
dsp_capability_domain.capability = 0;
|
|
||||||
nErr = remote_handle_control(DSPRPC_GET_DSP_INFO, &dsp_capability_domain,
|
|
||||||
sizeof(struct remote_dsp_capability));
|
|
||||||
if (dsp_capability_domain.capability) {
|
|
||||||
*domain = ADSP_DOMAIN_ID; // For targets like Agatti (not having cDSP), domain is ADSP_DOMAIN_ID
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if (nErr != AEE_SUCCESS) {
|
|
||||||
GGML_LOG_ERROR("\nget_dsp_support failed with Error 0x%x\n", nErr);
|
|
||||||
goto bail;
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
nErr = AEE_EUNSUPPORTEDAPI;
|
|
||||||
GGML_LOG_ERROR("remote_dsp_capability interface is not supported on this device\n");
|
|
||||||
}
|
|
||||||
|
|
||||||
bail:
|
|
||||||
return nErr;
|
|
||||||
}
|
|
||||||
|
|
||||||
int get_vtcm_info(int domain, uint32_t * capability, uint32_t attr) {
|
|
||||||
int nErr = AEE_SUCCESS;
|
|
||||||
*capability = 0;
|
|
||||||
|
|
||||||
if (attr == VTCM_PAGE || attr == VTCM_COUNT) {
|
|
||||||
} else {
|
|
||||||
nErr = AEE_EBADPARM;
|
|
||||||
GGML_LOG_ERROR("Unsupported attr. Only VTCM_PAGE and VTCM_COUNT supported\n");
|
|
||||||
goto bail;
|
|
||||||
}
|
|
||||||
if (remote_handle_control) {
|
|
||||||
if (domain == ADSP_DOMAIN_ID || domain == CDSP_DOMAIN_ID) {
|
|
||||||
/*
|
|
||||||
* Query the DSP for VTCM information
|
|
||||||
* Since the ADSP does not have a dedicated VTCM, we expect the output to be 0
|
|
||||||
*/
|
|
||||||
struct remote_dsp_capability dsp_capability_vtcm_dsp;
|
|
||||||
dsp_capability_vtcm_dsp.domain = (uint32_t) domain;
|
|
||||||
dsp_capability_vtcm_dsp.attribute_ID = attr;
|
|
||||||
dsp_capability_vtcm_dsp.capability = (uint32_t) 0;
|
|
||||||
nErr = remote_handle_control(DSPRPC_GET_DSP_INFO, &dsp_capability_vtcm_dsp,
|
|
||||||
sizeof(struct remote_dsp_capability));
|
|
||||||
if ((nErr & 0xFF) == (AEE_EUNSUPPORTEDAPI & 0xFF)) {
|
|
||||||
GGML_LOG_ERROR("\nFastRPC Capability API is not supported on this device\n");
|
|
||||||
GGML_LOG_ERROR("Running the usecase without checking the capability\n");
|
|
||||||
nErr = AEE_SUCCESS;
|
|
||||||
goto bail;
|
|
||||||
} else if (nErr == AEE_SUCCESS) {
|
|
||||||
*capability = dsp_capability_vtcm_dsp.capability;
|
|
||||||
} else {
|
|
||||||
GGML_LOG_ERROR("\nget_vtcm_info failed with Error 0x%x\n", nErr);
|
|
||||||
goto bail;
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
nErr = AEE_EUNSUPPORTED;
|
|
||||||
GGML_LOG_ERROR("Unsupported domain %d\n", domain);
|
|
||||||
goto bail;
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
nErr = AEE_EUNSUPPORTEDAPI;
|
|
||||||
GGML_LOG_ERROR("remote_dsp_capability interface is not supported on this device\n");
|
|
||||||
}
|
|
||||||
|
|
||||||
bail:
|
|
||||||
return nErr;
|
|
||||||
}
|
|
||||||
|
|
||||||
bool is_unsignedpd_supported(int domain_id) {
|
|
||||||
int nErr = AEE_SUCCESS;
|
|
||||||
if (remote_handle_control) {
|
|
||||||
struct remote_dsp_capability dsp_capability_domain = { domain_id, UNSIGNED_PD_SUPPORT, 0 };
|
|
||||||
nErr = remote_handle_control(DSPRPC_GET_DSP_INFO, &dsp_capability_domain, sizeof(struct remote_dsp_capability));
|
|
||||||
if ((nErr & 0xFF) == (AEE_EUNSUPPORTEDAPI & 0xFF)) {
|
|
||||||
GGML_LOG_ERROR("\nFastRPC Capability API is not supported on this device. Falling back to signed pd.\n");
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
if (nErr) {
|
|
||||||
GGML_LOG_ERROR("\nERROR 0x%x: FastRPC Capability API failed. Falling back to signed pd.", nErr);
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
if (dsp_capability_domain.capability == 1) {
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
nErr = AEE_EUNSUPPORTEDAPI;
|
|
||||||
GGML_LOG_ERROR("remote_dsp_capability interface is not supported on this device. Falling back to signed pd.\n");
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
bool get_unsignedpd_support(void) {
|
|
||||||
return is_unsignedpd_supported(CDSP_DOMAIN_ID);
|
|
||||||
}
|
|
||||||
|
|
||||||
bool is_async_fastrpc_supported(int domain) {
|
|
||||||
int nErr = AEE_SUCCESS;
|
|
||||||
if (remote_handle_control) {
|
|
||||||
if (domain == CDSP_DOMAIN_ID) {
|
|
||||||
/*
|
|
||||||
* Query the DSP for ASYNC_FASTRPC_SUPPORT information
|
|
||||||
* Async fastrpc is supported only on CDSP
|
|
||||||
*/
|
|
||||||
struct remote_dsp_capability dsp_capability_async_support;
|
|
||||||
dsp_capability_async_support.domain = (uint32_t) domain;
|
|
||||||
dsp_capability_async_support.attribute_ID = ASYNC_FASTRPC_SUPPORT;
|
|
||||||
dsp_capability_async_support.capability = (uint32_t) 0;
|
|
||||||
nErr = remote_handle_control(DSPRPC_GET_DSP_INFO, &dsp_capability_async_support,
|
|
||||||
sizeof(struct remote_dsp_capability));
|
|
||||||
if ((nErr & 0xFF) == (AEE_EUNSUPPORTEDAPI & 0xFF)) {
|
|
||||||
GGML_LOG_ERROR("\nFastRPC Capability API is not supported on this device\n");
|
|
||||||
GGML_LOG_ERROR("Running the usecase without checking the capability\n");
|
|
||||||
nErr = AEE_SUCCESS;
|
|
||||||
goto bail;
|
|
||||||
} else if (dsp_capability_async_support.capability == 1) {
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
if (nErr != AEE_SUCCESS) {
|
|
||||||
GGML_LOG_ERROR("\nis_async_fastrpc_supported failed with Error 0x%x\n", nErr);
|
|
||||||
goto bail;
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
nErr = AEE_EUNSUPPORTED;
|
|
||||||
GGML_LOG_ERROR("Async fastrpc is not supported on domain %d\n", domain);
|
|
||||||
goto bail;
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
nErr = AEE_EUNSUPPORTEDAPI;
|
|
||||||
GGML_LOG_ERROR("remote_dsp_capability interface is not supported on this device\n");
|
|
||||||
}
|
|
||||||
|
|
||||||
bail:
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
bool is_status_notification_supported(int domain) {
|
|
||||||
int nErr = AEE_SUCCESS;
|
|
||||||
|
|
||||||
if (remote_handle_control) {
|
|
||||||
/*
|
|
||||||
* Query the DSP for STATUS_NOTIFICATION_SUPPORT information
|
|
||||||
* DSP User PD status notification Support
|
|
||||||
*/
|
|
||||||
struct remote_dsp_capability dsp_capability_status_notification_support;
|
|
||||||
dsp_capability_status_notification_support.domain = (uint32_t) domain;
|
|
||||||
dsp_capability_status_notification_support.attribute_ID = STATUS_NOTIFICATION_SUPPORT;
|
|
||||||
dsp_capability_status_notification_support.capability = (uint32_t) 0;
|
|
||||||
nErr = remote_handle_control(DSPRPC_GET_DSP_INFO, &dsp_capability_status_notification_support,
|
|
||||||
sizeof(struct remote_dsp_capability));
|
|
||||||
if ((nErr & 0xFF) == (AEE_EUNSUPPORTEDAPI & 0xFF)) {
|
|
||||||
GGML_LOG_ERROR("\nFastRPC Capability API is not supported on this device\n");
|
|
||||||
GGML_LOG_ERROR("Running the usecase without checking the capability\n");
|
|
||||||
nErr = AEE_SUCCESS;
|
|
||||||
goto bail;
|
|
||||||
} else if (dsp_capability_status_notification_support.capability == 1) {
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
if (nErr != AEE_SUCCESS) {
|
|
||||||
GGML_LOG_ERROR("\nis_status_notification_supported failed with Error 0x%x\n", nErr);
|
|
||||||
goto bail;
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
nErr = AEE_EUNSUPPORTEDAPI;
|
|
||||||
GGML_LOG_ERROR("remote_dsp_capability interface is not supported on this device\n");
|
|
||||||
}
|
|
||||||
|
|
||||||
bail:
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
int get_hmx_support_info(int domain, uint32_t * capability, uint32_t attr) {
|
|
||||||
int nErr = AEE_SUCCESS;
|
|
||||||
*capability = 0;
|
|
||||||
|
|
||||||
if (attr != HMX_SUPPORT_SPATIAL && attr != HMX_SUPPORT_DEPTH) {
|
|
||||||
nErr = AEE_EBADPARM;
|
|
||||||
GGML_LOG_ERROR("Unsupported attr. Only HMX_SUPPORT_SPATIAL and HMX_SUPPORT_DEPTH supported\n");
|
|
||||||
goto bail;
|
|
||||||
}
|
|
||||||
if (remote_handle_control) {
|
|
||||||
if (domain == CDSP_DOMAIN_ID) {
|
|
||||||
/*
|
|
||||||
* Query the DSP for HMX SUPPORT information
|
|
||||||
* HMX is supported on CDSP only
|
|
||||||
*/
|
|
||||||
struct remote_dsp_capability dsp_capability_hmx_dsp;
|
|
||||||
dsp_capability_hmx_dsp.domain = (uint32_t) domain;
|
|
||||||
dsp_capability_hmx_dsp.attribute_ID = attr;
|
|
||||||
dsp_capability_hmx_dsp.capability = (uint32_t) 0;
|
|
||||||
nErr = remote_handle_control(DSPRPC_GET_DSP_INFO, &dsp_capability_hmx_dsp,
|
|
||||||
sizeof(struct remote_dsp_capability));
|
|
||||||
if ((nErr & 0xFF) == (AEE_EUNSUPPORTEDAPI & 0xFF)) {
|
|
||||||
GGML_LOG_ERROR("\nFastRPC Capability API is not supported on this device\n");
|
|
||||||
GGML_LOG_ERROR("Running the usecase without checking the capability\n");
|
|
||||||
nErr = AEE_SUCCESS;
|
|
||||||
goto bail;
|
|
||||||
} else if (nErr == AEE_SUCCESS) {
|
|
||||||
*capability = dsp_capability_hmx_dsp.capability;
|
|
||||||
} else {
|
|
||||||
GGML_LOG_ERROR("\nget_hmx_support_info failed with Error 0x%x\n", nErr);
|
|
||||||
goto bail;
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
nErr = AEE_EUNSUPPORTED;
|
|
||||||
GGML_LOG_ERROR("HMX support is not there for domain %d\n", domain);
|
|
||||||
goto bail;
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
nErr = AEE_EUNSUPPORTEDAPI;
|
|
||||||
GGML_LOG_ERROR("remote_dsp_capability interface is not supported on this device\n");
|
|
||||||
}
|
|
||||||
|
|
||||||
bail:
|
|
||||||
return nErr;
|
|
||||||
}
|
|
||||||
|
|
||||||
int get_hex_arch_ver(int domain, int * arch) {
|
|
||||||
if (!remote_handle_control) {
|
|
||||||
GGML_LOG_ERROR("ggml-hex: remote_handle_control is not supported on this device\n");
|
|
||||||
return AEE_EUNSUPPORTEDAPI;
|
|
||||||
}
|
|
||||||
|
|
||||||
struct remote_dsp_capability arch_ver;
|
|
||||||
arch_ver.domain = (uint32_t) domain;
|
|
||||||
arch_ver.attribute_ID = ARCH_VER;
|
|
||||||
arch_ver.capability = (uint32_t) 0;
|
|
||||||
|
|
||||||
int err = remote_handle_control(DSPRPC_GET_DSP_INFO, &arch_ver, sizeof(arch_ver));
|
|
||||||
if ((err & 0xff) == (AEE_EUNSUPPORTEDAPI & 0xff)) {
|
|
||||||
GGML_LOG_ERROR("ggml-hex: FastRPC capability API is not supported on this device\n");
|
|
||||||
return AEE_EUNSUPPORTEDAPI;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (err != AEE_SUCCESS) {
|
|
||||||
GGML_LOG_ERROR("ggml-hex: FastRPC capability query failed (err %d)\n", err);
|
|
||||||
return err;
|
|
||||||
}
|
|
||||||
|
|
||||||
switch (arch_ver.capability & 0xff) {
|
|
||||||
case 0x68:
|
|
||||||
*arch = 68;
|
|
||||||
return 0;
|
|
||||||
case 0x69:
|
|
||||||
*arch = 69;
|
|
||||||
return 0;
|
|
||||||
case 0x73:
|
|
||||||
*arch = 73;
|
|
||||||
return 0;
|
|
||||||
case 0x75:
|
|
||||||
*arch = 75;
|
|
||||||
return 0;
|
|
||||||
case 0x79:
|
|
||||||
*arch = 79;
|
|
||||||
return 0;
|
|
||||||
case 0x81:
|
|
||||||
*arch = 81;
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
return -1;
|
|
||||||
}
|
|
||||||
|
|
||||||
int get_hvx_support_info(int domain, uint32_t * capability, uint32_t attr) {
|
|
||||||
int nErr = AEE_SUCCESS;
|
|
||||||
*capability = 0;
|
|
||||||
|
|
||||||
if (remote_handle_control) {
|
|
||||||
if (domain == CDSP_DOMAIN_ID) {
|
|
||||||
/*
|
|
||||||
* Query the DSP for HVX SUPPORT information
|
|
||||||
* HVX is supported on CDSP only
|
|
||||||
*/
|
|
||||||
struct remote_dsp_capability dsp_capability_hvx_dsp;
|
|
||||||
dsp_capability_hvx_dsp.domain = (uint32_t) domain;
|
|
||||||
dsp_capability_hvx_dsp.attribute_ID = attr;
|
|
||||||
dsp_capability_hvx_dsp.capability = (uint32_t) 0;
|
|
||||||
nErr = remote_handle_control(DSPRPC_GET_DSP_INFO, &dsp_capability_hvx_dsp,
|
|
||||||
sizeof(struct remote_dsp_capability));
|
|
||||||
if ((nErr & 0xFF) == (AEE_EUNSUPPORTEDAPI & 0xFF)) {
|
|
||||||
GGML_LOG_ERROR("\nFastRPC Capability API is not supported on this device\n");
|
|
||||||
GGML_LOG_ERROR("Running the usecase without checking the capability\n");
|
|
||||||
nErr = AEE_SUCCESS;
|
|
||||||
goto bail;
|
|
||||||
} else if (nErr == AEE_SUCCESS) {
|
|
||||||
*capability = dsp_capability_hvx_dsp.capability;
|
|
||||||
} else {
|
|
||||||
GGML_LOG_ERROR("\nget_hvx_support_info failed with Error 0x%x\n", nErr);
|
|
||||||
goto bail;
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
nErr = AEE_EUNSUPPORTED;
|
|
||||||
GGML_LOG_ERROR("HVX support is not available on domain %d\n", domain);
|
|
||||||
goto bail;
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
nErr = AEE_EUNSUPPORTEDAPI;
|
|
||||||
GGML_LOG_ERROR("remote_dsp_capability interface is not supported on this device\n");
|
|
||||||
}
|
|
||||||
|
|
||||||
bail:
|
|
||||||
return nErr;
|
|
||||||
}
|
|
||||||
|
|
@ -1,221 +0,0 @@
|
||||||
#ifndef HTP_UTILS_H
|
|
||||||
#define HTP_UTILS_H
|
|
||||||
|
|
||||||
#ifdef __cplusplus
|
|
||||||
extern "C" {
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#include <AEEStdErr.h>
|
|
||||||
#include <inttypes.h>
|
|
||||||
#include <remote.h>
|
|
||||||
#include <rpcmem.h>
|
|
||||||
#include <stdbool.h>
|
|
||||||
|
|
||||||
/* Offset to differentiate HLOS and Hexagon error codes.
|
|
||||||
Stores the value of AEE_EOFFSET for Hexagon. */
|
|
||||||
#ifndef DSP_OFFSET
|
|
||||||
# define DSP_OFFSET 0x80000400
|
|
||||||
#endif
|
|
||||||
|
|
||||||
/* Errno for connection reset by peer. */
|
|
||||||
#ifndef ECONNRESET
|
|
||||||
# ifdef __hexagon__
|
|
||||||
# define ECONNRESET 104
|
|
||||||
# endif
|
|
||||||
#endif
|
|
||||||
|
|
||||||
/* Abstraction of different OS specific sleep APIs.
|
|
||||||
SLEEP accepts input in seconds. */
|
|
||||||
#ifndef SLEEP
|
|
||||||
# ifdef __hexagon__
|
|
||||||
# define SLEEP(x) \
|
|
||||||
{ /* Do nothing for simulator. */ \
|
|
||||||
}
|
|
||||||
# else
|
|
||||||
# ifdef _WINDOWS
|
|
||||||
# define SLEEP(x) Sleep(1000 * x) /* Sleep accepts input in milliseconds. */
|
|
||||||
# else
|
|
||||||
# define SLEEP(x) sleep(x) /* sleep accepts input in seconds. */
|
|
||||||
# endif
|
|
||||||
# endif
|
|
||||||
#endif
|
|
||||||
|
|
||||||
/* Include windows specific header files. */
|
|
||||||
#ifdef _WINDOWS
|
|
||||||
# include <sysinfoapi.h>
|
|
||||||
# include <windows.h>
|
|
||||||
# define _CRT_SECURE_NO_WARNINGS 1
|
|
||||||
# define _WINSOCK_DEPRECATED_NO_WARNINGS 1
|
|
||||||
/* Including this file for custom implementation of getopt function. */
|
|
||||||
# include "getopt_custom.h"
|
|
||||||
#endif
|
|
||||||
|
|
||||||
/* Includes and defines for all HLOS except windows */
|
|
||||||
#if !defined(__hexagon__) && !defined(_WINDOWS)
|
|
||||||
# include "unistd.h"
|
|
||||||
|
|
||||||
# include <sys/time.h>
|
|
||||||
#endif
|
|
||||||
|
|
||||||
/* Includes and defines for Hexagon and all HLOS except Windows. */
|
|
||||||
#if !defined(_WINDOWS)
|
|
||||||
/* Weak reference to remote symbol for compilation. */
|
|
||||||
# pragma weak remote_session_control
|
|
||||||
# pragma weak remote_handle_control
|
|
||||||
# pragma weak remote_handle64_control
|
|
||||||
# pragma weak fastrpc_mmap
|
|
||||||
# pragma weak fastrpc_munmap
|
|
||||||
# pragma weak rpcmem_alloc2
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#if !defined(_WINDOWS)
|
|
||||||
# pragma weak remote_system_request
|
|
||||||
#endif
|
|
||||||
/**
|
|
||||||
* Wrapper for FastRPC Capability API: query DSP support.
|
|
||||||
*
|
|
||||||
* @param[out] domain pointer to supported domain.
|
|
||||||
* @return 0 if query is successful.
|
|
||||||
* non-zero if error, return value points to the error.
|
|
||||||
*/
|
|
||||||
int get_dsp_support(int * domain);
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Wrapper for FastRPC Capability API: query VTCM information.
|
|
||||||
*
|
|
||||||
* @param[in] domain value of domain in the queried.
|
|
||||||
* @param[out] capability capability value of the attribute queried.
|
|
||||||
* @param[in] attr value of the attribute to the queried.
|
|
||||||
* @return 0 if query is successful.
|
|
||||||
* non-zero if error, return value points to the error.
|
|
||||||
*/
|
|
||||||
int get_vtcm_info(int domain, uint32_t * capability, uint32_t attr);
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Wrapper for FastRPC Capability API: query unsigned pd support on CDSP domain.
|
|
||||||
*
|
|
||||||
* @return true if unsigned pd is supported.
|
|
||||||
* false if unsigned pd is not supported, capability query failed.
|
|
||||||
*/
|
|
||||||
|
|
||||||
bool get_unsignedpd_support(void);
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Wrapper for FastRPC Capability API: query unsigned pd support.
|
|
||||||
*
|
|
||||||
* @param[in] domain value of domain in the queried.
|
|
||||||
* @return true if unsigned pd is supported.
|
|
||||||
* false if unsigned pd is not supported, capability query failed.
|
|
||||||
*/
|
|
||||||
|
|
||||||
bool is_unsignedpd_supported(int domain_id);
|
|
||||||
|
|
||||||
/**
|
|
||||||
* is_valid_domain_id API: query a domain id is valid.
|
|
||||||
*
|
|
||||||
* @param[in] domain value of domain in the queried.
|
|
||||||
* @param[in] compute_only value of domain is only compared with CDSP domains supported by the target when enabled.
|
|
||||||
* @return true if value of domain is valid.
|
|
||||||
* false if value of domain is not valid.
|
|
||||||
*/
|
|
||||||
|
|
||||||
bool is_valid_domain_id(int domain_id, int compute_only);
|
|
||||||
|
|
||||||
/**
|
|
||||||
* get_domain API: get domain struct from domain value.
|
|
||||||
*
|
|
||||||
* @param[in] domain value of a domain
|
|
||||||
* @return Returns domain struct of the domain if it is supported or else
|
|
||||||
* returns NULL.
|
|
||||||
*
|
|
||||||
*/
|
|
||||||
|
|
||||||
domain * get_domain(int domain_id);
|
|
||||||
|
|
||||||
/**
|
|
||||||
* get_domains_info API: get information for all the domains available on the device
|
|
||||||
*
|
|
||||||
* @param[in] domain_type pointer to domain type
|
|
||||||
* @param[in] num_domains pointer to number of domains
|
|
||||||
* @param[in] domains_info pointer to save discovered domains information.
|
|
||||||
* @return 0 if query is successful.
|
|
||||||
* non-zero if error, return value points to the error.
|
|
||||||
*
|
|
||||||
* It is user's responsibility to free the memory used to store the domains info whose address is present in domains_info before closing the application.
|
|
||||||
*
|
|
||||||
*/
|
|
||||||
|
|
||||||
int get_domains_info(char * domain_type, int * num_domains, fastrpc_domain ** domains_info);
|
|
||||||
|
|
||||||
/**
|
|
||||||
* get_effective_domain_id API: get effective domain id for given session id
|
|
||||||
*
|
|
||||||
* @param[in] domain_name pointer to domain name
|
|
||||||
* @param[in] session_id
|
|
||||||
* @param[in] effec_domain_id pointer to save obtained effective domain id.
|
|
||||||
* @return 0 if query is successful.
|
|
||||||
* non-zero if error, return value points to the error.
|
|
||||||
*
|
|
||||||
*/
|
|
||||||
|
|
||||||
int get_effective_domain_id(char * domain_name, int session_id, int * effec_domain_id);
|
|
||||||
|
|
||||||
/**
|
|
||||||
* is_async_fastrpc_supported API: query a domain id has async fastrpc supported or not
|
|
||||||
*
|
|
||||||
* @param[in] domain_id value of a domain
|
|
||||||
* @return Returns true or false stating support of Async FastRPC
|
|
||||||
*
|
|
||||||
*/
|
|
||||||
|
|
||||||
bool is_async_fastrpc_supported(int domain_id);
|
|
||||||
|
|
||||||
/**
|
|
||||||
* is_status_notification_supported API: query the DSP for STATUS_NOTIFICATION_SUPPORT information
|
|
||||||
*
|
|
||||||
* @param[in] domain_id value of a domain
|
|
||||||
* @return Returns true or false stating status notification support information
|
|
||||||
*
|
|
||||||
*/
|
|
||||||
bool is_status_notification_supported(int domain_id);
|
|
||||||
|
|
||||||
/**
|
|
||||||
* get_hmx_support_info API: query the DSP for HMX SUPPORT information
|
|
||||||
*
|
|
||||||
* @param[in] domain_id value of a domain
|
|
||||||
* @param[out] capability capability value of the attribute queried.
|
|
||||||
* @param[in] attr value of the attribute to the queried.
|
|
||||||
* @return 0 if query is successful.
|
|
||||||
* non-zero if error, return value points to the error.
|
|
||||||
*
|
|
||||||
*/
|
|
||||||
int get_hmx_support_info(int domain, uint32_t * capability, uint32_t attr);
|
|
||||||
|
|
||||||
/**
|
|
||||||
* get_hex_arch_ver API: query the Hexagon processor architecture version information
|
|
||||||
*
|
|
||||||
* @param[in] domain_id value of a domain
|
|
||||||
* @param[out] Arch version (73, 75, ...)
|
|
||||||
* @return 0 if query is successful.
|
|
||||||
* non-zero if error, return value points to the error.
|
|
||||||
*
|
|
||||||
*/
|
|
||||||
int get_hex_arch_ver(int domain, int * arch);
|
|
||||||
|
|
||||||
/**
|
|
||||||
* get_hvx_support_info API: query the DSP for HVX SUPPORT information
|
|
||||||
*
|
|
||||||
* @param[in] domain_id value of a domain
|
|
||||||
* @param[out] capability capability value of the attribute queried.
|
|
||||||
* @param[in] attr value of the attribute to the queried.
|
|
||||||
* @return 0 if query is successful.
|
|
||||||
* non-zero if error, return value points to the error.
|
|
||||||
*
|
|
||||||
*/
|
|
||||||
int get_hvx_support_info(int domain, uint32_t * capability, uint32_t attr);
|
|
||||||
|
|
||||||
#ifdef __cplusplus
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#endif //DSP_CAPABILITIES_UTILS_H
|
|
||||||
|
|
@ -0,0 +1,79 @@
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#ifdef _WIN32
|
||||||
|
# define WIN32_LEAN_AND_MEAN
|
||||||
|
# ifndef NOMINMAX
|
||||||
|
# define NOMINMAX
|
||||||
|
# endif
|
||||||
|
# include <windows.h>
|
||||||
|
# include <winevt.h>
|
||||||
|
#else
|
||||||
|
# include <dlfcn.h>
|
||||||
|
# include <unistd.h>
|
||||||
|
#endif
|
||||||
|
#include <filesystem>
|
||||||
|
|
||||||
|
namespace fs = std::filesystem;
|
||||||
|
|
||||||
|
#ifdef _WIN32
|
||||||
|
|
||||||
|
using dl_handle = std::remove_pointer_t<HMODULE>;
|
||||||
|
|
||||||
|
struct dl_handle_deleter {
|
||||||
|
void operator()(HMODULE handle) {
|
||||||
|
FreeLibrary(handle);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
static inline dl_handle * dl_load_library(const fs::path & path) {
|
||||||
|
// suppress error dialogs for missing DLLs
|
||||||
|
DWORD old_mode = SetErrorMode(SEM_FAILCRITICALERRORS);
|
||||||
|
SetErrorMode(old_mode | SEM_FAILCRITICALERRORS);
|
||||||
|
|
||||||
|
HMODULE handle = LoadLibraryW(path.wstring().c_str());
|
||||||
|
|
||||||
|
SetErrorMode(old_mode);
|
||||||
|
|
||||||
|
return handle;
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline void * dl_get_sym(dl_handle * handle, const char * name) {
|
||||||
|
DWORD old_mode = SetErrorMode(SEM_FAILCRITICALERRORS);
|
||||||
|
SetErrorMode(old_mode | SEM_FAILCRITICALERRORS);
|
||||||
|
|
||||||
|
void * p = (void *) GetProcAddress(handle, name);
|
||||||
|
|
||||||
|
SetErrorMode(old_mode);
|
||||||
|
|
||||||
|
return p;
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline const char * dl_error() {
|
||||||
|
return "";
|
||||||
|
}
|
||||||
|
|
||||||
|
#else
|
||||||
|
|
||||||
|
using dl_handle = void;
|
||||||
|
|
||||||
|
struct dl_handle_deleter {
|
||||||
|
void operator()(void * handle) {
|
||||||
|
dlclose(handle);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
static inline dl_handle * dl_load_library(const fs::path & path) {
|
||||||
|
dl_handle * handle = dlopen(path.string().c_str(), RTLD_NOW | RTLD_LOCAL);
|
||||||
|
return handle;
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline void * dl_get_sym(dl_handle * handle, const char * name) {
|
||||||
|
return dlsym(handle, name);
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline const char * dl_error() {
|
||||||
|
const char *rslt = dlerror();
|
||||||
|
return rslt != nullptr ? rslt : "";
|
||||||
|
}
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
@ -0,0 +1,38 @@
|
||||||
|
[Version]
|
||||||
|
Signature = "$WINDOWS NT$"
|
||||||
|
Class = ComputeAccelerator
|
||||||
|
ClassGuid = {F01A9D53-3FF6-48D2-9F97-C8A7004BE10C}
|
||||||
|
Provider = %GGML%
|
||||||
|
DriverVer = 01/01/2026,1.0.0.0
|
||||||
|
CatalogFile = libggml-htp.cat
|
||||||
|
PnpLockDown = 1
|
||||||
|
|
||||||
|
[DestinationDirs]
|
||||||
|
Drivers_Dir = 6
|
||||||
|
|
||||||
|
[SourceDisksNames]
|
||||||
|
1 = %DiskId%
|
||||||
|
|
||||||
|
[SourceDisksFiles]
|
||||||
|
libggml-htp-v68.so = 1
|
||||||
|
libggml-htp-v69.so = 1
|
||||||
|
libggml-htp-v73.so = 1
|
||||||
|
libggml-htp-v75.so = 1
|
||||||
|
libggml-htp-v81.so = 1
|
||||||
|
|
||||||
|
[ControlFlags]
|
||||||
|
ExcludeFromSelect = *
|
||||||
|
|
||||||
|
[DefaultInstall.NTarm64]
|
||||||
|
CopyFiles=Drivers_Dir
|
||||||
|
|
||||||
|
[Drivers_Dir]
|
||||||
|
libggml-htp-v68.so,,,0x10 ;COPYFLG_NO_OVERWRITE
|
||||||
|
libggml-htp-v69.so,,,0x10 ;COPYFLG_NO_OVERWRITE
|
||||||
|
libggml-htp-v73.so,,,0x10 ;COPYFLG_NO_OVERWRITE
|
||||||
|
libggml-htp-v75.so,,,0x10 ;COPYFLG_NO_OVERWRITE
|
||||||
|
libggml-htp-v81.so,,,0x10 ;COPYFLG_NO_OVERWRITE
|
||||||
|
|
||||||
|
[Strings]
|
||||||
|
GGML = 'GGML'
|
||||||
|
DiskId = 'GGML HTP library'
|
||||||
|
|
@ -0,0 +1,40 @@
|
||||||
|
|
||||||
|
#!/usr/bin/env pwsh
|
||||||
|
|
||||||
|
# Basedir on device
|
||||||
|
$basedir=".\pkg-snapdragon"
|
||||||
|
|
||||||
|
$cli_opts=$args
|
||||||
|
|
||||||
|
$model="Llama-3.2-3B-Instruct-Q4_0.gguf"
|
||||||
|
if ($null -ne $env:M) {
|
||||||
|
$model=$env:M
|
||||||
|
}
|
||||||
|
|
||||||
|
$device="HTP0"
|
||||||
|
if ($null -ne $env:D) {
|
||||||
|
$device=$env:D
|
||||||
|
}
|
||||||
|
|
||||||
|
if ($null -ne $env:V) {
|
||||||
|
$env:GGML_HEXAGON_VERBOSE=$env:V
|
||||||
|
}
|
||||||
|
|
||||||
|
if ($null -ne $env:OPMASK) {
|
||||||
|
$env:GGML_HEXAGON_OPMASK=$env:OPMASK
|
||||||
|
}
|
||||||
|
|
||||||
|
if ($null -ne $env:NHVX) {
|
||||||
|
$env:GGML_HEXAGON_NHVX=$env:NHVX
|
||||||
|
}
|
||||||
|
|
||||||
|
if ($null -ne $env:NDEV) {
|
||||||
|
$env:GGML_HEXAGON_NDEV=$env:NDEV
|
||||||
|
}
|
||||||
|
|
||||||
|
$env:ADSP_LIBRARY_PATH="$basedir\lib"
|
||||||
|
|
||||||
|
& "$basedir\bin\llama-bench.exe" `
|
||||||
|
--mmap 0 -m $basedir\..\..\gguf\$model `
|
||||||
|
--poll 1000 -t 6 --cpu-mask 0xfc --cpu-strict 1 `
|
||||||
|
--batch-size 128 -ngl 99 --device $device $cli_opts
|
||||||
|
|
@ -0,0 +1,53 @@
|
||||||
|
|
||||||
|
#!/usr/bin/env pwsh
|
||||||
|
|
||||||
|
# Basedir on device
|
||||||
|
$basedir=".\pkg-snapdragon"
|
||||||
|
|
||||||
|
$cli_opts=$args
|
||||||
|
|
||||||
|
$model="Llama-3.2-3B-Instruct-Q4_0.gguf"
|
||||||
|
if ($null -ne $env:M) {
|
||||||
|
$model=$env:M
|
||||||
|
}
|
||||||
|
|
||||||
|
$device="HTP0"
|
||||||
|
if ($null -ne $env:D) {
|
||||||
|
$device=$env:D
|
||||||
|
}
|
||||||
|
|
||||||
|
if ($null -ne $env:V) {
|
||||||
|
$env:GGML_HEXAGON_VERBOSE=$env:V
|
||||||
|
}
|
||||||
|
|
||||||
|
if ($null -ne $env:E) {
|
||||||
|
$env:GGML_HEXAGON_EXPERIMENTAL=$env:E
|
||||||
|
}
|
||||||
|
|
||||||
|
if ($null -ne $env:SCHED) {
|
||||||
|
$env:GGML_SCHED_DEBUG=$env:SCHED; $cli_opts="$cli_opts -v"
|
||||||
|
}
|
||||||
|
|
||||||
|
if ($null -ne $env:PROF) {
|
||||||
|
$env:GGML_HEXAGON_PROFILE=$env:PROF; $env:GGML_HEXAGON_OPSYNC=1
|
||||||
|
}
|
||||||
|
|
||||||
|
if ($null -ne $env:OPMASK) {
|
||||||
|
$env:GGML_HEXAGON_OPMASK=$env:OPMASK
|
||||||
|
}
|
||||||
|
|
||||||
|
if ($null -ne $env:NHVX) {
|
||||||
|
$env:GGML_HEXAGON_NHVX=$env:NHVX
|
||||||
|
}
|
||||||
|
|
||||||
|
if ($null -ne $env:NDEV) {
|
||||||
|
$env:GGML_HEXAGON_NDEV=$env:NDEV
|
||||||
|
}
|
||||||
|
|
||||||
|
$env:ADSP_LIBRARY_PATH="$basedir\lib"
|
||||||
|
|
||||||
|
& "$basedir\bin\llama-completion.exe" `
|
||||||
|
--no-mmap -no-cnv -m $basedir\..\..\gguf\$model `
|
||||||
|
--poll 1000 -t 6 --cpu-mask 0xfc --cpu-strict 1 `
|
||||||
|
--ctx-size 8192 --batch-size 128 -ctk q8_0 -ctv q8_0 -fa on `
|
||||||
|
-ngl 99 --device $device $cli_opts
|
||||||
|
|
@ -0,0 +1,56 @@
|
||||||
|
|
||||||
|
#!/usr/bin/env pwsh
|
||||||
|
|
||||||
|
# Basedir on device
|
||||||
|
$basedir=".\pkg-snapdragon"
|
||||||
|
|
||||||
|
if ($args.Count -eq 0) {
|
||||||
|
Write-Host "No arguments provided.Expected the tool and argument to run."
|
||||||
|
exit -1
|
||||||
|
}
|
||||||
|
|
||||||
|
$tool=$args[0]
|
||||||
|
$cli_opts=@()
|
||||||
|
|
||||||
|
if ($args.Count -gt 1) {
|
||||||
|
$cli_opts=$args[1..($args.Count - 1)]
|
||||||
|
$remainingArgs = $args[1..($args.Count - 1)]
|
||||||
|
}
|
||||||
|
|
||||||
|
$device="HTP0"
|
||||||
|
if ($null -ne $env:D) {
|
||||||
|
$device=$env:D
|
||||||
|
}
|
||||||
|
|
||||||
|
if ($null -ne $env:V) {
|
||||||
|
$env:GGML_HEXAGON_VERBOSE=$env:V
|
||||||
|
}
|
||||||
|
|
||||||
|
if ($null -ne $env:E) {
|
||||||
|
$env:GGML_HEXAGON_EXPERIMENTAL=$env:E
|
||||||
|
}
|
||||||
|
|
||||||
|
if ($null -ne $env:SCHED) {
|
||||||
|
$env:GGML_SCHED_DEBUG=$env:SCHED; $cli_opts="$cli_opts -v"
|
||||||
|
}
|
||||||
|
|
||||||
|
if ($null -ne $env:PROF) {
|
||||||
|
$env:GGML_HEXAGON_PROFILE=$env:PROF; $env:GGML_HEXAGON_OPSYNC=1
|
||||||
|
}
|
||||||
|
|
||||||
|
if ($null -ne $env:OPMASK) {
|
||||||
|
$env:GGML_HEXAGON_OPMASK=$env:OPMASK
|
||||||
|
}
|
||||||
|
|
||||||
|
if ($null -ne $env:NHVX) {
|
||||||
|
$env:GGML_HEXAGON_NHVX=$env:NHVX
|
||||||
|
}
|
||||||
|
|
||||||
|
if ($null -ne $env:NDEV) {
|
||||||
|
$env:GGML_HEXAGON_NDEV=$env:NDEV
|
||||||
|
}
|
||||||
|
|
||||||
|
$env:ADSP_LIBRARY_PATH="$basedir\lib"
|
||||||
|
|
||||||
|
& "$basedir\bin\$tool" `
|
||||||
|
$cli_opts
|
||||||
|
|
@ -0,0 +1,105 @@
|
||||||
|
# Requires Run as Administrator is NOT strictly necessary for User-scope env vars,
|
||||||
|
# but recommended for creating directories in C:\ root if permissions are restricted.
|
||||||
|
|
||||||
|
$ErrorActionPreference = "Stop"
|
||||||
|
|
||||||
|
# --- Configuration ---
|
||||||
|
$BaseDir = "C:\Qualcomm"
|
||||||
|
|
||||||
|
# SDK 1: Hexagon
|
||||||
|
$HexagonUrl = "https://github.com/snapdragon-toolchain/hexagon-sdk/releases/download/v6.4.0.2/hexagon-sdk-v6.4.0.2-arm64-wos.tar.xz"
|
||||||
|
$HexagonParent = Join-Path $BaseDir "Hexagon_SDK"
|
||||||
|
$HexagonSdkVersion = "6.4.0.2"
|
||||||
|
$HexagonToolsVersion = "19.0.04"
|
||||||
|
$HexagonSdkTarget = Join-Path $HexagonParent $HexagonSdkVersion
|
||||||
|
$HexagonToolsTarget = Join-Path $HexagonSdkTarget "\tools\HEXAGON_Tools\$HexagonToolsVersion"
|
||||||
|
|
||||||
|
# SDK 2: OpenCL
|
||||||
|
$OpenCLUrl = "https://github.com/snapdragon-toolchain/opencl-sdk/releases/download/v2.3.2/adreno-opencl-sdk-v2.3.2-arm64-wos.tar.xz"
|
||||||
|
$OpenCLParent = Join-Path $BaseDir "OpenCL_SDK"
|
||||||
|
$OpenCLVersion = "2.3.2"
|
||||||
|
$OpenCLTarget = Join-Path $OpenCLParent $OpenCLVersion
|
||||||
|
|
||||||
|
# --- Helper Function ---
|
||||||
|
function Install-QualcommSDK {
|
||||||
|
param (
|
||||||
|
[string]$Url,
|
||||||
|
[string]$ParentDir,
|
||||||
|
[string]$TargetDir,
|
||||||
|
[string]$Name
|
||||||
|
)
|
||||||
|
|
||||||
|
# 1. Create Parent Directory
|
||||||
|
if (-not (Test-Path -Path $ParentDir)) {
|
||||||
|
Write-Host "Creating directory: $ParentDir" -ForegroundColor Cyan
|
||||||
|
New-Item -Path $ParentDir -ItemType Directory -Force | Out-Null
|
||||||
|
}
|
||||||
|
|
||||||
|
# 2. Check for Specific Version Directory
|
||||||
|
if (Test-Path -Path $TargetDir) {
|
||||||
|
Write-Host "$Name ($TargetDir) already exists. Skipping download." -ForegroundColor Green
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
Write-Host "$Name not found. preparing to download..." -ForegroundColor Yellow
|
||||||
|
|
||||||
|
# Create the target directory to extract into
|
||||||
|
New-Item -Path $TargetDir -ItemType Directory -Force | Out-Null
|
||||||
|
|
||||||
|
# Define temporary archive path
|
||||||
|
$TempFile = Join-Path $ParentDir "temp_sdk.tar.xz"
|
||||||
|
|
||||||
|
try {
|
||||||
|
# Download
|
||||||
|
Write-Host "Downloading from: $Url"
|
||||||
|
Invoke-WebRequest -Uri $Url -OutFile $TempFile
|
||||||
|
|
||||||
|
# Untar
|
||||||
|
# Note: We assume Windows includes tar.exe (Win 10 build 17063+)
|
||||||
|
Write-Host "Extracting archive to $TargetDir..."
|
||||||
|
|
||||||
|
# We use -C to extract contents INTO the target directory created above
|
||||||
|
tar -xJvf $TempFile -C $TargetDir\..
|
||||||
|
|
||||||
|
Write-Host "Extraction complete." -ForegroundColor Green
|
||||||
|
}
|
||||||
|
catch {
|
||||||
|
Write-Error "Failed to download or extract $Name. Error: $_"
|
||||||
|
# Cleanup target dir if failed so script tries again next time
|
||||||
|
Remove-Item -Path $TargetDir -Recurse -Force -ErrorAction SilentlyContinue
|
||||||
|
}
|
||||||
|
finally {
|
||||||
|
# Cleanup Archive
|
||||||
|
if (Test-Path $TempFile) { Remove-Item $TempFile -Force }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
# --- Execution ---
|
||||||
|
|
||||||
|
# 1. Ensure Base C:\Qualcomm exists
|
||||||
|
if (-not (Test-Path $BaseDir)) {
|
||||||
|
New-Item -Path $BaseDir -ItemType Directory -Force | Out-Null
|
||||||
|
}
|
||||||
|
|
||||||
|
# 2. Run Install Logic
|
||||||
|
Install-QualcommSDK -Url $HexagonUrl -ParentDir $HexagonParent -TargetDir $HexagonSdkTarget -Name "Hexagon SDK"
|
||||||
|
Install-QualcommSDK -Url $OpenCLUrl -ParentDir $OpenCLParent -TargetDir $OpenCLTarget -Name "OpenCL SDK"
|
||||||
|
|
||||||
|
# --- Environment Variables ---
|
||||||
|
|
||||||
|
Write-Host "`nSetting Environment Variables..." -ForegroundColor Cyan
|
||||||
|
|
||||||
|
# Set OPENCL_SDK_ROOT
|
||||||
|
[System.Environment]::SetEnvironmentVariable('OPENCL_SDK_ROOT', $OpenCLTarget, [System.EnvironmentVariableTarget]::User)
|
||||||
|
$env:OPENCL_SDK_ROOT = $OpenCLTarget # Set for current session as well
|
||||||
|
Write-Host "OPENCL_SDK_ROOT set to: $OpenCLTarget"
|
||||||
|
|
||||||
|
# Set HEXAGON_SDK_ROOT
|
||||||
|
[System.Environment]::SetEnvironmentVariable('HEXAGON_SDK_ROOT', $HexagonSdkTarget, [System.EnvironmentVariableTarget]::User)
|
||||||
|
$env:HEXAGON_SDK_ROOT = $HexagonSdkTarget # Set for current session as well
|
||||||
|
Write-Host "HEXAGON_SDK_ROOT set to: $HexagonSdkTarget"
|
||||||
|
|
||||||
|
# Set HEXAGON_SDK_ROOT
|
||||||
|
[System.Environment]::SetEnvironmentVariable('HEXAGON_TOOLS_ROOT', $HexagonToolsTarget, [System.EnvironmentVariableTarget]::User)
|
||||||
|
$env:HEXAGON_TOOLS_ROOT = $HexagonToolsTarget # Set for current session as well
|
||||||
|
Write-Host "HEXAGON_TOOLS_ROOT set to: $HexagonToolsTarget"
|
||||||
Loading…
Reference in New Issue