commit 43dfc0c8d6

Merge branch 'sf/deepseek-ocr' of github.com:sfallah/llama.cpp into sf/deepseek-ocr
@@ -66,14 +66,21 @@ jobs:
         id: pack_artifacts
         run: |
           cp LICENSE ./build/bin/
-          zip -r llama-${{ steps.tag.outputs.name }}-bin-macos-arm64.zip ./build/bin/*
+          zip -y -r llama-${{ steps.tag.outputs.name }}-bin-macos-arm64.zip ./build/bin/*
+          tar -czvf llama-${{ steps.tag.outputs.name }}-bin-macos-arm64.tar.gz -C ./build/bin .

-      - name: Upload artifacts
+      - name: Upload artifacts (zip)
         uses: actions/upload-artifact@v4
         with:
           path: llama-${{ steps.tag.outputs.name }}-bin-macos-arm64.zip
           name: llama-bin-macos-arm64.zip

+      - name: Upload artifacts (tar)
+        uses: actions/upload-artifact@v4
+        with:
+          path: llama-${{ steps.tag.outputs.name }}-bin-macos-arm64.tar.gz
+          name: llama-bin-macos-arm64.tar.gz
+
   macOS-x64:
     runs-on: macos-15-intel

@@ -120,14 +127,21 @@ jobs:
         id: pack_artifacts
         run: |
           cp LICENSE ./build/bin/
-          zip -r llama-${{ steps.tag.outputs.name }}-bin-macos-x64.zip ./build/bin/*
+          zip -y -r llama-${{ steps.tag.outputs.name }}-bin-macos-x64.zip ./build/bin/*
+          tar -czvf llama-${{ steps.tag.outputs.name }}-bin-macos-x64.tar.gz -C ./build/bin .

-      - name: Upload artifacts
+      - name: Upload artifacts (zip)
         uses: actions/upload-artifact@v4
         with:
           path: llama-${{ steps.tag.outputs.name }}-bin-macos-x64.zip
           name: llama-bin-macos-x64.zip

+      - name: Upload artifacts (tar)
+        uses: actions/upload-artifact@v4
+        with:
+          path: llama-${{ steps.tag.outputs.name }}-bin-macos-x64.tar.gz
+          name: llama-bin-macos-x64.tar.gz
+
   ubuntu-22-cpu:
     strategy:
       matrix:

@@ -182,14 +196,21 @@ jobs:
         id: pack_artifacts
         run: |
           cp LICENSE ./build/bin/
-          zip -r llama-${{ steps.tag.outputs.name }}-bin-ubuntu-${{ matrix.build }}.zip ./build/bin/*
+          zip -y -r llama-${{ steps.tag.outputs.name }}-bin-ubuntu-${{ matrix.build }}.zip ./build/bin/*
+          tar -czvf llama-${{ steps.tag.outputs.name }}-bin-ubuntu-${{ matrix.build }}.tar.gz -C ./build/bin .

-      - name: Upload artifacts
+      - name: Upload artifacts (zip)
         uses: actions/upload-artifact@v4
         with:
           path: llama-${{ steps.tag.outputs.name }}-bin-ubuntu-${{ matrix.build }}.zip
           name: llama-bin-ubuntu-${{ matrix.build }}.zip

+      - name: Upload artifacts (tar)
+        uses: actions/upload-artifact@v4
+        with:
+          path: llama-${{ steps.tag.outputs.name }}-bin-ubuntu-${{ matrix.build }}.tar.gz
+          name: llama-bin-ubuntu-${{ matrix.build }}.tar.gz
+
   ubuntu-22-vulkan:
     runs-on: ubuntu-22.04

@@ -235,14 +256,21 @@ jobs:
         id: pack_artifacts
         run: |
           cp LICENSE ./build/bin/
-          zip -r llama-${{ steps.tag.outputs.name }}-bin-ubuntu-vulkan-x64.zip ./build/bin/*
+          zip -y -r llama-${{ steps.tag.outputs.name }}-bin-ubuntu-vulkan-x64.zip ./build/bin/*
+          tar -czvf llama-${{ steps.tag.outputs.name }}-bin-ubuntu-vulkan-x64.tar.gz -C ./build/bin .

-      - name: Upload artifacts
+      - name: Upload artifacts (zip)
         uses: actions/upload-artifact@v4
         with:
           path: llama-${{ steps.tag.outputs.name }}-bin-ubuntu-vulkan-x64.zip
           name: llama-bin-ubuntu-vulkan-x64.zip

+      - name: Upload artifacts (tar)
+        uses: actions/upload-artifact@v4
+        with:
+          path: llama-${{ steps.tag.outputs.name }}-bin-ubuntu-vulkan-x64.tar.gz
+          name: llama-bin-ubuntu-vulkan-x64.tar.gz
+
   windows-cpu:
     runs-on: windows-2025

@@ -298,7 +326,7 @@ jobs:
         run: |
           Copy-Item $env:CURL_PATH\bin\libcurl-${{ matrix.arch }}.dll .\build\bin\Release\
           Copy-Item "C:\Program Files\Microsoft Visual Studio\2022\Enterprise\VC\Redist\MSVC\14.44.35112\debug_nonredist\${{ matrix.arch }}\Microsoft.VC143.OpenMP.LLVM\libomp140.${{ matrix.arch == 'x64' && 'x86_64' || 'aarch64' }}.dll" .\build\bin\Release\
-          7z a llama-bin-win-cpu-${{ matrix.arch }}.zip .\build\bin\Release\*
+          7z a -snl llama-bin-win-cpu-${{ matrix.arch }}.zip .\build\bin\Release\*

       - name: Upload artifacts
         uses: actions/upload-artifact@v4

@@ -380,7 +408,7 @@ jobs:
       - name: Pack artifacts
         id: pack_artifacts
         run: |
-          7z a llama-bin-win-${{ matrix.backend }}-${{ matrix.arch }}.zip .\build\bin\Release\${{ matrix.target }}.dll
+          7z a -snl llama-bin-win-${{ matrix.backend }}-${{ matrix.arch }}.zip .\build\bin\Release\${{ matrix.target }}.dll

       - name: Upload artifacts
         uses: actions/upload-artifact@v4

@@ -434,7 +462,7 @@ jobs:
       - name: Pack artifacts
         id: pack_artifacts
         run: |
-          7z a llama-bin-win-cuda-${{ matrix.cuda }}-x64.zip .\build\bin\Release\ggml-cuda.dll
+          7z a -snl llama-bin-win-cuda-${{ matrix.cuda }}-x64.zip .\build\bin\Release\ggml-cuda.dll

       - name: Upload artifacts
         uses: actions/upload-artifact@v4

@@ -526,7 +554,7 @@ jobs:
           cp "${{ env.ONEAPI_ROOT }}/umf/latest/bin/umf.dll" ./build/bin

           echo "cp oneAPI running time dll files to ./build/bin done"
-          7z a llama-bin-win-sycl-x64.zip ./build/bin/*
+          7z a -snl llama-bin-win-sycl-x64.zip ./build/bin/*

       - name: Upload the release package
         uses: actions/upload-artifact@v4

@@ -632,7 +660,7 @@ jobs:
       - name: Pack artifacts
         id: pack_artifacts
         run: |
-          7z a llama-bin-win-hip-${{ matrix.name }}-x64.zip .\build\bin\*
+          7z a -snl llama-bin-win-hip-${{ matrix.name }}-x64.zip .\build\bin\*

       - name: Upload artifacts
         uses: actions/upload-artifact@v4

@@ -685,13 +713,20 @@ jobs:
      - name: Pack artifacts
        id: pack_artifacts
        run: |
-         zip --symlinks -r llama-${{ steps.tag.outputs.name }}-xcframework.zip build-apple/llama.xcframework
+         zip -y -r llama-${{ steps.tag.outputs.name }}-xcframework.zip build-apple/llama.xcframework
+         tar -czvf llama-${{ steps.tag.outputs.name }}-xcframework.tar.gz -C build-apple llama.xcframework

-     - name: Upload artifacts
+     - name: Upload artifacts (zip)
        uses: actions/upload-artifact@v4
        with:
          path: llama-${{ steps.tag.outputs.name }}-xcframework.zip
-         name: llama-${{ steps.tag.outputs.name }}-xcframework
+         name: llama-${{ steps.tag.outputs.name }}-xcframework.zip

+     - name: Upload artifacts (tar)
+       uses: actions/upload-artifact@v4
+       with:
+         path: llama-${{ steps.tag.outputs.name }}-xcframework.tar.gz
+         name: llama-${{ steps.tag.outputs.name }}-xcframework.tar.gz
+
   openEuler-cann:
     strategy:

@@ -730,14 +765,21 @@ jobs:
       - name: Pack artifacts
         run: |
           cp LICENSE ./build/bin/
-          zip -r llama-${{ steps.tag.outputs.name }}-bin-${{ matrix.chip_type }}-openEuler-${{ matrix.arch }}.zip ./build/bin/*
+          zip -y -r llama-${{ steps.tag.outputs.name }}-bin-${{ matrix.chip_type }}-openEuler-${{ matrix.arch }}.zip ./build/bin/*
+          tar -czvf llama-${{ steps.tag.outputs.name }}-bin-${{ matrix.chip_type }}-openEuler-${{ matrix.arch }}.tar.gz -C ./build/bin .

-      - name: Upload artifacts
+      - name: Upload artifacts (zip)
         uses: actions/upload-artifact@v4
         with:
           path: llama-${{ steps.tag.outputs.name }}-bin-${{ matrix.chip_type }}-openEuler-${{ matrix.arch }}.zip
           name: llama-bin-${{ matrix.chip_type }}-openEuler-${{ matrix.arch }}.zip

+      - name: Upload artifacts (tar)
+        uses: actions/upload-artifact@v4
+        with:
+          path: llama-${{ steps.tag.outputs.name }}-bin-${{ matrix.chip_type }}-openEuler-${{ matrix.arch }}.tar.gz
+          name: llama-bin-${{ matrix.chip_type }}-openEuler-${{ matrix.arch }}.tar.gz
+
   release:
     if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }}

@@ -814,6 +856,7 @@ jobs:

           echo "Moving other artifacts..."
           mv -v artifact/*.zip release
+          mv -v artifact/*.tar.gz release

       - name: Create release
         id: create_release

@@ -822,6 +865,39 @@ jobs:
           GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
         with:
           tag_name: ${{ steps.tag.outputs.name }}
+          body: |
+            > [!WARNING]
+            > **Release Format Update**: Linux releases will soon use .tar.gz archives instead of .zip. Please make the necessary changes to your deployment scripts.
+
+            **macOS/iOS:**
+            - [macOS Apple Silicon (arm64)](https://github.com/ggml-org/llama.cpp/releases/download/${{ steps.tag.outputs.name }}/llama-${{ steps.tag.outputs.name }}-bin-macos-arm64.tar.gz)
+            - [macOS Intel (x64)](https://github.com/ggml-org/llama.cpp/releases/download/${{ steps.tag.outputs.name }}/llama-${{ steps.tag.outputs.name }}-bin-macos-x64.tar.gz)
+            - [iOS XCFramework](https://github.com/ggml-org/llama.cpp/releases/download/${{ steps.tag.outputs.name }}/llama-${{ steps.tag.outputs.name }}-xcframework.tar.gz)
+
+            **Linux:**
+            - [Ubuntu x64 (CPU)](https://github.com/ggml-org/llama.cpp/releases/download/${{ steps.tag.outputs.name }}/llama-${{ steps.tag.outputs.name }}-bin-ubuntu-x64.tar.gz)
+            - [Ubuntu x64 (Vulkan)](https://github.com/ggml-org/llama.cpp/releases/download/${{ steps.tag.outputs.name }}/llama-${{ steps.tag.outputs.name }}-bin-ubuntu-vulkan-x64.tar.gz)
+            - [Ubuntu s390x (CPU)](https://github.com/ggml-org/llama.cpp/releases/download/${{ steps.tag.outputs.name }}/llama-${{ steps.tag.outputs.name }}-bin-ubuntu-s390x.tar.gz)
+
+            **Windows:**
+            - [Windows x64 (CPU)](https://github.com/ggml-org/llama.cpp/releases/download/${{ steps.tag.outputs.name }}/llama-${{ steps.tag.outputs.name }}-bin-win-cpu-x64.zip)
+            - [Windows arm64 (CPU)](https://github.com/ggml-org/llama.cpp/releases/download/${{ steps.tag.outputs.name }}/llama-${{ steps.tag.outputs.name }}-bin-win-cpu-arm64.zip)
+            - [Windows x64 (CUDA)](https://github.com/ggml-org/llama.cpp/releases/download/${{ steps.tag.outputs.name }}/llama-${{ steps.tag.outputs.name }}-bin-win-cuda-12.4-x64.zip)
+            - [Windows x64 (Vulkan)](https://github.com/ggml-org/llama.cpp/releases/download/${{ steps.tag.outputs.name }}/llama-${{ steps.tag.outputs.name }}-bin-win-vulkan-x64.zip)
+            - [Windows x64 (SYCL)](https://github.com/ggml-org/llama.cpp/releases/download/${{ steps.tag.outputs.name }}/llama-${{ steps.tag.outputs.name }}-bin-win-sycl-x64.zip)
+            - [Windows x64 (HIP)](https://github.com/ggml-org/llama.cpp/releases/download/${{ steps.tag.outputs.name }}/llama-${{ steps.tag.outputs.name }}-bin-win-hip-radeon-x64.zip)
+
+            **openEuler:**
+            - [openEuler x86 (310p)](https://github.com/ggml-org/llama.cpp/releases/download/${{ steps.tag.outputs.name }}/llama-${{ steps.tag.outputs.name }}-bin-310p-openEuler-x86.tar.gz)
+            - [openEuler x86 (910b)](https://github.com/ggml-org/llama.cpp/releases/download/${{ steps.tag.outputs.name }}/llama-${{ steps.tag.outputs.name }}-bin-910b-openEuler-x86.tar.gz)
+            - [openEuler aarch64 (310p)](https://github.com/ggml-org/llama.cpp/releases/download/${{ steps.tag.outputs.name }}/llama-${{ steps.tag.outputs.name }}-bin-310p-openEuler-aarch64.tar.gz)
+            - [openEuler aarch64 (910b)](https://github.com/ggml-org/llama.cpp/releases/download/${{ steps.tag.outputs.name }}/llama-${{ steps.tag.outputs.name }}-bin-910b-openEuler-aarch64.tar.gz)
+
+            <details>
+
+            ${{ github.event.head_commit.message }}
+
+            </details>

       - name: Upload release
         id: upload_release

@@ -833,7 +909,7 @@ jobs:
             const fs = require('fs');
             const release_id = '${{ steps.create_release.outputs.id }}';
             for (let file of await fs.readdirSync('./release')) {
-              if (path.extname(file) === '.zip') {
+              if (path.extname(file) === '.zip' || file.endsWith('.tar.gz')) {
                 console.log('uploadReleaseAsset', file);
                 await github.repos.uploadReleaseAsset({
                   owner: context.repo.owner,
@@ -9,6 +9,7 @@ jobs:
   update:
     name: Update Winget Package
     runs-on: ubuntu-latest
+    if: ${{ github.repository.owner.login == 'ggml-org' }}

    steps:
      - name: Install cargo binstall
@@ -7,7 +7,7 @@
 /ci/ @ggerganov
 /cmake/ @ggerganov
 /common/CMakeLists.txt @ggerganov
-/common/arg.* @ggerganov @ericcurtin
+/common/arg.* @ggerganov
 /common/base64.hpp.* @ggerganov
 /common/build-info.* @ggerganov
 /common/common.* @ggerganov

@@ -87,8 +87,7 @@
 /tools/perplexity/ @ggerganov
 /tools/quantize/ @ggerganov
 /tools/rpc/ @rgerganov
-/tools/run/ @ericcurtin
-/tools/server/* @ngxson @ggerganov @ericcurtin # no subdir
+/tools/server/* @ngxson @ggerganov # no subdir
 /tools/server/webui/ @allozaur
 /tools/tokenize/ @ggerganov
 /tools/tts/ @ggerganov
@@ -19,6 +19,7 @@ The project differentiates between 3 levels of contributors:
 - If your PR becomes stale, don't hesitate to ping the maintainers in the comments
 - Maintainers will rely on your insights and approval when making a final decision to approve and merge a PR
 - Consider adding yourself to [CODEOWNERS](CODEOWNERS) to indicate your availability for reviewing related PRs
+- Using AI to generate PRs is permitted. However, you must (1) explicitly disclose how AI was used and (2) conduct a thorough manual review before publishing the PR. Note that trivial tab autocompletions do not require disclosure.

 # Pull requests (for maintainers)
@@ -613,3 +613,4 @@ $ echo "source ~/.llama-completion.bash" >> ~/.bashrc
 - [linenoise.cpp](./tools/run/linenoise.cpp/linenoise.cpp) - C++ library that provides readline-like line editing capabilities, used by `llama-run` - BSD 2-Clause License
 - [curl](https://curl.se/) - Client-side URL transfer library, used by various tools/examples - [CURL License](https://curl.se/docs/copyright.html)
 - [miniaudio.h](https://github.com/mackron/miniaudio) - Single-header audio format decoder, used by multimodal subsystem - Public domain
+- [subprocess.h](https://github.com/sheredom/subprocess.h) - Single-header process launching solution for C and C++ - Public domain
@@ -65,4 +65,6 @@ However, If you have discovered a security vulnerability in this project, please

 Please disclose it as a private [security advisory](https://github.com/ggml-org/llama.cpp/security/advisories/new).

+Please note that using AI to identify vulnerabilities and generate reports is permitted. However, you must (1) explicitly disclose how AI was used and (2) conduct a thorough manual review before submitting the report.
+
 A team of volunteers on a reasonable-effort basis maintains this project. As such, please give us at least 90 days to work on a fix before public exposure.
@@ -212,13 +212,13 @@ struct handle_model_result {
 static handle_model_result common_params_handle_model(
         struct common_params_model & model,
         const std::string & bearer_token,
-        const std::string & model_path_default,
         bool offline) {
     handle_model_result result;
     // handle pre-fill default model path and url based on hf_repo and hf_file
     {
         if (!model.docker_repo.empty()) { // Handle Docker URLs by resolving them to local paths
             model.path = common_docker_resolve_model(model.docker_repo);
+            model.name = model.docker_repo; // set name for consistency
         } else if (!model.hf_repo.empty()) {
             // short-hand to avoid specifying --hf-file -> default it to --model
             if (model.hf_file.empty()) {

@@ -227,7 +227,8 @@ static handle_model_result common_params_handle_model(
                 if (auto_detected.repo.empty() || auto_detected.ggufFile.empty()) {
                     exit(1); // built without CURL, error message already printed
                 }
-                model.hf_repo = auto_detected.repo;
+                model.name = model.hf_repo; // repo name with tag
+                model.hf_repo = auto_detected.repo; // repo name without tag
                 model.hf_file = auto_detected.ggufFile;
                 if (!auto_detected.mmprojFile.empty()) {
                     result.found_mmproj = true;

@@ -257,8 +258,6 @@ static handle_model_result common_params_handle_model(
             model.path = fs_get_cache_file(string_split<std::string>(f, '/').back());
         }

-    } else if (model.path.empty()) {
-        model.path = model_path_default;
     }
 }

@@ -405,7 +404,7 @@ static bool common_params_parse_ex(int argc, char ** argv, common_params_contex

     // handle model and download
     {
-        auto res = common_params_handle_model(params.model, params.hf_token, DEFAULT_MODEL_PATH, params.offline);
+        auto res = common_params_handle_model(params.model, params.hf_token, params.offline);
         if (params.no_mmproj) {
             params.mmproj = {};
         } else if (res.found_mmproj && params.mmproj.path.empty() && params.mmproj.url.empty()) {

@@ -415,12 +414,18 @@ static bool common_params_parse_ex(int argc, char ** argv, common_params_contex
             // only download mmproj if the current example is using it
             for (auto & ex : mmproj_examples) {
                 if (ctx_arg.ex == ex) {
-                    common_params_handle_model(params.mmproj, params.hf_token, "", params.offline);
+                    common_params_handle_model(params.mmproj, params.hf_token, params.offline);
                     break;
                 }
             }
-            common_params_handle_model(params.speculative.model, params.hf_token, "", params.offline);
-            common_params_handle_model(params.vocoder.model, params.hf_token, "", params.offline);
+            common_params_handle_model(params.speculative.model, params.hf_token, params.offline);
+            common_params_handle_model(params.vocoder.model, params.hf_token, params.offline);
         }

+        // model is required (except for server)
+        // TODO @ngxson : maybe show a list of available models in CLI in this case
+        if (params.model.path.empty() && ctx_arg.ex != LLAMA_EXAMPLE_SERVER) {
+            throw std::invalid_argument("error: --model is required\n");
+        }
+
         if (params.escape) {

@@ -2105,11 +2110,8 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
     add_opt(common_arg(
         {"-m", "--model"}, "FNAME",
         ex == LLAMA_EXAMPLE_EXPORT_LORA
-            ? std::string("model path from which to load base model")
-            : string_format(
-                "model path (default: `models/$filename` with filename from `--hf-file` "
-                "or `--model-url` if set, otherwise %s)", DEFAULT_MODEL_PATH
-            ),
+            ? "model path from which to load base model"
+            : "model path to load",
         [](common_params & params, const std::string & value) {
             params.model.path = value;
         }

@@ -2507,6 +2509,27 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
             }
         }
     ).set_examples({LLAMA_EXAMPLE_SERVER}));
+    add_opt(common_arg(
+        {"--models-dir"}, "PATH",
+        "directory containing models for the router server (default: disabled)",
+        [](common_params & params, const std::string & value) {
+            params.models_dir = value;
+        }
+    ).set_examples({LLAMA_EXAMPLE_SERVER}).set_env("LLAMA_ARG_MODELS_DIR"));
+    add_opt(common_arg(
+        {"--models-max"}, "N",
+        string_format("for router server, maximum number of models to load simultaneously (default: %d, 0 = unlimited)", params.models_max),
+        [](common_params & params, int value) {
+            params.models_max = value;
+        }
+    ).set_examples({LLAMA_EXAMPLE_SERVER}).set_env("LLAMA_ARG_MODELS_MAX"));
+    add_opt(common_arg(
+        {"--no-models-autoload"},
+        "disables automatic loading of models (default: enabled)",
+        [](common_params & params) {
+            params.models_autoload = false;
+        }
+    ).set_examples({LLAMA_EXAMPLE_SERVER}).set_env("LLAMA_ARG_NO_MODELS_AUTOLOAD"));
     add_opt(common_arg(
         {"--jinja"},
         string_format("use jinja template for chat (default: %s)\n", params.use_jinja ? "enabled" : "disabled"),

@@ -2654,7 +2677,7 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
         [](common_params &, const std::string & value) {
             common_log_set_file(common_log_main(), value.c_str());
         }
-    ));
+    ).set_env("LLAMA_LOG_FILE"));
     add_opt(common_arg(
         {"--log-colors"}, "[on|off|auto]",
         "Set colored logging ('on', 'off', or 'auto', default: 'auto')\n"

@@ -2689,7 +2712,13 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
     ).set_env("LLAMA_OFFLINE"));
     add_opt(common_arg(
         {"-lv", "--verbosity", "--log-verbosity"}, "N",
-        "Set the verbosity threshold. Messages with a higher verbosity will be ignored.",
+        string_format("Set the verbosity threshold. Messages with a higher verbosity will be ignored. Values:\n"
+                      " - 0: generic output\n"
+                      " - 1: error\n"
+                      " - 2: warning\n"
+                      " - 3: info\n"
+                      " - 4: debug\n"
+                      "(default: %d)\n", params.verbosity),
         [](common_params & params, int value) {
             params.verbosity = value;
             common_log_set_verbosity_thold(value);
@@ -163,7 +163,7 @@ common_chat_tool_choice common_chat_tool_choice_parse_oaicompat(const std::strin
     if (tool_choice == "required") {
         return COMMON_CHAT_TOOL_CHOICE_REQUIRED;
     }
-    throw std::runtime_error("Invalid tool_choice: " + tool_choice);
+    throw std::invalid_argument("Invalid tool_choice: " + tool_choice);
 }

 bool common_chat_templates_support_enable_thinking(const common_chat_templates * chat_templates) {

@@ -186,17 +186,17 @@ std::vector<common_chat_msg> common_chat_msgs_parse_oaicompat(const json & messa
     try {

         if (!messages.is_array()) {
-            throw std::runtime_error("Expected 'messages' to be an array, got " + messages.dump());
+            throw std::invalid_argument("Expected 'messages' to be an array, got " + messages.dump());
         }

         for (const auto & message : messages) {
             if (!message.is_object()) {
-                throw std::runtime_error("Expected 'message' to be an object, got " + message.dump());
+                throw std::invalid_argument("Expected 'message' to be an object, got " + message.dump());
             }

             common_chat_msg msg;
             if (!message.contains("role")) {
-                throw std::runtime_error("Missing 'role' in message: " + message.dump());
+                throw std::invalid_argument("Missing 'role' in message: " + message.dump());
             }
             msg.role = message.at("role");

@@ -209,11 +209,11 @@ std::vector<common_chat_msg> common_chat_msgs_parse_oaicompat(const json & messa
             } else if (content.is_array()) {
                 for (const auto & part : content) {
                     if (!part.contains("type")) {
-                        throw std::runtime_error("Missing content part type: " + part.dump());
+                        throw std::invalid_argument("Missing content part type: " + part.dump());
                     }
                     const auto & type = part.at("type");
                     if (type != "text") {
-                        throw std::runtime_error("Unsupported content part type: " + type.dump());
+                        throw std::invalid_argument("Unsupported content part type: " + type.dump());
                     }
                     common_chat_msg_content_part msg_part;
                     msg_part.type = type;

@@ -221,25 +221,25 @@ std::vector<common_chat_msg> common_chat_msgs_parse_oaicompat(const json & messa
                     msg.content_parts.push_back(msg_part);
                 }
             } else if (!content.is_null()) {
-                throw std::runtime_error("Invalid 'content' type: expected string or array, got " + content.dump() + " (ref: https://github.com/ggml-org/llama.cpp/issues/8367)");
+                throw std::invalid_argument("Invalid 'content' type: expected string or array, got " + content.dump() + " (ref: https://github.com/ggml-org/llama.cpp/issues/8367)");
             }
         }
         if (has_tool_calls) {
             for (const auto & tool_call : message.at("tool_calls")) {
                 common_chat_tool_call tc;
                 if (!tool_call.contains("type")) {
-                    throw std::runtime_error("Missing tool call type: " + tool_call.dump());
+                    throw std::invalid_argument("Missing tool call type: " + tool_call.dump());
                 }
                 const auto & type = tool_call.at("type");
                 if (type != "function") {
-                    throw std::runtime_error("Unsupported tool call type: " + tool_call.dump());
+                    throw std::invalid_argument("Unsupported tool call type: " + tool_call.dump());
                 }
                 if (!tool_call.contains("function")) {
-                    throw std::runtime_error("Missing tool call function: " + tool_call.dump());
+                    throw std::invalid_argument("Missing tool call function: " + tool_call.dump());
                 }
                 const auto & fc = tool_call.at("function");
                 if (!fc.contains("name")) {
-                    throw std::runtime_error("Missing tool call name: " + tool_call.dump());
+                    throw std::invalid_argument("Missing tool call name: " + tool_call.dump());
                 }
                 tc.name = fc.at("name");
                 tc.arguments = fc.at("arguments");

@@ -250,7 +250,7 @@ std::vector<common_chat_msg> common_chat_msgs_parse_oaicompat(const json & messa
             }
         }
         if (!has_content && !has_tool_calls) {
-            throw std::runtime_error("Expected 'content' or 'tool_calls' (ref: https://github.com/ggml-org/llama.cpp/issues/8367 & https://github.com/ggml-org/llama.cpp/issues/12279)");
+            throw std::invalid_argument("Expected 'content' or 'tool_calls' (ref: https://github.com/ggml-org/llama.cpp/issues/8367 & https://github.com/ggml-org/llama.cpp/issues/12279)");
         }
         if (message.contains("reasoning_content")) {
             msg.reasoning_content = message.at("reasoning_content");

@@ -353,18 +353,18 @@ std::vector<common_chat_tool> common_chat_tools_parse_oaicompat(const json & too
     try {
         if (!tools.is_null()) {
             if (!tools.is_array()) {
-                throw std::runtime_error("Expected 'tools' to be an array, got " + tools.dump());
+                throw std::invalid_argument("Expected 'tools' to be an array, got " + tools.dump());
             }
             for (const auto & tool : tools) {
                 if (!tool.contains("type")) {
-                    throw std::runtime_error("Missing tool type: " + tool.dump());
+                    throw std::invalid_argument("Missing tool type: " + tool.dump());
                 }
                 const auto & type = tool.at("type");
                 if (!type.is_string() || type != "function") {
-                    throw std::runtime_error("Unsupported tool type: " + tool.dump());
+                    throw std::invalid_argument("Unsupported tool type: " + tool.dump());
                 }
                 if (!tool.contains("function")) {
-                    throw std::runtime_error("Missing tool function: " + tool.dump());
+                    throw std::invalid_argument("Missing tool function: " + tool.dump());
                 }

                 const auto & function = tool.at("function");
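For illustration (not part of the diff): the switch from std::runtime_error to std::invalid_argument above lets a caller distinguish malformed client input from internal failures, because std::invalid_argument can be caught separately before a generic std::exception handler. A minimal, self-contained sketch; parse_tool_choice() here is an invented stand-in for the parsers changed above, and the HTTP status mapping is only one plausible use:

    #include <iostream>
    #include <stdexcept>
    #include <string>

    // Stand-in mirroring the pattern above: bad input -> std::invalid_argument.
    static void parse_tool_choice(const std::string & tool_choice) {
        if (tool_choice != "auto" && tool_choice != "none" && tool_choice != "required") {
            throw std::invalid_argument("Invalid tool_choice: " + tool_choice);
        }
    }

    int main() {
        try {
            parse_tool_choice("sometimes");
        } catch (const std::invalid_argument & e) {
            std::cout << "HTTP 400: " << e.what() << "\n"; // malformed client input
        } catch (const std::exception & e) {
            std::cout << "HTTP 500: " << e.what() << "\n"; // genuine internal error
        }
    }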
@@ -912,7 +912,7 @@ std::string fs_get_cache_file(const std::string & filename) {
     return cache_directory + filename;
 }

-std::vector<common_file_info> fs_list_files(const std::string & path) {
+std::vector<common_file_info> fs_list(const std::string & path, bool include_directories) {
     std::vector<common_file_info> files;
     if (path.empty()) return files;

@@ -927,14 +927,22 @@ std::vector<common_file_info> fs_list_files(const std::string & path) {
             const auto & p = entry.path();
             if (std::filesystem::is_regular_file(p)) {
                 common_file_info info;
                 info.path = p.string();
                 info.name = p.filename().string();
+                info.is_dir = false;
                 try {
                     info.size = static_cast<size_t>(std::filesystem::file_size(p));
                 } catch (const std::filesystem::filesystem_error &) {
                     info.size = 0;
                 }
                 files.push_back(std::move(info));
+            } else if (include_directories && std::filesystem::is_directory(p)) {
+                common_file_info info;
+                info.path = p.string();
+                info.name = p.filename().string();
+                info.size = 0; // Directories have no size
+                info.is_dir = true;
+                files.push_back(std::move(info));
             }
         } catch (const std::filesystem::filesystem_error &) {
             // skip entries we cannot inspect
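For illustration (not part of the diff): how the renamed helper would be called. This is a simplified, self-contained re-implementation of fs_list for demonstration only; the real version above also guards every entry against filesystem errors:

    #include <cstdio>
    #include <filesystem>
    #include <string>
    #include <vector>

    // Mirrors common_file_info from common/common.h after this change.
    struct common_file_info {
        std::string path;
        std::string name;
        size_t      size   = 0; // in bytes
        bool        is_dir = false;
    };

    // Simplified sketch of the renamed helper.
    std::vector<common_file_info> fs_list(const std::string & path, bool include_directories) {
        std::vector<common_file_info> files;
        for (const auto & entry : std::filesystem::directory_iterator(path)) {
            const auto & p = entry.path();
            const bool is_dir = std::filesystem::is_directory(p);
            if (is_dir && !include_directories) continue;       // old fs_list_files() behavior
            if (!is_dir && !std::filesystem::is_regular_file(p)) continue;
            common_file_info info;
            info.path   = p.string();
            info.name   = p.filename().string();
            info.size   = is_dir ? 0 : (size_t) std::filesystem::file_size(p);
            info.is_dir = is_dir;
            files.push_back(std::move(info));
        }
        return files;
    }

    int main() {
        // fs_list(path, false) reproduces the old fs_list_files(path);
        // passing true also returns sub-directories.
        for (const auto & info : fs_list(".", /*include_directories=*/true)) {
            printf("%s%s (%zu bytes)\n", info.name.c_str(), info.is_dir ? "/" : "", info.size);
        }
    }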
@@ -26,8 +26,6 @@
     fprintf(stderr, "%s: built with %s for %s\n", __func__, LLAMA_COMPILER, LLAMA_BUILD_TARGET); \
 } while(0)

-#define DEFAULT_MODEL_PATH "models/7B/ggml-model-f16.gguf"
-
 struct common_time_meas {
     common_time_meas(int64_t & t_acc, bool disable = false);
     ~common_time_meas();

@@ -223,6 +221,7 @@ struct common_params_model {
     std::string hf_repo = ""; // HF repo // NOLINT
     std::string hf_file = ""; // HF file // NOLINT
     std::string docker_repo = ""; // Docker repo // NOLINT
+    std::string name = ""; // in format <user>/<model>[:<tag>] (tag is optional) // NOLINT
 };

 struct common_params_speculative {

@@ -369,7 +368,7 @@ struct common_params {

     std::vector<common_control_vector_load_info> control_vectors; // control vector with user defined scale

-    int32_t verbosity = 0;
+    int32_t verbosity = 3; // LOG_LEVEL_INFO
     int32_t control_vector_layer_start = -1; // layer range for control vector
     int32_t control_vector_layer_end = -1; // layer range for control vector

     bool offline = false;

@@ -479,6 +478,11 @@ struct common_params {
     bool endpoint_props = false; // only control POST requests, not GET
     bool endpoint_metrics = false;

+    // router server configs
+    std::string models_dir = ""; // directory containing models for the router server
+    int models_max = 4; // maximum number of models to load simultaneously
+    bool models_autoload = true; // automatically load models when requested via the router server
+
     bool log_json = false;

     std::string slot_save_path;

@@ -642,8 +646,9 @@ struct common_file_info {
     std::string path;
     std::string name;
     size_t size = 0; // in bytes
+    bool is_dir = false;
 };
-std::vector<common_file_info> fs_list_files(const std::string & path);
+std::vector<common_file_info> fs_list(const std::string & path, bool include_directories);

 //
 // Model utils
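For illustration (not part of the diff): how the three new router fields are intended to compose, per the option help texts above (models_max of 0 means unlimited; autoload is enabled by default). router_params and may_autoload() are invented names for this sketch, not llama.cpp code:

    #include <cstdio>

    // Stand-in for the new fields on common_params (common/common.h).
    struct router_params {
        int  models_max      = 4;    // --models-max; 0 = unlimited
        bool models_autoload = true; // cleared by --no-models-autoload
    };

    // Invented helper: may the router load another model on demand?
    static bool may_autoload(const router_params & p, int models_loaded) {
        if (!p.models_autoload) return false;
        if (p.models_max == 0)  return true; // unlimited
        return models_loaded < p.models_max;
    }

    int main() {
        router_params p;
        printf("%d\n", may_autoload(p, 3));   // 1: under the default limit of 4
        printf("%d\n", may_autoload(p, 4));   // 0: at the limit
        p.models_max = 0;
        printf("%d\n", may_autoload(p, 100)); // 1: 0 means unlimited
    }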
@@ -430,7 +430,7 @@ std::pair<long, std::vector<char>> common_remote_get_content(const std::string &
     curl_easy_setopt(curl.get(), CURLOPT_URL, url.c_str());
     curl_easy_setopt(curl.get(), CURLOPT_NOPROGRESS, 1L);
     curl_easy_setopt(curl.get(), CURLOPT_FOLLOWLOCATION, 1L);
-    curl_easy_setopt(curl.get(), CURLOPT_VERBOSE, 1L);
+    curl_easy_setopt(curl.get(), CURLOPT_VERBOSE, 0L);
     typedef size_t(*CURLOPT_WRITEFUNCTION_PTR)(void * ptr, size_t size, size_t nmemb, void * data);
     auto write_callback = [](void * ptr, size_t size, size_t nmemb, void * data) -> size_t {
         auto data_vec = static_cast<std::vector<char> *>(data);

@@ -517,16 +517,18 @@ static bool common_pull_file(httplib::Client & cli,
         headers.emplace("Range", "bytes=" + std::to_string(existing_size) + "-");
     }

-    std::atomic<size_t> downloaded{existing_size};
+    const char * func = __func__; // avoid __func__ inside a lambda
+    size_t downloaded = existing_size;
+    size_t progress_step = 0;

     auto res = cli.Get(resolve_path, headers,
         [&](const httplib::Response &response) {
             if (existing_size > 0 && response.status != 206) {
-                LOG_WRN("%s: server did not respond with 206 Partial Content for a resume request. Status: %d\n", __func__, response.status);
+                LOG_WRN("%s: server did not respond with 206 Partial Content for a resume request. Status: %d\n", func, response.status);
                 return false;
             }
             if (existing_size == 0 && response.status != 200) {
-                LOG_WRN("%s: download received non-successful status code: %d\n", __func__, response.status);
+                LOG_WRN("%s: download received non-successful status code: %d\n", func, response.status);
                 return false;
             }
             if (total_size == 0 && response.has_header("Content-Length")) {

@@ -534,7 +536,7 @@ static bool common_pull_file(httplib::Client & cli,
                     size_t content_length = std::stoull(response.get_header_value("Content-Length"));
                     total_size = existing_size + content_length;
                 } catch (const std::exception &e) {
-                    LOG_WRN("%s: invalid Content-Length header: %s\n", __func__, e.what());
+                    LOG_WRN("%s: invalid Content-Length header: %s\n", func, e.what());
                 }
             }
             return true;

@@ -542,11 +544,16 @@ static bool common_pull_file(httplib::Client & cli,
         [&](const char *data, size_t len) {
             ofs.write(data, len);
             if (!ofs) {
-                LOG_ERR("%s: error writing to file: %s\n", __func__, path_tmp.c_str());
+                LOG_ERR("%s: error writing to file: %s\n", func, path_tmp.c_str());
                 return false;
             }
             downloaded += len;
-            print_progress(downloaded, total_size);
+            progress_step += len;
+
+            if (progress_step >= total_size / 1000 || downloaded == total_size) {
+                print_progress(downloaded, total_size);
+                progress_step = 0;
+            }
             return true;
         },
         nullptr

@@ -1047,7 +1054,7 @@ std::string common_docker_resolve_model(const std::string &) {
 std::vector<common_cached_model_info> common_list_cached_models() {
     std::vector<common_cached_model_info> models;
     const std::string cache_dir = fs_get_cache_directory();
-    const std::vector<common_file_info> files = fs_list_files(cache_dir);
+    const std::vector<common_file_info> files = fs_list(cache_dir, false);
     for (const auto & file : files) {
         if (string_starts_with(file.name, "manifest=") && string_ends_with(file.name, ".json")) {
             common_cached_model_info model_info;
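For illustration (not part of the diff): the progress-throttling logic above in isolation. Bytes accumulate in progress_step, and progress is printed only once roughly 0.1% of the total has arrived (or the download just finished), instead of on every received chunk. A self-contained sketch assuming a known, non-zero total size:

    #include <cstdio>

    // Throttled progress reporting, mirroring the change above.
    struct progress_printer {
        size_t total         = 0; // assumed known and > 0 for this sketch
        size_t downloaded    = 0;
        size_t progress_step = 0;

        void on_chunk(size_t len) {
            downloaded    += len;
            progress_step += len;
            // print at ~0.1% granularity, and always on completion
            if (progress_step >= total / 1000 || downloaded == total) {
                printf("\rdownloaded %zu / %zu bytes", downloaded, total);
                progress_step = 0;
            }
        }
    };

    int main() {
        progress_printer p;
        p.total = 1000000;
        for (int i = 0; i < 2000; i++) {
            p.on_chunk(500); // 2000 chunks, but only ~1000 prints
        }
        printf("\n");
    }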
@@ -14,8 +14,10 @@ struct common_cached_model_info {
     std::string model;
     std::string tag;
     size_t size = 0; // GGUF size in bytes
+    // return string representation like "user/model:tag"
+    // if tag is "latest", it will be omitted
     std::string to_string() const {
-        return user + "/" + model + ":" + tag;
+        return user + "/" + model + (tag == "latest" ? "" : ":" + tag);
     }
 };
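For illustration (not part of the diff): the expected behavior of the updated to_string(), rewritten as a free function with made-up example values:

    #include <iostream>
    #include <string>

    // Same expression as the new to_string() above.
    static std::string display_name(const std::string & user, const std::string & model, const std::string & tag) {
        return user + "/" + model + (tag == "latest" ? "" : ":" + tag);
    }

    int main() {
        std::cout << display_name("user", "model", "latest") << "\n"; // user/model
        std::cout << display_name("user", "model", "Q4_K_M") << "\n"; // user/model:Q4_K_M
    }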
@@ -974,7 +974,7 @@ public:

     void check_errors() {
         if (!_errors.empty()) {
-            throw std::runtime_error("JSON schema conversion failed:\n" + string_join(_errors, "\n"));
+            throw std::invalid_argument("JSON schema conversion failed:\n" + string_join(_errors, "\n"));
         }
         if (!_warnings.empty()) {
             fprintf(stderr, "WARNING: JSON schema conversion was incomplete: %s\n", string_join(_warnings, "; ").c_str());
@@ -443,8 +443,22 @@ void common_log_set_timestamps(struct common_log * log, bool timestamps) {
     log->set_timestamps(timestamps);
 }

+static int common_get_verbosity(enum ggml_log_level level) {
+    switch (level) {
+        case GGML_LOG_LEVEL_DEBUG: return LOG_LEVEL_DEBUG;
+        case GGML_LOG_LEVEL_INFO:  return LOG_LEVEL_INFO;
+        case GGML_LOG_LEVEL_WARN:  return LOG_LEVEL_WARN;
+        case GGML_LOG_LEVEL_ERROR: return LOG_LEVEL_ERROR;
+        case GGML_LOG_LEVEL_CONT:  return LOG_LEVEL_INFO; // same as INFO
+        case GGML_LOG_LEVEL_NONE:
+        default:
+            return LOG_LEVEL_OUTPUT;
+    }
+}
+
 void common_log_default_callback(enum ggml_log_level level, const char * text, void * /*user_data*/) {
-    if (LOG_DEFAULT_LLAMA <= common_log_verbosity_thold) {
+    auto verbosity = common_get_verbosity(level);
+    if (verbosity <= common_log_verbosity_thold) {
         common_log_add(common_log_main(), level, "%s", text);
     }
 }
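For illustration (not part of the diff): the filtering rule introduced above, reduced to its essence. Backend (ggml) log levels are mapped onto the new LOG_LEVEL_* scale and a message passes only if its mapped verbosity is less than or equal to the global threshold. A self-contained mimic with stand-in enums:

    #include <cstdio>

    // Stand-ins for ggml_log_level and the LOG_LEVEL_* values above.
    enum ggml_level { GGML_DEBUG, GGML_INFO, GGML_WARN, GGML_ERROR, GGML_CONT, GGML_NONE };
    enum { LOG_OUTPUT = 0, LOG_ERROR = 1, LOG_WARN = 2, LOG_INFO = 3, LOG_DEBUG = 4 };

    // Mirrors common_get_verbosity() above.
    static int to_verbosity(ggml_level level) {
        switch (level) {
            case GGML_DEBUG: return LOG_DEBUG;
            case GGML_INFO:  return LOG_INFO;
            case GGML_WARN:  return LOG_WARN;
            case GGML_ERROR: return LOG_ERROR;
            case GGML_CONT:  return LOG_INFO; // continuation lines follow info
            default:         return LOG_OUTPUT;
        }
    }

    int main() {
        const int thold = LOG_INFO; // the new default threshold
        printf("ggml info  passes: %d\n", to_verbosity(GGML_INFO)  <= thold); // 1
        printf("ggml debug passes: %d\n", to_verbosity(GGML_DEBUG) <= thold); // 0
    }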
common/log.h

@@ -21,8 +21,14 @@
 # define LOG_ATTRIBUTE_FORMAT(...) __attribute__((format(printf, __VA_ARGS__)))
 #endif

-#define LOG_DEFAULT_DEBUG 1
-#define LOG_DEFAULT_LLAMA 0
+#define LOG_LEVEL_DEBUG  4
+#define LOG_LEVEL_INFO   3
+#define LOG_LEVEL_WARN   2
+#define LOG_LEVEL_ERROR  1
+#define LOG_LEVEL_OUTPUT 0 // output data from tools
+
+#define LOG_DEFAULT_DEBUG LOG_LEVEL_DEBUG
+#define LOG_DEFAULT_LLAMA LOG_LEVEL_INFO

 enum log_colors {
     LOG_COLORS_AUTO = -1,

@@ -67,10 +73,11 @@ void common_log_add(struct common_log * log, enum ggml_log_level level, const ch
 // 0.00.090.578 I llm_load_tensors: offloading 32 repeating layers to GPU
 // 0.00.090.579 I llm_load_tensors: offloading non-repeating layers to GPU
 //
-// I - info    (stdout, V = 0)
-// W - warning (stderr, V = 0)
-// E - error   (stderr, V = 0)
-// D - debug   (stderr, V = LOG_DEFAULT_DEBUG)
+// D - debug   (stderr, V = LOG_DEFAULT_DEBUG)
+// I - info    (stdout, V = LOG_DEFAULT_INFO)
+// W - warning (stderr, V = LOG_DEFAULT_WARN)
+// E - error   (stderr, V = LOG_DEFAULT_ERROR)
+// O - output  (stdout, V = LOG_DEFAULT_OUTPUT)
 //

 void common_log_set_file (struct common_log * log, const char * file); // not thread-safe

@@ -95,14 +102,14 @@ void common_log_set_timestamps(struct common_log * log, bool timestamps); // w
     } \
 } while (0)

-#define LOG(...)             LOG_TMPL(GGML_LOG_LEVEL_NONE, 0,         __VA_ARGS__)
-#define LOGV(verbosity, ...) LOG_TMPL(GGML_LOG_LEVEL_NONE, verbosity, __VA_ARGS__)
+#define LOG(...)             LOG_TMPL(GGML_LOG_LEVEL_NONE, LOG_LEVEL_OUTPUT, __VA_ARGS__)
+#define LOGV(verbosity, ...) LOG_TMPL(GGML_LOG_LEVEL_NONE, verbosity,        __VA_ARGS__)

-#define LOG_INF(...) LOG_TMPL(GGML_LOG_LEVEL_INFO,  0,                 __VA_ARGS__)
-#define LOG_WRN(...) LOG_TMPL(GGML_LOG_LEVEL_WARN,  0,                 __VA_ARGS__)
-#define LOG_ERR(...) LOG_TMPL(GGML_LOG_LEVEL_ERROR, 0,                 __VA_ARGS__)
-#define LOG_DBG(...) LOG_TMPL(GGML_LOG_LEVEL_DEBUG, LOG_DEFAULT_DEBUG, __VA_ARGS__)
-#define LOG_CNT(...) LOG_TMPL(GGML_LOG_LEVEL_CONT,  0,                 __VA_ARGS__)
+#define LOG_DBG(...) LOG_TMPL(GGML_LOG_LEVEL_DEBUG, LOG_LEVEL_DEBUG, __VA_ARGS__)
+#define LOG_INF(...) LOG_TMPL(GGML_LOG_LEVEL_INFO,  LOG_LEVEL_INFO,  __VA_ARGS__)
+#define LOG_WRN(...) LOG_TMPL(GGML_LOG_LEVEL_WARN,  LOG_LEVEL_WARN,  __VA_ARGS__)
+#define LOG_ERR(...) LOG_TMPL(GGML_LOG_LEVEL_ERROR, LOG_LEVEL_ERROR, __VA_ARGS__)
+#define LOG_CNT(...) LOG_TMPL(GGML_LOG_LEVEL_CONT,  LOG_LEVEL_INFO,  __VA_ARGS__) // same as INFO

 #define LOG_INFV(verbosity, ...) LOG_TMPL(GGML_LOG_LEVEL_INFO, verbosity, __VA_ARGS__)
 #define LOG_WRNV(verbosity, ...) LOG_TMPL(GGML_LOG_LEVEL_WARN, verbosity, __VA_ARGS__)
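For illustration (not part of the diff): which messages survive the new default threshold once the macros carry per-level verbosities. This is a self-contained mimic of the macro scheme above, not the real common/log.h:

    #include <cstdio>

    // Mimic of the log.h macros after this change.
    static int common_log_verbosity_thold = 3; // LOG_LEVEL_INFO, the new default

    #define LOG_TMPL(verbosity, ...) \
        do { if ((verbosity) <= common_log_verbosity_thold) printf(__VA_ARGS__); } while (0)

    #define LOG(...)     LOG_TMPL(0, __VA_ARGS__) // output: always shown
    #define LOG_ERR(...) LOG_TMPL(1, __VA_ARGS__)
    #define LOG_WRN(...) LOG_TMPL(2, __VA_ARGS__)
    #define LOG_INF(...) LOG_TMPL(3, __VA_ARGS__)
    #define LOG_DBG(...) LOG_TMPL(4, __VA_ARGS__) // hidden unless -lv 4 or higher

    int main() {
        LOG("tokens/sec: %.1f\n", 123.4); // printed: output level is 0
        LOG_INF("loading model\n");       // printed: info equals the default threshold
        LOG_DBG("graph nodes: %d\n", 42); // suppressed at the default threshold
    }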
@@ -1592,16 +1592,26 @@ class MmprojModel(ModelBase):

         # load preprocessor config
         self.preprocessor_config = {}
         if not self.is_mistral_format:
-            # check if preprocessor_config.json exists
-            if (self.dir_model / "preprocessor_config.json").is_file():
-                with open(self.dir_model / "preprocessor_config.json", "r", encoding="utf-8") as f:
-                    self.preprocessor_config = json.load(f)
-            else:
-                # try "processing_config" file if exists
-                if (self.dir_model / "processing_config.json").is_file():
-                    with open(self.dir_model / "processing_config.json", "r", encoding="utf-8") as f:
-                        self.preprocessor_config = json.load(f)
+            # prefer preprocessor_config.json if possible
+            preprocessor_config_path = self.dir_model / "preprocessor_config.json"
+            if preprocessor_config_path.is_file():
+                with open(preprocessor_config_path, "r", encoding="utf-8") as f:
+                    self.preprocessor_config = json.load(f)
+
+            # prefer processor_config.json if possible
+            processor_config_path = self.dir_model / "processor_config.json"
+            if processor_config_path.is_file():
+                with open(processor_config_path, "r", encoding="utf-8") as f:
+                    cfg = json.load(f)
+                    # move image_processor to root level for compat
+                    if "image_processor" in cfg:
+                        cfg = {
+                            **cfg,
+                            **cfg["image_processor"],
+                        }
+                    # merge configs
+                    self.preprocessor_config = {**self.preprocessor_config, **cfg}

     def get_vision_config(self) -> dict[str, Any] | None:
         config_name = "vision_config" if not self.is_mistral_format else "vision_encoder"

@@ -2815,9 +2825,38 @@ class Llama4VisionModel(MmprojModel):

 @ModelBase.register("Mistral3ForConditionalGeneration")
 class Mistral3Model(LlamaModel):
-    model_arch = gguf.MODEL_ARCH.LLAMA
+    model_arch = gguf.MODEL_ARCH.MISTRAL3
+
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+        # for compatibility, we use LLAMA arch for older models
+        # TODO: remove this once everyone has migrated to newer version of llama.cpp
+        if self.hparams.get("model_type") != "ministral3":
+            self.model_arch = gguf.MODEL_ARCH.LLAMA
+            self.gguf_writer.arch = gguf.MODEL_ARCH_NAMES[self.model_arch]
+            self.gguf_writer.add_architecture()
+            self.tensor_map = gguf.get_tensor_name_map(self.model_arch, self.block_count)
+
+    def set_gguf_parameters(self):
+        super().set_gguf_parameters()
+        rope_params = self.hparams.get("rope_parameters")
+        if self.hparams.get("model_type") == "ministral3":
+            assert rope_params is not None, "ministral3 must have 'rope_parameters' config"
+            assert rope_params["rope_type"] == "yarn", "ministral3 rope_type must be 'yarn'"
+            self.gguf_writer.add_rope_scaling_type(gguf.RopeScalingType.YARN)
+            self.gguf_writer.add_rope_scaling_factor(rope_params["factor"])
+            self.gguf_writer.add_rope_scaling_yarn_beta_fast(rope_params["beta_fast"])
+            self.gguf_writer.add_rope_scaling_yarn_beta_slow(rope_params["beta_slow"])
+            self.gguf_writer.add_rope_scaling_yarn_log_mul(rope_params["mscale_all_dim"])
+            self.gguf_writer.add_rope_scaling_orig_ctx_len(rope_params["original_max_position_embeddings"])
+            self.gguf_writer.add_rope_freq_base(rope_params["rope_theta"])
+            self.gguf_writer.add_attn_temperature_scale(rope_params["llama_4_scaling_beta"])

     def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None):
         # TODO: probably not worth supporting quantized weight, as official BF16 is also available
         if name.endswith("weight_scale_inv"):
             raise ValueError("This is a quantized weight, please use BF16 weight instead")

         name = name.replace("language_model.", "")
         if "multi_modal_projector" in name or "vision_tower" in name:
             return []

@@ -9941,12 +9980,22 @@ class ApertusModel(LlamaModel):


 class MistralModel(LlamaModel):
-    model_arch = gguf.MODEL_ARCH.LLAMA
+    model_arch = gguf.MODEL_ARCH.MISTRAL3
     model_name = "Mistral"
     hf_arch = ""
     is_mistral_format = True
     undo_permute = False

+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+        # for compatibility, we use LLAMA arch for older models
+        # TODO: remove this once everyone migrates to newer version of llama.cpp
+        if "llama_4_scaling" not in self.hparams:
+            self.model_arch = gguf.MODEL_ARCH.LLAMA
+            self.gguf_writer.arch = gguf.MODEL_ARCH_NAMES[self.model_arch]
+            self.gguf_writer.add_architecture()
+            self.tensor_map = gguf.get_tensor_name_map(self.model_arch, self.block_count)
+
     @staticmethod
     def get_community_chat_template(vocab: MistralVocab, templates_dir: Path, is_mistral_format: bool):
         assert TokenizerVersion is not None and Tekkenizer is not None and SentencePieceTokenizer is not None, _mistral_import_error_msg

@@ -9986,6 +10035,20 @@ class MistralModel(LlamaModel):

         return template

+    def set_gguf_parameters(self):
+        super().set_gguf_parameters()
+        if "yarn" in self.hparams:
+            yarn_params = self.hparams["yarn"]
+            self.gguf_writer.add_rope_scaling_type(gguf.RopeScalingType.YARN)
+            self.gguf_writer.add_rope_scaling_factor(yarn_params["factor"])
+            self.gguf_writer.add_rope_scaling_yarn_beta_fast(yarn_params["beta"])
+            self.gguf_writer.add_rope_scaling_yarn_beta_slow(yarn_params["alpha"])
+            self.gguf_writer.add_rope_scaling_yarn_log_mul(1.0) # mscale_all_dim
+            self.gguf_writer.add_rope_scaling_orig_ctx_len(yarn_params["original_max_position_embeddings"])
+
+        if "llama_4_scaling" in self.hparams:
+            self.gguf_writer.add_attn_temperature_scale(self.hparams["llama_4_scaling"]["beta"])
+

 class PixtralModel(LlavaVisionModel):
     model_name = "Pixtral"
docs/ops.md

@@ -21,11 +21,11 @@ Legend:
 | ADD_ID | ❌ | ❌ | ✅ | ✅ | ❌ | ❌ | ❌ | ✅ | ❌ |
 | ARANGE | ❌ | ✅ | ✅ | ✅ | ✅ | ❌ | ✅ | ✅ | ❌ |
 | ARGMAX | ❌ | ✅ | ✅ | ✅ | ✅ | ❌ | ✅ | ✅ | ❌ |
-| ARGSORT | ❌ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | 🟡 | ❌ |
+| ARGSORT | ❌ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❌ |
 | CEIL | ❌ | ❌ | ✅ | 🟡 | ❌ | ❌ | 🟡 | 🟡 | ❌ |
 | CLAMP | ❌ | ✅ | ✅ | ✅ | 🟡 | 🟡 | 🟡 | 🟡 | ❌ |
 | CONCAT | ❌ | ✅ | ✅ | 🟡 | ✅ | 🟡 | ✅ | ✅ | ❌ |
-| CONT | ❌ | 🟡 | ✅ | ✅ | ✅ | 🟡 | 🟡 | 🟡 | ❌ |
+| CONT | ❌ | 🟡 | ✅ | ✅ | ✅ | 🟡 | 🟡 | ✅ | ❌ |
 | CONV_2D | ❌ | ❌ | ✅ | ✅ | ❌ | ✅ | ❌ | ✅ | ❌ |
 | CONV_2D_DW | ❌ | ❌ | ✅ | ✅ | ❌ | ❌ | ❌ | ✅ | ❌ |
 | CONV_3D | ❌ | ❌ | ✅ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ |

@@ -36,10 +36,10 @@ Legend:
 | CPY | ❌ | 🟡 | 🟡 | 🟡 | 🟡 | 🟡 | 🟡 | 🟡 | ❌ |
 | CROSS_ENTROPY_LOSS | ❌ | ❌ | ✅ | ✅ | ❌ | ❌ | ❌ | ❌ | ❌ |
 | CROSS_ENTROPY_LOSS_BACK | ❌ | ❌ | ✅ | ✅ | ❌ | ❌ | ❌ | ❌ | ❌ |
-| CUMSUM | ❌ | ❌ | ✅ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ |
+| CUMSUM | ❌ | ❌ | ✅ | ❌ | ❌ | ❌ | ❌ | ✅ | ❌ |
 | DIAG_MASK_INF | ❌ | ✅ | ✅ | ✅ | 🟡 | 🟡 | ✅ | ✅ | ❌ |
 | DIV | ❌ | ✅ | ✅ | ✅ | 🟡 | 🟡 | ✅ | ✅ | ❌ |
-| DUP | ❌ | ✅ | ✅ | 🟡 | 🟡 | 🟡 | ✅ | 🟡 | ❌ |
+| DUP | ❌ | ✅ | ✅ | 🟡 | 🟡 | 🟡 | ✅ | ✅ | ❌ |
 | ELU | ❌ | ✅ | ✅ | 🟡 | 🟡 | ❌ | ✅ | ❌ | ❌ |
 | EXP | ❌ | ✅ | ✅ | 🟡 | 🟡 | ❌ | ✅ | 🟡 | ❌ |
 | EXPM1 | ❌ | ❌ | ✅ | 🟡 | ❌ | ❌ | ❌ | ❌ | ❌ |

@@ -102,7 +102,7 @@ Legend:
 | SOFTPLUS | ❌ | ❌ | ✅ | 🟡 | ❌ | ❌ | ❌ | 🟡 | ❌ |
 | SOFT_MAX | ❌ | 🟡 | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❌ |
 | SOFT_MAX_BACK | ❌ | ❌ | 🟡 | 🟡 | ❌ | ❌ | 🟡 | ✅ | ❌ |
-| SOLVE_TRI | ❌ | ❌ | ✅ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ |
+| SOLVE_TRI | ❌ | ❌ | ✅ | ❌ | ❌ | ❌ | ❌ | 🟡 | ❌ |
 | SQR | ❌ | ✅ | ✅ | ✅ | 🟡 | ❌ | 🟡 | 🟡 | ❌ |
 | SQRT | ❌ | ✅ | ✅ | ✅ | 🟡 | ❌ | 🟡 | 🟡 | ❌ |
 | SSM_CONV | ❌ | ❌ | ✅ | ✅ | ✅ | ❌ | ✅ | ✅ | ❌ |

@@ -115,7 +115,8 @@ Legend:
 | SWIGLU_OAI | ❌ | ❌ | ✅ | ✅ | ❌ | ❌ | ❌ | 🟡 | ❌ |
 | TANH | ❌ | ✅ | ✅ | 🟡 | 🟡 | ✅ | ✅ | 🟡 | ❌ |
 | TIMESTEP_EMBEDDING | ❌ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❌ |
-| TRI | ❌ | ❌ | ✅ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ |
+| TOP_K | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | 🟡 | ❌ |
+| TRI | ❌ | ❌ | ✅ | ❌ | ❌ | ❌ | ❌ | ✅ | ❌ |
 | TRUNC | ❌ | ❌ | ✅ | 🟡 | ❌ | ❌ | 🟡 | 🟡 | ❌ |
-| UPSCALE | ❌ | 🟡 | ✅ | ✅ | 🟡 | ✅ | 🟡 | ✅ | ❌ |
+| UPSCALE | ❌ | 🟡 | ✅ | ✅ | 🟡 | ✅ | 🟡 | 🟡 | ❌ |
 | XIELU | ❌ | ❌ | ✅ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ |
@ -5005,8 +5005,8 @@
|
|||
"Vulkan0","DUP","type=f16,ne=[10,10,5,1],permute=[0,2,1,3]","support","1","yes","Vulkan"
|
||||
"Vulkan0","DUP","type=f32,ne=[10,10,5,1],permute=[1,0,2,3]","support","1","yes","Vulkan"
|
||||
"Vulkan0","DUP","type=f16,ne=[10,10,5,1],permute=[1,0,2,3]","support","1","yes","Vulkan"
|
||||
"Vulkan0","DUP","type=i16,ne=[10,8,3,1],permute=[0,2,1,3]","support","0","no","Vulkan"
|
||||
"Vulkan0","DUP","type=i16,ne=[10,8,3,1],permute=[1,2,0,3]","support","0","no","Vulkan"
|
||||
"Vulkan0","DUP","type=i16,ne=[10,8,3,1],permute=[0,2,1,3]","support","1","yes","Vulkan"
|
||||
"Vulkan0","DUP","type=i16,ne=[10,8,3,1],permute=[1,2,0,3]","support","1","yes","Vulkan"
|
||||
"Vulkan0","SET","type_src=f32,type_dst=f32,ne=[6,5,4,3],dim=1","support","0","no","Vulkan"
|
||||
"Vulkan0","SET","type_src=f32,type_dst=f32,ne=[6,5,4,3],dim=2","support","0","no","Vulkan"
|
||||
"Vulkan0","SET","type_src=f32,type_dst=f32,ne=[6,5,4,3],dim=3","support","0","no","Vulkan"
|
||||
|
|
@ -5032,14 +5032,14 @@
|
|||
"Vulkan0","CPY","type_src=f16,type_dst=f16,ne=[3,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","Vulkan"
|
||||
"Vulkan0","CPY","type_src=f16,type_dst=f16,ne=[3,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3],_src_transpose=0","support","1","yes","Vulkan"
|
||||
"Vulkan0","CPY","type_src=bf16,type_dst=bf16,ne=[1,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","Vulkan"
|
||||
"Vulkan0","CPY","type_src=bf16,type_dst=bf16,ne=[1,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0],_src_transpose=0","support","0","no","Vulkan"
|
||||
"Vulkan0","CPY","type_src=bf16,type_dst=bf16,ne=[1,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3],_src_transpose=0","support","0","no","Vulkan"
|
||||
"Vulkan0","CPY","type_src=bf16,type_dst=bf16,ne=[1,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","Vulkan"
|
||||
"Vulkan0","CPY","type_src=bf16,type_dst=bf16,ne=[1,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3],_src_transpose=0","support","1","yes","Vulkan"
|
||||
"Vulkan0","CPY","type_src=bf16,type_dst=bf16,ne=[2,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","Vulkan"
|
||||
"Vulkan0","CPY","type_src=bf16,type_dst=bf16,ne=[2,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0],_src_transpose=0","support","0","no","Vulkan"
|
||||
"Vulkan0","CPY","type_src=bf16,type_dst=bf16,ne=[2,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3],_src_transpose=0","support","0","no","Vulkan"
|
||||
"Vulkan0","CPY","type_src=bf16,type_dst=bf16,ne=[2,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","Vulkan"
|
||||
"Vulkan0","CPY","type_src=bf16,type_dst=bf16,ne=[2,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3],_src_transpose=0","support","1","yes","Vulkan"
|
||||
"Vulkan0","CPY","type_src=bf16,type_dst=bf16,ne=[3,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","Vulkan"
|
||||
"Vulkan0","CPY","type_src=bf16,type_dst=bf16,ne=[3,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0],_src_transpose=0","support","0","no","Vulkan"
|
||||
"Vulkan0","CPY","type_src=bf16,type_dst=bf16,ne=[3,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3],_src_transpose=0","support","0","no","Vulkan"
|
||||
"Vulkan0","CPY","type_src=bf16,type_dst=bf16,ne=[3,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","Vulkan"
|
||||
"Vulkan0","CPY","type_src=bf16,type_dst=bf16,ne=[3,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3],_src_transpose=0","support","1","yes","Vulkan"
|
||||
"Vulkan0","CPY","type_src=q4_0,type_dst=q4_0,ne=[32,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","Vulkan"
|
||||
"Vulkan0","CPY","type_src=q4_0,type_dst=q4_0,ne=[32,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0],_src_transpose=0","support","0","no","Vulkan"
|
||||
"Vulkan0","CPY","type_src=q4_0,type_dst=q4_0,ne=[32,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3],_src_transpose=0","support","0","no","Vulkan"
|
||||
|
|
@ -5271,7 +5271,7 @@
|
|||
"Vulkan0","CPY","type_src=bf16,type_dst=f16,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=0","support","0","no","Vulkan"
|
||||
"Vulkan0","CPY","type_src=bf16,type_dst=f16,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0],_src_transpose=0","support","0","no","Vulkan"
|
||||
"Vulkan0","CPY","type_src=bf16,type_dst=bf16,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","Vulkan"
|
||||
"Vulkan0","CPY","type_src=bf16,type_dst=bf16,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0],_src_transpose=0","support","0","no","Vulkan"
|
||||
"Vulkan0","CPY","type_src=bf16,type_dst=bf16,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","Vulkan"
|
||||
"Vulkan0","CPY","type_src=bf16,type_dst=q4_0,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=0","support","0","no","Vulkan"
|
||||
"Vulkan0","CPY","type_src=bf16,type_dst=q4_0,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0],_src_transpose=0","support","0","no","Vulkan"
|
||||
"Vulkan0","CPY","type_src=bf16,type_dst=q4_1,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=0","support","0","no","Vulkan"
|
||||
|
|
@ -5415,21 +5415,49 @@
"Vulkan0","CPY","type_src=f16,type_dst=f16,ne=[256,4,3,1],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=1","support","1","yes","Vulkan"
"Vulkan0","CPY","type_src=f32,type_dst=f32,ne=[256,4,3,1],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=1","support","1","yes","Vulkan"
"Vulkan0","CPY","type_src=f32,type_dst=f32,ne=[256,4,3,3],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=1","support","1","yes","Vulkan"
"Vulkan0","CPY","type_src=bf16,type_dst=bf16,ne=[256,4,3,1],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=1","support","0","no","Vulkan"
"Vulkan0","CPY","type_src=bf16,type_dst=bf16,ne=[256,4,3,1],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=1","support","1","yes","Vulkan"
"Vulkan0","CPY","type_src=f16,type_dst=f16,ne=[256,4,1,1],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=1","support","1","yes","Vulkan"
"Vulkan0","CPY","type_src=f32,type_dst=f32,ne=[256,4,1,1],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=1","support","1","yes","Vulkan"
"Vulkan0","CPY","type_src=bf16,type_dst=bf16,ne=[256,4,1,1],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=1","support","0","no","Vulkan"
"Vulkan0","CPY","type_src=bf16,type_dst=bf16,ne=[256,4,1,1],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=1","support","1","yes","Vulkan"
"Vulkan0","CPY","type_src=i32,type_dst=i32,ne=[256,4,1,1],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=1","support","1","yes","Vulkan"
"Vulkan0","CPY","type_src=i32,type_dst=i32,ne=[256,1,4,1],permute_src=[1,2,0,3],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","Vulkan"
"Vulkan0","CPY","type_src=f32,type_dst=f32,ne=[256,1,4,1],permute_src=[1,2,0,3],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","Vulkan"
"Vulkan0","CONT","type=f32,ne=[10,10,10,1]","support","1","yes","Vulkan"
"Vulkan0","CONT","type=f32,ne=[2,1,1,1]","support","1","yes","Vulkan"
"Vulkan0","CONT","type=f32,ne=[2,1,3,5]","support","1","yes","Vulkan"
"Vulkan0","CONT","type=f32,ne=[2,3,5,7]","support","1","yes","Vulkan"
"Vulkan0","CONT","type=f16,ne=[2,1,1,1]","support","1","yes","Vulkan"
"Vulkan0","CONT","type=f16,ne=[2,1,3,5]","support","1","yes","Vulkan"
"Vulkan0","CONT","type=f16,ne=[2,3,5,7]","support","1","yes","Vulkan"
"Vulkan0","CONT","type=bf16,ne=[2,1,1,1]","support","1","yes","Vulkan"
"Vulkan0","CONT","type=bf16,ne=[2,1,3,5]","support","1","yes","Vulkan"
"Vulkan0","CONT","type=bf16,ne=[2,3,5,7]","support","0","no","Vulkan"
"Vulkan0","CONT","type=f32,ne=[2,1,1,1],use_view_slice=1","support","1","yes","Vulkan"
"Vulkan0","CONT","type=f32,ne=[2,1,3,5],use_view_slice=1","support","1","yes","Vulkan"
"Vulkan0","CONT","type=f32,ne=[2,3,5,7],use_view_slice=1","support","1","yes","Vulkan"
"Vulkan0","CONT","type=f32,ne=[1,4,4,1],use_view_slice=1","support","1","yes","Vulkan"
"Vulkan0","CONT","type=f32,ne=[1,8,17,1],use_view_slice=1","support","1","yes","Vulkan"
"Vulkan0","CONT","type=f32,ne=[10,10,10,1],use_view_slice=1","support","1","yes","Vulkan"
"Vulkan0","CONT","type=f32,ne=[2,1,1,1],use_view_slice=0","support","1","yes","Vulkan"
"Vulkan0","CONT","type=f32,ne=[2,1,3,5],use_view_slice=0","support","1","yes","Vulkan"
"Vulkan0","CONT","type=f32,ne=[2,3,5,7],use_view_slice=0","support","1","yes","Vulkan"
"Vulkan0","CONT","type=f32,ne=[1,4,4,1],use_view_slice=0","support","1","yes","Vulkan"
"Vulkan0","CONT","type=f32,ne=[1,8,17,1],use_view_slice=0","support","1","yes","Vulkan"
"Vulkan0","CONT","type=f32,ne=[10,10,10,1],use_view_slice=0","support","1","yes","Vulkan"
"Vulkan0","CONT","type=i32,ne=[2,1,1,1],use_view_slice=1","support","1","yes","Vulkan"
"Vulkan0","CONT","type=i32,ne=[2,1,3,5],use_view_slice=1","support","1","yes","Vulkan"
"Vulkan0","CONT","type=i32,ne=[2,3,5,7],use_view_slice=1","support","1","yes","Vulkan"
"Vulkan0","CONT","type=i32,ne=[1,4,4,1],use_view_slice=1","support","1","yes","Vulkan"
"Vulkan0","CONT","type=i32,ne=[1,8,17,1],use_view_slice=1","support","1","yes","Vulkan"
"Vulkan0","CONT","type=i32,ne=[10,10,10,1],use_view_slice=1","support","1","yes","Vulkan"
"Vulkan0","CONT","type=i32,ne=[2,1,1,1],use_view_slice=0","support","1","yes","Vulkan"
"Vulkan0","CONT","type=i32,ne=[2,1,3,5],use_view_slice=0","support","1","yes","Vulkan"
"Vulkan0","CONT","type=i32,ne=[2,3,5,7],use_view_slice=0","support","1","yes","Vulkan"
"Vulkan0","CONT","type=i32,ne=[1,4,4,1],use_view_slice=0","support","1","yes","Vulkan"
"Vulkan0","CONT","type=i32,ne=[1,8,17,1],use_view_slice=0","support","1","yes","Vulkan"
"Vulkan0","CONT","type=i32,ne=[10,10,10,1],use_view_slice=0","support","1","yes","Vulkan"
"Vulkan0","CONT","type=f16,ne=[2,1,1,1],use_view_slice=0","support","1","yes","Vulkan"
"Vulkan0","CONT","type=f16,ne=[2,1,3,5],use_view_slice=0","support","1","yes","Vulkan"
"Vulkan0","CONT","type=f16,ne=[2,3,5,7],use_view_slice=0","support","1","yes","Vulkan"
"Vulkan0","CONT","type=f16,ne=[1,4,4,1],use_view_slice=0","support","1","yes","Vulkan"
"Vulkan0","CONT","type=f16,ne=[1,8,17,1],use_view_slice=0","support","1","yes","Vulkan"
"Vulkan0","CONT","type=f16,ne=[10,10,10,1],use_view_slice=0","support","1","yes","Vulkan"
"Vulkan0","CONT","type=bf16,ne=[2,1,1,1],use_view_slice=0","support","1","yes","Vulkan"
"Vulkan0","CONT","type=bf16,ne=[2,1,3,5],use_view_slice=0","support","1","yes","Vulkan"
"Vulkan0","CONT","type=bf16,ne=[2,3,5,7],use_view_slice=0","support","1","yes","Vulkan"
"Vulkan0","CONT","type=bf16,ne=[1,4,4,1],use_view_slice=0","support","1","yes","Vulkan"
"Vulkan0","CONT","type=bf16,ne=[1,8,17,1],use_view_slice=0","support","1","yes","Vulkan"
"Vulkan0","CONT","type=bf16,ne=[10,10,10,1],use_view_slice=0","support","1","yes","Vulkan"
"Vulkan0","ADD","type=f16,ne=[1,1,8,1],nr=[1,1,1,1],nf=1","support","1","yes","Vulkan"
"Vulkan0","SUB","type=f16,ne=[1,1,8,1],nr=[1,1,1,1],nf=1","support","1","yes","Vulkan"
"Vulkan0","MUL","type=f16,ne=[1,1,8,1],nr=[1,1,1,1],nf=1","support","1","yes","Vulkan"

@ -5655,6 +5683,7 @@
"Vulkan0","MUL","type=f32,ne=[64,262144,1,1],nr=[1,1,1,1],nf=1","support","1","yes","Vulkan"
"Vulkan0","DIV","type=f32,ne=[64,262144,1,1],nr=[1,1,1,1],nf=1","support","1","yes","Vulkan"
"Vulkan0","ADD1","type=f32,ne=[10,5,4,3]","support","1","yes","Vulkan"
"Vulkan0","ADD1","type=f32,ne=[1024,1024,1,1]","support","1","yes","Vulkan"
"Vulkan0","SCALE","type=f32,ne=[10,10,10,10],scale=2.000000,bias=0.000000,inplace=0","support","1","yes","Vulkan"
"Vulkan0","SCALE","type=f32,ne=[10,10,10,10],scale=2.000000,bias=1.000000,inplace=0","support","1","yes","Vulkan"
"Vulkan0","SCALE","type=f32,ne=[10,10,10,10],scale=2.000000,bias=1.000000,inplace=1","support","1","yes","Vulkan"

@ -8644,9 +8673,13 @@
"Vulkan0","CLAMP","type=f16,ne=[7,1,5,3],min=-0.500000,max=0.500000","support","0","no","Vulkan"
"Vulkan0","LEAKY_RELU","type=f16,ne_a=[7,1,5,3],negative_slope=0.100000","support","0","no","Vulkan"
"Vulkan0","FLOOR","type=f16,ne=[7,1,5,3]","support","1","yes","Vulkan"
"Vulkan0","FLOOR","type=f16,ne=[1024,1024,1,1]","support","1","yes","Vulkan"
"Vulkan0","CEIL","type=f16,ne=[7,1,5,3]","support","1","yes","Vulkan"
"Vulkan0","CEIL","type=f16,ne=[1024,1024,1,1]","support","1","yes","Vulkan"
"Vulkan0","ROUND","type=f16,ne=[7,1,5,3]","support","1","yes","Vulkan"
"Vulkan0","ROUND","type=f16,ne=[1024,1024,1,1]","support","1","yes","Vulkan"
"Vulkan0","TRUNC","type=f16,ne=[7,1,5,3]","support","1","yes","Vulkan"
"Vulkan0","TRUNC","type=f16,ne=[1024,1024,1,1]","support","1","yes","Vulkan"
"Vulkan0","SQR","type=f32,ne=[10,5,4,3]","support","1","yes","Vulkan"
"Vulkan0","SQRT","type=f32,ne=[10,3,3,2]","support","1","yes","Vulkan"
"Vulkan0","LOG","type=f32,ne=[10,5,4,3]","support","1","yes","Vulkan"

@ -8666,9 +8699,13 @@
"Vulkan0","CLAMP","type=f32,ne=[7,1,5,3],min=-0.500000,max=0.500000","support","1","yes","Vulkan"
"Vulkan0","LEAKY_RELU","type=f32,ne_a=[7,1,5,3],negative_slope=0.100000","support","1","yes","Vulkan"
"Vulkan0","FLOOR","type=f32,ne=[7,1,5,3]","support","1","yes","Vulkan"
"Vulkan0","FLOOR","type=f32,ne=[1024,1024,1,1]","support","1","yes","Vulkan"
"Vulkan0","CEIL","type=f32,ne=[7,1,5,3]","support","1","yes","Vulkan"
"Vulkan0","CEIL","type=f32,ne=[1024,1024,1,1]","support","1","yes","Vulkan"
"Vulkan0","ROUND","type=f32,ne=[7,1,5,3]","support","1","yes","Vulkan"
"Vulkan0","ROUND","type=f32,ne=[1024,1024,1,1]","support","1","yes","Vulkan"
"Vulkan0","TRUNC","type=f32,ne=[7,1,5,3]","support","1","yes","Vulkan"
"Vulkan0","TRUNC","type=f32,ne=[1024,1024,1,1]","support","1","yes","Vulkan"
"Vulkan0","DIAG_MASK_INF","type=f32,ne=[10,10,1,1],n_past=5","support","1","yes","Vulkan"
"Vulkan0","DIAG_MASK_INF","type=f32,ne=[10,10,3,1],n_past=5","support","1","yes","Vulkan"
"Vulkan0","DIAG_MASK_INF","type=f32,ne=[10,10,3,2],n_past=5","support","1","yes","Vulkan"

@ -9411,28 +9448,405 @@
"Vulkan0","CONCAT","type=i32,ne_a=[11,12,13,14],ne_b_d=7,dim=2,v=3","support","1","yes","Vulkan"
"Vulkan0","CONCAT","type=f32,ne_a=[11,12,13,14],ne_b_d=7,dim=3,v=3","support","1","yes","Vulkan"
"Vulkan0","CONCAT","type=i32,ne_a=[11,12,13,14],ne_b_d=7,dim=3,v=3","support","1","yes","Vulkan"
"Vulkan0","ARGSORT","type=f32,ne=[3,1,1,1],order=0","support","1","yes","Vulkan"
|
||||
"Vulkan0","ARGSORT","type=f32,ne=[4,1,1,1],order=0","support","1","yes","Vulkan"
|
||||
"Vulkan0","ARGSORT","type=f32,ne=[7,1,1,1],order=0","support","1","yes","Vulkan"
|
||||
"Vulkan0","ARGSORT","type=f32,ne=[8,1,1,1],order=0","support","1","yes","Vulkan"
|
||||
"Vulkan0","ARGSORT","type=f32,ne=[15,1,1,1],order=0","support","1","yes","Vulkan"
|
||||
"Vulkan0","ARGSORT","type=f32,ne=[16,1,1,1],order=0","support","1","yes","Vulkan"
|
||||
"Vulkan0","ARGSORT","type=f32,ne=[31,1,1,1],order=0","support","1","yes","Vulkan"
|
||||
"Vulkan0","ARGSORT","type=f32,ne=[32,1,1,1],order=0","support","1","yes","Vulkan"
|
||||
"Vulkan0","ARGSORT","type=f32,ne=[63,1,1,1],order=0","support","1","yes","Vulkan"
|
||||
"Vulkan0","ARGSORT","type=f32,ne=[64,1,1,1],order=0","support","1","yes","Vulkan"
|
||||
"Vulkan0","ARGSORT","type=f32,ne=[127,1,1,1],order=0","support","1","yes","Vulkan"
|
||||
"Vulkan0","ARGSORT","type=f32,ne=[128,1,1,1],order=0","support","1","yes","Vulkan"
|
||||
"Vulkan0","ARGSORT","type=f32,ne=[255,1,1,1],order=0","support","1","yes","Vulkan"
|
||||
"Vulkan0","ARGSORT","type=f32,ne=[256,1,1,1],order=0","support","1","yes","Vulkan"
|
||||
"Vulkan0","ARGSORT","type=f32,ne=[511,1,1,1],order=0","support","1","yes","Vulkan"
|
||||
"Vulkan0","ARGSORT","type=f32,ne=[512,1,1,1],order=0","support","1","yes","Vulkan"
|
||||
"Vulkan0","ARGSORT","type=f32,ne=[1023,1,1,1],order=0","support","1","yes","Vulkan"
|
||||
"Vulkan0","ARGSORT","type=f32,ne=[1024,1,1,1],order=0","support","1","yes","Vulkan"
|
||||
"Vulkan0","ARGSORT","type=f32,ne=[2047,1,1,1],order=0","support","1","yes","Vulkan"
|
||||
"Vulkan0","ARGSORT","type=f32,ne=[2048,1,1,1],order=0","support","1","yes","Vulkan"
|
||||
"Vulkan0","ARGSORT","type=f32,ne=[4095,1,1,1],order=0","support","1","yes","Vulkan"
|
||||
"Vulkan0","ARGSORT","type=f32,ne=[4096,1,1,1],order=0","support","1","yes","Vulkan"
|
||||
"Vulkan0","ARGSORT","type=f32,ne=[8191,1,1,1],order=0","support","1","yes","Vulkan"
|
||||
"Vulkan0","ARGSORT","type=f32,ne=[8192,1,1,1],order=0","support","1","yes","Vulkan"
|
||||
"Vulkan0","ARGSORT","type=f32,ne=[16383,1,1,1],order=0","support","1","yes","Vulkan"
|
||||
"Vulkan0","ARGSORT","type=f32,ne=[16384,1,1,1],order=0","support","1","yes","Vulkan"
|
||||
"Vulkan0","ARGSORT","type=f32,ne=[32767,1,1,1],order=0","support","1","yes","Vulkan"
|
||||
"Vulkan0","ARGSORT","type=f32,ne=[32768,1,1,1],order=0","support","1","yes","Vulkan"
|
||||
"Vulkan0","ARGSORT","type=f32,ne=[65535,1,1,1],order=0","support","1","yes","Vulkan"
|
||||
"Vulkan0","ARGSORT","type=f32,ne=[65536,1,1,1],order=0","support","1","yes","Vulkan"
|
||||
"Vulkan0","ARGSORT","type=f32,ne=[131071,1,1,1],order=0","support","1","yes","Vulkan"
|
||||
"Vulkan0","ARGSORT","type=f32,ne=[131072,1,1,1],order=0","support","1","yes","Vulkan"
|
||||
"Vulkan0","ARGSORT","type=f32,ne=[262143,1,1,1],order=0","support","1","yes","Vulkan"
|
||||
"Vulkan0","ARGSORT","type=f32,ne=[262144,1,1,1],order=0","support","1","yes","Vulkan"
|
||||
"Vulkan0","ARGSORT","type=f32,ne=[524287,1,1,1],order=0","support","1","yes","Vulkan"
|
||||
"Vulkan0","ARGSORT","type=f32,ne=[524288,1,1,1],order=0","support","1","yes","Vulkan"
|
||||
"Vulkan0","ARGSORT","type=f32,ne=[1048575,1,1,1],order=0","support","1","yes","Vulkan"
|
||||
"Vulkan0","ARGSORT","type=f32,ne=[1048576,1,1,1],order=0","support","1","yes","Vulkan"
|
||||
"Vulkan0","ARGSORT","type=f32,ne=[16,10,10,10],order=0","support","1","yes","Vulkan"
|
||||
"Vulkan0","ARGSORT","type=f32,ne=[60,10,10,10],order=0","support","1","yes","Vulkan"
|
||||
"Vulkan0","ARGSORT","type=f32,ne=[1023,2,1,3],order=0","support","1","yes","Vulkan"
|
||||
"Vulkan0","ARGSORT","type=f32,ne=[1024,2,1,3],order=0","support","1","yes","Vulkan"
|
||||
"Vulkan0","ARGSORT","type=f32,ne=[1025,2,1,3],order=0","support","0","no","Vulkan"
|
||||
"Vulkan0","ARGSORT","type=f32,ne=[16384,1,1,1],order=0","support","0","no","Vulkan"
|
||||
"Vulkan0","ARGSORT","type=f32,ne=[2047,2,1,3],order=0","support","0","no","Vulkan"
|
||||
"Vulkan0","ARGSORT","type=f32,ne=[2048,2,1,3],order=0","support","0","no","Vulkan"
|
||||
"Vulkan0","ARGSORT","type=f32,ne=[2049,2,1,3],order=0","support","0","no","Vulkan"
|
||||
"Vulkan0","ARGSORT","type=f32,ne=[1025,2,1,3],order=0","support","1","yes","Vulkan"
|
||||
"Vulkan0","ARGSORT","type=f32,ne=[2047,2,1,3],order=0","support","1","yes","Vulkan"
|
||||
"Vulkan0","ARGSORT","type=f32,ne=[2048,2,1,3],order=0","support","1","yes","Vulkan"
|
||||
"Vulkan0","ARGSORT","type=f32,ne=[2049,2,1,3],order=0","support","1","yes","Vulkan"
|
||||
"Vulkan0","ARGSORT","type=f32,ne=[2,8,8192,1],order=0","support","1","yes","Vulkan"
|
||||
"Vulkan0","ARGSORT","type=f32,ne=[8,1,1,1],order=1","support","1","yes","Vulkan"
|
||||
"Vulkan0","ARGSORT","type=f32,ne=[3,1,1,1],order=0","support","1","yes","Vulkan"
|
||||
"Vulkan0","ARGSORT","type=f32,ne=[4,1,1,1],order=0","support","1","yes","Vulkan"
|
||||
"Vulkan0","ARGSORT","type=f32,ne=[7,1,1,1],order=0","support","1","yes","Vulkan"
|
||||
"Vulkan0","ARGSORT","type=f32,ne=[8,1,1,1],order=0","support","1","yes","Vulkan"
|
||||
"Vulkan0","ARGSORT","type=f32,ne=[15,1,1,1],order=0","support","1","yes","Vulkan"
|
||||
"Vulkan0","ARGSORT","type=f32,ne=[16,1,1,1],order=0","support","1","yes","Vulkan"
|
||||
"Vulkan0","ARGSORT","type=f32,ne=[31,1,1,1],order=0","support","1","yes","Vulkan"
|
||||
"Vulkan0","ARGSORT","type=f32,ne=[32,1,1,1],order=0","support","1","yes","Vulkan"
|
||||
"Vulkan0","ARGSORT","type=f32,ne=[63,1,1,1],order=0","support","1","yes","Vulkan"
|
||||
"Vulkan0","ARGSORT","type=f32,ne=[64,1,1,1],order=0","support","1","yes","Vulkan"
|
||||
"Vulkan0","ARGSORT","type=f32,ne=[127,1,1,1],order=0","support","1","yes","Vulkan"
|
||||
"Vulkan0","ARGSORT","type=f32,ne=[128,1,1,1],order=0","support","1","yes","Vulkan"
|
||||
"Vulkan0","ARGSORT","type=f32,ne=[255,1,1,1],order=0","support","1","yes","Vulkan"
|
||||
"Vulkan0","ARGSORT","type=f32,ne=[256,1,1,1],order=0","support","1","yes","Vulkan"
|
||||
"Vulkan0","ARGSORT","type=f32,ne=[511,1,1,1],order=0","support","1","yes","Vulkan"
|
||||
"Vulkan0","ARGSORT","type=f32,ne=[512,1,1,1],order=0","support","1","yes","Vulkan"
|
||||
"Vulkan0","ARGSORT","type=f32,ne=[1023,1,1,1],order=0","support","1","yes","Vulkan"
|
||||
"Vulkan0","ARGSORT","type=f32,ne=[1024,1,1,1],order=0","support","1","yes","Vulkan"
|
||||
"Vulkan0","ARGSORT","type=f32,ne=[2047,1,1,1],order=0","support","1","yes","Vulkan"
|
||||
"Vulkan0","ARGSORT","type=f32,ne=[2048,1,1,1],order=0","support","1","yes","Vulkan"
|
||||
"Vulkan0","ARGSORT","type=f32,ne=[4095,1,1,1],order=0","support","1","yes","Vulkan"
|
||||
"Vulkan0","ARGSORT","type=f32,ne=[4096,1,1,1],order=0","support","1","yes","Vulkan"
|
||||
"Vulkan0","ARGSORT","type=f32,ne=[8191,1,1,1],order=0","support","1","yes","Vulkan"
|
||||
"Vulkan0","ARGSORT","type=f32,ne=[8192,1,1,1],order=0","support","1","yes","Vulkan"
|
||||
"Vulkan0","ARGSORT","type=f32,ne=[16383,1,1,1],order=0","support","1","yes","Vulkan"
|
||||
"Vulkan0","ARGSORT","type=f32,ne=[16384,1,1,1],order=0","support","1","yes","Vulkan"
|
||||
"Vulkan0","ARGSORT","type=f32,ne=[32767,1,1,1],order=0","support","1","yes","Vulkan"
|
||||
"Vulkan0","ARGSORT","type=f32,ne=[32768,1,1,1],order=0","support","1","yes","Vulkan"
|
||||
"Vulkan0","ARGSORT","type=f32,ne=[65535,1,1,1],order=0","support","1","yes","Vulkan"
|
||||
"Vulkan0","ARGSORT","type=f32,ne=[65536,1,1,1],order=0","support","1","yes","Vulkan"
|
||||
"Vulkan0","ARGSORT","type=f32,ne=[131071,1,1,1],order=0","support","1","yes","Vulkan"
|
||||
"Vulkan0","ARGSORT","type=f32,ne=[131072,1,1,1],order=0","support","1","yes","Vulkan"
|
||||
"Vulkan0","ARGSORT","type=f32,ne=[262143,1,1,1],order=0","support","1","yes","Vulkan"
|
||||
"Vulkan0","ARGSORT","type=f32,ne=[262144,1,1,1],order=0","support","1","yes","Vulkan"
|
||||
"Vulkan0","ARGSORT","type=f32,ne=[524287,1,1,1],order=0","support","1","yes","Vulkan"
|
||||
"Vulkan0","ARGSORT","type=f32,ne=[524288,1,1,1],order=0","support","1","yes","Vulkan"
|
||||
"Vulkan0","ARGSORT","type=f32,ne=[1048575,1,1,1],order=0","support","1","yes","Vulkan"
|
||||
"Vulkan0","ARGSORT","type=f32,ne=[1048576,1,1,1],order=0","support","1","yes","Vulkan"
|
||||
"Vulkan0","ARGSORT","type=f32,ne=[16,10,10,10],order=1","support","1","yes","Vulkan"
|
||||
"Vulkan0","ARGSORT","type=f32,ne=[60,10,10,10],order=1","support","1","yes","Vulkan"
|
||||
"Vulkan0","ARGSORT","type=f32,ne=[1023,2,1,3],order=1","support","1","yes","Vulkan"
|
||||
"Vulkan0","ARGSORT","type=f32,ne=[1024,2,1,3],order=1","support","1","yes","Vulkan"
|
||||
"Vulkan0","ARGSORT","type=f32,ne=[1025,2,1,3],order=1","support","0","no","Vulkan"
|
||||
"Vulkan0","ARGSORT","type=f32,ne=[16384,1,1,1],order=1","support","0","no","Vulkan"
|
||||
"Vulkan0","ARGSORT","type=f32,ne=[2047,2,1,3],order=1","support","0","no","Vulkan"
|
||||
"Vulkan0","ARGSORT","type=f32,ne=[2048,2,1,3],order=1","support","0","no","Vulkan"
|
||||
"Vulkan0","ARGSORT","type=f32,ne=[2049,2,1,3],order=1","support","0","no","Vulkan"
|
||||
"Vulkan0","ARGSORT","type=f32,ne=[1025,2,1,3],order=1","support","1","yes","Vulkan"
|
||||
"Vulkan0","ARGSORT","type=f32,ne=[2047,2,1,3],order=1","support","1","yes","Vulkan"
|
||||
"Vulkan0","ARGSORT","type=f32,ne=[2048,2,1,3],order=1","support","1","yes","Vulkan"
|
||||
"Vulkan0","ARGSORT","type=f32,ne=[2049,2,1,3],order=1","support","1","yes","Vulkan"
|
||||
"Vulkan0","ARGSORT","type=f32,ne=[2,8,8192,1],order=1","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[1,1,1,1],k=1","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[12,1,2,1],k=1","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[2,1,1,1],k=1","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[13,1,2,1],k=1","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[2,1,1,1],k=2","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[13,1,2,1],k=2","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[4,1,1,1],k=1","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[15,1,2,1],k=1","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[4,1,1,1],k=2","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[15,1,2,1],k=2","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[4,1,1,1],k=3","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[15,1,2,1],k=3","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[8,1,1,1],k=1","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[19,1,2,1],k=1","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[8,1,1,1],k=2","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[19,1,2,1],k=2","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[8,1,1,1],k=3","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[19,1,2,1],k=3","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[8,1,1,1],k=7","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[19,1,2,1],k=7","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[16,1,1,1],k=1","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[27,1,2,1],k=1","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[16,1,1,1],k=2","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[27,1,2,1],k=2","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[16,1,1,1],k=3","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[27,1,2,1],k=3","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[16,1,1,1],k=7","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[27,1,2,1],k=7","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[16,1,1,1],k=15","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[27,1,2,1],k=15","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[32,1,1,1],k=1","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[43,1,2,1],k=1","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[32,1,1,1],k=2","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[43,1,2,1],k=2","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[32,1,1,1],k=3","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[43,1,2,1],k=3","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[32,1,1,1],k=7","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[43,1,2,1],k=7","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[32,1,1,1],k=15","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[43,1,2,1],k=15","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[64,1,1,1],k=1","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[75,1,2,1],k=1","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[64,1,1,1],k=2","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[75,1,2,1],k=2","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[64,1,1,1],k=3","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[75,1,2,1],k=3","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[64,1,1,1],k=7","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[75,1,2,1],k=7","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[64,1,1,1],k=15","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[75,1,2,1],k=15","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[128,1,1,1],k=1","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[139,1,2,1],k=1","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[128,1,1,1],k=2","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[139,1,2,1],k=2","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[128,1,1,1],k=3","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[139,1,2,1],k=3","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[128,1,1,1],k=7","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[139,1,2,1],k=7","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[128,1,1,1],k=15","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[139,1,2,1],k=15","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[128,1,1,1],k=100","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[139,1,2,1],k=100","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[256,1,1,1],k=1","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[267,1,2,1],k=1","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[256,1,1,1],k=2","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[267,1,2,1],k=2","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[256,1,1,1],k=3","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[267,1,2,1],k=3","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[256,1,1,1],k=7","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[267,1,2,1],k=7","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[256,1,1,1],k=15","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[267,1,2,1],k=15","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[256,1,1,1],k=100","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[267,1,2,1],k=100","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[512,1,1,1],k=1","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[523,1,2,1],k=1","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[512,1,1,1],k=2","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[523,1,2,1],k=2","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[512,1,1,1],k=3","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[523,1,2,1],k=3","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[512,1,1,1],k=7","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[523,1,2,1],k=7","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[512,1,1,1],k=15","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[523,1,2,1],k=15","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[512,1,1,1],k=100","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[523,1,2,1],k=100","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[512,1,1,1],k=500","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[523,1,2,1],k=500","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[1024,1,1,1],k=1","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[1035,1,2,1],k=1","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[1024,1,1,1],k=2","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[1035,1,2,1],k=2","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[1024,1,1,1],k=3","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[1035,1,2,1],k=3","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[1024,1,1,1],k=7","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[1035,1,2,1],k=7","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[1024,1,1,1],k=15","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[1035,1,2,1],k=15","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[1024,1,1,1],k=100","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[1035,1,2,1],k=100","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[1024,1,1,1],k=500","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[1035,1,2,1],k=500","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[1024,1,1,1],k=1023","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[1035,1,2,1],k=1023","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[2048,1,1,1],k=1","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[2059,1,2,1],k=1","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[2048,1,1,1],k=2","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[2059,1,2,1],k=2","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[2048,1,1,1],k=3","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[2059,1,2,1],k=3","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[2048,1,1,1],k=7","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[2059,1,2,1],k=7","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[2048,1,1,1],k=15","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[2059,1,2,1],k=15","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[2048,1,1,1],k=100","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[2059,1,2,1],k=100","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[2048,1,1,1],k=500","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[2059,1,2,1],k=500","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[2048,1,1,1],k=1023","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[2059,1,2,1],k=1023","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[4096,1,1,1],k=1","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[4107,1,2,1],k=1","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[4096,1,1,1],k=2","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[4107,1,2,1],k=2","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[4096,1,1,1],k=3","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[4107,1,2,1],k=3","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[4096,1,1,1],k=7","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[4107,1,2,1],k=7","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[4096,1,1,1],k=15","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[4107,1,2,1],k=15","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[4096,1,1,1],k=100","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[4107,1,2,1],k=100","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[4096,1,1,1],k=500","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[4107,1,2,1],k=500","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[4096,1,1,1],k=1023","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[4107,1,2,1],k=1023","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[8192,1,1,1],k=1","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[8203,1,2,1],k=1","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[8192,1,1,1],k=2","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[8203,1,2,1],k=2","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[8192,1,1,1],k=3","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[8203,1,2,1],k=3","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[8192,1,1,1],k=7","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[8203,1,2,1],k=7","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[8192,1,1,1],k=15","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[8203,1,2,1],k=15","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[8192,1,1,1],k=100","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[8203,1,2,1],k=100","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[8192,1,1,1],k=500","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[8203,1,2,1],k=500","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[8192,1,1,1],k=1023","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[8203,1,2,1],k=1023","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[16384,1,1,1],k=1","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[16395,1,2,1],k=1","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[16384,1,1,1],k=2","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[16395,1,2,1],k=2","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[16384,1,1,1],k=3","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[16395,1,2,1],k=3","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[16384,1,1,1],k=7","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[16395,1,2,1],k=7","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[16384,1,1,1],k=15","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[16395,1,2,1],k=15","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[16384,1,1,1],k=100","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[16395,1,2,1],k=100","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[16384,1,1,1],k=500","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[16395,1,2,1],k=500","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[16384,1,1,1],k=1023","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[16395,1,2,1],k=1023","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[16384,1,1,1],k=9999","support","0","no","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[16395,1,2,1],k=9999","support","0","no","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[32768,1,1,1],k=1","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[32779,1,2,1],k=1","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[32768,1,1,1],k=2","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[32779,1,2,1],k=2","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[32768,1,1,1],k=3","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[32779,1,2,1],k=3","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[32768,1,1,1],k=7","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[32779,1,2,1],k=7","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[32768,1,1,1],k=15","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[32779,1,2,1],k=15","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[32768,1,1,1],k=100","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[32779,1,2,1],k=100","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[32768,1,1,1],k=500","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[32779,1,2,1],k=500","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[32768,1,1,1],k=1023","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[32779,1,2,1],k=1023","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[32768,1,1,1],k=9999","support","0","no","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[32779,1,2,1],k=9999","support","0","no","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[65536,1,1,1],k=1","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[65547,1,2,1],k=1","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[65536,1,1,1],k=2","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[65547,1,2,1],k=2","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[65536,1,1,1],k=3","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[65547,1,2,1],k=3","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[65536,1,1,1],k=7","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[65547,1,2,1],k=7","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[65536,1,1,1],k=15","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[65547,1,2,1],k=15","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[65536,1,1,1],k=100","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[65547,1,2,1],k=100","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[65536,1,1,1],k=500","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[65547,1,2,1],k=500","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[65536,1,1,1],k=1023","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[65547,1,2,1],k=1023","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[65536,1,1,1],k=9999","support","0","no","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[65547,1,2,1],k=9999","support","0","no","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[131072,1,1,1],k=1","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[131083,1,2,1],k=1","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[131072,1,1,1],k=2","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[131083,1,2,1],k=2","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[131072,1,1,1],k=3","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[131083,1,2,1],k=3","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[131072,1,1,1],k=7","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[131083,1,2,1],k=7","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[131072,1,1,1],k=15","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[131083,1,2,1],k=15","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[131072,1,1,1],k=100","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[131083,1,2,1],k=100","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[131072,1,1,1],k=500","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[131083,1,2,1],k=500","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[131072,1,1,1],k=1023","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[131083,1,2,1],k=1023","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[131072,1,1,1],k=9999","support","0","no","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[131083,1,2,1],k=9999","support","0","no","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[262144,1,1,1],k=1","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[262155,1,2,1],k=1","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[262144,1,1,1],k=2","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[262155,1,2,1],k=2","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[262144,1,1,1],k=3","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[262155,1,2,1],k=3","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[262144,1,1,1],k=7","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[262155,1,2,1],k=7","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[262144,1,1,1],k=15","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[262155,1,2,1],k=15","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[262144,1,1,1],k=100","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[262155,1,2,1],k=100","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[262144,1,1,1],k=500","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[262155,1,2,1],k=500","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[262144,1,1,1],k=1023","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[262155,1,2,1],k=1023","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[262144,1,1,1],k=9999","support","0","no","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[262155,1,2,1],k=9999","support","0","no","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[524288,1,1,1],k=1","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[524299,1,2,1],k=1","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[524288,1,1,1],k=2","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[524299,1,2,1],k=2","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[524288,1,1,1],k=3","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[524299,1,2,1],k=3","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[524288,1,1,1],k=7","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[524299,1,2,1],k=7","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[524288,1,1,1],k=15","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[524299,1,2,1],k=15","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[524288,1,1,1],k=100","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[524299,1,2,1],k=100","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[524288,1,1,1],k=500","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[524299,1,2,1],k=500","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[524288,1,1,1],k=1023","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[524299,1,2,1],k=1023","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[524288,1,1,1],k=9999","support","0","no","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[524299,1,2,1],k=9999","support","0","no","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[16,10,10,10],k=1","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[60,10,10,10],k=1","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[1023,2,1,3],k=1","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[1024,2,1,3],k=1","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[1025,2,1,3],k=1","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[16384,1,1,1],k=1","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[2047,2,1,3],k=1","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[2048,2,1,3],k=1","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[2049,2,1,3],k=1","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[16,10,10,10],k=2","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[60,10,10,10],k=2","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[1023,2,1,3],k=2","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[1024,2,1,3],k=2","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[1025,2,1,3],k=2","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[16384,1,1,1],k=2","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[2047,2,1,3],k=2","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[2048,2,1,3],k=2","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[2049,2,1,3],k=2","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[16,10,10,10],k=3","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[60,10,10,10],k=3","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[1023,2,1,3],k=3","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[1024,2,1,3],k=3","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[1025,2,1,3],k=3","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[16384,1,1,1],k=3","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[2047,2,1,3],k=3","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[2048,2,1,3],k=3","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[2049,2,1,3],k=3","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[16,10,10,10],k=7","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[60,10,10,10],k=7","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[1023,2,1,3],k=7","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[1024,2,1,3],k=7","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[1025,2,1,3],k=7","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[16384,1,1,1],k=7","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[2047,2,1,3],k=7","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[2048,2,1,3],k=7","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[2049,2,1,3],k=7","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[16,10,10,10],k=15","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[60,10,10,10],k=15","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[1023,2,1,3],k=15","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[1024,2,1,3],k=15","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[1025,2,1,3],k=15","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[16384,1,1,1],k=15","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[2047,2,1,3],k=15","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[2048,2,1,3],k=15","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[2049,2,1,3],k=15","support","1","yes","Vulkan"
|
||||
"Vulkan0","UPSCALE","type=f32,ne=[512,512,3,2],scale_factor=2,mode=nearest,transpose=0","support","1","yes","Vulkan"
|
||||
"Vulkan0","UPSCALE","type=f32,ne=[512,512,3,2],scale_factor=2,mode=nearest,transpose=1","support","1","yes","Vulkan"
|
||||
"Vulkan0","UPSCALE","type=f32,ne=[2,5,7,11],ne_tgt=[5,7,11,13],mode=nearest,flags=none","support","1","yes","Vulkan"
|
||||
|
|
@ -9445,6 +9859,10 @@
"Vulkan0","UPSCALE","type=f32,ne=[512,512,3,2],scale_factor=2,mode=bicubic,transpose=1","support","1","yes","Vulkan"
"Vulkan0","UPSCALE","type=f32,ne=[2,5,7,11],ne_tgt=[5,7,11,13],mode=bicubic,flags=none","support","1","yes","Vulkan"
"Vulkan0","UPSCALE","type=f32,ne=[5,7,11,13],ne_tgt=[2,5,7,11],mode=bicubic,flags=none","support","1","yes","Vulkan"
"Vulkan0","UPSCALE","type=f32,ne=[512,512,3,2],scale_factor=2,mode=513,transpose=0","support","0","no","Vulkan"
"Vulkan0","UPSCALE","type=f32,ne=[512,512,3,2],scale_factor=2,mode=513,transpose=1","support","0","no","Vulkan"
"Vulkan0","UPSCALE","type=f32,ne=[2,5,7,11],ne_tgt=[5,7,11,13],mode=bilinear,flags=none","support","0","no","Vulkan"
"Vulkan0","UPSCALE","type=f32,ne=[5,7,11,13],ne_tgt=[2,5,7,11],mode=bilinear,flags=none","support","0","no","Vulkan"
"Vulkan0","UPSCALE","type=f32,ne=[2,5,7,11],ne_tgt=[5,7,11,13],mode=bilinear,flags=align_corners","support","1","yes","Vulkan"
"Vulkan0","UPSCALE","type=f32,ne=[1,4,3,2],ne_tgt=[2,8,3,2],mode=bilinear,flags=align_corners","support","1","yes","Vulkan"
"Vulkan0","UPSCALE","type=f32,ne=[4,1,3,2],ne_tgt=[1,1,3,2],mode=bilinear,flags=align_corners","support","1","yes","Vulkan"

@ -9479,23 +9897,37 @@
"Vulkan0","PAD_REFLECT_1D","type=f32,ne_a=[3000,384,4,1],pad_0=10,pad_1=9","support","0","no","Vulkan"
"Vulkan0","ROLL","shift0=3,shift1=-2,shift3=1,shift4=-1","support","1","yes","Vulkan"
"Vulkan0","ARANGE","type=f32,start=0.000000,stop=10.000000,step=1.000000","support","1","yes","Vulkan"
"Vulkan0","ARANGE","type=f32,start=0.000000,stop=1048576.000000,step=1.000000","support","1","yes","Vulkan"
"Vulkan0","TIMESTEP_EMBEDDING","type=f32,ne_a=[2,1,1,1],dim=320,max_period=10000","support","1","yes","Vulkan"
"Vulkan0","LEAKY_RELU","type=f32,ne_a=[10,5,4,3],negative_slope=0.100000","support","1","yes","Vulkan"
"Vulkan0","CUMSUM","type=f32,ne=[10,5,4,3]","support","0","no","Vulkan"
"Vulkan0","CUMSUM","type=f32,ne=[10,5,4,3]","support","1","yes","Vulkan"
"Vulkan0","CUMSUM","type=f32,ne=[127,5,4,3]","support","1","yes","Vulkan"
"Vulkan0","CUMSUM","type=f32,ne=[128,5,4,3]","support","1","yes","Vulkan"
"Vulkan0","CUMSUM","type=f32,ne=[255,5,4,3]","support","1","yes","Vulkan"
"Vulkan0","CUMSUM","type=f32,ne=[256,5,4,3]","support","1","yes","Vulkan"
"Vulkan0","CUMSUM","type=f32,ne=[511,5,4,3]","support","1","yes","Vulkan"
"Vulkan0","CUMSUM","type=f32,ne=[512,5,4,3]","support","1","yes","Vulkan"
"Vulkan0","CUMSUM","type=f32,ne=[1023,5,4,3]","support","1","yes","Vulkan"
"Vulkan0","CUMSUM","type=f32,ne=[1024,5,4,3]","support","1","yes","Vulkan"
"Vulkan0","CUMSUM","type=f32,ne=[2047,5,4,3]","support","1","yes","Vulkan"
"Vulkan0","CUMSUM","type=f32,ne=[2048,5,4,3]","support","1","yes","Vulkan"
"Vulkan0","CUMSUM","type=f32,ne=[242004,1,1,1]","support","1","yes","Vulkan"
"Vulkan0","CUMSUM","type=f32,ne=[375960,1,1,1]","support","1","yes","Vulkan"
"Vulkan0","XIELU","type=f32,ne=[10,5,4,3]","support","0","no","Vulkan"
"Vulkan0","TRI","type=f32,ne=[10,10,4,3],tri_type=3","support","0","no","Vulkan"
"Vulkan0","TRI","type=f32,ne=[10,10,4,3],tri_type=2","support","0","no","Vulkan"
"Vulkan0","TRI","type=f32,ne=[10,10,4,3],tri_type=1","support","0","no","Vulkan"
"Vulkan0","TRI","type=f32,ne=[10,10,4,3],tri_type=0","support","0","no","Vulkan"
"Vulkan0","TRI","type=f32,ne=[10,10,4,3],tri_type=3","support","1","yes","Vulkan"
"Vulkan0","TRI","type=f32,ne=[10,10,4,3],tri_type=2","support","1","yes","Vulkan"
"Vulkan0","TRI","type=f32,ne=[10,10,4,3],tri_type=1","support","1","yes","Vulkan"
"Vulkan0","TRI","type=f32,ne=[10,10,4,3],tri_type=0","support","1","yes","Vulkan"
"Vulkan0","FILL","type=f32,ne=[10,10,4,3],c=0.000000","support","1","yes","Vulkan"
"Vulkan0","FILL","type=f32,ne=[303,207,11,3],c=2.000000","support","1","yes","Vulkan"
"Vulkan0","FILL","type=f32,ne=[800,600,4,4],c=-152.000000","support","1","yes","Vulkan"
"Vulkan0","SOLVE_TRI","type=f32,ne_lhs=[10,10,4,3],ne_rhs=[3,10,4,3]","support","0","no","Vulkan"
"Vulkan0","SOLVE_TRI","type=f32,ne_lhs=[11,11,1,1],ne_rhs=[5,11,1,1]","support","0","no","Vulkan"
"Vulkan0","SOLVE_TRI","type=f32,ne_lhs=[17,17,2,4],ne_rhs=[9,17,2,4]","support","0","no","Vulkan"
"Vulkan0","SOLVE_TRI","type=f32,ne_lhs=[30,30,7,1],ne_rhs=[8,30,7,1]","support","0","no","Vulkan"
"Vulkan0","SOLVE_TRI","type=f32,ne_lhs=[42,42,5,2],ne_rhs=[10,42,5,2]","support","0","no","Vulkan"
"Vulkan0","SOLVE_TRI","type=f32,ne_lhs=[64,64,2,2],ne_rhs=[10,64,2,2]","support","0","no","Vulkan"
"Vulkan0","FILL","type=f32,ne=[2048,512,2,2],c=3.500000","support","1","yes","Vulkan"
"Vulkan0","SOLVE_TRI","type=f32,ne_lhs=[10,10,4,3],ne_rhs=[3,10,4,3]","support","1","yes","Vulkan"
"Vulkan0","SOLVE_TRI","type=f32,ne_lhs=[11,11,1,1],ne_rhs=[5,11,1,1]","support","1","yes","Vulkan"
"Vulkan0","SOLVE_TRI","type=f32,ne_lhs=[17,17,2,4],ne_rhs=[9,17,2,4]","support","1","yes","Vulkan"
"Vulkan0","SOLVE_TRI","type=f32,ne_lhs=[30,30,7,1],ne_rhs=[8,30,7,1]","support","1","yes","Vulkan"
"Vulkan0","SOLVE_TRI","type=f32,ne_lhs=[42,42,5,2],ne_rhs=[10,42,5,2]","support","1","yes","Vulkan"
"Vulkan0","SOLVE_TRI","type=f32,ne_lhs=[64,64,2,2],ne_rhs=[10,64,2,2]","support","1","yes","Vulkan"
"Vulkan0","SOLVE_TRI","type=f32,ne_lhs=[100,100,4,4],ne_rhs=[41,100,4,4]","support","0","no","Vulkan"
"Vulkan0","PAD","type=f32,ne_a=[512,512,1,1],lp0=0,rp0=1,lp1=0,rp1=1,lp2=0,rp2=0,lp3=0,rp3=0,v=0","support","1","yes","Vulkan"
"Vulkan0","PAD","type=f32,ne_a=[11,22,33,44],lp0=1,rp0=2,lp1=3,rp1=4,lp2=5,rp2=6,lp3=7,rp3=8,v=0","support","1","yes","Vulkan"

@ -408,62 +408,67 @@ if (MSVC)
    /wd4996 # Disable POSIX deprecation warnings
    /wd4702 # Unreachable code warnings
    )
    function(disable_msvc_warnings target_name)
    set(MSVC_COMPILE_OPTIONS
        "$<$<COMPILE_LANGUAGE:C>:/utf-8>"
        "$<$<COMPILE_LANGUAGE:CXX>:/utf-8>"
    )
    function(configure_msvc_target target_name)
        if(TARGET ${target_name})
            target_compile_options(${target_name} PRIVATE ${MSVC_WARNING_FLAGS})
            target_compile_options(${target_name} PRIVATE ${MSVC_COMPILE_OPTIONS})
        endif()
    endfunction()
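The if(TARGET ...) guard makes the renamed helper safe to call unconditionally: configure_msvc_target(some-optional-target) (a hypothetical name) is simply a no-op when that target was never configured, so the call sites below need no existence checks of their own.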

    disable_msvc_warnings(ggml-base)
    disable_msvc_warnings(ggml)
    disable_msvc_warnings(ggml-cpu)
    disable_msvc_warnings(ggml-cpu-x64)
    disable_msvc_warnings(ggml-cpu-sse42)
    disable_msvc_warnings(ggml-cpu-sandybridge)
    disable_msvc_warnings(ggml-cpu-haswell)
    disable_msvc_warnings(ggml-cpu-skylakex)
    disable_msvc_warnings(ggml-cpu-icelake)
    disable_msvc_warnings(ggml-cpu-alderlake)
    configure_msvc_target(ggml-base)
    configure_msvc_target(ggml)
    configure_msvc_target(ggml-cpu)
    configure_msvc_target(ggml-cpu-x64)
    configure_msvc_target(ggml-cpu-sse42)
    configure_msvc_target(ggml-cpu-sandybridge)
    configure_msvc_target(ggml-cpu-haswell)
    configure_msvc_target(ggml-cpu-skylakex)
    configure_msvc_target(ggml-cpu-icelake)
    configure_msvc_target(ggml-cpu-alderlake)

    if (GGML_BUILD_EXAMPLES)
        disable_msvc_warnings(common-ggml)
        disable_msvc_warnings(common)
        configure_msvc_target(common-ggml)
        configure_msvc_target(common)

        disable_msvc_warnings(mnist-common)
        disable_msvc_warnings(mnist-eval)
        disable_msvc_warnings(mnist-train)
        configure_msvc_target(mnist-common)
        configure_msvc_target(mnist-eval)
        configure_msvc_target(mnist-train)

        disable_msvc_warnings(gpt-2-ctx)
        disable_msvc_warnings(gpt-2-alloc)
        disable_msvc_warnings(gpt-2-backend)
        disable_msvc_warnings(gpt-2-sched)
        disable_msvc_warnings(gpt-2-quantize)
        disable_msvc_warnings(gpt-2-batched)
        configure_msvc_target(gpt-2-ctx)
        configure_msvc_target(gpt-2-alloc)
        configure_msvc_target(gpt-2-backend)
        configure_msvc_target(gpt-2-sched)
        configure_msvc_target(gpt-2-quantize)
        configure_msvc_target(gpt-2-batched)

        disable_msvc_warnings(gpt-j)
        disable_msvc_warnings(gpt-j-quantize)
        configure_msvc_target(gpt-j)
        configure_msvc_target(gpt-j-quantize)

        disable_msvc_warnings(magika)
        disable_msvc_warnings(yolov3-tiny)
        disable_msvc_warnings(sam)
        configure_msvc_target(magika)
        configure_msvc_target(yolov3-tiny)
        configure_msvc_target(sam)

        disable_msvc_warnings(simple-ctx)
        disable_msvc_warnings(simple-backend)
        configure_msvc_target(simple-ctx)
        configure_msvc_target(simple-backend)
    endif()

    if (GGML_BUILD_TESTS)
        disable_msvc_warnings(test-mul-mat)
        disable_msvc_warnings(test-arange)
        disable_msvc_warnings(test-backend-ops)
        disable_msvc_warnings(test-cont)
        disable_msvc_warnings(test-conv-transpose)
        disable_msvc_warnings(test-conv-transpose-1d)
        disable_msvc_warnings(test-conv1d)
        disable_msvc_warnings(test-conv2d)
        disable_msvc_warnings(test-conv2d-dw)
        disable_msvc_warnings(test-customop)
        disable_msvc_warnings(test-dup)
        disable_msvc_warnings(test-opt)
        disable_msvc_warnings(test-pool)
        configure_msvc_target(test-mul-mat)
        configure_msvc_target(test-arange)
        configure_msvc_target(test-backend-ops)
        configure_msvc_target(test-cont)
        configure_msvc_target(test-conv-transpose)
        configure_msvc_target(test-conv-transpose-1d)
        configure_msvc_target(test-conv1d)
        configure_msvc_target(test-conv2d)
        configure_msvc_target(test-conv2d-dw)
        configure_msvc_target(test-customop)
        configure_msvc_target(test-dup)
        configure_msvc_target(test-opt)
        configure_msvc_target(test-pool)
    endif ()
endif()

@ -2148,7 +2148,8 @@ extern "C" {
    };

    enum ggml_scale_flag {
        GGML_SCALE_FLAG_ALIGN_CORNERS = (1 << 8)
        GGML_SCALE_FLAG_ALIGN_CORNERS = (1 << 8),
        GGML_SCALE_FLAG_ANTIALIAS = (1 << 9),
    };

    // interpolate
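As a hedged sketch of how the new flag is meant to be consumed (assuming the ggml_interpolate() API that this comment introduces, where scale flags are OR-ed into the mode word; sizes are hypothetical):

    // request antialiased bilinear resampling of src to 512x512
    struct ggml_tensor * out = ggml_interpolate(ctx, src,
            512, 512, src->ne[2], src->ne[3],
            GGML_SCALE_MODE_BILINEAR | GGML_SCALE_FLAG_ANTIALIAS);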
@ -723,6 +723,12 @@ struct ggml_backend_sched {
    bool op_offload;

    int debug;

    // used for debugging graph reallocations [GGML_SCHED_DEBUG_REALLOC]
    // ref: https://github.com/ggml-org/llama.cpp/pull/17617
    int debug_realloc;
    int debug_graph_size;
    int debug_prev_graph_size;
};

#define hash_id(tensor) ggml_hash_find_or_insert(&sched->hash_set, tensor)

@ -1234,10 +1240,8 @@ void ggml_backend_sched_split_graph(ggml_backend_sched_t sched, struct ggml_cgra
                tensor_copy = ggml_dup_tensor_layout(sched->ctx, src);
                ggml_format_name(tensor_copy, "%s#%s#%d", ggml_backend_name(backend), src->name, c);
            }
            if (sched->n_copies > 1) {
                ggml_set_input(tensor_copy);
                ggml_set_output(tensor_copy); // prevent ggml-alloc from overwriting the tensor
            }
            ggml_set_input(tensor_copy);
            ggml_set_output(tensor_copy); // prevent ggml-alloc from overwriting the tensor
            tensor_id_copy(src_id, src_backend_id, c) = tensor_copy;
            SET_CAUSE(tensor_copy, "4.cpy");
        }

@ -1289,6 +1293,11 @@ void ggml_backend_sched_split_graph(ggml_backend_sched_t sched, struct ggml_cgra
    }

    int graph_size = std::max(graph->n_nodes, graph->n_leafs) + sched->n_splits*GGML_SCHED_MAX_SPLIT_INPUTS*2*sched->n_copies;

    // remember the actual graph_size for performing reallocation checks later [GGML_SCHED_DEBUG_REALLOC]
    sched->debug_prev_graph_size = sched->debug_graph_size;
    sched->debug_graph_size = graph_size;

    if (sched->graph.size < graph_size) {
        sched->graph.size = graph_size;
        sched->graph.nodes = (ggml_tensor **) realloc(sched->graph.nodes, graph_size * sizeof(struct ggml_tensor *));

@ -1395,14 +1404,21 @@ static bool ggml_backend_sched_alloc_splits(ggml_backend_sched_t sched) {

    // allocate graph
    if (backend_ids_changed || !ggml_gallocr_alloc_graph(sched->galloc, &sched->graph)) {
#ifdef GGML_SCHED_NO_REALLOC
        GGML_ABORT("%s: failed to allocate graph, but graph re-allocation is disabled by GGML_SCHED_NO_REALLOC\n", __func__);
#endif

#ifndef NDEBUG
        GGML_LOG_DEBUG("%s: failed to allocate graph, reserving (backend_ids_changed = %d)\n", __func__, backend_ids_changed);
#endif

        if (sched->debug_realloc > 0) {
            // we are interested only in situations where the graph was reallocated even though its size remained the same [GGML_SCHED_DEBUG_REALLOC]
            // example: https://github.com/ggml-org/llama.cpp/pull/17143
            const bool unexpected = !backend_ids_changed && sched->debug_prev_graph_size == sched->debug_graph_size;

            if (unexpected || sched->debug_realloc > 1) {
                GGML_ABORT("%s: unexpected graph reallocation (graph size = %d, nodes = %d, leafs = %d), debug_realloc = %d\n", __func__,
                    sched->debug_graph_size, sched->graph.n_nodes, sched->graph.n_leafs, sched->debug_realloc);
            }
        }

        // the re-allocation may cause the split inputs to be moved to a different address
        // synchronize without ggml_backend_sched_synchronize to avoid changing cur_copy
        for (int i = 0; i < sched->n_backends; i++) {

@ -1620,6 +1636,14 @@ ggml_backend_sched_t ggml_backend_sched_new(

    const char * GGML_SCHED_DEBUG = getenv("GGML_SCHED_DEBUG");
    sched->debug = GGML_SCHED_DEBUG ? atoi(GGML_SCHED_DEBUG) : 0;

    sched->debug_realloc = 0;
#ifdef GGML_SCHED_NO_REALLOC
    sched->debug_realloc = 1;
#endif
    const char * GGML_SCHED_DEBUG_REALLOC = getenv("GGML_SCHED_DEBUG_REALLOC");
    sched->debug_realloc = GGML_SCHED_DEBUG_REALLOC ? atoi(GGML_SCHED_DEBUG_REALLOC) : sched->debug_realloc;
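In effect, per the allocation path above: with GGML_SCHED_DEBUG_REALLOC=1 in the environment the scheduler aborts only on unexpected reallocations, i.e. when the graph is reallocated although the backend assignments and the measured graph size are unchanged, while GGML_SCHED_DEBUG_REALLOC=2 aborts on any graph reallocation. Building with GGML_SCHED_NO_REALLOC defaults the setting to 1.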
|
||||
|
||||
sched->n_backends = n_backends;
|
||||
sched->n_copies = parallel ? GGML_SCHED_MAX_COPIES : 1;
|
||||
|
||||
|
|
@ -1636,6 +1660,9 @@ ggml_backend_sched_t ggml_backend_sched_new(
|
|||
sched->prev_node_backend_ids = (int *) calloc(nodes_size, sizeof(sched->prev_node_backend_ids[0]));
|
||||
sched->prev_leaf_backend_ids = (int *) calloc(nodes_size, sizeof(sched->prev_leaf_backend_ids[0]));
|
||||
|
||||
sched->debug_graph_size = 0;
|
||||
sched->debug_prev_graph_size = 0;
|
||||
|
||||
sched->context_buffer_size = ggml_sched_max_splits*GGML_SCHED_MAX_SPLIT_INPUTS*2*sizeof(struct ggml_tensor) + ggml_graph_overhead_custom(graph_size, false);
|
||||
sched->context_buffer = (char *) malloc(sched->context_buffer_size);
|
||||
|
||||
|
|
|
|||
|
|
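
Editor's note on the hunks above (not part of the diff): `debug_realloc` acts as a three-level switch. `sched_realloc_should_abort` below is a hypothetical helper that restates the gating logic compactly:

    // 0  - reallocation is allowed silently (default)
    // 1  - abort only on an "unexpected" reallocation: same backend assignment
    //      and same measured graph size as the previous split (see PR 17143)
    // >1 - abort on any reallocation; building with GGML_SCHED_NO_REALLOC
    //      additionally aborts before this check is ever reached
    static bool sched_realloc_should_abort(int debug_realloc, bool backend_ids_changed,
                                           int prev_graph_size, int graph_size) {
        if (debug_realloc <= 0) {
            return false;
        }
        const bool unexpected = !backend_ids_changed && prev_graph_size == graph_size;
        return unexpected || debug_realloc > 1;
    }

At runtime the level is read from the GGML_SCHED_DEBUG_REALLOC environment variable in ggml_backend_sched_new, as shown above.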

@@ -2500,6 +2500,9 @@ static bool ggml_backend_cann_supports_op(ggml_backend_dev_t dev, const ggml_ten
                if (op->op_params[0] != GGML_SCALE_MODE_NEAREST) {
                    return false;
                }
                if (op->op_params[0] & GGML_SCALE_FLAG_ANTIALIAS) {
                    return false;
                }
                return true;
            }
        case GGML_OP_POOL_2D:

@@ -2561,6 +2564,10 @@ static bool ggml_backend_cann_supports_op(ggml_backend_dev_t dev, const ggml_ten
            return true;
        case GGML_OP_OUT_PROD:
            {
#ifdef ASCEND_310P
                // Ger is not supported on 310p device
                return false;
#endif
                switch (op->src[0]->type) {
                    case GGML_TYPE_F16:
                    case GGML_TYPE_F32:

@@ -8,6 +8,10 @@
#include <sys/sysctl.h>
#endif

#if !defined(HWCAP2_SVE2)
#define HWCAP2_SVE2 (1 << 1)
#endif

#if !defined(HWCAP2_I8MM)
#define HWCAP2_I8MM (1 << 13)
#endif

@@ -683,22 +683,14 @@ bool ggml_is_numa(void) {
}

#if defined(__ARM_ARCH)

#if defined(__linux__) && defined(__aarch64__)
#include <sys/auxv.h>
#endif

static void ggml_init_arm_arch_features(void) {
#if defined(__aarch64__) && defined(__ARM_FEATURE_SVE)
#if defined(__linux__)
    ggml_arm_arch_features.sve_cnt = PR_SVE_VL_LEN_MASK & prctl(PR_SVE_GET_VL);
#else
    // TODO: add support of SVE for non-linux systems
#error "TODO: SVE is not supported on this platform. To use SVE, sve_cnt needs to be initialized here."
#endif
#endif
#include <arm_sve.h>
static void ggml_init_arm_arch_features(void) {
    ggml_arm_arch_features.sve_cnt = svcntb();
}

#else
static void ggml_init_arm_arch_features(void) {}
#endif
#endif // __ARM_ARCH

struct ggml_tensor * ggml_new_i32(struct ggml_context * ctx, int32_t value) {

@@ -7420,6 +7420,65 @@ static void ggml_compute_forward_upscale_f32(
                }
            }
        }
    } else if (mode == GGML_SCALE_MODE_BILINEAR && (mode_flags & GGML_SCALE_FLAG_ANTIALIAS)) {
        // Similar to F.interpolate(..., mode="bilinear", align_corners=False, antialias=True)
        // https://github.com/pytorch/pytorch/blob/8871ff29b743948d1225389d5b7068f37b22750b/aten/src/ATen/native/cpu/UpSampleKernel.cpp
        auto triangle_filter = [](float x) -> float {
            return std::max(1.0f - fabsf(x), 0.0f);
        };

        // support and invscale, minimum 1 pixel for bilinear
        const float support1 = std::max(1.0f, 1.0f / sf1);
        const float invscale1 = 1.0f / support1;
        const float support0 = std::max(1.0f, 1.0f / sf0);
        const float invscale0 = 1.0f / support0;

        for (int64_t i3 = 0; i3 < ne3; i3++) {
            const int64_t i03 = i3 / sf3;
            for (int64_t i2 = ith; i2 < ne2; i2 += nth) {
                const int64_t i02 = i2 / sf2;
                for (int64_t i1 = 0; i1 < ne1; i1++) {
                    const float y = ((float) i1 + pixel_offset) / sf1;
                    for (int64_t i0 = 0; i0 < ne0; i0++) {
                        const float x = ((float) i0 + pixel_offset) / sf0;

                        // the range of source pixels that contribute
                        const int64_t x_min = std::max<int64_t>(x - support0 + pixel_offset, 0);
                        const int64_t x_max = std::min<int64_t>(x + support0 + pixel_offset, ne00);
                        const int64_t y_min = std::max<int64_t>(y - support1 + pixel_offset, 0);
                        const int64_t y_max = std::min<int64_t>(y + support1 + pixel_offset, ne01);

                        // bilinear filter with antialiasing
                        float val = 0.0f;
                        float total_weight = 0.0f;

                        for (int64_t sy = y_min; sy < y_max; sy++) {
                            const float weight_y = triangle_filter((sy - y + pixel_offset) * invscale1);

                            for (int64_t sx = x_min; sx < x_max; sx++) {
                                const float weight_x = triangle_filter((sx - x + pixel_offset) * invscale0);
                                const float weight = weight_x * weight_y;

                                if (weight <= 0.0f) {
                                    continue;
                                }

                                const float pixel = *(const float *)((const char *)src0->data + sx*nb00 + sy*nb01 + i02*nb02 + i03*nb03);
                                val += pixel * weight;
                                total_weight += weight;
                            }
                        }

                        if (total_weight > 0.0f) {
                            val /= total_weight;
                        }

                        float * dst_ptr = (float *)((char *)dst->data + i0*nb0 + i1*nb1 + i2*nb2 + i3*nb3);
                        *dst_ptr = val;
                    }
                }
            }
        }
    } else if (mode == GGML_SCALE_MODE_BILINEAR) {
        for (int64_t i3 = 0; i3 < ne3; i3++) {
            const int64_t i03 = i3 / sf3;
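
For readers unfamiliar with antialiased resampling, here is a standalone 1D sketch of the weighting scheme above (illustrative only; `resample_1d` is not part of the patch):

    #include <algorithm>
    #include <cmath>

    // 1D version of the triangle-filter resampling used in the CPU kernel above.
    // For a downscale factor sf < 1, each destination pixel averages ~1/sf source
    // pixels with linearly decaying weights, which is what suppresses aliasing.
    static float resample_1d(const float * src, int n, float dst_x, float sf, float pixel_offset) {
        const float support  = std::max(1.0f, 1.0f / sf);   // filter radius in source pixels
        const float invscale = 1.0f / support;
        const float x = (dst_x + pixel_offset) / sf;        // center in source coordinates

        const int x_min = std::max(0, (int)(x - support + pixel_offset));
        const int x_max = std::min(n, (int)(x + support + pixel_offset));

        float val = 0.0f, total = 0.0f;
        for (int sx = x_min; sx < x_max; sx++) {
            const float w = std::max(1.0f - std::fabs((sx - x + pixel_offset) * invscale), 0.0f);
            val   += src[sx] * w;
            total += w;
        }
        return total > 0.0f ? val / total : 0.0f;
    }

For pure upscaling (sf >= 1) the support stays at one pixel and the filter degenerates to ordinary bilinear interpolation, so the antialias path only changes results when downscaling.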

@@ -989,6 +989,10 @@ struct ggml_cuda_concurrent_event {
    int n_streams = 0;
    std::unordered_map<const ggml_tensor *, int> stream_mapping;

    // Original order of nodes in this concurrent region (before interleaving)
    // Used to restore grouping for fusion within streams
    std::vector<const ggml_tensor *> original_order;

    const ggml_tensor * join_node;

    ggml_cuda_concurrent_event() = default;

@@ -1011,6 +1015,7 @@ struct ggml_cuda_concurrent_event {
        , fork_event(other.fork_event)
        , n_streams(other.n_streams)
        , stream_mapping(std::move(other.stream_mapping))
        , original_order(std::move(other.original_order))
        , join_node(other.join_node) {
        other.fork_event = nullptr;
    }

@@ -1121,11 +1126,9 @@ struct ggml_cuda_concurrent_event {
};

struct ggml_cuda_stream_context {
    std::vector<const ggml_tensor *> original_nodes;
    std::unordered_map<const ggml_tensor *, ggml_cuda_concurrent_event> concurrent_events;

    void reset() {
        original_nodes.clear();
        concurrent_events.clear();
    }
};

@@ -3238,9 +3238,56 @@ static void evaluate_and_capture_cuda_graph(ggml_backend_cuda_context * cuda_ctx
            }
        }
        if (should_launch_concurrent_events) {
            //Restore the original graph to enable fusion within the streams
            cgraph->nodes = const_cast<ggml_tensor **>(stream_ctx.original_nodes.data());
            cgraph->n_nodes = (int) stream_ctx.original_nodes.size();
            // Restore original node order within each concurrent region to enable fusion within streams

            std::unordered_map<const ggml_tensor *, int> node_to_idx;
            node_to_idx.reserve(cgraph->n_nodes);
            for (int i = 0; i < cgraph->n_nodes; ++i) {
                node_to_idx[cgraph->nodes[i]] = i;
            }

            for (auto & [fork_node, event] : stream_ctx.concurrent_events) {
                // Find positions of all nodes from this event in the current graph
                std::vector<int> positions;
                positions.reserve(event.original_order.size());

                bool all_found = true;
                for (const ggml_tensor * orig_node : event.original_order) {
                    auto it = node_to_idx.find(orig_node);
                    if (it != node_to_idx.end()) {
                        positions.push_back(it->second);
                    } else {
                        all_found = false;
                        break;
                    }
                }

                if (!all_found || positions.size() != event.original_order.size()) {
                    continue;
                }

                // Sort positions to get contiguous range
                std::vector<int> sorted_positions = positions;
                std::sort(sorted_positions.begin(), sorted_positions.end());

                bool is_contiguous = true;
                for (size_t i = 1; i < sorted_positions.size(); ++i) {
                    if (sorted_positions[i] != sorted_positions[i-1] + 1) {
                        is_contiguous = false;
                        break;
                    }
                }

                if (!is_contiguous) {
                    continue;
                }

                // Restore original order at the sorted positions
                int start_pos = sorted_positions[0];
                for (size_t i = 0; i < event.original_order.size(); ++i) {
                    cgraph->nodes[start_pos + i] = const_cast<ggml_tensor *>(event.original_order[i]);
                }
            }
        }

        for (int i = 0; i < cgraph->n_nodes; i++) {

@@ -3274,7 +3321,6 @@ static void evaluate_and_capture_cuda_graph(ggml_backend_cuda_context * cuda_ctx
                    GGML_LOG_DEBUG("Setting stream no to %d for node %s\n", cuda_ctx->curr_stream_no, node->name);
                }
            }
            prev_i = i;

#ifdef GGML_CUDA_DEBUG
            const int nodes_fused = i - prev_i - 1;

@@ -3282,6 +3328,7 @@ static void evaluate_and_capture_cuda_graph(ggml_backend_cuda_context * cuda_ctx
                GGML_LOG_INFO("nodes_fused: %d\n", nodes_fused);
            }
#endif
            prev_i = i;

            if (ggml_is_empty(node) || node->op == GGML_OP_RESHAPE || node->op == GGML_OP_TRANSPOSE || node->op == GGML_OP_VIEW || node->op == GGML_OP_PERMUTE || node->op == GGML_OP_NONE) {
                continue;

@@ -3805,14 +3852,6 @@ static void ggml_backend_cuda_graph_optimize(ggml_backend_t backend, ggml_cgraph
    // store {fork_idx, join_idx}
    std::vector<std::pair<int, int>> concurrent_node_ranges;

    // save the original nodes
    std::vector<const ggml_tensor *> original_nodes;
    original_nodes.reserve(cgraph->n_nodes);
    for (int i = 0; i < cgraph->n_nodes; ++i) {
        original_nodes.push_back(cgraph->nodes[i]);
    }
    cuda_ctx->stream_context().original_nodes = std::move(original_nodes);

    for (const auto & [root_node, count] : fan_out) {
        if (count >= min_fan_out && count <= max_fan_out) {
            const int root_node_idx = node_indices[root_node];

@@ -3917,6 +3956,13 @@ static void ggml_backend_cuda_graph_optimize(ggml_backend_t backend, ggml_cgraph
                continue;
            }

            // Save the original order of nodes in this region before interleaving
            // This is used later to restore grouping for fusion within streams
            concurrent_event.original_order.reserve(total_branch_nodes);
            for (int i = fork_node_idx + 1; i < join_node_idx; ++i) {
                concurrent_event.original_order.push_back(cgraph->nodes[i]);
            }

            std::unordered_map<const ggml_tensor *, ggml_cuda_concurrent_event> & concurrent_events = cuda_ctx->stream_context().concurrent_events;
            GGML_ASSERT(concurrent_events.find(root_node) == concurrent_events.end());
            concurrent_events.emplace(root_node, std::move(concurrent_event));

@@ -81,6 +81,76 @@ static __global__ void upscale_f32_bilinear(const float * x, float * dst,
    dst[index] = result;
}

// Similar to F.interpolate(..., mode="bilinear", align_corners=False, antialias=True)
// https://github.com/pytorch/pytorch/blob/8871ff29b743948d1225389d5b7068f37b22750b/aten/src/ATen/native/cpu/UpSampleKernel.cpp
static __global__ void upscale_f32_bilinear_antialias(const float * src0, float * dst,
        const int nb00, const int nb01, const int nb02, const int nb03,
        const int ne00_src, const int ne01_src,
        const int ne10_dst, const int ne11_dst, const int ne12_dst, const int ne13_dst,
        const float sf0, const float sf1, const float sf2, const float sf3,
        const float pixel_offset) {
    const int64_t index = threadIdx.x + blockIdx.x * blockDim.x;
    const int64_t dst_total_elements = ne10_dst * ne11_dst * ne12_dst * ne13_dst;

    if (index >= dst_total_elements) {
        return;
    }

    const int i10_dst = index % ne10_dst;
    const int i11_dst = (index / ne10_dst) % ne11_dst;
    const int i12_dst = (index / (ne10_dst * ne11_dst)) % ne12_dst;
    const int i13_dst = index / (ne10_dst * ne11_dst * ne12_dst);

    const int i02_src = (int)(i12_dst / sf2);
    const int i03_src = (int)(i13_dst / sf3);

    const float y = ((float)i11_dst + pixel_offset) / sf1;
    const float x = ((float)i10_dst + pixel_offset) / sf0;

    // support and invscale, minimum 1 pixel for bilinear
    const float support1 = max(1.0f / sf1, 1.0f);
    const float invscale1 = 1.0f / support1;
    const float support0 = max(1.0f / sf0, 1.0f);
    const float invscale0 = 1.0f / support0;

    // the range of source pixels that contribute
    const int64_t x_min = max(int64_t(0), int64_t(x - support0 + pixel_offset));
    const int64_t x_max = min(int64_t(ne00_src), int64_t(x + support0 + pixel_offset));
    const int64_t y_min = max(int64_t(0), int64_t(y - support1 + pixel_offset));
    const int64_t y_max = min(int64_t(ne01_src), int64_t(y + support1 + pixel_offset));

    // bilinear filter with antialiasing
    float val = 0.0f;
    float total_weight = 0.0f;

    auto triangle_filter = [](float x) -> float {
        return max(1.0f - fabsf(x), 0.0f);
    };

    for (int64_t sy = y_min; sy < y_max; sy++) {
        const float weight_y = triangle_filter((sy - y + pixel_offset) * invscale1);

        for (int64_t sx = x_min; sx < x_max; sx++) {
            const float weight_x = triangle_filter((sx - x + pixel_offset) * invscale0);
            const float weight = weight_x * weight_y;

            if (weight <= 0.0f) {
                continue;
            }

            const float pixel = *(const float *)((const char *)src0 + sx*nb00 + sy*nb01 + i02_src*nb02 + i03_src*nb03);
            val += pixel * weight;
            total_weight += weight;
        }
    }

    if (total_weight > 0.0f) {
        val /= total_weight;
    }

    dst[index] = val;
}

namespace bicubic_interpolation {
// https://en.wikipedia.org/wiki/Bicubic_interpolation#Bicubic_convolution_algorithm
__device__ const float a = -0.75f; // use alpha = -0.75 (same as PyTorch)

@@ -161,11 +231,15 @@ static void upscale_f32_bilinear_cuda(const float * x, float * dst,
        const int ne00_src, const int ne01_src,
        const int ne10_dst, const int ne11_dst, const int ne12_dst, const int ne13_dst,
        const float sf0, const float sf1, const float sf2, const float sf3,
        const float pixel_offset, cudaStream_t stream) {
        const float pixel_offset, bool antialias, cudaStream_t stream) {
    const int64_t dst_size = ne10_dst * ne11_dst * ne12_dst * ne13_dst;
    const int64_t num_blocks = (dst_size + CUDA_UPSCALE_BLOCK_SIZE - 1) / CUDA_UPSCALE_BLOCK_SIZE;

    upscale_f32_bilinear<<<num_blocks, CUDA_UPSCALE_BLOCK_SIZE,0,stream>>>(x, dst, nb00, nb01, nb02, nb03, ne00_src, ne01_src, ne10_dst, ne11_dst, ne12_dst, ne13_dst, sf0, sf1, sf2, sf3, pixel_offset);
    if (antialias) {
        upscale_f32_bilinear_antialias<<<num_blocks, CUDA_UPSCALE_BLOCK_SIZE,0,stream>>>(x, dst, nb00, nb01, nb02, nb03, ne00_src, ne01_src, ne10_dst, ne11_dst, ne12_dst, ne13_dst, sf0, sf1, sf2, sf3, pixel_offset);
    } else {
        upscale_f32_bilinear<<<num_blocks, CUDA_UPSCALE_BLOCK_SIZE,0,stream>>>(x, dst, nb00, nb01, nb02, nb03, ne00_src, ne01_src, ne10_dst, ne11_dst, ne12_dst, ne13_dst, sf0, sf1, sf2, sf3, pixel_offset);
    }
}

static void upscale_f32_bicubic_cuda(const float * x, float * dst,

@@ -207,9 +281,10 @@ void ggml_cuda_op_upscale(ggml_backend_cuda_context & ctx, ggml_tensor * dst) {
    if (mode == GGML_SCALE_MODE_NEAREST) {
        upscale_f32_cuda(src0_d, dst_d, src0->nb[0], src0->nb[1], src0->nb[2], src0->nb[3], dst->ne[0], dst->ne[1], dst->ne[2], dst->ne[3], sf0, sf1, sf2, sf3, stream);
    } else if (mode == GGML_SCALE_MODE_BILINEAR) {
        const bool antialias = (mode_flags & GGML_SCALE_FLAG_ANTIALIAS);
        upscale_f32_bilinear_cuda(src0_d, dst_d, src0->nb[0], src0->nb[1], src0->nb[2], src0->nb[3],
            src0->ne[0], src0->ne[1], dst->ne[0], dst->ne[1], dst->ne[2], dst->ne[3],
            sf0, sf1, sf2, sf3, pixel_offset, stream);
            sf0, sf1, sf2, sf3, pixel_offset, antialias, stream);
    } else if (mode == GGML_SCALE_MODE_BICUBIC) {
        upscale_f32_bicubic_cuda(src0_d, dst_d, src0->nb[0], src0->nb[1], src0->nb[2], src0->nb[3],
            src0->ne[0], src0->ne[1], dst->ne[0], dst->ne[1], dst->ne[2], dst->ne[3],
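
A quick numeric check of the contribution window in the CUDA kernel above (editor's aid, not part of the patch):

    // 2x downscale: sf0 = 0.5, pixel_offset = 0.5, support0 = max(1/0.5, 1) = 2.
    // For dst column i10_dst = 3: x = (3 + 0.5) / 0.5 = 7.0
    //   x_min = int64_t(7.0 - 2 + 0.5) = 5
    //   x_max = int64_t(7.0 + 2 + 0.5) = 9   // taps sx = 5, 6, 7, 8
    // Each tap is weighted by triangle_filter((sx - 7.0 + 0.5) * 0.5), giving
    // weights 0.25, 0.75, 0.75, 0.25, and val is divided by total_weight at the end.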

@@ -894,7 +894,7 @@ bool ggml_metal_device_supports_op(ggml_metal_device_t dev, const struct ggml_te
        case GGML_OP_POOL_1D:
            return false;
        case GGML_OP_UPSCALE:
            return op->src[0]->type == GGML_TYPE_F32 && op->op_params[0] == GGML_SCALE_MODE_NEAREST;
            return op->src[0]->type == GGML_TYPE_F32 && op->op_params[0] == GGML_SCALE_MODE_NEAREST && !(op->op_params[0] & GGML_SCALE_FLAG_ANTIALIAS);
        case GGML_OP_POOL_2D:
            return op->src[0]->type == GGML_TYPE_F32;
        case GGML_OP_PAD:

@@ -912,6 +912,7 @@ bool ggml_metal_device_supports_op(ggml_metal_device_t dev, const struct ggml_te
                // for new head sizes, add checks here
                if (op->src[0]->ne[0] != 32 &&
                    op->src[0]->ne[0] != 40 &&
                    op->src[0]->ne[0] != 48 &&
                    op->src[0]->ne[0] != 64 &&
                    op->src[0]->ne[0] != 72 &&
                    op->src[0]->ne[0] != 80 &&

@@ -5757,6 +5757,7 @@ typedef decltype(kernel_flash_attn_ext<FA_TYPES, half4x4, 1, dequantize_f16, hal

template [[host_name("kernel_flash_attn_ext_f32_dk32_dv32" )]] kernel flash_attn_ext_t kernel_flash_attn_ext<FA_TYPES_F32, float4x4, 1, dequantize_f32, float4x4, 1, dequantize_f32, 32, 32>;
template [[host_name("kernel_flash_attn_ext_f32_dk40_dv40" )]] kernel flash_attn_ext_t kernel_flash_attn_ext<FA_TYPES_F32, float4x4, 1, dequantize_f32, float4x4, 1, dequantize_f32, 40, 40>;
template [[host_name("kernel_flash_attn_ext_f32_dk48_dv48" )]] kernel flash_attn_ext_t kernel_flash_attn_ext<FA_TYPES_F32, float4x4, 1, dequantize_f32, float4x4, 1, dequantize_f32, 48, 48>;
template [[host_name("kernel_flash_attn_ext_f32_dk64_dv64" )]] kernel flash_attn_ext_t kernel_flash_attn_ext<FA_TYPES_F32, float4x4, 1, dequantize_f32, float4x4, 1, dequantize_f32, 64, 64>;
template [[host_name("kernel_flash_attn_ext_f32_dk72_dv72" )]] kernel flash_attn_ext_t kernel_flash_attn_ext<FA_TYPES_F32, float4x4, 1, dequantize_f32, float4x4, 1, dequantize_f32, 72, 72>;
template [[host_name("kernel_flash_attn_ext_f32_dk80_dv80" )]] kernel flash_attn_ext_t kernel_flash_attn_ext<FA_TYPES_F32, float4x4, 1, dequantize_f32, float4x4, 1, dequantize_f32, 80, 80>;

@@ -5770,6 +5771,7 @@ template [[host_name("kernel_flash_attn_ext_f32_dk576_dv512")]] kernel flash_at

template [[host_name("kernel_flash_attn_ext_f16_dk32_dv32" )]] kernel flash_attn_ext_t kernel_flash_attn_ext<FA_TYPES, half4x4, 1, dequantize_f16, half4x4, 1, dequantize_f16, 32, 32>;
template [[host_name("kernel_flash_attn_ext_f16_dk40_dv40" )]] kernel flash_attn_ext_t kernel_flash_attn_ext<FA_TYPES, half4x4, 1, dequantize_f16, half4x4, 1, dequantize_f16, 40, 40>;
template [[host_name("kernel_flash_attn_ext_f16_dk48_dv48" )]] kernel flash_attn_ext_t kernel_flash_attn_ext<FA_TYPES, half4x4, 1, dequantize_f16, half4x4, 1, dequantize_f16, 48, 48>;
template [[host_name("kernel_flash_attn_ext_f16_dk64_dv64" )]] kernel flash_attn_ext_t kernel_flash_attn_ext<FA_TYPES, half4x4, 1, dequantize_f16, half4x4, 1, dequantize_f16, 64, 64>;
template [[host_name("kernel_flash_attn_ext_f16_dk72_dv72" )]] kernel flash_attn_ext_t kernel_flash_attn_ext<FA_TYPES, half4x4, 1, dequantize_f16, half4x4, 1, dequantize_f16, 72, 72>;
template [[host_name("kernel_flash_attn_ext_f16_dk80_dv80" )]] kernel flash_attn_ext_t kernel_flash_attn_ext<FA_TYPES, half4x4, 1, dequantize_f16, half4x4, 1, dequantize_f16, 80, 80>;

@@ -5784,6 +5786,7 @@ template [[host_name("kernel_flash_attn_ext_f16_dk576_dv512")]] kernel flash_at
#if defined(GGML_METAL_HAS_BF16)
template [[host_name("kernel_flash_attn_ext_bf16_dk32_dv32" )]] kernel flash_attn_ext_t kernel_flash_attn_ext<FA_TYPES_BF, bfloat4x4, 1, dequantize_bf16, bfloat4x4, 1, dequantize_bf16, 32, 32>;
template [[host_name("kernel_flash_attn_ext_bf16_dk40_dv40" )]] kernel flash_attn_ext_t kernel_flash_attn_ext<FA_TYPES_BF, bfloat4x4, 1, dequantize_bf16, bfloat4x4, 1, dequantize_bf16, 40, 40>;
template [[host_name("kernel_flash_attn_ext_bf16_dk48_dv48" )]] kernel flash_attn_ext_t kernel_flash_attn_ext<FA_TYPES_BF, bfloat4x4, 1, dequantize_bf16, bfloat4x4, 1, dequantize_bf16, 48, 48>;
template [[host_name("kernel_flash_attn_ext_bf16_dk64_dv64" )]] kernel flash_attn_ext_t kernel_flash_attn_ext<FA_TYPES_BF, bfloat4x4, 1, dequantize_bf16, bfloat4x4, 1, dequantize_bf16, 64, 64>;
template [[host_name("kernel_flash_attn_ext_bf16_dk72_dv72" )]] kernel flash_attn_ext_t kernel_flash_attn_ext<FA_TYPES_BF, bfloat4x4, 1, dequantize_bf16, bfloat4x4, 1, dequantize_bf16, 72, 72>;
template [[host_name("kernel_flash_attn_ext_bf16_dk80_dv80" )]] kernel flash_attn_ext_t kernel_flash_attn_ext<FA_TYPES_BF, bfloat4x4, 1, dequantize_bf16, bfloat4x4, 1, dequantize_bf16, 80, 80>;

@@ -5798,6 +5801,7 @@ template [[host_name("kernel_flash_attn_ext_bf16_dk576_dv512")]] kernel flash_at

template [[host_name("kernel_flash_attn_ext_q4_0_dk32_dv32" )]] kernel flash_attn_ext_t kernel_flash_attn_ext<FA_TYPES, block_q4_0, 2, dequantize_q4_0, block_q4_0, 2, dequantize_q4_0, 32, 32>;
template [[host_name("kernel_flash_attn_ext_q4_0_dk40_dv40" )]] kernel flash_attn_ext_t kernel_flash_attn_ext<FA_TYPES, block_q4_0, 2, dequantize_q4_0, block_q4_0, 2, dequantize_q4_0, 40, 40>;
template [[host_name("kernel_flash_attn_ext_q4_0_dk48_dv48" )]] kernel flash_attn_ext_t kernel_flash_attn_ext<FA_TYPES, block_q4_0, 2, dequantize_q4_0, block_q4_0, 2, dequantize_q4_0, 48, 48>;
template [[host_name("kernel_flash_attn_ext_q4_0_dk64_dv64" )]] kernel flash_attn_ext_t kernel_flash_attn_ext<FA_TYPES, block_q4_0, 2, dequantize_q4_0, block_q4_0, 2, dequantize_q4_0, 64, 64>;
template [[host_name("kernel_flash_attn_ext_q4_0_dk72_dv72" )]] kernel flash_attn_ext_t kernel_flash_attn_ext<FA_TYPES, block_q4_0, 2, dequantize_q4_0, block_q4_0, 2, dequantize_q4_0, 72, 72>;
template [[host_name("kernel_flash_attn_ext_q4_0_dk80_dv80" )]] kernel flash_attn_ext_t kernel_flash_attn_ext<FA_TYPES, block_q4_0, 2, dequantize_q4_0, block_q4_0, 2, dequantize_q4_0, 80, 80>;

@@ -5811,6 +5815,7 @@ template [[host_name("kernel_flash_attn_ext_q4_0_dk576_dv512")]] kernel flash_at

template [[host_name("kernel_flash_attn_ext_q4_1_dk32_dv32" )]] kernel flash_attn_ext_t kernel_flash_attn_ext<FA_TYPES, block_q4_1, 2, dequantize_q4_1, block_q4_1, 2, dequantize_q4_1, 32, 32>;
template [[host_name("kernel_flash_attn_ext_q4_1_dk40_dv40" )]] kernel flash_attn_ext_t kernel_flash_attn_ext<FA_TYPES, block_q4_1, 2, dequantize_q4_1, block_q4_1, 2, dequantize_q4_1, 40, 40>;
template [[host_name("kernel_flash_attn_ext_q4_1_dk48_dv48" )]] kernel flash_attn_ext_t kernel_flash_attn_ext<FA_TYPES, block_q4_1, 2, dequantize_q4_1, block_q4_1, 2, dequantize_q4_1, 48, 48>;
template [[host_name("kernel_flash_attn_ext_q4_1_dk64_dv64" )]] kernel flash_attn_ext_t kernel_flash_attn_ext<FA_TYPES, block_q4_1, 2, dequantize_q4_1, block_q4_1, 2, dequantize_q4_1, 64, 64>;
template [[host_name("kernel_flash_attn_ext_q4_1_dk72_dv72" )]] kernel flash_attn_ext_t kernel_flash_attn_ext<FA_TYPES, block_q4_1, 2, dequantize_q4_1, block_q4_1, 2, dequantize_q4_1, 72, 72>;
template [[host_name("kernel_flash_attn_ext_q4_1_dk80_dv80" )]] kernel flash_attn_ext_t kernel_flash_attn_ext<FA_TYPES, block_q4_1, 2, dequantize_q4_1, block_q4_1, 2, dequantize_q4_1, 80, 80>;

@@ -5824,6 +5829,7 @@ template [[host_name("kernel_flash_attn_ext_q4_1_dk576_dv512")]] kernel flash_at

template [[host_name("kernel_flash_attn_ext_q5_0_dk32_dv32" )]] kernel flash_attn_ext_t kernel_flash_attn_ext<FA_TYPES, block_q5_0, 2, dequantize_q5_0, block_q5_0, 2, dequantize_q5_0, 32, 32>;
template [[host_name("kernel_flash_attn_ext_q5_0_dk40_dv40" )]] kernel flash_attn_ext_t kernel_flash_attn_ext<FA_TYPES, block_q5_0, 2, dequantize_q5_0, block_q5_0, 2, dequantize_q5_0, 40, 40>;
template [[host_name("kernel_flash_attn_ext_q5_0_dk48_dv48" )]] kernel flash_attn_ext_t kernel_flash_attn_ext<FA_TYPES, block_q5_0, 2, dequantize_q5_0, block_q5_0, 2, dequantize_q5_0, 48, 48>;
template [[host_name("kernel_flash_attn_ext_q5_0_dk64_dv64" )]] kernel flash_attn_ext_t kernel_flash_attn_ext<FA_TYPES, block_q5_0, 2, dequantize_q5_0, block_q5_0, 2, dequantize_q5_0, 64, 64>;
template [[host_name("kernel_flash_attn_ext_q5_0_dk72_dv72" )]] kernel flash_attn_ext_t kernel_flash_attn_ext<FA_TYPES, block_q5_0, 2, dequantize_q5_0, block_q5_0, 2, dequantize_q5_0, 72, 72>;
template [[host_name("kernel_flash_attn_ext_q5_0_dk80_dv80" )]] kernel flash_attn_ext_t kernel_flash_attn_ext<FA_TYPES, block_q5_0, 2, dequantize_q5_0, block_q5_0, 2, dequantize_q5_0, 80, 80>;

@@ -5837,6 +5843,7 @@ template [[host_name("kernel_flash_attn_ext_q5_0_dk576_dv512")]] kernel flash_at

template [[host_name("kernel_flash_attn_ext_q5_1_dk32_dv32" )]] kernel flash_attn_ext_t kernel_flash_attn_ext<FA_TYPES, block_q5_1, 2, dequantize_q5_1, block_q5_1, 2, dequantize_q5_1, 32, 32>;
template [[host_name("kernel_flash_attn_ext_q5_1_dk40_dv40" )]] kernel flash_attn_ext_t kernel_flash_attn_ext<FA_TYPES, block_q5_1, 2, dequantize_q5_1, block_q5_1, 2, dequantize_q5_1, 40, 40>;
template [[host_name("kernel_flash_attn_ext_q5_1_dk48_dv48" )]] kernel flash_attn_ext_t kernel_flash_attn_ext<FA_TYPES, block_q5_1, 2, dequantize_q5_1, block_q5_1, 2, dequantize_q5_1, 48, 48>;
template [[host_name("kernel_flash_attn_ext_q5_1_dk64_dv64" )]] kernel flash_attn_ext_t kernel_flash_attn_ext<FA_TYPES, block_q5_1, 2, dequantize_q5_1, block_q5_1, 2, dequantize_q5_1, 64, 64>;
template [[host_name("kernel_flash_attn_ext_q5_1_dk72_dv72" )]] kernel flash_attn_ext_t kernel_flash_attn_ext<FA_TYPES, block_q5_1, 2, dequantize_q5_1, block_q5_1, 2, dequantize_q5_1, 72, 72>;
template [[host_name("kernel_flash_attn_ext_q5_1_dk80_dv80" )]] kernel flash_attn_ext_t kernel_flash_attn_ext<FA_TYPES, block_q5_1, 2, dequantize_q5_1, block_q5_1, 2, dequantize_q5_1, 80, 80>;

@@ -5850,6 +5857,7 @@ template [[host_name("kernel_flash_attn_ext_q5_1_dk576_dv512")]] kernel flash_at

template [[host_name("kernel_flash_attn_ext_q8_0_dk32_dv32" )]] kernel flash_attn_ext_t kernel_flash_attn_ext<FA_TYPES, block_q8_0, 2, dequantize_q8_0, block_q8_0, 2, dequantize_q8_0, 32, 32>;
template [[host_name("kernel_flash_attn_ext_q8_0_dk40_dv40" )]] kernel flash_attn_ext_t kernel_flash_attn_ext<FA_TYPES, block_q8_0, 2, dequantize_q8_0, block_q8_0, 2, dequantize_q8_0, 40, 40>;
template [[host_name("kernel_flash_attn_ext_q8_0_dk48_dv48" )]] kernel flash_attn_ext_t kernel_flash_attn_ext<FA_TYPES, block_q8_0, 2, dequantize_q8_0, block_q8_0, 2, dequantize_q8_0, 48, 48>;
template [[host_name("kernel_flash_attn_ext_q8_0_dk64_dv64" )]] kernel flash_attn_ext_t kernel_flash_attn_ext<FA_TYPES, block_q8_0, 2, dequantize_q8_0, block_q8_0, 2, dequantize_q8_0, 64, 64>;
template [[host_name("kernel_flash_attn_ext_q8_0_dk72_dv72" )]] kernel flash_attn_ext_t kernel_flash_attn_ext<FA_TYPES, block_q8_0, 2, dequantize_q8_0, block_q8_0, 2, dequantize_q8_0, 72, 72>;
template [[host_name("kernel_flash_attn_ext_q8_0_dk80_dv80" )]] kernel flash_attn_ext_t kernel_flash_attn_ext<FA_TYPES, block_q8_0, 2, dequantize_q8_0, block_q8_0, 2, dequantize_q8_0, 80, 80>;

@@ -3086,8 +3086,9 @@ static bool ggml_opencl_supports_op(ggml_backend_dev_t dev, const struct ggml_te
            return op->src[0]->type == GGML_TYPE_F32 && op->type == GGML_TYPE_F32;
        case GGML_OP_UPSCALE: {
            ggml_scale_mode mode = (ggml_scale_mode)(ggml_get_op_params_i32(op, 0) & 0xFF);
            const bool antialias = (ggml_scale_mode)(ggml_get_op_params_i32(op, 0) & GGML_SCALE_FLAG_ANTIALIAS);
            return op->src[0]->type == GGML_TYPE_F32 && op->type == GGML_TYPE_F32 &&
                (mode == GGML_SCALE_MODE_NEAREST || mode == GGML_SCALE_MODE_BILINEAR);
                (mode == GGML_SCALE_MODE_NEAREST || mode == GGML_SCALE_MODE_BILINEAR) && !antialias;
        }
        case GGML_OP_CONV_2D:
            return (op->src[0]->type == GGML_TYPE_F16 && op->src[1]->type == GGML_TYPE_F16 && op->type == GGML_TYPE_F16) ||

@@ -1787,6 +1787,7 @@ static void argsort_f32_i32_sycl(const float *x, int *dst, const int ncols,
    const sycl::range<3> block_dims(1, 1, nth);
    const sycl::range<3> block_nums(1, nrows, 1);
    const size_t shared_mem = ncols_pad * sizeof(int);
    GGML_ASSERT(shared_mem<=ggml_sycl_info().devices[device].smpbo);

    if (order == GGML_SORT_ORDER_ASC) {
        stream->submit([&](sycl::handler &cgh) {

@@ -4348,6 +4349,9 @@ static ggml_backend_buffer_t ggml_backend_sycl_device_buffer_from_host_ptr(ggml_
}

static bool ggml_backend_sycl_device_supports_op(ggml_backend_dev_t dev, const ggml_tensor * op) {
    ggml_backend_sycl_device_context *sycl_ctx =
        (ggml_backend_sycl_device_context *)dev->context;
    int device = sycl_ctx->device;
    switch (op->op) {
        case GGML_OP_CONV_TRANSPOSE_1D:
            {

@@ -4597,12 +4601,14 @@ static bool ggml_backend_sycl_device_supports_op(ggml_backend_dev_t dev, const g
        case GGML_OP_IM2COL:
            return true;
        case GGML_OP_UPSCALE:
            return op->src[0]->type == GGML_TYPE_F32 && op->op_params[0] == GGML_SCALE_MODE_NEAREST;
            return op->src[0]->type == GGML_TYPE_F32 && op->op_params[0] == GGML_SCALE_MODE_NEAREST && !(op->op_params[0] & GGML_SCALE_FLAG_ANTIALIAS);
        case GGML_OP_SUM:
        case GGML_OP_SUM_ROWS:
        case GGML_OP_MEAN:
        case GGML_OP_ARGSORT:
            return ggml_is_contiguous(op->src[0]);
        case GGML_OP_ARGSORT:
            return op->src[0]->ne[0] * sizeof(int) <=
                ggml_sycl_info().devices[device].smpbo;
        case GGML_OP_POOL_2D:
        case GGML_OP_ACC:
            return true;

@@ -1227,6 +1227,7 @@ struct vk_op_topk_push_constants {
    uint32_t orig_ncols;
    uint32_t ncols_input;
    uint32_t ncols_output;
    uint32_t k;
    uint32_t nrows;
    uint32_t first_pass;
    uint32_t last_pass;

@@ -1673,6 +1674,14 @@ class vk_perf_logger {
            timings[name.str()].push_back(time);
            return;
        }
        if (node->op == GGML_OP_TOP_K) {
            std::stringstream name;
            name << ggml_op_name(node->op) <<
                " K=" << node->ne[0] <<
                " (" << node->src[0]->ne[0] << "," << node->src[0]->ne[1] << "," << node->src[0]->ne[2] << "," << node->src[0]->ne[3] << ")";
            timings[name.str()].push_back(time);
            return;
        }
        timings[ggml_op_name(node->op)].push_back(time);
    }
  private:

@@ -10345,17 +10354,8 @@ static void ggml_vk_topk(ggml_backend_vk_context * ctx, vk_context& subctx, cons
    uint32_t nrows = ggml_nrows(src0);
    uint32_t k = dst->ne[0];

    vk_op_topk_push_constants pc { ncols, ncols, k, nrows, 0, 0 };
    vk_op_topk_push_constants pc { ncols, ncols, ncols, k, nrows, 0, 0 };

    // Reserve space for ivec2 per element, double buffered
    const size_t dbl_buf_size = size_t{ncols} * nrows * 2 * sizeof(int);
    const size_t x_sz = dbl_buf_size * 2;
    uint32_t dbl_buf_index = 0;

    if (ctx->prealloc_size_x < x_sz) {
        ctx->prealloc_size_x = x_sz;
        ggml_vk_preallocate_buffers(ctx, subctx);
    }
    if (ctx->prealloc_x_need_sync) {
        ggml_vk_sync_buffers(ctx, subctx);
    }

@@ -10370,8 +10370,9 @@ static void ggml_vk_topk(ggml_backend_vk_context * ctx, vk_context& subctx, cons
    // largest elements. Repeat until we have the top K elements.
    // Need to do at least one iteration to write out the results.
    bool done_one_iter = false;
    uint32_t dbl_buf_index = 0;
    size_t dbl_buf_size;
    while (num_elements > k || !done_one_iter) {
        done_one_iter = true;

        // Prefer going as small as num_topk_pipelines - 3 for perf reasons.
        // But if K is larger, then we need a larger workgroup

@@ -10411,6 +10412,21 @@ static void ggml_vk_topk(ggml_backend_vk_context * ctx, vk_context& subctx, cons
        // Number of elements remaining after this pass
        uint32_t num_dst_elements = (num_elements / pipeline->wg_denoms[0]) * k + std::min(k, num_elements % pipeline->wg_denoms[0]);

        pc2.ncols_output = num_dst_elements;

        if (!done_one_iter) {
            // Reserve space for ivec2 per element, double buffered
            // K per workgroup per row
            dbl_buf_size = num_dst_elements * nrows * 2 * sizeof(int);
            dbl_buf_size = ROUNDUP_POW2(dbl_buf_size, ctx->device->properties.limits.minStorageBufferOffsetAlignment);
            const size_t x_sz = dbl_buf_size * 2;

            if (ctx->prealloc_size_x < x_sz) {
                ctx->prealloc_size_x = x_sz;
                ggml_vk_preallocate_buffers(ctx, subctx);
            }
        }

        vk_subbuffer src_buf;
        vk_subbuffer dst_buf;

@@ -10436,6 +10452,7 @@ static void ggml_vk_topk(ggml_backend_vk_context * ctx, vk_context& subctx, cons
        if (num_elements > k) {
            ggml_vk_sync_buffers(ctx, subctx);
        }
        done_one_iter = true;
    }
    ctx->prealloc_x_need_sync = true;
}

@@ -14113,6 +14130,7 @@ static bool ggml_backend_vk_device_supports_op(ggml_backend_dev_t dev, const ggm
            }
            return true;
        case GGML_OP_UPSCALE:
            return op->src[0]->type == GGML_TYPE_F32 && !(op->op_params[0] & GGML_SCALE_FLAG_ANTIALIAS);
        case GGML_OP_ACC:
            return op->src[0]->type == GGML_TYPE_F32;
        case GGML_OP_CONCAT:
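
The push-constant change above (separate `ncols_input`/`ncols_output`/`k` fields) supports a multi-pass reduction whose shrink rate is easy to state on the host side. A small sketch (names are local to this example, not part of the patch):

    #include <algorithm>
    #include <cstdint>

    // After one top-k pass, each workgroup of `wg` input elements keeps at most
    // `k` candidates, matching num_dst_elements in ggml_vk_topk above.
    static uint32_t topk_elements_after_pass(uint32_t n, uint32_t wg, uint32_t k) {
        return (n / wg) * k + std::min(k, n % wg);
    }

    // e.g. n = 100000, wg = 1024, k = 40:
    //   pass 1: 97*40 + 40 = 3920
    //   pass 2:  3*40 + 40 =  160
    //   pass 3:  0*40 + 40 =   40  (== k, loop exits)

This is also why the double-buffer size can now be computed per pass instead of being reserved for the full `ncols` up front.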

@@ -19,6 +19,7 @@ layout (push_constant) uniform parameter {
    uint orig_ncols;
    uint ncols_input;
    uint ncols_output;
    uint k;
    uint nrows;
    uint first_pass;
    uint last_pass;

@@ -36,7 +37,7 @@ void topk(bool needs_bounds_check, const uint row) {
            const uint row_offset = row * p.ncols_input;
            dst_row[col] = ivec2(gl_GlobalInvocationID.x, floatBitsToInt(data_a[row_offset + gl_GlobalInvocationID.x]));
        } else {
            const uint row_offset = row * p.orig_ncols;
            const uint row_offset = row * p.ncols_input;
            dst_row[col] = data_s[row_offset + gl_GlobalInvocationID.x];
        }
    } else {

@@ -44,7 +45,7 @@ void topk(bool needs_bounds_check, const uint row) {
    }
    barrier();

    if (p.ncols_output == 1) {
    if (p.k == 1) {
        // Fast path for single output - just do a max reduction
        [[unroll]] for (int s = BLOCK_SIZE / 2; s >= 1; s /= 2) {
            if (col < s) {

@@ -84,13 +85,17 @@ void topk(bool needs_bounds_check, const uint row) {
        }
    }

    if (col < p.ncols_output && gl_GlobalInvocationID.x < p.orig_ncols) {
    if (col < p.k) {
        if (p.last_pass != 0) {
            const uint row_offset = row * p.ncols_output;
            data_d[row_offset + col] = dst_row[col].x;
            if (gl_GlobalInvocationID.x < p.ncols_input) {
                const uint row_offset = row * p.k;
                data_d[row_offset + col] = dst_row[col].x;
            }
        } else {
            const uint row_offset = row * p.orig_ncols + gl_WorkGroupID.x * p.ncols_output;
            data_t[row_offset + col] = dst_row[col];
            if (gl_WorkGroupID.x * p.k + col < p.ncols_output) {
                const uint row_offset = row * p.ncols_output + gl_WorkGroupID.x * p.k;
                data_t[row_offset + col] = dst_row[col];
            }
        }
    }
}

@@ -25,6 +25,7 @@ layout (push_constant) uniform parameter {
    uint orig_ncols;
    uint ncols_input;
    uint ncols_output;
    uint k;
    uint nrows;
    uint first_pass;
    uint last_pass;

@@ -60,7 +61,7 @@ void topk(const uint row) {
            const uint row_offset = row * p.ncols_input;
            dst_row[tid] = ivec2(gl_GlobalInvocationID.x, floatBitsToInt(data_a[row_offset + gl_GlobalInvocationID.x]));
        } else {
            const uint row_offset = row * p.orig_ncols;
            const uint row_offset = row * p.ncols_input;
            dst_row[tid] = data_s[row_offset + gl_GlobalInvocationID.x];
        }
    } else {

@@ -68,7 +69,7 @@ void topk(const uint row) {
    }
    barrier();

    if (p.ncols_output == 1) {
    if (p.k == 1) {
        // Fast path for single output - just do a max reduction
        [[unroll]] for (int s = BLOCK_SIZE / 2; s >= 1; s /= 2) {
            if (tid < s) {

@@ -98,7 +99,7 @@ void topk(const uint row) {
    uint range_max = 0xFF800000;
    // How many are above the current range, and how many we need to find.
    uint total = 0;
    uint limit = min(p.ncols_output, p.ncols_input - gl_WorkGroupID.x * BLOCK_SIZE);
    uint limit = min(p.k, p.ncols_input - gl_WorkGroupID.x * BLOCK_SIZE);

    while (mask != 0) {
        barrier();

@@ -139,7 +140,7 @@ void topk(const uint row) {
        range_max = range_min + ((min_idx + 1) << shift);
        range_min = range_min + (min_idx << shift);

        if (total == p.ncols_output) {
        if (total == p.k) {
            break;
        }
        total -= counts[min_idx];

@@ -179,13 +180,17 @@ void topk(const uint row) {
        barrier();
    }

    if (tid < p.ncols_output && gl_GlobalInvocationID.x < p.orig_ncols) {
    if (tid < p.k) {
        if (p.last_pass != 0) {
            const uint row_offset = row * p.ncols_output;
            data_d[row_offset + tid] = dst_row[tid].x;
            if (gl_GlobalInvocationID.x < p.ncols_input) {
                const uint row_offset = row * p.k;
                data_d[row_offset + tid] = dst_row[tid].x;
            }
        } else {
            const uint row_offset = row * p.orig_ncols + gl_WorkGroupID.x * p.ncols_output;
            data_t[row_offset + tid] = dst_row[tid];
            if (gl_WorkGroupID.x * p.k + tid < p.ncols_output) {
                const uint row_offset = row * p.ncols_output + gl_WorkGroupID.x * p.k;
                data_t[row_offset + tid] = dst_row[tid];
            }
        }
    }
}

@@ -4891,6 +4891,8 @@ static struct ggml_tensor * ggml_interpolate_impl(
        int64_t ne3,
        uint32_t mode) {
    GGML_ASSERT((mode & 0xFF) < GGML_SCALE_MODE_COUNT);
    // TODO: implement antialias for modes other than bilinear
    GGML_ASSERT(!(mode & GGML_SCALE_FLAG_ANTIALIAS) || (mode & 0xFF) == GGML_SCALE_MODE_BILINEAR);

    struct ggml_tensor * result = ggml_new_tensor_4d(ctx, a->type, ne0, ne1, ne2, ne3);
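
For context, the new flag is OR-ed into the mode argument of the public API. A minimal usage sketch (assumes a valid `ggml_context * ctx` and an F32 tensor `img`; mirrors the test added in test-backend-ops further below):

    // 2x antialiased bilinear downscale of an image tensor
    struct ggml_tensor * out = ggml_interpolate(ctx, img,
            img->ne[0] / 2, img->ne[1] / 2, img->ne[2], img->ne[3],
            GGML_SCALE_MODE_BILINEAR | GGML_SCALE_FLAG_ANTIALIAS);

The low byte of `mode` carries the scale mode and the remaining bits carry flags, which is why the assert above masks with 0xFF.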

@@ -175,6 +175,7 @@ class Keys:
        VALUE_LENGTH_MLA = "{arch}.attention.value_length_mla"
        SHARED_KV_LAYERS = "{arch}.attention.shared_kv_layers"
        SLIDING_WINDOW_PATTERN = "{arch}.attention.sliding_window_pattern"
        TEMPERATURE_SCALE = "{arch}.attention.temperature_scale"

    class Rope:
        DIMENSION_COUNT = "{arch}.rope.dimension_count"

@@ -448,6 +449,7 @@ class MODEL_ARCH(IntEnum):
    MINIMAXM2 = auto()
    RND1 = auto()
    PANGU_EMBED = auto()
    MISTRAL3 = auto()


class VISION_PROJECTOR_TYPE(IntEnum):

@@ -837,6 +839,7 @@ MODEL_ARCH_NAMES: dict[MODEL_ARCH, str] = {
    MODEL_ARCH.COGVLM: "cogvlm",
    MODEL_ARCH.RND1: "rnd1",
    MODEL_ARCH.PANGU_EMBED: "pangu-embedded",
    MODEL_ARCH.MISTRAL3: "mistral3",
}

VISION_PROJECTOR_TYPE_NAMES: dict[VISION_PROJECTOR_TYPE, str] = {

@@ -3124,6 +3127,26 @@ MODEL_TENSORS: dict[MODEL_ARCH, list[MODEL_TENSOR]] = {
        MODEL_TENSOR.FFN_DOWN,
        MODEL_TENSOR.FFN_UP,
    ],
    MODEL_ARCH.MISTRAL3: [
        MODEL_TENSOR.TOKEN_EMBD,
        MODEL_TENSOR.OUTPUT_NORM,
        MODEL_TENSOR.OUTPUT,
        MODEL_TENSOR.ROPE_FREQS,
        MODEL_TENSOR.ATTN_NORM,
        MODEL_TENSOR.ATTN_Q,
        MODEL_TENSOR.ATTN_K,
        MODEL_TENSOR.ATTN_V,
        MODEL_TENSOR.ATTN_OUT,
        MODEL_TENSOR.ATTN_ROT_EMBD,
        MODEL_TENSOR.FFN_GATE_INP,
        MODEL_TENSOR.FFN_NORM,
        MODEL_TENSOR.FFN_GATE,
        MODEL_TENSOR.FFN_DOWN,
        MODEL_TENSOR.FFN_UP,
        MODEL_TENSOR.FFN_GATE_EXP,
        MODEL_TENSOR.FFN_DOWN_EXP,
        MODEL_TENSOR.FFN_UP_EXP,
    ],
    # TODO
}

@@ -904,6 +904,9 @@ class GGUFWriter:
    def add_attn_temperature_length(self, value: int) -> None:
        self.add_uint32(Keys.Attention.TEMPERATURE_LENGTH.format(arch=self.arch), value)

    def add_attn_temperature_scale(self, value: float) -> None:
        self.add_float32(Keys.Attention.TEMPERATURE_SCALE.format(arch=self.arch), value)

    def add_pooling_type(self, value: PoolingType) -> None:
        self.add_uint32(Keys.LLM.POOLING_TYPE.format(arch=self.arch), value.value)

@@ -17,6 +17,8 @@ vendor = {
    "https://github.com/mackron/miniaudio/raw/669ed3e844524fcd883231b13095baee9f6de304/miniaudio.h": "vendor/miniaudio/miniaudio.h",

    "https://raw.githubusercontent.com/yhirose/cpp-httplib/refs/tags/v0.28.0/httplib.h": "vendor/cpp-httplib/httplib.h",

    "https://raw.githubusercontent.com/sheredom/subprocess.h/b49c56e9fe214488493021017bf3954b91c7c1f5/subprocess.h": "vendor/sheredom/subprocess.h",
}

for url, filename in vendor.items():

@@ -132,6 +132,7 @@ add_library(llama
            models/t5-enc.cpp
            models/wavtokenizer-dec.cpp
            models/xverse.cpp
            models/mistral3.cpp
            models/graph-context-mamba.cpp
            )

@@ -111,6 +111,7 @@ static const std::map<llm_arch, const char *> LLM_ARCH_NAMES = {
    { LLM_ARCH_COGVLM, "cogvlm" },
    { LLM_ARCH_RND1, "rnd1" },
    { LLM_ARCH_PANGU_EMBED, "pangu-embedded" },
    { LLM_ARCH_MISTRAL3, "mistral3" },
    { LLM_ARCH_UNKNOWN, "(unknown)" },
};

@@ -204,6 +205,7 @@ static const std::map<llm_kv, const char *> LLM_KV_NAMES = {
    { LLM_KV_ATTENTION_SCALE, "%s.attention.scale" },
    { LLM_KV_ATTENTION_OUTPUT_SCALE, "%s.attention.output_scale" },
    { LLM_KV_ATTENTION_TEMPERATURE_LENGTH, "%s.attention.temperature_length" },
    { LLM_KV_ATTENTION_TEMPERATURE_SCALE, "%s.attention.temperature_scale" },
    { LLM_KV_ATTENTION_KEY_LENGTH_MLA, "%s.attention.key_length_mla" },
    { LLM_KV_ATTENTION_VALUE_LENGTH_MLA, "%s.attention.value_length_mla" },

@@ -853,7 +855,7 @@ static const std::map<llm_arch, std::map<llm_tensor, const char *>> LLM_TENSOR_N
            { LLM_TENSOR_FFN_GATE_SHEXP, "blk.%d.ffn_gate_shexp" },
            { LLM_TENSOR_FFN_DOWN_SHEXP, "blk.%d.ffn_down_shexp" },
            { LLM_TENSOR_FFN_UP_SHEXP, "blk.%d.ffn_up_shexp" },
            { LLM_TENSOR_SSM_A, "blk.%d.ssm_a" },
            { LLM_TENSOR_SSM_A_NOSCAN, "blk.%d.ssm_a" },
            { LLM_TENSOR_SSM_CONV1D, "blk.%d.ssm_conv1d" },
            { LLM_TENSOR_SSM_DT, "blk.%d.ssm_dt" },
            { LLM_TENSOR_SSM_BETA_ALPHA, "blk.%d.ssm_ba" },

@@ -2514,6 +2516,32 @@ static const std::map<llm_arch, std::map<llm_tensor, const char *>> LLM_TENSOR_N
            { LLM_TENSOR_FFN_UP_EXPS, "blk.%d.ffn_up_exps" },
        },
    },
    {
        LLM_ARCH_MISTRAL3,
        {
            { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
            { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
            { LLM_TENSOR_OUTPUT, "output" },
            { LLM_TENSOR_ROPE_FREQS, "rope_freqs" },
            { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
            { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
            { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
            { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
            { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
            { LLM_TENSOR_ATTN_ROT_EMBD, "blk.%d.attn_rot_embd" },
            { LLM_TENSOR_FFN_GATE_INP, "blk.%d.ffn_gate_inp" },
            { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
            { LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" },
            { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
            { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
            { LLM_TENSOR_FFN_GATE_EXP, "blk.%d.ffn_gate.%d" },
            { LLM_TENSOR_FFN_DOWN_EXP, "blk.%d.ffn_down.%d" },
            { LLM_TENSOR_FFN_UP_EXP, "blk.%d.ffn_up.%d" },
            { LLM_TENSOR_FFN_GATE_EXPS, "blk.%d.ffn_gate_exps" },
            { LLM_TENSOR_FFN_DOWN_EXPS, "blk.%d.ffn_down_exps" },
            { LLM_TENSOR_FFN_UP_EXPS, "blk.%d.ffn_up_exps" },
        },
    },
    {
        LLM_ARCH_UNKNOWN,
        {

@@ -2613,6 +2641,7 @@ static const std::map<llm_tensor, llm_tensor_info> LLM_TENSOR_INFOS = {
    {LLM_TENSOR_FFN_ACT, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_DIV}},
    {LLM_TENSOR_SSM_CONV1D, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_SSM_CONV}},
    {LLM_TENSOR_SSM_A, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_SSM_SCAN}},
    {LLM_TENSOR_SSM_A_NOSCAN, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}}, // a version of SSM_A used for MUL instead of SSM_SCAN
    {LLM_TENSOR_SSM_DT_NORM, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}},
    {LLM_TENSOR_SSM_B_NORM, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}},
    {LLM_TENSOR_SSM_C_NORM, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}},

@@ -115,6 +115,7 @@ enum llm_arch {
    LLM_ARCH_COGVLM,
    LLM_ARCH_RND1,
    LLM_ARCH_PANGU_EMBED,
    LLM_ARCH_MISTRAL3,
    LLM_ARCH_UNKNOWN,
};

@@ -208,6 +209,7 @@ enum llm_kv {
    LLM_KV_ATTENTION_SCALE,
    LLM_KV_ATTENTION_OUTPUT_SCALE,
    LLM_KV_ATTENTION_TEMPERATURE_LENGTH,
    LLM_KV_ATTENTION_TEMPERATURE_SCALE,
    LLM_KV_ATTENTION_KEY_LENGTH_MLA,
    LLM_KV_ATTENTION_VALUE_LENGTH_MLA,

@@ -377,6 +379,7 @@ enum llm_tensor {
    LLM_TENSOR_SSM_DT,
    LLM_TENSOR_SSM_DT_NORM,
    LLM_TENSOR_SSM_A,
    LLM_TENSOR_SSM_A_NOSCAN, // qwen3next special case with MUL instead of SSM_SCAN
    LLM_TENSOR_SSM_B_NORM,
    LLM_TENSOR_SSM_C_NORM,
    LLM_TENSOR_SSM_D,

@@ -71,6 +71,9 @@ void llm_graph_input_attn_temp::set_input(const llama_ubatch * ubatch) {
    if (ubatch->pos && attn_scale) {
        const int64_t n_tokens = ubatch->n_tokens;

        GGML_ASSERT(f_attn_temp_scale != 0.0f);
        GGML_ASSERT(n_attn_temp_floor_scale != 0);

        std::vector<float> attn_scale_data(n_tokens, 0.0f);
        for (int i = 0; i < n_tokens; ++i) {
            const float pos = ubatch->pos[i];

@@ -810,9 +813,6 @@ ggml_tensor * llm_graph_context::build_ffn(
            GGML_ABORT("fatal error");
    }

    //expand here so that we can fuse ffn gate
    ggml_build_forward_expand(gf, cur);

    if (gate && type_gate == LLM_FFN_PAR) {
        cur = ggml_mul(ctx0, cur, tmp);
        cb(cur, "ffn_gate_par", il);

@@ -1093,9 +1093,6 @@ ggml_tensor * llm_graph_context::build_moe_ffn(
            GGML_ABORT("fatal error");
    }

    //expand here so that we can fuse ffn gate
    ggml_build_forward_expand(gf, cur);

    experts = build_lora_mm_id(down_exps, cur, selected_experts); // [n_embd, n_expert_used, n_tokens]
    cb(experts, "ffn_moe_down", il);
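
For reference, the two asserts above guard the per-token temperature scale filled in by the loop that follows in the shown context. A sketch of the rule (hedged: this mirrors the Llama-4-style scaling already used by this input; treat the exact constants as illustrative):

    #include <cmath>

    // Per-token attention temperature (Llama-4 style): positions are bucketed by
    // n_attn_temp_floor_scale and the scale grows with the log of the bucket
    // index, which is why both parameters must be non-zero here.
    static float attn_temp_scale(float pos, uint32_t n_floor_scale, float f_scale) {
        return std::log(std::floor((pos + 1.0f) / n_floor_scale) + 1.0f) * f_scale + 1.0f;
    }

    // e.g. n_floor_scale = 8192, f_scale = 0.1:
    //   pos <= 8190  -> 1.0 (log(1) == 0)
    //   pos == 16384 -> log(3) * 0.1 + 1 ≈ 1.11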

@@ -162,8 +162,8 @@ struct llama_hparams {
    // llama4 smallthinker
    uint32_t n_moe_layer_step = 0;
    uint32_t n_no_rope_layer_step = 4;
    uint32_t n_attn_temp_floor_scale = 8192;
    float f_attn_temp_scale = 0.1;
    uint32_t n_attn_temp_floor_scale = 0;
    float f_attn_temp_scale = 0.0f;

    // gemma3n altup
    uint32_t n_altup = 4; // altup_num_inputs

@@ -485,7 +485,7 @@ struct llama_mlock::impl {
        if (suggest && getrlimit(RLIMIT_MEMLOCK, &lock_limit)) {
            suggest = false;
        }
        if (suggest && (lock_limit.rlim_max > lock_limit.rlim_cur + size)) {
        if (suggest && ((uint64_t)lock_limit.rlim_max > (uint64_t)lock_limit.rlim_cur + size)) {
            suggest = false;
        }
#endif
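
A brief note on the cast above (editor's aid; this reading is inferred from the change itself, not from an upstream explanation):

    // Before: rlim_max > rlim_cur + size          (mixed rlim_t / size_t arithmetic)
    // After:  (uint64_t)rlim_max > (uint64_t)rlim_cur + size
    // rlim_t and size_t need not share width or signedness, so the explicit
    // widening keeps the sum and the comparison from being evaluated in a
    // narrower or differently-signed type on some platforms.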

@@ -663,8 +663,10 @@ void llama_model::load_hparams(llama_model_loader & ml) {
                    hparams.swa_type = LLAMA_SWA_TYPE_NONE;
                    hparams.n_no_rope_layer_step = hparams.n_layer; // always use rope
                } else {
                    hparams.swa_type = LLAMA_SWA_TYPE_CHUNKED;
                    hparams.n_swa = 8192;
                    hparams.swa_type = LLAMA_SWA_TYPE_CHUNKED;
                    hparams.n_swa = 8192;
                    hparams.n_attn_temp_floor_scale = 8192;
                    hparams.f_attn_temp_scale = 0.1f;
                    hparams.set_swa_pattern(4); // pattern: 3 chunked - 1 full
                }

@@ -2251,6 +2253,42 @@ void llama_model::load_hparams(llama_model_loader & ml) {
                    default: type = LLM_TYPE_UNKNOWN;
                }
            } break;
        case LLM_ARCH_MISTRAL3:
            {
                ml.get_key(LLM_KV_ATTENTION_LAYERNORM_RMS_EPS, hparams.f_norm_rms_eps);
                ml.get_key(LLM_KV_ATTENTION_TEMPERATURE_SCALE, hparams.f_attn_temp_scale, false);

                ml.get_key(LLM_KV_ROPE_SCALING_YARN_BETA_FAST, hparams.yarn_beta_fast, false);
                ml.get_key(LLM_KV_ROPE_SCALING_YARN_BETA_SLOW, hparams.yarn_beta_slow, false);
                ml.get_key(LLM_KV_ROPE_SCALING_YARN_LOG_MUL, hparams.rope_yarn_log_mul, false);

                // TODO: maybe add n_attn_temp_floor_scale as a separate KV?
                if (hparams.f_attn_temp_scale != 0.0f) {
                    hparams.n_attn_temp_floor_scale = hparams.n_ctx_orig_yarn;
                    if (hparams.n_attn_temp_floor_scale == 0) {
                        throw std::runtime_error("invalid n_ctx_orig_yarn for attention temperature scaling");
                    }
                }

                // TODO: this seems to be correct with the case of mscale == mscale_all_dims == 1.0f
                // but may need further verification with other values
                if (hparams.rope_yarn_log_mul != 0.0f) {
                    float factor = 1.0f / hparams.rope_freq_scale_train;
                    float mscale = 1.0f;
                    float mscale_all_dims = hparams.rope_yarn_log_mul;
                    static auto get_mscale = [](float scale, float mscale) {
                        return scale <= 1.0f ? 1.0f : (0.1f * mscale * logf(scale) + 1.0f);
                    };
                    hparams.yarn_attn_factor = get_mscale(factor, mscale) / get_mscale(factor, mscale_all_dims);
                }

                switch (hparams.n_layer) {
                    case 26: type = LLM_TYPE_3B; break;
                    case 34: type = LLM_TYPE_8B; break;
                    case 40: type = LLM_TYPE_14B; break;
                    default: type = LLM_TYPE_UNKNOWN;
                }
            } break;
        default: throw std::runtime_error("unsupported model architecture");
    }

@@ -2564,6 +2602,7 @@ bool llama_model::load_tensors(llama_model_loader & ml) {
            case LLM_ARCH_MINICPM:
            case LLM_ARCH_GRANITE:
            case LLM_ARCH_GRANITE_MOE:
            case LLM_ARCH_MISTRAL3:
                {
                    tok_embd = create_tensor(tn(LLM_TENSOR_TOKEN_EMBD, "weight"), {n_embd, n_vocab}, 0);

@@ -6521,7 +6560,7 @@ bool llama_model::load_tensors(llama_model_loader & ml) {
                    layer.ssm_in = create_tensor(tn(LLM_TENSOR_SSM_IN, "weight", i), { n_embd, qkvz_dim }, 0);
                    layer.ssm_conv1d = create_tensor(tn(LLM_TENSOR_SSM_CONV1D, "weight", i), { hparams.ssm_d_conv, conv_dim }, 0);
                    layer.ssm_dt = create_tensor(tn(LLM_TENSOR_SSM_DT, "bias", i), { hparams.ssm_dt_rank }, 0);
                    layer.ssm_a = create_tensor(tn(LLM_TENSOR_SSM_A, i), { hparams.ssm_dt_rank }, 0);
                    layer.ssm_a = create_tensor(tn(LLM_TENSOR_SSM_A_NOSCAN, i), { hparams.ssm_dt_rank }, 0);
                    layer.ssm_beta_alpha = create_tensor(tn(LLM_TENSOR_SSM_BETA_ALPHA, "weight", i), { n_embd, ba_dim }, 0);
                    layer.ssm_norm = create_tensor(tn(LLM_TENSOR_SSM_NORM, "weight", i), { head_v_dim }, 0);
                    layer.ssm_out = create_tensor(tn(LLM_TENSOR_SSM_OUT, "weight", i), { value_dim, n_embd }, 0);

@@ -7556,6 +7595,10 @@ ggml_cgraph * llama_model::build_graph(const llm_graph_params & params) const {
            {
                llm = std::make_unique<llm_build_qwen3next>(*this, params);
            } break;
        case LLM_ARCH_MISTRAL3:
            {
                llm = std::make_unique<llm_build_mistral3>(*this, params);
            } break;
        default:
            GGML_ABORT("fatal error");
    }

@@ -7724,6 +7767,7 @@ llama_rope_type llama_model_rope_type(const llama_model * model) {
        case LLM_ARCH_ARCEE:
        case LLM_ARCH_ERNIE4_5:
        case LLM_ARCH_ERNIE4_5_MOE:
        case LLM_ARCH_MISTRAL3:
            return LLAMA_ROPE_TYPE_NORM;

        // the pairs of head values are offset by n_rot/2
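
A quick numeric check of the mscale ratio computed in the load_hparams hunk above (editor's aid, not part of the patch):

    // With rope_freq_scale_train = 0.125 (factor = 8), mscale = 1.0f and
    // mscale_all_dims = rope_yarn_log_mul = 1.0f:
    //   get_mscale(8, 1.0) = 0.1 * 1.0 * logf(8) + 1 ≈ 1.2079
    //   yarn_attn_factor   = 1.2079 / 1.2079 = 1.0
    // i.e. for mscale == mscale_all_dims the correction cancels, which is the
    // configuration the TODO comment says has been checked.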

@@ -0,0 +1,160 @@
#include "models.h"

llm_build_mistral3::llm_build_mistral3(const llama_model & model, const llm_graph_params & params) : llm_graph_context(params) {
    const int64_t n_embd_head = hparams.n_embd_head_v;

    GGML_ASSERT(n_embd_head == hparams.n_embd_head_k);
    GGML_ASSERT(n_embd_head == hparams.n_rot);

    ggml_tensor * cur;
    ggml_tensor * inpL;

    inpL = build_inp_embd(model.tok_embd);

    // inp_pos - contains the positions
    ggml_tensor * inp_pos = build_inp_pos();

    // (optional) temperature tuning
    ggml_tensor * inp_attn_scale = nullptr;
    if (hparams.f_attn_temp_scale != 0.0f) {
        inp_attn_scale = build_inp_attn_scale();
    }

    auto * inp_attn = build_attn_inp_kv();

    const float kq_scale = hparams.f_attention_scale == 0.0f ? 1.0f/sqrtf(float(n_embd_head)) : hparams.f_attention_scale;

    ggml_tensor * inp_out_ids = build_inp_out_ids();

    for (int il = 0; il < n_layer; ++il) {
        ggml_tensor * inpSA = inpL;

        // norm
        cur = build_norm(inpL,
                model.layers[il].attn_norm, NULL,
                LLM_NORM_RMS, il);
        cb(cur, "attn_norm", il);

        // self-attention
        {
            // rope freq factors for llama3; may return nullptr for llama2 and other models
            ggml_tensor * rope_factors = model.get_rope_factors(cparams, il);

            // compute Q and K and RoPE them
            ggml_tensor * Qcur = build_lora_mm(model.layers[il].wq, cur);
            cb(Qcur, "Qcur", il);
            if (model.layers[il].bq) {
                Qcur = ggml_add(ctx0, Qcur, model.layers[il].bq);
                cb(Qcur, "Qcur", il);
            }
            ggml_tensor * Kcur = build_lora_mm(model.layers[il].wk, cur);
            cb(Kcur, "Kcur", il);
            if (model.layers[il].bk) {
                Kcur = ggml_add(ctx0, Kcur, model.layers[il].bk);
                cb(Kcur, "Kcur", il);
            }
            ggml_tensor * Vcur = build_lora_mm(model.layers[il].wv, cur);
            cb(Vcur, "Vcur", il);
            if (model.layers[il].bv) {
                Vcur = ggml_add(ctx0, Vcur, model.layers[il].bv);
                cb(Vcur, "Vcur", il);
            }
            Qcur = ggml_reshape_3d(ctx0, Qcur, n_embd_head, n_head, n_tokens);
            Kcur = ggml_reshape_3d(ctx0, Kcur, n_embd_head, n_head_kv, n_tokens);
            Vcur = ggml_reshape_3d(ctx0, Vcur, n_embd_head, n_head_kv, n_tokens);

            Qcur = ggml_rope_ext(
                    ctx0, Qcur, inp_pos, rope_factors,
                    n_rot, rope_type, n_ctx_orig, freq_base, freq_scale,
                    ext_factor, attn_factor, beta_fast, beta_slow
                    );

            Kcur = ggml_rope_ext(
                    ctx0, Kcur, inp_pos, rope_factors,
                    n_rot, rope_type, n_ctx_orig, freq_base, freq_scale,
                    ext_factor, attn_factor, beta_fast, beta_slow
                    );

            cb(Qcur, "Qcur", il);
            cb(Kcur, "Kcur", il);
            cb(Vcur, "Vcur", il);

            if (inp_attn_scale) {
                // apply llama 4 temperature scaling
                Qcur = ggml_mul(ctx0, Qcur, inp_attn_scale);
                cb(Qcur, "Qcur_attn_temp_scaled", il);
            }

            cur = build_attn(inp_attn,
                    model.layers[il].wo, model.layers[il].bo,
                    Qcur, Kcur, Vcur, nullptr, nullptr, nullptr, kq_scale, il);
            cb(cur, "attn_out", il);
        }
        if (il == n_layer - 1 && inp_out_ids) {
            cur = ggml_get_rows(ctx0, cur, inp_out_ids);
            inpSA = ggml_get_rows(ctx0, inpSA, inp_out_ids);
        }
        ggml_tensor * ffn_inp = ggml_add(ctx0, cur, inpSA);
        cb(ffn_inp, "ffn_inp", il);

        // feed-forward network (non-MoE)
        if (model.layers[il].ffn_gate_inp == nullptr) {

            cur = build_norm(ffn_inp,
                    model.layers[il].ffn_norm, NULL,
                    LLM_NORM_RMS, il);
            cb(cur, "ffn_norm", il);

            cur = build_ffn(cur,
                    model.layers[il].ffn_up, model.layers[il].ffn_up_b, NULL,
                    model.layers[il].ffn_gate, model.layers[il].ffn_gate_b, NULL,
                    model.layers[il].ffn_down, model.layers[il].ffn_down_b, NULL,
                    NULL,
                    LLM_FFN_SILU, LLM_FFN_PAR, il);
            cb(cur, "ffn_out", il);
        } else {
            // MoE branch
            cur = build_norm(ffn_inp,
                    model.layers[il].ffn_norm, NULL,
                    LLM_NORM_RMS, il);
            cb(cur, "ffn_norm", il);

            cur = build_moe_ffn(cur,
                    model.layers[il].ffn_gate_inp,
                    model.layers[il].ffn_up_exps,
                    model.layers[il].ffn_gate_exps,
                    model.layers[il].ffn_down_exps,
                    nullptr,
                    n_expert, n_expert_used,
                    LLM_FFN_SILU, true,
                    false, 0.0,
                    LLAMA_EXPERT_GATING_FUNC_TYPE_SOFTMAX,
                    il);
            cb(cur, "ffn_moe_out", il);
        }
        cur = ggml_add(ctx0, cur, ffn_inp);
        cb(cur, "ffn_out", il);

        cur = build_cvec(cur, il);
        cb(cur, "l_out", il);

        // input for next layer
        inpL = cur;
    }
    cur = inpL;

    cur = build_norm(cur,
            model.output_norm, NULL,
            LLM_NORM_RMS, -1);

    cb(cur, "result_norm", -1);
    res->t_embd = cur;

    // lm_head
    cur = build_lora_mm(model.output, cur);

    cb(cur, "result_output", -1);
    res->t_logits = cur;

    ggml_build_forward_expand(gf, cur);
}
@@ -322,6 +322,10 @@ struct llm_build_minimax_m2 : public llm_graph_context {
    llm_build_minimax_m2(const llama_model & model, const llm_graph_params & params);
};

struct llm_build_mistral3 : public llm_graph_context {
    llm_build_mistral3(const llama_model & model, const llm_graph_params & params);
};

struct llm_build_mpt : public llm_graph_context {
    llm_build_mpt(const llama_model & model, const llm_graph_params & params);
};
@@ -7660,7 +7660,7 @@ static std::vector<std::unique_ptr<test_case>> make_test_cases_eval() {
    //    test_cases.emplace_back(new test_top_k(GGML_TYPE_F32, {i, 2, 1, 3}, rand() % i + 1));
    //}

    for (ggml_scale_mode mode : {GGML_SCALE_MODE_NEAREST, GGML_SCALE_MODE_BILINEAR, GGML_SCALE_MODE_BICUBIC}) {
    for (ggml_scale_mode mode : {GGML_SCALE_MODE_NEAREST, GGML_SCALE_MODE_BILINEAR, GGML_SCALE_MODE_BICUBIC, ggml_scale_mode(GGML_SCALE_MODE_BILINEAR | GGML_SCALE_FLAG_ANTIALIAS)}) {
        test_cases.emplace_back(new test_upscale(GGML_TYPE_F32, {512, 512, 3, 2}, 2, mode));
        test_cases.emplace_back(new test_upscale(GGML_TYPE_F32, {512, 512, 3, 2}, 2, mode, true));
        test_cases.emplace_back(new test_interpolate(GGML_TYPE_F32, {2, 5, 7, 11}, {5, 7, 11, 13}, mode));
@@ -1375,7 +1375,7 @@ int main() {
        try {
            tc.verify(json_schema_to_grammar(nlohmann::ordered_json::parse(tc.schema), true));
            tc.verify_status(SUCCESS);
        } catch (const std::runtime_error & ex) {
        } catch (const std::invalid_argument & ex) {
            fprintf(stderr, "Error: %s\n", ex.what());
            tc.verify_status(FAILURE);
        }
@@ -23,7 +23,7 @@
#endif

struct quantize_stats_params {
    std::string model = DEFAULT_MODEL_PATH;
    std::string model = "models/7B/ggml-model-f16.gguf";
    bool verbose = false;
    bool per_layer_stats = false;
    bool print_histogram = false;
@@ -521,6 +521,12 @@ int main(int argc, char ** argv) {
        is_interacting = params.interactive_first;
    }

    LOG_WRN("*****************************\n");
    LOG_WRN("IMPORTANT: The current llama-cli will be moved to llama-completion in the near future\n");
    LOG_WRN("           New llama-cli will have enhanced features and improved user experience\n");
    LOG_WRN("           More info: https://github.com/ggml-org/llama.cpp/discussions/17618\n");
    LOG_WRN("*****************************\n");

    bool is_antiprompt = false;
    bool input_echo    = true;
    bool display       = true;
@@ -1263,12 +1263,20 @@ struct clip_graph {
        cur = ggml_mul_mat(ctx0, layer.qkv_w, cur);
        cur = ggml_add(ctx0, cur, layer.qkv_b);

        ggml_tensor * Qcur = ggml_view_3d(ctx0, cur, d_head, n_head, n_pos, d_head*sizeof(float),
            cur->nb[1], 0);
        ggml_tensor * Kcur = ggml_view_3d(ctx0, cur, d_head, n_head, n_pos, d_head*sizeof(float),
            cur->nb[1], n_embd * sizeof(float));
        ggml_tensor * Vcur = ggml_view_3d(ctx0, cur, d_head, n_head, n_pos, d_head*sizeof(float),
            cur->nb[1], 2 * n_embd * sizeof(float));
        ggml_tensor * Qcur = ggml_view_3d(ctx0, cur, d_head, n_head, n_pos,
            /* nb1    */ ggml_row_size(cur->type, d_head),
            /* nb2    */ cur->nb[1],
            /* offset */ 0);

        ggml_tensor * Kcur = ggml_view_3d(ctx0, cur, d_head, n_head, n_pos,
            /* nb1    */ ggml_row_size(cur->type, d_head),
            /* nb2    */ cur->nb[1],
            /* offset */ ggml_row_size(cur->type, n_embd));

        ggml_tensor * Vcur = ggml_view_3d(ctx0, cur, d_head, n_head, n_pos,
            /* nb1    */ ggml_row_size(cur->type, d_head),
            /* nb2    */ cur->nb[1],
            /* offset */ ggml_row_size(cur->type, 2 * n_embd));

        cb(Qcur, "Qcur", il);
        cb(Kcur, "Kcur", il);
@@ -2344,7 +2352,7 @@ private:
        ggml_tensor * pos_embd = model.position_embeddings;
        const int height = img.ny / patch_size;
        const int width  = img.nx / patch_size;
        const uint32_t mode = GGML_SCALE_MODE_BILINEAR;
        const uint32_t mode = GGML_SCALE_MODE_BILINEAR | GGML_SCALE_FLAG_ANTIALIAS;
        const int n_per_side = (int)std::sqrt(pos_embd->ne[1]);

        GGML_ASSERT(pos_embd);
@@ -3331,7 +3339,8 @@ struct clip_model_loader {
            {
                get_u32(KEY_PROJ_SCALE_FACTOR, hparams.n_merge, false);
                // ref: https://huggingface.co/LiquidAI/LFM2-VL-3B/blob/main/preprocessor_config.json
                hparams.set_limit_image_tokens(64, 256);
                // the config above specifies the number of tokens after downsampling, while here it is before; relax the lower bound to 64
                hparams.set_limit_image_tokens(64, 1024);
            } break;
        case PROJECTOR_TYPE_PIXTRAL:
        case PROJECTOR_TYPE_LIGHTONOCR:
@@ -4106,14 +4115,18 @@ struct clip_init_result clip_init(const char * fname, struct clip_context_params
            ctx_vision = new clip_ctx(ctx_params);
            loader.load_hparams(ctx_vision->model, CLIP_MODALITY_VISION);
            loader.load_tensors(*ctx_vision);
            loader.warmup(*ctx_vision);
            if (ctx_params.warmup) {
                loader.warmup(*ctx_vision);
            }
        }

        if (loader.has_audio) {
            ctx_audio = new clip_ctx(ctx_params);
            loader.load_hparams(ctx_audio->model, CLIP_MODALITY_AUDIO);
            loader.load_tensors(*ctx_audio);
            loader.warmup(*ctx_audio);
            if (ctx_params.warmup) {
                loader.warmup(*ctx_audio);
            }
        }

    } catch (const std::exception & e) {
@@ -4333,12 +4346,13 @@ struct img_tool {
        const int width  = inp_size.width;
        const int height = inp_size.height;

        auto round_by_factor = [f = align_size](float x) { return static_cast<int>(std::round(x / static_cast<float>(f))) * f; };
        auto ceil_by_factor  = [f = align_size](float x) { return static_cast<int>(std::ceil(x / static_cast<float>(f))) * f; };
        auto floor_by_factor = [f = align_size](float x) { return static_cast<int>(std::floor(x / static_cast<float>(f))) * f; };

        // always align up first
        int h_bar = std::max(align_size, ceil_by_factor(height));
        int w_bar = std::max(align_size, ceil_by_factor(width));
        int h_bar = std::max(align_size, round_by_factor(height));
        int w_bar = std::max(align_size, round_by_factor(width));

        if (h_bar * w_bar > max_pixels) {
            const auto beta = std::sqrt(static_cast<float>(height * width) / max_pixels);
@@ -5209,7 +5223,8 @@ bool clip_image_preprocess(struct clip_ctx * ctx, const clip_image_u8 * img, str
        const std::array<uint8_t, 3> pad_color = {122, 116, 104};

        clip_image_u8 resized_img;
        img_tool::resize(*img, resized_img, target_size, img_tool::RESIZE_ALGO_BILINEAR, true, pad_color);
        const bool pad = (ctx->proj_type() != PROJECTOR_TYPE_LFM2);
        img_tool::resize(*img, resized_img, target_size, img_tool::RESIZE_ALGO_BILINEAR, pad, pad_color);
        clip_image_f32_ptr res(clip_image_f32_init());
        normalize_image_u8_to_f32(resized_img, *res, params.image_mean, params.image_std);
        res_imgs->entries.push_back(std::move(res));
@@ -44,6 +44,7 @@ struct clip_context_params {
    enum clip_flash_attn_type flash_attn_type;
    int image_min_tokens;
    int image_max_tokens;
    bool warmup;
    enum clip_dsocr_mode dsocr_mode;
};
@@ -136,6 +136,7 @@ struct mtmd_cli_context {
        mparams.print_timings = true;
        mparams.n_threads = params.cpuparams.n_threads;
        mparams.flash_attn_type = params.flash_attn_type;
        mparams.warmup = params.warmup;
        mparams.image_min_tokens = params.image_min_tokens;
        mparams.image_max_tokens = params.image_max_tokens;
        mparams.dsocr_mode = params.dsocr_mode.c_str();
@@ -108,6 +108,7 @@ mtmd_context_params mtmd_context_params_default() {
        /* image_marker     */ MTMD_DEFAULT_IMAGE_MARKER,
        /* media_marker     */ mtmd_default_marker(),
        /* flash_attn_type  */ LLAMA_FLASH_ATTN_TYPE_AUTO,
        /* warmup           */ true,
        /* image_min_tokens */ -1,
        /* image_max_tokens */ -1,
        /* dsocr_mode       */ "auto",

@@ -198,6 +199,7 @@ struct mtmd_context {
            /* flash_attn_type  */ CLIP_FLASH_ATTN_TYPE_AUTO,
            /* image_min_tokens */ ctx_params.image_min_tokens,
            /* image_max_tokens */ ctx_params.image_max_tokens,
            /* warmup           */ ctx_params.warmup,
            /* dsocr_mode       */ dsocr_mode,
        };
@@ -326,6 +328,10 @@ struct mtmd_context {
            img_beg = "<|im_start|>";
            img_end = "<|im_end|>";

        } else if (proj == PROJECTOR_TYPE_LFM2) {
            img_beg = "<|image_start|>";
            img_end = "<|image_end|>";

        }
    }
@@ -82,6 +82,7 @@ struct mtmd_context_params {
    const char * image_marker; // deprecated, use media_marker instead
    const char * media_marker;
    enum llama_flash_attn_type flash_attn_type;
    bool warmup; // whether to run a warmup encode pass after initialization

    // limit number of image tokens, only for vision models with dynamic resolution
    int image_min_tokens; // minimum number of tokens for image input (default: read from metadata)
@@ -15,6 +15,8 @@ set(TARGET_SRCS
    server.cpp
    server-http.cpp
    server-http.h
    server-models.cpp
    server-models.h
    server-task.cpp
    server-task.h
    server-queue.cpp
@@ -52,7 +52,7 @@ The project is under active development, and we are [looking for feedback and co
| `-ub, --ubatch-size N` | physical maximum batch size (default: 512)<br/>(env: LLAMA_ARG_UBATCH) |
| `--keep N` | number of tokens to keep from the initial prompt (default: 0, -1 = all) |
| `--swa-full` | use full-size SWA cache (default: false)<br/>[(more info)](https://github.com/ggml-org/llama.cpp/pull/13194#issuecomment-2868343055)<br/>(env: LLAMA_ARG_SWA_FULL) |
| `--kv-unified, -kvu` | use single unified KV buffer for the KV cache of all sequences (default: false)<br/>[(more info)](https://github.com/ggml-org/llama.cpp/pull/14363)<br/>(env: LLAMA_ARG_KV_SPLIT) |
| `--kv-unified, -kvu` | use single unified KV buffer for the KV cache of all sequences (default: false)<br/>[(more info)](https://github.com/ggml-org/llama.cpp/pull/14363)<br/>(env: LLAMA_ARG_KV_UNIFIED) |
| `-fa, --flash-attn [on\|off\|auto]` | set Flash Attention use ('on', 'off', or 'auto', default: 'auto')<br/>(env: LLAMA_ARG_FLASH_ATTN) |
| `--no-perf` | disable internal libllama performance timings (default: false)<br/>(env: LLAMA_ARG_NO_PERF) |
| `-e, --escape` | process escape sequences (\n, \r, \t, \', \", \\) (default: true) |
@@ -93,7 +93,7 @@ The project is under active development, and we are [looking for feedback and co
| `--control-vector FNAME` | add a control vector<br/>note: this argument can be repeated to add multiple control vectors |
| `--control-vector-scaled FNAME SCALE` | add a control vector with user defined scaling SCALE<br/>note: this argument can be repeated to add multiple scaled control vectors |
| `--control-vector-layer-range START END` | layer range to apply the control vector(s) to, start and end inclusive |
| `-m, --model FNAME` | model path (default: `models/$filename` with filename from `--hf-file` or `--model-url` if set, otherwise models/7B/ggml-model-f16.gguf)<br/>(env: LLAMA_ARG_MODEL) |
| `-m, --model FNAME` | model path to load<br/>(env: LLAMA_ARG_MODEL) |
| `-mu, --model-url MODEL_URL` | model download url (default: unused)<br/>(env: LLAMA_ARG_MODEL_URL) |
| `-dr, --docker-repo [<repo>/]<model>[:quant]` | Docker Hub model repository. repo is optional, default to ai/. quant is optional, default to :latest.<br/>example: gemma3<br/>(default: unused)<br/>(env: LLAMA_ARG_DOCKER_REPO) |
| `-hf, -hfr, --hf-repo <user>/<model>[:quant]` | Hugging Face model repository; quant is optional, case-insensitive, default to Q4_K_M, or falls back to the first file in the repo if Q4_K_M doesn't exist.<br/>mmproj is also downloaded automatically if available. to disable, add --no-mmproj<br/>example: unsloth/phi-4-GGUF:q4_k_m<br/>(default: unused)<br/>(env: LLAMA_ARG_HF_REPO) |
@@ -103,11 +103,11 @@ The project is under active development, and we are [looking for feedback and co
| `-hffv, --hf-file-v FILE` | Hugging Face model file for the vocoder model (default: unused)<br/>(env: LLAMA_ARG_HF_FILE_V) |
| `-hft, --hf-token TOKEN` | Hugging Face access token (default: value from HF_TOKEN environment variable)<br/>(env: HF_TOKEN) |
| `--log-disable` | Log disable |
| `--log-file FNAME` | Log to file |
| `--log-file FNAME` | Log to file<br/>(env: LLAMA_LOG_FILE) |
| `--log-colors [on\|off\|auto]` | Set colored logging ('on', 'off', or 'auto', default: 'auto')<br/>'auto' enables colors when output is to a terminal<br/>(env: LLAMA_LOG_COLORS) |
| `-v, --verbose, --log-verbose` | Set verbosity level to infinity (i.e. log all messages, useful for debugging) |
| `--offline` | Offline mode: forces use of cache, prevents network access<br/>(env: LLAMA_OFFLINE) |
| `-lv, --verbosity, --log-verbosity N` | Set the verbosity threshold. Messages with a higher verbosity will be ignored.<br/>(env: LLAMA_LOG_VERBOSITY) |
| `-lv, --verbosity, --log-verbosity N` | Set the verbosity threshold. Messages with a higher verbosity will be ignored. Values:<br/> - 0: generic output<br/> - 1: error<br/> - 2: warning<br/> - 3: info<br/> - 4: debug<br/>(default: 3)<br/><br/>(env: LLAMA_LOG_VERBOSITY) |
| `--log-prefix` | Enable prefix in log messages<br/>(env: LLAMA_LOG_PREFIX) |
| `--log-timestamps` | Enable timestamps in log messages<br/>(env: LLAMA_LOG_TIMESTAMPS) |
| `-ctkd, --cache-type-k-draft TYPE` | KV cache data type for K for the draft model<br/>allowed values: f32, f16, bf16, q8_0, q4_0, q4_1, iq4_nl, q5_0, q5_1<br/>(default: f16)<br/>(env: LLAMA_ARG_CACHE_TYPE_K_DRAFT) |
@@ -196,6 +196,10 @@ The project is under active development, and we are [looking for feedback and co
| `--slots` | enable slots monitoring endpoint (default: enabled)<br/>(env: LLAMA_ARG_ENDPOINT_SLOTS) |
| `--no-slots` | disables slots monitoring endpoint<br/>(env: LLAMA_ARG_NO_ENDPOINT_SLOTS) |
| `--slot-save-path PATH` | path to save slot kv cache (default: disabled) |
| `--models-dir PATH` | directory containing models for the router server (default: disabled)<br/>(env: LLAMA_ARG_MODELS_DIR) |
| `--models-max N` | for router server, maximum number of models to load simultaneously (default: 4, 0 = unlimited)<br/>(env: LLAMA_ARG_MODELS_MAX) |
| `--models-allow-extra-args` | for router server, allow extra arguments for models; important: some arguments can allow users to access local file system, use with caution (default: disabled)<br/>(env: LLAMA_ARG_MODELS_ALLOW_EXTRA_ARGS) |
| `--no-models-autoload` | disables automatic loading of models (default: enabled)<br/>(env: LLAMA_ARG_NO_MODELS_AUTOLOAD) |
| `--jinja` | use jinja template for chat (default: enabled)<br/><br/>(env: LLAMA_ARG_JINJA) |
| `--no-jinja` | disable jinja template for chat (default: enabled)<br/><br/>(env: LLAMA_ARG_NO_JINJA) |
| `--reasoning-format FORMAT` | controls whether thought tags are allowed and/or extracted from the response, and in which format they're returned; one of:<br/>- none: leaves thoughts unparsed in `message.content`<br/>- deepseek: puts thoughts in `message.reasoning_content`<br/>- deepseek-legacy: keeps `<think>` tags in `message.content` while also populating `message.reasoning_content`<br/>(default: auto)<br/>(env: LLAMA_ARG_THINK) |
@@ -287,38 +291,66 @@ For more details, please refer to [multimodal documentation](../../docs/multimod

## Web UI

The project includes a web-based user interface that enables interaction with the model through the `/v1/chat/completions` endpoint.
The project includes a web-based user interface for interacting with `llama-server`. It supports both single-model (`MODEL` mode) and multi-model (`ROUTER` mode) operation.

The web UI is developed using:
- `react` framework for frontend development
- `tailwindcss` and `daisyui` for styling
- `vite` for build tooling
### Features

A pre-built version is available as a single HTML file under `/public` directory.
- **Chat interface** with streaming responses
- **Multi-model support** (ROUTER mode) - switch between models, auto-load on selection
- **Modality validation** - ensures selected model supports conversation's attachments (images, audio)
- **Conversation management** - branching, regeneration, editing with history preservation
- **Attachment support** - images, audio, PDFs (with vision/text fallback)
- **Configurable parameters** - temperature, top_p, etc. synced with server defaults
- **Dark/light theme**

To build or to run the dev server (with hot reload):
### Tech Stack

- **SvelteKit** - frontend framework with Svelte 5 runes for reactive state
- **TailwindCSS** + **shadcn-svelte** - styling and UI components
- **Vite** - build tooling
- **IndexedDB** (Dexie) - local storage for conversations
- **LocalStorage** - user settings persistence

### Architecture

The WebUI follows a layered architecture:

```
Routes → Components → Hooks → Stores → Services → Storage/API
```

- **Stores** - reactive state management (`chatStore`, `conversationsStore`, `modelsStore`, `serverStore`, `settingsStore`)
- **Services** - stateless API/database communication (`ChatService`, `ModelsService`, `PropsService`, `DatabaseService`)
- **Hooks** - reusable logic (`useModelChangeValidation`, `useProcessingState`)

For detailed architecture diagrams, see [`tools/server/webui/docs/`](webui/docs/):

- `high-level-architecture.mmd` - full architecture with all modules
- `high-level-architecture-simplified.mmd` - simplified overview
- `data-flow-simplified-model-mode.mmd` - data flow for single-model mode
- `data-flow-simplified-router-mode.mmd` - data flow for multi-model mode
- `flows/*.mmd` - detailed per-domain flows (chat, conversations, models, etc.)

### Development

```sh
# make sure you have nodejs installed
# make sure you have Node.js installed
cd tools/server/webui
npm i

# to run the dev server
# run dev server (with hot reload)
npm run dev

# to build the public/index.html.gz
# run tests
npm run test

# build production bundle
npm run build
```
After `public/index.html.gz` has been generated we need to generate the c++
headers (like build/tools/server/index.html.gz.hpp) that will be included
by server.cpp. This is done by building `llama-server` as described in the
[build](#build) section above.

NOTE: if you are using the vite dev server, you can change the API base URL to llama.cpp. To do that, run this code snippet in browser's console:
After `public/index.html.gz` has been generated, rebuild `llama-server` as described in the [build](#build) section to include the updated UI.

```js
localStorage.setItem('base', 'http://localhost:8080')
```
**Note:** The Vite dev server automatically proxies API requests to `http://localhost:8080`. Make sure `llama-server` is running on that port during development.
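A minimal way to start `llama-server` on that port during development is sketched below; the model path is only a placeholder:

```sh
# serve a model on the port the Vite proxy expects
llama-server -m path/to/model.gguf --port 8080
```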
## Quick Start

@@ -1424,6 +1456,184 @@ curl http://localhost:8080/v1/messages/count_tokens \
{"input_tokens": 10}
```

## Using multiple models

`llama-server` can be launched in a **router mode** that exposes an API for dynamically loading and unloading models. The main process (the "router") automatically forwards each request to the appropriate model instance.

To start in router mode, launch `llama-server` **without specifying any model**:

```sh
llama-server
```

### Model sources

By default, the router looks for models in the cache. You can add Hugging Face models to the cache with:

```sh
llama-server -hf <user>/<model>:<tag>
```
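For example, to add the Gemma 3 model used in the examples below:

```sh
llama-server -hf ggml-org/gemma-3-4b-it-GGUF:Q4_K_M
```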
*The server must be restarted after adding a new model.*

Alternatively, you can point the router to a local directory containing your GGUF files using `--models-dir`. Example command:

```sh
llama-server --models-dir ./models_directory
```
If a model consists of multiple GGUF files (multimodal or multi-shard), they should be put into a subdirectory. The directory structure should look like this:

```sh
models_directory
│
│  # single file
├─ llama-3.2-1b-Q4_K_M.gguf
├─ Qwen3-8B-Q4_K_M.gguf
│
│  # multimodal
├─ gemma-3-4b-it-Q8_0
│  ├─ gemma-3-4b-it-Q8_0.gguf
│  └─ mmproj-F16.gguf  # file name must start with "mmproj"
│
│  # multi-shard
├─ Kimi-K2-Thinking-UD-IQ1_S
│  ├─ Kimi-K2-Thinking-UD-IQ1_S-00001-of-00006.gguf
│  ├─ Kimi-K2-Thinking-UD-IQ1_S-00002-of-00006.gguf
│  ├─ ...
│  └─ Kimi-K2-Thinking-UD-IQ1_S-00006-of-00006.gguf
```

You may also specify default arguments that will be passed to every model instance:

```sh
llama-server -ctx 8192 -n 1024 -np 2
```

Note: model instances inherit both command line arguments and environment variables from the router server.
### Routing requests

Requests are routed according to the requested model name.

For **POST** endpoints (`/v1/chat/completions`, `/v1/completions`, `/infill`, etc.), the router uses the `"model"` field in the JSON body:

```json
{
  "model": "ggml-org/gemma-3-4b-it-GGUF:Q4_K_M",
  "messages": [
    {
      "role": "user",
      "content": "hello"
    }
  ]
}
```
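For example, with `curl` (assuming the router listens on the default port 8080):

```sh
curl http://localhost:8080/v1/chat/completions \
    -H "Content-Type: application/json" \
    -d '{
        "model": "ggml-org/gemma-3-4b-it-GGUF:Q4_K_M",
        "messages": [{"role": "user", "content": "hello"}]
    }'
```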
For **GET** endpoints (`/props`, `/metrics`, etc.), the router uses the `model` query parameter (URL-encoded):

```
GET /props?model=ggml-org%2Fgemma-3-4b-it-GGUF%3AQ4_K_M
```

By default, the model will be loaded automatically if it's not loaded. To disable this, add `--no-models-autoload` when starting the server. Additionally, you can include `autoload=true|false` as a query parameter to control this behavior per-request.
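For example, to read `/props` for a model without triggering an automatic load (a sketch using the names above and the default port):

```sh
curl "http://localhost:8080/props?model=ggml-org%2Fgemma-3-4b-it-GGUF%3AQ4_K_M&autoload=false"
```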
### GET `/models`: List available models

Lists all models in the cache. The metadata of each model also includes a field indicating its status.
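You can query it with a plain GET request (again assuming the default port):

```sh
curl http://localhost:8080/models
```

An example response: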
```json
{
  "data": [{
    "id": "ggml-org/gemma-3-4b-it-GGUF:Q4_K_M",
    "in_cache": true,
    "path": "/Users/REDACTED/Library/Caches/llama.cpp/ggml-org_gemma-3-4b-it-GGUF_gemma-3-4b-it-Q4_K_M.gguf",
    "status": {
      "value": "loaded",
      "args": ["llama-server", "-ctx", "4096"]
    },
    ...
  }]
}
```

Note: For a local GGUF (stored offline in a custom directory), the model object will have `"in_cache": false`.

The `status` object can be:

```json
"status": {
  "value": "unloaded"
}
```

```json
"status": {
  "value": "loading",
  "args": ["llama-server", "-ctx", "4096"]
}
```

```json
"status": {
  "value": "unloaded",
  "args": ["llama-server", "-ctx", "4096"],
  "failed": true,
  "exit_code": 1
}
```

```json
"status": {
  "value": "loaded",
  "args": ["llama-server", "-ctx", "4096"]
}
```
### POST `/models/load`: Load a model

Load a model by name.

Payload:
- `model`: name of the model to be loaded.
- `extra_args`: (optional) an array of additional arguments to be passed to the model instance. Note: you must start the server with `--models-allow-extra-args` to enable this feature.

```json
{
  "model": "ggml-org/gemma-3-4b-it-GGUF:Q4_K_M",
  "extra_args": ["-n", "128", "--top-k", "4"]
}
```

Response:

```json
{
  "success": true
}
```
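An equivalent `curl` invocation (a sketch, assuming the default port and a model already in the cache):

```sh
curl -X POST http://localhost:8080/models/load \
    -H "Content-Type: application/json" \
    -d '{"model": "ggml-org/gemma-3-4b-it-GGUF:Q4_K_M"}'
```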
### POST `/models/unload`: Unload a model

Unload a model by name.

Payload:

```json
{
  "model": "ggml-org/gemma-3-4b-it-GGUF:Q4_K_M"
}
```

Response:

```json
{
  "success": true
}
```
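An equivalent `curl` invocation (same assumptions as above):

```sh
curl -X POST http://localhost:8080/models/unload \
    -H "Content-Type: application/json" \
    -d '{"model": "ggml-org/gemma-3-4b-it-GGUF:Q4_K_M"}'
```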
## More examples

### Interactive mode

Binary file not shown.
@@ -819,26 +819,26 @@ json oaicompat_chat_params_parse(
            auto schema_wrapper = json_value(response_format, "json_schema", json::object());
            json_schema = json_value(schema_wrapper, "schema", json::object());
        } else if (!response_type.empty() && response_type != "text") {
            throw std::runtime_error("response_format type must be one of \"text\" or \"json_object\", but got: " + response_type);
            throw std::invalid_argument("response_format type must be one of \"text\" or \"json_object\", but got: " + response_type);
        }
    }

    // get input files
    if (!body.contains("messages")) {
        throw std::runtime_error("'messages' is required");
        throw std::invalid_argument("'messages' is required");
    }
    json & messages = body.at("messages");
    if (!messages.is_array()) {
        throw std::runtime_error("Expected 'messages' to be an array");
        throw std::invalid_argument("Expected 'messages' to be an array");
    }
    for (auto & msg : messages) {
        std::string role = json_value(msg, "role", std::string());
        if (role != "assistant" && !msg.contains("content")) {
            throw std::runtime_error("All non-assistant messages must contain 'content'");
            throw std::invalid_argument("All non-assistant messages must contain 'content'");
        }
        if (role == "assistant") {
            if (!msg.contains("content") && !msg.contains("tool_calls")) {
                throw std::runtime_error("Assistant message must contain either 'content' or 'tool_calls'!");
                throw std::invalid_argument("Assistant message must contain either 'content' or 'tool_calls'!");
            }
            if (!msg.contains("content")) {
                continue; // avoid errors with no content
|
@ -850,7 +850,7 @@ json oaicompat_chat_params_parse(
|
|||
}
|
||||
|
||||
if (!content.is_array()) {
|
||||
throw std::runtime_error("Expected 'content' to be a string or an array");
|
||||
throw std::invalid_argument("Expected 'content' to be a string or an array");
|
||||
}
|
||||
|
||||
for (auto & p : content) {
|
||||
|
|
@@ -884,11 +884,11 @@ json oaicompat_chat_params_parse(
                    // try to decode base64 image
                    std::vector<std::string> parts = string_split<std::string>(url, /*separator*/ ',');
                    if (parts.size() != 2) {
                        throw std::runtime_error("Invalid image_url.url value");
                        throw std::invalid_argument("Invalid image_url.url value");
                    } else if (!string_starts_with(parts[0], "data:image/")) {
                        throw std::runtime_error("Invalid image_url.url format: " + parts[0]);
                        throw std::invalid_argument("Invalid image_url.url format: " + parts[0]);
                    } else if (!string_ends_with(parts[0], "base64")) {
                        throw std::runtime_error("image_url.url must be base64 encoded");
                        throw std::invalid_argument("image_url.url must be base64 encoded");
                    } else {
                        auto base64_data = parts[1];
                        auto decoded_data = base64_decode(base64_data);
@@ -911,7 +911,7 @@ json oaicompat_chat_params_parse(
                    std::string format = json_value(input_audio, "format", std::string());
                    // while we also support flac, we don't allow it here so we match the OAI spec
                    if (format != "wav" && format != "mp3") {
                        throw std::runtime_error("input_audio.format must be either 'wav' or 'mp3'");
                        throw std::invalid_argument("input_audio.format must be either 'wav' or 'mp3'");
                    }
                    auto decoded_data = base64_decode(data); // expected to be base64 encoded
                    out_files.push_back(decoded_data);
@@ -922,7 +922,7 @@ json oaicompat_chat_params_parse(
                    p.erase("input_audio");

                } else if (type != "text") {
                    throw std::runtime_error("unsupported content[].type");
                    throw std::invalid_argument("unsupported content[].type");
                }
            }
        }
@@ -940,7 +940,7 @@ json oaicompat_chat_params_parse(
    inputs.enable_thinking = opt.enable_thinking;
    if (!inputs.tools.empty() && inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_NONE) {
        if (body.contains("grammar")) {
            throw std::runtime_error("Cannot use custom grammar constraints with tools.");
            throw std::invalid_argument("Cannot use custom grammar constraints with tools.");
        }
        llama_params["parse_tool_calls"] = true;
    }
@@ -959,7 +959,7 @@ json oaicompat_chat_params_parse(
    } else if (enable_thinking_kwarg == "false") {
        inputs.enable_thinking = false;
    } else if (!enable_thinking_kwarg.empty() && enable_thinking_kwarg[0] == '"') {
        throw std::runtime_error("invalid type for \"enable_thinking\" (expected boolean, got string)");
        throw std::invalid_argument("invalid type for \"enable_thinking\" (expected boolean, got string)");
    }

    // if the assistant message appears at the end of list, we do not add end-of-turn token
@@ -972,14 +972,14 @@ json oaicompat_chat_params_parse(

        /* sanity check, max one assistant message at the end of the list */
        if (!inputs.messages.empty() && inputs.messages.back().role == "assistant"){
            throw std::runtime_error("Cannot have 2 or more assistant messages at the end of the list.");
            throw std::invalid_argument("Cannot have 2 or more assistant messages at the end of the list.");
        }

        /* TODO: test this properly */
        inputs.reasoning_format = COMMON_REASONING_FORMAT_NONE;

        if ( inputs.enable_thinking ) {
            throw std::runtime_error("Assistant response prefill is incompatible with enable_thinking.");
            throw std::invalid_argument("Assistant response prefill is incompatible with enable_thinking.");
        }

        inputs.add_generation_prompt = true;
@@ -1020,18 +1020,18 @@ json oaicompat_chat_params_parse(
    // Handle "n" field
    int n_choices = json_value(body, "n", 1);
    if (n_choices != 1) {
        throw std::runtime_error("Only one completion choice is allowed");
        throw std::invalid_argument("Only one completion choice is allowed");
    }

    // Handle "logprobs" field
    // TODO: the response format of this option is not yet OAI-compatible, but it seems like no one is really using it; we may need to fix it in the future
    if (json_value(body, "logprobs", false)) {
        if (has_tools && stream) {
            throw std::runtime_error("logprobs is not supported with tools + stream");
            throw std::invalid_argument("logprobs is not supported with tools + stream");
        }
        llama_params["n_probs"] = json_value(body, "top_logprobs", 20);
    } else if (body.contains("top_logprobs") && !body.at("top_logprobs").is_null()) {
        throw std::runtime_error("top_logprobs requires logprobs to be set to true");
        throw std::invalid_argument("top_logprobs requires logprobs to be set to true");
    }

    // Copy remaining properties to llama_params
@@ -1263,7 +1263,11 @@ json convert_anthropic_to_oai(const json & body) {
    return oai_body;
}

json format_embeddings_response_oaicompat(const json & request, const json & embeddings, bool use_base64) {
json format_embeddings_response_oaicompat(
        const json & request,
        const std::string & model_name,
        const json & embeddings,
        bool use_base64) {
    json data = json::array();
    int32_t n_tokens = 0;
    int i = 0;
@@ -1293,7 +1297,7 @@ json format_embeddings_response_oaicompat(const json & request, const json & emb
    }

    json res = json {
        {"model", json_value(request, "model", std::string(DEFAULT_OAICOMPAT_MODEL))},
        {"model", json_value(request, "model", model_name)},
        {"object", "list"},
        {"usage", json {
            {"prompt_tokens", n_tokens},
@@ -1307,6 +1311,7 @@ json format_embeddings_response_oaicompat(const json & request, const json & emb

json format_response_rerank(
        const json & request,
        const std::string & model_name,
        const json & ranks,
        bool is_tei_format,
        std::vector<std::string> & texts,

@@ -1338,7 +1343,7 @@ json format_response_rerank(
    if (is_tei_format) return results;

    json res = json{
        {"model", json_value(request, "model", std::string(DEFAULT_OAICOMPAT_MODEL))},
        {"model", json_value(request, "model", model_name)},
        {"object", "list"},
        {"usage", json{
            {"prompt_tokens", n_tokens},
|
|||
|
|
@ -13,8 +13,6 @@
|
|||
#include <vector>
|
||||
#include <cinttypes>
|
||||
|
||||
#define DEFAULT_OAICOMPAT_MODEL "gpt-3.5-turbo"
|
||||
|
||||
const static std::string build_info("b" + std::to_string(LLAMA_BUILD_NUMBER) + "-" + LLAMA_COMMIT);
|
||||
|
||||
using json = nlohmann::ordered_json;
|
||||
|
|
@@ -298,11 +296,16 @@ json oaicompat_chat_params_parse(
json convert_anthropic_to_oai(const json & body);

// TODO: move it to server-task.cpp
json format_embeddings_response_oaicompat(const json & request, const json & embeddings, bool use_base64 = false);
json format_embeddings_response_oaicompat(
        const json & request,
        const std::string & model_name,
        const json & embeddings,
        bool use_base64 = false);

// TODO: move it to server-task.cpp
json format_response_rerank(
        const json & request,
        const std::string & model_name,
        const json & ranks,
        bool is_tei_format,
        std::vector<std::string> & texts,
@@ -17,6 +17,7 @@
#include <cinttypes>
#include <memory>
#include <unordered_set>
#include <filesystem>

// fix problem with std::min and std::max
#if defined(_WIN32)
@@ -518,6 +519,8 @@ struct server_context_impl {
    // Necessary similarity of prompt for slot selection
    float slot_prompt_similarity = 0.0f;

    std::string model_name; // name of the loaded model, to be used by API

    common_chat_templates_ptr chat_templates;
    oaicompat_parser_options oai_parser_opt;
@@ -621,6 +624,7 @@ struct server_context_impl {
        mparams.print_timings = false;
        mparams.n_threads = params_base.cpuparams.n_threads;
        mparams.flash_attn_type = params_base.flash_attn_type;
        mparams.warmup = params_base.warmup;
        mparams.image_min_tokens = params_base.image_min_tokens;
        mparams.image_max_tokens = params_base.image_max_tokens;
        mctx = mtmd_init_from_file(mmproj_path.c_str(), model, mparams);
@@ -757,6 +761,18 @@ struct server_context_impl {
        }
        SRV_WRN("%s", "for more info see https://github.com/ggml-org/llama.cpp/pull/16391\n");

        if (!params_base.model_alias.empty()) {
            // user explicitly specified model name
            model_name = params_base.model_alias;
        } else if (!params_base.model.name.empty()) {
            // use model name in registry format (for models in cache)
            model_name = params_base.model.name;
        } else {
            // fallback: derive model name from file name
            auto model_path = std::filesystem::path(params_base.model.path);
            model_name = model_path.filename().string();
        }

        // thinking is enabled if:
        // 1. It's not explicitly disabled (reasoning_budget == 0)
        // 2. The chat template supports it
@@ -2610,7 +2626,7 @@ static std::unique_ptr<server_res_generator> handle_completions_impl(
        // OAI-compat
        task.params.res_type          = res_type;
        task.params.oaicompat_cmpl_id = completion_id;
        // oaicompat_model is already populated by params_from_json_cmpl
        task.params.oaicompat_model   = ctx_server.model_name;

        tasks.push_back(std::move(task));
    }
@@ -2938,7 +2954,7 @@ void server_routes::init_routes() {
        json data = {
            { "default_generation_settings", default_generation_settings_for_props },
            { "total_slots",                 ctx_server.params_base.n_parallel },
            { "model_alias",                 ctx_server.params_base.model_alias },
            { "model_alias",                 ctx_server.model_name },
            { "model_path",                  ctx_server.params_base.model.path },
            { "modalities", json {
                {"vision", ctx_server.oai_parser_opt.allow_image},
@@ -3180,8 +3196,8 @@ void server_routes::init_routes() {
        json models = {
            {"models", {
                {
                    {"name",        params.model_alias.empty() ? params.model.path : params.model_alias},
                    {"model",       params.model_alias.empty() ? params.model.path : params.model_alias},
                    {"name",        ctx_server.model_name},
                    {"model",       ctx_server.model_name},
                    {"modified_at", ""},
                    {"size",        ""},
                    {"digest",      ""}, // dummy value, llama.cpp does not support managing model file's hash
@@ -3203,7 +3219,7 @@ void server_routes::init_routes() {
            {"object", "list"},
            {"data", {
                {
                    {"id",       params.model_alias.empty() ? params.model.path : params.model_alias},
                    {"id",       ctx_server.model_name},
                    {"object",   "model"},
                    {"created",  std::time(0)},
                    {"owned_by", "llamacpp"},
@@ -3350,6 +3366,7 @@ void server_routes::init_routes() {
        // write JSON response
        json root = format_response_rerank(
            body,
            ctx_server.model_name,
            responses,
            is_tei_format,
            documents,
|
@ -3612,7 +3629,7 @@ std::unique_ptr<server_res_generator> server_routes::handle_embeddings_impl(cons
|
|||
|
||||
// write JSON response
|
||||
json root = res_type == TASK_RESPONSE_TYPE_OAI_EMBD
|
||||
? format_embeddings_response_oaicompat(body, responses, use_base64)
|
||||
? format_embeddings_response_oaicompat(body, ctx_server.model_name, responses, use_base64)
|
||||
: json(responses);
|
||||
res->ok(root);
|
||||
return res;
|
||||
|
|
|
|||
|
|
@@ -0,0 +1,975 @@
#include "server-common.h"
#include "server-models.h"

#include "download.h"

#include <cpp-httplib/httplib.h> // TODO: remove this once we use HTTP client from download.h
#include <sheredom/subprocess.h>

#include <functional>
#include <thread>
#include <mutex>
#include <condition_variable>
#include <cstring>
#include <atomic>
#include <chrono>
#include <queue>

#ifdef _WIN32
#include <winsock2.h>
#else
#include <sys/socket.h>
#include <netinet/in.h>
#include <arpa/inet.h>
#include <unistd.h>
#endif

#if defined(__APPLE__) && defined(__MACH__)
// macOS: use _NSGetExecutablePath to get the executable path
#include <mach-o/dyld.h>
#include <limits.h>
#endif

#define CMD_EXIT "exit"

static std::filesystem::path get_server_exec_path() {
#if defined(_WIN32)
    wchar_t buf[32768] = { 0 }; // Large buffer to handle long paths
    DWORD len = GetModuleFileNameW(nullptr, buf, _countof(buf));
    if (len == 0 || len >= _countof(buf)) {
        throw std::runtime_error("GetModuleFileNameW failed or path too long");
    }
    return std::filesystem::path(buf);
#elif defined(__APPLE__) && defined(__MACH__)
    char small_path[PATH_MAX];
    uint32_t size = sizeof(small_path);

    if (_NSGetExecutablePath(small_path, &size) == 0) {
        // resolve any symlinks to get absolute path
        try {
            return std::filesystem::canonical(std::filesystem::path(small_path));
        } catch (...) {
            return std::filesystem::path(small_path);
        }
    } else {
        // buffer was too small, allocate required size and call again
        std::vector<char> buf(size);
        if (_NSGetExecutablePath(buf.data(), &size) == 0) {
            try {
                return std::filesystem::canonical(std::filesystem::path(buf.data()));
            } catch (...) {
                return std::filesystem::path(buf.data());
            }
        }
        throw std::runtime_error("_NSGetExecutablePath failed after buffer resize");
    }
#else
    char path[FILENAME_MAX];
    ssize_t count = readlink("/proc/self/exe", path, FILENAME_MAX);
    if (count <= 0) {
        throw std::runtime_error("failed to resolve /proc/self/exe");
    }
    return std::filesystem::path(std::string(path, count));
#endif
}
struct local_model {
    std::string name;
    std::string path;
    std::string path_mmproj;
};

static std::vector<local_model> list_local_models(const std::string & dir) {
    if (!std::filesystem::exists(dir) || !std::filesystem::is_directory(dir)) {
        throw std::runtime_error(string_format("error: '%s' does not exist or is not a directory\n", dir.c_str()));
    }

    std::vector<local_model> models;
    auto scan_subdir = [&models](const std::string & subdir_path, const std::string & name) {
        auto files = fs_list(subdir_path, false);
        common_file_info model_file;
        common_file_info first_shard_file;
        common_file_info mmproj_file;
        for (const auto & file : files) {
            if (string_ends_with(file.name, ".gguf")) {
                if (file.name.find("mmproj") != std::string::npos) {
                    mmproj_file = file;
                } else if (file.name.find("-00001-of-") != std::string::npos) {
                    first_shard_file = file;
                } else {
                    model_file = file;
                }
            }
        }
        // single file model
        local_model model{
            /* name        */ name,
            /* path        */ first_shard_file.path.empty() ? model_file.path : first_shard_file.path,
            /* path_mmproj */ mmproj_file.path // can be empty
        };
        if (!model.path.empty()) {
            models.push_back(model);
        }
    };

    auto files = fs_list(dir, true);
    for (const auto & file : files) {
        if (file.is_dir) {
            scan_subdir(file.path, file.name);
        } else if (string_ends_with(file.name, ".gguf")) {
            // single file model
            std::string name = file.name;
            string_replace_all(name, ".gguf", "");
            local_model model{
                /* name        */ name,
                /* path        */ file.path,
                /* path_mmproj */ ""
            };
            models.push_back(model);
        }
    }
    return models;
}
//
// server_models
//

server_models::server_models(
        const common_params & params,
        int argc,
        char ** argv,
        char ** envp) : base_params(params) {
    for (int i = 0; i < argc; i++) {
        base_args.push_back(std::string(argv[i]));
    }
    for (char ** env = envp; *env != nullptr; env++) {
        base_env.push_back(std::string(*env));
    }
    GGML_ASSERT(!base_args.empty());
    // set binary path
    try {
        base_args[0] = get_server_exec_path().string();
    } catch (const std::exception & e) {
        LOG_WRN("failed to get server executable path: %s\n", e.what());
        LOG_WRN("using original argv[0] as fallback: %s\n", base_args[0].c_str());
    }
    // TODO: allow refreshing cached model list
    // add cached models
    auto cached_models = common_list_cached_models();
    for (const auto & model : cached_models) {
        server_model_meta meta{
            /* name        */ model.to_string(),
            /* path        */ model.manifest_path,
            /* path_mmproj */ "", // auto-detected when loading
            /* in_cache    */ true,
            /* port        */ 0,
            /* status      */ SERVER_MODEL_STATUS_UNLOADED,
            /* last_used   */ 0,
            /* args        */ std::vector<std::string>(),
            /* exit_code   */ 0
        };
        mapping[meta.name] = instance_t{
            /* subproc */ std::make_shared<subprocess_s>(),
            /* th      */ std::thread(),
            /* meta    */ meta
        };
    }
    // add local models specified via --models-dir
    if (!params.models_dir.empty()) {
        auto local_models = list_local_models(params.models_dir);
        for (const auto & model : local_models) {
            if (mapping.find(model.name) != mapping.end()) {
                // already exists in cached models, skip
                continue;
            }
            server_model_meta meta{
                /* name        */ model.name,
                /* path        */ model.path,
                /* path_mmproj */ model.path_mmproj,
                /* in_cache    */ false,
                /* port        */ 0,
                /* status      */ SERVER_MODEL_STATUS_UNLOADED,
                /* last_used   */ 0,
                /* args        */ std::vector<std::string>(),
                /* exit_code   */ 0
            };
            mapping[meta.name] = instance_t{
                /* subproc */ std::make_shared<subprocess_s>(),
                /* th      */ std::thread(),
                /* meta    */ meta
            };
        }
    }
}
void server_models::update_meta(const std::string & name, const server_model_meta & meta) {
    std::lock_guard<std::mutex> lk(mutex);
    auto it = mapping.find(name);
    if (it != mapping.end()) {
        it->second.meta = meta;
    }
    cv.notify_all(); // notify wait_until_loaded
}

bool server_models::has_model(const std::string & name) {
    std::lock_guard<std::mutex> lk(mutex);
    return mapping.find(name) != mapping.end();
}

std::optional<server_model_meta> server_models::get_meta(const std::string & name) {
    std::lock_guard<std::mutex> lk(mutex);
    auto it = mapping.find(name);
    if (it != mapping.end()) {
        return it->second.meta;
    }
    return std::nullopt;
}
static int get_free_port() {
#ifdef _WIN32
    WSADATA wsaData;
    if (WSAStartup(MAKEWORD(2, 2), &wsaData) != 0) {
        return -1;
    }
    typedef SOCKET native_socket_t;
#define INVALID_SOCKET_VAL INVALID_SOCKET
#define CLOSE_SOCKET(s) closesocket(s)
#else
    typedef int native_socket_t;
#define INVALID_SOCKET_VAL -1
#define CLOSE_SOCKET(s) close(s)
#endif

    native_socket_t sock = socket(AF_INET, SOCK_STREAM, 0);
    if (sock == INVALID_SOCKET_VAL) {
#ifdef _WIN32
        WSACleanup();
#endif
        return -1;
    }

    struct sockaddr_in serv_addr;
    std::memset(&serv_addr, 0, sizeof(serv_addr));
    serv_addr.sin_family      = AF_INET;
    serv_addr.sin_addr.s_addr = htonl(INADDR_ANY);
    serv_addr.sin_port        = htons(0);

    if (bind(sock, (struct sockaddr *) &serv_addr, sizeof(serv_addr)) != 0) {
        CLOSE_SOCKET(sock);
#ifdef _WIN32
        WSACleanup();
#endif
        return -1;
    }

#ifdef _WIN32
    int namelen = sizeof(serv_addr);
#else
    socklen_t namelen = sizeof(serv_addr);
#endif
    if (getsockname(sock, (struct sockaddr *) &serv_addr, &namelen) != 0) {
        CLOSE_SOCKET(sock);
#ifdef _WIN32
        WSACleanup();
#endif
        return -1;
    }

    int port = ntohs(serv_addr.sin_port);

    CLOSE_SOCKET(sock);
#ifdef _WIN32
    WSACleanup();
#endif

    return port;
}
// helper to convert vector<string> to char **
// pointers are only valid as long as the original vector is valid
static std::vector<char *> to_char_ptr_array(const std::vector<std::string> & vec) {
    std::vector<char *> result;
    result.reserve(vec.size() + 1);
    for (const auto & s : vec) {
        result.push_back(const_cast<char *>(s.c_str()));
    }
    result.push_back(nullptr);
    return result;
}

std::vector<server_model_meta> server_models::get_all_meta() {
    std::lock_guard<std::mutex> lk(mutex);
    std::vector<server_model_meta> result;
    result.reserve(mapping.size());
    for (const auto & [name, inst] : mapping) {
        result.push_back(inst.meta);
    }
    return result;
}
void server_models::unload_lru() {
    if (base_params.models_max <= 0) {
        return; // no limit
    }
    // remove one of the servers if we passed the models_max (least recently used - LRU)
    std::string lru_model_name = "";
    int64_t lru_last_used = ggml_time_ms();
    size_t count_active = 0;
    {
        std::lock_guard<std::mutex> lk(mutex);
        for (const auto & m : mapping) {
            if (m.second.meta.is_active()) {
                count_active++;
                if (m.second.meta.last_used < lru_last_used) {
                    lru_model_name = m.first;
                    lru_last_used  = m.second.meta.last_used;
                }
            }
        }
    }
    if (!lru_model_name.empty() && count_active >= (size_t) base_params.models_max) {
        SRV_INF("models_max limit reached, removing LRU name=%s\n", lru_model_name.c_str());
        unload(lru_model_name);
    }
}

static void add_or_replace_arg(std::vector<std::string> & args, const std::string & key, const std::string & value) {
    for (size_t i = 0; i < args.size(); i++) {
        if (args[i] == key && i + 1 < args.size()) {
            args[i + 1] = value;
            return;
        }
    }
    // not found, append
    args.push_back(key);
    args.push_back(value);
}
void server_models::load(const std::string & name, bool auto_load) {
    if (!has_model(name)) {
        throw std::runtime_error("model name=" + name + " is not found");
    }
    unload_lru();

    std::lock_guard<std::mutex> lk(mutex);

    auto meta = mapping[name].meta;
    if (meta.status != SERVER_MODEL_STATUS_UNLOADED) {
        SRV_INF("model %s is not ready\n", name.c_str());
        return;
    }

    // prepare new instance info
    instance_t inst;
    inst.meta           = meta;
    inst.meta.port      = get_free_port();
    inst.meta.status    = SERVER_MODEL_STATUS_LOADING;
    inst.meta.last_used = ggml_time_ms();

    if (inst.meta.port <= 0) {
        throw std::runtime_error("failed to get a port number");
    }

    inst.subproc = std::make_shared<subprocess_s>();
    {
        SRV_INF("spawning server instance with name=%s on port %d\n", inst.meta.name.c_str(), inst.meta.port);

        std::vector<std::string> child_args;
        if (auto_load && !meta.args.empty()) {
            child_args = meta.args; // copy previous args
        } else {
            child_args = base_args; // copy
            if (inst.meta.in_cache) {
                add_or_replace_arg(child_args, "-hf", inst.meta.name);
            } else {
                add_or_replace_arg(child_args, "-m", inst.meta.path);
                if (!inst.meta.path_mmproj.empty()) {
                    add_or_replace_arg(child_args, "--mmproj", inst.meta.path_mmproj);
                }
            }
        }

        // set model args
        add_or_replace_arg(child_args, "--port", std::to_string(inst.meta.port));
        add_or_replace_arg(child_args, "--alias", inst.meta.name);

        std::vector<std::string> child_env = base_env; // copy
        child_env.push_back("LLAMA_SERVER_ROUTER_PORT=" + std::to_string(base_params.port));

        SRV_INF("%s", "spawning server instance with args:\n");
        for (const auto & arg : child_args) {
            SRV_INF("  %s\n", arg.c_str());
        }
        inst.meta.args = child_args; // save for debugging

        std::vector<char *> argv = to_char_ptr_array(child_args);
        std::vector<char *> envp = to_char_ptr_array(child_env);

        int options = subprocess_option_no_window | subprocess_option_combined_stdout_stderr;
        int result = subprocess_create_ex(argv.data(), options, envp.data(), inst.subproc.get());
        if (result != 0) {
            throw std::runtime_error("failed to spawn server instance");
        }

        inst.stdin_file = subprocess_stdin(inst.subproc.get());
    }

    // start a thread to manage the child process
    // captured variables are guaranteed to be destroyed only after the thread is joined
    inst.th = std::thread([this, name, child_proc = inst.subproc, port = inst.meta.port]() {
        // read stdout/stderr and forward to main server log
        FILE * p_stdout_stderr = subprocess_stdout(child_proc.get());
        if (p_stdout_stderr) {
            char buffer[4096];
            while (fgets(buffer, sizeof(buffer), p_stdout_stderr) != nullptr) {
                LOG("[%5d] %s", port, buffer);
            }
        } else {
            SRV_ERR("failed to get stdout/stderr of child process for name=%s\n", name.c_str());
        }
        // we reach here when the child process exits
        int exit_code = 0;
        subprocess_join(child_proc.get(), &exit_code);
        subprocess_destroy(child_proc.get());
        // update PID and status
        {
            std::lock_guard<std::mutex> lk(mutex);
            auto it = mapping.find(name);
            if (it != mapping.end()) {
                auto & meta = it->second.meta;
                meta.exit_code = exit_code;
                meta.status    = SERVER_MODEL_STATUS_UNLOADED;
            }
            cv.notify_all();
        }
        SRV_INF("instance name=%s exited with status %d\n", name.c_str(), exit_code);
    });

    // clean up old process/thread if exists
    {
        auto & old_instance = mapping[name];
        // old process should have exited already, but just in case, we clean it up here
        if (subprocess_alive(old_instance.subproc.get())) {
            SRV_WRN("old process for model name=%s is still alive, this is unexpected\n", name.c_str());
            subprocess_terminate(old_instance.subproc.get()); // force kill
        }
        if (old_instance.th.joinable()) {
            old_instance.th.join();
        }
    }

    mapping[name] = std::move(inst);
    cv.notify_all();
}
static void interrupt_subprocess(FILE * stdin_file) {
    // because subprocess.h does not provide a way to send SIGINT,
    // we will send a command to the child process to exit gracefully
    if (stdin_file) {
        fprintf(stdin_file, "%s\n", CMD_EXIT);
        fflush(stdin_file);
    }
}

void server_models::unload(const std::string & name) {
    std::lock_guard<std::mutex> lk(mutex);
    auto it = mapping.find(name);
    if (it != mapping.end()) {
        if (it->second.meta.is_active()) {
            SRV_INF("unloading model instance name=%s\n", name.c_str());
            interrupt_subprocess(it->second.stdin_file);
            // status change will be handled by the managing thread
        } else {
            SRV_WRN("model instance name=%s is not loaded\n", name.c_str());
        }
    }
}

void server_models::unload_all() {
    std::vector<std::thread> to_join;
    {
        std::lock_guard<std::mutex> lk(mutex);
        for (auto & [name, inst] : mapping) {
            if (inst.meta.is_active()) {
                SRV_INF("unloading model instance name=%s\n", name.c_str());
                interrupt_subprocess(inst.stdin_file);
                // status change will be handled by the managing thread
            }
            // moving the thread to join list to avoid deadlock
            to_join.push_back(std::move(inst.th));
        }
    }
    for (auto & th : to_join) {
        if (th.joinable()) {
            th.join();
        }
    }
}
void server_models::update_status(const std::string & name, server_model_status status) {
|
||||
// for now, we only allow updating to LOADED status
|
||||
if (status != SERVER_MODEL_STATUS_LOADED) {
|
||||
throw std::runtime_error("invalid status value");
|
||||
}
|
||||
auto meta = get_meta(name);
|
||||
if (meta.has_value()) {
|
||||
meta->status = status;
|
||||
update_meta(name, meta.value());
|
||||
}
|
||||
}
|
||||
|
||||
void server_models::wait_until_loaded(const std::string & name) {
|
||||
std::unique_lock<std::mutex> lk(mutex);
|
||||
cv.wait(lk, [this, &name]() {
|
||||
auto it = mapping.find(name);
|
||||
if (it != mapping.end()) {
|
||||
return it->second.meta.status != SERVER_MODEL_STATUS_LOADING;
|
||||
}
|
||||
return false;
|
||||
});
|
||||
}
|
||||
|
||||
bool server_models::ensure_model_loaded(const std::string & name) {
|
||||
auto meta = get_meta(name);
|
||||
if (!meta.has_value()) {
|
||||
throw std::runtime_error("model name=" + name + " is not found");
|
||||
}
|
||||
if (meta->status == SERVER_MODEL_STATUS_LOADED) {
|
||||
return false; // already loaded
|
||||
}
|
||||
if (meta->status == SERVER_MODEL_STATUS_UNLOADED) {
|
||||
SRV_INF("model name=%s is not loaded, loading...\n", name.c_str());
|
||||
load(name, true);
|
||||
}
|
||||
|
||||
SRV_INF("waiting until model name=%s is fully loaded...\n", name.c_str());
|
||||
wait_until_loaded(name);
|
||||
|
||||
// check final status
|
||||
meta = get_meta(name);
|
||||
if (!meta.has_value() || meta->is_failed()) {
|
||||
throw std::runtime_error("model name=" + name + " failed to load");
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
server_http_res_ptr server_models::proxy_request(const server_http_req & req, const std::string & method, const std::string & name, bool update_last_used) {
|
||||
auto meta = get_meta(name);
|
||||
if (!meta.has_value()) {
|
||||
throw std::runtime_error("model name=" + name + " is not found");
|
||||
}
|
||||
if (meta->status != SERVER_MODEL_STATUS_LOADED) {
|
||||
throw std::invalid_argument("model name=" + name + " is not loaded");
|
||||
}
|
||||
if (update_last_used) {
|
||||
std::unique_lock<std::mutex> lk(mutex);
|
||||
mapping[name].meta.last_used = ggml_time_ms();
|
||||
}
|
||||
SRV_INF("proxying request to model %s on port %d\n", name.c_str(), meta->port);
|
||||
auto proxy = std::make_unique<server_http_proxy>(
|
||||
method,
|
||||
base_params.hostname,
|
||||
meta->port,
|
||||
req.path,
|
||||
req.headers,
|
||||
req.body,
|
||||
req.should_stop);
|
||||
return proxy;
|
||||
}
|
||||
|
||||
std::thread server_models::setup_child_server(const common_params & base_params, int router_port, const std::string & name, std::function<void(int)> & shutdown_handler) {
|
||||
// send a notification to the router server that a model instance is ready
|
||||
// TODO @ngxson : use HTTP client from libcommon
|
||||
httplib::Client cli(base_params.hostname, router_port);
|
||||
cli.set_connection_timeout(0, 200000); // 200 milliseconds
|
||||
|
||||
httplib::Request req;
|
||||
req.method = "POST";
|
||||
req.path = "/models/status";
|
||||
req.set_header("Content-Type", "application/json");
|
||||
if (!base_params.api_keys.empty()) {
|
||||
req.set_header("Authorization", "Bearer " + base_params.api_keys[0]);
|
||||
}
|
||||
|
||||
json body;
|
||||
body["model"] = name;
|
||||
body["value"] = server_model_status_to_string(SERVER_MODEL_STATUS_LOADED);
|
||||
req.body = body.dump();
|
||||
|
||||
SRV_INF("notifying router server (port=%d) that model %s is ready\n", router_port, name.c_str());
|
||||
auto result = cli.send(std::move(req));
|
||||
if (result.error() != httplib::Error::Success) {
|
||||
auto err_str = httplib::to_string(result.error());
|
||||
SRV_ERR("failed to notify router server: %s\n", err_str.c_str());
|
||||
exit(1); // force exit
|
||||
}
|
||||
|
||||
// setup thread for monitoring stdin
|
||||
return std::thread([shutdown_handler]() {
|
||||
// wait for EOF on stdin
|
||||
SRV_INF("%s", "child server monitoring thread started, waiting for EOF on stdin...\n");
|
||||
bool eof = false;
|
||||
while (true) {
|
||||
std::string line;
|
||||
            if (!std::getline(std::cin, line)) {
                // EOF detected: it means the router server unexpectedly exited or was killed
                eof = true;
                break;
            }
            if (line.find(CMD_EXIT) != std::string::npos) {
                SRV_INF("%s", "exit command received, exiting...\n");
                shutdown_handler(0);
                break;
            }
        }
        if (eof) {
            SRV_INF("%s", "EOF on stdin detected, forcing shutdown...\n");
            exit(1);
        }
    });
}

//
// server_models_routes
//

static void res_ok(std::unique_ptr<server_http_res> & res, const json & response_data) {
    res->status = 200;
    res->data = safe_json_to_str(response_data);
}

static void res_err(std::unique_ptr<server_http_res> & res, const json & error_data) {
    res->status = json_value(error_data, "code", 500);
    res->data = safe_json_to_str({{ "error", error_data }});
}

static bool router_validate_model(const std::string & name, server_models & models, bool models_autoload, std::unique_ptr<server_http_res> & res) {
    if (name.empty()) {
        res_err(res, format_error_response("model name is missing from the request", ERROR_TYPE_INVALID_REQUEST));
        return false;
    }
    auto meta = models.get_meta(name);
    if (!meta.has_value()) {
        res_err(res, format_error_response("model not found", ERROR_TYPE_INVALID_REQUEST));
        return false;
    }
    if (models_autoload) {
        models.ensure_model_loaded(name);
    } else {
        if (meta->status != SERVER_MODEL_STATUS_LOADED) {
            res_err(res, format_error_response("model is not loaded", ERROR_TYPE_INVALID_REQUEST));
            return false;
        }
    }
    return true;
}

static bool is_autoload(const common_params & params, const server_http_req & req) {
    std::string autoload = req.get_param("autoload");
    if (autoload.empty()) {
        return params.models_autoload;
    } else {
        return autoload == "true" || autoload == "1";
    }
}

void server_models_routes::init_routes() {
    this->get_router_props = [this](const server_http_req & req) {
        std::string name = req.get_param("model");
        if (name.empty()) {
            // main instance
            auto res = std::make_unique<server_http_res>();
            res_ok(res, {
                // TODO: add support for this on web UI
                {"role", "router"},
                {"max_instances", 4}, // dummy value for testing
                // this is a dummy response to make sure webui doesn't break
                {"model_alias", "llama-server"},
                {"model_path", "none"},
                {"default_generation_settings", {
                    {"params", json{}},
                    {"n_ctx", 0},
                }},
            });
            return res;
        }
        return proxy_get(req);
    };

    this->proxy_get = [this](const server_http_req & req) {
        std::string method = "GET";
        std::string name = req.get_param("model");
        bool autoload = is_autoload(params, req);
        auto error_res = std::make_unique<server_http_res>();
        if (!router_validate_model(name, models, autoload, error_res)) {
            return error_res;
        }
        return models.proxy_request(req, method, name, false);
    };

    this->proxy_post = [this](const server_http_req & req) {
        std::string method = "POST";
        json body = json::parse(req.body);
        std::string name = json_value(body, "model", std::string());
        bool autoload = is_autoload(params, req);
        auto error_res = std::make_unique<server_http_res>();
        if (!router_validate_model(name, models, autoload, error_res)) {
            return error_res;
        }
        return models.proxy_request(req, method, name, true); // update last usage for POST request only
    };

    this->get_router_models = [this](const server_http_req &) {
        auto res = std::make_unique<server_http_res>();
        json models_json = json::array();
        auto all_models = models.get_all_meta();
        std::time_t t = std::time(0);
        for (const auto & meta : all_models) {
            json status {
                {"value", server_model_status_to_string(meta.status)},
                {"args", meta.args},
            };
            if (meta.is_failed()) {
                status["exit_code"] = meta.exit_code;
                status["failed"] = true;
            }
            models_json.push_back(json {
                {"id", meta.name},
                {"object", "model"}, // for OAI-compat
                {"owned_by", "llamacpp"}, // for OAI-compat
                {"created", t}, // for OAI-compat
                {"in_cache", meta.in_cache},
                {"path", meta.path},
                {"status", status},
                // TODO: add other fields, may require reading GGUF metadata
            });
        }
        res_ok(res, {
            {"data", models_json},
            {"object", "list"},
        });
        return res;
    };

    this->post_router_models_load = [this](const server_http_req & req) {
        auto res = std::make_unique<server_http_res>();
        json body = json::parse(req.body);
        std::string name = json_value(body, "model", std::string());
        auto model = models.get_meta(name);
        if (!model.has_value()) {
            res_err(res, format_error_response("model is not found", ERROR_TYPE_NOT_FOUND));
            return res;
        }
        if (model->status == SERVER_MODEL_STATUS_LOADED) {
            res_err(res, format_error_response("model is already loaded", ERROR_TYPE_INVALID_REQUEST));
            return res;
        }
        models.load(name, false);
        res_ok(res, {{"success", true}});
        return res;
    };

    // used by child process to notify the router about status change
    // TODO @ngxson : maybe implement authentication for this endpoint in the future
    this->post_router_models_status = [this](const server_http_req & req) {
        auto res = std::make_unique<server_http_res>();
        json body = json::parse(req.body);
        std::string model = json_value(body, "model", std::string());
        std::string value = json_value(body, "value", std::string());
        models.update_status(model, server_model_status_from_string(value));
        res_ok(res, {{"success", true}});
        return res;
    };

    this->post_router_models_unload = [this](const server_http_req & req) {
        auto res = std::make_unique<server_http_res>();
        json body = json::parse(req.body);
        std::string name = json_value(body, "model", std::string());
        auto model = models.get_meta(name);
        if (!model.has_value()) {
            res_err(res, format_error_response("model is not found", ERROR_TYPE_INVALID_REQUEST));
            return res;
        }
        if (model->status != SERVER_MODEL_STATUS_LOADED) {
            res_err(res, format_error_response("model is not loaded", ERROR_TYPE_INVALID_REQUEST));
            return res;
        }
        models.unload(name);
        res_ok(res, {{"success", true}});
        return res;
    };
}

//
// server_http_proxy
//

// simple implementation of a pipe
// used for streaming data between threads
template<typename T>
struct pipe_t {
    std::mutex mutex;
    std::condition_variable cv;
    std::queue<T> queue;
    std::atomic<bool> writer_closed{false};
    std::atomic<bool> reader_closed{false};
    void close_write() {
        writer_closed.store(true, std::memory_order_relaxed);
        cv.notify_all();
    }
    void close_read() {
        reader_closed.store(true, std::memory_order_relaxed);
        cv.notify_all();
    }
    bool read(T & output, const std::function<bool()> & should_stop) {
        std::unique_lock<std::mutex> lk(mutex);
        constexpr auto poll_interval = std::chrono::milliseconds(500);
        while (true) {
            if (!queue.empty()) {
                output = std::move(queue.front());
                queue.pop();
                return true;
            }
            if (writer_closed.load()) {
                return false; // clean EOF
            }
            if (should_stop()) {
                close_read(); // signal broken pipe to writer
                return false; // cancelled / reader no longer alive
            }
            cv.wait_for(lk, poll_interval);
        }
    }
    bool write(T && data) {
        std::lock_guard<std::mutex> lk(mutex);
        if (reader_closed.load()) {
            return false; // broken pipe
        }
        queue.push(std::move(data));
        cv.notify_one();
        return true;
    }
};

server_http_proxy::server_http_proxy(
        const std::string & method,
        const std::string & host,
        int port,
        const std::string & path,
        const std::map<std::string, std::string> & headers,
        const std::string & body,
        const std::function<bool()> should_stop) {
    // shared between reader and writer threads
    auto cli = std::make_shared<httplib::Client>(host, port);
    auto pipe = std::make_shared<pipe_t<msg_t>>();

    // setup Client
    cli->set_connection_timeout(0, 200000); // 200 milliseconds
    this->status = 500; // to be overwritten upon response
    this->cleanup = [pipe]() {
        pipe->close_read();
        pipe->close_write();
    };

    // wire up the receive end of the pipe
    this->next = [pipe, should_stop](std::string & out) -> bool {
        msg_t msg;
        bool has_next = pipe->read(msg, should_stop);
        if (!msg.data.empty()) {
            out = std::move(msg.data);
        }
        return has_next; // false if EOF or pipe broken
    };

    // wire up the HTTP client
    // note: do NOT capture `this` pointer, as it may be destroyed before the thread ends
    httplib::ResponseHandler response_handler = [pipe, cli](const httplib::Response & response) {
        msg_t msg;
        msg.status = response.status;
        for (const auto & [key, value] : response.headers) {
            msg.headers[key] = value;
        }
        return pipe->write(std::move(msg)); // send headers first
    };
    httplib::ContentReceiverWithProgress content_receiver = [pipe](const char * data, size_t data_length, size_t, size_t) {
        // send data chunks
        // returns false if pipe is closed / broken (signal to stop receiving)
        return pipe->write({{}, 0, std::string(data, data_length)});
    };

    // prepare the request to destination server
    httplib::Request req;
    {
        req.method = method;
        req.path = path;
        for (const auto & [key, value] : headers) {
            req.set_header(key, value);
        }
        req.body = body;
        req.response_handler = response_handler;
        req.content_receiver = content_receiver;
    }

    // start the proxy thread
    SRV_DBG("start proxy thread %s %s\n", req.method.c_str(), req.path.c_str());
    this->thread = std::thread([cli, pipe, req]() {
        auto result = cli->send(std::move(req));
        if (result.error() != httplib::Error::Success) {
            auto err_str = httplib::to_string(result.error());
            SRV_ERR("http client error: %s\n", err_str.c_str());
            pipe->write({{}, 500, ""}); // header
            pipe->write({{}, 0, "proxy error: " + err_str}); // body
        }
        pipe->close_write(); // signal EOF to reader
        SRV_DBG("%s", "client request thread ended\n");
    });
    this->thread.detach();

    // wait for the first chunk (headers)
    msg_t header;
    if (pipe->read(header, should_stop)) {
        SRV_DBG("%s", "received response headers\n");
        this->status = header.status;
        this->headers = header.headers;
    } else {
        SRV_DBG("%s", "no response headers received (request cancelled?)\n");
    }
}

@@ -0,0 +1,174 @@
#pragma once

#include "common.h"
#include "server-http.h"

#include <mutex>
#include <condition_variable>
#include <functional>
#include <memory>

/**
 * state diagram:
 *
 * UNLOADED ──► LOADING ──► LOADED
 *    ▲            │           │
 *    └───failed───┘           │
 *    ▲                        │
 *    └────────unloaded────────┘
 */
enum server_model_status {
    // TODO: also add downloading state when the logic is added
    SERVER_MODEL_STATUS_UNLOADED,
    SERVER_MODEL_STATUS_LOADING,
    SERVER_MODEL_STATUS_LOADED
};

static server_model_status server_model_status_from_string(const std::string & status_str) {
    if (status_str == "unloaded") {
        return SERVER_MODEL_STATUS_UNLOADED;
    }
    if (status_str == "loading") {
        return SERVER_MODEL_STATUS_LOADING;
    }
    if (status_str == "loaded") {
        return SERVER_MODEL_STATUS_LOADED;
    }
    throw std::runtime_error("invalid server model status");
}

static std::string server_model_status_to_string(server_model_status status) {
    switch (status) {
        case SERVER_MODEL_STATUS_UNLOADED: return "unloaded";
        case SERVER_MODEL_STATUS_LOADING:  return "loading";
        case SERVER_MODEL_STATUS_LOADED:   return "loaded";
        default: return "unknown";
    }
}

struct server_model_meta {
    std::string name;
    std::string path;
    std::string path_mmproj; // only available if in_cache=false
    bool in_cache = false; // if true, use -hf; use -m otherwise
    int port = 0;
    server_model_status status = SERVER_MODEL_STATUS_UNLOADED;
    int64_t last_used = 0; // for LRU unloading
    std::vector<std::string> args; // additional args passed to the model instance (used for debugging)
    int exit_code = 0; // exit code of the model instance process (only valid if status == FAILED)

    bool is_active() const {
        return status == SERVER_MODEL_STATUS_LOADED || status == SERVER_MODEL_STATUS_LOADING;
    }

    bool is_failed() const {
        return status == SERVER_MODEL_STATUS_UNLOADED && exit_code != 0;
    }
};

struct subprocess_s;

struct server_models {
private:
    struct instance_t {
        std::shared_ptr<subprocess_s> subproc; // shared between main thread and monitoring thread
        std::thread th;
        server_model_meta meta;
        FILE * stdin_file = nullptr;
    };

    std::mutex mutex;
    std::condition_variable cv;
    std::map<std::string, instance_t> mapping;

    common_params base_params;
    std::vector<std::string> base_args;
    std::vector<std::string> base_env;

    void update_meta(const std::string & name, const server_model_meta & meta);

    // unload least recently used models if the limit is reached
    void unload_lru();

public:
    server_models(const common_params & params, int argc, char ** argv, char ** envp);

    // check if a model instance exists
    bool has_model(const std::string & name);

    // return a copy of model metadata
    std::optional<server_model_meta> get_meta(const std::string & name);

    // return a copy of all model metadata
    std::vector<server_model_meta> get_all_meta();

    // if auto_load is true, load the model with previous args if any
    void load(const std::string & name, bool auto_load);
    void unload(const std::string & name);
    void unload_all();

    // update the status of a model instance
    void update_status(const std::string & name, server_model_status status);

    // wait until the model instance is fully loaded
    // return when the model is loaded or failed to load
    void wait_until_loaded(const std::string & name);

    // load the model if not loaded, otherwise do nothing
    // return false if model is already loaded; return true otherwise (meta may need to be refreshed)
    bool ensure_model_loaded(const std::string & name);

    // proxy an HTTP request to the model instance
    server_http_res_ptr proxy_request(const server_http_req & req, const std::string & method, const std::string & name, bool update_last_used);

    // notify the router server that a model instance is ready
    // return the monitoring thread (to be joined by the caller)
    static std::thread setup_child_server(const common_params & base_params, int router_port, const std::string & name, std::function<void(int)> & shutdown_handler);
};

struct server_models_routes {
    common_params params;
    server_models models;
    server_models_routes(const common_params & params, int argc, char ** argv, char ** envp)
        : params(params), models(params, argc, argv, envp) {
        init_routes();
    }

    void init_routes();
    // handlers are lambda functions, so that they can capture `this` without std::bind
    server_http_context::handler_t get_router_props;
    server_http_context::handler_t proxy_get;
    server_http_context::handler_t proxy_post;
    server_http_context::handler_t get_router_models;
    server_http_context::handler_t post_router_models_load;
    server_http_context::handler_t post_router_models_status;
    server_http_context::handler_t post_router_models_unload;
};

/**
 * A simple HTTP proxy that forwards requests to another server
 * and relays the responses back.
 */
struct server_http_proxy : server_http_res {
    std::function<void()> cleanup = nullptr;
public:
    server_http_proxy(const std::string & method,
                      const std::string & host,
                      int port,
                      const std::string & path,
                      const std::map<std::string, std::string> & headers,
                      const std::string & body,
                      const std::function<bool()> should_stop);
    ~server_http_proxy() {
        if (cleanup) {
            cleanup();
        }
    }
private:
    std::thread thread;
    struct msg_t {
        std::map<std::string, std::string> headers;
        int status = 0;
        std::string data;
    };
};

@@ -450,9 +450,6 @@ task_params server_task::params_from_json_cmpl(
         }
     }
 
-    std::string model_name = params_base.model_alias.empty() ? DEFAULT_OAICOMPAT_MODEL : params_base.model_alias;
-    params.oaicompat_model = json_value(data, "model", model_name);
-
     return params;
 }
 
@@ -1,5 +1,6 @@
 #include "server-context.h"
 #include "server-http.h"
+#include "server-models.h"
 
 #include "arg.h"
 #include "common.h"
@@ -33,30 +34,36 @@ static inline void signal_handler(int signal) {
 static server_http_context::handler_t ex_wrapper(server_http_context::handler_t func) {
     return [func = std::move(func)](const server_http_req & req) -> server_http_res_ptr {
         std::string message;
+        error_type error;
         try {
             return func(req);
+        } catch (const std::invalid_argument & e) {
+            error = ERROR_TYPE_INVALID_REQUEST;
+            message = e.what();
         } catch (const std::exception & e) {
+            error = ERROR_TYPE_SERVER;
             message = e.what();
         } catch (...) {
+            error = ERROR_TYPE_SERVER;
             message = "unknown error";
         }
 
         auto res = std::make_unique<server_http_res>();
         res->status = 500;
         try {
-            json error_data = format_error_response(message, ERROR_TYPE_SERVER);
+            json error_data = format_error_response(message, error);
             res->status = json_value(error_data, "code", 500);
             res->data = safe_json_to_str({{ "error", error_data }});
-            LOG_WRN("got exception: %s\n", res->data.c_str());
+            SRV_WRN("got exception: %s\n", res->data.c_str());
         } catch (const std::exception & e) {
-            LOG_ERR("got another exception: %s | while hanlding exception: %s\n", e.what(), message.c_str());
+            SRV_ERR("got another exception: %s | while handling exception: %s\n", e.what(), message.c_str());
             res->data = "Internal Server Error";
         }
         return res;
     };
 }
 
-int main(int argc, char ** argv) {
+int main(int argc, char ** argv, char ** envp) {
     // own arguments required by this example
     common_params params;
 
@@ -75,6 +82,11 @@ int main(int argc, char ** argv) {
         params.kv_unified = true;
     }
 
+    // for consistency between server router mode and single-model mode, we set the same model name as alias
+    if (params.model_alias.empty() && !params.model.name.empty()) {
+        params.model_alias = params.model.name;
+    }
+
     common_init();
 
     // struct that contains llama context and inference
@@ -101,6 +113,42 @@ int main(int argc, char ** argv) {
     // register API routes
     server_routes routes(params, ctx_server, [&ctx_http]() { return ctx_http.is_ready.load(); });
 
+    bool is_router_server = params.model.path.empty();
+    std::optional<server_models_routes> models_routes{};
+    if (is_router_server) {
+        // setup server instances manager
+        models_routes.emplace(params, argc, argv, envp);
+
+        // proxy handlers
+        // note: routes.get_health stays the same
+        routes.get_metrics                 = models_routes->proxy_get;
+        routes.post_props                  = models_routes->proxy_post;
+        routes.get_api_show                = models_routes->proxy_get;
+        routes.post_completions            = models_routes->proxy_post;
+        routes.post_completions_oai        = models_routes->proxy_post;
+        routes.post_chat_completions       = models_routes->proxy_post;
+        routes.post_anthropic_messages     = models_routes->proxy_post;
+        routes.post_anthropic_count_tokens = models_routes->proxy_post;
+        routes.post_infill                 = models_routes->proxy_post;
+        routes.post_embeddings             = models_routes->proxy_post;
+        routes.post_embeddings_oai         = models_routes->proxy_post;
+        routes.post_rerank                 = models_routes->proxy_post;
+        routes.post_tokenize               = models_routes->proxy_post;
+        routes.post_detokenize             = models_routes->proxy_post;
+        routes.post_apply_template         = models_routes->proxy_post;
+        routes.get_lora_adapters           = models_routes->proxy_get;
+        routes.post_lora_adapters          = models_routes->proxy_post;
+        routes.get_slots                   = models_routes->proxy_get;
+        routes.post_slots                  = models_routes->proxy_post;
+
+        // custom routes for router
+        routes.get_props  = models_routes->get_router_props;
+        routes.get_models = models_routes->get_router_models;
+        ctx_http.post("/models/load",   ex_wrapper(models_routes->post_router_models_load));
+        ctx_http.post("/models/unload", ex_wrapper(models_routes->post_router_models_unload));
+        ctx_http.post("/models/status", ex_wrapper(models_routes->post_router_models_status));
+    }
+
     ctx_http.get ("/health",    ex_wrapper(routes.get_health)); // public endpoint (no API key check)
     ctx_http.get ("/v1/health", ex_wrapper(routes.get_health)); // public endpoint (no API key check)
     ctx_http.get ("/metrics",   ex_wrapper(routes.get_metrics));
@@ -140,43 +188,69 @@ int main(int argc, char ** argv) {
     // Start the server
     //
 
-    // setup clean up function, to be called before exit
-    auto clean_up = [&ctx_http, &ctx_server]() {
-        SRV_INF("%s: cleaning up before exit...\n", __func__);
-        ctx_http.stop();
-        ctx_server.terminate();
-        llama_backend_free();
-    };
+    std::function<void()> clean_up;
 
-    // start the HTTP server before loading the model to be able to serve /health requests
-    if (!ctx_http.start()) {
-        clean_up();
-        LOG_ERR("%s: exiting due to HTTP server error\n", __func__);
-        return 1;
-    }
+    if (is_router_server) {
+        LOG_INF("%s: starting router server, no model will be loaded in this process\n", __func__);
 
-    // load the model
-    LOG_INF("%s: loading model\n", __func__);
+        clean_up = [&models_routes]() {
+            SRV_INF("%s: cleaning up before exit...\n", __func__);
+            if (models_routes.has_value()) {
+                models_routes->models.unload_all();
+            }
+            llama_backend_free();
+        };
 
-    if (!ctx_server.load_model(params)) {
-        clean_up();
-        if (ctx_http.thread.joinable()) {
-            ctx_http.thread.join();
+        if (!ctx_http.start()) {
+            clean_up();
+            LOG_ERR("%s: exiting due to HTTP server error\n", __func__);
+            return 1;
         }
-        LOG_ERR("%s: exiting due to model loading error\n", __func__);
-        return 1;
-    }
+        ctx_http.is_ready.store(true);
 
-    ctx_server.init();
-    ctx_http.is_ready.store(true);
+        shutdown_handler = [&](int) {
+            ctx_http.stop();
+        };
 
-    LOG_INF("%s: model loaded\n", __func__);
-
-    shutdown_handler = [&](int) {
-        // this will unblock start_loop()
-        ctx_server.terminate();
-    };
+    } else {
+        // setup clean up function, to be called before exit
+        clean_up = [&ctx_http, &ctx_server]() {
+            SRV_INF("%s: cleaning up before exit...\n", __func__);
+            ctx_http.stop();
+            ctx_server.terminate();
+            llama_backend_free();
+        };
+
+        // start the HTTP server before loading the model to be able to serve /health requests
+        if (!ctx_http.start()) {
+            clean_up();
+            LOG_ERR("%s: exiting due to HTTP server error\n", __func__);
+            return 1;
+        }
+
+        // load the model
+        LOG_INF("%s: loading model\n", __func__);
+
+        if (!ctx_server.load_model(params)) {
+            clean_up();
+            if (ctx_http.thread.joinable()) {
+                ctx_http.thread.join();
+            }
+            LOG_ERR("%s: exiting due to model loading error\n", __func__);
+            return 1;
+        }
+
+        ctx_server.init();
+        ctx_http.is_ready.store(true);
+
+        LOG_INF("%s: model loaded\n", __func__);
+
+        shutdown_handler = [&](int) {
+            // this will unblock start_loop()
+            ctx_server.terminate();
+        };
+    }
 
     // TODO: refactor in common/console
 #if defined (__unix__) || (defined (__APPLE__) && defined (__MACH__))
     struct sigaction sigint_action;
|
|||
SetConsoleCtrlHandler(reinterpret_cast<PHANDLER_ROUTINE>(console_ctrl_handler), true);
|
||||
#endif
|
||||
|
||||
LOG_INF("%s: server is listening on %s\n", __func__, ctx_http.listening_address.c_str());
|
||||
LOG_INF("%s: starting the main loop...\n", __func__);
|
||||
// this call blocks the main thread until ctx_server.terminate() is called
|
||||
ctx_server.start_loop();
|
||||
if (is_router_server) {
|
||||
LOG_INF("%s: router server is listening on %s\n", __func__, ctx_http.listening_address.c_str());
|
||||
LOG_INF("%s: NOTE: router mode is experimental\n", __func__);
|
||||
LOG_INF("%s: it is not recommended to use this mode in untrusted environments\n", __func__);
|
||||
if (ctx_http.thread.joinable()) {
|
||||
ctx_http.thread.join(); // keep the main thread alive
|
||||
}
|
||||
|
||||
clean_up();
|
||||
if (ctx_http.thread.joinable()) {
|
||||
ctx_http.thread.join();
|
||||
// when the HTTP server stops, clean up and exit
|
||||
clean_up();
|
||||
} else {
|
||||
LOG_INF("%s: server is listening on %s\n", __func__, ctx_http.listening_address.c_str());
|
||||
LOG_INF("%s: starting the main loop...\n", __func__);
|
||||
|
||||
// optionally, notify router server that this instance is ready
|
||||
const char * router_port = std::getenv("LLAMA_SERVER_ROUTER_PORT");
|
||||
std::thread monitor_thread;
|
||||
if (router_port != nullptr) {
|
||||
monitor_thread = server_models::setup_child_server(params, std::atoi(router_port), params.model_alias, shutdown_handler);
|
||||
}
|
||||
|
||||
// this call blocks the main thread until queue_tasks.terminate() is called
|
||||
ctx_server.start_loop();
|
||||
|
||||
clean_up();
|
||||
if (ctx_http.thread.joinable()) {
|
||||
ctx_http.thread.join();
|
||||
}
|
||||
if (monitor_thread.joinable()) {
|
||||
monitor_thread.join();
|
||||
}
|
||||
llama_memory_breakdown_print(ctx_server.get_llama_context());
|
||||
}
|
||||
llama_memory_breakdown_print(ctx_server.get_llama_context());
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
|
|
|||
|
|
@@ -41,7 +41,8 @@ def test_chat_completion(model, system_prompt, user_prompt, max_tokens, re_conte
     assert res.status_code == 200
     assert "cmpl" in res.body["id"]  # make sure the completion id has the expected format
    assert res.body["system_fingerprint"].startswith("b")
-    assert res.body["model"] == model if model is not None else server.model_alias
+    # we no longer reflect back the model name, see https://github.com/ggml-org/llama.cpp/pull/17668
+    # assert res.body["model"] == model if model is not None else server.model_alias
     assert res.body["usage"]["prompt_tokens"] == n_prompt
     assert res.body["usage"]["completion_tokens"] == n_predicted
     choice = res.body["choices"][0]
@@ -59,7 +60,7 @@ def test_chat_completion(model, system_prompt, user_prompt, max_tokens, re_conte
 )
 def test_chat_completion_stream(system_prompt, user_prompt, max_tokens, re_content, n_prompt, n_predicted, finish_reason):
     global server
-    server.model_alias = None  # try using DEFAULT_OAICOMPAT_MODEL
+    server.model_alias = "llama-test-model"
     server.start()
     res = server.make_stream_request("POST", "/chat/completions", data={
         "max_tokens": max_tokens,
@@ -81,7 +82,7 @@ def test_chat_completion_stream(system_prompt, user_prompt, max_tokens, re_conte
         else:
             assert "role" not in choice["delta"]
         assert data["system_fingerprint"].startswith("b")
-        assert "gpt-3.5" in data["model"]  # DEFAULT_OAICOMPAT_MODEL, maybe changed in the future
+        assert data["model"] == "llama-test-model"
         if last_cmpl_id is None:
             last_cmpl_id = data["id"]
         assert last_cmpl_id == data["id"]  # make sure the completion id is the same for all events in the stream
@@ -198,7 +199,7 @@ def test_completion_with_response_format(response_format: dict, n_predicted: int
         choice = res.body["choices"][0]
         assert match_regex(re_content, choice["message"]["content"])
     else:
-        assert res.status_code != 200
+        assert res.status_code == 400
         assert "error" in res.body
 
 
@@ -0,0 +1,50 @@
import pytest
from utils import *

server: ServerProcess

@pytest.fixture(autouse=True)
def create_server():
    global server
    server = ServerPreset.router()


@pytest.mark.parametrize(
    "model,success",
    [
        ("ggml-org/tinygemma3-GGUF:Q8_0", True),
        ("non-existent/model", False),
    ]
)
def test_router_chat_completion_stream(model: str, success: bool):
    # TODO: make sure the model is in cache (ie. ServerProcess.load_all()) before starting the router server
    global server
    server.start()
    content = ""
    ex: ServerError | None = None
    try:
        res = server.make_stream_request("POST", "/chat/completions", data={
            "model": model,
            "max_tokens": 16,
            "messages": [
                {"role": "user", "content": "hello"},
            ],
            "stream": True,
        })
        for data in res:
            if data["choices"]:
                choice = data["choices"][0]
                if choice["finish_reason"] in ["stop", "length"]:
                    assert "content" not in choice["delta"]
                else:
                    assert choice["finish_reason"] is None
                    content += choice["delta"]["content"] or ''
    except ServerError as e:
        ex = e

    if success:
        assert ex is None
        assert len(content) > 0
    else:
        assert ex is not None
        assert content == ""
@@ -46,7 +46,7 @@ class ServerProcess:
     debug: bool = False
     server_port: int = 8080
     server_host: str = "127.0.0.1"
-    model_hf_repo: str = "ggml-org/models"
+    model_hf_repo: str | None = "ggml-org/models"
     model_hf_file: str | None = "tinyllamas/stories260K.gguf"
     model_alias: str = "tinyllama-2"
     temperature: float = 0.8
@@ -521,9 +521,8 @@ class ServerPreset:
         server = ServerProcess()
         server.offline = True  # will be downloaded by load_all()
         # mmproj is already provided by HF registry API
-        server.model_hf_repo = "ggml-org/tinygemma3-GGUF"
-        server.model_hf_file = "tinygemma3-Q8_0.gguf"
-        server.mmproj_url = "https://huggingface.co/ggml-org/tinygemma3-GGUF/resolve/main/mmproj-tinygemma3.gguf"
+        server.model_hf_file = None
+        server.model_hf_repo = "ggml-org/tinygemma3-GGUF:Q8_0"
         server.model_alias = "tinygemma3"
         server.n_ctx = 1024
         server.n_batch = 32
@@ -532,6 +531,21 @@ class ServerPreset:
         server.seed = 42
         return server
 
+    @staticmethod
+    def router() -> ServerProcess:
+        server = ServerProcess()
+        # router server has no models
+        server.model_file = None
+        server.model_alias = None
+        server.model_hf_repo = None
+        server.model_hf_file = None
+        server.n_ctx = 1024
+        server.n_batch = 16
+        server.n_slots = 1
+        server.n_predict = 16
+        server.seed = 42
+        return server
+
 
 def parallel_function_calls(function_list: List[Tuple[Callable[..., Any], Tuple[Any, ...]]]) -> List[Any]:
     """
@@ -1,7 +1,7 @@
 import type { StorybookConfig } from '@storybook/sveltekit';
 
 const config: StorybookConfig = {
-    stories: ['../src/**/*.mdx', '../src/**/*.stories.@(js|ts|svelte)'],
+    stories: ['../tests/stories/**/*.mdx', '../tests/stories/**/*.stories.@(js|ts|svelte)'],
     addons: [
         '@storybook/addon-svelte-csf',
         '@chromatic-com/storybook',
@ -2,65 +2,685 @@
|
|||
|
||||
A modern, feature-rich web interface for llama.cpp built with SvelteKit. This UI provides an intuitive chat interface with advanced file handling, conversation management, and comprehensive model interaction capabilities.
|
||||
|
||||
The WebUI supports two server operation modes:
|
||||
|
||||
- **MODEL mode** - Single model operation (standard llama-server)
|
||||
- **ROUTER mode** - Multi-model operation with dynamic model loading/unloading
|
||||
|
||||
---
|
||||
|
||||
## Table of Contents
|
||||
|
||||
- [Features](#features)
|
||||
- [Getting Started](#getting-started)
|
||||
- [Tech Stack](#tech-stack)
|
||||
- [Build Pipeline](#build-pipeline)
|
||||
- [Architecture](#architecture)
|
||||
- [Data Flows](#data-flows)
|
||||
- [Architectural Patterns](#architectural-patterns)
|
||||
- [Testing](#testing)
|
||||
|
||||
---
|
||||
|
||||
## Features
|
||||
|
||||
- **Modern Chat Interface** - Clean, responsive design with dark/light mode
|
||||
- **File Attachments** - Support for images, text files, PDFs, and audio with rich previews and drag-and-drop support
|
||||
- **Conversation Management** - Create, edit, branch, and search conversations
|
||||
- **Advanced Markdown** - Code highlighting, math formulas (KaTeX), and content blocks
|
||||
- **Reasoning Content** - Support for models with thinking blocks
|
||||
- **Keyboard Shortcuts** - Keyboard navigation (Shift+Ctrl/Cmd+O for new chat, Shift+Ctrl/Cmdt+E for edit conversation, Shift+Ctrl/Cmdt+D for delete conversation, Ctrl/Cmd+K for search, Ctrl/Cmd+V for paste, Ctrl/Cmd+B for opening/collapsing sidebar)
|
||||
- **Request Tracking** - Monitor processing with slots endpoint integration
|
||||
- **UI Testing** - Storybook component library with automated tests
|
||||
### Chat Interface
|
||||
|
||||
## Development
|
||||
- **Streaming responses** with real-time updates
|
||||
- **Reasoning content** - Support for models with thinking/reasoning blocks
|
||||
- **Dark/light theme** with system preference detection
|
||||
- **Responsive design** for desktop and mobile
|
||||
|
||||
Install dependencies:
|
||||
### File Attachments
|
||||
|
||||
- **Images** - JPEG, PNG, GIF, WebP, SVG (with PNG conversion)
|
||||
- **Documents** - PDF (text extraction or image conversion for vision models)
|
||||
- **Audio** - MP3, WAV for audio-capable models
|
||||
- **Text files** - Source code, markdown, and other text formats
|
||||
- **Drag-and-drop** and paste support with rich previews
|
||||
|
||||
### Conversation Management
|
||||
|
||||
- **Branching** - Branch messages conversations at any point by editing messages or regenerating responses, navigate between branches
|
||||
- **Regeneration** - Regenerate responses with optional model switching (ROUTER mode)
|
||||
- **Import/Export** - JSON format for backup and sharing
|
||||
- **Search** - Find conversations by title or content
|
||||
|
||||
### Advanced Rendering
|
||||
|
||||
- **Syntax highlighting** - Code blocks with language detection
|
||||
- **Math formulas** - KaTeX rendering for LaTeX expressions
|
||||
- **Markdown** - Full GFM support with tables, lists, and more
|
||||
|
||||
### Multi-Model Support (ROUTER mode)
|
||||
|
||||
- **Model selector** with Loaded/Available groups
|
||||
- **Automatic loading** - Models load on selection
|
||||
- **Modality validation** - Prevents sending images to non-vision models
|
||||
- **LRU unloading** - Server auto-manages model cache
|
||||
|
||||
### Keyboard Shortcuts
|
||||
|
||||
| Shortcut | Action |
|
||||
| ------------------ | -------------------- |
|
||||
| `Shift+Ctrl/Cmd+O` | New chat |
|
||||
| `Shift+Ctrl/Cmd+E` | Edit conversation |
|
||||
| `Shift+Ctrl/Cmd+D` | Delete conversation |
|
||||
| `Ctrl/Cmd+K` | Search conversations |
|
||||
| `Ctrl/Cmd+B` | Toggle sidebar |
|
||||
|
||||
### Developer Experience
|
||||
|
||||
- **Request tracking** - Monitor token generation with `/slots` endpoint
|
||||
- **Storybook** - Component library with visual testing
|
||||
- **Hot reload** - Instant updates during development
|
||||
|
||||
---
|
||||
|
||||
## Getting Started
|
||||
|
||||
### Prerequisites
|
||||
|
||||
- **Node.js** 18+ (20+ recommended)
|
||||
- **npm** 9+
|
||||
- **llama-server** running locally (for API access)
|
||||
|
||||
### 1. Install Dependencies
|
||||
|
||||
```bash
|
||||
cd tools/server/webui
|
||||
npm install
|
||||
```
|
||||
|
||||
Start the development server + Storybook:
|
||||
### 2. Start llama-server
|
||||
|
||||
In a separate terminal, start the backend server:
|
||||
|
||||
```bash
|
||||
# Single model (MODEL mode)
|
||||
./llama-server -m model.gguf
|
||||
|
||||
# Multi-model (ROUTER mode)
|
||||
./llama-server --model-store /path/to/models
|
||||
```
|
||||
|
||||
### 3. Start Development Servers
|
||||
|
||||
```bash
|
||||
npm run dev
|
||||
```
|
||||
|
||||
This will start both the SvelteKit dev server and Storybook on port 6006.
|
||||
This starts:
|
||||
|
||||
## Building
|
||||
- **Vite dev server** at `http://localhost:5173` - The main WebUI
|
||||
- **Storybook** at `http://localhost:6006` - Component documentation
|
||||
|
||||
Create a production build:
|
||||
The Vite dev server proxies API requests to `http://localhost:8080` (default llama-server port):
|
||||
|
||||
```typescript
|
||||
// vite.config.ts proxy configuration
|
||||
proxy: {
|
||||
'/v1': 'http://localhost:8080',
|
||||
'/props': 'http://localhost:8080',
|
||||
'/slots': 'http://localhost:8080',
|
||||
'/models': 'http://localhost:8080'
|
||||
}
|
||||
```
|
||||
|
||||
### Development Workflow
|
||||
|
||||
1. Open `http://localhost:5173` in your browser
|
||||
2. Make changes to `.svelte`, `.ts`, or `.css` files
|
||||
3. Changes hot-reload instantly
|
||||
4. Use Storybook at `http://localhost:6006` for isolated component development
|
||||
|
||||
---
|
||||
|
||||
## Tech Stack
|
||||
|
||||
| Layer | Technology | Purpose |
|
||||
| ----------------- | ------------------------------- | -------------------------------------------------------- |
|
||||
| **Framework** | SvelteKit + Svelte 5 | Reactive UI with runes (`$state`, `$derived`, `$effect`) |
|
||||
| **UI Components** | shadcn-svelte + bits-ui | Accessible, customizable component library |
|
||||
| **Styling** | TailwindCSS 4 | Utility-first CSS with design tokens |
|
||||
| **Database** | IndexedDB (Dexie) | Client-side storage for conversations and messages |
|
||||
| **Build** | Vite | Fast bundling with static adapter |
|
||||
| **Testing** | Playwright + Vitest + Storybook | E2E, unit, and visual testing |
|
||||
| **Markdown** | remark + rehype | Markdown processing with KaTeX and syntax highlighting |
|
||||
|
||||
### Key Dependencies
|
||||
|
||||
```json
|
||||
{
|
||||
"svelte": "^5.0.0",
|
||||
"bits-ui": "^2.8.11",
|
||||
"dexie": "^4.0.11",
|
||||
"pdfjs-dist": "^5.4.54",
|
||||
"highlight.js": "^11.11.1",
|
||||
"rehype-katex": "^7.0.1"
|
||||
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Build Pipeline
|
||||
|
||||
### Development Build
|
||||
|
||||
```bash
|
||||
npm run dev
|
||||
```
|
||||
|
||||
Runs Vite in development mode with:
|
||||
|
||||
- Hot Module Replacement (HMR)
|
||||
- Source maps
|
||||
- Proxy to llama-server
|
||||
|
||||
### Production Build
|
||||
|
||||
```bash
|
||||
npm run build
|
||||
```
|
||||
|
||||
The build outputs static files to `../public` directory for deployment with llama.cpp server.
|
||||
The build process:
|
||||
|
||||
## Testing
|
||||
1. **Vite Build** - Bundles all TypeScript, Svelte, and CSS
|
||||
2. **Static Adapter** - Outputs to `../public` (llama-server's static file directory)
|
||||
3. **Post-Build Script** - Cleans up intermediate files
|
||||
4. **Custom Plugin** - Creates `index.html.gz` with:
|
||||
- Inlined favicon as base64
|
||||
- GZIP compression (level 9)
|
||||
- Deterministic output (zeroed timestamps)
|
||||
|
||||
Run the test suite:
|
||||
|
||||
```bash
|
||||
# E2E tests
|
||||
npm run test:e2e
|
||||
|
||||
# Unit tests
|
||||
npm run test:unit
|
||||
|
||||
# UI tests
|
||||
npm run test:ui
|
||||
|
||||
# All tests
|
||||
npm run test
|
||||
```text
|
||||
tools/server/webui/ → build → tools/server/public/
|
||||
├── src/ ├── index.html.gz (served by llama-server)
|
||||
├── static/ └── (favicon inlined)
|
||||
└── ...
|
||||
```
|
||||
|
||||
### SvelteKit Configuration
|
||||
|
||||
```javascript
|
||||
// svelte.config.js
|
||||
adapter: adapter({
|
||||
pages: '../public', // Output directory
|
||||
assets: '../public', // Static assets
|
||||
fallback: 'index.html', // SPA fallback
|
||||
strict: true
|
||||
}),
|
||||
output: {
|
||||
bundleStrategy: 'inline' // Single-file bundle
|
||||
}
|
||||
```
|
||||
|
||||
### Integration with llama-server
|
||||
|
||||
The WebUI is embedded directly into the llama-server binary:
|
||||
|
||||
1. `npm run build` outputs `index.html.gz` to `tools/server/public/`
|
||||
2. llama-server compiles this into the binary at build time
|
||||
3. When accessing `/`, llama-server serves the gzipped HTML
|
||||
4. All assets are inlined (CSS, JS, fonts, favicon)
|
||||
|
||||
This results in a **single portable binary** with the full WebUI included.
|
||||
|
||||
---
|
||||
|
||||
## Architecture
|
||||
|
||||
- **Framework**: SvelteKit with Svelte 5 runes
|
||||
- **Components**: ShadCN UI + bits-ui design system
|
||||
- **Database**: IndexedDB with Dexie for local storage
|
||||
- **Build**: Static adapter for deployment with llama.cpp server
|
||||
- **Testing**: Playwright (E2E) + Vitest (unit) + Storybook (components)
|
||||
The WebUI follows a layered architecture with unidirectional data flow:
|
||||
|
||||
```text
|
||||
Routes → Components → Hooks → Stores → Services → Storage/API
|
||||
```
|
||||
|
||||
### High-Level Architecture
|
||||
|
||||
See: [`docs/architecture/high-level-architecture-simplified.md`](docs/architecture/high-level-architecture-simplified.md)
|
||||
|
||||
```mermaid
|
||||
flowchart TB
|
||||
subgraph Routes["📍 Routes"]
|
||||
R1["/ (Welcome)"]
|
||||
R2["/chat/[id]"]
|
||||
RL["+layout.svelte"]
|
||||
end
|
||||
|
||||
subgraph Components["🧩 Components"]
|
||||
C_Sidebar["ChatSidebar"]
|
||||
C_Screen["ChatScreen"]
|
||||
C_Form["ChatForm"]
|
||||
C_Messages["ChatMessages"]
|
||||
C_ModelsSelector["ModelsSelector"]
|
||||
C_Settings["ChatSettings"]
|
||||
end
|
||||
|
||||
subgraph Stores["🗄️ Stores"]
|
||||
S1["chatStore"]
|
||||
S2["conversationsStore"]
|
||||
S3["modelsStore"]
|
||||
S4["serverStore"]
|
||||
S5["settingsStore"]
|
||||
end
|
||||
|
||||
subgraph Services["⚙️ Services"]
|
||||
SV1["ChatService"]
|
||||
SV2["ModelsService"]
|
||||
SV3["PropsService"]
|
||||
SV4["DatabaseService"]
|
||||
end
|
||||
|
||||
subgraph Storage["💾 Storage"]
|
||||
ST1["IndexedDB"]
|
||||
ST2["LocalStorage"]
|
||||
end
|
||||
|
||||
subgraph APIs["🌐 llama-server"]
|
||||
API1["/v1/chat/completions"]
|
||||
API2["/props"]
|
||||
API3["/models/*"]
|
||||
end
|
||||
|
||||
R1 & R2 --> C_Screen
|
||||
RL --> C_Sidebar
|
||||
C_Screen --> C_Form & C_Messages & C_Settings
|
||||
C_Screen --> S1 & S2
|
||||
C_ModelsSelector --> S3 & S4
|
||||
S1 --> SV1 & SV4
|
||||
S3 --> SV2 & SV3
|
||||
SV4 --> ST1
|
||||
SV1 --> API1
|
||||
SV2 --> API3
|
||||
SV3 --> API2
|
||||
```
|
||||
|
||||
### Layer Breakdown
|
||||
|
||||
#### Routes (`src/routes/`)
|
||||
|
||||
- **`/`** - Welcome screen, creates new conversation
|
||||
- **`/chat/[id]`** - Active chat interface
|
||||
- **`+layout.svelte`** - Sidebar, navigation, global initialization
|
||||
|
||||
#### Components (`src/lib/components/`)
|
||||
|
||||
Components are organized in `app/` (application-specific) and `ui/` (shadcn-svelte primitives).
|
||||
|
||||
**Chat Components** (`app/chat/`):
|
||||
|
||||
| Component | Responsibility |
|
||||
| ------------------ | --------------------------------------------------------------------------- |
|
||||
| `ChatScreen/` | Main chat container, coordinates message list, input form, and attachments |
|
||||
| `ChatForm/` | Message input textarea with file upload, paste handling, keyboard shortcuts |
|
||||
| `ChatMessages/` | Message list with branch navigation, regenerate/continue/edit actions |
|
||||
| `ChatAttachments/` | File attachment previews, drag-and-drop, PDF/image/audio handling |
|
||||
| `ChatSettings/` | Parameter sliders (temperature, top-p, etc.) with server default sync |
|
||||
| `ChatSidebar/` | Conversation list, search, import/export, navigation |
|
||||
|
||||
**Dialog Components** (`app/dialogs/`):
|
||||
|
||||
| Component | Responsibility |
|
||||
| ------------------------------- | -------------------------------------------------------- |
|
||||
| `DialogChatSettings` | Full-screen settings configuration |
|
||||
| `DialogModelInformation` | Model details (context size, modalities, parallel slots) |
|
||||
| `DialogChatAttachmentPreview` | Full preview for images, PDFs (text or page view), code |
|
||||
| `DialogConfirmation` | Generic confirmation for destructive actions |
|
||||
| `DialogConversationTitleUpdate` | Edit conversation title |
|
||||
|
||||
**Server/Model Components** (`app/server/`, `app/models/`):
|
||||
|
||||
| Component | Responsibility |
|
||||
| ------------------- | --------------------------------------------------------- |
|
||||
| `ServerErrorSplash` | Error display when server is unreachable |
|
||||
| `ModelsSelector` | Model dropdown with Loaded/Available groups (ROUTER mode) |
|
||||
|
||||
**Shared UI Components** (`app/misc/`):
|
||||
|
||||
| Component | Responsibility |
|
||||
| -------------------------------- | ---------------------------------------------------------------- |
|
||||
| `MarkdownContent` | Markdown rendering with KaTeX, syntax highlighting, copy buttons |
|
||||
| `SyntaxHighlightedCode` | Code blocks with language detection and highlighting |
|
||||
| `ActionButton`, `ActionDropdown` | Reusable action buttons and menus |
|
||||
| `BadgeModality`, `BadgeInfo` | Status and capability badges |
|
||||
|
||||
#### Hooks (`src/lib/hooks/`)
|
||||
|
||||
- **`useModelChangeValidation`** - Validates model switch against conversation modalities
|
||||
- **`useProcessingState`** - Tracks streaming progress and token generation
|
||||
|
||||
#### Stores (`src/lib/stores/`)
|
||||
|
||||
| Store | Responsibility |
|
||||
| -------------------- | --------------------------------------------------------- |
|
||||
| `chatStore` | Message sending, streaming, abort control, error handling |
|
||||
| `conversationsStore` | CRUD for conversations, message branching, navigation |
|
||||
| `modelsStore` | Model list, selection, loading/unloading (ROUTER) |
|
||||
| `serverStore` | Server properties, role detection, modalities |
|
||||
| `settingsStore` | User preferences, parameter sync with server defaults |
|
||||
|
||||
#### Services (`src/lib/services/`)
|
||||
|
||||
| Service | Responsibility |
|
||||
| ---------------------- | ----------------------------------------------- |
|
||||
| `ChatService` | API calls to`/v1/chat/completions`, SSE parsing |
|
||||
| `ModelsService` | `/models`, `/models/load`, `/models/unload` |
|
||||
| `PropsService` | `/props`, `/props?model=` |
|
||||
| `DatabaseService` | IndexedDB operations via Dexie |
|
||||
| `ParameterSyncService` | Syncs settings with server defaults |
|
||||
|
||||
---
|
||||
|
||||
## Data Flows
|
||||
|
||||
### MODEL Mode (Single Model)
|
||||
|
||||
See: [`docs/flows/data-flow-simplified-model-mode.md`](docs/flows/data-flow-simplified-model-mode.md)
|
||||
|
||||

```mermaid
sequenceDiagram
    participant User
    participant UI
    participant Stores
    participant DB as IndexedDB
    participant API as llama-server

    Note over User,API: Initialization
    UI->>Stores: initialize()
    Stores->>DB: load conversations
    Stores->>API: GET /props
    API-->>Stores: server config
    Stores->>API: GET /v1/models
    API-->>Stores: single model (auto-selected)

    Note over User,API: Chat Flow
    User->>UI: send message
    Stores->>DB: save user message
    Stores->>API: POST /v1/chat/completions (stream)
    loop streaming
        API-->>Stores: SSE chunks
        Stores-->>UI: reactive update
    end
    Stores->>DB: save assistant message
```

### ROUTER Mode (Multi-Model)

See: [`docs/flows/data-flow-simplified-router-mode.md`](docs/flows/data-flow-simplified-router-mode.md)

```mermaid
sequenceDiagram
    participant User
    participant UI
    participant Stores
    participant API as llama-server

    Note over User,API: Initialization
    Stores->>API: GET /props
    API-->>Stores: {role: "router"}
    Stores->>API: GET /models
    API-->>Stores: models[] with status

    Note over User,API: Model Selection
    User->>UI: select model
    alt model not loaded
        Stores->>API: POST /models/load
        loop poll status
            Stores->>API: GET /models
        end
        Stores->>API: GET /props?model=X
    end
    Stores->>Stores: validate modalities

    Note over User,API: Chat Flow
    Stores->>API: POST /v1/chat/completions {model: X}
    loop streaming
        API-->>Stores: SSE chunks + model info
    end
```

### Detailed Flow Diagrams

| Flow          | Description                                | File                                                        |
| ------------- | ------------------------------------------ | ----------------------------------------------------------- |
| Chat          | Message lifecycle, streaming, regeneration | [`chat-flow.md`](docs/flows/chat-flow.md)                   |
| Models        | Loading, unloading, modality caching       | [`models-flow.md`](docs/flows/models-flow.md)               |
| Server        | Props fetching, role detection             | [`server-flow.md`](docs/flows/server-flow.md)               |
| Conversations | CRUD, branching, import/export             | [`conversations-flow.md`](docs/flows/conversations-flow.md) |
| Database      | IndexedDB schema, operations               | [`database-flow.md`](docs/flows/database-flow.md)           |
| Settings      | Parameter sync, user overrides             | [`settings-flow.md`](docs/flows/settings-flow.md)           |

---

## Architectural Patterns

### 1. Reactive State with Svelte 5 Runes

All stores use Svelte 5's fine-grained reactivity:

```typescript
// Store with reactive state
class ChatStore {
	#isLoading = $state(false);
	#currentResponse = $state('');

	get isLoading() {
		return this.#isLoading;
	}

	get currentResponse() {
		return this.#currentResponse;
	}

	// Derived values auto-update. Note: $derived must initialize a class
	// field (or variable); returning it from a getter is a compile error.
	isStreaming = $derived(this.#isLoading && this.#currentResponse.length > 0);
}

const chatStore = new ChatStore();

// Exported reactive accessors
export const isLoading = () => chatStore.isLoading;
export const currentResponse = () => chatStore.currentResponse;
```
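
Exporting accessor *functions* matters here: a plain `export const loading = chatStore.isLoading` would snapshot the value at import time, while a function re-reads the underlying `$state` on every call, so reactivity survives the module boundary. A minimal consumer sketch (the import path and `$effect` body are illustrative, not copied from the codebase):

```typescript
import { isLoading, currentResponse } from '$lib/stores/chat.svelte';

// Calling the accessor inside a reactive context re-reads the $state,
// so this effect re-runs whenever the store's fields change.
$effect(() => {
	if (isLoading()) {
		console.debug(`streaming: ${currentResponse().length} chars so far`);
	}
});
```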

### 2. Unidirectional Data Flow

Data flows in one direction, making state predictable:

```mermaid
flowchart LR
    subgraph UI["UI Layer"]
        A[User Action] --> B[Component]
    end

    subgraph State["State Layer"]
        B --> C[Store Method]
        C --> D[State Update]
    end

    subgraph IO["I/O Layer"]
        C --> E[Service]
        E --> F[API / IndexedDB]
        F -.->|Response| D
    end

    D -->|Reactive| B
```

Components dispatch actions to stores, stores coordinate with services for I/O, and state updates reactively propagate back to the UI.

### 3. Per-Conversation State

Enables concurrent streaming across multiple conversations:

```typescript
class ChatStore {
	// All per-conversation state is keyed by conversation id
	chatLoadingStates = new Map<string, boolean>();
	chatStreamingStates = new Map<string, { response: string; messageId: string }>();
	abortControllers = new Map<string, AbortController>();
}
```
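
Because each conversation owns its own `AbortController`, stopping generation in one conversation leaves streams in other conversations untouched. A sketch of what `stopGeneration()` plausibly does with these maps (the method body here is an illustration, not copied from the source):

```typescript
class ChatStore {
	chatLoadingStates = new Map<string, boolean>();
	abortControllers = new Map<string, AbortController>();

	// Abort only the stream that belongs to this conversation;
	// entries for other conversation ids are left untouched.
	stopGeneration(convId: string): void {
		this.abortControllers.get(convId)?.abort();
		this.abortControllers.delete(convId);
		this.chatLoadingStates.set(convId, false);
	}
}
```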

### 4. Message Branching with Tree Structure

Conversations are stored as a tree, not a linear list:

```typescript
interface DatabaseMessage {
	id: string;
	parent: string | null; // Points to parent message
	children: string[]; // List of child message IDs
	// ...
}

interface DatabaseConversation {
	currentNode: string; // Currently viewed branch tip
	// ...
}
```

Navigation between branches updates `currentNode` without losing history.
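
The visible transcript is derived from this tree by walking parent pointers from the current leaf back to the root. The codebase does this with a `filterByLeafNodeId` helper (described in the database flow); a minimal sketch of that traversal, assuming the interfaces above (the body is an illustration, not the actual implementation):

```typescript
// Return the root→leaf path for the branch that `leafId` belongs to.
function filterByLeafNodeId(messages: DatabaseMessage[], leafId: string): DatabaseMessage[] {
	const byId = new Map(messages.map((m) => [m.id, m]));
	const path: DatabaseMessage[] = [];
	let node = byId.get(leafId);
	while (node) {
		path.push(node);
		node = node.parent ? byId.get(node.parent) : undefined;
	}
	return path.reverse(); // root first, current leaf last
}
```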

### 5. Layered Service Architecture

Stores handle state; services handle I/O:

```text
┌─────────────────┐
│     Stores      │  Business logic, state management
├─────────────────┤
│    Services     │  API calls, database operations
├─────────────────┤
│   Storage/API   │  IndexedDB, LocalStorage, HTTP
└─────────────────┘
```
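
As a concrete example of the service layer, the streaming path in `ChatService` boils down to a `fetch` with `stream: true` plus line-by-line SSE parsing (the `parseSSEChunk` step in the chat flow). A condensed sketch under those assumptions, with error handling and the full option set omitted:

```typescript
async function streamCompletion(
	messages: unknown[],
	onChunk: (text: string) => void,
	signal: AbortSignal
): Promise<void> {
	const res = await fetch('/v1/chat/completions', {
		method: 'POST',
		headers: { 'Content-Type': 'application/json' },
		body: JSON.stringify({ messages, stream: true }),
		signal
	});

	const reader = res.body!.getReader();
	const decoder = new TextDecoder();
	let buffer = '';

	for (;;) {
		const { done, value } = await reader.read();
		if (done) break;
		buffer += decoder.decode(value, { stream: true });

		// SSE frames are newline-delimited "data: {...}" lines
		const lines = buffer.split('\n');
		buffer = lines.pop() ?? ''; // keep a partial line for the next read
		for (const line of lines) {
			if (!line.startsWith('data: ') || line === 'data: [DONE]') continue;
			const delta = JSON.parse(line.slice(6)).choices?.[0]?.delta?.content;
			if (delta) onChunk(delta);
		}
	}
}
```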

### 6. Server Role Abstraction

Single codebase handles both MODEL and ROUTER modes:

```typescript
// serverStore.ts
get isRouterMode() {
	return this.role === ServerRole.ROUTER;
}
```

```svelte
<!-- Components conditionally render based on mode -->
{#if isRouterMode()}
	<ModelsSelector />
{/if}
```

### 7. Modality Validation

Prevents sending attachments to incompatible models:

```typescript
// useModelChangeValidation hook
const validate = (modelId: string) => {
	const modelModalities = modelsStore.getModelModalities(modelId);
	const conversationModalities = conversationsStore.usedModalities;

	// Check if model supports all used modalities
	if (conversationModalities.hasImages && !modelModalities.vision) {
		return { valid: false, reason: 'Model does not support images' };
	}
	// ...
};
```

### 8. Persistent Storage Strategy

Data is persisted across sessions using two storage mechanisms:

```mermaid
flowchart TB
    subgraph Browser["Browser Storage"]
        subgraph IDB["IndexedDB (Dexie)"]
            C[Conversations]
            M[Messages]
        end
        subgraph LS["LocalStorage"]
            S[Settings Config]
            O[User Overrides]
            T[Theme Preference]
        end
    end

    subgraph Stores["Svelte Stores"]
        CS[conversationsStore] --> C
        CS --> M
        SS[settingsStore] --> S
        SS --> O
        SS --> T
    end
```

- **IndexedDB**: Conversations and messages (large, structured data)
- **LocalStorage**: Settings, user parameter overrides, theme (small key-value data)
- **Memory only**: Server props, model list (fetched fresh on each session)
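
The IndexedDB side is managed through Dexie. A schema sketch consistent with the database flow later in this document (the database name `LlamacppWebui` and the field names come from that flow; the exact index string is an assumption for illustration):

```typescript
import Dexie, { type Table } from 'dexie';

class WebuiDatabase extends Dexie {
	conversations!: Table<DatabaseConversation, string>;
	messages!: Table<DatabaseMessage, string>;

	constructor() {
		super('LlamacppWebui');
		// Primary key first, then the indexed fields the flows query on
		// (ordering conversations by lastModified, looking up messages by convId).
		this.version(1).stores({
			conversations: 'id, lastModified',
			messages: 'id, convId'
		});
	}
}

export const db = new WebuiDatabase();
```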

---

## Testing

### Test Types

| Type          | Tool               | Location                         | Command             |
| ------------- | ------------------ | -------------------------------- | ------------------- |
| **E2E**       | Playwright         | `tests/e2e/`                     | `npm run test:e2e`  |
| **Unit**      | Vitest             | `tests/client/`, `tests/server/` | `npm run test:unit` |
| **UI/Visual** | Storybook + Vitest | `tests/stories/`                 | `npm run test:ui`   |

### Running Tests

```bash
# All tests
npm run test

# Individual test suites
npm run test:e2e    # End-to-end (requires llama-server)
npm run test:client # Client-side unit tests
npm run test:server # Server-side unit tests
npm run test:ui     # Storybook visual tests
```

### Storybook Development

```bash
npm run storybook       # Start Storybook dev server on :6006
npm run build-storybook # Build static Storybook
```

### Linting and Formatting

```bash
npm run lint   # Check code style
npm run format # Auto-format with Prettier
npm run check  # TypeScript type checking
```

---

## Project Structure

```text
tools/server/webui/
├── src/
│   ├── lib/
│   │   ├── components/  # UI components (app/, ui/)
│   │   ├── hooks/       # Svelte hooks
│   │   ├── stores/      # State management
│   │   ├── services/    # API and database services
│   │   ├── types/       # TypeScript interfaces
│   │   └── utils/       # Utility functions
│   ├── routes/          # SvelteKit routes
│   └── styles/          # Global styles
├── static/              # Static assets
├── tests/               # Test files
├── docs/                # Architecture diagrams
│   ├── architecture/    # High-level architecture
│   └── flows/           # Feature-specific flows
└── .storybook/          # Storybook configuration
```

---

## Related Documentation

- [llama.cpp Server README](../README.md) - Full server documentation
- [Multimodal Documentation](../../../docs/multimodal.md) - Image and audio support
- [Function Calling](../../../docs/function-calling.md) - Tool use capabilities

---

```mermaid
flowchart TB
    subgraph Routes["📍 Routes"]
        R1["/ (Welcome)"]
        R2["/chat/[id]"]
        RL["+layout.svelte"]
    end

    subgraph Components["🧩 Components"]
        C_Sidebar["ChatSidebar"]
        C_Screen["ChatScreen"]
        C_Form["ChatForm"]
        C_Messages["ChatMessages"]
        C_ModelsSelector["ModelsSelector"]
        C_Settings["ChatSettings"]
    end

    subgraph Hooks["🪝 Hooks"]
        H1["useModelChangeValidation"]
        H2["useProcessingState"]
    end

    subgraph Stores["🗄️ Stores"]
        S1["chatStore<br/><i>Chat interactions & streaming</i>"]
        S2["conversationsStore<br/><i>Conversation data & messages</i>"]
        S3["modelsStore<br/><i>Model selection & loading</i>"]
        S4["serverStore<br/><i>Server props & role detection</i>"]
        S5["settingsStore<br/><i>User configuration</i>"]
    end

    subgraph Services["⚙️ Services"]
        SV1["ChatService"]
        SV2["ModelsService"]
        SV3["PropsService"]
        SV4["DatabaseService"]
        SV5["ParameterSyncService"]
    end

    subgraph Storage["💾 Storage"]
        ST1["IndexedDB<br/><i>conversations, messages</i>"]
        ST2["LocalStorage<br/><i>config, userOverrides</i>"]
    end

    subgraph APIs["🌐 llama-server API"]
        API1["/v1/chat/completions"]
        API2["/props"]
        API3["/models/*"]
        API4["/v1/models"]
    end

    %% Routes → Components
    R1 & R2 --> C_Screen
    RL --> C_Sidebar

    %% Component hierarchy
    C_Screen --> C_Form & C_Messages & C_Settings
    C_Form & C_Messages --> C_ModelsSelector

    %% Components → Hooks → Stores
    C_Form & C_Messages --> H1 & H2
    H1 --> S3 & S4
    H2 --> S1 & S5

    %% Components → Stores
    C_Screen --> S1 & S2
    C_Sidebar --> S2
    C_ModelsSelector --> S3 & S4
    C_Settings --> S5

    %% Stores → Services
    S1 --> SV1 & SV4
    S2 --> SV4
    S3 --> SV2 & SV3
    S4 --> SV3
    S5 --> SV5

    %% Services → Storage
    SV4 --> ST1
    SV5 --> ST2

    %% Services → APIs
    SV1 --> API1
    SV2 --> API3 & API4
    SV3 --> API2

    %% Styling
    classDef routeStyle fill:#e1f5fe,stroke:#01579b,stroke-width:2px
    classDef componentStyle fill:#f3e5f5,stroke:#7b1fa2,stroke-width:2px
    classDef hookStyle fill:#fff8e1,stroke:#ff8f00,stroke-width:2px
    classDef storeStyle fill:#fff3e0,stroke:#e65100,stroke-width:2px
    classDef serviceStyle fill:#e8f5e9,stroke:#2e7d32,stroke-width:2px
    classDef storageStyle fill:#fce4ec,stroke:#c2185b,stroke-width:2px
    classDef apiStyle fill:#e3f2fd,stroke:#1565c0,stroke-width:2px

    class R1,R2,RL routeStyle
    class C_Sidebar,C_Screen,C_Form,C_Messages,C_ModelsSelector,C_Settings componentStyle
    class H1,H2 hookStyle
    class S1,S2,S3,S4,S5 storeStyle
    class SV1,SV2,SV3,SV4,SV5 serviceStyle
    class ST1,ST2 storageStyle
    class API1,API2,API3,API4 apiStyle
```

---

```mermaid
flowchart TB
    subgraph Routes["📍 Routes"]
        R1["/ (+page.svelte)"]
        R2["/chat/[id]"]
        RL["+layout.svelte"]
    end

    subgraph Components["🧩 Components"]
        direction TB
        subgraph LayoutComponents["Layout"]
            C_Sidebar["ChatSidebar"]
            C_Screen["ChatScreen"]
        end
        subgraph ChatUIComponents["Chat UI"]
            C_Form["ChatForm"]
            C_Messages["ChatMessages"]
            C_Message["ChatMessage"]
            C_Attach["ChatAttachments"]
            C_ModelsSelector["ModelsSelector"]
            C_Settings["ChatSettings"]
        end
    end

    subgraph Hooks["🪝 Hooks"]
        H1["useModelChangeValidation"]
        H2["useProcessingState"]
        H3["isMobile"]
    end

    subgraph Stores["🗄️ Stores"]
        direction TB
        subgraph S1["chatStore"]
            S1State["<b>State:</b><br/>isLoading, currentResponse<br/>errorDialogState<br/>activeProcessingState<br/>chatLoadingStates<br/>chatStreamingStates<br/>abortControllers<br/>processingStates<br/>activeConversationId<br/>isStreamingActive"]
            S1LoadState["<b>Loading State:</b><br/>setChatLoading()<br/>isChatLoading()<br/>syncLoadingStateForChat()<br/>clearUIState()<br/>isChatLoadingPublic()<br/>getAllLoadingChats()<br/>getAllStreamingChats()"]
            S1ProcState["<b>Processing State:</b><br/>setActiveProcessingConversation()<br/>getProcessingState()<br/>clearProcessingState()<br/>getActiveProcessingState()<br/>updateProcessingStateFromTimings()<br/>getCurrentProcessingStateSync()<br/>restoreProcessingStateFromMessages()"]
            S1Stream["<b>Streaming:</b><br/>streamChatCompletion()<br/>startStreaming()<br/>stopStreaming()<br/>stopGeneration()<br/>isStreaming()"]
            S1Error["<b>Error Handling:</b><br/>showErrorDialog()<br/>dismissErrorDialog()<br/>isAbortError()"]
            S1Msg["<b>Message Operations:</b><br/>addMessage()<br/>sendMessage()<br/>updateMessage()<br/>deleteMessage()<br/>getDeletionInfo()"]
            S1Regen["<b>Regeneration:</b><br/>regenerateMessage()<br/>regenerateMessageWithBranching()<br/>continueAssistantMessage()"]
            S1Edit["<b>Editing:</b><br/>editAssistantMessage()<br/>editUserMessagePreserveResponses()<br/>editMessageWithBranching()"]
            S1Utils["<b>Utilities:</b><br/>getApiOptions()<br/>parseTimingData()<br/>getOrCreateAbortController()<br/>getConversationModel()"]
        end
        subgraph S2["conversationsStore"]
            S2State["<b>State:</b><br/>conversations<br/>activeConversation<br/>activeMessages<br/>usedModalities<br/>isInitialized<br/>titleUpdateConfirmationCallback"]
            S2Modal["<b>Modalities:</b><br/>getModalitiesUpToMessage()<br/>calculateModalitiesFromMessages()"]
            S2Lifecycle["<b>Lifecycle:</b><br/>initialize()<br/>loadConversations()<br/>clearActiveConversation()"]
            S2ConvCRUD["<b>Conversation CRUD:</b><br/>createConversation()<br/>loadConversation()<br/>deleteConversation()<br/>updateConversationName()<br/>updateConversationTitleWithConfirmation()"]
            S2MsgMgmt["<b>Message Management:</b><br/>refreshActiveMessages()<br/>addMessageToActive()<br/>updateMessageAtIndex()<br/>findMessageIndex()<br/>sliceActiveMessages()<br/>removeMessageAtIndex()<br/>getConversationMessages()"]
            S2Nav["<b>Navigation:</b><br/>navigateToSibling()<br/>updateCurrentNode()<br/>updateConversationTimestamp()"]
            S2Export["<b>Import/Export:</b><br/>downloadConversation()<br/>exportAllConversations()<br/>importConversations()<br/>triggerDownload()"]
            S2Utils["<b>Utilities:</b><br/>setTitleUpdateConfirmationCallback()"]
        end
        subgraph S3["modelsStore"]
            S3State["<b>State:</b><br/>models, routerModels<br/>selectedModelId<br/>selectedModelName<br/>loading, updating, error<br/>modelLoadingStates<br/>modelPropsCache<br/>modelPropsFetching<br/>propsCacheVersion"]
            S3Getters["<b>Computed Getters:</b><br/>selectedModel<br/>loadedModelIds<br/>loadingModelIds<br/>singleModelName"]
            S3Modal["<b>Modalities:</b><br/>getModelModalities()<br/>modelSupportsVision()<br/>modelSupportsAudio()<br/>getModelModalitiesArray()<br/>getModelProps()<br/>updateModelModalities()"]
            S3Status["<b>Status Queries:</b><br/>isModelLoaded()<br/>isModelOperationInProgress()<br/>getModelStatus()<br/>isModelPropsFetching()"]
            S3Fetch["<b>Data Fetching:</b><br/>fetch()<br/>fetchRouterModels()<br/>fetchModelProps()<br/>fetchModalitiesForLoadedModels()"]
            S3Select["<b>Model Selection:</b><br/>selectModelById()<br/>selectModelByName()<br/>clearSelection()<br/>findModelByName()<br/>findModelById()<br/>hasModel()"]
            S3LoadUnload["<b>Loading/Unloading Models:</b><br/>loadModel()<br/>unloadModel()<br/>ensureModelLoaded()<br/>waitForModelStatus()<br/>pollForModelStatus()"]
            S3Utils["<b>Utilities:</b><br/>toDisplayName()<br/>clear()"]
        end
        subgraph S4["serverStore"]
            S4State["<b>State:</b><br/>props<br/>loading, error<br/>role<br/>fetchPromise"]
            S4Getters["<b>Getters:</b><br/>defaultParams<br/>contextSize<br/>isRouterMode<br/>isModelMode"]
            S4Data["<b>Data Handling:</b><br/>fetch()<br/>getErrorMessage()<br/>clear()"]
            S4Utils["<b>Utilities:</b><br/>detectRole()"]
        end
        subgraph S5["settingsStore"]
            S5State["<b>State:</b><br/>config<br/>theme<br/>isInitialized<br/>userOverrides"]
            S5Lifecycle["<b>Lifecycle:</b><br/>initialize()<br/>loadConfig()<br/>saveConfig()<br/>loadTheme()<br/>saveTheme()"]
            S5Update["<b>Config Updates:</b><br/>updateConfig()<br/>updateMultipleConfig()<br/>updateTheme()"]
            S5Reset["<b>Reset:</b><br/>resetConfig()<br/>resetTheme()<br/>resetAll()<br/>resetParameterToServerDefault()"]
            S5Sync["<b>Server Sync:</b><br/>syncWithServerDefaults()<br/>forceSyncWithServerDefaults()"]
            S5Utils["<b>Utilities:</b><br/>getConfig()<br/>getAllConfig()<br/>getParameterInfo()<br/>getParameterDiff()<br/>getServerDefaults()<br/>clearAllUserOverrides()"]
        end

        subgraph ReactiveExports["⚡ Reactive Exports"]
            direction LR
            subgraph ChatExports["chatStore"]
                RE1["isLoading()"]
                RE2["currentResponse()"]
                RE3["errorDialog()"]
                RE4["activeProcessingState()"]
                RE5["isChatStreaming()"]
                RE6["isChatLoading()"]
                RE7["getChatStreaming()"]
                RE8["getAllLoadingChats()"]
                RE9["getAllStreamingChats()"]
            end
            subgraph ConvExports["conversationsStore"]
                RE10["conversations()"]
                RE11["activeConversation()"]
                RE12["activeMessages()"]
                RE13["isConversationsInitialized()"]
                RE14["usedModalities()"]
            end
            subgraph ModelsExports["modelsStore"]
                RE15["modelOptions()"]
                RE16["routerModels()"]
                RE17["modelsLoading()"]
                RE18["modelsUpdating()"]
                RE19["modelsError()"]
                RE20["selectedModelId()"]
                RE21["selectedModelName()"]
                RE22["selectedModelOption()"]
                RE23["loadedModelIds()"]
                RE24["loadingModelIds()"]
                RE25["propsCacheVersion()"]
                RE26["singleModelName()"]
            end
            subgraph ServerExports["serverStore"]
                RE27["serverProps()"]
                RE28["serverLoading()"]
                RE29["serverError()"]
                RE30["serverRole()"]
                RE31["defaultParams()"]
                RE32["contextSize()"]
                RE33["isRouterMode()"]
                RE34["isModelMode()"]
            end
            subgraph SettingsExports["settingsStore"]
                RE35["config()"]
                RE36["theme()"]
                RE37["isInitialized()"]
            end
        end
    end

    subgraph Services["⚙️ Services"]
        direction TB
        subgraph SV1["ChatService"]
            SV1Msg["<b>Messaging:</b><br/>sendMessage()"]
            SV1Stream["<b>Streaming:</b><br/>handleStreamResponse()<br/>parseSSEChunk()"]
            SV1Convert["<b>Conversion:</b><br/>convertMessageToChatData()<br/>convertExtraToApiFormat()"]
            SV1Utils["<b>Utilities:</b><br/>extractReasoningContent()<br/>getServerProps()<br/>getModels()"]
        end
        subgraph SV2["ModelsService"]
            SV2List["<b>Listing:</b><br/>list()<br/>listRouter()"]
            SV2LoadUnload["<b>Load/Unload:</b><br/>load()<br/>unload()"]
            SV2Status["<b>Status:</b><br/>isModelLoaded()<br/>isModelLoading()"]
        end
        subgraph SV3["PropsService"]
            SV3Fetch["<b>Fetching:</b><br/>fetch()<br/>fetchForModel()"]
        end
        subgraph SV4["DatabaseService"]
            SV4Conv["<b>Conversations:</b><br/>createConversation()<br/>getConversation()<br/>getAllConversations()<br/>updateConversation()<br/>deleteConversation()"]
            SV4Msg["<b>Messages:</b><br/>createMessageBranch()<br/>createRootMessage()<br/>getConversationMessages()<br/>updateMessage()<br/>deleteMessage()<br/>deleteMessageCascading()"]
            SV4Node["<b>Navigation:</b><br/>updateCurrentNode()"]
            SV4Import["<b>Import:</b><br/>importConversations()"]
        end
        subgraph SV5["ParameterSyncService"]
            SV5Extract["<b>Extraction:</b><br/>extractServerDefaults()"]
            SV5Merge["<b>Merging:</b><br/>mergeWithServerDefaults()"]
            SV5Info["<b>Info:</b><br/>getParameterInfo()<br/>canSyncParameter()<br/>getSyncableParameterKeys()<br/>validateServerParameter()"]
            SV5Diff["<b>Diff:</b><br/>createParameterDiff()"]
        end
    end

    subgraph Storage["💾 Storage"]
        ST1["IndexedDB"]
        ST2["conversations"]
        ST3["messages"]
        ST5["LocalStorage"]
        ST6["config"]
        ST7["userOverrides"]
    end

    subgraph APIs["🌐 llama-server API"]
        API1["/v1/chat/completions"]
        API2["/props<br/>/props?model="]
        API3["/models<br/>/models/load<br/>/models/unload"]
        API4["/v1/models"]
    end

    %% Routes render Components
    R1 --> C_Screen
    R2 --> C_Screen
    RL --> C_Sidebar

    %% Component hierarchy
    C_Screen --> C_Form & C_Messages & C_Settings
    C_Messages --> C_Message
    C_Message --> C_ModelsSelector
    C_Form --> C_ModelsSelector
    C_Form --> C_Attach
    C_Message --> C_Attach

    %% Components use Hooks
    C_Form --> H1
    C_Message --> H1 & H2
    C_Screen --> H2

    %% Hooks use Stores
    H1 --> S3 & S4
    H2 --> S1 & S5

    %% Components use Stores
    C_Screen --> S1 & S2
    C_Messages --> S2
    C_Message --> S1 & S2 & S3
    C_Form --> S1 & S3
    C_Sidebar --> S2
    C_ModelsSelector --> S3 & S4
    C_Settings --> S5

    %% Stores export Reactive State
    S1 -. exports .-> ChatExports
    S2 -. exports .-> ConvExports
    S3 -. exports .-> ModelsExports
    S4 -. exports .-> ServerExports
    S5 -. exports .-> SettingsExports

    %% Stores use Services
    S1 --> SV1 & SV4
    S2 --> SV4
    S3 --> SV2 & SV3
    S4 --> SV3
    S5 --> SV5

    %% Services to Storage
    SV4 --> ST1
    ST1 --> ST2 & ST3
    SV5 --> ST5
    ST5 --> ST6 & ST7

    %% Services to APIs
    SV1 --> API1
    SV2 --> API3 & API4
    SV3 --> API2

    %% Styling
    classDef routeStyle fill:#e1f5fe,stroke:#01579b,stroke-width:2px
    classDef componentStyle fill:#f3e5f5,stroke:#7b1fa2,stroke-width:2px
    classDef componentGroupStyle fill:#e1bee7,stroke:#7b1fa2,stroke-width:1px
    classDef storeStyle fill:#fff3e0,stroke:#e65100,stroke-width:2px
    classDef stateStyle fill:#ffe0b2,stroke:#e65100,stroke-width:1px
    classDef methodStyle fill:#ffecb3,stroke:#e65100,stroke-width:1px
    classDef reactiveStyle fill:#fffde7,stroke:#f9a825,stroke-width:1px
    classDef serviceStyle fill:#e8f5e9,stroke:#2e7d32,stroke-width:2px
    classDef serviceMStyle fill:#c8e6c9,stroke:#2e7d32,stroke-width:1px
    classDef storageStyle fill:#fce4ec,stroke:#c2185b,stroke-width:2px
    classDef apiStyle fill:#e3f2fd,stroke:#1565c0,stroke-width:2px

    class R1,R2,RL routeStyle
    class C_Sidebar,C_Screen,C_Form,C_Messages,C_Message componentStyle
    class C_ModelsSelector,C_Settings componentStyle
    class C_Attach componentStyle
    class H1,H2,H3 methodStyle
    class LayoutComponents,ChatUIComponents componentGroupStyle
    class Hooks storeStyle
    class S1,S2,S3,S4,S5 storeStyle
    class S1State,S2State,S3State,S4State,S5State stateStyle
    class S1Msg,S1Regen,S1Edit,S1Stream,S1LoadState,S1ProcState,S1Error,S1Utils methodStyle
    class S2Lifecycle,S2ConvCRUD,S2MsgMgmt,S2Nav,S2Modal,S2Export,S2Utils methodStyle
    class S3Getters,S3Modal,S3Status,S3Fetch,S3Select,S3LoadUnload,S3Utils methodStyle
    class S4Getters,S4Data,S4Utils methodStyle
    class S5Lifecycle,S5Update,S5Reset,S5Sync,S5Utils methodStyle
    class ChatExports,ConvExports,ModelsExports,ServerExports,SettingsExports reactiveStyle
    class SV1,SV2,SV3,SV4,SV5 serviceStyle
    class SV1Msg,SV1Stream,SV1Convert,SV1Utils serviceMStyle
    class SV2List,SV2LoadUnload,SV2Status serviceMStyle
    class SV3Fetch serviceMStyle
    class SV4Conv,SV4Msg,SV4Node,SV4Import serviceMStyle
    class SV5Extract,SV5Merge,SV5Info,SV5Diff serviceMStyle
    class ST1,ST2,ST3,ST5,ST6,ST7 storageStyle
    class API1,API2,API3,API4 apiStyle
```

---

```mermaid
sequenceDiagram
    participant UI as 🧩 ChatForm / ChatMessage
    participant chatStore as 🗄️ chatStore
    participant convStore as 🗄️ conversationsStore
    participant settingsStore as 🗄️ settingsStore
    participant ChatSvc as ⚙️ ChatService
    participant DbSvc as ⚙️ DatabaseService
    participant API as 🌐 /v1/chat/completions

    Note over chatStore: State:<br/>isLoading, currentResponse<br/>errorDialogState, activeProcessingState<br/>chatLoadingStates (Map)<br/>chatStreamingStates (Map)<br/>abortControllers (Map)<br/>processingStates (Map)

    %% ═══════════════════════════════════════════════════════════════════════════
    Note over UI,API: 💬 SEND MESSAGE
    %% ═══════════════════════════════════════════════════════════════════════════

    UI->>chatStore: sendMessage(content, extras)
    activate chatStore

    chatStore->>chatStore: setChatLoading(convId, true)
    chatStore->>chatStore: clearChatStreaming(convId)

    alt no active conversation
        chatStore->>convStore: createConversation()
        Note over convStore: → see conversations-flow.mmd
    end

    chatStore->>chatStore: addMessage("user", content, extras)
    chatStore->>DbSvc: createMessageBranch(userMsg, parentId)
    chatStore->>convStore: addMessageToActive(userMsg)
    chatStore->>convStore: updateCurrentNode(userMsg.id)

    chatStore->>chatStore: createAssistantMessage(userMsg.id)
    chatStore->>DbSvc: createMessageBranch(assistantMsg, userMsg.id)
    chatStore->>convStore: addMessageToActive(assistantMsg)

    chatStore->>chatStore: streamChatCompletion(messages, assistantMsg)
    deactivate chatStore

    %% ═══════════════════════════════════════════════════════════════════════════
    Note over UI,API: 🌊 STREAMING
    %% ═══════════════════════════════════════════════════════════════════════════

    activate chatStore
    chatStore->>chatStore: startStreaming()
    Note right of chatStore: isStreamingActive = true

    chatStore->>chatStore: setActiveProcessingConversation(convId)
    chatStore->>chatStore: getOrCreateAbortController(convId)
    Note right of chatStore: abortControllers.set(convId, new AbortController())

    chatStore->>chatStore: getApiOptions()
    Note right of chatStore: Merge from settingsStore.config:<br/>temperature, max_tokens, top_p, etc.

    chatStore->>ChatSvc: sendMessage(messages, options, signal)
    activate ChatSvc

    ChatSvc->>ChatSvc: convertMessageToChatData(messages)
    Note right of ChatSvc: DatabaseMessage[] → ApiChatMessageData[]<br/>Process attachments (images, PDFs, audio)

    ChatSvc->>API: POST /v1/chat/completions
    Note right of API: {messages, model?, stream: true, ...params}

    loop SSE chunks
        API-->>ChatSvc: data: {"choices":[{"delta":{...}}]}
        ChatSvc->>ChatSvc: parseSSEChunk(line)

        alt content chunk
            ChatSvc-->>chatStore: onChunk(content)
            chatStore->>chatStore: setChatStreaming(convId, response, msgId)
            Note right of chatStore: currentResponse = $state(accumulated)
            chatStore->>convStore: updateMessageAtIndex(idx, {content})
        end

        alt reasoning chunk
            ChatSvc-->>chatStore: onReasoningChunk(reasoning)
            chatStore->>convStore: updateMessageAtIndex(idx, {thinking})
        end

        alt tool_calls chunk
            ChatSvc-->>chatStore: onToolCallChunk(toolCalls)
            chatStore->>convStore: updateMessageAtIndex(idx, {toolCalls})
        end

        alt model info
            ChatSvc-->>chatStore: onModel(modelName)
            chatStore->>chatStore: recordModel(modelName)
            chatStore->>DbSvc: updateMessage(msgId, {model})
        end

        alt timings (during stream)
            ChatSvc-->>chatStore: onTimings(timings, promptProgress)
            chatStore->>chatStore: updateProcessingStateFromTimings()
        end

        chatStore-->>UI: reactive $state update
    end

    API-->>ChatSvc: data: [DONE]
    ChatSvc-->>chatStore: onComplete(content, reasoning, timings, toolCalls)
    deactivate ChatSvc

    chatStore->>chatStore: stopStreaming()
    chatStore->>DbSvc: updateMessage(msgId, {content, timings, model})
    chatStore->>convStore: updateCurrentNode(msgId)
    chatStore->>chatStore: setChatLoading(convId, false)
    chatStore->>chatStore: clearChatStreaming(convId)
    chatStore->>chatStore: clearProcessingState(convId)
    deactivate chatStore

    %% ═══════════════════════════════════════════════════════════════════════════
    Note over UI,API: ⏹️ STOP GENERATION
    %% ═══════════════════════════════════════════════════════════════════════════

    UI->>chatStore: stopGeneration()
    activate chatStore
    chatStore->>chatStore: savePartialResponseIfNeeded(convId)
    Note right of chatStore: Save currentResponse to DB if non-empty
    chatStore->>chatStore: abortControllers.get(convId).abort()
    Note right of chatStore: fetch throws AbortError → caught by isAbortError()
    chatStore->>chatStore: stopStreaming()
    chatStore->>chatStore: setChatLoading(convId, false)
    chatStore->>chatStore: clearChatStreaming(convId)
    chatStore->>chatStore: clearProcessingState(convId)
    deactivate chatStore

    %% ═══════════════════════════════════════════════════════════════════════════
    Note over UI,API: 🔁 REGENERATE
    %% ═══════════════════════════════════════════════════════════════════════════

    UI->>chatStore: regenerateMessageWithBranching(msgId, model?)
    activate chatStore
    chatStore->>convStore: findMessageIndex(msgId)
    chatStore->>chatStore: Get parent of target message
    chatStore->>chatStore: createAssistantMessage(parentId)
    chatStore->>DbSvc: createMessageBranch(newAssistantMsg, parentId)
    chatStore->>convStore: refreshActiveMessages()
    Note right of chatStore: Same streaming flow
    chatStore->>chatStore: streamChatCompletion(...)
    deactivate chatStore

    %% ═══════════════════════════════════════════════════════════════════════════
    Note over UI,API: ➡️ CONTINUE
    %% ═══════════════════════════════════════════════════════════════════════════

    UI->>chatStore: continueAssistantMessage(msgId)
    activate chatStore
    chatStore->>chatStore: Get existing content from message
    chatStore->>chatStore: streamChatCompletion(..., existingContent)
    Note right of chatStore: Appends to existing message content
    deactivate chatStore

    %% ═══════════════════════════════════════════════════════════════════════════
    Note over UI,API: ✏️ EDIT USER MESSAGE
    %% ═══════════════════════════════════════════════════════════════════════════

    UI->>chatStore: editUserMessagePreserveResponses(msgId, newContent)
    activate chatStore
    chatStore->>chatStore: Get parent of target message
    chatStore->>DbSvc: createMessageBranch(editedMsg, parentId)
    chatStore->>convStore: refreshActiveMessages()
    Note right of chatStore: Creates new branch, original preserved
    deactivate chatStore

    %% ═══════════════════════════════════════════════════════════════════════════
    Note over UI,API: ❌ ERROR HANDLING
    %% ═══════════════════════════════════════════════════════════════════════════

    Note over chatStore: On stream error (non-abort):
    chatStore->>chatStore: showErrorDialog(type, message)
    Note right of chatStore: errorDialogState = {type: 'timeout'|'server', message}
    chatStore->>convStore: removeMessageAtIndex(failedMsgIdx)
    chatStore->>DbSvc: deleteMessage(failedMsgId)
```

---

```mermaid
sequenceDiagram
    participant UI as 🧩 ChatSidebar / ChatScreen
    participant convStore as 🗄️ conversationsStore
    participant chatStore as 🗄️ chatStore
    participant DbSvc as ⚙️ DatabaseService
    participant IDB as 💾 IndexedDB

    Note over convStore: State:<br/>conversations: DatabaseConversation[]<br/>activeConversation: DatabaseConversation | null<br/>activeMessages: DatabaseMessage[]<br/>isInitialized: boolean<br/>usedModalities: $derived({vision, audio})

    %% ═══════════════════════════════════════════════════════════════════════════
    Note over UI,IDB: 🚀 INITIALIZATION
    %% ═══════════════════════════════════════════════════════════════════════════

    Note over convStore: Auto-initialized in constructor (browser only)
    convStore->>convStore: initialize()
    activate convStore
    convStore->>convStore: loadConversations()
    convStore->>DbSvc: getAllConversations()
    DbSvc->>IDB: SELECT * FROM conversations ORDER BY lastModified DESC
    IDB-->>DbSvc: Conversation[]
    DbSvc-->>convStore: conversations
    convStore->>convStore: conversations = $state(data)
    convStore->>convStore: isInitialized = true
    deactivate convStore

    %% ═══════════════════════════════════════════════════════════════════════════
    Note over UI,IDB: ➕ CREATE CONVERSATION
    %% ═══════════════════════════════════════════════════════════════════════════

    UI->>convStore: createConversation(name?)
    activate convStore
    convStore->>DbSvc: createConversation(name || "New Chat")
    DbSvc->>IDB: INSERT INTO conversations
    IDB-->>DbSvc: conversation {id, name, lastModified, currNode: ""}
    DbSvc-->>convStore: conversation
    convStore->>convStore: conversations.unshift(conversation)
    convStore->>convStore: activeConversation = $state(conversation)
    convStore->>convStore: activeMessages = $state([])
    deactivate convStore

    %% ═══════════════════════════════════════════════════════════════════════════
    Note over UI,IDB: 📂 LOAD CONVERSATION
    %% ═══════════════════════════════════════════════════════════════════════════

    UI->>convStore: loadConversation(convId)
    activate convStore
    convStore->>DbSvc: getConversation(convId)
    DbSvc->>IDB: SELECT * FROM conversations WHERE id = ?
    IDB-->>DbSvc: conversation
    convStore->>convStore: activeConversation = $state(conversation)

    convStore->>convStore: refreshActiveMessages()
    convStore->>DbSvc: getConversationMessages(convId)
    DbSvc->>IDB: SELECT * FROM messages WHERE convId = ?
    IDB-->>DbSvc: allMessages[]
    convStore->>convStore: filterByLeafNodeId(allMessages, currNode)
    Note right of convStore: Filter to show only current branch path
    convStore->>convStore: activeMessages = $state(filtered)

    convStore->>chatStore: syncLoadingStateForChat(convId)
    Note right of chatStore: Sync isLoading/currentResponse if streaming
    deactivate convStore

    %% ═══════════════════════════════════════════════════════════════════════════
    Note over UI,IDB: 🌳 MESSAGE BRANCHING MODEL
    %% ═══════════════════════════════════════════════════════════════════════════

    Note over IDB: Message Tree Structure:<br/>- Each message has parent (null for root)<br/>- Each message has children[] array<br/>- Conversation.currNode points to active leaf<br/>- filterByLeafNodeId() traverses from root to currNode

    rect rgb(240, 240, 255)
        Note over convStore: Example Branch Structure:
        Note over convStore: root → user1 → assistant1 → user2 → assistant2a (currNode)<br/> ↘ assistant2b (alt branch)
    end

    %% ═══════════════════════════════════════════════════════════════════════════
    Note over UI,IDB: ↔️ BRANCH NAVIGATION
    %% ═══════════════════════════════════════════════════════════════════════════

    UI->>convStore: navigateToSibling(msgId, direction)
    activate convStore
    convStore->>convStore: Find message in activeMessages
    convStore->>convStore: Get parent message
    convStore->>convStore: Find sibling in parent.children[]
    convStore->>convStore: findLeafNode(siblingId, allMessages)
    Note right of convStore: Navigate to leaf of sibling branch
    convStore->>convStore: updateCurrentNode(leafId)
    convStore->>DbSvc: updateCurrentNode(convId, leafId)
    DbSvc->>IDB: UPDATE conversations SET currNode = ?
    convStore->>convStore: refreshActiveMessages()
    deactivate convStore

    %% ═══════════════════════════════════════════════════════════════════════════
    Note over UI,IDB: 📝 UPDATE CONVERSATION
    %% ═══════════════════════════════════════════════════════════════════════════

    UI->>convStore: updateConversationName(convId, newName)
    activate convStore
    convStore->>DbSvc: updateConversation(convId, {name: newName})
    DbSvc->>IDB: UPDATE conversations SET name = ?
    convStore->>convStore: Update in conversations array
    deactivate convStore

    Note over convStore: Auto-title update (after first response):
    convStore->>convStore: updateConversationTitleWithConfirmation()
    convStore->>convStore: titleUpdateConfirmationCallback?()
    Note right of convStore: Shows dialog if title would change

    %% ═══════════════════════════════════════════════════════════════════════════
    Note over UI,IDB: 🗑️ DELETE CONVERSATION
    %% ═══════════════════════════════════════════════════════════════════════════

    UI->>convStore: deleteConversation(convId)
    activate convStore
    convStore->>DbSvc: deleteConversation(convId)
    DbSvc->>IDB: DELETE FROM conversations WHERE id = ?
    DbSvc->>IDB: DELETE FROM messages WHERE convId = ?
    convStore->>convStore: conversations.filter(c => c.id !== convId)
    alt deleted active conversation
        convStore->>convStore: clearActiveConversation()
    end
    deactivate convStore

    %% ═══════════════════════════════════════════════════════════════════════════
    Note over UI,IDB: 📊 MODALITY TRACKING
    %% ═══════════════════════════════════════════════════════════════════════════

    Note over convStore: usedModalities = $derived.by(() => {<br/> calculateModalitiesFromMessages(activeMessages)<br/>})

    Note over convStore: Scans activeMessages for attachments:<br/>- IMAGE → vision: true<br/>- PDF (processedAsImages) → vision: true<br/>- AUDIO → audio: true

    UI->>convStore: getModalitiesUpToMessage(msgId)
    Note right of convStore: Used for regeneration validation<br/>Only checks messages BEFORE target

    %% ═══════════════════════════════════════════════════════════════════════════
    Note over UI,IDB: 📤 EXPORT / 📥 IMPORT
    %% ═══════════════════════════════════════════════════════════════════════════

    UI->>convStore: exportAllConversations()
    activate convStore
    convStore->>DbSvc: getAllConversations()
    loop each conversation
        convStore->>DbSvc: getConversationMessages(convId)
    end
    convStore->>convStore: triggerDownload(JSON blob)
    deactivate convStore

    UI->>convStore: importConversations(file)
    activate convStore
    convStore->>convStore: Parse JSON file
    convStore->>DbSvc: importConversations(parsed)
    DbSvc->>IDB: Bulk INSERT conversations + messages
    convStore->>convStore: loadConversations()
    deactivate convStore
```

---

```mermaid
%% MODEL Mode Data Flow (single model)
%% Detailed flows: ./flows/server-flow.mmd, ./flows/models-flow.mmd, ./flows/chat-flow.mmd

sequenceDiagram
    participant User as 👤 User
    participant UI as 🧩 UI
    participant Stores as 🗄️ Stores
    participant DB as 💾 IndexedDB
    participant API as 🌐 llama-server

    Note over User,API: 🚀 Initialization (see: server-flow.mmd, models-flow.mmd)

    UI->>Stores: initialize()
    Stores->>DB: load conversations
    Stores->>API: GET /props
    API-->>Stores: server config + modalities
    Stores->>API: GET /v1/models
    API-->>Stores: single model (auto-selected)

    Note over User,API: 💬 Chat Flow (see: chat-flow.mmd)

    User->>UI: send message
    UI->>Stores: sendMessage()
    Stores->>DB: save user message
    Stores->>API: POST /v1/chat/completions (stream)
    loop streaming
        API-->>Stores: SSE chunks
        Stores-->>UI: reactive update
    end
    API-->>Stores: done + timings
    Stores->>DB: save assistant message

    Note over User,API: 🔁 Regenerate

    User->>UI: regenerate
    Stores->>DB: create message branch
    Note right of Stores: same streaming flow

    Note over User,API: ⏹️ Stop

    User->>UI: stop
    Stores->>Stores: abort stream
    Stores->>DB: save partial response
```

---

```mermaid
%% ROUTER Mode Data Flow (multi-model)
%% Detailed flows: ./flows/server-flow.mmd, ./flows/models-flow.mmd, ./flows/chat-flow.mmd

sequenceDiagram
    participant User as 👤 User
    participant UI as 🧩 UI
    participant Stores as 🗄️ Stores
    participant DB as 💾 IndexedDB
    participant API as 🌐 llama-server

    Note over User,API: 🚀 Initialization (see: server-flow.mmd, models-flow.mmd)

    UI->>Stores: initialize()
    Stores->>DB: load conversations
    Stores->>API: GET /props
    API-->>Stores: {role: "router"}
    Stores->>API: GET /models
    API-->>Stores: models[] with status (loaded/available)
    loop each loaded model
        Stores->>API: GET /props?model=X
        API-->>Stores: modalities (vision/audio)
    end

    Note over User,API: 🔄 Model Selection (see: models-flow.mmd)

    User->>UI: select model
    alt model not loaded
        Stores->>API: POST /models/load
        loop poll status
            Stores->>API: GET /models
            API-->>Stores: check if loaded
        end
        Stores->>API: GET /props?model=X
        API-->>Stores: cache modalities
    end
    Stores->>Stores: validate modalities vs conversation
    alt valid
        Stores->>Stores: select model
    else invalid
        Stores->>API: POST /models/unload
        UI->>User: show error toast
    end

    Note over User,API: 💬 Chat Flow (see: chat-flow.mmd)

    User->>UI: send message
    UI->>Stores: sendMessage()
    Stores->>DB: save user message
    Stores->>API: POST /v1/chat/completions {model: X}
    Note right of API: router forwards to model
    loop streaming
        API-->>Stores: SSE chunks + model info
        Stores-->>UI: reactive update
    end
    API-->>Stores: done + timings
    Stores->>DB: save assistant message + model used

    Note over User,API: 🔁 Regenerate (optional: different model)

    User->>UI: regenerate
    Stores->>Stores: validate modalities up to this message
    Stores->>DB: create message branch
    Note right of Stores: same streaming flow

    Note over User,API: ⏹️ Stop

    User->>UI: stop
    Stores->>Stores: abort stream
    Stores->>DB: save partial response

    Note over User,API: 🗑️ LRU Unloading

    Note right of API: Server auto-unloads LRU models<br/>when cache full
    User->>UI: select unloaded model
    Note right of Stores: triggers load flow again
```

---

```mermaid
sequenceDiagram
    participant Store as 🗄️ Stores
    participant DbSvc as ⚙️ DatabaseService
    participant Dexie as 📦 Dexie ORM
    participant IDB as 💾 IndexedDB

    Note over DbSvc: Stateless service - all methods static<br/>Database: "LlamacppWebui"

    %% ═══════════════════════════════════════════════════════════════════════════
    Note over Store,IDB: 📊 SCHEMA
    %% ═══════════════════════════════════════════════════════════════════════════

    rect rgb(240, 248, 255)
        Note over IDB: conversations table:<br/>id (PK), lastModified, currNode, name
    end

    rect rgb(255, 248, 240)
        Note over IDB: messages table:<br/>id (PK), convId (FK), type, role, timestamp,<br/>parent, children[], content, thinking,<br/>toolCalls, extra[], model, timings
    end

    %% ═══════════════════════════════════════════════════════════════════════════
    Note over Store,IDB: 💬 CONVERSATIONS CRUD
    %% ═══════════════════════════════════════════════════════════════════════════

    Store->>DbSvc: createConversation(name)
    activate DbSvc
    DbSvc->>DbSvc: Generate UUID
    DbSvc->>Dexie: db.conversations.add({id, name, lastModified, currNode: ""})
    Dexie->>IDB: INSERT
    IDB-->>Dexie: success
    DbSvc-->>Store: DatabaseConversation
    deactivate DbSvc

    Store->>DbSvc: getConversation(convId)
    DbSvc->>Dexie: db.conversations.get(convId)
    Dexie->>IDB: SELECT WHERE id = ?
    IDB-->>DbSvc: DatabaseConversation

    Store->>DbSvc: getAllConversations()
    DbSvc->>Dexie: db.conversations.orderBy('lastModified').reverse().toArray()
    Dexie->>IDB: SELECT ORDER BY lastModified DESC
    IDB-->>DbSvc: DatabaseConversation[]

    Store->>DbSvc: updateConversation(convId, updates)
    DbSvc->>Dexie: db.conversations.update(convId, {...updates, lastModified})
    Dexie->>IDB: UPDATE

    Store->>DbSvc: deleteConversation(convId)
    activate DbSvc
    DbSvc->>Dexie: db.conversations.delete(convId)
    Dexie->>IDB: DELETE FROM conversations
    DbSvc->>Dexie: db.messages.where('convId').equals(convId).delete()
    Dexie->>IDB: DELETE FROM messages WHERE convId = ?
    deactivate DbSvc

    %% ═══════════════════════════════════════════════════════════════════════════
    Note over Store,IDB: 📝 MESSAGES CRUD
    %% ═══════════════════════════════════════════════════════════════════════════

    Store->>DbSvc: createRootMessage(convId)
    activate DbSvc
    DbSvc->>DbSvc: Create root message {type: "root", parent: null}
    DbSvc->>Dexie: db.messages.add(rootMsg)
    Dexie->>IDB: INSERT
    DbSvc-->>Store: rootMessageId
    deactivate DbSvc

    Store->>DbSvc: createMessageBranch(message, parentId)
    activate DbSvc
    DbSvc->>DbSvc: Generate UUID for new message
    DbSvc->>Dexie: db.messages.add({...message, id, parent: parentId})
    Dexie->>IDB: INSERT message

    alt parentId exists
        DbSvc->>Dexie: db.messages.get(parentId)
        Dexie->>IDB: SELECT parent
        DbSvc->>DbSvc: parent.children.push(newId)
        DbSvc->>Dexie: db.messages.update(parentId, {children})
        Dexie->>IDB: UPDATE parent.children
    end

    DbSvc->>Dexie: db.conversations.update(convId, {currNode: newId})
    Dexie->>IDB: UPDATE conversation.currNode
    DbSvc-->>Store: DatabaseMessage
    deactivate DbSvc

    Store->>DbSvc: getConversationMessages(convId)
    DbSvc->>Dexie: db.messages.where('convId').equals(convId).toArray()
    Dexie->>IDB: SELECT WHERE convId = ?
    IDB-->>DbSvc: DatabaseMessage[]

    Store->>DbSvc: updateMessage(msgId, updates)
    DbSvc->>Dexie: db.messages.update(msgId, updates)
    Dexie->>IDB: UPDATE

    Store->>DbSvc: deleteMessage(msgId)
    DbSvc->>Dexie: db.messages.delete(msgId)
    Dexie->>IDB: DELETE

    %% ═══════════════════════════════════════════════════════════════════════════
    Note over Store,IDB: 🌳 BRANCHING OPERATIONS
    %% ═══════════════════════════════════════════════════════════════════════════

    Store->>DbSvc: updateCurrentNode(convId, nodeId)
    DbSvc->>Dexie: db.conversations.update(convId, {currNode: nodeId, lastModified})
    Dexie->>IDB: UPDATE

    Store->>DbSvc: deleteMessageCascading(msgId)
    activate DbSvc
    DbSvc->>DbSvc: findDescendantMessages(msgId, allMessages)
    Note right of DbSvc: Recursively find all children
    loop each descendant
        DbSvc->>Dexie: db.messages.delete(descendantId)
        Dexie->>IDB: DELETE
    end
    DbSvc->>Dexie: db.messages.delete(msgId)
    Dexie->>IDB: DELETE target message
    deactivate DbSvc

    %% ═══════════════════════════════════════════════════════════════════════════
    Note over Store,IDB: 📥 IMPORT
    %% ═══════════════════════════════════════════════════════════════════════════

    Store->>DbSvc: importConversations(data)
    activate DbSvc
    loop each conversation in data
        DbSvc->>DbSvc: Generate new UUIDs (avoid conflicts)
        DbSvc->>Dexie: db.conversations.add(conversation)
        Dexie->>IDB: INSERT conversation
        loop each message
            DbSvc->>Dexie: db.messages.add(message)
            Dexie->>IDB: INSERT message
        end
    end
    deactivate DbSvc

    %% ═══════════════════════════════════════════════════════════════════════════
    Note over Store,IDB: 🔗 MESSAGE TREE UTILITIES
    %% ═══════════════════════════════════════════════════════════════════════════

    Note over DbSvc: Used by stores (imported from utils):

    rect rgb(240, 255, 240)
        Note over DbSvc: filterByLeafNodeId(messages, leafId)<br/>→ Returns path from root to leaf<br/>→ Used to display current branch
    end

    rect rgb(240, 255, 240)
        Note over DbSvc: findLeafNode(startId, messages)<br/>→ Traverse to deepest child<br/>→ Used for branch navigation
    end

    rect rgb(240, 255, 240)
        Note over DbSvc: findDescendantMessages(msgId, messages)<br/>→ Find all children recursively<br/>→ Used for cascading deletes
    end
```

---
```mermaid
|
||||
sequenceDiagram
|
||||
participant UI as 🧩 ModelsSelector
|
||||
participant Hooks as 🪝 useModelChangeValidation
|
||||
participant modelsStore as 🗄️ modelsStore
|
||||
participant serverStore as 🗄️ serverStore
|
||||
participant convStore as 🗄️ conversationsStore
|
||||
participant ModelsSvc as ⚙️ ModelsService
|
||||
participant PropsSvc as ⚙️ PropsService
|
||||
participant API as 🌐 llama-server
|
||||
|
||||
Note over modelsStore: State:<br/>models: ModelOption[]<br/>routerModels: ApiModelDataEntry[]<br/>selectedModelId, selectedModelName<br/>loading, updating, error<br/>modelLoadingStates (Map)<br/>modelPropsCache (Map)<br/>propsCacheVersion
|
||||
|
||||
%% ═══════════════════════════════════════════════════════════════════════════
|
||||
Note over UI,API: 🚀 INITIALIZATION (MODEL mode)
|
||||
%% ═══════════════════════════════════════════════════════════════════════════
|
||||
|
||||
UI->>modelsStore: fetch()
|
||||
activate modelsStore
|
||||
modelsStore->>modelsStore: loading = true
|
||||
|
||||
alt serverStore.props not loaded
|
||||
modelsStore->>serverStore: fetch()
|
||||
Note over serverStore: → see server-flow.mmd
|
||||
end
|
||||
|
||||
modelsStore->>ModelsSvc: list()
|
||||
ModelsSvc->>API: GET /v1/models
|
||||
API-->>ModelsSvc: ApiModelListResponse {data: [model]}
|
||||
|
||||
modelsStore->>modelsStore: models = $state(mapped)
|
||||
Note right of modelsStore: Map to ModelOption[]:<br/>{id, name, model, description, capabilities}
|
||||
|
||||
Note over modelsStore: MODEL mode: Get modalities from serverStore.props
modelsStore->>modelsStore: modelPropsCache.set(model.id, serverStore.props)
modelsStore->>modelsStore: models[0].modalities = props.modalities

modelsStore->>modelsStore: Auto-select single model
Note right of modelsStore: selectedModelId = models[0].id
modelsStore->>modelsStore: loading = false
deactivate modelsStore

%% ═══════════════════════════════════════════════════════════════════════════
Note over UI,API: 🚀 INITIALIZATION (ROUTER mode)
%% ═══════════════════════════════════════════════════════════════════════════

UI->>modelsStore: fetch()
activate modelsStore
modelsStore->>ModelsSvc: list()
ModelsSvc->>API: GET /v1/models
API-->>ModelsSvc: ApiModelListResponse
modelsStore->>modelsStore: models = $state(mapped)
deactivate modelsStore

Note over UI: After models loaded, layout triggers:
UI->>modelsStore: fetchRouterModels()
activate modelsStore
modelsStore->>ModelsSvc: listRouter()
ModelsSvc->>API: GET /models
API-->>ModelsSvc: ApiRouterModelsListResponse
Note right of API: {data: [{id, status, path, in_cache}]}
modelsStore->>modelsStore: routerModels = $state(data)

modelsStore->>modelsStore: fetchModalitiesForLoadedModels()
loop each model where status === "loaded"
modelsStore->>PropsSvc: fetchForModel(modelId)
PropsSvc->>API: GET /props?model={modelId}
API-->>PropsSvc: ApiLlamaCppServerProps
modelsStore->>modelsStore: modelPropsCache.set(modelId, props)
end
modelsStore->>modelsStore: propsCacheVersion++
deactivate modelsStore

%% ═══════════════════════════════════════════════════════════════════════════
Note over UI,API: 🔄 MODEL SELECTION (ROUTER mode)
%% ═══════════════════════════════════════════════════════════════════════════

UI->>Hooks: useModelChangeValidation({getRequiredModalities, onSuccess?, onValidationFailure?})
Note over Hooks: Hook configured per-component:<br/>ChatForm: getRequiredModalities = usedModalities<br/>ChatMessage: getRequiredModalities = getModalitiesUpToMessage(msgId)

UI->>Hooks: handleModelChange(modelId, modelName)
activate Hooks
Hooks->>Hooks: previousSelectedModelId = modelsStore.selectedModelId
Hooks->>modelsStore: isModelLoaded(modelName)?

alt model NOT loaded
Hooks->>modelsStore: loadModel(modelName)
Note over modelsStore: → see LOAD MODEL section below
end

Note over Hooks: Always fetch props (from cache or API)
Hooks->>modelsStore: fetchModelProps(modelName)
modelsStore-->>Hooks: props

Hooks->>convStore: getRequiredModalities()
convStore-->>Hooks: {vision, audio}

Hooks->>Hooks: Validate: model.modalities ⊇ required?

alt validation PASSED
Hooks->>modelsStore: selectModelById(modelId)
Hooks-->>UI: return true
else validation FAILED
Hooks->>UI: toast.error("Model doesn't support required modalities")
alt model was just loaded
Hooks->>modelsStore: unloadModel(modelName)
end
alt onValidationFailure provided
Hooks->>modelsStore: selectModelById(previousSelectedModelId)
end
Hooks-->>UI: return false
end
deactivate Hooks

%% ═══════════════════════════════════════════════════════════════════════════
Note over UI,API: ⬆️ LOAD MODEL (ROUTER mode)
%% ═══════════════════════════════════════════════════════════════════════════

modelsStore->>modelsStore: loadModel(modelId)
activate modelsStore

alt already loaded
modelsStore-->>modelsStore: return (no-op)
end

modelsStore->>modelsStore: modelLoadingStates.set(modelId, true)
modelsStore->>ModelsSvc: load(modelId)
ModelsSvc->>API: POST /models/load {model: modelId}
API-->>ModelsSvc: {status: "loading"}

modelsStore->>modelsStore: pollForModelStatus(modelId, LOADED)
loop poll every 500ms (max 60 attempts)
modelsStore->>modelsStore: fetchRouterModels()
modelsStore->>ModelsSvc: listRouter()
ModelsSvc->>API: GET /models
API-->>ModelsSvc: models[]
modelsStore->>modelsStore: getModelStatus(modelId)
alt status === LOADED
Note right of modelsStore: break loop
else status === LOADING
Note right of modelsStore: wait 500ms, continue
end
end

modelsStore->>modelsStore: updateModelModalities(modelId)
modelsStore->>PropsSvc: fetchForModel(modelId)
PropsSvc->>API: GET /props?model={modelId}
API-->>PropsSvc: props with modalities
modelsStore->>modelsStore: modelPropsCache.set(modelId, props)
modelsStore->>modelsStore: propsCacheVersion++

modelsStore->>modelsStore: modelLoadingStates.set(modelId, false)
deactivate modelsStore

%% ═══════════════════════════════════════════════════════════════════════════
Note over UI,API: ⬇️ UNLOAD MODEL (ROUTER mode)
%% ═══════════════════════════════════════════════════════════════════════════

modelsStore->>modelsStore: unloadModel(modelId)
activate modelsStore
modelsStore->>modelsStore: modelLoadingStates.set(modelId, true)
modelsStore->>ModelsSvc: unload(modelId)
ModelsSvc->>API: POST /models/unload {model: modelId}

modelsStore->>modelsStore: pollForModelStatus(modelId, UNLOADED)
loop poll until unloaded
modelsStore->>ModelsSvc: listRouter()
ModelsSvc->>API: GET /models
end

modelsStore->>modelsStore: modelLoadingStates.set(modelId, false)
deactivate modelsStore

%% ═══════════════════════════════════════════════════════════════════════════
Note over UI,API: 📊 COMPUTED GETTERS
%% ═══════════════════════════════════════════════════════════════════════════

Note over modelsStore: Getters:<br/>- selectedModel: ModelOption | null<br/>- loadedModelIds: string[] (from routerModels)<br/>- loadingModelIds: string[] (from modelLoadingStates)<br/>- singleModelName: string | null (MODEL mode only)

Note over modelsStore: Modality helpers:<br/>- getModelModalities(modelId): {vision, audio}<br/>- modelSupportsVision(modelId): boolean<br/>- modelSupportsAudio(modelId): boolean
```
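
The LOAD MODEL flow above reduces to a small polling helper. The sketch below mirrors the diagram only; the helper names, the `Map`-based state, and the plain `fetch` calls are assumptions, not the actual `modelsStore` implementation (the endpoints `POST /models/load`, `GET /models`, and `GET /props?model=...` are the ones named in the diagram):

```ts
// Sketch of the ROUTER-mode loadModel() flow. All names are assumptions
// mirroring the diagram above, not the real store API.
type RouterModel = { id: string; status: 'loaded' | 'loading' | 'unloaded' };

const modelLoadingStates = new Map<string, boolean>();
const modelPropsCache = new Map<string, unknown>();
let routerModels: RouterModel[] = [];

async function fetchRouterModels(): Promise<void> {
	const res = await fetch('/models'); // ApiRouterModelsListResponse
	routerModels = (await res.json()).data;
}

function getModelStatus(id: string): RouterModel['status'] | undefined {
	return routerModels.find((m) => m.id === id)?.status;
}

async function loadModel(modelId: string): Promise<void> {
	if (getModelStatus(modelId) === 'loaded') return; // already loaded: no-op

	modelLoadingStates.set(modelId, true);
	try {
		await fetch('/models/load', {
			method: 'POST',
			headers: { 'Content-Type': 'application/json' },
			body: JSON.stringify({ model: modelId })
		});

		// Poll every 500ms, up to 60 attempts, until the router reports LOADED.
		for (let attempt = 0; attempt < 60; attempt++) {
			await fetchRouterModels();
			if (getModelStatus(modelId) === 'loaded') break;
			await new Promise((resolve) => setTimeout(resolve, 500));
		}

		// Cache the model's props so the UI can gate vision/audio features.
		const props = await (await fetch(`/props?model=${modelId}`)).json();
		modelPropsCache.set(modelId, props);
	} finally {
		modelLoadingStates.set(modelId, false);
	}
}
```

The `finally` block matters: the loading flag must clear even when polling times out, otherwise the UI would show a permanent spinner for that model.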

@ -0,0 +1,76 @@
```mermaid
sequenceDiagram
participant UI as 🧩 +layout.svelte
participant serverStore as 🗄️ serverStore
participant PropsSvc as ⚙️ PropsService
participant API as 🌐 llama-server

Note over serverStore: State:<br/>props: ApiLlamaCppServerProps | null<br/>loading, error<br/>role: ServerRole | null (MODEL | ROUTER)<br/>fetchPromise (deduplication)

%% ═══════════════════════════════════════════════════════════════════════════
Note over UI,API: 🚀 INITIALIZATION
%% ═══════════════════════════════════════════════════════════════════════════

UI->>serverStore: fetch()
activate serverStore

alt fetchPromise exists (already fetching)
serverStore-->>UI: return fetchPromise
Note right of serverStore: Deduplicate concurrent calls
end

serverStore->>serverStore: loading = true
serverStore->>serverStore: fetchPromise = new Promise()

serverStore->>PropsSvc: fetch()
PropsSvc->>API: GET /props
API-->>PropsSvc: ApiLlamaCppServerProps
Note right of API: {role, model_path, model_alias,<br/>modalities, default_generation_settings, ...}

PropsSvc-->>serverStore: props
serverStore->>serverStore: props = $state(data)

serverStore->>serverStore: detectRole(props)
Note right of serverStore: role = props.role === "router"<br/> ? ServerRole.ROUTER<br/> : ServerRole.MODEL

serverStore->>serverStore: loading = false
serverStore->>serverStore: fetchPromise = null
deactivate serverStore

%% ═══════════════════════════════════════════════════════════════════════════
Note over UI,API: 📊 COMPUTED GETTERS
%% ═══════════════════════════════════════════════════════════════════════════

Note over serverStore: Getters from props:

rect rgb(240, 255, 240)
Note over serverStore: defaultParams<br/>→ props.default_generation_settings.params<br/>(temperature, top_p, top_k, etc.)
end

rect rgb(240, 255, 240)
Note over serverStore: contextSize<br/>→ props.default_generation_settings.n_ctx
end

rect rgb(255, 240, 240)
Note over serverStore: isRouterMode<br/>→ role === ServerRole.ROUTER
end

rect rgb(255, 240, 240)
Note over serverStore: isModelMode<br/>→ role === ServerRole.MODEL
end

%% ═══════════════════════════════════════════════════════════════════════════
Note over UI,API: 🔗 RELATIONSHIPS
%% ═══════════════════════════════════════════════════════════════════════════

Note over serverStore: Used by:
Note right of serverStore: - modelsStore: role detection, MODEL mode modalities<br/>- settingsStore: syncWithServerDefaults (defaultParams)<br/>- chatStore: contextSize for processing state<br/>- UI components: isRouterMode for conditional rendering

%% ═══════════════════════════════════════════════════════════════════════════
Note over UI,API: ❌ ERROR HANDLING
%% ═══════════════════════════════════════════════════════════════════════════

Note over serverStore: getErrorMessage(): string | null<br/>Returns formatted error for UI display

Note over serverStore: clear(): void<br/>Resets all state (props, error, loading, role)
```
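
The `fetchPromise` deduplication in the diagram is the standard share-one-in-flight-request pattern. A minimal sketch, assuming module-level state and plain `fetch` (the real store uses Svelte 5 runes and a PropsService):

```ts
// Sketch of serverStore.fetch() deduplication: concurrent callers reuse the
// same in-flight promise instead of issuing duplicate GET /props requests.
// All names here are assumptions taken from the diagram above.
type ServerProps = { role?: string } & Record<string, unknown>;

let props: ServerProps | null = null;
let role: 'router' | 'model' | null = null;
let loading = false;
let fetchPromise: Promise<void> | null = null;

async function fetchServerProps(): Promise<void> {
	if (fetchPromise) return fetchPromise; // deduplicate concurrent calls

	loading = true;
	fetchPromise = (async () => {
		try {
			const res = await fetch('/props'); // ApiLlamaCppServerProps
			props = await res.json();
			role = props?.role === 'router' ? 'router' : 'model'; // detectRole
		} finally {
			loading = false;
			fetchPromise = null; // allow a fresh fetch after completion
		}
	})();

	return fetchPromise;
}
```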

@ -0,0 +1,144 @@
```mermaid
sequenceDiagram
participant UI as 🧩 ChatSettings
participant settingsStore as 🗄️ settingsStore
participant serverStore as 🗄️ serverStore
participant ParamSvc as ⚙️ ParameterSyncService
participant LS as 💾 LocalStorage

Note over settingsStore: State:<br/>config: SettingsConfigType<br/>theme: string ("auto" | "light" | "dark")<br/>isInitialized: boolean<br/>userOverrides: Set<string>

%% ═══════════════════════════════════════════════════════════════════════════
Note over UI,LS: 🚀 INITIALIZATION
%% ═══════════════════════════════════════════════════════════════════════════

Note over settingsStore: Auto-initialized in constructor (browser only)
settingsStore->>settingsStore: initialize()
activate settingsStore

settingsStore->>settingsStore: loadConfig()
settingsStore->>LS: get("llama-config")
LS-->>settingsStore: StoredConfig | null

alt config exists
settingsStore->>settingsStore: Merge with SETTING_CONFIG_DEFAULT
Note right of settingsStore: Fill missing keys with defaults
else no config
settingsStore->>settingsStore: config = SETTING_CONFIG_DEFAULT
end

settingsStore->>LS: get("llama-userOverrides")
LS-->>settingsStore: string[] | null
settingsStore->>settingsStore: userOverrides = new Set(data)

settingsStore->>settingsStore: loadTheme()
settingsStore->>LS: get("llama-theme")
LS-->>settingsStore: theme | "auto"

settingsStore->>settingsStore: isInitialized = true
deactivate settingsStore

%% ═══════════════════════════════════════════════════════════════════════════
Note over UI,LS: 🔄 SYNC WITH SERVER DEFAULTS
%% ═══════════════════════════════════════════════════════════════════════════

Note over UI: Triggered from +layout.svelte when serverStore.props loaded
UI->>settingsStore: syncWithServerDefaults()
activate settingsStore

settingsStore->>serverStore: defaultParams
serverStore-->>settingsStore: {temperature, top_p, top_k, ...}

settingsStore->>ParamSvc: extractServerDefaults(defaultParams)
ParamSvc-->>settingsStore: Record<string, value>

settingsStore->>ParamSvc: mergeWithServerDefaults(config, serverDefaults)
Note right of ParamSvc: For each syncable parameter:<br/>- If NOT in userOverrides → use server default<br/>- If in userOverrides → keep user value
ParamSvc-->>settingsStore: mergedConfig

settingsStore->>settingsStore: config = mergedConfig
settingsStore->>settingsStore: saveConfig()
deactivate settingsStore

%% ═══════════════════════════════════════════════════════════════════════════
Note over UI,LS: ⚙️ UPDATE CONFIG
%% ═══════════════════════════════════════════════════════════════════════════

UI->>settingsStore: updateConfig(key, value)
activate settingsStore
settingsStore->>settingsStore: config[key] = value
settingsStore->>settingsStore: userOverrides.add(key)
Note right of settingsStore: Mark as user-modified (won't be overwritten by server)
settingsStore->>settingsStore: saveConfig()
settingsStore->>LS: set("llama-config", config)
settingsStore->>LS: set("llama-userOverrides", [...userOverrides])
deactivate settingsStore

UI->>settingsStore: updateMultipleConfig({key1: val1, key2: val2})
activate settingsStore
Note right of settingsStore: Batch update, single save
settingsStore->>settingsStore: For each key: config[key] = value
settingsStore->>settingsStore: For each key: userOverrides.add(key)
settingsStore->>settingsStore: saveConfig()
deactivate settingsStore

%% ═══════════════════════════════════════════════════════════════════════════
Note over UI,LS: 🔄 RESET
%% ═══════════════════════════════════════════════════════════════════════════

UI->>settingsStore: resetConfig()
activate settingsStore
settingsStore->>settingsStore: config = SETTING_CONFIG_DEFAULT
settingsStore->>settingsStore: userOverrides.clear()
settingsStore->>settingsStore: syncWithServerDefaults()
Note right of settingsStore: Apply server defaults for syncable params
settingsStore->>settingsStore: saveConfig()
deactivate settingsStore

UI->>settingsStore: resetParameterToServerDefault(key)
activate settingsStore
settingsStore->>settingsStore: userOverrides.delete(key)
settingsStore->>serverStore: defaultParams[key]
settingsStore->>settingsStore: config[key] = serverDefault
settingsStore->>settingsStore: saveConfig()
deactivate settingsStore

%% ═══════════════════════════════════════════════════════════════════════════
Note over UI,LS: 🎨 THEME
%% ═══════════════════════════════════════════════════════════════════════════

UI->>settingsStore: updateTheme(newTheme)
activate settingsStore
settingsStore->>settingsStore: theme = newTheme
settingsStore->>settingsStore: saveTheme()
settingsStore->>LS: set("llama-theme", theme)
deactivate settingsStore

%% ═══════════════════════════════════════════════════════════════════════════
Note over UI,LS: 📊 PARAMETER INFO
%% ═══════════════════════════════════════════════════════════════════════════

UI->>settingsStore: getParameterInfo(key)
settingsStore->>ParamSvc: getParameterInfo(key, config, serverDefaults, userOverrides)
ParamSvc-->>settingsStore: ParameterInfo
Note right of ParamSvc: {<br/> currentValue,<br/> serverDefault,<br/> isUserOverride: boolean,<br/> canSync: boolean,<br/> isDifferentFromServer: boolean<br/>}

UI->>settingsStore: getParameterDiff()
settingsStore->>ParamSvc: createParameterDiff(config, serverDefaults, userOverrides)
ParamSvc-->>settingsStore: ParameterDiff[]
Note right of ParamSvc: Array of parameters where user != server

%% ═══════════════════════════════════════════════════════════════════════════
Note over UI,LS: 📋 CONFIG CATEGORIES
%% ═══════════════════════════════════════════════════════════════════════════

Note over settingsStore: Syncable with server (from /props):
rect rgb(240, 255, 240)
Note over settingsStore: temperature, top_p, top_k, min_p<br/>repeat_penalty, presence_penalty, frequency_penalty<br/>dynatemp_range, dynatemp_exponent<br/>typ_p, xtc_probability, xtc_threshold<br/>dry_multiplier, dry_base, dry_allowed_length, dry_penalty_last_n
end

Note over settingsStore: UI-only (not synced):
rect rgb(255, 240, 240)
Note over settingsStore: systemMessage, custom (JSON)<br/>showStatistics, enableContinueGeneration<br/>autoMicOnEmpty, disableAutoScroll<br/>apiKey, pdfAsImage, disableReasoningFormat
end
```
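
The merge rule in SYNC WITH SERVER DEFAULTS is the interesting part: server defaults win only for keys the user never touched. A minimal sketch under that assumption (the function name matches the diagram; the key list here is an illustrative subset, not the full syncable set):

```ts
// Sketch of mergeWithServerDefaults from the diagram: keys in userOverrides
// keep the user's value, all other syncable keys follow the server.
// SYNCABLE_KEYS is an assumed subset for illustration only.
const SYNCABLE_KEYS = ['temperature', 'top_p', 'top_k', 'min_p'] as const;

function mergeWithServerDefaults(
	config: Record<string, unknown>,
	serverDefaults: Record<string, unknown>,
	userOverrides: Set<string>
): Record<string, unknown> {
	const merged = { ...config };
	for (const key of SYNCABLE_KEYS) {
		if (!userOverrides.has(key) && key in serverDefaults) {
			merged[key] = serverDefaults[key];
		}
	}
	return merged;
}

// Example: the user pinned temperature, so only top_p follows the server.
const merged = mergeWithServerDefaults(
	{ temperature: 1.0, top_p: 0.9 },
	{ temperature: 0.8, top_p: 0.95 },
	new Set(['temperature'])
);
// merged => { temperature: 1.0, top_p: 0.95 }
```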

@ -64,7 +64,7 @@
"svelte": "^5.0.0",
"svelte-check": "^4.0.0",
"tailwind-merge": "^3.3.1",
"tailwind-variants": "^1.0.0",
"tailwind-variants": "^3.2.2",
"tailwindcss": "^4.0.0",
"tw-animate-css": "^1.3.5",
"typescript": "^5.0.0",

@ -8324,31 +8324,23 @@
}
},
"node_modules/tailwind-variants": {
"version": "1.0.0",
"resolved": "https://registry.npmjs.org/tailwind-variants/-/tailwind-variants-1.0.0.tgz",
"integrity": "sha512-2WSbv4ulEEyuBKomOunut65D8UZwxrHoRfYnxGcQNnHqlSCp2+B7Yz2W+yrNDrxRodOXtGD/1oCcKGNBnUqMqA==",
"version": "3.2.2",
"resolved": "https://registry.npmjs.org/tailwind-variants/-/tailwind-variants-3.2.2.tgz",
"integrity": "sha512-Mi4kHeMTLvKlM98XPnK+7HoBPmf4gygdFmqQPaDivc3DpYS6aIY6KiG/PgThrGvii5YZJqRsPz0aPyhoFzmZgg==",
"dev": true,
"license": "MIT",
"dependencies": {
"tailwind-merge": "3.0.2"
},
"engines": {
"node": ">=16.x",
"pnpm": ">=7.x"
},
"peerDependencies": {
"tailwind-merge": ">=3.0.0",
"tailwindcss": "*"
}
},
"node_modules/tailwind-variants/node_modules/tailwind-merge": {
"version": "3.0.2",
"resolved": "https://registry.npmjs.org/tailwind-merge/-/tailwind-merge-3.0.2.tgz",
"integrity": "sha512-l7z+OYZ7mu3DTqrL88RiKrKIqO3NcpEO8V/Od04bNpvk0kiIFndGEoqfuzvj4yuhRkHKjRkII2z+KS2HfPcSxw==",
"dev": true,
"license": "MIT",
"funding": {
"type": "github",
"url": "https://github.com/sponsors/dcastil"
},
"peerDependenciesMeta": {
"tailwind-merge": {
"optional": true
}
}
},
"node_modules/tailwindcss": {

@ -66,7 +66,7 @@
"svelte": "^5.0.0",
"svelte-check": "^4.0.0",
"tailwind-merge": "^3.3.1",
"tailwind-variants": "^1.0.0",
"tailwind-variants": "^3.2.2",
"tailwindcss": "^4.0.0",
"tw-animate-css": "^1.3.5",
"typescript": "^5.0.0",

@ -7,5 +7,5 @@ export default defineConfig({
timeout: 120000,
reuseExistingServer: false
},
testDir: 'e2e'
testDir: 'tests/e2e'
});
@ -49,7 +49,9 @@ trap cleanup SIGINT SIGTERM
echo "🚀 Starting development servers..."
echo "📝 Note: Make sure to start llama-server separately if needed"
cd tools/server/webui
storybook dev -p 6006 --ci & vite dev --host 0.0.0.0 &
# Use --insecure-http-parser to handle malformed HTTP responses from llama-server
# (some responses have both Content-Length and Transfer-Encoding headers)
storybook dev -p 6006 --ci & NODE_OPTIONS="--insecure-http-parser" vite dev --host 0.0.0.0 &

# Wait for all background processes
wait

@ -29,7 +29,7 @@
--chart-3: oklch(0.398 0.07 227.392);
--chart-4: oklch(0.828 0.189 84.429);
--chart-5: oklch(0.769 0.188 70.08);
--sidebar: oklch(0.985 0 0);
--sidebar: oklch(0.987 0 0);
--sidebar-foreground: oklch(0.145 0 0);
--sidebar-primary: oklch(0.205 0 0);
--sidebar-primary-foreground: oklch(0.985 0 0);

@ -66,7 +66,7 @@
--chart-3: oklch(0.769 0.188 70.08);
--chart-4: oklch(0.627 0.265 303.9);
--chart-5: oklch(0.645 0.246 16.439);
--sidebar: oklch(0.205 0 0);
--sidebar: oklch(0.19 0 0);
--sidebar-foreground: oklch(0.985 0 0);
--sidebar-primary: oklch(0.488 0.243 264.376);
--sidebar-primary-foreground: oklch(0.985 0 0);
@ -4,27 +4,38 @@
// Import chat types from dedicated module

import type {
// API types
ApiChatCompletionRequest,
ApiChatCompletionResponse,
ApiChatCompletionStreamChunk,
ApiChatCompletionToolCall,
ApiChatCompletionToolCallDelta,
ApiChatMessageData,
ApiChatMessageContentPart,
ApiContextSizeError,
ApiErrorResponse,
ApiLlamaCppServerProps,
ApiProcessingState
} from '$lib/types/api';

import type {
ApiModelDataEntry,
ApiModelListResponse,
ApiProcessingState,
ApiRouterModelMeta,
ApiRouterModelsLoadRequest,
ApiRouterModelsLoadResponse,
ApiRouterModelsStatusRequest,
ApiRouterModelsStatusResponse,
ApiRouterModelsListResponse,
ApiRouterModelsUnloadRequest,
ApiRouterModelsUnloadResponse,
// Chat types
ChatAttachmentDisplayItem,
ChatAttachmentPreviewItem,
ChatMessageType,
ChatRole,
ChatUploadedFile,
ChatMessageSiblingInfo,
ChatMessagePromptProgress,
ChatMessageTimings
} from '$lib/types/chat';

import type {
ChatMessageTimings,
// Database types
DatabaseConversation,
DatabaseMessage,
DatabaseMessageExtra,

@ -32,14 +43,20 @@ import type {
DatabaseMessageExtraImageFile,
DatabaseMessageExtraTextFile,
DatabaseMessageExtraPdfFile,
DatabaseMessageExtraLegacyContext
} from '$lib/types/database';

import type {
DatabaseMessageExtraLegacyContext,
ExportedConversation,
ExportedConversations,
// Model types
ModelModalities,
ModelOption,
// Settings types
SettingsChatServiceOptions,
SettingsConfigValue,
SettingsFieldConfig,
SettingsConfigType
} from '$lib/types/settings';
} from '$lib/types';

import { ServerRole, ServerModelStatus, ModelModality } from '$lib/enums';

declare global {
// namespace App {

@ -51,22 +68,38 @@ declare global {
// }

export {
// API types
ApiChatCompletionRequest,
ApiChatCompletionResponse,
ApiChatCompletionStreamChunk,
ApiChatCompletionToolCall,
ApiChatCompletionToolCallDelta,
ApiChatMessageData,
ApiChatMessageContentPart,
ApiContextSizeError,
ApiErrorResponse,
ApiLlamaCppServerProps,
ApiModelDataEntry,
ApiModelListResponse,
ApiProcessingState,
ChatMessageData,
ApiRouterModelMeta,
ApiRouterModelsLoadRequest,
ApiRouterModelsLoadResponse,
ApiRouterModelsStatusRequest,
ApiRouterModelsStatusResponse,
ApiRouterModelsListResponse,
ApiRouterModelsUnloadRequest,
ApiRouterModelsUnloadResponse,
// Chat types
ChatAttachmentDisplayItem,
ChatAttachmentPreviewItem,
ChatMessagePromptProgress,
ChatMessageSiblingInfo,
ChatMessageTimings,
ChatMessageType,
ChatRole,
ChatUploadedFile,
// Database types
DatabaseConversation,
DatabaseMessage,
DatabaseMessageExtra,

@ -75,9 +108,19 @@ declare global {
DatabaseMessageExtraTextFile,
DatabaseMessageExtraPdfFile,
DatabaseMessageExtraLegacyContext,
ExportedConversation,
ExportedConversations,
// Enum types
ModelModality,
ServerRole,
ServerModelStatus,
// Model types
ModelModalities,
ModelOption,
// Settings types
SettingsChatServiceOptions,
SettingsConfigValue,
SettingsFieldConfig,
SettingsConfigType,
SettingsChatServiceOptions
SettingsConfigType
};
}
@ -1,9 +1,17 @@
<script lang="ts">
import { FileText, Image, Music, FileIcon, Eye } from '@lucide/svelte';
import { FileTypeCategory, MimeTypeApplication } from '$lib/enums/files';
import { convertPDFToImage } from '$lib/utils/pdf-processing';
import { Button } from '$lib/components/ui/button';
import { getFileTypeCategory } from '$lib/utils/file-type';
import * as Alert from '$lib/components/ui/alert';
import { SyntaxHighlightedCode } from '$lib/components/app';
import { FileText, Image, Music, FileIcon, Eye, Info } from '@lucide/svelte';
import {
isTextFile,
isImageFile,
isPdfFile,
isAudioFile,
getLanguageFromFilename
} from '$lib/utils';
import { convertPDFToImage } from '$lib/utils/browser-only';
import { modelsStore } from '$lib/stores/models.svelte';

interface Props {
// Either an uploaded file or a stored attachment

@ -12,53 +20,36 @@
// For uploaded files
preview?: string;
name?: string;
type?: string;
textContent?: string;
// For checking vision modality
activeModelId?: string;
}

let { uploadedFile, attachment, preview, name, type, textContent }: Props = $props();
let { uploadedFile, attachment, preview, name, textContent, activeModelId }: Props = $props();

let hasVisionModality = $derived(
activeModelId ? modelsStore.modelSupportsVision(activeModelId) : false
);

let displayName = $derived(uploadedFile?.name || attachment?.name || name || 'Unknown File');

let displayPreview = $derived(
uploadedFile?.preview || (attachment?.type === 'imageFile' ? attachment.base64Url : preview)
);
// Determine file type from uploaded file or attachment
let isAudio = $derived(isAudioFile(attachment, uploadedFile));
let isImage = $derived(isImageFile(attachment, uploadedFile));
let isPdf = $derived(isPdfFile(attachment, uploadedFile));
let isText = $derived(isTextFile(attachment, uploadedFile));

let displayType = $derived(
uploadedFile?.type ||
(attachment?.type === 'imageFile'
? 'image'
: attachment?.type === 'textFile'
? 'text'
: attachment?.type === 'audioFile'
? attachment.mimeType || 'audio'
: attachment?.type === 'pdfFile'
? MimeTypeApplication.PDF
: type || 'unknown')
let displayPreview = $derived(
uploadedFile?.preview ||
(isImage && attachment && 'base64Url' in attachment ? attachment.base64Url : preview)
);

let displayTextContent = $derived(
uploadedFile?.textContent ||
(attachment?.type === 'textFile'
? attachment.content
: attachment?.type === 'pdfFile'
? attachment.content
: textContent)
(attachment && 'content' in attachment ? attachment.content : textContent)
);

let isAudio = $derived(
getFileTypeCategory(displayType) === FileTypeCategory.AUDIO || displayType === 'audio'
);

let isImage = $derived(
getFileTypeCategory(displayType) === FileTypeCategory.IMAGE || displayType === 'image'
);

let isPdf = $derived(displayType === MimeTypeApplication.PDF);

let isText = $derived(
getFileTypeCategory(displayType) === FileTypeCategory.TEXT || displayType === 'text'
);
let language = $derived(getLanguageFromFilename(displayName));

let IconComponent = $derived(() => {
if (isImage) return Image;

@ -87,15 +78,20 @@

if (uploadedFile?.file) {
file = uploadedFile.file;
} else if (attachment?.type === 'pdfFile') {
} else if (isPdf && attachment) {
// Check if we have pre-processed images
if (attachment.images && Array.isArray(attachment.images)) {
if (
'images' in attachment &&
attachment.images &&
Array.isArray(attachment.images) &&
attachment.images.length > 0
) {
pdfImages = attachment.images;
return;
}

// Convert base64 back to File for processing
if (attachment.base64Data) {
if ('base64Data' in attachment && attachment.base64Data) {
const base64Data = attachment.base64Data;
const byteCharacters = atob(base64Data);
const byteNumbers = new Array(byteCharacters.length);

@ -103,7 +99,7 @@
byteNumbers[i] = byteCharacters.charCodeAt(i);
}
const byteArray = new Uint8Array(byteNumbers);
file = new File([byteArray], displayName, { type: MimeTypeApplication.PDF });
file = new File([byteArray], displayName, { type: 'application/pdf' });
}
}

@ -181,6 +177,24 @@
/>
</div>
{:else if isPdf && pdfViewMode === 'pages'}
{#if !hasVisionModality && activeModelId}
<Alert.Root class="mb-4">
<Info class="h-4 w-4" />
<Alert.Title>Preview only</Alert.Title>
<Alert.Description>
<span class="inline-flex">
The selected model does not support vision. Only the extracted
<!-- svelte-ignore a11y_click_events_have_key_events -->
<!-- svelte-ignore a11y_no_static_element_interactions -->
<span class="mx-1 cursor-pointer underline" onclick={() => (pdfViewMode = 'text')}>
text
</span>
will be sent to the model.
</span>
</Alert.Description>
</Alert.Root>
{/if}

{#if pdfImagesLoading}
<div class="flex items-center justify-center p-8">
<div class="text-center">

@ -227,28 +241,24 @@
</div>
{/if}
{:else if (isText || (isPdf && pdfViewMode === 'text')) && displayTextContent}
<div
class="max-h-[60vh] overflow-auto rounded-lg bg-muted p-4 font-mono text-sm break-words whitespace-pre-wrap"
>
{displayTextContent}
</div>
<SyntaxHighlightedCode code={displayTextContent} {language} maxWidth="69rem" />
{:else if isAudio}
<div class="flex items-center justify-center p-8">
<div class="w-full max-w-md text-center">
<Music class="mx-auto mb-4 h-16 w-16 text-muted-foreground" />

{#if attachment?.type === 'audioFile'}
{#if uploadedFile?.preview}
<audio controls class="mb-4 w-full" src={uploadedFile.preview}>
Your browser does not support the audio element.
</audio>
{:else if isAudio && attachment && 'mimeType' in attachment && 'base64Data' in attachment}
<audio
controls
class="mb-4 w-full"
src="data:{attachment.mimeType};base64,{attachment.base64Data}"
src={`data:${attachment.mimeType};base64,${attachment.base64Data}`}
>
Your browser does not support the audio element.
</audio>
{:else if uploadedFile?.preview}
<audio controls class="mb-4 w-full" src={uploadedFile.preview}>
Your browser does not support the audio element.
</audio>
{:else}
<p class="mb-4 text-muted-foreground">Audio preview not available</p>
{/if}
@ -1,7 +1,7 @@
<script lang="ts">
import { RemoveButton } from '$lib/components/app';
import { formatFileSize, getFileTypeLabel, getPreviewText } from '$lib/utils/file-preview';
import { FileTypeCategory, MimeTypeText } from '$lib/enums/files';
import { getFileTypeLabel, getPreviewText, formatFileSize, isTextFile } from '$lib/utils';
import { AttachmentType } from '$lib/enums';

interface Props {
class?: string;

@ -12,7 +12,9 @@
readonly?: boolean;
size?: number;
textContent?: string;
type: string;
// Either uploaded file or stored attachment
uploadedFile?: ChatUploadedFile;
attachment?: DatabaseMessageExtra;
}

let {

@ -24,11 +26,41 @@
readonly = false,
size,
textContent,
type
uploadedFile,
attachment
}: Props = $props();

let isText = $derived(isTextFile(attachment, uploadedFile));

let fileTypeLabel = $derived.by(() => {
if (uploadedFile?.type) {
return getFileTypeLabel(uploadedFile.type);
}

if (attachment) {
if ('mimeType' in attachment && attachment.mimeType) {
return getFileTypeLabel(attachment.mimeType);
}

if (attachment.type) {
return getFileTypeLabel(attachment.type);
}
}

return getFileTypeLabel(name);
});

let pdfProcessingMode = $derived.by(() => {
if (attachment?.type === AttachmentType.PDF) {
const pdfAttachment = attachment as DatabaseMessageExtraPdfFile;

return pdfAttachment.processedAsImages ? 'Sent as Image' : 'Sent as Text';
}
return null;
});
</script>

{#if type === MimeTypeText.PLAIN || type === FileTypeCategory.TEXT}
{#if isText}
{#if readonly}
<!-- Readonly mode (ChatMessage) -->
<button

@ -45,7 +77,7 @@
<span class="text-xs text-muted-foreground">{formatFileSize(size)}</span>
{/if}

{#if textContent && type === 'text'}
{#if textContent}
<div class="relative mt-2 w-full">
<div
class="overflow-hidden font-mono text-xs leading-relaxed break-words whitespace-pre-wrap text-muted-foreground"

@ -105,17 +137,21 @@
<div
class="flex h-8 w-8 items-center justify-center rounded bg-primary/10 text-xs font-medium text-primary"
>
{getFileTypeLabel(type)}
{fileTypeLabel}
</div>

<div class="flex flex-col gap-1">
<div class="flex flex-col gap-0.5">
<span
class="max-w-24 truncate text-sm font-medium text-foreground group-hover:pr-6 md:max-w-32"
class="max-w-24 truncate text-sm font-medium text-foreground {readonly
? ''
: 'group-hover:pr-6'} md:max-w-32"
>
{name}
</span>

{#if size}
{#if pdfProcessingMode}
<span class="text-left text-xs text-muted-foreground">{pdfProcessingMode}</span>
{:else if size}
<span class="text-left text-xs text-muted-foreground">{formatFileSize(size)}</span>
{/if}
</div>

@ -30,7 +30,9 @@
}: Props = $props();
</script>

<div class="group relative overflow-hidden rounded-lg border border-border bg-muted {className}">
<div
class="group relative overflow-hidden rounded-lg bg-muted shadow-lg dark:border dark:border-muted {className}"
>
{#if onClick}
<button
type="button"
@ -2,10 +2,8 @@
import { ChatAttachmentThumbnailImage, ChatAttachmentThumbnailFile } from '$lib/components/app';
import { Button } from '$lib/components/ui/button';
import { ChevronLeft, ChevronRight } from '@lucide/svelte';
import { FileTypeCategory } from '$lib/enums/files';
import { getFileTypeCategory } from '$lib/utils/file-type';
import { DialogChatAttachmentPreview, DialogChatAttachmentsViewAll } from '$lib/components/app';
import type { ChatAttachmentDisplayItem, ChatAttachmentPreviewItem } from '$lib/types/chat';
import { getAttachmentDisplayItems } from '$lib/utils';

interface Props {
class?: string;

@ -22,6 +20,8 @@
imageWidth?: string;
// Limit display to single row with "+ X more" button
limitToSingleRow?: boolean;
// For vision modality check
activeModelId?: string;
}

let {

@ -35,10 +35,11 @@
imageClass = '',
imageHeight = 'h-24',
imageWidth = 'w-auto',
limitToSingleRow = false
limitToSingleRow = false,
activeModelId
}: Props = $props();

let displayItems = $derived(getDisplayItems());
let displayItems = $derived(getAttachmentDisplayItems({ uploadedFiles, attachments }));

let canScrollLeft = $state(false);
let canScrollRight = $state(false);

@ -49,81 +50,6 @@
let showViewAll = $derived(limitToSingleRow && displayItems.length > 0 && isScrollable);
let viewAllDialogOpen = $state(false);

function getDisplayItems(): ChatAttachmentDisplayItem[] {
const items: ChatAttachmentDisplayItem[] = [];

// Add uploaded files (ChatForm)
for (const file of uploadedFiles) {
items.push({
id: file.id,
name: file.name,
size: file.size,
preview: file.preview,
type: file.type,
isImage: getFileTypeCategory(file.type) === FileTypeCategory.IMAGE,
uploadedFile: file,
textContent: file.textContent
});
}

// Add stored attachments (ChatMessage)
for (const [index, attachment] of attachments.entries()) {
if (attachment.type === 'imageFile') {
items.push({
id: `attachment-${index}`,
name: attachment.name,
preview: attachment.base64Url,
type: 'image',
isImage: true,
attachment,
attachmentIndex: index
});
} else if (attachment.type === 'textFile') {
items.push({
id: `attachment-${index}`,
name: attachment.name,
type: 'text',
isImage: false,
attachment,
attachmentIndex: index,
textContent: attachment.content
});
} else if (attachment.type === 'context') {
// Legacy format from old webui - treat as text file
items.push({
id: `attachment-${index}`,
name: attachment.name,
type: 'text',
isImage: false,
attachment,
attachmentIndex: index,
textContent: attachment.content
});
} else if (attachment.type === 'audioFile') {
items.push({
id: `attachment-${index}`,
name: attachment.name,
type: attachment.mimeType || 'audio',
isImage: false,
attachment,
attachmentIndex: index
});
} else if (attachment.type === 'pdfFile') {
items.push({
id: `attachment-${index}`,
name: attachment.name,
type: 'application/pdf',
isImage: false,
attachment,
attachmentIndex: index,
textContent: attachment.content
});
}
}

return items.reverse();
}

function openPreview(item: ChatAttachmentDisplayItem, event?: MouseEvent) {
event?.stopPropagation();
event?.preventDefault();

@ -133,7 +59,6 @@
attachment: item.attachment,
preview: item.preview,
name: item.name,
type: item.type,
size: item.size,
textContent: item.textContent
};

@ -181,26 +106,88 @@

{#if displayItems.length > 0}
<div class={className} {style}>
<div class="relative">
<button
class="absolute top-1/2 left-4 z-10 flex h-6 w-6 -translate-y-1/2 items-center justify-center rounded-full bg-foreground/15 shadow-md backdrop-blur-xs transition-opacity hover:bg-foreground/35 {canScrollLeft
? 'opacity-100'
: 'pointer-events-none opacity-0'}"
onclick={scrollLeft}
aria-label="Scroll left"
>
<ChevronLeft class="h-4 w-4" />
</button>
{#if limitToSingleRow}
<div class="relative">
<button
class="absolute top-1/2 left-4 z-10 flex h-6 w-6 -translate-y-1/2 items-center justify-center rounded-full bg-foreground/15 shadow-md backdrop-blur-xs transition-opacity hover:bg-foreground/35 {canScrollLeft
? 'opacity-100'
: 'pointer-events-none opacity-0'}"
onclick={scrollLeft}
aria-label="Scroll left"
>
<ChevronLeft class="h-4 w-4" />
</button>

<div
class="scrollbar-hide flex items-start gap-3 overflow-x-auto"
bind:this={scrollContainer}
onscroll={updateScrollButtons}
>
<div
class="scrollbar-hide flex items-start gap-3 overflow-x-auto"
bind:this={scrollContainer}
onscroll={updateScrollButtons}
>
{#each displayItems as item (item.id)}
{#if item.isImage && item.preview}
<ChatAttachmentThumbnailImage
class="flex-shrink-0 cursor-pointer {limitToSingleRow
? 'first:ml-4 last:mr-4'
: ''}"
id={item.id}
name={item.name}
preview={item.preview}
{readonly}
onRemove={onFileRemove}
height={imageHeight}
width={imageWidth}
{imageClass}
onClick={(event) => openPreview(item, event)}
/>
{:else}
<ChatAttachmentThumbnailFile
class="flex-shrink-0 cursor-pointer {limitToSingleRow
? 'first:ml-4 last:mr-4'
: ''}"
id={item.id}
name={item.name}
size={item.size}
{readonly}
onRemove={onFileRemove}
textContent={item.textContent}
attachment={item.attachment}
uploadedFile={item.uploadedFile}
onClick={(event) => openPreview(item, event)}
/>
{/if}
{/each}
</div>

<button
class="absolute top-1/2 right-4 z-10 flex h-6 w-6 -translate-y-1/2 items-center justify-center rounded-full bg-foreground/15 shadow-md backdrop-blur-xs transition-opacity hover:bg-foreground/35 {canScrollRight
? 'opacity-100'
: 'pointer-events-none opacity-0'}"
onclick={scrollRight}
aria-label="Scroll right"
>
<ChevronRight class="h-4 w-4" />
</button>
</div>

{#if showViewAll}
<div class="mt-2 -mr-2 flex justify-end px-4">
<Button
type="button"
variant="ghost"
size="sm"
class="h-6 text-xs text-muted-foreground hover:text-foreground"
onclick={() => (viewAllDialogOpen = true)}
>
View all ({displayItems.length})
</Button>
</div>
{/if}
{:else}
<div class="flex flex-wrap items-start justify-end gap-3">
{#each displayItems as item (item.id)}
{#if item.isImage && item.preview}
<ChatAttachmentThumbnailImage
class="flex-shrink-0 cursor-pointer {limitToSingleRow ? 'first:ml-4 last:mr-4' : ''}"
class="cursor-pointer"
id={item.id}
name={item.name}
preview={item.preview}

@ -213,43 +200,20 @@
/>
{:else}
<ChatAttachmentThumbnailFile
class="flex-shrink-0 cursor-pointer {limitToSingleRow ? 'first:ml-4 last:mr-4' : ''}"
class="cursor-pointer"
id={item.id}
name={item.name}
type={item.type}
size={item.size}
{readonly}
onRemove={onFileRemove}
textContent={item.textContent}
onClick={(event) => openPreview(item, event)}
attachment={item.attachment}
uploadedFile={item.uploadedFile}
onClick={(event?: MouseEvent) => openPreview(item, event)}
/>
{/if}
{/each}
</div>

<button
class="absolute top-1/2 right-4 z-10 flex h-6 w-6 -translate-y-1/2 items-center justify-center rounded-full bg-foreground/15 shadow-md backdrop-blur-xs transition-opacity hover:bg-foreground/35 {canScrollRight
? 'opacity-100'
: 'pointer-events-none opacity-0'}"
onclick={scrollRight}
aria-label="Scroll right"
>
<ChevronRight class="h-4 w-4" />
</button>
</div>

{#if showViewAll}
<div class="mt-2 -mr-2 flex justify-end px-4">
<Button
type="button"
variant="ghost"
size="sm"
class="h-6 text-xs text-muted-foreground hover:text-foreground"
onclick={() => (viewAllDialogOpen = true)}
>
View all
</Button>
</div>
{/if}
</div>
{/if}

@ -261,9 +225,9 @@
attachment={previewItem.attachment}
preview={previewItem.preview}
name={previewItem.name}
type={previewItem.type}
size={previewItem.size}
textContent={previewItem.textContent}
{activeModelId}
/>
{/if}

@ -275,4 +239,5 @@
{onFileRemove}
imageHeight="h-64"
{imageClass}
{activeModelId}
/>

@ -4,9 +4,7 @@
ChatAttachmentThumbnailFile,
DialogChatAttachmentPreview
} from '$lib/components/app';
import { FileTypeCategory } from '$lib/enums/files';
import { getFileTypeCategory } from '$lib/utils/file-type';
import type { ChatAttachmentDisplayItem, ChatAttachmentPreviewItem } from '$lib/types/chat';
import { getAttachmentDisplayItems } from '$lib/utils';

interface Props {
uploadedFiles?: ChatUploadedFile[];

@ -16,6 +14,7 @@
imageHeight?: string;
imageWidth?: string;
imageClass?: string;
activeModelId?: string;
}

let {

@ -25,89 +24,17 @@
onFileRemove,
imageHeight = 'h-24',
imageWidth = 'w-auto',
imageClass = ''
imageClass = '',
activeModelId
}: Props = $props();

let previewDialogOpen = $state(false);
let previewItem = $state<ChatAttachmentPreviewItem | null>(null);

let displayItems = $derived(getDisplayItems());
let displayItems = $derived(getAttachmentDisplayItems({ uploadedFiles, attachments }));
let imageItems = $derived(displayItems.filter((item) => item.isImage));
let fileItems = $derived(displayItems.filter((item) => !item.isImage));

function getDisplayItems(): ChatAttachmentDisplayItem[] {
const items: ChatAttachmentDisplayItem[] = [];

for (const file of uploadedFiles) {
items.push({
id: file.id,
name: file.name,
size: file.size,
preview: file.preview,
type: file.type,
isImage: getFileTypeCategory(file.type) === FileTypeCategory.IMAGE,
uploadedFile: file,
textContent: file.textContent
});
}

for (const [index, attachment] of attachments.entries()) {
if (attachment.type === 'imageFile') {
items.push({
id: `attachment-${index}`,
name: attachment.name,
preview: attachment.base64Url,
type: 'image',
isImage: true,
attachment,
attachmentIndex: index
});
} else if (attachment.type === 'textFile') {
items.push({
id: `attachment-${index}`,
name: attachment.name,
type: 'text',
isImage: false,
attachment,
attachmentIndex: index,
textContent: attachment.content
});
} else if (attachment.type === 'context') {
// Legacy format from old webui - treat as text file
items.push({
id: `attachment-${index}`,
name: attachment.name,
type: 'text',
isImage: false,
attachment,
attachmentIndex: index,
textContent: attachment.content
});
} else if (attachment.type === 'audioFile') {
items.push({
id: `attachment-${index}`,
name: attachment.name,
type: attachment.mimeType || 'audio',
isImage: false,
attachment,
attachmentIndex: index
});
} else if (attachment.type === 'pdfFile') {
items.push({
id: `attachment-${index}`,
name: attachment.name,
type: 'application/pdf',
isImage: false,
attachment,
attachmentIndex: index,
textContent: attachment.content
});
}
}

return items.reverse();
}

function openPreview(item: (typeof displayItems)[0], event?: Event) {
if (event) {
event.preventDefault();

@ -119,7 +46,6 @@
attachment: item.attachment,
preview: item.preview,
name: item.name,
type: item.type,
size: item.size,
textContent: item.textContent
};

@ -138,12 +64,13 @@
class="cursor-pointer"
id={item.id}
name={item.name}
type={item.type}
size={item.size}
{readonly}
onRemove={onFileRemove}
textContent={item.textContent}
onClick={(event) => openPreview(item, event)}
attachment={item.attachment}
uploadedFile={item.uploadedFile}
onClick={(event?: MouseEvent) => openPreview(item, event)}
/>
{/each}
</div>

@ -183,8 +110,8 @@
attachment={previewItem.attachment}
preview={previewItem.preview}
name={previewItem.name}
type={previewItem.type}
size={previewItem.size}
textContent={previewItem.textContent}
{activeModelId}
/>
{/if}
@ -9,15 +9,13 @@
} from '$lib/components/app';
import { INPUT_CLASSES } from '$lib/constants/input-classes';
import { config } from '$lib/stores/settings.svelte';
import { FileTypeCategory, MimeTypeApplication } from '$lib/enums/files';
import {
AudioRecorder,
convertToWav,
createAudioFile,
isAudioRecordingSupported
} from '$lib/utils/audio-recording';
import { onMount } from 'svelte';
import { modelsStore, modelOptions, selectedModelId } from '$lib/stores/models.svelte';
import { isRouterMode } from '$lib/stores/server.svelte';
import { chatStore } from '$lib/stores/chat.svelte';
import { activeMessages } from '$lib/stores/conversations.svelte';
import {
FileTypeCategory,
MimeTypeApplication,
FileExtensionAudio,
FileExtensionImage,
FileExtensionPdf,

@ -25,8 +23,15 @@
MimeTypeAudio,
MimeTypeImage,
MimeTypeText
} from '$lib/enums/files';
import { isIMEComposing } from '$lib/utils/is-ime-composing';
} from '$lib/enums';
import { isIMEComposing } from '$lib/utils';
import {
AudioRecorder,
convertToWav,
createAudioFile,
isAudioRecordingSupported
} from '$lib/utils/browser-only';
import { onMount } from 'svelte';

interface Props {
class?: string;

@ -53,6 +58,7 @@
}: Props = $props();

let audioRecorder: AudioRecorder | undefined;
let chatFormActionsRef: ChatFormActions | undefined = $state(undefined);
let currentConfig = $derived(config());
let fileAcceptString = $state<string | undefined>(undefined);
let fileInputRef: ChatFormFileInputInvisible | undefined = $state(undefined);

@ -63,18 +69,97 @@
let recordingSupported = $state(false);
let textareaRef: ChatFormTextarea | undefined = $state(undefined);

// Check if model is selected (in ROUTER mode)
let conversationModel = $derived(
chatStore.getConversationModel(activeMessages() as DatabaseMessage[])
);
let isRouter = $derived(isRouterMode());
let hasModelSelected = $derived(!isRouter || !!conversationModel || !!selectedModelId());

// Get active model ID for capability detection
let activeModelId = $derived.by(() => {
const options = modelOptions();

if (!isRouter) {
return options.length > 0 ? options[0].model : null;
}

// First try user-selected model
const selectedId = selectedModelId();
if (selectedId) {
const model = options.find((m) => m.id === selectedId);
if (model) return model.model;
}

// Fallback to conversation model
if (conversationModel) {
const model = options.find((m) => m.model === conversationModel);
if (model) return model.model;
}

return null;
});

// State for model props reactivity
let modelPropsVersion = $state(0);

// Fetch model props when active model changes (works for both MODEL and ROUTER mode)
$effect(() => {
if (activeModelId) {
const cached = modelsStore.getModelProps(activeModelId);
if (!cached) {
modelsStore.fetchModelProps(activeModelId).then(() => {
modelPropsVersion++;
});
}
}
});

// Derive modalities from active model (works for both MODEL and ROUTER mode)
let hasAudioModality = $derived.by(() => {
if (activeModelId) {
void modelPropsVersion; // Trigger reactivity on props fetch
return modelsStore.modelSupportsAudio(activeModelId);
}

return false;
});

let hasVisionModality = $derived.by(() => {
if (activeModelId) {
void modelPropsVersion; // Trigger reactivity on props fetch
return modelsStore.modelSupportsVision(activeModelId);
}

return false;
});

function checkModelSelected(): boolean {
if (!hasModelSelected) {
// Open the model selector
chatFormActionsRef?.openModelSelector();
return false;
}

return true;
}

function getAcceptStringForFileType(fileType: FileTypeCategory): string {
switch (fileType) {
case FileTypeCategory.IMAGE:
return [...Object.values(FileExtensionImage), ...Object.values(MimeTypeImage)].join(',');

case FileTypeCategory.AUDIO:
return [...Object.values(FileExtensionAudio), ...Object.values(MimeTypeAudio)].join(',');

case FileTypeCategory.PDF:
return [...Object.values(FileExtensionPdf), ...Object.values(MimeTypeApplication)].join(
','
);

case FileTypeCategory.TEXT:
return [...Object.values(FileExtensionText), MimeTypeText.PLAIN].join(',');

default:
return '';
}

@ -103,6 +188,9 @@

if ((!message.trim() && uploadedFiles.length === 0) || disabled || isLoading) return;

// Check if model is selected first
if (!checkModelSelected()) return;

const messageToSend = message.trim();
const filesToSend = [...uploadedFiles];

@ -131,6 +219,7 @@
if (files.length > 0) {
event.preventDefault();
onFileUpload?.(files);

return;
}

@ -154,6 +243,7 @@
async function handleMicClick() {
if (!audioRecorder || !recordingSupported) {
console.warn('Audio recording not supported');

return;
}

@ -187,6 +277,9 @@
event.preventDefault();
if ((!message.trim() && uploadedFiles.length === 0) || disabled || isLoading) return;

// Check if model is selected first
if (!checkModelSelected()) return;

const messageToSend = message.trim();
const filesToSend = [...uploadedFiles];

@ -225,12 +318,16 @@
<ChatFormFileInputInvisible
bind:this={fileInputRef}
bind:accept={fileAcceptString}
{hasAudioModality}
{hasVisionModality}
onFileSelect={handleFileSelect}
/>

<form
onsubmit={handleSubmit}
class="{INPUT_CLASSES} border-radius-bottom-none mx-auto max-w-[48rem] overflow-hidden rounded-3xl backdrop-blur-md {className}"
class="{INPUT_CLASSES} border-radius-bottom-none mx-auto max-w-[48rem] overflow-hidden rounded-3xl backdrop-blur-md {disabled
? 'cursor-not-allowed opacity-60'
: ''} {className}"
>
<ChatAttachmentsList
bind:uploadedFiles

@ -238,6 +335,7 @@
limitToSingleRow
class="py-5"
style="scroll-padding: 1rem;"
activeModelId={activeModelId ?? undefined}
/>

<div

@ -252,10 +350,13 @@
/>

<ChatFormActions
bind:this={chatFormActionsRef}
canSend={message.trim().length > 0 || uploadedFiles.length > 0}
hasText={message.trim().length > 0}
{disabled}
{isLoading}
{isRecording}
{uploadedFiles}
onFileUpload={handleFileUpload}
onMicClick={handleMicClick}
onStop={handleStop}
@@ -1,22 +1,29 @@
<script lang="ts">
	import { Paperclip, Image, FileText, File, Volume2 } from '@lucide/svelte';
	import { Paperclip } from '@lucide/svelte';
	import { Button } from '$lib/components/ui/button';
	import * as DropdownMenu from '$lib/components/ui/dropdown-menu';
	import * as Tooltip from '$lib/components/ui/tooltip';
	import { TOOLTIP_DELAY_DURATION } from '$lib/constants/tooltip-config';
	import { FileTypeCategory } from '$lib/enums/files';
	import { supportsAudio, supportsVision } from '$lib/stores/server.svelte';
	import { FILE_TYPE_ICONS } from '$lib/constants/icons';
	import { FileTypeCategory } from '$lib/enums';

	interface Props {
		class?: string;
		disabled?: boolean;
		hasAudioModality?: boolean;
		hasVisionModality?: boolean;
		onFileUpload?: (fileType?: FileTypeCategory) => void;
	}

	let { class: className = '', disabled = false, onFileUpload }: Props = $props();
	let {
		class: className = '',
		disabled = false,
		hasAudioModality = false,
		hasVisionModality = false,
		onFileUpload
	}: Props = $props();

	const fileUploadTooltipText = $derived.by(() => {
		return !supportsVision()
		return !hasVisionModality
			? 'Text files and PDFs supported. Images, audio, and video require vision models.'
			: 'Attach files';
	});
@@ -29,7 +36,7 @@
<div class="flex items-center gap-1 {className}">
	<DropdownMenu.Root>
		<DropdownMenu.Trigger name="Attach files">
			<Tooltip.Root delayDuration={TOOLTIP_DELAY_DURATION}>
			<Tooltip.Root>
				<Tooltip.Trigger>
					<Button
						class="file-upload-button h-8 w-8 rounded-full bg-transparent p-0 text-muted-foreground hover:bg-foreground/10 hover:text-foreground"
@@ -49,40 +56,40 @@
		</DropdownMenu.Trigger>

		<DropdownMenu.Content align="start" class="w-48">
			<Tooltip.Root delayDuration={TOOLTIP_DELAY_DURATION}>
			<Tooltip.Root>
				<Tooltip.Trigger class="w-full">
					<DropdownMenu.Item
						class="images-button flex cursor-pointer items-center gap-2"
						disabled={!supportsVision()}
						disabled={!hasVisionModality}
						onclick={() => handleFileUpload(FileTypeCategory.IMAGE)}
					>
						<Image class="h-4 w-4" />
						<FILE_TYPE_ICONS.image class="h-4 w-4" />

						<span>Images</span>
					</DropdownMenu.Item>
				</Tooltip.Trigger>

				{#if !supportsVision()}
				{#if !hasVisionModality}
					<Tooltip.Content>
						<p>Images require vision models to be processed</p>
					</Tooltip.Content>
				{/if}
			</Tooltip.Root>

			<Tooltip.Root delayDuration={TOOLTIP_DELAY_DURATION}>
			<Tooltip.Root>
				<Tooltip.Trigger class="w-full">
					<DropdownMenu.Item
						class="audio-button flex cursor-pointer items-center gap-2"
						disabled={!supportsAudio()}
						disabled={!hasAudioModality}
						onclick={() => handleFileUpload(FileTypeCategory.AUDIO)}
					>
						<Volume2 class="h-4 w-4" />
						<FILE_TYPE_ICONS.audio class="h-4 w-4" />

						<span>Audio Files</span>
					</DropdownMenu.Item>
				</Tooltip.Trigger>

				{#if !supportsAudio()}
				{#if !hasAudioModality}
					<Tooltip.Content>
						<p>Audio files require audio models to be processed</p>
					</Tooltip.Content>
@@ -93,24 +100,24 @@
					class="flex cursor-pointer items-center gap-2"
					onclick={() => handleFileUpload(FileTypeCategory.TEXT)}
				>
					<FileText class="h-4 w-4" />
					<FILE_TYPE_ICONS.text class="h-4 w-4" />

					<span>Text Files</span>
				</DropdownMenu.Item>

				<Tooltip.Root delayDuration={TOOLTIP_DELAY_DURATION}>
				<Tooltip.Root>
					<Tooltip.Trigger class="w-full">
						<DropdownMenu.Item
							class="flex cursor-pointer items-center gap-2"
							onclick={() => handleFileUpload(FileTypeCategory.PDF)}
						>
							<File class="h-4 w-4" />
							<FILE_TYPE_ICONS.pdf class="h-4 w-4" />

							<span>PDF Files</span>
						</DropdownMenu.Item>
					</Tooltip.Trigger>

					{#if !supportsVision()}
					{#if !hasVisionModality}
						<Tooltip.Content>
							<p>PDFs will be converted to text. Image-based PDFs may not work properly.</p>
						</Tooltip.Content>
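
Note on the hunks above: the attachment menu stops reading the global supportsVision()/supportsAudio() server store and instead receives hasVisionModality/hasAudioModality as props, and the hard-coded Lucide icons (Image, Volume2, FileText, File) are replaced by a shared FILE_TYPE_ICONS lookup. The diff does not include $lib/constants/icons, so the following sketch of that lookup is an assumption based on the icons the old code imported:

	// Hypothetical sketch of $lib/constants/icons (not shown in this diff).
	// Centralizes the file-type -> icon mapping used by the dropdown items.
	import { Image, Volume2, FileText, File } from '@lucide/svelte';

	export const FILE_TYPE_ICONS = {
		image: Image,   // used as <FILE_TYPE_ICONS.image ... />
		audio: Volume2, // used as <FILE_TYPE_ICONS.audio ... />
		text: FileText, // used as <FILE_TYPE_ICONS.text ... />
		pdf: File       // used as <FILE_TYPE_ICONS.pdf ... />
	} as const;
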
@@ -1,12 +1,12 @@
<script lang="ts">
	import { Mic } from '@lucide/svelte';
	import { Mic, Square } from '@lucide/svelte';
	import { Button } from '$lib/components/ui/button';
	import * as Tooltip from '$lib/components/ui/tooltip';
	import { supportsAudio } from '$lib/stores/server.svelte';

	interface Props {
		class?: string;
		disabled?: boolean;
		hasAudioModality?: boolean;
		isLoading?: boolean;
		isRecording?: boolean;
		onMicClick?: () => void;
@@ -15,6 +15,7 @@
	let {
		class: className = '',
		disabled = false,
		hasAudioModality = false,
		isLoading = false,
		isRecording = false,
		onMicClick
@@ -22,25 +23,27 @@
</script>

<div class="flex items-center gap-1 {className}">
	<Tooltip.Root delayDuration={100}>
	<Tooltip.Root>
		<Tooltip.Trigger>
			<Button
				class="h-8 w-8 rounded-full p-0 {isRecording
					? 'animate-pulse bg-red-500 text-white hover:bg-red-600'
					: 'bg-transparent text-muted-foreground hover:bg-foreground/10 hover:text-foreground'} {!supportsAudio()
					? 'cursor-not-allowed opacity-50'
					: ''}"
				disabled={disabled || isLoading || !supportsAudio()}
				disabled={disabled || isLoading || !hasAudioModality}
				onclick={onMicClick}
				type="button"
			>
				<span class="sr-only">{isRecording ? 'Stop recording' : 'Start recording'}</span>

				<Mic class="h-4 w-4" />
				{#if isRecording}
					<Square class="h-4 w-4 animate-pulse fill-white" />
				{:else}
					<Mic class="h-4 w-4" />
				{/if}
			</Button>
		</Tooltip.Trigger>

		{#if !supportsAudio()}
		{#if !hasAudioModality}
			<Tooltip.Content>
				<p>Current model does not support audio</p>
			</Tooltip.Content>
@@ -0,0 +1,55 @@
<script lang="ts">
	import { ArrowUp } from '@lucide/svelte';
	import { Button } from '$lib/components/ui/button';
	import * as Tooltip from '$lib/components/ui/tooltip';
	import { cn } from '$lib/components/ui/utils';

	interface Props {
		canSend?: boolean;
		disabled?: boolean;
		isLoading?: boolean;
		showErrorState?: boolean;
		tooltipLabel?: string;
	}

	let {
		canSend = false,
		disabled = false,
		isLoading = false,
		showErrorState = false,
		tooltipLabel
	}: Props = $props();

	let isDisabled = $derived(!canSend || disabled || isLoading);
</script>

{#snippet submitButton(props = {})}
	<Button
		type="submit"
		disabled={isDisabled}
		class={cn(
			'h-8 w-8 rounded-full p-0',
			showErrorState
				? 'bg-red-400/10 text-red-400 hover:bg-red-400/20 hover:text-red-400 disabled:opacity-100'
				: ''
		)}
		{...props}
	>
		<span class="sr-only">Send</span>
		<ArrowUp class="h-12 w-12" />
	</Button>
{/snippet}

{#if tooltipLabel}
	<Tooltip.Root>
		<Tooltip.Trigger>
			{@render submitButton()}
		</Tooltip.Trigger>

		<Tooltip.Content>
			<p>{tooltipLabel}</p>
		</Tooltip.Content>
	</Tooltip.Root>
{:else}
	{@render submitButton()}
{/if}
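
The new ChatFormActionSubmit file above uses Svelte 5's {#snippet}/{@render} so the same submit button can be rendered either bare or wrapped in a tooltip without duplicating the markup. A minimal usage sketch (the prop values below are illustrative, not taken from the diff):

	<!-- Illustrative usage of ChatFormActionSubmit; values are made up. -->
	<ChatFormActionSubmit
		canSend={true}
		tooltipLabel="Selected model is not available, please select another"
		showErrorState={true}
	/>
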
@@ -1,13 +1,20 @@
<script lang="ts">
	import { Square, ArrowUp } from '@lucide/svelte';
	import { Square } from '@lucide/svelte';
	import { Button } from '$lib/components/ui/button';
	import {
		ChatFormActionFileAttachments,
		ChatFormActionRecord,
		ChatFormModelSelector
		ChatFormActionSubmit,
		ModelsSelector
	} from '$lib/components/app';
	import { FileTypeCategory } from '$lib/enums';
	import { getFileTypeCategory } from '$lib/utils';
	import { config } from '$lib/stores/settings.svelte';
	import type { FileTypeCategory } from '$lib/enums/files';
	import { modelsStore, modelOptions, selectedModelId } from '$lib/stores/models.svelte';
	import { isRouterMode } from '$lib/stores/server.svelte';
	import { chatStore } from '$lib/stores/chat.svelte';
	import { activeMessages, usedModalities } from '$lib/stores/conversations.svelte';
	import { useModelChangeValidation } from '$lib/hooks/use-model-change-validation.svelte';

	interface Props {
		canSend?: boolean;
@@ -15,6 +22,8 @@
		disabled?: boolean;
		isLoading?: boolean;
		isRecording?: boolean;
		hasText?: boolean;
		uploadedFiles?: ChatUploadedFile[];
		onFileUpload?: (fileType?: FileTypeCategory) => void;
		onMicClick?: () => void;
		onStop?: () => void;
@@ -26,20 +35,150 @@
		disabled = false,
		isLoading = false,
		isRecording = false,
		hasText = false,
		uploadedFiles = [],
		onFileUpload,
		onMicClick,
		onStop
	}: Props = $props();

	let currentConfig = $derived(config());
	let isRouter = $derived(isRouterMode());

	let conversationModel = $derived(
		chatStore.getConversationModel(activeMessages() as DatabaseMessage[])
	);

	let previousConversationModel: string | null = null;

	$effect(() => {
		if (conversationModel && conversationModel !== previousConversationModel) {
			previousConversationModel = conversationModel;
			modelsStore.selectModelByName(conversationModel);
		}
	});

	let activeModelId = $derived.by(() => {
		const options = modelOptions();

		if (!isRouter) {
			return options.length > 0 ? options[0].model : null;
		}

		const selectedId = selectedModelId();
		if (selectedId) {
			const model = options.find((m) => m.id === selectedId);
			if (model) return model.model;
		}

		if (conversationModel) {
			const model = options.find((m) => m.model === conversationModel);
			if (model) return model.model;
		}

		return null;
	});

	let modelPropsVersion = $state(0); // Used to trigger reactivity after fetch

	$effect(() => {
		if (activeModelId) {
			const cached = modelsStore.getModelProps(activeModelId);

			if (!cached) {
				modelsStore.fetchModelProps(activeModelId).then(() => {
					modelPropsVersion++;
				});
			}
		}
	});

	let hasAudioModality = $derived.by(() => {
		if (activeModelId) {
			void modelPropsVersion;

			return modelsStore.modelSupportsAudio(activeModelId);
		}

		return false;
	});

	let hasVisionModality = $derived.by(() => {
		if (activeModelId) {
			void modelPropsVersion;

			return modelsStore.modelSupportsVision(activeModelId);
		}

		return false;
	});

	let hasAudioAttachments = $derived(
		uploadedFiles.some((file) => getFileTypeCategory(file.type) === FileTypeCategory.AUDIO)
	);
	let shouldShowRecordButton = $derived(
		hasAudioModality && !hasText && !hasAudioAttachments && currentConfig.autoMicOnEmpty
	);

	let hasModelSelected = $derived(!isRouter || !!conversationModel || !!selectedModelId());

	let isSelectedModelInCache = $derived.by(() => {
		if (!isRouter) return true;

		if (conversationModel) {
			return modelOptions().some((option) => option.model === conversationModel);
		}

		const currentModelId = selectedModelId();
		if (!currentModelId) return false;

		return modelOptions().some((option) => option.id === currentModelId);
	});

	let submitTooltip = $derived.by(() => {
		if (!hasModelSelected) {
			return 'Please select a model first';
		}

		if (!isSelectedModelInCache) {
			return 'Selected model is not available, please select another';
		}

		return '';
	});

	let selectorModelRef: ModelsSelector | undefined = $state(undefined);

	export function openModelSelector() {
		selectorModelRef?.open();
	}

	const { handleModelChange } = useModelChangeValidation({
		getRequiredModalities: () => usedModalities(),
		onValidationFailure: async (previousModelId) => {
			if (previousModelId) {
				await modelsStore.selectModelById(previousModelId);
			}
		}
	});
</script>

<div class="flex w-full items-center gap-2 {className}">
	<ChatFormActionFileAttachments class="mr-auto" {disabled} {onFileUpload} />
<div class="flex w-full items-center gap-3 {className}" style="container-type: inline-size">
	<ChatFormActionFileAttachments
		class="mr-auto"
		{disabled}
		{hasAudioModality}
		{hasVisionModality}
		{onFileUpload}
	/>

	{#if currentConfig.modelSelectorEnabled}
		<ChatFormModelSelector class="shrink-0" />
	{/if}
	<ModelsSelector
		bind:this={selectorModelRef}
		currentModel={conversationModel}
		forceForegroundText={true}
		useGlobalSelection={true}
		onModelChange={handleModelChange}
	/>

	{#if isLoading}
		<Button
@@ -50,16 +189,15 @@
			<span class="sr-only">Stop</span>
			<Square class="h-8 w-8 fill-destructive stroke-destructive" />
		</Button>
	{:else if shouldShowRecordButton}
		<ChatFormActionRecord {disabled} {hasAudioModality} {isLoading} {isRecording} {onMicClick} />
	{:else}
		<ChatFormActionRecord {disabled} {isLoading} {isRecording} {onMicClick} />

		<Button
			type="submit"
			disabled={!canSend || disabled || isLoading}
			class="h-8 w-8 rounded-full p-0"
		>
			<span class="sr-only">Send</span>
			<ArrowUp class="h-12 w-12" />
		</Button>
		<ChatFormActionSubmit
			canSend={canSend && hasModelSelected && isSelectedModelInCache}
			{disabled}
			{isLoading}
			tooltipLabel={submitTooltip}
			showErrorState={hasModelSelected && !isSelectedModelInCache}
		/>
	{/if}
</div>
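
One detail worth calling out in the ChatFormActions hunks above: modelsStore.getModelProps() is treated as a plain cache, so the hasAudioModality/hasVisionModality $derived.by blocks read void modelPropsVersion; purely to register a reactive dependency, and the $effect bumps the counter once fetchModelProps resolves. Distilled to its core (the same pattern as the code above, with names shortened):

	// Version-counter pattern for re-running a $derived over a non-reactive cache.
	let version = $state(0);

	let supportsAudio = $derived.by(() => {
		void version; // read only to subscribe; bumping version forces recomputation
		return activeModelId ? modelsStore.modelSupportsAudio(activeModelId) : false;
	});

	$effect(() => {
		if (activeModelId && !modelsStore.getModelProps(activeModelId)) {
			modelsStore.fetchModelProps(activeModelId).then(() => version++);
		}
	});
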
@@ -1,9 +1,11 @@
<script lang="ts">
	import { generateModalityAwareAcceptString } from '$lib/utils/modality-file-validation';
	import { generateModalityAwareAcceptString } from '$lib/utils';

	interface Props {
		accept?: string;
		class?: string;
		hasAudioModality?: boolean;
		hasVisionModality?: boolean;
		multiple?: boolean;
		onFileSelect?: (files: File[]) => void;
	}
@@ -11,6 +13,8 @@
	let {
		accept = $bindable(),
		class: className = '',
		hasAudioModality = false,
		hasVisionModality = false,
		multiple = true,
		onFileSelect
	}: Props = $props();
@@ -18,7 +22,13 @@
	let fileInputElement: HTMLInputElement | undefined;

	// Use modality-aware accept string by default, but allow override
	let finalAccept = $derived(accept ?? generateModalityAwareAcceptString());
	let finalAccept = $derived(
		accept ??
			generateModalityAwareAcceptString({
				hasVision: hasVisionModality,
				hasAudio: hasAudioModality
			})
	);

	export function click() {
		fileInputElement?.click();
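
The call site above now passes the model's modalities into generateModalityAwareAcceptString instead of letting the helper consult global state. The helper itself is not part of this diff, so the following is only a hypothetical sketch of the signature the call implies; the default extensions are guesses:

	// Hypothetical shape of generateModalityAwareAcceptString in $lib/utils —
	// the real implementation is not shown in this diff.
	export function generateModalityAwareAcceptString(opts: {
		hasVision: boolean;
		hasAudio: boolean;
	}): string {
		const accept = ['.txt', '.md', 'application/pdf']; // text/PDF assumed always allowed
		if (opts.hasVision) accept.push('image/*');
		if (opts.hasAudio) accept.push('audio/*');
		return accept.join(',');
	}
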
@@ -1,352 +0,0 @@
<script lang="ts">
	import { onMount, tick } from 'svelte';
	import { ChevronDown, Loader2 } from '@lucide/svelte';
	import { cn } from '$lib/components/ui/utils';
	import { portalToBody } from '$lib/utils/portal-to-body';
	import {
		fetchModels,
		modelOptions,
		modelsError,
		modelsLoading,
		modelsUpdating,
		selectModel,
		selectedModelId
	} from '$lib/stores/models.svelte';
	import type { ModelOption } from '$lib/types/models';

	interface Props {
		class?: string;
	}

	let { class: className = '' }: Props = $props();

	let options = $derived(modelOptions());
	let loading = $derived(modelsLoading());
	let updating = $derived(modelsUpdating());
	let error = $derived(modelsError());
	let activeId = $derived(selectedModelId());

	let isMounted = $state(false);
	let isOpen = $state(false);
	let container: HTMLDivElement | null = null;
	let triggerButton = $state<HTMLButtonElement | null>(null);
	let menuRef = $state<HTMLDivElement | null>(null);
	let menuPosition = $state<{
		top: number;
		left: number;
		width: number;
		placement: 'top' | 'bottom';
		maxHeight: number;
	} | null>(null);
	let lockedWidth: number | null = null;

	onMount(async () => {
		try {
			await fetchModels();
		} catch (error) {
			console.error('Unable to load models:', error);
		} finally {
			isMounted = true;
		}
	});

	function handlePointerDown(event: PointerEvent) {
		if (!container) return;

		const target = event.target as Node | null;

		if (target && !container.contains(target) && !(menuRef && menuRef.contains(target))) {
			closeMenu();
		}
	}

	function handleKeydown(event: KeyboardEvent) {
		if (event.key === 'Escape') {
			closeMenu();
		}
	}

	function handleResize() {
		if (isOpen) {
			updateMenuPosition();
		}
	}

	async function handleSelect(value: string | undefined) {
		if (!value) return;

		const option = options.find((item) => item.id === value);
		if (!option) {
			console.error('Model is no longer available');
			return;
		}

		try {
			await selectModel(option.id);
		} catch (error) {
			console.error('Failed to switch model:', error);
		}
	}

	const VIEWPORT_GUTTER = 8;
	const MENU_OFFSET = 6;
	const MENU_MAX_WIDTH = 320;

	async function openMenu() {
		if (loading || updating) return;

		isOpen = true;
		await tick();
		updateMenuPosition();
		requestAnimationFrame(() => updateMenuPosition());
	}

	function toggleOpen() {
		if (loading || updating) return;

		if (isOpen) {
			closeMenu();
		} else {
			void openMenu();
		}
	}

	function closeMenu() {
		if (!isOpen) return;

		isOpen = false;
		menuPosition = null;
		lockedWidth = null;
	}

	async function handleOptionSelect(optionId: string) {
		try {
			await handleSelect(optionId);
		} finally {
			closeMenu();
		}
	}

	$effect(() => {
		if (loading || updating) {
			closeMenu();
		}
	});

	$effect(() => {
		const optionCount = options.length;

		if (!isOpen || optionCount <= 0) return;

		queueMicrotask(() => updateMenuPosition());
	});

	function updateMenuPosition() {
		if (!isOpen || !triggerButton || !menuRef) return;

		const triggerRect = triggerButton.getBoundingClientRect();
		const viewportWidth = window.innerWidth;
		const viewportHeight = window.innerHeight;

		if (viewportWidth === 0 || viewportHeight === 0) return;

		const scrollWidth = menuRef.scrollWidth;
		const scrollHeight = menuRef.scrollHeight;

		const availableWidth = Math.max(0, viewportWidth - VIEWPORT_GUTTER * 2);
		const constrainedMaxWidth = Math.min(MENU_MAX_WIDTH, availableWidth || MENU_MAX_WIDTH);
		const safeMaxWidth =
			constrainedMaxWidth > 0 ? constrainedMaxWidth : Math.min(MENU_MAX_WIDTH, viewportWidth);
		const desiredMinWidth = Math.min(160, safeMaxWidth || 160);

		let width = lockedWidth;
		if (width === null) {
			const naturalWidth = Math.min(scrollWidth, safeMaxWidth);
			const baseWidth = Math.max(triggerRect.width, naturalWidth, desiredMinWidth);
			width = Math.min(baseWidth, safeMaxWidth || baseWidth);
			lockedWidth = width;
		} else {
			width = Math.min(Math.max(width, desiredMinWidth), safeMaxWidth || width);
		}

		if (width > 0) {
			menuRef.style.width = `${width}px`;
		}

		const availableBelow = Math.max(
			0,
			viewportHeight - VIEWPORT_GUTTER - triggerRect.bottom - MENU_OFFSET
		);
		const availableAbove = Math.max(0, triggerRect.top - VIEWPORT_GUTTER - MENU_OFFSET);
		const viewportAllowance = Math.max(0, viewportHeight - VIEWPORT_GUTTER * 2);
		const fallbackAllowance = Math.max(1, viewportAllowance > 0 ? viewportAllowance : scrollHeight);

		function computePlacement(placement: 'top' | 'bottom') {
			const available = placement === 'bottom' ? availableBelow : availableAbove;
			const allowedHeight =
				available > 0 ? Math.min(available, fallbackAllowance) : fallbackAllowance;
			const maxHeight = Math.min(scrollHeight, allowedHeight);
			const height = Math.max(0, maxHeight);

			let top: number;
			if (placement === 'bottom') {
				const rawTop = triggerRect.bottom + MENU_OFFSET;
				const minTop = VIEWPORT_GUTTER;
				const maxTop = viewportHeight - VIEWPORT_GUTTER - height;
				if (maxTop < minTop) {
					top = minTop;
				} else {
					top = Math.min(Math.max(rawTop, minTop), maxTop);
				}
			} else {
				const rawTop = triggerRect.top - MENU_OFFSET - height;
				const minTop = VIEWPORT_GUTTER;
				const maxTop = viewportHeight - VIEWPORT_GUTTER - height;
				if (maxTop < minTop) {
					top = minTop;
				} else {
					top = Math.max(Math.min(rawTop, maxTop), minTop);
				}
			}

			return { placement, top, height, maxHeight };
		}

		const belowMetrics = computePlacement('bottom');
		const aboveMetrics = computePlacement('top');

		let metrics = belowMetrics;
		if (scrollHeight > belowMetrics.maxHeight && aboveMetrics.maxHeight > belowMetrics.maxHeight) {
			metrics = aboveMetrics;
		}

		menuRef.style.maxHeight = metrics.maxHeight > 0 ? `${Math.round(metrics.maxHeight)}px` : '';

		let left = triggerRect.right - width;
		const maxLeft = viewportWidth - VIEWPORT_GUTTER - width;
		if (maxLeft < VIEWPORT_GUTTER) {
			left = VIEWPORT_GUTTER;
		} else {
			if (left > maxLeft) {
				left = maxLeft;
			}
			if (left < VIEWPORT_GUTTER) {
				left = VIEWPORT_GUTTER;
			}
		}

		menuPosition = {
			top: Math.round(metrics.top),
			left: Math.round(left),
			width: Math.round(width),
			placement: metrics.placement,
			maxHeight: Math.round(metrics.maxHeight)
		};
	}

	function getDisplayOption(): ModelOption | undefined {
		if (activeId) {
			return options.find((option) => option.id === activeId);
		}

		return options[0];
	}
</script>

<svelte:window onresize={handleResize} />

<svelte:document onpointerdown={handlePointerDown} onkeydown={handleKeydown} />

<div
	class={cn('relative z-10 flex max-w-[200px] min-w-[120px] flex-col items-end gap-1', className)}
	bind:this={container}
>
	{#if loading && options.length === 0 && !isMounted}
		<div class="flex items-center gap-2 text-xs text-muted-foreground">
			<Loader2 class="h-4 w-4 animate-spin" />
			Loading models…
		</div>
	{:else if options.length === 0}
		<p class="text-xs text-muted-foreground">No models available.</p>
	{:else}
		{@const selectedOption = getDisplayOption()}

		<div class="relative w-full">
			<button
				type="button"
				class={cn(
					'flex w-full items-center justify-end gap-2 rounded-md px-2 py-1 text-sm text-muted-foreground transition hover:text-foreground focus:outline-none focus-visible:ring-2 focus-visible:ring-ring focus-visible:ring-offset-2 disabled:cursor-not-allowed disabled:opacity-60',
					isOpen ? 'text-foreground' : ''
				)}
				aria-haspopup="listbox"
				aria-expanded={isOpen}
				onclick={toggleOpen}
				bind:this={triggerButton}
				disabled={loading || updating}
			>
				<span class="max-w-[160px] truncate text-right font-medium">
					{selectedOption?.name || 'Select model'}
				</span>

				{#if updating}
					<Loader2 class="h-3.5 w-3.5 animate-spin text-muted-foreground" />
				{:else}
					<ChevronDown
						class={cn(
							'h-4 w-4 text-muted-foreground transition-transform',
							isOpen ? 'rotate-180 text-foreground' : ''
						)}
					/>
				{/if}
			</button>

			{#if isOpen}
				<div
					bind:this={menuRef}
					use:portalToBody
					class={cn(
						'fixed z-[1000] overflow-hidden rounded-md border bg-popover shadow-lg transition-opacity',
						menuPosition ? 'opacity-100' : 'pointer-events-none opacity-0'
					)}
					role="listbox"
					style:top={menuPosition ? `${menuPosition.top}px` : undefined}
					style:left={menuPosition ? `${menuPosition.left}px` : undefined}
					style:width={menuPosition ? `${menuPosition.width}px` : undefined}
					data-placement={menuPosition?.placement ?? 'bottom'}
				>
					<div
						class="overflow-y-auto py-1"
						style:max-height={menuPosition && menuPosition.maxHeight > 0
							? `${menuPosition.maxHeight}px`
							: undefined}
					>
						{#each options as option (option.id)}
							<button
								type="button"
								class={cn(
									'flex w-full flex-col items-start gap-0.5 px-3 py-2 text-left text-sm transition hover:bg-muted focus:bg-muted focus:outline-none',
									option.id === selectedOption?.id ? 'bg-accent text-accent-foreground' : ''
								)}
								role="option"
								aria-selected={option.id === selectedOption?.id}
								onclick={() => handleOptionSelect(option.id)}
							>
								<span class="block w-full truncate font-medium" title={option.name}>
									{option.name}
								</span>

								{#if option.description}
									<span class="text-xs text-muted-foreground">{option.description}</span>
								{/if}
							</button>
						{/each}
					</div>
				</div>
			{/if}
		</div>
	{/if}

	{#if error}
		<p class="text-xs text-destructive">{error}</p>
	{/if}
</div>
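
The 352 lines removed above were a hand-rolled dropdown: ChatFormModelSelector measured the trigger, locked the menu width on open, and chose between opening below or above the trigger depending on which side could fit more of the list. That responsibility now falls to the shared ModelsSelector component wired up in ChatFormActions earlier in this diff. For reference, the core placement clamp from the deleted computePlacement(), restated:

	// From the removed code: keep the menu's top edge inside the viewport gutter,
	// i.e. top is clamped to [VIEWPORT_GUTTER, viewportHeight - VIEWPORT_GUTTER - height].
	const minTop = VIEWPORT_GUTTER;
	const maxTop = viewportHeight - VIEWPORT_GUTTER - height;
	const top = maxTop < minTop ? minTop : Math.min(Math.max(rawTop, minTop), maxTop);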