Merge remote-tracking branch 'upstream/master' into gpu-sampling
This commit is contained in:
commit
3e9a258c14
|
|
@ -66,14 +66,21 @@ jobs:
|
|||
id: pack_artifacts
|
||||
run: |
|
||||
cp LICENSE ./build/bin/
|
||||
zip -r llama-${{ steps.tag.outputs.name }}-bin-macos-arm64.zip ./build/bin/*
|
||||
zip -y -r llama-${{ steps.tag.outputs.name }}-bin-macos-arm64.zip ./build/bin/*
|
||||
tar -czvf llama-${{ steps.tag.outputs.name }}-bin-macos-arm64.tar.gz -C ./build/bin .
|
||||
|
||||
- name: Upload artifacts
|
||||
- name: Upload artifacts (zip)
|
||||
uses: actions/upload-artifact@v4
|
||||
with:
|
||||
path: llama-${{ steps.tag.outputs.name }}-bin-macos-arm64.zip
|
||||
name: llama-bin-macos-arm64.zip
|
||||
|
||||
- name: Upload artifacts (tar)
|
||||
uses: actions/upload-artifact@v4
|
||||
with:
|
||||
path: llama-${{ steps.tag.outputs.name }}-bin-macos-arm64.tar.gz
|
||||
name: llama-bin-macos-arm64.tar.gz
|
||||
|
||||
macOS-x64:
|
||||
runs-on: macos-15-intel
|
||||
|
||||
|
|
@ -120,14 +127,21 @@ jobs:
|
|||
id: pack_artifacts
|
||||
run: |
|
||||
cp LICENSE ./build/bin/
|
||||
zip -r llama-${{ steps.tag.outputs.name }}-bin-macos-x64.zip ./build/bin/*
|
||||
zip -y -r llama-${{ steps.tag.outputs.name }}-bin-macos-x64.zip ./build/bin/*
|
||||
tar -czvf llama-${{ steps.tag.outputs.name }}-bin-macos-x64.tar.gz -C ./build/bin .
|
||||
|
||||
- name: Upload artifacts
|
||||
- name: Upload artifacts (zip)
|
||||
uses: actions/upload-artifact@v4
|
||||
with:
|
||||
path: llama-${{ steps.tag.outputs.name }}-bin-macos-x64.zip
|
||||
name: llama-bin-macos-x64.zip
|
||||
|
||||
- name: Upload artifacts (tar)
|
||||
uses: actions/upload-artifact@v4
|
||||
with:
|
||||
path: llama-${{ steps.tag.outputs.name }}-bin-macos-x64.tar.gz
|
||||
name: llama-bin-macos-x64.tar.gz
|
||||
|
||||
ubuntu-22-cpu:
|
||||
strategy:
|
||||
matrix:
|
||||
|
|
@ -182,14 +196,21 @@ jobs:
|
|||
id: pack_artifacts
|
||||
run: |
|
||||
cp LICENSE ./build/bin/
|
||||
zip -r llama-${{ steps.tag.outputs.name }}-bin-ubuntu-${{ matrix.build }}.zip ./build/bin/*
|
||||
zip -y -r llama-${{ steps.tag.outputs.name }}-bin-ubuntu-${{ matrix.build }}.zip ./build/bin/*
|
||||
tar -czvf llama-${{ steps.tag.outputs.name }}-bin-ubuntu-${{ matrix.build }}.tar.gz -C ./build/bin .
|
||||
|
||||
- name: Upload artifacts
|
||||
- name: Upload artifacts (zip)
|
||||
uses: actions/upload-artifact@v4
|
||||
with:
|
||||
path: llama-${{ steps.tag.outputs.name }}-bin-ubuntu-${{ matrix.build }}.zip
|
||||
name: llama-bin-ubuntu-${{ matrix.build }}.zip
|
||||
|
||||
- name: Upload artifacts (tar)
|
||||
uses: actions/upload-artifact@v4
|
||||
with:
|
||||
path: llama-${{ steps.tag.outputs.name }}-bin-ubuntu-${{ matrix.build }}.tar.gz
|
||||
name: llama-bin-ubuntu-${{ matrix.build }}.tar.gz
|
||||
|
||||
ubuntu-22-vulkan:
|
||||
runs-on: ubuntu-22.04
|
||||
|
||||
|
|
@ -235,14 +256,21 @@ jobs:
|
|||
id: pack_artifacts
|
||||
run: |
|
||||
cp LICENSE ./build/bin/
|
||||
zip -r llama-${{ steps.tag.outputs.name }}-bin-ubuntu-vulkan-x64.zip ./build/bin/*
|
||||
zip -y -r llama-${{ steps.tag.outputs.name }}-bin-ubuntu-vulkan-x64.zip ./build/bin/*
|
||||
tar -czvf llama-${{ steps.tag.outputs.name }}-bin-ubuntu-vulkan-x64.tar.gz -C ./build/bin .
|
||||
|
||||
- name: Upload artifacts
|
||||
- name: Upload artifacts (zip)
|
||||
uses: actions/upload-artifact@v4
|
||||
with:
|
||||
path: llama-${{ steps.tag.outputs.name }}-bin-ubuntu-vulkan-x64.zip
|
||||
name: llama-bin-ubuntu-vulkan-x64.zip
|
||||
|
||||
- name: Upload artifacts (tar)
|
||||
uses: actions/upload-artifact@v4
|
||||
with:
|
||||
path: llama-${{ steps.tag.outputs.name }}-bin-ubuntu-vulkan-x64.tar.gz
|
||||
name: llama-bin-ubuntu-vulkan-x64.tar.gz
|
||||
|
||||
windows-cpu:
|
||||
runs-on: windows-2025
|
||||
|
||||
|
|
@ -298,7 +326,7 @@ jobs:
|
|||
run: |
|
||||
Copy-Item $env:CURL_PATH\bin\libcurl-${{ matrix.arch }}.dll .\build\bin\Release\
|
||||
Copy-Item "C:\Program Files\Microsoft Visual Studio\2022\Enterprise\VC\Redist\MSVC\14.44.35112\debug_nonredist\${{ matrix.arch }}\Microsoft.VC143.OpenMP.LLVM\libomp140.${{ matrix.arch == 'x64' && 'x86_64' || 'aarch64' }}.dll" .\build\bin\Release\
|
||||
7z a llama-bin-win-cpu-${{ matrix.arch }}.zip .\build\bin\Release\*
|
||||
7z a -snl llama-bin-win-cpu-${{ matrix.arch }}.zip .\build\bin\Release\*
|
||||
|
||||
- name: Upload artifacts
|
||||
uses: actions/upload-artifact@v4
|
||||
|
|
@ -380,7 +408,7 @@ jobs:
|
|||
- name: Pack artifacts
|
||||
id: pack_artifacts
|
||||
run: |
|
||||
7z a llama-bin-win-${{ matrix.backend }}-${{ matrix.arch }}.zip .\build\bin\Release\${{ matrix.target }}.dll
|
||||
7z a -snl llama-bin-win-${{ matrix.backend }}-${{ matrix.arch }}.zip .\build\bin\Release\${{ matrix.target }}.dll
|
||||
|
||||
- name: Upload artifacts
|
||||
uses: actions/upload-artifact@v4
|
||||
|
|
@ -434,7 +462,7 @@ jobs:
|
|||
- name: Pack artifacts
|
||||
id: pack_artifacts
|
||||
run: |
|
||||
7z a llama-bin-win-cuda-${{ matrix.cuda }}-x64.zip .\build\bin\Release\ggml-cuda.dll
|
||||
7z a -snl llama-bin-win-cuda-${{ matrix.cuda }}-x64.zip .\build\bin\Release\ggml-cuda.dll
|
||||
|
||||
- name: Upload artifacts
|
||||
uses: actions/upload-artifact@v4
|
||||
|
|
@ -526,7 +554,7 @@ jobs:
|
|||
cp "${{ env.ONEAPI_ROOT }}/umf/latest/bin/umf.dll" ./build/bin
|
||||
|
||||
echo "cp oneAPI running time dll files to ./build/bin done"
|
||||
7z a llama-bin-win-sycl-x64.zip ./build/bin/*
|
||||
7z a -snl llama-bin-win-sycl-x64.zip ./build/bin/*
|
||||
|
||||
- name: Upload the release package
|
||||
uses: actions/upload-artifact@v4
|
||||
|
|
@ -632,7 +660,7 @@ jobs:
|
|||
- name: Pack artifacts
|
||||
id: pack_artifacts
|
||||
run: |
|
||||
7z a llama-bin-win-hip-${{ matrix.name }}-x64.zip .\build\bin\*
|
||||
7z a -snl llama-bin-win-hip-${{ matrix.name }}-x64.zip .\build\bin\*
|
||||
|
||||
- name: Upload artifacts
|
||||
uses: actions/upload-artifact@v4
|
||||
|
|
@ -685,13 +713,20 @@ jobs:
|
|||
- name: Pack artifacts
|
||||
id: pack_artifacts
|
||||
run: |
|
||||
zip --symlinks -r llama-${{ steps.tag.outputs.name }}-xcframework.zip build-apple/llama.xcframework
|
||||
zip -y -r llama-${{ steps.tag.outputs.name }}-xcframework.zip build-apple/llama.xcframework
|
||||
tar -czvf llama-${{ steps.tag.outputs.name }}-xcframework.tar.gz -C build-apple llama.xcframework
|
||||
|
||||
- name: Upload artifacts
|
||||
- name: Upload artifacts (zip)
|
||||
uses: actions/upload-artifact@v4
|
||||
with:
|
||||
path: llama-${{ steps.tag.outputs.name }}-xcframework.zip
|
||||
name: llama-${{ steps.tag.outputs.name }}-xcframework
|
||||
name: llama-${{ steps.tag.outputs.name }}-xcframework.zip
|
||||
|
||||
- name: Upload artifacts (tar)
|
||||
uses: actions/upload-artifact@v4
|
||||
with:
|
||||
path: llama-${{ steps.tag.outputs.name }}-xcframework.tar.gz
|
||||
name: llama-${{ steps.tag.outputs.name }}-xcframework.tar.gz
|
||||
|
||||
openEuler-cann:
|
||||
strategy:
|
||||
|
|
@ -730,14 +765,21 @@ jobs:
|
|||
- name: Pack artifacts
|
||||
run: |
|
||||
cp LICENSE ./build/bin/
|
||||
zip -r llama-${{ steps.tag.outputs.name }}-bin-${{ matrix.chip_type }}-openEuler-${{ matrix.arch }}.zip ./build/bin/*
|
||||
zip -y -r llama-${{ steps.tag.outputs.name }}-bin-${{ matrix.chip_type }}-openEuler-${{ matrix.arch }}.zip ./build/bin/*
|
||||
tar -czvf llama-${{ steps.tag.outputs.name }}-bin-${{ matrix.chip_type }}-openEuler-${{ matrix.arch }}.tar.gz -C ./build/bin .
|
||||
|
||||
- name: Upload artifacts
|
||||
- name: Upload artifacts (zip)
|
||||
uses: actions/upload-artifact@v4
|
||||
with:
|
||||
path: llama-${{ steps.tag.outputs.name }}-bin-${{ matrix.chip_type }}-openEuler-${{ matrix.arch }}.zip
|
||||
name: llama-bin-${{ matrix.chip_type }}-openEuler-${{ matrix.arch }}.zip
|
||||
|
||||
- name: Upload artifacts (tar)
|
||||
uses: actions/upload-artifact@v4
|
||||
with:
|
||||
path: llama-${{ steps.tag.outputs.name }}-bin-${{ matrix.chip_type }}-openEuler-${{ matrix.arch }}.tar.gz
|
||||
name: llama-bin-${{ matrix.chip_type }}-openEuler-${{ matrix.arch }}.tar.gz
|
||||
|
||||
release:
|
||||
if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }}
|
||||
|
||||
|
|
@ -814,6 +856,7 @@ jobs:
|
|||
|
||||
echo "Moving other artifacts..."
|
||||
mv -v artifact/*.zip release
|
||||
mv -v artifact/*.tar.gz release
|
||||
|
||||
- name: Create release
|
||||
id: create_release
|
||||
|
|
@ -822,6 +865,39 @@ jobs:
|
|||
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
||||
with:
|
||||
tag_name: ${{ steps.tag.outputs.name }}
|
||||
body: |
|
||||
> [!WARNING]
|
||||
> **Release Format Update**: Linux releases will soon use .tar.gz archives instead of .zip. Please make the necessary changes to your deployment scripts.
|
||||
|
||||
**macOS/iOS:**
|
||||
- [macOS Apple Silicon (arm64)](https://github.com/ggml-org/llama.cpp/releases/download/${{ steps.tag.outputs.name }}/llama-${{ steps.tag.outputs.name }}-bin-macos-arm64.tar.gz)
|
||||
- [macOS Intel (x64)](https://github.com/ggml-org/llama.cpp/releases/download/${{ steps.tag.outputs.name }}/llama-${{ steps.tag.outputs.name }}-bin-macos-x64.tar.gz)
|
||||
- [iOS XCFramework](https://github.com/ggml-org/llama.cpp/releases/download/${{ steps.tag.outputs.name }}/llama-${{ steps.tag.outputs.name }}-xcframework.tar.gz)
|
||||
|
||||
**Linux:**
|
||||
- [Ubuntu x64 (CPU)](https://github.com/ggml-org/llama.cpp/releases/download/${{ steps.tag.outputs.name }}/llama-${{ steps.tag.outputs.name }}-bin-ubuntu-x64.tar.gz)
|
||||
- [Ubuntu x64 (Vulkan)](https://github.com/ggml-org/llama.cpp/releases/download/${{ steps.tag.outputs.name }}/llama-${{ steps.tag.outputs.name }}-bin-ubuntu-vulkan-x64.tar.gz)
|
||||
- [Ubuntu s390x (CPU)](https://github.com/ggml-org/llama.cpp/releases/download/${{ steps.tag.outputs.name }}/llama-${{ steps.tag.outputs.name }}-bin-ubuntu-s390x.tar.gz)
|
||||
|
||||
**Windows:**
|
||||
- [Windows x64 (CPU)](https://github.com/ggml-org/llama.cpp/releases/download/${{ steps.tag.outputs.name }}/llama-${{ steps.tag.outputs.name }}-bin-win-cpu-x64.zip)
|
||||
- [Windows arm64 (CPU)](https://github.com/ggml-org/llama.cpp/releases/download/${{ steps.tag.outputs.name }}/llama-${{ steps.tag.outputs.name }}-bin-win-cpu-arm64.zip)
|
||||
- [Windows x64 (CUDA)](https://github.com/ggml-org/llama.cpp/releases/download/${{ steps.tag.outputs.name }}/llama-${{ steps.tag.outputs.name }}-bin-win-cuda-12.4-x64.zip)
|
||||
- [Windows x64 (Vulkan)](https://github.com/ggml-org/llama.cpp/releases/download/${{ steps.tag.outputs.name }}/llama-${{ steps.tag.outputs.name }}-bin-win-vulkan-x64.zip)
|
||||
- [Windows x64 (SYCL)](https://github.com/ggml-org/llama.cpp/releases/download/${{ steps.tag.outputs.name }}/llama-${{ steps.tag.outputs.name }}-bin-win-sycl-x64.zip)
|
||||
- [Windows x64 (HIP)](https://github.com/ggml-org/llama.cpp/releases/download/${{ steps.tag.outputs.name }}/llama-${{ steps.tag.outputs.name }}-bin-win-hip-radeon-x64.zip)
|
||||
|
||||
**openEuler:**
|
||||
- [openEuler x86 (310p)](https://github.com/ggml-org/llama.cpp/releases/download/${{ steps.tag.outputs.name }}/llama-${{ steps.tag.outputs.name }}-bin-310p-openEuler-x86.tar.gz)
|
||||
- [openEuler x86 (910b)](https://github.com/ggml-org/llama.cpp/releases/download/${{ steps.tag.outputs.name }}/llama-${{ steps.tag.outputs.name }}-bin-910b-openEuler-x86.tar.gz)
|
||||
- [openEuler aarch64 (310p)](https://github.com/ggml-org/llama.cpp/releases/download/${{ steps.tag.outputs.name }}/llama-${{ steps.tag.outputs.name }}-bin-310p-openEuler-aarch64.tar.gz)
|
||||
- [openEuler aarch64 (910b)](https://github.com/ggml-org/llama.cpp/releases/download/${{ steps.tag.outputs.name }}/llama-${{ steps.tag.outputs.name }}-bin-910b-openEuler-aarch64.tar.gz)
|
||||
|
||||
<details>
|
||||
|
||||
${{ github.event.head_commit.message }}
|
||||
|
||||
</details>
|
||||
|
||||
- name: Upload release
|
||||
id: upload_release
|
||||
|
|
@ -833,7 +909,7 @@ jobs:
|
|||
const fs = require('fs');
|
||||
const release_id = '${{ steps.create_release.outputs.id }}';
|
||||
for (let file of await fs.readdirSync('./release')) {
|
||||
if (path.extname(file) === '.zip') {
|
||||
if (path.extname(file) === '.zip' || file.endsWith('.tar.gz')) {
|
||||
console.log('uploadReleaseAsset', file);
|
||||
await github.repos.uploadReleaseAsset({
|
||||
owner: context.repo.owner,
|
||||
|
|
|
|||
|
|
@ -613,3 +613,4 @@ $ echo "source ~/.llama-completion.bash" >> ~/.bashrc
|
|||
- [linenoise.cpp](./tools/run/linenoise.cpp/linenoise.cpp) - C++ library that provides readline-like line editing capabilities, used by `llama-run` - BSD 2-Clause License
|
||||
- [curl](https://curl.se/) - Client-side URL transfer library, used by various tools/examples - [CURL License](https://curl.se/docs/copyright.html)
|
||||
- [miniaudio.h](https://github.com/mackron/miniaudio) - Single-header audio format decoder, used by multimodal subsystem - Public domain
|
||||
- [subprocess.h](https://github.com/sheredom/subprocess.h) - Single-header process launching solution for C and C++ - Public domain
|
||||
|
|
|
|||
|
|
@ -212,13 +212,13 @@ struct handle_model_result {
|
|||
static handle_model_result common_params_handle_model(
|
||||
struct common_params_model & model,
|
||||
const std::string & bearer_token,
|
||||
const std::string & model_path_default,
|
||||
bool offline) {
|
||||
handle_model_result result;
|
||||
// handle pre-fill default model path and url based on hf_repo and hf_file
|
||||
{
|
||||
if (!model.docker_repo.empty()) { // Handle Docker URLs by resolving them to local paths
|
||||
model.path = common_docker_resolve_model(model.docker_repo);
|
||||
model.name = model.docker_repo; // set name for consistency
|
||||
} else if (!model.hf_repo.empty()) {
|
||||
// short-hand to avoid specifying --hf-file -> default it to --model
|
||||
if (model.hf_file.empty()) {
|
||||
|
|
@ -227,7 +227,8 @@ static handle_model_result common_params_handle_model(
|
|||
if (auto_detected.repo.empty() || auto_detected.ggufFile.empty()) {
|
||||
exit(1); // built without CURL, error message already printed
|
||||
}
|
||||
model.hf_repo = auto_detected.repo;
|
||||
model.name = model.hf_repo; // repo name with tag
|
||||
model.hf_repo = auto_detected.repo; // repo name without tag
|
||||
model.hf_file = auto_detected.ggufFile;
|
||||
if (!auto_detected.mmprojFile.empty()) {
|
||||
result.found_mmproj = true;
|
||||
|
|
@ -257,8 +258,6 @@ static handle_model_result common_params_handle_model(
|
|||
model.path = fs_get_cache_file(string_split<std::string>(f, '/').back());
|
||||
}
|
||||
|
||||
} else if (model.path.empty()) {
|
||||
model.path = model_path_default;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -405,7 +404,7 @@ static bool common_params_parse_ex(int argc, char ** argv, common_params_context
|
|||
|
||||
// handle model and download
|
||||
{
|
||||
auto res = common_params_handle_model(params.model, params.hf_token, DEFAULT_MODEL_PATH, params.offline);
|
||||
auto res = common_params_handle_model(params.model, params.hf_token, params.offline);
|
||||
if (params.no_mmproj) {
|
||||
params.mmproj = {};
|
||||
} else if (res.found_mmproj && params.mmproj.path.empty() && params.mmproj.url.empty()) {
|
||||
|
|
@ -415,12 +414,18 @@ static bool common_params_parse_ex(int argc, char ** argv, common_params_context
|
|||
// only download mmproj if the current example is using it
|
||||
for (auto & ex : mmproj_examples) {
|
||||
if (ctx_arg.ex == ex) {
|
||||
common_params_handle_model(params.mmproj, params.hf_token, "", params.offline);
|
||||
common_params_handle_model(params.mmproj, params.hf_token, params.offline);
|
||||
break;
|
||||
}
|
||||
}
|
||||
common_params_handle_model(params.speculative.model, params.hf_token, "", params.offline);
|
||||
common_params_handle_model(params.vocoder.model, params.hf_token, "", params.offline);
|
||||
common_params_handle_model(params.speculative.model, params.hf_token, params.offline);
|
||||
common_params_handle_model(params.vocoder.model, params.hf_token, params.offline);
|
||||
}
|
||||
|
||||
// model is required (except for server)
|
||||
// TODO @ngxson : maybe show a list of available models in CLI in this case
|
||||
if (params.model.path.empty() && ctx_arg.ex != LLAMA_EXAMPLE_SERVER) {
|
||||
throw std::invalid_argument("error: --model is required\n");
|
||||
}
|
||||
|
||||
if (params.escape) {
|
||||
|
|
@ -2097,11 +2102,8 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
|
|||
add_opt(common_arg(
|
||||
{"-m", "--model"}, "FNAME",
|
||||
ex == LLAMA_EXAMPLE_EXPORT_LORA
|
||||
? std::string("model path from which to load base model")
|
||||
: string_format(
|
||||
"model path (default: `models/$filename` with filename from `--hf-file` "
|
||||
"or `--model-url` if set, otherwise %s)", DEFAULT_MODEL_PATH
|
||||
),
|
||||
? "model path from which to load base model"
|
||||
: "model path to load",
|
||||
[](common_params & params, const std::string & value) {
|
||||
params.model.path = value;
|
||||
}
|
||||
|
|
@ -2499,6 +2501,27 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
|
|||
}
|
||||
}
|
||||
).set_examples({LLAMA_EXAMPLE_SERVER}));
|
||||
add_opt(common_arg(
|
||||
{"--models-dir"}, "PATH",
|
||||
"directory containing models for the router server (default: disabled)",
|
||||
[](common_params & params, const std::string & value) {
|
||||
params.models_dir = value;
|
||||
}
|
||||
).set_examples({LLAMA_EXAMPLE_SERVER}).set_env("LLAMA_ARG_MODELS_DIR"));
|
||||
add_opt(common_arg(
|
||||
{"--models-max"}, "N",
|
||||
string_format("for router server, maximum number of models to load simultaneously (default: %d, 0 = unlimited)", params.models_max),
|
||||
[](common_params & params, int value) {
|
||||
params.models_max = value;
|
||||
}
|
||||
).set_examples({LLAMA_EXAMPLE_SERVER}).set_env("LLAMA_ARG_MODELS_MAX"));
|
||||
add_opt(common_arg(
|
||||
{"--no-models-autoload"},
|
||||
"disables automatic loading of models (default: enabled)",
|
||||
[](common_params & params) {
|
||||
params.models_autoload = false;
|
||||
}
|
||||
).set_examples({LLAMA_EXAMPLE_SERVER}).set_env("LLAMA_ARG_NO_MODELS_AUTOLOAD"));
|
||||
add_opt(common_arg(
|
||||
{"--jinja"},
|
||||
string_format("use jinja template for chat (default: %s)\n", params.use_jinja ? "enabled" : "disabled"),
|
||||
|
|
@ -2681,7 +2704,13 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
|
|||
).set_env("LLAMA_OFFLINE"));
|
||||
add_opt(common_arg(
|
||||
{"-lv", "--verbosity", "--log-verbosity"}, "N",
|
||||
"Set the verbosity threshold. Messages with a higher verbosity will be ignored.",
|
||||
string_format("Set the verbosity threshold. Messages with a higher verbosity will be ignored. Values:\n"
|
||||
" - 0: generic output\n"
|
||||
" - 1: error\n"
|
||||
" - 2: warning\n"
|
||||
" - 3: info\n"
|
||||
" - 4: debug\n"
|
||||
"(default: %d)\n", params.verbosity),
|
||||
[](common_params & params, int value) {
|
||||
params.verbosity = value;
|
||||
common_log_set_verbosity_thold(value);
|
||||
|
|
|
|||
|
|
@ -912,7 +912,7 @@ std::string fs_get_cache_file(const std::string & filename) {
|
|||
return cache_directory + filename;
|
||||
}
|
||||
|
||||
std::vector<common_file_info> fs_list_files(const std::string & path) {
|
||||
std::vector<common_file_info> fs_list(const std::string & path, bool include_directories) {
|
||||
std::vector<common_file_info> files;
|
||||
if (path.empty()) return files;
|
||||
|
||||
|
|
@ -929,12 +929,20 @@ std::vector<common_file_info> fs_list_files(const std::string & path) {
|
|||
common_file_info info;
|
||||
info.path = p.string();
|
||||
info.name = p.filename().string();
|
||||
info.is_dir = false;
|
||||
try {
|
||||
info.size = static_cast<size_t>(std::filesystem::file_size(p));
|
||||
} catch (const std::filesystem::filesystem_error &) {
|
||||
info.size = 0;
|
||||
}
|
||||
files.push_back(std::move(info));
|
||||
} else if (include_directories && std::filesystem::is_directory(p)) {
|
||||
common_file_info info;
|
||||
info.path = p.string();
|
||||
info.name = p.filename().string();
|
||||
info.size = 0; // Directories have no size
|
||||
info.is_dir = true;
|
||||
files.push_back(std::move(info));
|
||||
}
|
||||
} catch (const std::filesystem::filesystem_error &) {
|
||||
// skip entries we cannot inspect
|
||||
|
|
|
|||
|
|
@ -26,8 +26,6 @@
|
|||
fprintf(stderr, "%s: built with %s for %s\n", __func__, LLAMA_COMPILER, LLAMA_BUILD_TARGET); \
|
||||
} while(0)
|
||||
|
||||
#define DEFAULT_MODEL_PATH "models/7B/ggml-model-f16.gguf"
|
||||
|
||||
struct common_time_meas {
|
||||
common_time_meas(int64_t & t_acc, bool disable = false);
|
||||
~common_time_meas();
|
||||
|
|
@ -228,6 +226,7 @@ struct common_params_model {
|
|||
std::string hf_repo = ""; // HF repo // NOLINT
|
||||
std::string hf_file = ""; // HF file // NOLINT
|
||||
std::string docker_repo = ""; // Docker repo // NOLINT
|
||||
std::string name = ""; // in format <user>/<model>[:<tag>] (tag is optional) // NOLINT
|
||||
};
|
||||
|
||||
struct common_params_speculative {
|
||||
|
|
@ -374,7 +373,7 @@ struct common_params {
|
|||
|
||||
std::vector<common_control_vector_load_info> control_vectors; // control vector with user defined scale
|
||||
|
||||
int32_t verbosity = 0;
|
||||
int32_t verbosity = 3; // LOG_LEVEL_INFO
|
||||
int32_t control_vector_layer_start = -1; // layer range for control vector
|
||||
int32_t control_vector_layer_end = -1; // layer range for control vector
|
||||
bool offline = false;
|
||||
|
|
@ -483,6 +482,11 @@ struct common_params {
|
|||
bool endpoint_props = false; // only control POST requests, not GET
|
||||
bool endpoint_metrics = false;
|
||||
|
||||
// router server configs
|
||||
std::string models_dir = ""; // directory containing models for the router server
|
||||
int models_max = 4; // maximum number of models to load simultaneously
|
||||
bool models_autoload = true; // automatically load models when requested via the router server
|
||||
|
||||
bool log_json = false;
|
||||
|
||||
std::string slot_save_path;
|
||||
|
|
@ -646,8 +650,9 @@ struct common_file_info {
|
|||
std::string path;
|
||||
std::string name;
|
||||
size_t size = 0; // in bytes
|
||||
bool is_dir = false;
|
||||
};
|
||||
std::vector<common_file_info> fs_list_files(const std::string & path);
|
||||
std::vector<common_file_info> fs_list(const std::string & path, bool include_directories);
|
||||
|
||||
//
|
||||
// Model utils
|
||||
|
|
|
|||
|
|
@ -430,7 +430,7 @@ std::pair<long, std::vector<char>> common_remote_get_content(const std::string &
|
|||
curl_easy_setopt(curl.get(), CURLOPT_URL, url.c_str());
|
||||
curl_easy_setopt(curl.get(), CURLOPT_NOPROGRESS, 1L);
|
||||
curl_easy_setopt(curl.get(), CURLOPT_FOLLOWLOCATION, 1L);
|
||||
curl_easy_setopt(curl.get(), CURLOPT_VERBOSE, 1L);
|
||||
curl_easy_setopt(curl.get(), CURLOPT_VERBOSE, 0L);
|
||||
typedef size_t(*CURLOPT_WRITEFUNCTION_PTR)(void * ptr, size_t size, size_t nmemb, void * data);
|
||||
auto write_callback = [](void * ptr, size_t size, size_t nmemb, void * data) -> size_t {
|
||||
auto data_vec = static_cast<std::vector<char> *>(data);
|
||||
|
|
@ -1054,7 +1054,7 @@ std::string common_docker_resolve_model(const std::string &) {
|
|||
std::vector<common_cached_model_info> common_list_cached_models() {
|
||||
std::vector<common_cached_model_info> models;
|
||||
const std::string cache_dir = fs_get_cache_directory();
|
||||
const std::vector<common_file_info> files = fs_list_files(cache_dir);
|
||||
const std::vector<common_file_info> files = fs_list(cache_dir, false);
|
||||
for (const auto & file : files) {
|
||||
if (string_starts_with(file.name, "manifest=") && string_ends_with(file.name, ".json")) {
|
||||
common_cached_model_info model_info;
|
||||
|
|
|
|||
|
|
@ -14,8 +14,10 @@ struct common_cached_model_info {
|
|||
std::string model;
|
||||
std::string tag;
|
||||
size_t size = 0; // GGUF size in bytes
|
||||
// return string representation like "user/model:tag"
|
||||
// if tag is "latest", it will be omitted
|
||||
std::string to_string() const {
|
||||
return user + "/" + model + ":" + tag;
|
||||
return user + "/" + model + (tag == "latest" ? "" : ":" + tag);
|
||||
}
|
||||
};
|
||||
|
||||
|
|
|
|||
|
|
@ -443,8 +443,22 @@ void common_log_set_timestamps(struct common_log * log, bool timestamps) {
|
|||
log->set_timestamps(timestamps);
|
||||
}
|
||||
|
||||
static int common_get_verbosity(enum ggml_log_level level) {
|
||||
switch (level) {
|
||||
case GGML_LOG_LEVEL_DEBUG: return LOG_LEVEL_DEBUG;
|
||||
case GGML_LOG_LEVEL_INFO: return LOG_LEVEL_INFO;
|
||||
case GGML_LOG_LEVEL_WARN: return LOG_LEVEL_WARN;
|
||||
case GGML_LOG_LEVEL_ERROR: return LOG_LEVEL_ERROR;
|
||||
case GGML_LOG_LEVEL_CONT: return LOG_LEVEL_INFO; // same as INFO
|
||||
case GGML_LOG_LEVEL_NONE:
|
||||
default:
|
||||
return LOG_LEVEL_OUTPUT;
|
||||
}
|
||||
}
|
||||
|
||||
void common_log_default_callback(enum ggml_log_level level, const char * text, void * /*user_data*/) {
|
||||
if (LOG_DEFAULT_LLAMA <= common_log_verbosity_thold) {
|
||||
auto verbosity = common_get_verbosity(level);
|
||||
if (verbosity <= common_log_verbosity_thold) {
|
||||
common_log_add(common_log_main(), level, "%s", text);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
29
common/log.h
29
common/log.h
|
|
@ -21,8 +21,14 @@
|
|||
# define LOG_ATTRIBUTE_FORMAT(...) __attribute__((format(printf, __VA_ARGS__)))
|
||||
#endif
|
||||
|
||||
#define LOG_DEFAULT_DEBUG 1
|
||||
#define LOG_DEFAULT_LLAMA 0
|
||||
#define LOG_LEVEL_DEBUG 4
|
||||
#define LOG_LEVEL_INFO 3
|
||||
#define LOG_LEVEL_WARN 2
|
||||
#define LOG_LEVEL_ERROR 1
|
||||
#define LOG_LEVEL_OUTPUT 0 // output data from tools
|
||||
|
||||
#define LOG_DEFAULT_DEBUG LOG_LEVEL_DEBUG
|
||||
#define LOG_DEFAULT_LLAMA LOG_LEVEL_INFO
|
||||
|
||||
enum log_colors {
|
||||
LOG_COLORS_AUTO = -1,
|
||||
|
|
@ -67,10 +73,11 @@ void common_log_add(struct common_log * log, enum ggml_log_level level, const ch
|
|||
// 0.00.090.578 I llm_load_tensors: offloading 32 repeating layers to GPU
|
||||
// 0.00.090.579 I llm_load_tensors: offloading non-repeating layers to GPU
|
||||
//
|
||||
// I - info (stdout, V = 0)
|
||||
// W - warning (stderr, V = 0)
|
||||
// E - error (stderr, V = 0)
|
||||
// D - debug (stderr, V = LOG_DEFAULT_DEBUG)
|
||||
// I - info (stdout, V = LOG_DEFAULT_INFO)
|
||||
// W - warning (stderr, V = LOG_DEFAULT_WARN)
|
||||
// E - error (stderr, V = LOG_DEFAULT_ERROR)
|
||||
// O - output (stdout, V = LOG_DEFAULT_OUTPUT)
|
||||
//
|
||||
|
||||
void common_log_set_file (struct common_log * log, const char * file); // not thread-safe
|
||||
|
|
@ -95,14 +102,14 @@ void common_log_set_timestamps(struct common_log * log, bool timestamps); // w
|
|||
} \
|
||||
} while (0)
|
||||
|
||||
#define LOG(...) LOG_TMPL(GGML_LOG_LEVEL_NONE, 0, __VA_ARGS__)
|
||||
#define LOG(...) LOG_TMPL(GGML_LOG_LEVEL_NONE, LOG_LEVEL_OUTPUT, __VA_ARGS__)
|
||||
#define LOGV(verbosity, ...) LOG_TMPL(GGML_LOG_LEVEL_NONE, verbosity, __VA_ARGS__)
|
||||
|
||||
#define LOG_INF(...) LOG_TMPL(GGML_LOG_LEVEL_INFO, 0, __VA_ARGS__)
|
||||
#define LOG_WRN(...) LOG_TMPL(GGML_LOG_LEVEL_WARN, 0, __VA_ARGS__)
|
||||
#define LOG_ERR(...) LOG_TMPL(GGML_LOG_LEVEL_ERROR, 0, __VA_ARGS__)
|
||||
#define LOG_DBG(...) LOG_TMPL(GGML_LOG_LEVEL_DEBUG, LOG_DEFAULT_DEBUG, __VA_ARGS__)
|
||||
#define LOG_CNT(...) LOG_TMPL(GGML_LOG_LEVEL_CONT, 0, __VA_ARGS__)
|
||||
#define LOG_DBG(...) LOG_TMPL(GGML_LOG_LEVEL_DEBUG, LOG_LEVEL_DEBUG, __VA_ARGS__)
|
||||
#define LOG_INF(...) LOG_TMPL(GGML_LOG_LEVEL_INFO, LOG_LEVEL_INFO, __VA_ARGS__)
|
||||
#define LOG_WRN(...) LOG_TMPL(GGML_LOG_LEVEL_WARN, LOG_LEVEL_WARN, __VA_ARGS__)
|
||||
#define LOG_ERR(...) LOG_TMPL(GGML_LOG_LEVEL_ERROR, LOG_LEVEL_ERROR, __VA_ARGS__)
|
||||
#define LOG_CNT(...) LOG_TMPL(GGML_LOG_LEVEL_CONT, LOG_LEVEL_INFO, __VA_ARGS__) // same as INFO
|
||||
|
||||
#define LOG_INFV(verbosity, ...) LOG_TMPL(GGML_LOG_LEVEL_INFO, verbosity, __VA_ARGS__)
|
||||
#define LOG_WRNV(verbosity, ...) LOG_TMPL(GGML_LOG_LEVEL_WARN, verbosity, __VA_ARGS__)
|
||||
|
|
|
|||
15
docs/ops.md
15
docs/ops.md
|
|
@ -21,11 +21,11 @@ Legend:
|
|||
| ADD_ID | ❌ | ❌ | ✅ | ✅ | ❌ | ❌ | ❌ | ✅ | ❌ |
|
||||
| ARANGE | ❌ | ✅ | ✅ | ✅ | ✅ | ❌ | ✅ | ✅ | ❌ |
|
||||
| ARGMAX | ❌ | ✅ | ✅ | ✅ | ✅ | ❌ | ✅ | ✅ | ❌ |
|
||||
| ARGSORT | ❌ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | 🟡 | ❌ |
|
||||
| ARGSORT | ❌ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❌ |
|
||||
| CEIL | ❌ | ❌ | ✅ | 🟡 | ❌ | ❌ | 🟡 | 🟡 | ❌ |
|
||||
| CLAMP | ❌ | ✅ | ✅ | ✅ | 🟡 | 🟡 | 🟡 | 🟡 | ❌ |
|
||||
| CONCAT | ❌ | ✅ | ✅ | 🟡 | ✅ | 🟡 | ✅ | ✅ | ❌ |
|
||||
| CONT | ❌ | 🟡 | ✅ | ✅ | ✅ | 🟡 | 🟡 | 🟡 | ❌ |
|
||||
| CONT | ❌ | 🟡 | ✅ | ✅ | ✅ | 🟡 | 🟡 | ✅ | ❌ |
|
||||
| CONV_2D | ❌ | ❌ | ✅ | ✅ | ❌ | ✅ | ❌ | ✅ | ❌ |
|
||||
| CONV_2D_DW | ❌ | ❌ | ✅ | ✅ | ❌ | ❌ | ❌ | ✅ | ❌ |
|
||||
| CONV_3D | ❌ | ❌ | ✅ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ |
|
||||
|
|
@ -36,10 +36,10 @@ Legend:
|
|||
| CPY | ❌ | 🟡 | 🟡 | 🟡 | 🟡 | 🟡 | 🟡 | 🟡 | ❌ |
|
||||
| CROSS_ENTROPY_LOSS | ❌ | ❌ | ✅ | ✅ | ❌ | ❌ | ❌ | ❌ | ❌ |
|
||||
| CROSS_ENTROPY_LOSS_BACK | ❌ | ❌ | ✅ | ✅ | ❌ | ❌ | ❌ | ❌ | ❌ |
|
||||
| CUMSUM | ❌ | ❌ | ✅ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ |
|
||||
| CUMSUM | ❌ | ❌ | ✅ | ❌ | ❌ | ❌ | ❌ | ✅ | ❌ |
|
||||
| DIAG_MASK_INF | ❌ | ✅ | ✅ | ✅ | 🟡 | 🟡 | ✅ | ✅ | ❌ |
|
||||
| DIV | ❌ | ✅ | ✅ | ✅ | 🟡 | 🟡 | ✅ | ✅ | ❌ |
|
||||
| DUP | ❌ | ✅ | ✅ | 🟡 | 🟡 | 🟡 | ✅ | 🟡 | ❌ |
|
||||
| DUP | ❌ | ✅ | ✅ | 🟡 | 🟡 | 🟡 | ✅ | ✅ | ❌ |
|
||||
| ELU | ❌ | ✅ | ✅ | 🟡 | 🟡 | ❌ | ✅ | ❌ | ❌ |
|
||||
| EXP | ❌ | ✅ | ✅ | 🟡 | 🟡 | ❌ | ✅ | 🟡 | ❌ |
|
||||
| EXPM1 | ❌ | ❌ | ✅ | 🟡 | ❌ | ❌ | ❌ | ❌ | ❌ |
|
||||
|
|
@ -102,7 +102,7 @@ Legend:
|
|||
| SOFTPLUS | ❌ | ❌ | ✅ | 🟡 | ❌ | ❌ | ❌ | 🟡 | ❌ |
|
||||
| SOFT_MAX | ❌ | 🟡 | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❌ |
|
||||
| SOFT_MAX_BACK | ❌ | ❌ | 🟡 | 🟡 | ❌ | ❌ | 🟡 | ✅ | ❌ |
|
||||
| SOLVE_TRI | ❌ | ❌ | ✅ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ |
|
||||
| SOLVE_TRI | ❌ | ❌ | ✅ | ❌ | ❌ | ❌ | ❌ | 🟡 | ❌ |
|
||||
| SQR | ❌ | ✅ | ✅ | ✅ | 🟡 | ❌ | 🟡 | 🟡 | ❌ |
|
||||
| SQRT | ❌ | ✅ | ✅ | ✅ | 🟡 | ❌ | 🟡 | 🟡 | ❌ |
|
||||
| SSM_CONV | ❌ | ❌ | ✅ | ✅ | ✅ | ❌ | ✅ | ✅ | ❌ |
|
||||
|
|
@ -115,7 +115,8 @@ Legend:
|
|||
| SWIGLU_OAI | ❌ | ❌ | ✅ | ✅ | ❌ | ❌ | ❌ | 🟡 | ❌ |
|
||||
| TANH | ❌ | ✅ | ✅ | 🟡 | 🟡 | ✅ | ✅ | 🟡 | ❌ |
|
||||
| TIMESTEP_EMBEDDING | ❌ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❌ |
|
||||
| TRI | ❌ | ❌ | ✅ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ |
|
||||
| TOP_K | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | 🟡 | ❌ |
|
||||
| TRI | ❌ | ❌ | ✅ | ❌ | ❌ | ❌ | ❌ | ✅ | ❌ |
|
||||
| TRUNC | ❌ | ❌ | ✅ | 🟡 | ❌ | ❌ | 🟡 | 🟡 | ❌ |
|
||||
| UPSCALE | ❌ | 🟡 | ✅ | ✅ | 🟡 | ✅ | 🟡 | ✅ | ❌ |
|
||||
| UPSCALE | ❌ | 🟡 | ✅ | ✅ | 🟡 | ✅ | 🟡 | 🟡 | ❌ |
|
||||
| XIELU | ❌ | ❌ | ✅ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ |
|
||||
|
|
|
|||
|
|
@ -5005,8 +5005,8 @@
|
|||
"Vulkan0","DUP","type=f16,ne=[10,10,5,1],permute=[0,2,1,3]","support","1","yes","Vulkan"
|
||||
"Vulkan0","DUP","type=f32,ne=[10,10,5,1],permute=[1,0,2,3]","support","1","yes","Vulkan"
|
||||
"Vulkan0","DUP","type=f16,ne=[10,10,5,1],permute=[1,0,2,3]","support","1","yes","Vulkan"
|
||||
"Vulkan0","DUP","type=i16,ne=[10,8,3,1],permute=[0,2,1,3]","support","0","no","Vulkan"
|
||||
"Vulkan0","DUP","type=i16,ne=[10,8,3,1],permute=[1,2,0,3]","support","0","no","Vulkan"
|
||||
"Vulkan0","DUP","type=i16,ne=[10,8,3,1],permute=[0,2,1,3]","support","1","yes","Vulkan"
|
||||
"Vulkan0","DUP","type=i16,ne=[10,8,3,1],permute=[1,2,0,3]","support","1","yes","Vulkan"
|
||||
"Vulkan0","SET","type_src=f32,type_dst=f32,ne=[6,5,4,3],dim=1","support","0","no","Vulkan"
|
||||
"Vulkan0","SET","type_src=f32,type_dst=f32,ne=[6,5,4,3],dim=2","support","0","no","Vulkan"
|
||||
"Vulkan0","SET","type_src=f32,type_dst=f32,ne=[6,5,4,3],dim=3","support","0","no","Vulkan"
|
||||
|
|
@ -5032,14 +5032,14 @@
|
|||
"Vulkan0","CPY","type_src=f16,type_dst=f16,ne=[3,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","Vulkan"
|
||||
"Vulkan0","CPY","type_src=f16,type_dst=f16,ne=[3,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3],_src_transpose=0","support","1","yes","Vulkan"
|
||||
"Vulkan0","CPY","type_src=bf16,type_dst=bf16,ne=[1,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","Vulkan"
|
||||
"Vulkan0","CPY","type_src=bf16,type_dst=bf16,ne=[1,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0],_src_transpose=0","support","0","no","Vulkan"
|
||||
"Vulkan0","CPY","type_src=bf16,type_dst=bf16,ne=[1,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3],_src_transpose=0","support","0","no","Vulkan"
|
||||
"Vulkan0","CPY","type_src=bf16,type_dst=bf16,ne=[1,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","Vulkan"
|
||||
"Vulkan0","CPY","type_src=bf16,type_dst=bf16,ne=[1,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3],_src_transpose=0","support","1","yes","Vulkan"
|
||||
"Vulkan0","CPY","type_src=bf16,type_dst=bf16,ne=[2,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","Vulkan"
|
||||
"Vulkan0","CPY","type_src=bf16,type_dst=bf16,ne=[2,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0],_src_transpose=0","support","0","no","Vulkan"
|
||||
"Vulkan0","CPY","type_src=bf16,type_dst=bf16,ne=[2,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3],_src_transpose=0","support","0","no","Vulkan"
|
||||
"Vulkan0","CPY","type_src=bf16,type_dst=bf16,ne=[2,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","Vulkan"
|
||||
"Vulkan0","CPY","type_src=bf16,type_dst=bf16,ne=[2,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3],_src_transpose=0","support","1","yes","Vulkan"
|
||||
"Vulkan0","CPY","type_src=bf16,type_dst=bf16,ne=[3,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","Vulkan"
|
||||
"Vulkan0","CPY","type_src=bf16,type_dst=bf16,ne=[3,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0],_src_transpose=0","support","0","no","Vulkan"
|
||||
"Vulkan0","CPY","type_src=bf16,type_dst=bf16,ne=[3,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3],_src_transpose=0","support","0","no","Vulkan"
|
||||
"Vulkan0","CPY","type_src=bf16,type_dst=bf16,ne=[3,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","Vulkan"
|
||||
"Vulkan0","CPY","type_src=bf16,type_dst=bf16,ne=[3,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3],_src_transpose=0","support","1","yes","Vulkan"
|
||||
"Vulkan0","CPY","type_src=q4_0,type_dst=q4_0,ne=[32,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","Vulkan"
|
||||
"Vulkan0","CPY","type_src=q4_0,type_dst=q4_0,ne=[32,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0],_src_transpose=0","support","0","no","Vulkan"
|
||||
"Vulkan0","CPY","type_src=q4_0,type_dst=q4_0,ne=[32,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3],_src_transpose=0","support","0","no","Vulkan"
|
||||
|
|
@ -5271,7 +5271,7 @@
|
|||
"Vulkan0","CPY","type_src=bf16,type_dst=f16,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=0","support","0","no","Vulkan"
|
||||
"Vulkan0","CPY","type_src=bf16,type_dst=f16,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0],_src_transpose=0","support","0","no","Vulkan"
|
||||
"Vulkan0","CPY","type_src=bf16,type_dst=bf16,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","Vulkan"
|
||||
"Vulkan0","CPY","type_src=bf16,type_dst=bf16,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0],_src_transpose=0","support","0","no","Vulkan"
|
||||
"Vulkan0","CPY","type_src=bf16,type_dst=bf16,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","Vulkan"
|
||||
"Vulkan0","CPY","type_src=bf16,type_dst=q4_0,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=0","support","0","no","Vulkan"
|
||||
"Vulkan0","CPY","type_src=bf16,type_dst=q4_0,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0],_src_transpose=0","support","0","no","Vulkan"
|
||||
"Vulkan0","CPY","type_src=bf16,type_dst=q4_1,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=0","support","0","no","Vulkan"
|
||||
|
|
@ -5415,21 +5415,49 @@
|
|||
"Vulkan0","CPY","type_src=f16,type_dst=f16,ne=[256,4,3,1],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=1","support","1","yes","Vulkan"
|
||||
"Vulkan0","CPY","type_src=f32,type_dst=f32,ne=[256,4,3,1],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=1","support","1","yes","Vulkan"
|
||||
"Vulkan0","CPY","type_src=f32,type_dst=f32,ne=[256,4,3,3],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=1","support","1","yes","Vulkan"
|
||||
"Vulkan0","CPY","type_src=bf16,type_dst=bf16,ne=[256,4,3,1],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=1","support","0","no","Vulkan"
|
||||
"Vulkan0","CPY","type_src=bf16,type_dst=bf16,ne=[256,4,3,1],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=1","support","1","yes","Vulkan"
|
||||
"Vulkan0","CPY","type_src=f16,type_dst=f16,ne=[256,4,1,1],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=1","support","1","yes","Vulkan"
|
||||
"Vulkan0","CPY","type_src=f32,type_dst=f32,ne=[256,4,1,1],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=1","support","1","yes","Vulkan"
|
||||
"Vulkan0","CPY","type_src=bf16,type_dst=bf16,ne=[256,4,1,1],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=1","support","0","no","Vulkan"
|
||||
"Vulkan0","CPY","type_src=bf16,type_dst=bf16,ne=[256,4,1,1],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=1","support","1","yes","Vulkan"
|
||||
"Vulkan0","CPY","type_src=i32,type_dst=i32,ne=[256,4,1,1],permute_src=[0,0,0,0],permute_dst=[0,0,0,0],_src_transpose=1","support","1","yes","Vulkan"
|
||||
"Vulkan0","CPY","type_src=i32,type_dst=i32,ne=[256,1,4,1],permute_src=[1,2,0,3],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","Vulkan"
|
||||
"Vulkan0","CPY","type_src=f32,type_dst=f32,ne=[256,1,4,1],permute_src=[1,2,0,3],permute_dst=[0,0,0,0],_src_transpose=0","support","1","yes","Vulkan"
|
||||
"Vulkan0","CONT","type=f32,ne=[10,10,10,1]","support","1","yes","Vulkan"
|
||||
"Vulkan0","CONT","type=f32,ne=[2,1,1,1]","support","1","yes","Vulkan"
|
||||
"Vulkan0","CONT","type=f32,ne=[2,1,3,5]","support","1","yes","Vulkan"
|
||||
"Vulkan0","CONT","type=f32,ne=[2,3,5,7]","support","1","yes","Vulkan"
|
||||
"Vulkan0","CONT","type=f16,ne=[2,1,1,1]","support","1","yes","Vulkan"
|
||||
"Vulkan0","CONT","type=f16,ne=[2,1,3,5]","support","1","yes","Vulkan"
|
||||
"Vulkan0","CONT","type=f16,ne=[2,3,5,7]","support","1","yes","Vulkan"
|
||||
"Vulkan0","CONT","type=bf16,ne=[2,1,1,1]","support","1","yes","Vulkan"
|
||||
"Vulkan0","CONT","type=bf16,ne=[2,1,3,5]","support","1","yes","Vulkan"
|
||||
"Vulkan0","CONT","type=bf16,ne=[2,3,5,7]","support","0","no","Vulkan"
|
||||
"Vulkan0","CONT","type=f32,ne=[2,1,1,1],use_view_slice=1","support","1","yes","Vulkan"
|
||||
"Vulkan0","CONT","type=f32,ne=[2,1,3,5],use_view_slice=1","support","1","yes","Vulkan"
|
||||
"Vulkan0","CONT","type=f32,ne=[2,3,5,7],use_view_slice=1","support","1","yes","Vulkan"
|
||||
"Vulkan0","CONT","type=f32,ne=[1,4,4,1],use_view_slice=1","support","1","yes","Vulkan"
|
||||
"Vulkan0","CONT","type=f32,ne=[1,8,17,1],use_view_slice=1","support","1","yes","Vulkan"
|
||||
"Vulkan0","CONT","type=f32,ne=[10,10,10,1],use_view_slice=1","support","1","yes","Vulkan"
|
||||
"Vulkan0","CONT","type=f32,ne=[2,1,1,1],use_view_slice=0","support","1","yes","Vulkan"
|
||||
"Vulkan0","CONT","type=f32,ne=[2,1,3,5],use_view_slice=0","support","1","yes","Vulkan"
|
||||
"Vulkan0","CONT","type=f32,ne=[2,3,5,7],use_view_slice=0","support","1","yes","Vulkan"
|
||||
"Vulkan0","CONT","type=f32,ne=[1,4,4,1],use_view_slice=0","support","1","yes","Vulkan"
|
||||
"Vulkan0","CONT","type=f32,ne=[1,8,17,1],use_view_slice=0","support","1","yes","Vulkan"
|
||||
"Vulkan0","CONT","type=f32,ne=[10,10,10,1],use_view_slice=0","support","1","yes","Vulkan"
|
||||
"Vulkan0","CONT","type=i32,ne=[2,1,1,1],use_view_slice=1","support","1","yes","Vulkan"
|
||||
"Vulkan0","CONT","type=i32,ne=[2,1,3,5],use_view_slice=1","support","1","yes","Vulkan"
|
||||
"Vulkan0","CONT","type=i32,ne=[2,3,5,7],use_view_slice=1","support","1","yes","Vulkan"
|
||||
"Vulkan0","CONT","type=i32,ne=[1,4,4,1],use_view_slice=1","support","1","yes","Vulkan"
|
||||
"Vulkan0","CONT","type=i32,ne=[1,8,17,1],use_view_slice=1","support","1","yes","Vulkan"
|
||||
"Vulkan0","CONT","type=i32,ne=[10,10,10,1],use_view_slice=1","support","1","yes","Vulkan"
|
||||
"Vulkan0","CONT","type=i32,ne=[2,1,1,1],use_view_slice=0","support","1","yes","Vulkan"
|
||||
"Vulkan0","CONT","type=i32,ne=[2,1,3,5],use_view_slice=0","support","1","yes","Vulkan"
|
||||
"Vulkan0","CONT","type=i32,ne=[2,3,5,7],use_view_slice=0","support","1","yes","Vulkan"
|
||||
"Vulkan0","CONT","type=i32,ne=[1,4,4,1],use_view_slice=0","support","1","yes","Vulkan"
|
||||
"Vulkan0","CONT","type=i32,ne=[1,8,17,1],use_view_slice=0","support","1","yes","Vulkan"
|
||||
"Vulkan0","CONT","type=i32,ne=[10,10,10,1],use_view_slice=0","support","1","yes","Vulkan"
|
||||
"Vulkan0","CONT","type=f16,ne=[2,1,1,1],use_view_slice=0","support","1","yes","Vulkan"
|
||||
"Vulkan0","CONT","type=f16,ne=[2,1,3,5],use_view_slice=0","support","1","yes","Vulkan"
|
||||
"Vulkan0","CONT","type=f16,ne=[2,3,5,7],use_view_slice=0","support","1","yes","Vulkan"
|
||||
"Vulkan0","CONT","type=f16,ne=[1,4,4,1],use_view_slice=0","support","1","yes","Vulkan"
|
||||
"Vulkan0","CONT","type=f16,ne=[1,8,17,1],use_view_slice=0","support","1","yes","Vulkan"
|
||||
"Vulkan0","CONT","type=f16,ne=[10,10,10,1],use_view_slice=0","support","1","yes","Vulkan"
|
||||
"Vulkan0","CONT","type=bf16,ne=[2,1,1,1],use_view_slice=0","support","1","yes","Vulkan"
|
||||
"Vulkan0","CONT","type=bf16,ne=[2,1,3,5],use_view_slice=0","support","1","yes","Vulkan"
|
||||
"Vulkan0","CONT","type=bf16,ne=[2,3,5,7],use_view_slice=0","support","1","yes","Vulkan"
|
||||
"Vulkan0","CONT","type=bf16,ne=[1,4,4,1],use_view_slice=0","support","1","yes","Vulkan"
|
||||
"Vulkan0","CONT","type=bf16,ne=[1,8,17,1],use_view_slice=0","support","1","yes","Vulkan"
|
||||
"Vulkan0","CONT","type=bf16,ne=[10,10,10,1],use_view_slice=0","support","1","yes","Vulkan"
|
||||
"Vulkan0","ADD","type=f16,ne=[1,1,8,1],nr=[1,1,1,1],nf=1","support","1","yes","Vulkan"
|
||||
"Vulkan0","SUB","type=f16,ne=[1,1,8,1],nr=[1,1,1,1],nf=1","support","1","yes","Vulkan"
|
||||
"Vulkan0","MUL","type=f16,ne=[1,1,8,1],nr=[1,1,1,1],nf=1","support","1","yes","Vulkan"
|
||||
|
|
@ -5655,6 +5683,7 @@
|
|||
"Vulkan0","MUL","type=f32,ne=[64,262144,1,1],nr=[1,1,1,1],nf=1","support","1","yes","Vulkan"
|
||||
"Vulkan0","DIV","type=f32,ne=[64,262144,1,1],nr=[1,1,1,1],nf=1","support","1","yes","Vulkan"
|
||||
"Vulkan0","ADD1","type=f32,ne=[10,5,4,3]","support","1","yes","Vulkan"
|
||||
"Vulkan0","ADD1","type=f32,ne=[1024,1024,1,1]","support","1","yes","Vulkan"
|
||||
"Vulkan0","SCALE","type=f32,ne=[10,10,10,10],scale=2.000000,bias=0.000000,inplace=0","support","1","yes","Vulkan"
|
||||
"Vulkan0","SCALE","type=f32,ne=[10,10,10,10],scale=2.000000,bias=1.000000,inplace=0","support","1","yes","Vulkan"
|
||||
"Vulkan0","SCALE","type=f32,ne=[10,10,10,10],scale=2.000000,bias=1.000000,inplace=1","support","1","yes","Vulkan"
|
||||
|
|
@ -8644,9 +8673,13 @@
|
|||
"Vulkan0","CLAMP","type=f16,ne=[7,1,5,3],min=-0.500000,max=0.500000","support","0","no","Vulkan"
|
||||
"Vulkan0","LEAKY_RELU","type=f16,ne_a=[7,1,5,3],negative_slope=0.100000","support","0","no","Vulkan"
|
||||
"Vulkan0","FLOOR","type=f16,ne=[7,1,5,3]","support","1","yes","Vulkan"
|
||||
"Vulkan0","FLOOR","type=f16,ne=[1024,1024,1,1]","support","1","yes","Vulkan"
|
||||
"Vulkan0","CEIL","type=f16,ne=[7,1,5,3]","support","1","yes","Vulkan"
|
||||
"Vulkan0","CEIL","type=f16,ne=[1024,1024,1,1]","support","1","yes","Vulkan"
|
||||
"Vulkan0","ROUND","type=f16,ne=[7,1,5,3]","support","1","yes","Vulkan"
|
||||
"Vulkan0","ROUND","type=f16,ne=[1024,1024,1,1]","support","1","yes","Vulkan"
|
||||
"Vulkan0","TRUNC","type=f16,ne=[7,1,5,3]","support","1","yes","Vulkan"
|
||||
"Vulkan0","TRUNC","type=f16,ne=[1024,1024,1,1]","support","1","yes","Vulkan"
|
||||
"Vulkan0","SQR","type=f32,ne=[10,5,4,3]","support","1","yes","Vulkan"
|
||||
"Vulkan0","SQRT","type=f32,ne=[10,3,3,2]","support","1","yes","Vulkan"
|
||||
"Vulkan0","LOG","type=f32,ne=[10,5,4,3]","support","1","yes","Vulkan"
|
||||
|
|
@ -8666,9 +8699,13 @@
|
|||
"Vulkan0","CLAMP","type=f32,ne=[7,1,5,3],min=-0.500000,max=0.500000","support","1","yes","Vulkan"
|
||||
"Vulkan0","LEAKY_RELU","type=f32,ne_a=[7,1,5,3],negative_slope=0.100000","support","1","yes","Vulkan"
|
||||
"Vulkan0","FLOOR","type=f32,ne=[7,1,5,3]","support","1","yes","Vulkan"
|
||||
"Vulkan0","FLOOR","type=f32,ne=[1024,1024,1,1]","support","1","yes","Vulkan"
|
||||
"Vulkan0","CEIL","type=f32,ne=[7,1,5,3]","support","1","yes","Vulkan"
|
||||
"Vulkan0","CEIL","type=f32,ne=[1024,1024,1,1]","support","1","yes","Vulkan"
|
||||
"Vulkan0","ROUND","type=f32,ne=[7,1,5,3]","support","1","yes","Vulkan"
|
||||
"Vulkan0","ROUND","type=f32,ne=[1024,1024,1,1]","support","1","yes","Vulkan"
|
||||
"Vulkan0","TRUNC","type=f32,ne=[7,1,5,3]","support","1","yes","Vulkan"
|
||||
"Vulkan0","TRUNC","type=f32,ne=[1024,1024,1,1]","support","1","yes","Vulkan"
|
||||
"Vulkan0","DIAG_MASK_INF","type=f32,ne=[10,10,1,1],n_past=5","support","1","yes","Vulkan"
|
||||
"Vulkan0","DIAG_MASK_INF","type=f32,ne=[10,10,3,1],n_past=5","support","1","yes","Vulkan"
|
||||
"Vulkan0","DIAG_MASK_INF","type=f32,ne=[10,10,3,2],n_past=5","support","1","yes","Vulkan"
|
||||
|
|
@ -9411,28 +9448,405 @@
|
|||
"Vulkan0","CONCAT","type=i32,ne_a=[11,12,13,14],ne_b_d=7,dim=2,v=3","support","1","yes","Vulkan"
|
||||
"Vulkan0","CONCAT","type=f32,ne_a=[11,12,13,14],ne_b_d=7,dim=3,v=3","support","1","yes","Vulkan"
|
||||
"Vulkan0","CONCAT","type=i32,ne_a=[11,12,13,14],ne_b_d=7,dim=3,v=3","support","1","yes","Vulkan"
|
||||
"Vulkan0","ARGSORT","type=f32,ne=[3,1,1,1],order=0","support","1","yes","Vulkan"
|
||||
"Vulkan0","ARGSORT","type=f32,ne=[4,1,1,1],order=0","support","1","yes","Vulkan"
|
||||
"Vulkan0","ARGSORT","type=f32,ne=[7,1,1,1],order=0","support","1","yes","Vulkan"
|
||||
"Vulkan0","ARGSORT","type=f32,ne=[8,1,1,1],order=0","support","1","yes","Vulkan"
|
||||
"Vulkan0","ARGSORT","type=f32,ne=[15,1,1,1],order=0","support","1","yes","Vulkan"
|
||||
"Vulkan0","ARGSORT","type=f32,ne=[16,1,1,1],order=0","support","1","yes","Vulkan"
|
||||
"Vulkan0","ARGSORT","type=f32,ne=[31,1,1,1],order=0","support","1","yes","Vulkan"
|
||||
"Vulkan0","ARGSORT","type=f32,ne=[32,1,1,1],order=0","support","1","yes","Vulkan"
|
||||
"Vulkan0","ARGSORT","type=f32,ne=[63,1,1,1],order=0","support","1","yes","Vulkan"
|
||||
"Vulkan0","ARGSORT","type=f32,ne=[64,1,1,1],order=0","support","1","yes","Vulkan"
|
||||
"Vulkan0","ARGSORT","type=f32,ne=[127,1,1,1],order=0","support","1","yes","Vulkan"
|
||||
"Vulkan0","ARGSORT","type=f32,ne=[128,1,1,1],order=0","support","1","yes","Vulkan"
|
||||
"Vulkan0","ARGSORT","type=f32,ne=[255,1,1,1],order=0","support","1","yes","Vulkan"
|
||||
"Vulkan0","ARGSORT","type=f32,ne=[256,1,1,1],order=0","support","1","yes","Vulkan"
|
||||
"Vulkan0","ARGSORT","type=f32,ne=[511,1,1,1],order=0","support","1","yes","Vulkan"
|
||||
"Vulkan0","ARGSORT","type=f32,ne=[512,1,1,1],order=0","support","1","yes","Vulkan"
|
||||
"Vulkan0","ARGSORT","type=f32,ne=[1023,1,1,1],order=0","support","1","yes","Vulkan"
|
||||
"Vulkan0","ARGSORT","type=f32,ne=[1024,1,1,1],order=0","support","1","yes","Vulkan"
|
||||
"Vulkan0","ARGSORT","type=f32,ne=[2047,1,1,1],order=0","support","1","yes","Vulkan"
|
||||
"Vulkan0","ARGSORT","type=f32,ne=[2048,1,1,1],order=0","support","1","yes","Vulkan"
|
||||
"Vulkan0","ARGSORT","type=f32,ne=[4095,1,1,1],order=0","support","1","yes","Vulkan"
|
||||
"Vulkan0","ARGSORT","type=f32,ne=[4096,1,1,1],order=0","support","1","yes","Vulkan"
|
||||
"Vulkan0","ARGSORT","type=f32,ne=[8191,1,1,1],order=0","support","1","yes","Vulkan"
|
||||
"Vulkan0","ARGSORT","type=f32,ne=[8192,1,1,1],order=0","support","1","yes","Vulkan"
|
||||
"Vulkan0","ARGSORT","type=f32,ne=[16383,1,1,1],order=0","support","1","yes","Vulkan"
|
||||
"Vulkan0","ARGSORT","type=f32,ne=[16384,1,1,1],order=0","support","1","yes","Vulkan"
|
||||
"Vulkan0","ARGSORT","type=f32,ne=[32767,1,1,1],order=0","support","1","yes","Vulkan"
|
||||
"Vulkan0","ARGSORT","type=f32,ne=[32768,1,1,1],order=0","support","1","yes","Vulkan"
|
||||
"Vulkan0","ARGSORT","type=f32,ne=[65535,1,1,1],order=0","support","1","yes","Vulkan"
|
||||
"Vulkan0","ARGSORT","type=f32,ne=[65536,1,1,1],order=0","support","1","yes","Vulkan"
|
||||
"Vulkan0","ARGSORT","type=f32,ne=[131071,1,1,1],order=0","support","1","yes","Vulkan"
|
||||
"Vulkan0","ARGSORT","type=f32,ne=[131072,1,1,1],order=0","support","1","yes","Vulkan"
|
||||
"Vulkan0","ARGSORT","type=f32,ne=[262143,1,1,1],order=0","support","1","yes","Vulkan"
|
||||
"Vulkan0","ARGSORT","type=f32,ne=[262144,1,1,1],order=0","support","1","yes","Vulkan"
|
||||
"Vulkan0","ARGSORT","type=f32,ne=[524287,1,1,1],order=0","support","1","yes","Vulkan"
|
||||
"Vulkan0","ARGSORT","type=f32,ne=[524288,1,1,1],order=0","support","1","yes","Vulkan"
|
||||
"Vulkan0","ARGSORT","type=f32,ne=[1048575,1,1,1],order=0","support","1","yes","Vulkan"
|
||||
"Vulkan0","ARGSORT","type=f32,ne=[1048576,1,1,1],order=0","support","1","yes","Vulkan"
|
||||
"Vulkan0","ARGSORT","type=f32,ne=[16,10,10,10],order=0","support","1","yes","Vulkan"
|
||||
"Vulkan0","ARGSORT","type=f32,ne=[60,10,10,10],order=0","support","1","yes","Vulkan"
|
||||
"Vulkan0","ARGSORT","type=f32,ne=[1023,2,1,3],order=0","support","1","yes","Vulkan"
|
||||
"Vulkan0","ARGSORT","type=f32,ne=[1024,2,1,3],order=0","support","1","yes","Vulkan"
|
||||
"Vulkan0","ARGSORT","type=f32,ne=[1025,2,1,3],order=0","support","0","no","Vulkan"
|
||||
"Vulkan0","ARGSORT","type=f32,ne=[16384,1,1,1],order=0","support","0","no","Vulkan"
|
||||
"Vulkan0","ARGSORT","type=f32,ne=[2047,2,1,3],order=0","support","0","no","Vulkan"
|
||||
"Vulkan0","ARGSORT","type=f32,ne=[2048,2,1,3],order=0","support","0","no","Vulkan"
|
||||
"Vulkan0","ARGSORT","type=f32,ne=[2049,2,1,3],order=0","support","0","no","Vulkan"
|
||||
"Vulkan0","ARGSORT","type=f32,ne=[1025,2,1,3],order=0","support","1","yes","Vulkan"
|
||||
"Vulkan0","ARGSORT","type=f32,ne=[2047,2,1,3],order=0","support","1","yes","Vulkan"
|
||||
"Vulkan0","ARGSORT","type=f32,ne=[2048,2,1,3],order=0","support","1","yes","Vulkan"
|
||||
"Vulkan0","ARGSORT","type=f32,ne=[2049,2,1,3],order=0","support","1","yes","Vulkan"
|
||||
"Vulkan0","ARGSORT","type=f32,ne=[2,8,8192,1],order=0","support","1","yes","Vulkan"
|
||||
"Vulkan0","ARGSORT","type=f32,ne=[8,1,1,1],order=1","support","1","yes","Vulkan"
|
||||
"Vulkan0","ARGSORT","type=f32,ne=[3,1,1,1],order=0","support","1","yes","Vulkan"
|
||||
"Vulkan0","ARGSORT","type=f32,ne=[4,1,1,1],order=0","support","1","yes","Vulkan"
|
||||
"Vulkan0","ARGSORT","type=f32,ne=[7,1,1,1],order=0","support","1","yes","Vulkan"
|
||||
"Vulkan0","ARGSORT","type=f32,ne=[8,1,1,1],order=0","support","1","yes","Vulkan"
|
||||
"Vulkan0","ARGSORT","type=f32,ne=[15,1,1,1],order=0","support","1","yes","Vulkan"
|
||||
"Vulkan0","ARGSORT","type=f32,ne=[16,1,1,1],order=0","support","1","yes","Vulkan"
|
||||
"Vulkan0","ARGSORT","type=f32,ne=[31,1,1,1],order=0","support","1","yes","Vulkan"
|
||||
"Vulkan0","ARGSORT","type=f32,ne=[32,1,1,1],order=0","support","1","yes","Vulkan"
|
||||
"Vulkan0","ARGSORT","type=f32,ne=[63,1,1,1],order=0","support","1","yes","Vulkan"
|
||||
"Vulkan0","ARGSORT","type=f32,ne=[64,1,1,1],order=0","support","1","yes","Vulkan"
|
||||
"Vulkan0","ARGSORT","type=f32,ne=[127,1,1,1],order=0","support","1","yes","Vulkan"
|
||||
"Vulkan0","ARGSORT","type=f32,ne=[128,1,1,1],order=0","support","1","yes","Vulkan"
|
||||
"Vulkan0","ARGSORT","type=f32,ne=[255,1,1,1],order=0","support","1","yes","Vulkan"
|
||||
"Vulkan0","ARGSORT","type=f32,ne=[256,1,1,1],order=0","support","1","yes","Vulkan"
|
||||
"Vulkan0","ARGSORT","type=f32,ne=[511,1,1,1],order=0","support","1","yes","Vulkan"
|
||||
"Vulkan0","ARGSORT","type=f32,ne=[512,1,1,1],order=0","support","1","yes","Vulkan"
|
||||
"Vulkan0","ARGSORT","type=f32,ne=[1023,1,1,1],order=0","support","1","yes","Vulkan"
|
||||
"Vulkan0","ARGSORT","type=f32,ne=[1024,1,1,1],order=0","support","1","yes","Vulkan"
|
||||
"Vulkan0","ARGSORT","type=f32,ne=[2047,1,1,1],order=0","support","1","yes","Vulkan"
|
||||
"Vulkan0","ARGSORT","type=f32,ne=[2048,1,1,1],order=0","support","1","yes","Vulkan"
|
||||
"Vulkan0","ARGSORT","type=f32,ne=[4095,1,1,1],order=0","support","1","yes","Vulkan"
|
||||
"Vulkan0","ARGSORT","type=f32,ne=[4096,1,1,1],order=0","support","1","yes","Vulkan"
|
||||
"Vulkan0","ARGSORT","type=f32,ne=[8191,1,1,1],order=0","support","1","yes","Vulkan"
|
||||
"Vulkan0","ARGSORT","type=f32,ne=[8192,1,1,1],order=0","support","1","yes","Vulkan"
|
||||
"Vulkan0","ARGSORT","type=f32,ne=[16383,1,1,1],order=0","support","1","yes","Vulkan"
|
||||
"Vulkan0","ARGSORT","type=f32,ne=[16384,1,1,1],order=0","support","1","yes","Vulkan"
|
||||
"Vulkan0","ARGSORT","type=f32,ne=[32767,1,1,1],order=0","support","1","yes","Vulkan"
|
||||
"Vulkan0","ARGSORT","type=f32,ne=[32768,1,1,1],order=0","support","1","yes","Vulkan"
|
||||
"Vulkan0","ARGSORT","type=f32,ne=[65535,1,1,1],order=0","support","1","yes","Vulkan"
|
||||
"Vulkan0","ARGSORT","type=f32,ne=[65536,1,1,1],order=0","support","1","yes","Vulkan"
|
||||
"Vulkan0","ARGSORT","type=f32,ne=[131071,1,1,1],order=0","support","1","yes","Vulkan"
|
||||
"Vulkan0","ARGSORT","type=f32,ne=[131072,1,1,1],order=0","support","1","yes","Vulkan"
|
||||
"Vulkan0","ARGSORT","type=f32,ne=[262143,1,1,1],order=0","support","1","yes","Vulkan"
|
||||
"Vulkan0","ARGSORT","type=f32,ne=[262144,1,1,1],order=0","support","1","yes","Vulkan"
|
||||
"Vulkan0","ARGSORT","type=f32,ne=[524287,1,1,1],order=0","support","1","yes","Vulkan"
|
||||
"Vulkan0","ARGSORT","type=f32,ne=[524288,1,1,1],order=0","support","1","yes","Vulkan"
|
||||
"Vulkan0","ARGSORT","type=f32,ne=[1048575,1,1,1],order=0","support","1","yes","Vulkan"
|
||||
"Vulkan0","ARGSORT","type=f32,ne=[1048576,1,1,1],order=0","support","1","yes","Vulkan"
|
||||
"Vulkan0","ARGSORT","type=f32,ne=[16,10,10,10],order=1","support","1","yes","Vulkan"
|
||||
"Vulkan0","ARGSORT","type=f32,ne=[60,10,10,10],order=1","support","1","yes","Vulkan"
|
||||
"Vulkan0","ARGSORT","type=f32,ne=[1023,2,1,3],order=1","support","1","yes","Vulkan"
|
||||
"Vulkan0","ARGSORT","type=f32,ne=[1024,2,1,3],order=1","support","1","yes","Vulkan"
|
||||
"Vulkan0","ARGSORT","type=f32,ne=[1025,2,1,3],order=1","support","0","no","Vulkan"
|
||||
"Vulkan0","ARGSORT","type=f32,ne=[16384,1,1,1],order=1","support","0","no","Vulkan"
|
||||
"Vulkan0","ARGSORT","type=f32,ne=[2047,2,1,3],order=1","support","0","no","Vulkan"
|
||||
"Vulkan0","ARGSORT","type=f32,ne=[2048,2,1,3],order=1","support","0","no","Vulkan"
|
||||
"Vulkan0","ARGSORT","type=f32,ne=[2049,2,1,3],order=1","support","0","no","Vulkan"
|
||||
"Vulkan0","ARGSORT","type=f32,ne=[1025,2,1,3],order=1","support","1","yes","Vulkan"
|
||||
"Vulkan0","ARGSORT","type=f32,ne=[2047,2,1,3],order=1","support","1","yes","Vulkan"
|
||||
"Vulkan0","ARGSORT","type=f32,ne=[2048,2,1,3],order=1","support","1","yes","Vulkan"
|
||||
"Vulkan0","ARGSORT","type=f32,ne=[2049,2,1,3],order=1","support","1","yes","Vulkan"
|
||||
"Vulkan0","ARGSORT","type=f32,ne=[2,8,8192,1],order=1","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[1,1,1,1],k=1","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[12,1,2,1],k=1","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[2,1,1,1],k=1","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[13,1,2,1],k=1","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[2,1,1,1],k=2","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[13,1,2,1],k=2","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[4,1,1,1],k=1","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[15,1,2,1],k=1","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[4,1,1,1],k=2","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[15,1,2,1],k=2","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[4,1,1,1],k=3","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[15,1,2,1],k=3","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[8,1,1,1],k=1","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[19,1,2,1],k=1","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[8,1,1,1],k=2","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[19,1,2,1],k=2","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[8,1,1,1],k=3","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[19,1,2,1],k=3","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[8,1,1,1],k=7","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[19,1,2,1],k=7","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[16,1,1,1],k=1","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[27,1,2,1],k=1","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[16,1,1,1],k=2","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[27,1,2,1],k=2","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[16,1,1,1],k=3","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[27,1,2,1],k=3","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[16,1,1,1],k=7","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[27,1,2,1],k=7","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[16,1,1,1],k=15","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[27,1,2,1],k=15","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[32,1,1,1],k=1","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[43,1,2,1],k=1","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[32,1,1,1],k=2","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[43,1,2,1],k=2","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[32,1,1,1],k=3","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[43,1,2,1],k=3","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[32,1,1,1],k=7","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[43,1,2,1],k=7","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[32,1,1,1],k=15","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[43,1,2,1],k=15","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[64,1,1,1],k=1","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[75,1,2,1],k=1","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[64,1,1,1],k=2","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[75,1,2,1],k=2","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[64,1,1,1],k=3","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[75,1,2,1],k=3","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[64,1,1,1],k=7","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[75,1,2,1],k=7","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[64,1,1,1],k=15","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[75,1,2,1],k=15","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[128,1,1,1],k=1","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[139,1,2,1],k=1","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[128,1,1,1],k=2","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[139,1,2,1],k=2","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[128,1,1,1],k=3","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[139,1,2,1],k=3","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[128,1,1,1],k=7","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[139,1,2,1],k=7","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[128,1,1,1],k=15","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[139,1,2,1],k=15","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[128,1,1,1],k=100","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[139,1,2,1],k=100","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[256,1,1,1],k=1","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[267,1,2,1],k=1","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[256,1,1,1],k=2","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[267,1,2,1],k=2","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[256,1,1,1],k=3","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[267,1,2,1],k=3","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[256,1,1,1],k=7","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[267,1,2,1],k=7","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[256,1,1,1],k=15","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[267,1,2,1],k=15","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[256,1,1,1],k=100","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[267,1,2,1],k=100","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[512,1,1,1],k=1","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[523,1,2,1],k=1","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[512,1,1,1],k=2","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[523,1,2,1],k=2","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[512,1,1,1],k=3","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[523,1,2,1],k=3","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[512,1,1,1],k=7","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[523,1,2,1],k=7","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[512,1,1,1],k=15","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[523,1,2,1],k=15","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[512,1,1,1],k=100","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[523,1,2,1],k=100","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[512,1,1,1],k=500","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[523,1,2,1],k=500","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[1024,1,1,1],k=1","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[1035,1,2,1],k=1","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[1024,1,1,1],k=2","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[1035,1,2,1],k=2","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[1024,1,1,1],k=3","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[1035,1,2,1],k=3","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[1024,1,1,1],k=7","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[1035,1,2,1],k=7","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[1024,1,1,1],k=15","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[1035,1,2,1],k=15","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[1024,1,1,1],k=100","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[1035,1,2,1],k=100","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[1024,1,1,1],k=500","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[1035,1,2,1],k=500","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[1024,1,1,1],k=1023","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[1035,1,2,1],k=1023","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[2048,1,1,1],k=1","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[2059,1,2,1],k=1","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[2048,1,1,1],k=2","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[2059,1,2,1],k=2","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[2048,1,1,1],k=3","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[2059,1,2,1],k=3","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[2048,1,1,1],k=7","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[2059,1,2,1],k=7","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[2048,1,1,1],k=15","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[2059,1,2,1],k=15","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[2048,1,1,1],k=100","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[2059,1,2,1],k=100","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[2048,1,1,1],k=500","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[2059,1,2,1],k=500","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[2048,1,1,1],k=1023","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[2059,1,2,1],k=1023","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[4096,1,1,1],k=1","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[4107,1,2,1],k=1","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[4096,1,1,1],k=2","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[4107,1,2,1],k=2","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[4096,1,1,1],k=3","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[4107,1,2,1],k=3","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[4096,1,1,1],k=7","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[4107,1,2,1],k=7","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[4096,1,1,1],k=15","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[4107,1,2,1],k=15","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[4096,1,1,1],k=100","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[4107,1,2,1],k=100","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[4096,1,1,1],k=500","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[4107,1,2,1],k=500","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[4096,1,1,1],k=1023","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[4107,1,2,1],k=1023","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[8192,1,1,1],k=1","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[8203,1,2,1],k=1","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[8192,1,1,1],k=2","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[8203,1,2,1],k=2","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[8192,1,1,1],k=3","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[8203,1,2,1],k=3","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[8192,1,1,1],k=7","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[8203,1,2,1],k=7","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[8192,1,1,1],k=15","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[8203,1,2,1],k=15","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[8192,1,1,1],k=100","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[8203,1,2,1],k=100","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[8192,1,1,1],k=500","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[8203,1,2,1],k=500","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[8192,1,1,1],k=1023","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[8203,1,2,1],k=1023","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[16384,1,1,1],k=1","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[16395,1,2,1],k=1","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[16384,1,1,1],k=2","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[16395,1,2,1],k=2","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[16384,1,1,1],k=3","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[16395,1,2,1],k=3","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[16384,1,1,1],k=7","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[16395,1,2,1],k=7","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[16384,1,1,1],k=15","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[16395,1,2,1],k=15","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[16384,1,1,1],k=100","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[16395,1,2,1],k=100","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[16384,1,1,1],k=500","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[16395,1,2,1],k=500","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[16384,1,1,1],k=1023","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[16395,1,2,1],k=1023","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[16384,1,1,1],k=9999","support","0","no","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[16395,1,2,1],k=9999","support","0","no","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[32768,1,1,1],k=1","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[32779,1,2,1],k=1","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[32768,1,1,1],k=2","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[32779,1,2,1],k=2","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[32768,1,1,1],k=3","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[32779,1,2,1],k=3","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[32768,1,1,1],k=7","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[32779,1,2,1],k=7","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[32768,1,1,1],k=15","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[32779,1,2,1],k=15","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[32768,1,1,1],k=100","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[32779,1,2,1],k=100","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[32768,1,1,1],k=500","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[32779,1,2,1],k=500","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[32768,1,1,1],k=1023","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[32779,1,2,1],k=1023","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[32768,1,1,1],k=9999","support","0","no","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[32779,1,2,1],k=9999","support","0","no","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[65536,1,1,1],k=1","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[65547,1,2,1],k=1","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[65536,1,1,1],k=2","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[65547,1,2,1],k=2","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[65536,1,1,1],k=3","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[65547,1,2,1],k=3","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[65536,1,1,1],k=7","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[65547,1,2,1],k=7","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[65536,1,1,1],k=15","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[65547,1,2,1],k=15","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[65536,1,1,1],k=100","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[65547,1,2,1],k=100","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[65536,1,1,1],k=500","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[65547,1,2,1],k=500","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[65536,1,1,1],k=1023","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[65547,1,2,1],k=1023","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[65536,1,1,1],k=9999","support","0","no","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[65547,1,2,1],k=9999","support","0","no","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[131072,1,1,1],k=1","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[131083,1,2,1],k=1","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[131072,1,1,1],k=2","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[131083,1,2,1],k=2","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[131072,1,1,1],k=3","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[131083,1,2,1],k=3","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[131072,1,1,1],k=7","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[131083,1,2,1],k=7","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[131072,1,1,1],k=15","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[131083,1,2,1],k=15","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[131072,1,1,1],k=100","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[131083,1,2,1],k=100","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[131072,1,1,1],k=500","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[131083,1,2,1],k=500","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[131072,1,1,1],k=1023","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[131083,1,2,1],k=1023","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[131072,1,1,1],k=9999","support","0","no","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[131083,1,2,1],k=9999","support","0","no","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[262144,1,1,1],k=1","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[262155,1,2,1],k=1","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[262144,1,1,1],k=2","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[262155,1,2,1],k=2","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[262144,1,1,1],k=3","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[262155,1,2,1],k=3","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[262144,1,1,1],k=7","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[262155,1,2,1],k=7","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[262144,1,1,1],k=15","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[262155,1,2,1],k=15","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[262144,1,1,1],k=100","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[262155,1,2,1],k=100","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[262144,1,1,1],k=500","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[262155,1,2,1],k=500","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[262144,1,1,1],k=1023","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[262155,1,2,1],k=1023","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[262144,1,1,1],k=9999","support","0","no","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[262155,1,2,1],k=9999","support","0","no","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[524288,1,1,1],k=1","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[524299,1,2,1],k=1","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[524288,1,1,1],k=2","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[524299,1,2,1],k=2","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[524288,1,1,1],k=3","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[524299,1,2,1],k=3","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[524288,1,1,1],k=7","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[524299,1,2,1],k=7","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[524288,1,1,1],k=15","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[524299,1,2,1],k=15","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[524288,1,1,1],k=100","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[524299,1,2,1],k=100","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[524288,1,1,1],k=500","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[524299,1,2,1],k=500","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[524288,1,1,1],k=1023","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[524299,1,2,1],k=1023","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[524288,1,1,1],k=9999","support","0","no","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[524299,1,2,1],k=9999","support","0","no","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[16,10,10,10],k=1","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[60,10,10,10],k=1","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[1023,2,1,3],k=1","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[1024,2,1,3],k=1","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[1025,2,1,3],k=1","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[16384,1,1,1],k=1","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[2047,2,1,3],k=1","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[2048,2,1,3],k=1","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[2049,2,1,3],k=1","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[16,10,10,10],k=2","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[60,10,10,10],k=2","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[1023,2,1,3],k=2","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[1024,2,1,3],k=2","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[1025,2,1,3],k=2","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[16384,1,1,1],k=2","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[2047,2,1,3],k=2","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[2048,2,1,3],k=2","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[2049,2,1,3],k=2","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[16,10,10,10],k=3","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[60,10,10,10],k=3","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[1023,2,1,3],k=3","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[1024,2,1,3],k=3","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[1025,2,1,3],k=3","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[16384,1,1,1],k=3","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[2047,2,1,3],k=3","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[2048,2,1,3],k=3","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[2049,2,1,3],k=3","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[16,10,10,10],k=7","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[60,10,10,10],k=7","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[1023,2,1,3],k=7","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[1024,2,1,3],k=7","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[1025,2,1,3],k=7","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[16384,1,1,1],k=7","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[2047,2,1,3],k=7","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[2048,2,1,3],k=7","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[2049,2,1,3],k=7","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[16,10,10,10],k=15","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[60,10,10,10],k=15","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[1023,2,1,3],k=15","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[1024,2,1,3],k=15","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[1025,2,1,3],k=15","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[16384,1,1,1],k=15","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[2047,2,1,3],k=15","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[2048,2,1,3],k=15","support","1","yes","Vulkan"
|
||||
"Vulkan0","TOP_K","type=f32,ne=[2049,2,1,3],k=15","support","1","yes","Vulkan"
|
||||
"Vulkan0","UPSCALE","type=f32,ne=[512,512,3,2],scale_factor=2,mode=nearest,transpose=0","support","1","yes","Vulkan"
|
||||
"Vulkan0","UPSCALE","type=f32,ne=[512,512,3,2],scale_factor=2,mode=nearest,transpose=1","support","1","yes","Vulkan"
|
||||
"Vulkan0","UPSCALE","type=f32,ne=[2,5,7,11],ne_tgt=[5,7,11,13],mode=nearest,flags=none","support","1","yes","Vulkan"
|
||||
|
|
@ -9445,6 +9859,10 @@
|
|||
"Vulkan0","UPSCALE","type=f32,ne=[512,512,3,2],scale_factor=2,mode=bicubic,transpose=1","support","1","yes","Vulkan"
|
||||
"Vulkan0","UPSCALE","type=f32,ne=[2,5,7,11],ne_tgt=[5,7,11,13],mode=bicubic,flags=none","support","1","yes","Vulkan"
|
||||
"Vulkan0","UPSCALE","type=f32,ne=[5,7,11,13],ne_tgt=[2,5,7,11],mode=bicubic,flags=none","support","1","yes","Vulkan"
|
||||
"Vulkan0","UPSCALE","type=f32,ne=[512,512,3,2],scale_factor=2,mode=513,transpose=0","support","0","no","Vulkan"
|
||||
"Vulkan0","UPSCALE","type=f32,ne=[512,512,3,2],scale_factor=2,mode=513,transpose=1","support","0","no","Vulkan"
|
||||
"Vulkan0","UPSCALE","type=f32,ne=[2,5,7,11],ne_tgt=[5,7,11,13],mode=bilinear,flags=none","support","0","no","Vulkan"
|
||||
"Vulkan0","UPSCALE","type=f32,ne=[5,7,11,13],ne_tgt=[2,5,7,11],mode=bilinear,flags=none","support","0","no","Vulkan"
|
||||
"Vulkan0","UPSCALE","type=f32,ne=[2,5,7,11],ne_tgt=[5,7,11,13],mode=bilinear,flags=align_corners","support","1","yes","Vulkan"
|
||||
"Vulkan0","UPSCALE","type=f32,ne=[1,4,3,2],ne_tgt=[2,8,3,2],mode=bilinear,flags=align_corners","support","1","yes","Vulkan"
|
||||
"Vulkan0","UPSCALE","type=f32,ne=[4,1,3,2],ne_tgt=[1,1,3,2],mode=bilinear,flags=align_corners","support","1","yes","Vulkan"
|
||||
|
|
@ -9479,23 +9897,37 @@
|
|||
"Vulkan0","PAD_REFLECT_1D","type=f32,ne_a=[3000,384,4,1],pad_0=10,pad_1=9","support","0","no","Vulkan"
|
||||
"Vulkan0","ROLL","shift0=3,shift1=-2,shift3=1,shift4=-1","support","1","yes","Vulkan"
|
||||
"Vulkan0","ARANGE","type=f32,start=0.000000,stop=10.000000,step=1.000000","support","1","yes","Vulkan"
|
||||
"Vulkan0","ARANGE","type=f32,start=0.000000,stop=1048576.000000,step=1.000000","support","1","yes","Vulkan"
|
||||
"Vulkan0","TIMESTEP_EMBEDDING","type=f32,ne_a=[2,1,1,1],dim=320,max_period=10000","support","1","yes","Vulkan"
|
||||
"Vulkan0","LEAKY_RELU","type=f32,ne_a=[10,5,4,3],negative_slope=0.100000","support","1","yes","Vulkan"
|
||||
"Vulkan0","CUMSUM","type=f32,ne=[10,5,4,3]","support","0","no","Vulkan"
|
||||
"Vulkan0","CUMSUM","type=f32,ne=[10,5,4,3]","support","1","yes","Vulkan"
|
||||
"Vulkan0","CUMSUM","type=f32,ne=[127,5,4,3]","support","1","yes","Vulkan"
|
||||
"Vulkan0","CUMSUM","type=f32,ne=[128,5,4,3]","support","1","yes","Vulkan"
|
||||
"Vulkan0","CUMSUM","type=f32,ne=[255,5,4,3]","support","1","yes","Vulkan"
|
||||
"Vulkan0","CUMSUM","type=f32,ne=[256,5,4,3]","support","1","yes","Vulkan"
|
||||
"Vulkan0","CUMSUM","type=f32,ne=[511,5,4,3]","support","1","yes","Vulkan"
|
||||
"Vulkan0","CUMSUM","type=f32,ne=[512,5,4,3]","support","1","yes","Vulkan"
|
||||
"Vulkan0","CUMSUM","type=f32,ne=[1023,5,4,3]","support","1","yes","Vulkan"
|
||||
"Vulkan0","CUMSUM","type=f32,ne=[1024,5,4,3]","support","1","yes","Vulkan"
|
||||
"Vulkan0","CUMSUM","type=f32,ne=[2047,5,4,3]","support","1","yes","Vulkan"
|
||||
"Vulkan0","CUMSUM","type=f32,ne=[2048,5,4,3]","support","1","yes","Vulkan"
|
||||
"Vulkan0","CUMSUM","type=f32,ne=[242004,1,1,1]","support","1","yes","Vulkan"
|
||||
"Vulkan0","CUMSUM","type=f32,ne=[375960,1,1,1]","support","1","yes","Vulkan"
|
||||
"Vulkan0","XIELU","type=f32,ne=[10,5,4,3]","support","0","no","Vulkan"
|
||||
"Vulkan0","TRI","type=f32,ne=[10,10,4,3],tri_type=3","support","0","no","Vulkan"
|
||||
"Vulkan0","TRI","type=f32,ne=[10,10,4,3],tri_type=2","support","0","no","Vulkan"
|
||||
"Vulkan0","TRI","type=f32,ne=[10,10,4,3],tri_type=1","support","0","no","Vulkan"
|
||||
"Vulkan0","TRI","type=f32,ne=[10,10,4,3],tri_type=0","support","0","no","Vulkan"
|
||||
"Vulkan0","TRI","type=f32,ne=[10,10,4,3],tri_type=3","support","1","yes","Vulkan"
|
||||
"Vulkan0","TRI","type=f32,ne=[10,10,4,3],tri_type=2","support","1","yes","Vulkan"
|
||||
"Vulkan0","TRI","type=f32,ne=[10,10,4,3],tri_type=1","support","1","yes","Vulkan"
|
||||
"Vulkan0","TRI","type=f32,ne=[10,10,4,3],tri_type=0","support","1","yes","Vulkan"
|
||||
"Vulkan0","FILL","type=f32,ne=[10,10,4,3],c=0.000000","support","1","yes","Vulkan"
|
||||
"Vulkan0","FILL","type=f32,ne=[303,207,11,3],c=2.000000","support","1","yes","Vulkan"
|
||||
"Vulkan0","FILL","type=f32,ne=[800,600,4,4],c=-152.000000","support","1","yes","Vulkan"
|
||||
"Vulkan0","SOLVE_TRI","type=f32,ne_lhs=[10,10,4,3],ne_rhs=[3,10,4,3]","support","0","no","Vulkan"
|
||||
"Vulkan0","SOLVE_TRI","type=f32,ne_lhs=[11,11,1,1],ne_rhs=[5,11,1,1]","support","0","no","Vulkan"
|
||||
"Vulkan0","SOLVE_TRI","type=f32,ne_lhs=[17,17,2,4],ne_rhs=[9,17,2,4]","support","0","no","Vulkan"
|
||||
"Vulkan0","SOLVE_TRI","type=f32,ne_lhs=[30,30,7,1],ne_rhs=[8,30,7,1]","support","0","no","Vulkan"
|
||||
"Vulkan0","SOLVE_TRI","type=f32,ne_lhs=[42,42,5,2],ne_rhs=[10,42,5,2]","support","0","no","Vulkan"
|
||||
"Vulkan0","SOLVE_TRI","type=f32,ne_lhs=[64,64,2,2],ne_rhs=[10,64,2,2]","support","0","no","Vulkan"
|
||||
"Vulkan0","FILL","type=f32,ne=[2048,512,2,2],c=3.500000","support","1","yes","Vulkan"
|
||||
"Vulkan0","SOLVE_TRI","type=f32,ne_lhs=[10,10,4,3],ne_rhs=[3,10,4,3]","support","1","yes","Vulkan"
|
||||
"Vulkan0","SOLVE_TRI","type=f32,ne_lhs=[11,11,1,1],ne_rhs=[5,11,1,1]","support","1","yes","Vulkan"
|
||||
"Vulkan0","SOLVE_TRI","type=f32,ne_lhs=[17,17,2,4],ne_rhs=[9,17,2,4]","support","1","yes","Vulkan"
|
||||
"Vulkan0","SOLVE_TRI","type=f32,ne_lhs=[30,30,7,1],ne_rhs=[8,30,7,1]","support","1","yes","Vulkan"
|
||||
"Vulkan0","SOLVE_TRI","type=f32,ne_lhs=[42,42,5,2],ne_rhs=[10,42,5,2]","support","1","yes","Vulkan"
|
||||
"Vulkan0","SOLVE_TRI","type=f32,ne_lhs=[64,64,2,2],ne_rhs=[10,64,2,2]","support","1","yes","Vulkan"
|
||||
"Vulkan0","SOLVE_TRI","type=f32,ne_lhs=[100,100,4,4],ne_rhs=[41,100,4,4]","support","0","no","Vulkan"
|
||||
"Vulkan0","PAD","type=f32,ne_a=[512,512,1,1],lp0=0,rp0=1,lp1=0,rp1=1,lp2=0,rp2=0,lp3=0,rp3=0,v=0","support","1","yes","Vulkan"
|
||||
"Vulkan0","PAD","type=f32,ne_a=[11,22,33,44],lp0=1,rp0=2,lp1=3,rp1=4,lp2=5,rp2=6,lp3=7,rp3=8,v=0","support","1","yes","Vulkan"
|
||||
|
|
|
|||
|
Can't render this file because it is too large.
|
|
|
@ -989,6 +989,10 @@ struct ggml_cuda_concurrent_event {
|
|||
int n_streams = 0;
|
||||
std::unordered_map<const ggml_tensor *, int> stream_mapping;
|
||||
|
||||
// Original order of nodes in this concurrent region (before interleaving)
|
||||
// Used to restore grouping for fusion within streams
|
||||
std::vector<const ggml_tensor *> original_order;
|
||||
|
||||
const ggml_tensor * join_node;
|
||||
|
||||
ggml_cuda_concurrent_event() = default;
|
||||
|
|
@ -1011,6 +1015,7 @@ struct ggml_cuda_concurrent_event {
|
|||
, fork_event(other.fork_event)
|
||||
, n_streams(other.n_streams)
|
||||
, stream_mapping(std::move(other.stream_mapping))
|
||||
, original_order(std::move(other.original_order))
|
||||
, join_node(other.join_node) {
|
||||
other.fork_event = nullptr;
|
||||
}
|
||||
|
|
@ -1121,11 +1126,9 @@ struct ggml_cuda_concurrent_event {
|
|||
};
|
||||
|
||||
struct ggml_cuda_stream_context {
|
||||
std::vector<const ggml_tensor *> original_nodes;
|
||||
std::unordered_map<const ggml_tensor *, ggml_cuda_concurrent_event> concurrent_events;
|
||||
|
||||
void reset() {
|
||||
original_nodes.clear();
|
||||
concurrent_events.clear();
|
||||
}
|
||||
};
|
||||
|
|
|
|||
|
|
@ -3246,9 +3246,56 @@ static void evaluate_and_capture_cuda_graph(ggml_backend_cuda_context * cuda_ctx
|
|||
}
|
||||
}
|
||||
if (should_launch_concurrent_events) {
|
||||
//Restore the original graph to enable fusion within the streams
|
||||
cgraph->nodes = const_cast<ggml_tensor **>(stream_ctx.original_nodes.data());
|
||||
cgraph->n_nodes = (int) stream_ctx.original_nodes.size();
|
||||
// Restore original node order within each concurrent region to enable fusion within streams
|
||||
|
||||
std::unordered_map<const ggml_tensor *, int> node_to_idx;
|
||||
node_to_idx.reserve(cgraph->n_nodes);
|
||||
for (int i = 0; i < cgraph->n_nodes; ++i) {
|
||||
node_to_idx[cgraph->nodes[i]] = i;
|
||||
}
|
||||
|
||||
for (auto & [fork_node, event] : stream_ctx.concurrent_events) {
|
||||
// Find positions of all nodes from this event in the current graph
|
||||
std::vector<int> positions;
|
||||
positions.reserve(event.original_order.size());
|
||||
|
||||
bool all_found = true;
|
||||
for (const ggml_tensor * orig_node : event.original_order) {
|
||||
auto it = node_to_idx.find(orig_node);
|
||||
if (it != node_to_idx.end()) {
|
||||
positions.push_back(it->second);
|
||||
} else {
|
||||
all_found = false;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (!all_found || positions.size() != event.original_order.size()) {
|
||||
continue;
|
||||
}
|
||||
|
||||
// Sort positions to get contiguous range
|
||||
std::vector<int> sorted_positions = positions;
|
||||
std::sort(sorted_positions.begin(), sorted_positions.end());
|
||||
|
||||
bool is_contiguous = true;
|
||||
for (size_t i = 1; i < sorted_positions.size(); ++i) {
|
||||
if (sorted_positions[i] != sorted_positions[i-1] + 1) {
|
||||
is_contiguous = false;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (!is_contiguous) {
|
||||
continue;
|
||||
}
|
||||
|
||||
// Restore original order at the sorted positions
|
||||
int start_pos = sorted_positions[0];
|
||||
for (size_t i = 0; i < event.original_order.size(); ++i) {
|
||||
cgraph->nodes[start_pos + i] = const_cast<ggml_tensor *>(event.original_order[i]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for (int i = 0; i < cgraph->n_nodes; i++) {
|
||||
|
|
@ -3813,14 +3860,6 @@ static void ggml_backend_cuda_graph_optimize(ggml_backend_t backend, ggml_cgraph
|
|||
// store {fork_idx, join_idx}
|
||||
std::vector<std::pair<int, int>> concurrent_node_ranges;
|
||||
|
||||
// save the original nodes
|
||||
std::vector<const ggml_tensor *> original_nodes;
|
||||
original_nodes.reserve(cgraph->n_nodes);
|
||||
for (int i = 0; i < cgraph->n_nodes; ++i) {
|
||||
original_nodes.push_back(cgraph->nodes[i]);
|
||||
}
|
||||
cuda_ctx->stream_context().original_nodes = std::move(original_nodes);
|
||||
|
||||
for (const auto & [root_node, count] : fan_out) {
|
||||
if (count >= min_fan_out && count <= max_fan_out) {
|
||||
const int root_node_idx = node_indices[root_node];
|
||||
|
|
@ -3925,6 +3964,13 @@ static void ggml_backend_cuda_graph_optimize(ggml_backend_t backend, ggml_cgraph
|
|||
continue;
|
||||
}
|
||||
|
||||
// Save the original order of nodes in this region before interleaving
|
||||
// This is used later to restore grouping for fusion within streams
|
||||
concurrent_event.original_order.reserve(total_branch_nodes);
|
||||
for (int i = fork_node_idx + 1; i < join_node_idx; ++i) {
|
||||
concurrent_event.original_order.push_back(cgraph->nodes[i]);
|
||||
}
|
||||
|
||||
std::unordered_map<const ggml_tensor *, ggml_cuda_concurrent_event> & concurrent_events = cuda_ctx->stream_context().concurrent_events;
|
||||
GGML_ASSERT(concurrent_events.find(root_node) == concurrent_events.end());
|
||||
concurrent_events.emplace(root_node, std::move(concurrent_event));
|
||||
|
|
|
|||
|
|
@ -1787,6 +1787,7 @@ static void argsort_f32_i32_sycl(const float *x, int *dst, const int ncols,
|
|||
const sycl::range<3> block_dims(1, 1, nth);
|
||||
const sycl::range<3> block_nums(1, nrows, 1);
|
||||
const size_t shared_mem = ncols_pad * sizeof(int);
|
||||
GGML_ASSERT(shared_mem<=ggml_sycl_info().devices[device].smpbo);
|
||||
|
||||
if (order == GGML_SORT_ORDER_ASC) {
|
||||
stream->submit([&](sycl::handler &cgh) {
|
||||
|
|
@ -4348,6 +4349,9 @@ static ggml_backend_buffer_t ggml_backend_sycl_device_buffer_from_host_ptr(ggml_
|
|||
}
|
||||
|
||||
static bool ggml_backend_sycl_device_supports_op(ggml_backend_dev_t dev, const ggml_tensor * op) {
|
||||
ggml_backend_sycl_device_context *sycl_ctx =
|
||||
(ggml_backend_sycl_device_context *)dev->context;
|
||||
int device = sycl_ctx->device;
|
||||
switch (op->op) {
|
||||
case GGML_OP_CONV_TRANSPOSE_1D:
|
||||
{
|
||||
|
|
@ -4601,8 +4605,10 @@ static bool ggml_backend_sycl_device_supports_op(ggml_backend_dev_t dev, const g
|
|||
case GGML_OP_SUM:
|
||||
case GGML_OP_SUM_ROWS:
|
||||
case GGML_OP_MEAN:
|
||||
case GGML_OP_ARGSORT:
|
||||
return ggml_is_contiguous(op->src[0]);
|
||||
case GGML_OP_ARGSORT:
|
||||
return op->src[0]->ne[0] * sizeof(int) <=
|
||||
ggml_sycl_info().devices[device].smpbo;
|
||||
case GGML_OP_POOL_2D:
|
||||
case GGML_OP_ACC:
|
||||
return true;
|
||||
|
|
|
|||
|
|
@ -17,6 +17,8 @@ vendor = {
|
|||
"https://github.com/mackron/miniaudio/raw/669ed3e844524fcd883231b13095baee9f6de304/miniaudio.h": "vendor/miniaudio/miniaudio.h",
|
||||
|
||||
"https://raw.githubusercontent.com/yhirose/cpp-httplib/refs/tags/v0.28.0/httplib.h": "vendor/cpp-httplib/httplib.h",
|
||||
|
||||
"https://raw.githubusercontent.com/sheredom/subprocess.h/b49c56e9fe214488493021017bf3954b91c7c1f5/subprocess.h": "vendor/sheredom/subprocess.h",
|
||||
}
|
||||
|
||||
for url, filename in vendor.items():
|
||||
|
|
|
|||
|
|
@ -855,7 +855,7 @@ static const std::map<llm_arch, std::map<llm_tensor, const char *>> LLM_TENSOR_N
|
|||
{ LLM_TENSOR_FFN_GATE_SHEXP, "blk.%d.ffn_gate_shexp" },
|
||||
{ LLM_TENSOR_FFN_DOWN_SHEXP, "blk.%d.ffn_down_shexp" },
|
||||
{ LLM_TENSOR_FFN_UP_SHEXP, "blk.%d.ffn_up_shexp" },
|
||||
{ LLM_TENSOR_SSM_A, "blk.%d.ssm_a" },
|
||||
{ LLM_TENSOR_SSM_A_NOSCAN, "blk.%d.ssm_a" },
|
||||
{ LLM_TENSOR_SSM_CONV1D, "blk.%d.ssm_conv1d" },
|
||||
{ LLM_TENSOR_SSM_DT, "blk.%d.ssm_dt" },
|
||||
{ LLM_TENSOR_SSM_BETA_ALPHA, "blk.%d.ssm_ba" },
|
||||
|
|
@ -2639,6 +2639,7 @@ static const std::map<llm_tensor, llm_tensor_info> LLM_TENSOR_INFOS = {
|
|||
{LLM_TENSOR_FFN_ACT, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_DIV}},
|
||||
{LLM_TENSOR_SSM_CONV1D, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_SSM_CONV}},
|
||||
{LLM_TENSOR_SSM_A, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_SSM_SCAN}},
|
||||
{LLM_TENSOR_SSM_A_NOSCAN, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}}, // a version of SSM_A used for MUL instead of SSM_SCAN
|
||||
{LLM_TENSOR_SSM_DT_NORM, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}},
|
||||
{LLM_TENSOR_SSM_B_NORM, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}},
|
||||
{LLM_TENSOR_SSM_C_NORM, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}},
|
||||
|
|
|
|||
|
|
@ -379,6 +379,7 @@ enum llm_tensor {
|
|||
LLM_TENSOR_SSM_DT,
|
||||
LLM_TENSOR_SSM_DT_NORM,
|
||||
LLM_TENSOR_SSM_A,
|
||||
LLM_TENSOR_SSM_A_NOSCAN, // qwen3next special case with MUL instead of SSM_SCAN
|
||||
LLM_TENSOR_SSM_B_NORM,
|
||||
LLM_TENSOR_SSM_C_NORM,
|
||||
LLM_TENSOR_SSM_D,
|
||||
|
|
|
|||
|
|
@ -626,6 +626,8 @@ void llama_model::load_hparams(llama_model_loader & ml) {
|
|||
switch (arch) {
|
||||
case LLM_ARCH_LLAMA:
|
||||
{
|
||||
ml.get_key(LLM_KV_ATTENTION_LAYERNORM_RMS_EPS, hparams.f_norm_rms_eps);
|
||||
|
||||
if (hparams.n_expert == 8) {
|
||||
switch (hparams.n_layer) {
|
||||
case 32: type = LLM_TYPE_8x7B; break;
|
||||
|
|
@ -6524,7 +6526,7 @@ bool llama_model::load_tensors(llama_model_loader & ml) {
|
|||
layer.ssm_in = create_tensor(tn(LLM_TENSOR_SSM_IN, "weight", i), { n_embd, qkvz_dim }, 0);
|
||||
layer.ssm_conv1d = create_tensor(tn(LLM_TENSOR_SSM_CONV1D, "weight", i), { hparams.ssm_d_conv, conv_dim }, 0);
|
||||
layer.ssm_dt = create_tensor(tn(LLM_TENSOR_SSM_DT, "bias", i), { hparams.ssm_dt_rank }, 0);
|
||||
layer.ssm_a = create_tensor(tn(LLM_TENSOR_SSM_A, i), { hparams.ssm_dt_rank }, 0);
|
||||
layer.ssm_a = create_tensor(tn(LLM_TENSOR_SSM_A_NOSCAN, i), { hparams.ssm_dt_rank }, 0);
|
||||
layer.ssm_beta_alpha = create_tensor(tn(LLM_TENSOR_SSM_BETA_ALPHA, "weight", i), { n_embd, ba_dim }, 0);
|
||||
layer.ssm_norm = create_tensor(tn(LLM_TENSOR_SSM_NORM, "weight", i), { head_v_dim }, 0);
|
||||
layer.ssm_out = create_tensor(tn(LLM_TENSOR_SSM_OUT, "weight", i), { value_dim, n_embd }, 0);
|
||||
|
|
|
|||
|
|
@ -23,7 +23,7 @@
|
|||
#endif
|
||||
|
||||
struct quantize_stats_params {
|
||||
std::string model = DEFAULT_MODEL_PATH;
|
||||
std::string model = "models/7B/ggml-model-f16.gguf";
|
||||
bool verbose = false;
|
||||
bool per_layer_stats = false;
|
||||
bool print_histogram = false;
|
||||
|
|
|
|||
|
|
@ -3526,15 +3526,19 @@ struct clip_init_result clip_init(const char * fname, struct clip_context_params
|
|||
ctx_vision = new clip_ctx(ctx_params);
|
||||
loader.load_hparams(ctx_vision->model, CLIP_MODALITY_VISION);
|
||||
loader.load_tensors(*ctx_vision);
|
||||
if (ctx_params.warmup) {
|
||||
loader.warmup(*ctx_vision);
|
||||
}
|
||||
}
|
||||
|
||||
if (loader.has_audio) {
|
||||
ctx_audio = new clip_ctx(ctx_params);
|
||||
loader.load_hparams(ctx_audio->model, CLIP_MODALITY_AUDIO);
|
||||
loader.load_tensors(*ctx_audio);
|
||||
if (ctx_params.warmup) {
|
||||
loader.warmup(*ctx_audio);
|
||||
}
|
||||
}
|
||||
|
||||
} catch (const std::exception & e) {
|
||||
LOG_ERR("%s: failed to load model '%s': %s\n", __func__, fname, e.what());
|
||||
|
|
|
|||
|
|
@ -34,6 +34,7 @@ struct clip_context_params {
|
|||
enum clip_flash_attn_type flash_attn_type;
|
||||
int image_min_tokens;
|
||||
int image_max_tokens;
|
||||
bool warmup;
|
||||
};
|
||||
|
||||
struct clip_init_result {
|
||||
|
|
|
|||
|
|
@ -136,6 +136,7 @@ struct mtmd_cli_context {
|
|||
mparams.print_timings = true;
|
||||
mparams.n_threads = params.cpuparams.n_threads;
|
||||
mparams.flash_attn_type = params.flash_attn_type;
|
||||
mparams.warmup = params.warmup;
|
||||
mparams.image_min_tokens = params.image_min_tokens;
|
||||
mparams.image_max_tokens = params.image_max_tokens;
|
||||
ctx_vision.reset(mtmd_init_from_file(clip_path, model, mparams));
|
||||
|
|
|
|||
|
|
@ -108,6 +108,7 @@ mtmd_context_params mtmd_context_params_default() {
|
|||
/* image_marker */ MTMD_DEFAULT_IMAGE_MARKER,
|
||||
/* media_marker */ mtmd_default_marker(),
|
||||
/* flash_attn_type */ LLAMA_FLASH_ATTN_TYPE_AUTO,
|
||||
/* warmup */ true,
|
||||
/* image_min_tokens */ -1,
|
||||
/* image_max_tokens */ -1,
|
||||
};
|
||||
|
|
@ -177,6 +178,7 @@ struct mtmd_context {
|
|||
/* flash_attn_type */ CLIP_FLASH_ATTN_TYPE_AUTO,
|
||||
/* image_min_tokens */ ctx_params.image_min_tokens,
|
||||
/* image_max_tokens */ ctx_params.image_max_tokens,
|
||||
/* warmup */ ctx_params.warmup,
|
||||
};
|
||||
|
||||
auto res = clip_init(mmproj_fname, ctx_clip_params);
|
||||
|
|
|
|||
|
|
@ -82,6 +82,7 @@ struct mtmd_context_params {
|
|||
const char * image_marker; // deprecated, use media_marker instead
|
||||
const char * media_marker;
|
||||
enum llama_flash_attn_type flash_attn_type;
|
||||
bool warmup; // whether to run a warmup encode pass after initialization
|
||||
|
||||
// limit number of image tokens, only for vision models with dynamic resolution
|
||||
int image_min_tokens; // minimum number of tokens for image input (default: read from metadata)
|
||||
|
|
|
|||
|
|
@ -15,6 +15,8 @@ set(TARGET_SRCS
|
|||
server.cpp
|
||||
server-http.cpp
|
||||
server-http.h
|
||||
server-models.cpp
|
||||
server-models.h
|
||||
server-task.cpp
|
||||
server-task.h
|
||||
server-queue.cpp
|
||||
|
|
|
|||
|
|
@ -52,7 +52,7 @@ The project is under active development, and we are [looking for feedback and co
|
|||
| `-ub, --ubatch-size N` | physical maximum batch size (default: 512)<br/>(env: LLAMA_ARG_UBATCH) |
|
||||
| `--keep N` | number of tokens to keep from the initial prompt (default: 0, -1 = all) |
|
||||
| `--swa-full` | use full-size SWA cache (default: false)<br/>[(more info)](https://github.com/ggml-org/llama.cpp/pull/13194#issuecomment-2868343055)<br/>(env: LLAMA_ARG_SWA_FULL) |
|
||||
| `--kv-unified, -kvu` | use single unified KV buffer for the KV cache of all sequences (default: false)<br/>[(more info)](https://github.com/ggml-org/llama.cpp/pull/14363)<br/>(env: LLAMA_ARG_KV_SPLIT) |
|
||||
| `--kv-unified, -kvu` | use single unified KV buffer for the KV cache of all sequences (default: false)<br/>[(more info)](https://github.com/ggml-org/llama.cpp/pull/14363)<br/>(env: LLAMA_ARG_KV_UNIFIED) |
|
||||
| `-fa, --flash-attn [on\|off\|auto]` | set Flash Attention use ('on', 'off', or 'auto', default: 'auto')<br/>(env: LLAMA_ARG_FLASH_ATTN) |
|
||||
| `--no-perf` | disable internal libllama performance timings (default: false)<br/>(env: LLAMA_ARG_NO_PERF) |
|
||||
| `-e, --escape` | process escapes sequences (\n, \r, \t, \', \", \\) (default: true) |
|
||||
|
|
@ -93,7 +93,7 @@ The project is under active development, and we are [looking for feedback and co
|
|||
| `--control-vector FNAME` | add a control vector<br/>note: this argument can be repeated to add multiple control vectors |
|
||||
| `--control-vector-scaled FNAME SCALE` | add a control vector with user defined scaling SCALE<br/>note: this argument can be repeated to add multiple scaled control vectors |
|
||||
| `--control-vector-layer-range START END` | layer range to apply the control vector(s) to, start and end inclusive |
|
||||
| `-m, --model FNAME` | model path (default: `models/$filename` with filename from `--hf-file` or `--model-url` if set, otherwise models/7B/ggml-model-f16.gguf)<br/>(env: LLAMA_ARG_MODEL) |
|
||||
| `-m, --model FNAME` | model path to load<br/>(env: LLAMA_ARG_MODEL) |
|
||||
| `-mu, --model-url MODEL_URL` | model download url (default: unused)<br/>(env: LLAMA_ARG_MODEL_URL) |
|
||||
| `-dr, --docker-repo [<repo>/]<model>[:quant]` | Docker Hub model repository. repo is optional, default to ai/. quant is optional, default to :latest.<br/>example: gemma3<br/>(default: unused)<br/>(env: LLAMA_ARG_DOCKER_REPO) |
|
||||
| `-hf, -hfr, --hf-repo <user>/<model>[:quant]` | Hugging Face model repository; quant is optional, case-insensitive, default to Q4_K_M, or falls back to the first file in the repo if Q4_K_M doesn't exist.<br/>mmproj is also downloaded automatically if available. to disable, add --no-mmproj<br/>example: unsloth/phi-4-GGUF:q4_k_m<br/>(default: unused)<br/>(env: LLAMA_ARG_HF_REPO) |
|
||||
|
|
@ -103,11 +103,11 @@ The project is under active development, and we are [looking for feedback and co
|
|||
| `-hffv, --hf-file-v FILE` | Hugging Face model file for the vocoder model (default: unused)<br/>(env: LLAMA_ARG_HF_FILE_V) |
|
||||
| `-hft, --hf-token TOKEN` | Hugging Face access token (default: value from HF_TOKEN environment variable)<br/>(env: HF_TOKEN) |
|
||||
| `--log-disable` | Log disable |
|
||||
| `--log-file FNAME` | Log to file |
|
||||
| `--log-file FNAME` | Log to file<br/>(env: LLAMA_LOG_FILE) |
|
||||
| `--log-colors [on\|off\|auto]` | Set colored logging ('on', 'off', or 'auto', default: 'auto')<br/>'auto' enables colors when output is to a terminal<br/>(env: LLAMA_LOG_COLORS) |
|
||||
| `-v, --verbose, --log-verbose` | Set verbosity level to infinity (i.e. log all messages, useful for debugging) |
|
||||
| `--offline` | Offline mode: forces use of cache, prevents network access<br/>(env: LLAMA_OFFLINE) |
|
||||
| `-lv, --verbosity, --log-verbosity N` | Set the verbosity threshold. Messages with a higher verbosity will be ignored.<br/>(env: LLAMA_LOG_VERBOSITY) |
|
||||
| `-lv, --verbosity, --log-verbosity N` | Set the verbosity threshold. Messages with a higher verbosity will be ignored. Values:<br/> - 0: generic output<br/> - 1: error<br/> - 2: warning<br/> - 3: info<br/> - 4: debug<br/>(default: 3)<br/><br/>(env: LLAMA_LOG_VERBOSITY) |
|
||||
| `--log-prefix` | Enable prefix in log messages<br/>(env: LLAMA_LOG_PREFIX) |
|
||||
| `--log-timestamps` | Enable timestamps in log messages<br/>(env: LLAMA_LOG_TIMESTAMPS) |
|
||||
| `-ctkd, --cache-type-k-draft TYPE` | KV cache data type for K for the draft model<br/>allowed values: f32, f16, bf16, q8_0, q4_0, q4_1, iq4_nl, q5_0, q5_1<br/>(default: f16)<br/>(env: LLAMA_ARG_CACHE_TYPE_K_DRAFT) |
|
||||
|
|
@ -196,6 +196,10 @@ The project is under active development, and we are [looking for feedback and co
|
|||
| `--slots` | enable slots monitoring endpoint (default: enabled)<br/>(env: LLAMA_ARG_ENDPOINT_SLOTS) |
|
||||
| `--no-slots` | disables slots monitoring endpoint<br/>(env: LLAMA_ARG_NO_ENDPOINT_SLOTS) |
|
||||
| `--slot-save-path PATH` | path to save slot kv cache (default: disabled) |
|
||||
| `--models-dir PATH` | directory containing models for the router server (default: disabled)<br/>(env: LLAMA_ARG_MODELS_DIR) |
|
||||
| `--models-max N` | for router server, maximum number of models to load simultaneously (default: 4, 0 = unlimited)<br/>(env: LLAMA_ARG_MODELS_MAX) |
|
||||
| `--models-allow-extra-args` | for router server, allow extra arguments for models; important: some arguments can allow users to access local file system, use with caution (default: disabled)<br/>(env: LLAMA_ARG_MODELS_ALLOW_EXTRA_ARGS) |
|
||||
| `--no-models-autoload` | disables automatic loading of models (default: enabled)<br/>(env: LLAMA_ARG_NO_MODELS_AUTOLOAD) |
|
||||
| `--jinja` | use jinja template for chat (default: enabled)<br/><br/>(env: LLAMA_ARG_JINJA) |
|
||||
| `--no-jinja` | disable jinja template for chat (default: enabled)<br/><br/>(env: LLAMA_ARG_NO_JINJA) |
|
||||
| `--reasoning-format FORMAT` | controls whether thought tags are allowed and/or extracted from the response, and in which format they're returned; one of:<br/>- none: leaves thoughts unparsed in `message.content`<br/>- deepseek: puts thoughts in `message.reasoning_content`<br/>- deepseek-legacy: keeps `<think>` tags in `message.content` while also populating `message.reasoning_content`<br/>(default: auto)<br/>(env: LLAMA_ARG_THINK) |
|
||||
|
|
@ -287,38 +291,66 @@ For more details, please refer to [multimodal documentation](../../docs/multimod
|
|||
|
||||
## Web UI
|
||||
|
||||
The project includes a web-based user interface that enables interaction with the model through the `/v1/chat/completions` endpoint.
|
||||
The project includes a web-based user interface for interacting with `llama-server`. It supports both single-model (`MODEL` mode) and multi-model (`ROUTER` mode) operation.
|
||||
|
||||
The web UI is developed using:
|
||||
- `react` framework for frontend development
|
||||
- `tailwindcss` and `daisyui` for styling
|
||||
- `vite` for build tooling
|
||||
### Features
|
||||
|
||||
A pre-built version is available as a single HTML file under `/public` directory.
|
||||
- **Chat interface** with streaming responses
|
||||
- **Multi-model support** (ROUTER mode) - switch between models, auto-load on selection
|
||||
- **Modality validation** - ensures selected model supports conversation's attachments (images, audio)
|
||||
- **Conversation management** - branching, regeneration, editing with history preservation
|
||||
- **Attachment support** - images, audio, PDFs (with vision/text fallback)
|
||||
- **Configurable parameters** - temperature, top_p, etc. synced with server defaults
|
||||
- **Dark/light theme**
|
||||
|
||||
To build or to run the dev server (with hot reload):
|
||||
### Tech Stack
|
||||
|
||||
- **SvelteKit** - frontend framework with Svelte 5 runes for reactive state
|
||||
- **TailwindCSS** + **shadcn-svelte** - styling and UI components
|
||||
- **Vite** - build tooling
|
||||
- **IndexedDB** (Dexie) - local storage for conversations
|
||||
- **LocalStorage** - user settings persistence
|
||||
|
||||
### Architecture
|
||||
|
||||
The WebUI follows a layered architecture:
|
||||
|
||||
```
|
||||
Routes → Components → Hooks → Stores → Services → Storage/API
|
||||
```
|
||||
|
||||
- **Stores** - reactive state management (`chatStore`, `conversationsStore`, `modelsStore`, `serverStore`, `settingsStore`)
|
||||
- **Services** - stateless API/database communication (`ChatService`, `ModelsService`, `PropsService`, `DatabaseService`)
|
||||
- **Hooks** - reusable logic (`useModelChangeValidation`, `useProcessingState`)
|
||||
|
||||
For detailed architecture diagrams, see [`tools/server/webui/docs/`](webui/docs/):
|
||||
|
||||
- `high-level-architecture.mmd` - full architecture with all modules
|
||||
- `high-level-architecture-simplified.mmd` - simplified overview
|
||||
- `data-flow-simplified-model-mode.mmd` - data flow for single-model mode
|
||||
- `data-flow-simplified-router-mode.mmd` - data flow for multi-model mode
|
||||
- `flows/*.mmd` - detailed per-domain flows (chat, conversations, models, etc.)
|
||||
|
||||
### Development
|
||||
|
||||
```sh
|
||||
# make sure you have nodejs installed
|
||||
# make sure you have Node.js installed
|
||||
cd tools/server/webui
|
||||
npm i
|
||||
|
||||
# to run the dev server
|
||||
# run dev server (with hot reload)
|
||||
npm run dev
|
||||
|
||||
# to build the public/index.html.gz
|
||||
# run tests
|
||||
npm run test
|
||||
|
||||
# build production bundle
|
||||
npm run build
|
||||
```
|
||||
After `public/index.html.gz` has been generated we need to generate the c++
|
||||
headers (like build/tools/server/index.html.gz.hpp) that will be included
|
||||
by server.cpp. This is done by building `llama-server` as described in the
|
||||
[build](#build) section above.
|
||||
|
||||
NOTE: if you are using the vite dev server, you can change the API base URL to llama.cpp. To do that, run this code snippet in browser's console:
|
||||
After `public/index.html.gz` has been generated, rebuild `llama-server` as described in the [build](#build) section to include the updated UI.
|
||||
|
||||
```js
|
||||
localStorage.setItem('base', 'http://localhost:8080')
|
||||
```
|
||||
**Note:** The Vite dev server automatically proxies API requests to `http://localhost:8080`. Make sure `llama-server` is running on that port during development.
|
||||
|
||||
## Quick Start
|
||||
|
||||
|
|
@ -1424,6 +1456,184 @@ curl http://localhost:8080/v1/messages/count_tokens \
|
|||
{"input_tokens": 10}
|
||||
```
|
||||
|
||||
## Using multiple models
|
||||
|
||||
`llama-server` can be launched in a **router mode** that exposes an API for dynamically loading and unloading models. The main process (the "router") automatically forwards each request to the appropriate model instance.
|
||||
|
||||
To start in router mode, launch `llama-server` **without specifying any model**:
|
||||
|
||||
```sh
|
||||
llama-server
|
||||
```
|
||||
|
||||
### Model sources
|
||||
|
||||
By default, the router looks for models in the cache. You can add Hugging Face models to the cache with:
|
||||
|
||||
```sh
|
||||
llama-server -hf <user>/<model>:<tag>
|
||||
```
|
||||
|
||||
*The server must be restarted after adding a new model.*
|
||||
|
||||
Alternatively, you can point the router to a local directory containing your GGUF files using `--models-dir`. Example command:
|
||||
|
||||
```sh
|
||||
llama-server --models-dir ./models_directory
|
||||
```
|
||||
|
||||
If the model contains multiple GGUF (for multimodal or multi-shard), files should be put into a subdirectory. The directory structure should look like this:
|
||||
|
||||
```sh
|
||||
models_directory
|
||||
│
|
||||
│ # single file
|
||||
├─ llama-3.2-1b-Q4_K_M.gguf
|
||||
├─ Qwen3-8B-Q4_K_M.gguf
|
||||
│
|
||||
│ # multimodal
|
||||
├─ gemma-3-4b-it-Q8_0
|
||||
│ ├─ gemma-3-4b-it-Q8_0.gguf
|
||||
│ └─ mmproj-F16.gguf # file name must start with "mmproj"
|
||||
│
|
||||
│ # multi-shard
|
||||
├─ Kimi-K2-Thinking-UD-IQ1_S
|
||||
│ ├─ Kimi-K2-Thinking-UD-IQ1_S-00001-of-00006.gguf
|
||||
│ ├─ Kimi-K2-Thinking-UD-IQ1_S-00002-of-00006.gguf
|
||||
│ ├─ ...
|
||||
│ └─ Kimi-K2-Thinking-UD-IQ1_S-00006-of-00006.gguf
|
||||
```
|
||||
|
||||
You may also specify default arguments that will be passed to every model instance:
|
||||
|
||||
```sh
|
||||
llama-server -ctx 8192 -n 1024 -np 2
|
||||
```
|
||||
|
||||
Note: model instances inherit both command line arguments and environment variables from the router server.
|
||||
|
||||
### Routing requests
|
||||
|
||||
Requests are routed according to the requested model name.
|
||||
|
||||
For **POST** endpoints (`/v1/chat/completions`, `/v1/completions`, `/infill`, etc.) The router uses the `"model"` field in the JSON body:
|
||||
|
||||
```json
|
||||
{
|
||||
"model": "ggml-org/gemma-3-4b-it-GGUF:Q4_K_M",
|
||||
"messages": [
|
||||
{
|
||||
"role": "user",
|
||||
"content": "hello"
|
||||
}
|
||||
]
|
||||
}
|
||||
```
|
||||
|
||||
For **GET** endpoints (`/props`, `/metrics`, etc.) The router uses the `model` query parameter (URL-encoded):
|
||||
|
||||
```
|
||||
GET /props?model=ggml-org%2Fgemma-3-4b-it-GGUF%3AQ4_K_M
|
||||
```
|
||||
|
||||
By default, the model will be loaded automatically if it's not loaded. To disable this, add `--no-models-autoload` when starting the server. Additionally, you can include `?autoload=true|false` in the query param to control this behavior per-request.
|
||||
|
||||
### GET `/models`: List available models
|
||||
|
||||
Listing all models in cache. The model metadata will also include a field to indicate the status of the model:
|
||||
|
||||
```json
|
||||
{
|
||||
"data": [{
|
||||
"id": "ggml-org/gemma-3-4b-it-GGUF:Q4_K_M",
|
||||
"in_cache": true,
|
||||
"path": "/Users/REDACTED/Library/Caches/llama.cpp/ggml-org_gemma-3-4b-it-GGUF_gemma-3-4b-it-Q4_K_M.gguf",
|
||||
"status": {
|
||||
"value": "loaded",
|
||||
"args": ["llama-server", "-ctx", "4096"]
|
||||
},
|
||||
...
|
||||
}]
|
||||
}
|
||||
```
|
||||
|
||||
Note: For a local GGUF (stored offline in a custom directory), the model object will have `"in_cache": false`.
|
||||
|
||||
The `status` object can be:
|
||||
|
||||
```json
|
||||
"status": {
|
||||
"value": "unloaded"
|
||||
}
|
||||
```
|
||||
|
||||
```json
|
||||
"status": {
|
||||
"value": "loading",
|
||||
"args": ["llama-server", "-ctx", "4096"]
|
||||
}
|
||||
```
|
||||
|
||||
```json
|
||||
"status": {
|
||||
"value": "unloaded",
|
||||
"args": ["llama-server", "-ctx", "4096"],
|
||||
"failed": true,
|
||||
"exit_code": 1
|
||||
}
|
||||
```
|
||||
|
||||
```json
|
||||
"status": {
|
||||
"value": "loaded",
|
||||
"args": ["llama-server", "-ctx", "4096"]
|
||||
}
|
||||
```
|
||||
|
||||
### POST `/models/load`: Load a model
|
||||
|
||||
Load a model
|
||||
|
||||
Payload:
|
||||
- `model`: name of the model to be loaded.
|
||||
- `extra_args`: (optional) an array of additional arguments to be passed to the model instance. Note: you must start the server with `--models-allow-extra-args` to enable this feature.
|
||||
|
||||
```json
|
||||
{
|
||||
"model": "ggml-org/gemma-3-4b-it-GGUF:Q4_K_M",
|
||||
"extra_args": ["-n", "128", "--top-k", "4"]
|
||||
}
|
||||
```
|
||||
|
||||
Response:
|
||||
|
||||
```json
|
||||
{
|
||||
"success": true
|
||||
}
|
||||
```
|
||||
|
||||
|
||||
### POST `/models/unload`: Unload a model
|
||||
|
||||
Unload a model
|
||||
|
||||
Payload:
|
||||
|
||||
```json
|
||||
{
|
||||
"model": "ggml-org/gemma-3-4b-it-GGUF:Q4_K_M",
|
||||
}
|
||||
```
|
||||
|
||||
Response:
|
||||
|
||||
```json
|
||||
{
|
||||
"success": true
|
||||
}
|
||||
```
|
||||
|
||||
## More examples
|
||||
|
||||
### Interactive mode
|
||||
|
|
|
|||
Binary file not shown.
|
|
@ -618,6 +618,7 @@ struct server_context_impl {
|
|||
mparams.print_timings = false;
|
||||
mparams.n_threads = params_base.cpuparams.n_threads;
|
||||
mparams.flash_attn_type = params_base.flash_attn_type;
|
||||
mparams.warmup = params_base.warmup;
|
||||
mparams.image_min_tokens = params_base.image_min_tokens;
|
||||
mparams.image_max_tokens = params_base.image_max_tokens;
|
||||
mctx = mtmd_init_from_file(mmproj_path.c_str(), model, mparams);
|
||||
|
|
|
|||
|
|
@ -0,0 +1,920 @@
|
|||
#include "server-common.h"
|
||||
#include "server-models.h"
|
||||
|
||||
#include "download.h"
|
||||
|
||||
#include <cpp-httplib/httplib.h> // TODO: remove this once we use HTTP client from download.h
|
||||
#include <sheredom/subprocess.h>
|
||||
|
||||
#include <functional>
|
||||
#include <thread>
|
||||
#include <mutex>
|
||||
#include <condition_variable>
|
||||
#include <cstring>
|
||||
#include <atomic>
|
||||
#include <chrono>
|
||||
#include <queue>
|
||||
|
||||
#ifdef _WIN32
|
||||
#include <winsock2.h>
|
||||
#else
|
||||
#include <sys/socket.h>
|
||||
#include <netinet/in.h>
|
||||
#include <arpa/inet.h>
|
||||
#include <unistd.h>
|
||||
#endif
|
||||
|
||||
#define CMD_EXIT "exit"
|
||||
|
||||
struct local_model {
|
||||
std::string name;
|
||||
std::string path;
|
||||
std::string path_mmproj;
|
||||
};
|
||||
|
||||
static std::vector<local_model> list_local_models(const std::string & dir) {
|
||||
if (!std::filesystem::exists(dir) || !std::filesystem::is_directory(dir)) {
|
||||
throw std::runtime_error(string_format("error: '%s' does not exist or is not a directory\n", dir.c_str()));
|
||||
}
|
||||
|
||||
std::vector<local_model> models;
|
||||
auto scan_subdir = [&models](const std::string & subdir_path, const std::string & name) {
|
||||
auto files = fs_list(subdir_path, false);
|
||||
common_file_info model_file;
|
||||
common_file_info first_shard_file;
|
||||
common_file_info mmproj_file;
|
||||
for (const auto & file : files) {
|
||||
if (string_ends_with(file.name, ".gguf")) {
|
||||
if (file.name.find("mmproj") != std::string::npos) {
|
||||
mmproj_file = file;
|
||||
} else if (file.name.find("-00001-of-") != std::string::npos) {
|
||||
first_shard_file = file;
|
||||
} else {
|
||||
model_file = file;
|
||||
}
|
||||
}
|
||||
}
|
||||
// single file model
|
||||
local_model model{
|
||||
/* name */ name,
|
||||
/* path */ first_shard_file.path.empty() ? model_file.path : first_shard_file.path,
|
||||
/* path_mmproj */ mmproj_file.path // can be empty
|
||||
};
|
||||
if (!model.path.empty()) {
|
||||
models.push_back(model);
|
||||
}
|
||||
};
|
||||
|
||||
auto files = fs_list(dir, true);
|
||||
for (const auto & file : files) {
|
||||
if (file.is_dir) {
|
||||
scan_subdir(file.path, file.name);
|
||||
} else if (string_ends_with(file.name, ".gguf")) {
|
||||
// single file model
|
||||
std::string name = file.name;
|
||||
string_replace_all(name, ".gguf", "");
|
||||
local_model model{
|
||||
/* name */ name,
|
||||
/* path */ file.path,
|
||||
/* path_mmproj */ ""
|
||||
};
|
||||
models.push_back(model);
|
||||
}
|
||||
}
|
||||
return models;
|
||||
}
|
||||
|
||||
//
|
||||
// server_models
|
||||
//
|
||||
|
||||
server_models::server_models(
|
||||
const common_params & params,
|
||||
int argc,
|
||||
char ** argv,
|
||||
char ** envp) : base_params(params) {
|
||||
for (int i = 0; i < argc; i++) {
|
||||
base_args.push_back(std::string(argv[i]));
|
||||
}
|
||||
for (char ** env = envp; *env != nullptr; env++) {
|
||||
base_env.push_back(std::string(*env));
|
||||
}
|
||||
// TODO: allow refreshing cached model list
|
||||
// add cached models
|
||||
auto cached_models = common_list_cached_models();
|
||||
for (const auto & model : cached_models) {
|
||||
server_model_meta meta{
|
||||
/* name */ model.to_string(),
|
||||
/* path */ model.manifest_path,
|
||||
/* path_mmproj */ "", // auto-detected when loading
|
||||
/* in_cache */ true,
|
||||
/* port */ 0,
|
||||
/* status */ SERVER_MODEL_STATUS_UNLOADED,
|
||||
/* last_used */ 0,
|
||||
/* args */ std::vector<std::string>(),
|
||||
/* exit_code */ 0
|
||||
};
|
||||
mapping[meta.name] = instance_t{
|
||||
/* subproc */ std::make_shared<subprocess_s>(),
|
||||
/* th */ std::thread(),
|
||||
/* meta */ meta
|
||||
};
|
||||
}
|
||||
// add local models specificed via --models-dir
|
||||
if (!params.models_dir.empty()) {
|
||||
auto local_models = list_local_models(params.models_dir);
|
||||
for (const auto & model : local_models) {
|
||||
if (mapping.find(model.name) != mapping.end()) {
|
||||
// already exists in cached models, skip
|
||||
continue;
|
||||
}
|
||||
server_model_meta meta{
|
||||
/* name */ model.name,
|
||||
/* path */ model.path,
|
||||
/* path_mmproj */ model.path_mmproj,
|
||||
/* in_cache */ false,
|
||||
/* port */ 0,
|
||||
/* status */ SERVER_MODEL_STATUS_UNLOADED,
|
||||
/* last_used */ 0,
|
||||
/* args */ std::vector<std::string>(),
|
||||
/* exit_code */ 0
|
||||
};
|
||||
mapping[meta.name] = instance_t{
|
||||
/* subproc */ std::make_shared<subprocess_s>(),
|
||||
/* th */ std::thread(),
|
||||
/* meta */ meta
|
||||
};
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void server_models::update_meta(const std::string & name, const server_model_meta & meta) {
|
||||
std::lock_guard<std::mutex> lk(mutex);
|
||||
auto it = mapping.find(name);
|
||||
if (it != mapping.end()) {
|
||||
it->second.meta = meta;
|
||||
}
|
||||
cv.notify_all(); // notify wait_until_loaded
|
||||
}
|
||||
|
||||
bool server_models::has_model(const std::string & name) {
|
||||
std::lock_guard<std::mutex> lk(mutex);
|
||||
return mapping.find(name) != mapping.end();
|
||||
}
|
||||
|
||||
std::optional<server_model_meta> server_models::get_meta(const std::string & name) {
|
||||
std::lock_guard<std::mutex> lk(mutex);
|
||||
auto it = mapping.find(name);
|
||||
if (it != mapping.end()) {
|
||||
return it->second.meta;
|
||||
}
|
||||
return std::nullopt;
|
||||
}
|
||||
|
||||
static int get_free_port() {
|
||||
#ifdef _WIN32
|
||||
WSADATA wsaData;
|
||||
if (WSAStartup(MAKEWORD(2, 2), &wsaData) != 0) {
|
||||
return -1;
|
||||
}
|
||||
typedef SOCKET native_socket_t;
|
||||
#define INVALID_SOCKET_VAL INVALID_SOCKET
|
||||
#define CLOSE_SOCKET(s) closesocket(s)
|
||||
#else
|
||||
typedef int native_socket_t;
|
||||
#define INVALID_SOCKET_VAL -1
|
||||
#define CLOSE_SOCKET(s) close(s)
|
||||
#endif
|
||||
|
||||
native_socket_t sock = socket(AF_INET, SOCK_STREAM, 0);
|
||||
if (sock == INVALID_SOCKET_VAL) {
|
||||
#ifdef _WIN32
|
||||
WSACleanup();
|
||||
#endif
|
||||
return -1;
|
||||
}
|
||||
|
||||
struct sockaddr_in serv_addr;
|
||||
std::memset(&serv_addr, 0, sizeof(serv_addr));
|
||||
serv_addr.sin_family = AF_INET;
|
||||
serv_addr.sin_addr.s_addr = htonl(INADDR_ANY);
|
||||
serv_addr.sin_port = htons(0);
|
||||
|
||||
if (bind(sock, (struct sockaddr*)&serv_addr, sizeof(serv_addr)) != 0) {
|
||||
CLOSE_SOCKET(sock);
|
||||
#ifdef _WIN32
|
||||
WSACleanup();
|
||||
#endif
|
||||
return -1;
|
||||
}
|
||||
|
||||
#ifdef _WIN32
|
||||
int namelen = sizeof(serv_addr);
|
||||
#else
|
||||
socklen_t namelen = sizeof(serv_addr);
|
||||
#endif
|
||||
if (getsockname(sock, (struct sockaddr*)&serv_addr, &namelen) != 0) {
|
||||
CLOSE_SOCKET(sock);
|
||||
#ifdef _WIN32
|
||||
WSACleanup();
|
||||
#endif
|
||||
return -1;
|
||||
}
|
||||
|
||||
int port = ntohs(serv_addr.sin_port);
|
||||
|
||||
CLOSE_SOCKET(sock);
|
||||
#ifdef _WIN32
|
||||
WSACleanup();
|
||||
#endif
|
||||
|
||||
return port;
|
||||
}
|
||||
|
||||
// helper to convert vector<string> to char **
|
||||
// pointers are only valid as long as the original vector is valid
|
||||
static std::vector<char *> to_char_ptr_array(const std::vector<std::string> & vec) {
|
||||
std::vector<char *> result;
|
||||
result.reserve(vec.size() + 1);
|
||||
for (const auto & s : vec) {
|
||||
result.push_back(const_cast<char*>(s.c_str()));
|
||||
}
|
||||
result.push_back(nullptr);
|
||||
return result;
|
||||
}
|
||||
|
||||
std::vector<server_model_meta> server_models::get_all_meta() {
|
||||
std::lock_guard<std::mutex> lk(mutex);
|
||||
std::vector<server_model_meta> result;
|
||||
result.reserve(mapping.size());
|
||||
for (const auto & [name, inst] : mapping) {
|
||||
result.push_back(inst.meta);
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
void server_models::unload_lru() {
|
||||
if (base_params.models_max <= 0) {
|
||||
return; // no limit
|
||||
}
|
||||
// remove one of the servers if we passed the models_max (least recently used - LRU)
|
||||
std::string lru_model_name = "";
|
||||
int64_t lru_last_used = ggml_time_ms();
|
||||
size_t count_active = 0;
|
||||
{
|
||||
std::lock_guard<std::mutex> lk(mutex);
|
||||
for (const auto & m : mapping) {
|
||||
if (m.second.meta.is_active()) {
|
||||
count_active++;
|
||||
if (m.second.meta.last_used < lru_last_used) {
|
||||
lru_model_name = m.first;
|
||||
lru_last_used = m.second.meta.last_used;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
if (!lru_model_name.empty() && count_active >= (size_t)base_params.models_max) {
|
||||
SRV_INF("models_max limit reached, removing LRU name=%s\n", lru_model_name.c_str());
|
||||
unload(lru_model_name);
|
||||
}
|
||||
}
|
||||
|
||||
static void add_or_replace_arg(std::vector<std::string> & args, const std::string & key, const std::string & value) {
|
||||
for (size_t i = 0; i < args.size(); i++) {
|
||||
if (args[i] == key && i + 1 < args.size()) {
|
||||
args[i + 1] = value;
|
||||
return;
|
||||
}
|
||||
}
|
||||
// not found, append
|
||||
args.push_back(key);
|
||||
args.push_back(value);
|
||||
}
|
||||
|
||||
// Spawn a child server process for model `name`.
// When auto_load is true and the model has been started before, the previous
// command line (meta.args) is reused; otherwise the args are rebuilt from
// base_args. Throws if the model is unknown, no free port could be obtained,
// or the child process cannot be created. No-op unless the model is
// currently UNLOADED.
void server_models::load(const std::string & name, bool auto_load) {
    if (!has_model(name)) {
        throw std::runtime_error("model name=" + name + " is not found");
    }
    // evict least-recently-used instances first so the new one fits the limit;
    // must run before taking `mutex` (unload() locks it internally)
    unload_lru();

    std::lock_guard<std::mutex> lk(mutex);

    auto meta = mapping[name].meta;
    if (meta.status != SERVER_MODEL_STATUS_UNLOADED) {
        // already loading or loaded — nothing to do
        SRV_INF("model %s is not ready\n", name.c_str());
        return;
    }

    // prepare new instance info
    instance_t inst;
    inst.meta = meta;
    inst.meta.port = get_free_port();
    inst.meta.status = SERVER_MODEL_STATUS_LOADING;
    inst.meta.last_used = ggml_time_ms();

    if (inst.meta.port <= 0) {
        throw std::runtime_error("failed to get a port number");
    }

    inst.subproc = std::make_shared<subprocess_s>();
    {
        SRV_INF("spawning server instance with name=%s on port %d\n", inst.meta.name.c_str(), inst.meta.port);

        std::vector<std::string> child_args;
        if (auto_load && !meta.args.empty()) {
            child_args = meta.args; // copy previous args
        } else {
            child_args = base_args; // copy
            // select the model either from the cache (-hf) or by file path (-m)
            if (inst.meta.in_cache) {
                add_or_replace_arg(child_args, "-hf", inst.meta.name);
            } else {
                add_or_replace_arg(child_args, "-m", inst.meta.path);
                if (!inst.meta.path_mmproj.empty()) {
                    add_or_replace_arg(child_args, "--mmproj", inst.meta.path_mmproj);
                }
            }
        }

        // set model args
        add_or_replace_arg(child_args, "--port", std::to_string(inst.meta.port));
        add_or_replace_arg(child_args, "--alias", inst.meta.name);

        std::vector<std::string> child_env = base_env; // copy
        // tells the child which port to send its "ready" notification to
        child_env.push_back("LLAMA_SERVER_ROUTER_PORT=" + std::to_string(base_params.port));

        SRV_INF("%s", "spawning server instance with args:\n");
        for (const auto & arg : child_args) {
            SRV_INF(" %s\n", arg.c_str());
        }
        inst.meta.args = child_args; // save for debugging

        std::vector<char *> argv = to_char_ptr_array(child_args);
        std::vector<char *> envp = to_char_ptr_array(child_env);

        // combined stdout/stderr lets the monitoring thread read one stream
        int options = subprocess_option_no_window | subprocess_option_combined_stdout_stderr;
        int result = subprocess_create_ex(argv.data(), options, envp.data(), inst.subproc.get());
        if (result != 0) {
            throw std::runtime_error("failed to spawn server instance");
        }

        // keep the child's stdin; it is used later to send the exit command
        inst.stdin_file = subprocess_stdin(inst.subproc.get());
    }

    // start a thread to manage the child process
    // captured variables are guaranteed to be destroyed only after the thread is joined
    inst.th = std::thread([this, name, child_proc = inst.subproc, port = inst.meta.port]() {
        // read stdout/stderr and forward to main server log
        FILE * p_stdout_stderr = subprocess_stdout(child_proc.get());
        if (p_stdout_stderr) {
            char buffer[4096];
            // blocks until the child closes the stream (i.e. exits)
            while (fgets(buffer, sizeof(buffer), p_stdout_stderr) != nullptr) {
                LOG("[%5d] %s", port, buffer);
            }
        } else {
            SRV_ERR("failed to get stdout/stderr of child process for name=%s\n", name.c_str());
        }
        // we reach here when the child process exits
        int exit_code = 0;
        subprocess_join(child_proc.get(), &exit_code);
        subprocess_destroy(child_proc.get());
        // update PID and status
        {
            std::lock_guard<std::mutex> lk(mutex);
            auto it = mapping.find(name);
            if (it != mapping.end()) {
                auto & meta = it->second.meta;
                meta.exit_code = exit_code;
                meta.status = SERVER_MODEL_STATUS_UNLOADED;
            }
            // wake anyone blocked in wait_until_loaded()
            cv.notify_all();
        }
        SRV_INF("instance name=%s exited with status %d\n", name.c_str(), exit_code);
    });

    // clean up old process/thread if exists
    {
        auto & old_instance = mapping[name];
        // old process should have exited already, but just in case, we clean it up here
        if (subprocess_alive(old_instance.subproc.get())) {
            SRV_WRN("old process for model name=%s is still alive, this is unexpected\n", name.c_str());
            subprocess_terminate(old_instance.subproc.get()); // force kill
        }
        // NOTE(review): this join runs while `mutex` is held; it relies on the
        // old managing thread having already finished its locked section —
        // confirm this cannot deadlock if the old child lingers
        if (old_instance.th.joinable()) {
            old_instance.th.join();
        }
    }

    mapping[name] = std::move(inst);
    cv.notify_all();
}
|
||||
|
||||
static void interrupt_subprocess(FILE * stdin_file) {
|
||||
// because subprocess.h does not provide a way to send SIGINT,
|
||||
// we will send a command to the child process to exit gracefully
|
||||
if (stdin_file) {
|
||||
fprintf(stdin_file, "%s\n", CMD_EXIT);
|
||||
fflush(stdin_file);
|
||||
}
|
||||
}
|
||||
|
||||
void server_models::unload(const std::string & name) {
|
||||
std::lock_guard<std::mutex> lk(mutex);
|
||||
auto it = mapping.find(name);
|
||||
if (it != mapping.end()) {
|
||||
if (it->second.meta.is_active()) {
|
||||
SRV_INF("unloading model instance name=%s\n", name.c_str());
|
||||
interrupt_subprocess(it->second.stdin_file);
|
||||
// status change will be handled by the managing thread
|
||||
} else {
|
||||
SRV_WRN("model instance name=%s is not loaded\n", name.c_str());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Request shutdown of every active instance, then wait for all managing
// threads to finish. Threads are joined OUTSIDE the lock: each managing
// thread takes `mutex` to record its exit status, so joining while holding
// it would deadlock.
void server_models::unload_all() {
    std::vector<std::thread> to_join;
    {
        std::lock_guard<std::mutex> lk(mutex);
        for (auto & [name, inst] : mapping) {
            if (inst.meta.is_active()) {
                SRV_INF("unloading model instance name=%s\n", name.c_str());
                interrupt_subprocess(inst.stdin_file);
                // status change will be handled by the managing thread
            }
            // moving the thread to join list to avoid deadlock
            to_join.push_back(std::move(inst.th));
        }
    }
    for (auto & th : to_join) {
        if (th.joinable()) {
            th.join();
        }
    }
}
|
||||
|
||||
void server_models::update_status(const std::string & name, server_model_status status) {
|
||||
// for now, we only allow updating to LOADED status
|
||||
if (status != SERVER_MODEL_STATUS_LOADED) {
|
||||
throw std::runtime_error("invalid status value");
|
||||
}
|
||||
auto meta = get_meta(name);
|
||||
if (meta.has_value()) {
|
||||
meta->status = status;
|
||||
update_meta(name, meta.value());
|
||||
}
|
||||
}
|
||||
|
||||
void server_models::wait_until_loaded(const std::string & name) {
|
||||
std::unique_lock<std::mutex> lk(mutex);
|
||||
cv.wait(lk, [this, &name]() {
|
||||
auto it = mapping.find(name);
|
||||
if (it != mapping.end()) {
|
||||
return it->second.meta.status != SERVER_MODEL_STATUS_LOADING;
|
||||
}
|
||||
return false;
|
||||
});
|
||||
}
|
||||
|
||||
// Make sure `name` is loaded, blocking until it is ready.
// Returns false if it was already loaded (no state change); returns true
// otherwise, in which case the caller should re-fetch the metadata.
// Throws if the model is unknown or the load attempt failed.
bool server_models::ensure_model_loaded(const std::string & name) {
    auto meta = get_meta(name);
    if (!meta.has_value()) {
        throw std::runtime_error("model name=" + name + " is not found");
    }
    if (meta->status == SERVER_MODEL_STATUS_LOADED) {
        return false; // already loaded
    }
    if (meta->status == SERVER_MODEL_STATUS_UNLOADED) {
        SRV_INF("model name=%s is not loaded, loading...\n", name.c_str());
        load(name, true); // auto_load: reuse args from the previous run if any
    }

    // here the model is LOADING (started by us or by a concurrent caller)
    SRV_INF("waiting until model name=%s is fully loaded...\n", name.c_str());
    wait_until_loaded(name);

    // check final status
    meta = get_meta(name);
    if (!meta.has_value() || meta->is_failed()) {
        throw std::runtime_error("model name=" + name + " failed to load");
    }

    return true;
}
|
||||
|
||||
// Forward an HTTP request to the instance serving `name` and stream its
// response back. Throws std::runtime_error if the model is unknown and
// std::invalid_argument if it is not currently loaded. When update_last_used
// is true the LRU timestamp is refreshed (callers do this for POST only).
server_http_res_ptr server_models::proxy_request(const server_http_req & req, const std::string & method, const std::string & name, bool update_last_used) {
    auto meta = get_meta(name);
    if (!meta.has_value()) {
        throw std::runtime_error("model name=" + name + " is not found");
    }
    if (meta->status != SERVER_MODEL_STATUS_LOADED) {
        throw std::invalid_argument("model name=" + name + " is not loaded");
    }
    if (update_last_used) {
        std::unique_lock<std::mutex> lk(mutex);
        mapping[name].meta.last_used = ggml_time_ms();
    }
    SRV_INF("proxying request to model %s on port %d\n", name.c_str(), meta->port);
    // the proxy object runs the outgoing request on its own thread and
    // exposes the child's reply as a streaming server_http_res
    auto proxy = std::make_unique<server_http_proxy>(
        method,
        base_params.hostname,
        meta->port,
        req.path,
        req.headers,
        req.body,
        req.should_stop);
    return proxy;
}
|
||||
|
||||
// Runs inside a freshly spawned child instance: notify the router over HTTP
// that this model finished loading, then return a thread that watches stdin
// for the router's exit command (or EOF, meaning the router died).
// Exits the process with code 1 if the notification cannot be delivered.
std::thread server_models::setup_child_server(const common_params & base_params, int router_port, const std::string & name, std::function<void(int)> & shutdown_handler) {
    // send a notification to the router server that a model instance is ready
    // TODO @ngxson : use HTTP client from libcommon
    httplib::Client cli(base_params.hostname, router_port);
    cli.set_connection_timeout(0, 200000); // 200 milliseconds

    httplib::Request req;
    req.method = "POST";
    req.path = "/models/status";
    req.set_header("Content-Type", "application/json");
    if (!base_params.api_keys.empty()) {
        // authenticate with the router's first API key
        req.set_header("Authorization", "Bearer " + base_params.api_keys[0]);
    }

    json body;
    body["model"] = name;
    body["value"] = server_model_status_to_string(SERVER_MODEL_STATUS_LOADED);
    req.body = body.dump();

    SRV_INF("notifying router server (port=%d) that model %s is ready\n", router_port, name.c_str());
    auto result = cli.send(std::move(req));
    if (result.error() != httplib::Error::Success) {
        // without this notification the router would keep us in LOADING
        // forever, so bail out hard
        auto err_str = httplib::to_string(result.error());
        SRV_ERR("failed to notify router server: %s\n", err_str.c_str());
        exit(1); // force exit
    }

    // setup thread for monitoring stdin
    return std::thread([shutdown_handler]() {
        // wait for EOF on stdin
        SRV_INF("%s", "child server monitoring thread started, waiting for EOF on stdin...\n");
        bool eof = false;
        while (true) {
            std::string line;
            if (!std::getline(std::cin, line)) {
                // EOF detected, that means the router server is unexpectedly exit or killed
                eof = true;
                break;
            }
            // graceful shutdown requested by the router (see interrupt_subprocess)
            if (line.find(CMD_EXIT) != std::string::npos) {
                SRV_INF("%s", "exit command received, exiting...\n");
                shutdown_handler(0);
                break;
            }
        }
        if (eof) {
            SRV_INF("%s", "EOF on stdin detected, forcing shutdown...\n");
            exit(1);
        }
    });
}
|
||||
|
||||
|
||||
|
||||
//
|
||||
// server_models_routes
|
||||
//
|
||||
|
||||
// fill `res` with a 200 response whose body is `response_data` serialized as JSON
static void res_ok(std::unique_ptr<server_http_res> & res, const json & response_data) {
    res->data   = safe_json_to_str(response_data);
    res->status = 200;
}
|
||||
|
||||
// fill `res` with an error response; the HTTP status is taken from the
// "code" field of `error_data` (500 when absent) and the body wraps the
// error object under an "error" key
static void res_error(std::unique_ptr<server_http_res> & res, const json & error_data) {
    res->data   = safe_json_to_str({{ "error", error_data }});
    res->status = json_value(error_data, "code", 500);
}
|
||||
|
||||
static bool router_validate_model(const std::string & name, server_models & models, bool models_autoload, std::unique_ptr<server_http_res> & res) {
|
||||
if (name.empty()) {
|
||||
res_error(res, format_error_response("model name is missing from the request", ERROR_TYPE_INVALID_REQUEST));
|
||||
return false;
|
||||
}
|
||||
auto meta = models.get_meta(name);
|
||||
if (!meta.has_value()) {
|
||||
res_error(res, format_error_response("model not found", ERROR_TYPE_INVALID_REQUEST));
|
||||
return false;
|
||||
}
|
||||
if (models_autoload) {
|
||||
models.ensure_model_loaded(name);
|
||||
} else {
|
||||
if (meta->status != SERVER_MODEL_STATUS_LOADED) {
|
||||
res_error(res, format_error_response("model is not loaded", ERROR_TYPE_INVALID_REQUEST));
|
||||
return false;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
static bool is_autoload(const common_params & params, const server_http_req & req) {
|
||||
std::string autoload = req.get_param("autoload");
|
||||
if (autoload.empty()) {
|
||||
return params.models_autoload;
|
||||
} else {
|
||||
return autoload == "true" || autoload == "1";
|
||||
}
|
||||
}
|
||||
|
||||
// Wire up all router-mode HTTP handlers.
// Fix: the `get_router_models` lambda was assigned twice with identical
// bodies (the second assignment silently overwrote the first); the duplicate
// is removed.
void server_models_routes::init_routes() {
    // GET /props : return a stub response for the router itself when no model
    // is selected; otherwise proxy to the selected instance
    this->get_router_props = [this](const server_http_req & req) {
        std::string name = req.get_param("model");
        if (name.empty()) {
            // main instance
            auto res = std::make_unique<server_http_res>();
            res_ok(res, {
                // TODO: add support for this on web UI
                {"role", "router"},
                {"max_instances", 4}, // dummy value for testing
                // this is a dummy response to make sure webui doesn't break
                {"model_alias", "llama-server"},
                {"model_path", "none"},
                {"default_generation_settings", {
                    {"params", json{}},
                    {"n_ctx", 0},
                }},
            });
            return res;
        }
        return proxy_get(req);
    };

    // generic GET proxy: model name comes from the query string
    this->proxy_get = [this](const server_http_req & req) {
        std::string method = "GET";
        std::string name = req.get_param("model");
        bool autoload = is_autoload(params, req);
        auto error_res = std::make_unique<server_http_res>();
        if (!router_validate_model(name, models, autoload, error_res)) {
            return error_res;
        }
        return models.proxy_request(req, method, name, false);
    };

    // generic POST proxy: model name comes from the JSON body
    this->proxy_post = [this](const server_http_req & req) {
        std::string method = "POST";
        json body = json::parse(req.body);
        std::string name = json_value(body, "model", std::string());
        bool autoload = is_autoload(params, req);
        auto error_res = std::make_unique<server_http_res>();
        if (!router_validate_model(name, models, autoload, error_res)) {
            return error_res;
        }
        return models.proxy_request(req, method, name, true); // update last usage for POST request only
    };

    // GET /models : list all known models with their status (OAI-compatible)
    this->get_router_models = [this](const server_http_req &) {
        auto res = std::make_unique<server_http_res>();
        json models_json = json::array();
        auto all_models = models.get_all_meta();
        std::time_t t = std::time(0);
        for (const auto & meta : all_models) {
            json status {
                {"value", server_model_status_to_string(meta.status)},
                {"args", meta.args},
            };
            if (meta.is_failed()) {
                status["exit_code"] = meta.exit_code;
                status["failed"] = true;
            }
            models_json.push_back(json {
                {"id", meta.name},
                {"object", "model"}, // for OAI-compat
                {"owned_by", "llamacpp"}, // for OAI-compat
                {"created", t}, // for OAI-compat
                {"in_cache", meta.in_cache},
                {"path", meta.path},
                {"status", status},
                // TODO: add other fields, may require reading GGUF metadata
            });
        }
        res_ok(res, {
            {"data", models_json},
            {"object", "list"},
        });
        return res;
    };

    // POST /models/load : explicitly start an instance (no auto_load args reuse)
    this->post_router_models_load = [this](const server_http_req & req) {
        auto res = std::make_unique<server_http_res>();
        json body = json::parse(req.body);
        std::string name = json_value(body, "model", std::string());
        auto model = models.get_meta(name);
        if (!model.has_value()) {
            res_error(res, format_error_response("model is not found", ERROR_TYPE_NOT_FOUND));
            return res;
        }
        if (model->status == SERVER_MODEL_STATUS_LOADED) {
            res_error(res, format_error_response("model is already loaded", ERROR_TYPE_INVALID_REQUEST));
            return res;
        }
        models.load(name, false);
        res_ok(res, {{"success", true}});
        return res;
    };

    // used by child process to notify the router about status change
    // TODO @ngxson : maybe implement authentication for this endpoint in the future
    this->post_router_models_status = [this](const server_http_req & req) {
        auto res = std::make_unique<server_http_res>();
        json body = json::parse(req.body);
        std::string model = json_value(body, "model", std::string());
        std::string value = json_value(body, "value", std::string());
        models.update_status(model, server_model_status_from_string(value));
        res_ok(res, {{"success", true}});
        return res;
    };

    // POST /models/unload : gracefully stop a loaded instance
    this->post_router_models_unload = [this](const server_http_req & req) {
        auto res = std::make_unique<server_http_res>();
        json body = json::parse(req.body);
        std::string name = json_value(body, "model", std::string());
        auto model = models.get_meta(name);
        if (!model.has_value()) {
            res_error(res, format_error_response("model is not found", ERROR_TYPE_INVALID_REQUEST));
            return res;
        }
        if (model->status != SERVER_MODEL_STATUS_LOADED) {
            res_error(res, format_error_response("model is not loaded", ERROR_TYPE_INVALID_REQUEST));
            return res;
        }
        models.unload(name);
        res_ok(res, {{"success", true}});
        return res;
    };
}
|
||||
|
||||
|
||||
|
||||
//
|
||||
// server_http_proxy
|
||||
//
|
||||
|
||||
// simple implementation of a pipe
// used for streaming data between threads
//
// Queue with cooperative shutdown from either side:
//  - the writer calls close_write() to signal EOF,
//  - the reader calls close_read() to signal a broken pipe,
//  - read() polls `should_stop` every 500 ms so a cancelled HTTP client
//    cannot leave the writer thread blocked forever.
template<typename T>
struct pipe_t {
    std::mutex mutex;
    std::condition_variable cv;
    std::queue<T> queue;
    // atomics so the close_* calls do not need to take `mutex`
    std::atomic<bool> writer_closed{false};
    std::atomic<bool> reader_closed{false};
    // writer side: no more items will be produced (EOF for the reader)
    void close_write() {
        writer_closed.store(true, std::memory_order_relaxed);
        cv.notify_all();
    }
    // reader side: no more items will be consumed (broken pipe for the writer)
    void close_read() {
        reader_closed.store(true, std::memory_order_relaxed);
        cv.notify_all();
    }
    // blocking read; returns false on clean EOF or when should_stop() fires
    bool read(T & output, const std::function<bool()> & should_stop) {
        std::unique_lock<std::mutex> lk(mutex);
        constexpr auto poll_interval = std::chrono::milliseconds(500);
        while (true) {
            // drain pending items before reporting EOF
            if (!queue.empty()) {
                output = std::move(queue.front());
                queue.pop();
                return true;
            }
            if (writer_closed.load()) {
                return false; // clean EOF
            }
            if (should_stop()) {
                close_read(); // signal broken pipe to writer
                return false; // cancelled / reader no longer alive
            }
            // timed wait so should_stop() is re-checked periodically
            cv.wait_for(lk, poll_interval);
        }
    }
    // enqueue an item; returns false if the reader has gone away
    bool write(T && data) {
        std::lock_guard<std::mutex> lk(mutex);
        if (reader_closed.load()) {
            return false; // broken pipe
        }
        queue.push(std::move(data));
        cv.notify_one();
        return true;
    }
};
|
||||
|
||||
// Construct a proxy response: run the outgoing request on a detached worker
// thread and expose the reply as a pull-based stream through `this->next`.
// The constructor blocks only until the response headers arrive (or the
// request is cancelled via should_stop); the body streams chunk by chunk.
server_http_proxy::server_http_proxy(
        const std::string & method,
        const std::string & host,
        int port,
        const std::string & path,
        const std::map<std::string, std::string> & headers,
        const std::string & body,
        const std::function<bool()> should_stop) {
    // shared between reader and writer threads
    auto cli = std::make_shared<httplib::Client>(host, port);
    auto pipe = std::make_shared<pipe_t<msg_t>>();

    // setup Client
    cli->set_connection_timeout(0, 200000); // 200 milliseconds
    this->status = 500; // to be overwritten upon response
    // called from ~server_http_proxy(); closing both ends unblocks the worker
    this->cleanup = [pipe]() {
        pipe->close_read();
        pipe->close_write();
    };

    // wire up the receive end of the pipe
    this->next = [pipe, should_stop](std::string & out) -> bool {
        msg_t msg;
        bool has_next = pipe->read(msg, should_stop);
        if (!msg.data.empty()) {
            out = std::move(msg.data);
        }
        return has_next; // false if EOF or pipe broken
    };

    // wire up the HTTP client
    // note: do NOT capture `this` pointer, as it may be destroyed before the thread ends
    httplib::ResponseHandler response_handler = [pipe, cli](const httplib::Response & response) {
        msg_t msg;
        msg.status = response.status;
        for (const auto & [key, value] : response.headers) {
            msg.headers[key] = value;
        }
        return pipe->write(std::move(msg)); // send headers first
    };
    httplib::ContentReceiverWithProgress content_receiver = [pipe](const char * data, size_t data_length, size_t, size_t) {
        // send data chunks
        // returns false if pipe is closed / broken (signal to stop receiving)
        return pipe->write({{}, 0, std::string(data, data_length)});
    };

    // prepare the request to destination server
    httplib::Request req;
    {
        req.method = method;
        req.path = path;
        for (const auto & [key, value] : headers) {
            req.set_header(key, value);
        }
        req.body = body;
        req.response_handler = response_handler;
        req.content_receiver = content_receiver;
    }

    // start the proxy thread
    SRV_DBG("start proxy thread %s %s\n", req.method.c_str(), req.path.c_str());
    this->thread = std::thread([cli, pipe, req]() {
        auto result = cli->send(std::move(req));
        if (result.error() != httplib::Error::Success) {
            // surface transport failures to the reader as a 500 + error body
            auto err_str = httplib::to_string(result.error());
            SRV_ERR("http client error: %s\n", err_str.c_str());
            pipe->write({{}, 500, ""}); // header
            pipe->write({{}, 0, "proxy error: " + err_str}); // body
        }
        pipe->close_write(); // signal EOF to reader
        SRV_DBG("%s", "client request thread ended\n");
    });
    // safe to detach: the lambda holds shared_ptrs, so `cli` and `pipe`
    // outlive this object even if it is destroyed first
    this->thread.detach();

    // wait for the first chunk (headers)
    msg_t header;
    if (pipe->read(header, should_stop)) {
        SRV_DBG("%s", "received response headers\n");
        this->status = header.status;
        this->headers = header.headers;
    } else {
        SRV_DBG("%s", "no response headers received (request cancelled?)\n");
    }
}
|
||||
|
|
@ -0,0 +1,174 @@
|
|||
#pragma once
|
||||
|
||||
#include "common.h"
|
||||
#include "server-http.h"
|
||||
|
||||
#include <mutex>
|
||||
#include <condition_variable>
|
||||
#include <functional>
|
||||
#include <memory>
|
||||
|
||||
/**
|
||||
* state diagram:
|
||||
*
|
||||
* UNLOADED ──► LOADING ──► LOADED
|
||||
* ▲ │ │
|
||||
* └───failed───┘ │
|
||||
* ▲ │
|
||||
* └────────unloaded─────────┘
|
||||
*/
|
||||
// lifecycle state of a managed model instance (see state diagram above);
// note there is no FAILED value: a failed load ends up UNLOADED with a
// non-zero exit_code (see server_model_meta::is_failed)
enum server_model_status {
    // TODO: also add downloading state when the logic is added
    SERVER_MODEL_STATUS_UNLOADED,
    SERVER_MODEL_STATUS_LOADING,
    SERVER_MODEL_STATUS_LOADED
};
|
||||
|
||||
// Parse the wire representation of a model status.
// Must stay in sync with server_model_status_to_string().
// Throws std::runtime_error on any unrecognized string.
static server_model_status server_model_status_from_string(const std::string & status_str) {
    if (status_str == "loaded") {
        return SERVER_MODEL_STATUS_LOADED;
    } else if (status_str == "loading") {
        return SERVER_MODEL_STATUS_LOADING;
    } else if (status_str == "unloaded") {
        return SERVER_MODEL_STATUS_UNLOADED;
    }
    throw std::runtime_error("invalid server model status");
}
|
||||
|
||||
// Serialize a model status to its wire representation.
// Inverse of server_model_status_from_string(); unknown values (should not
// occur) are rendered as "unknown".
static std::string server_model_status_to_string(server_model_status status) {
    if (status == SERVER_MODEL_STATUS_UNLOADED) {
        return "unloaded";
    }
    if (status == SERVER_MODEL_STATUS_LOADING) {
        return "loading";
    }
    if (status == SERVER_MODEL_STATUS_LOADED) {
        return "loaded";
    }
    return "unknown";
}
|
||||
|
||||
// Metadata describing one managed model and its (optional) running instance.
// Callers receive copies; the authoritative copy lives inside
// server_models::mapping, guarded by its mutex.
struct server_model_meta {
    std::string name;
    std::string path;
    std::string path_mmproj; // only available if in_cache=false
    bool in_cache = false; // if true, use -hf; use -m otherwise
    int port = 0; // port assigned to the spawned instance
    server_model_status status = SERVER_MODEL_STATUS_UNLOADED;
    int64_t last_used = 0; // for LRU unloading
    std::vector<std::string> args; // additional args passed to the model instance (used for debugging)
    int exit_code = 0; // exit code of the last instance process; non-zero while UNLOADED marks a failed load (see is_failed())

    // true while an instance is running or being spawned
    bool is_active() const {
        return status == SERVER_MODEL_STATUS_LOADED || status == SERVER_MODEL_STATUS_LOADING;
    }

    // true if the last load attempt terminated with a non-zero exit code
    bool is_failed() const {
        return status == SERVER_MODEL_STATUS_UNLOADED && exit_code != 0;
    }
};
|
||||
|
||||
struct subprocess_s;
|
||||
|
||||
// Manages the pool of child server processes in router mode: spawning,
// log forwarding, status tracking, LRU eviction and request proxying.
// Shared state is guarded by `mutex`; `cv` is notified on status changes.
struct server_models {
private:
    struct instance_t {
        std::shared_ptr<subprocess_s> subproc; // shared between main thread and monitoring thread
        std::thread th; // monitoring thread: forwards child logs, records exit status
        server_model_meta meta;
        FILE * stdin_file = nullptr; // child's stdin, used to send the exit command
    };

    std::mutex mutex;
    std::condition_variable cv; // notified whenever an instance's status changes
    std::map<std::string, instance_t> mapping; // model name -> instance

    common_params base_params; // router's own params (hostname, port, ...)
    std::vector<std::string> base_args; // template command line for child instances
    std::vector<std::string> base_env; // environment passed to child instances

    // overwrite the stored metadata for `name` (thread-safe)
    void update_meta(const std::string & name, const server_model_meta & meta);

    // unload least recently used models if the limit is reached
    void unload_lru();

public:
    server_models(const common_params & params, int argc, char ** argv, char ** envp);

    // check if a model instance exists
    bool has_model(const std::string & name);

    // return a copy of model metadata
    std::optional<server_model_meta> get_meta(const std::string & name);

    // return a copy of all model metadata
    std::vector<server_model_meta> get_all_meta();

    // if auto_load is true, load the model with previous args if any
    void load(const std::string & name, bool auto_load);
    void unload(const std::string & name);
    void unload_all();

    // update the status of a model instance
    void update_status(const std::string & name, server_model_status status);

    // wait until the model instance is fully loaded
    // return when the model is loaded or failed to load
    void wait_until_loaded(const std::string & name);

    // load the model if not loaded, otherwise do nothing
    // return false if model is already loaded; return true otherwise (meta may need to be refreshed)
    bool ensure_model_loaded(const std::string & name);

    // proxy an HTTP request to the model instance
    server_http_res_ptr proxy_request(const server_http_req & req, const std::string & method, const std::string & name, bool update_last_used);

    // notify the router server that a model instance is ready
    // return the monitoring thread (to be joined by the caller)
    static std::thread setup_child_server(const common_params & base_params, int router_port, const std::string & name, std::function<void(int)> & shutdown_handler);
};
|
||||
|
||||
// Bundles the server_models manager together with the router-mode HTTP
// handlers; construction immediately wires up all routes.
struct server_models_routes {
    common_params params;
    server_models models;
    server_models_routes(const common_params & params, int argc, char ** argv, char ** envp)
        : params(params), models(params, argc, argv, envp) {
        init_routes();
    }

    // assigns all handler members below
    void init_routes();
    // handlers using lambda function, so that they can capture `this` without `std::bind`
    server_http_context::handler_t get_router_props;
    server_http_context::handler_t proxy_get;
    server_http_context::handler_t proxy_post;
    server_http_context::handler_t get_router_models;
    server_http_context::handler_t post_router_models_load;
    server_http_context::handler_t post_router_models_status;
    server_http_context::handler_t post_router_models_unload;
};
|
||||
|
||||
/**
 * A simple HTTP proxy that forwards requests to another server
 * and relays the responses back.
 *
 * The constructor launches a detached worker thread that performs the
 * outgoing request; the response body is consumed through the inherited
 * streaming interface of server_http_res.
 */
struct server_http_proxy : server_http_res {
    std::function<void()> cleanup = nullptr; // set by the constructor; closes the internal pipe
public:
    server_http_proxy(const std::string & method,
                      const std::string & host,
                      int port,
                      const std::string & path,
                      const std::map<std::string, std::string> & headers,
                      const std::string & body,
                      const std::function<bool()> should_stop);
    ~server_http_proxy() {
        // closing both pipe ends unblocks the detached worker thread
        if (cleanup) {
            cleanup();
        }
    }
private:
    std::thread thread; // detached worker performing the outgoing request
    // message passed through the internal pipe: the first message carries
    // status + headers, subsequent ones carry body chunks in `data`
    struct msg_t {
        std::map<std::string, std::string> headers;
        int status = 0;
        std::string data;
    };
};
|
||||
|
|
@ -1,5 +1,6 @@
|
|||
#include "server-context.h"
|
||||
#include "server-http.h"
|
||||
#include "server-models.h"
|
||||
|
||||
#include "arg.h"
|
||||
#include "common.h"
|
||||
|
|
@ -47,16 +48,16 @@ static server_http_context::handler_t ex_wrapper(server_http_context::handler_t
|
|||
json error_data = format_error_response(message, ERROR_TYPE_SERVER);
|
||||
res->status = json_value(error_data, "code", 500);
|
||||
res->data = safe_json_to_str({{ "error", error_data }});
|
||||
LOG_WRN("got exception: %s\n", res->data.c_str());
|
||||
SRV_WRN("got exception: %s\n", res->data.c_str());
|
||||
} catch (const std::exception & e) {
|
||||
LOG_ERR("got another exception: %s | while hanlding exception: %s\n", e.what(), message.c_str());
|
||||
SRV_ERR("got another exception: %s | while handling exception: %s\n", e.what(), message.c_str());
|
||||
res->data = "Internal Server Error";
|
||||
}
|
||||
return res;
|
||||
};
|
||||
}
|
||||
|
||||
int main(int argc, char ** argv) {
|
||||
int main(int argc, char ** argv, char ** envp) {
|
||||
// own arguments required by this example
|
||||
common_params params;
|
||||
|
||||
|
|
@ -75,6 +76,11 @@ int main(int argc, char ** argv) {
|
|||
params.kv_unified = true;
|
||||
}
|
||||
|
||||
// for consistency between server router mode and single-model mode, we set the same model name as alias
|
||||
if (params.model_alias.empty() && !params.model.name.empty()) {
|
||||
params.model_alias = params.model.name;
|
||||
}
|
||||
|
||||
common_init();
|
||||
|
||||
// struct that contains llama context and inference
|
||||
|
|
@ -101,6 +107,42 @@ int main(int argc, char ** argv) {
|
|||
// register API routes
|
||||
server_routes routes(params, ctx_server, [&ctx_http]() { return ctx_http.is_ready.load(); });
|
||||
|
||||
bool is_router_server = params.model.path.empty();
|
||||
std::optional<server_models_routes> models_routes{};
|
||||
if (is_router_server) {
|
||||
// setup server instances manager
|
||||
models_routes.emplace(params, argc, argv, envp);
|
||||
|
||||
// proxy handlers
|
||||
// note: routes.get_health stays the same
|
||||
routes.get_metrics = models_routes->proxy_get;
|
||||
routes.post_props = models_routes->proxy_post;
|
||||
routes.get_api_show = models_routes->proxy_get;
|
||||
routes.post_completions = models_routes->proxy_post;
|
||||
routes.post_completions_oai = models_routes->proxy_post;
|
||||
routes.post_chat_completions = models_routes->proxy_post;
|
||||
routes.post_anthropic_messages = models_routes->proxy_post;
|
||||
routes.post_anthropic_count_tokens = models_routes->proxy_post;
|
||||
routes.post_infill = models_routes->proxy_post;
|
||||
routes.post_embeddings = models_routes->proxy_post;
|
||||
routes.post_embeddings_oai = models_routes->proxy_post;
|
||||
routes.post_rerank = models_routes->proxy_post;
|
||||
routes.post_tokenize = models_routes->proxy_post;
|
||||
routes.post_detokenize = models_routes->proxy_post;
|
||||
routes.post_apply_template = models_routes->proxy_post;
|
||||
routes.get_lora_adapters = models_routes->proxy_get;
|
||||
routes.post_lora_adapters = models_routes->proxy_post;
|
||||
routes.get_slots = models_routes->proxy_get;
|
||||
routes.post_slots = models_routes->proxy_post;
|
||||
|
||||
// custom routes for router
|
||||
routes.get_props = models_routes->get_router_props;
|
||||
routes.get_models = models_routes->get_router_models;
|
||||
ctx_http.post("/models/load", ex_wrapper(models_routes->post_router_models_load));
|
||||
ctx_http.post("/models/unload", ex_wrapper(models_routes->post_router_models_unload));
|
||||
ctx_http.post("/models/status", ex_wrapper(models_routes->post_router_models_status));
|
||||
}
|
||||
|
||||
ctx_http.get ("/health", ex_wrapper(routes.get_health)); // public endpoint (no API key check)
|
||||
ctx_http.get ("/v1/health", ex_wrapper(routes.get_health)); // public endpoint (no API key check)
|
||||
ctx_http.get ("/metrics", ex_wrapper(routes.get_metrics));
|
||||
|
|
@ -140,8 +182,33 @@ int main(int argc, char ** argv) {
|
|||
// Start the server
|
||||
//
|
||||
|
||||
std::function<void()> clean_up;
|
||||
|
||||
if (is_router_server) {
|
||||
LOG_INF("%s: starting router server, no model will be loaded in this process\n", __func__);
|
||||
|
||||
clean_up = [&models_routes]() {
|
||||
SRV_INF("%s: cleaning up before exit...\n", __func__);
|
||||
if (models_routes.has_value()) {
|
||||
models_routes->models.unload_all();
|
||||
}
|
||||
llama_backend_free();
|
||||
};
|
||||
|
||||
if (!ctx_http.start()) {
|
||||
clean_up();
|
||||
LOG_ERR("%s: exiting due to HTTP server error\n", __func__);
|
||||
return 1;
|
||||
}
|
||||
ctx_http.is_ready.store(true);
|
||||
|
||||
shutdown_handler = [&](int) {
|
||||
ctx_http.stop();
|
||||
};
|
||||
|
||||
} else {
|
||||
// setup clean up function, to be called before exit
|
||||
auto clean_up = [&ctx_http, &ctx_server]() {
|
||||
clean_up = [&ctx_http, &ctx_server]() {
|
||||
SRV_INF("%s: cleaning up before exit...\n", __func__);
|
||||
ctx_http.stop();
|
||||
ctx_server.terminate();
|
||||
|
|
@ -176,6 +243,7 @@ int main(int argc, char ** argv) {
|
|||
// this will unblock start_loop()
|
||||
ctx_server.terminate();
|
||||
};
|
||||
}
|
||||
|
||||
// TODO: refactor in common/console
|
||||
#if defined (__unix__) || (defined (__APPLE__) && defined (__MACH__))
|
||||
|
|
@ -192,16 +260,39 @@ int main(int argc, char ** argv) {
|
|||
SetConsoleCtrlHandler(reinterpret_cast<PHANDLER_ROUTINE>(console_ctrl_handler), true);
|
||||
#endif
|
||||
|
||||
if (is_router_server) {
|
||||
LOG_INF("%s: router server is listening on %s\n", __func__, ctx_http.listening_address.c_str());
|
||||
LOG_INF("%s: NOTE: router mode is experimental\n", __func__);
|
||||
LOG_INF("%s: it is not recommended to use this mode in untrusted environments\n", __func__);
|
||||
if (ctx_http.thread.joinable()) {
|
||||
ctx_http.thread.join(); // keep the main thread alive
|
||||
}
|
||||
|
||||
// when the HTTP server stops, clean up and exit
|
||||
clean_up();
|
||||
} else {
|
||||
LOG_INF("%s: server is listening on %s\n", __func__, ctx_http.listening_address.c_str());
|
||||
LOG_INF("%s: starting the main loop...\n", __func__);
|
||||
// this call blocks the main thread until ctx_server.terminate() is called
|
||||
|
||||
// optionally, notify router server that this instance is ready
|
||||
const char * router_port = std::getenv("LLAMA_SERVER_ROUTER_PORT");
|
||||
std::thread monitor_thread;
|
||||
if (router_port != nullptr) {
|
||||
monitor_thread = server_models::setup_child_server(params, std::atoi(router_port), params.model_alias, shutdown_handler);
|
||||
}
|
||||
|
||||
// this call blocks the main thread until queue_tasks.terminate() is called
|
||||
ctx_server.start_loop();
|
||||
|
||||
clean_up();
|
||||
if (ctx_http.thread.joinable()) {
|
||||
ctx_http.thread.join();
|
||||
}
|
||||
if (monitor_thread.joinable()) {
|
||||
monitor_thread.join();
|
||||
}
|
||||
llama_memory_breakdown_print(ctx_server.get_llama_context());
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -0,0 +1,50 @@
|
|||
import pytest
|
||||
from utils import *
|
||||
|
||||
server: ServerProcess
|
||||
|
||||
@pytest.fixture(autouse=True)
|
||||
def create_server():
|
||||
global server
|
||||
server = ServerPreset.router()
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"model,success",
|
||||
[
|
||||
("ggml-org/tinygemma3-GGUF:Q8_0", True),
|
||||
("non-existent/model", False),
|
||||
]
|
||||
)
|
||||
def test_router_chat_completion_stream(model: str, success: bool):
|
||||
# TODO: make sure the model is in cache (ie. ServerProcess.load_all()) before starting the router server
|
||||
global server
|
||||
server.start()
|
||||
content = ""
|
||||
ex: ServerError | None = None
|
||||
try:
|
||||
res = server.make_stream_request("POST", "/chat/completions", data={
|
||||
"model": model,
|
||||
"max_tokens": 16,
|
||||
"messages": [
|
||||
{"role": "user", "content": "hello"},
|
||||
],
|
||||
"stream": True,
|
||||
})
|
||||
for data in res:
|
||||
if data["choices"]:
|
||||
choice = data["choices"][0]
|
||||
if choice["finish_reason"] in ["stop", "length"]:
|
||||
assert "content" not in choice["delta"]
|
||||
else:
|
||||
assert choice["finish_reason"] is None
|
||||
content += choice["delta"]["content"] or ''
|
||||
except ServerError as e:
|
||||
ex = e
|
||||
|
||||
if success:
|
||||
assert ex is None
|
||||
assert len(content) > 0
|
||||
else:
|
||||
assert ex is not None
|
||||
assert content == ""
|
||||
|
|
@ -46,7 +46,7 @@ class ServerProcess:
|
|||
debug: bool = False
|
||||
server_port: int = 8080
|
||||
server_host: str = "127.0.0.1"
|
||||
model_hf_repo: str = "ggml-org/models"
|
||||
model_hf_repo: str | None = "ggml-org/models"
|
||||
model_hf_file: str | None = "tinyllamas/stories260K.gguf"
|
||||
model_alias: str = "tinyllama-2"
|
||||
temperature: float = 0.8
|
||||
|
|
@ -521,9 +521,8 @@ class ServerPreset:
|
|||
server = ServerProcess()
|
||||
server.offline = True # will be downloaded by load_all()
|
||||
# mmproj is already provided by HF registry API
|
||||
server.model_hf_repo = "ggml-org/tinygemma3-GGUF"
|
||||
server.model_hf_file = "tinygemma3-Q8_0.gguf"
|
||||
server.mmproj_url = "https://huggingface.co/ggml-org/tinygemma3-GGUF/resolve/main/mmproj-tinygemma3.gguf"
|
||||
server.model_hf_file = None
|
||||
server.model_hf_repo = "ggml-org/tinygemma3-GGUF:Q8_0"
|
||||
server.model_alias = "tinygemma3"
|
||||
server.n_ctx = 1024
|
||||
server.n_batch = 32
|
||||
|
|
@ -532,6 +531,21 @@ class ServerPreset:
|
|||
server.seed = 42
|
||||
return server
|
||||
|
||||
@staticmethod
|
||||
def router() -> ServerProcess:
|
||||
server = ServerProcess()
|
||||
# router server has no models
|
||||
server.model_file = None
|
||||
server.model_alias = None
|
||||
server.model_hf_repo = None
|
||||
server.model_hf_file = None
|
||||
server.n_ctx = 1024
|
||||
server.n_batch = 16
|
||||
server.n_slots = 1
|
||||
server.n_predict = 16
|
||||
server.seed = 42
|
||||
return server
|
||||
|
||||
|
||||
def parallel_function_calls(function_list: List[Tuple[Callable[..., Any], Tuple[Any, ...]]]) -> List[Any]:
|
||||
"""
|
||||
|
|
|
|||
|
|
@ -1,7 +1,7 @@
|
|||
import type { StorybookConfig } from '@storybook/sveltekit';
|
||||
|
||||
const config: StorybookConfig = {
|
||||
stories: ['../src/**/*.mdx', '../src/**/*.stories.@(js|ts|svelte)'],
|
||||
stories: ['../tests/stories/**/*.mdx', '../tests/stories/**/*.stories.@(js|ts|svelte)'],
|
||||
addons: [
|
||||
'@storybook/addon-svelte-csf',
|
||||
'@chromatic-com/storybook',
|
||||
|
|
|
|||
|
|
@ -2,65 +2,685 @@
|
|||
|
||||
A modern, feature-rich web interface for llama.cpp built with SvelteKit. This UI provides an intuitive chat interface with advanced file handling, conversation management, and comprehensive model interaction capabilities.
|
||||
|
||||
The WebUI supports two server operation modes:
|
||||
|
||||
- **MODEL mode** - Single model operation (standard llama-server)
|
||||
- **ROUTER mode** - Multi-model operation with dynamic model loading/unloading
|
||||
|
||||
---
|
||||
|
||||
## Table of Contents
|
||||
|
||||
- [Features](#features)
|
||||
- [Getting Started](#getting-started)
|
||||
- [Tech Stack](#tech-stack)
|
||||
- [Build Pipeline](#build-pipeline)
|
||||
- [Architecture](#architecture)
|
||||
- [Data Flows](#data-flows)
|
||||
- [Architectural Patterns](#architectural-patterns)
|
||||
- [Testing](#testing)
|
||||
|
||||
---
|
||||
|
||||
## Features
|
||||
|
||||
- **Modern Chat Interface** - Clean, responsive design with dark/light mode
|
||||
- **File Attachments** - Support for images, text files, PDFs, and audio with rich previews and drag-and-drop support
|
||||
- **Conversation Management** - Create, edit, branch, and search conversations
|
||||
- **Advanced Markdown** - Code highlighting, math formulas (KaTeX), and content blocks
|
||||
- **Reasoning Content** - Support for models with thinking blocks
|
||||
- **Keyboard Shortcuts** - Keyboard navigation (Shift+Ctrl/Cmd+O for new chat, Shift+Ctrl/Cmdt+E for edit conversation, Shift+Ctrl/Cmdt+D for delete conversation, Ctrl/Cmd+K for search, Ctrl/Cmd+V for paste, Ctrl/Cmd+B for opening/collapsing sidebar)
|
||||
- **Request Tracking** - Monitor processing with slots endpoint integration
|
||||
- **UI Testing** - Storybook component library with automated tests
|
||||
### Chat Interface
|
||||
|
||||
## Development
|
||||
- **Streaming responses** with real-time updates
|
||||
- **Reasoning content** - Support for models with thinking/reasoning blocks
|
||||
- **Dark/light theme** with system preference detection
|
||||
- **Responsive design** for desktop and mobile
|
||||
|
||||
Install dependencies:
|
||||
### File Attachments
|
||||
|
||||
- **Images** - JPEG, PNG, GIF, WebP, SVG (with PNG conversion)
|
||||
- **Documents** - PDF (text extraction or image conversion for vision models)
|
||||
- **Audio** - MP3, WAV for audio-capable models
|
||||
- **Text files** - Source code, markdown, and other text formats
|
||||
- **Drag-and-drop** and paste support with rich previews
|
||||
|
||||
### Conversation Management
|
||||
|
||||
- **Branching** - Branch messages conversations at any point by editing messages or regenerating responses, navigate between branches
|
||||
- **Regeneration** - Regenerate responses with optional model switching (ROUTER mode)
|
||||
- **Import/Export** - JSON format for backup and sharing
|
||||
- **Search** - Find conversations by title or content
|
||||
|
||||
### Advanced Rendering
|
||||
|
||||
- **Syntax highlighting** - Code blocks with language detection
|
||||
- **Math formulas** - KaTeX rendering for LaTeX expressions
|
||||
- **Markdown** - Full GFM support with tables, lists, and more
|
||||
|
||||
### Multi-Model Support (ROUTER mode)
|
||||
|
||||
- **Model selector** with Loaded/Available groups
|
||||
- **Automatic loading** - Models load on selection
|
||||
- **Modality validation** - Prevents sending images to non-vision models
|
||||
- **LRU unloading** - Server auto-manages model cache
|
||||
|
||||
### Keyboard Shortcuts
|
||||
|
||||
| Shortcut | Action |
|
||||
| ------------------ | -------------------- |
|
||||
| `Shift+Ctrl/Cmd+O` | New chat |
|
||||
| `Shift+Ctrl/Cmd+E` | Edit conversation |
|
||||
| `Shift+Ctrl/Cmd+D` | Delete conversation |
|
||||
| `Ctrl/Cmd+K` | Search conversations |
|
||||
| `Ctrl/Cmd+B` | Toggle sidebar |
|
||||
|
||||
### Developer Experience
|
||||
|
||||
- **Request tracking** - Monitor token generation with `/slots` endpoint
|
||||
- **Storybook** - Component library with visual testing
|
||||
- **Hot reload** - Instant updates during development
|
||||
|
||||
---
|
||||
|
||||
## Getting Started
|
||||
|
||||
### Prerequisites
|
||||
|
||||
- **Node.js** 18+ (20+ recommended)
|
||||
- **npm** 9+
|
||||
- **llama-server** running locally (for API access)
|
||||
|
||||
### 1. Install Dependencies
|
||||
|
||||
```bash
|
||||
cd tools/server/webui
|
||||
npm install
|
||||
```
|
||||
|
||||
Start the development server + Storybook:
|
||||
### 2. Start llama-server
|
||||
|
||||
In a separate terminal, start the backend server:
|
||||
|
||||
```bash
|
||||
# Single model (MODEL mode)
|
||||
./llama-server -m model.gguf
|
||||
|
||||
# Multi-model (ROUTER mode)
|
||||
./llama-server --model-store /path/to/models
|
||||
```
|
||||
|
||||
### 3. Start Development Servers
|
||||
|
||||
```bash
|
||||
npm run dev
|
||||
```
|
||||
|
||||
This will start both the SvelteKit dev server and Storybook on port 6006.
|
||||
This starts:
|
||||
|
||||
## Building
|
||||
- **Vite dev server** at `http://localhost:5173` - The main WebUI
|
||||
- **Storybook** at `http://localhost:6006` - Component documentation
|
||||
|
||||
Create a production build:
|
||||
The Vite dev server proxies API requests to `http://localhost:8080` (default llama-server port):
|
||||
|
||||
```typescript
|
||||
// vite.config.ts proxy configuration
|
||||
proxy: {
|
||||
'/v1': 'http://localhost:8080',
|
||||
'/props': 'http://localhost:8080',
|
||||
'/slots': 'http://localhost:8080',
|
||||
'/models': 'http://localhost:8080'
|
||||
}
|
||||
```
|
||||
|
||||
### Development Workflow
|
||||
|
||||
1. Open `http://localhost:5173` in your browser
|
||||
2. Make changes to `.svelte`, `.ts`, or `.css` files
|
||||
3. Changes hot-reload instantly
|
||||
4. Use Storybook at `http://localhost:6006` for isolated component development
|
||||
|
||||
---
|
||||
|
||||
## Tech Stack
|
||||
|
||||
| Layer | Technology | Purpose |
|
||||
| ----------------- | ------------------------------- | -------------------------------------------------------- |
|
||||
| **Framework** | SvelteKit + Svelte 5 | Reactive UI with runes (`$state`, `$derived`, `$effect`) |
|
||||
| **UI Components** | shadcn-svelte + bits-ui | Accessible, customizable component library |
|
||||
| **Styling** | TailwindCSS 4 | Utility-first CSS with design tokens |
|
||||
| **Database** | IndexedDB (Dexie) | Client-side storage for conversations and messages |
|
||||
| **Build** | Vite | Fast bundling with static adapter |
|
||||
| **Testing** | Playwright + Vitest + Storybook | E2E, unit, and visual testing |
|
||||
| **Markdown** | remark + rehype | Markdown processing with KaTeX and syntax highlighting |
|
||||
|
||||
### Key Dependencies
|
||||
|
||||
```json
|
||||
{
|
||||
"svelte": "^5.0.0",
|
||||
"bits-ui": "^2.8.11",
|
||||
"dexie": "^4.0.11",
|
||||
"pdfjs-dist": "^5.4.54",
|
||||
"highlight.js": "^11.11.1",
|
||||
"rehype-katex": "^7.0.1"
|
||||
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Build Pipeline
|
||||
|
||||
### Development Build
|
||||
|
||||
```bash
|
||||
npm run dev
|
||||
```
|
||||
|
||||
Runs Vite in development mode with:
|
||||
|
||||
- Hot Module Replacement (HMR)
|
||||
- Source maps
|
||||
- Proxy to llama-server
|
||||
|
||||
### Production Build
|
||||
|
||||
```bash
|
||||
npm run build
|
||||
```
|
||||
|
||||
The build outputs static files to `../public` directory for deployment with llama.cpp server.
|
||||
The build process:
|
||||
|
||||
## Testing
|
||||
1. **Vite Build** - Bundles all TypeScript, Svelte, and CSS
|
||||
2. **Static Adapter** - Outputs to `../public` (llama-server's static file directory)
|
||||
3. **Post-Build Script** - Cleans up intermediate files
|
||||
4. **Custom Plugin** - Creates `index.html.gz` with:
|
||||
- Inlined favicon as base64
|
||||
- GZIP compression (level 9)
|
||||
- Deterministic output (zeroed timestamps)
|
||||
|
||||
Run the test suite:
|
||||
|
||||
```bash
|
||||
# E2E tests
|
||||
npm run test:e2e
|
||||
|
||||
# Unit tests
|
||||
npm run test:unit
|
||||
|
||||
# UI tests
|
||||
npm run test:ui
|
||||
|
||||
# All tests
|
||||
npm run test
|
||||
```text
|
||||
tools/server/webui/ → build → tools/server/public/
|
||||
├── src/ ├── index.html.gz (served by llama-server)
|
||||
├── static/ └── (favicon inlined)
|
||||
└── ...
|
||||
```
|
||||
|
||||
### SvelteKit Configuration
|
||||
|
||||
```javascript
|
||||
// svelte.config.js
|
||||
adapter: adapter({
|
||||
pages: '../public', // Output directory
|
||||
assets: '../public', // Static assets
|
||||
fallback: 'index.html', // SPA fallback
|
||||
strict: true
|
||||
}),
|
||||
output: {
|
||||
bundleStrategy: 'inline' // Single-file bundle
|
||||
}
|
||||
```
|
||||
|
||||
### Integration with llama-server
|
||||
|
||||
The WebUI is embedded directly into the llama-server binary:
|
||||
|
||||
1. `npm run build` outputs `index.html.gz` to `tools/server/public/`
|
||||
2. llama-server compiles this into the binary at build time
|
||||
3. When accessing `/`, llama-server serves the gzipped HTML
|
||||
4. All assets are inlined (CSS, JS, fonts, favicon)
|
||||
|
||||
This results in a **single portable binary** with the full WebUI included.
|
||||
|
||||
---
|
||||
|
||||
## Architecture
|
||||
|
||||
- **Framework**: SvelteKit with Svelte 5 runes
|
||||
- **Components**: ShadCN UI + bits-ui design system
|
||||
- **Database**: IndexedDB with Dexie for local storage
|
||||
- **Build**: Static adapter for deployment with llama.cpp server
|
||||
- **Testing**: Playwright (E2E) + Vitest (unit) + Storybook (components)
|
||||
The WebUI follows a layered architecture with unidirectional data flow:
|
||||
|
||||
```text
|
||||
Routes → Components → Hooks → Stores → Services → Storage/API
|
||||
```
|
||||
|
||||
### High-Level Architecture
|
||||
|
||||
See: [`docs/architecture/high-level-architecture-simplified.md`](docs/architecture/high-level-architecture-simplified.md)
|
||||
|
||||
```mermaid
|
||||
flowchart TB
|
||||
subgraph Routes["📍 Routes"]
|
||||
R1["/ (Welcome)"]
|
||||
R2["/chat/[id]"]
|
||||
RL["+layout.svelte"]
|
||||
end
|
||||
|
||||
subgraph Components["🧩 Components"]
|
||||
C_Sidebar["ChatSidebar"]
|
||||
C_Screen["ChatScreen"]
|
||||
C_Form["ChatForm"]
|
||||
C_Messages["ChatMessages"]
|
||||
C_ModelsSelector["ModelsSelector"]
|
||||
C_Settings["ChatSettings"]
|
||||
end
|
||||
|
||||
subgraph Stores["🗄️ Stores"]
|
||||
S1["chatStore"]
|
||||
S2["conversationsStore"]
|
||||
S3["modelsStore"]
|
||||
S4["serverStore"]
|
||||
S5["settingsStore"]
|
||||
end
|
||||
|
||||
subgraph Services["⚙️ Services"]
|
||||
SV1["ChatService"]
|
||||
SV2["ModelsService"]
|
||||
SV3["PropsService"]
|
||||
SV4["DatabaseService"]
|
||||
end
|
||||
|
||||
subgraph Storage["💾 Storage"]
|
||||
ST1["IndexedDB"]
|
||||
ST2["LocalStorage"]
|
||||
end
|
||||
|
||||
subgraph APIs["🌐 llama-server"]
|
||||
API1["/v1/chat/completions"]
|
||||
API2["/props"]
|
||||
API3["/models/*"]
|
||||
end
|
||||
|
||||
R1 & R2 --> C_Screen
|
||||
RL --> C_Sidebar
|
||||
C_Screen --> C_Form & C_Messages & C_Settings
|
||||
C_Screen --> S1 & S2
|
||||
C_ModelsSelector --> S3 & S4
|
||||
S1 --> SV1 & SV4
|
||||
S3 --> SV2 & SV3
|
||||
SV4 --> ST1
|
||||
SV1 --> API1
|
||||
SV2 --> API3
|
||||
SV3 --> API2
|
||||
```
|
||||
|
||||
### Layer Breakdown
|
||||
|
||||
#### Routes (`src/routes/`)
|
||||
|
||||
- **`/`** - Welcome screen, creates new conversation
|
||||
- **`/chat/[id]`** - Active chat interface
|
||||
- **`+layout.svelte`** - Sidebar, navigation, global initialization
|
||||
|
||||
#### Components (`src/lib/components/`)
|
||||
|
||||
Components are organized in `app/` (application-specific) and `ui/` (shadcn-svelte primitives).
|
||||
|
||||
**Chat Components** (`app/chat/`):
|
||||
|
||||
| Component | Responsibility |
|
||||
| ------------------ | --------------------------------------------------------------------------- |
|
||||
| `ChatScreen/` | Main chat container, coordinates message list, input form, and attachments |
|
||||
| `ChatForm/` | Message input textarea with file upload, paste handling, keyboard shortcuts |
|
||||
| `ChatMessages/` | Message list with branch navigation, regenerate/continue/edit actions |
|
||||
| `ChatAttachments/` | File attachment previews, drag-and-drop, PDF/image/audio handling |
|
||||
| `ChatSettings/` | Parameter sliders (temperature, top-p, etc.) with server default sync |
|
||||
| `ChatSidebar/` | Conversation list, search, import/export, navigation |
|
||||
|
||||
**Dialog Components** (`app/dialogs/`):
|
||||
|
||||
| Component | Responsibility |
|
||||
| ------------------------------- | -------------------------------------------------------- |
|
||||
| `DialogChatSettings` | Full-screen settings configuration |
|
||||
| `DialogModelInformation` | Model details (context size, modalities, parallel slots) |
|
||||
| `DialogChatAttachmentPreview` | Full preview for images, PDFs (text or page view), code |
|
||||
| `DialogConfirmation` | Generic confirmation for destructive actions |
|
||||
| `DialogConversationTitleUpdate` | Edit conversation title |
|
||||
|
||||
**Server/Model Components** (`app/server/`, `app/models/`):
|
||||
|
||||
| Component | Responsibility |
|
||||
| ------------------- | --------------------------------------------------------- |
|
||||
| `ServerErrorSplash` | Error display when server is unreachable |
|
||||
| `ModelsSelector` | Model dropdown with Loaded/Available groups (ROUTER mode) |
|
||||
|
||||
**Shared UI Components** (`app/misc/`):
|
||||
|
||||
| Component | Responsibility |
|
||||
| -------------------------------- | ---------------------------------------------------------------- |
|
||||
| `MarkdownContent` | Markdown rendering with KaTeX, syntax highlighting, copy buttons |
|
||||
| `SyntaxHighlightedCode` | Code blocks with language detection and highlighting |
|
||||
| `ActionButton`, `ActionDropdown` | Reusable action buttons and menus |
|
||||
| `BadgeModality`, `BadgeInfo` | Status and capability badges |
|
||||
|
||||
#### Hooks (`src/lib/hooks/`)
|
||||
|
||||
- **`useModelChangeValidation`** - Validates model switch against conversation modalities
|
||||
- **`useProcessingState`** - Tracks streaming progress and token generation
|
||||
|
||||
#### Stores (`src/lib/stores/`)
|
||||
|
||||
| Store | Responsibility |
|
||||
| -------------------- | --------------------------------------------------------- |
|
||||
| `chatStore` | Message sending, streaming, abort control, error handling |
|
||||
| `conversationsStore` | CRUD for conversations, message branching, navigation |
|
||||
| `modelsStore` | Model list, selection, loading/unloading (ROUTER) |
|
||||
| `serverStore` | Server properties, role detection, modalities |
|
||||
| `settingsStore` | User preferences, parameter sync with server defaults |
|
||||
|
||||
#### Services (`src/lib/services/`)
|
||||
|
||||
| Service | Responsibility |
|
||||
| ---------------------- | ----------------------------------------------- |
|
||||
| `ChatService` | API calls to`/v1/chat/completions`, SSE parsing |
|
||||
| `ModelsService` | `/models`, `/models/load`, `/models/unload` |
|
||||
| `PropsService` | `/props`, `/props?model=` |
|
||||
| `DatabaseService` | IndexedDB operations via Dexie |
|
||||
| `ParameterSyncService` | Syncs settings with server defaults |
|
||||
|
||||
---
|
||||
|
||||
## Data Flows
|
||||
|
||||
### MODEL Mode (Single Model)
|
||||
|
||||
See: [`docs/flows/data-flow-simplified-model-mode.md`](docs/flows/data-flow-simplified-model-mode.md)
|
||||
|
||||
```mermaid
|
||||
sequenceDiagram
|
||||
participant User
|
||||
participant UI
|
||||
participant Stores
|
||||
participant DB as IndexedDB
|
||||
participant API as llama-server
|
||||
|
||||
Note over User,API: Initialization
|
||||
UI->>Stores: initialize()
|
||||
Stores->>DB: load conversations
|
||||
Stores->>API: GET /props
|
||||
API-->>Stores: server config
|
||||
Stores->>API: GET /v1/models
|
||||
API-->>Stores: single model (auto-selected)
|
||||
|
||||
Note over User,API: Chat Flow
|
||||
User->>UI: send message
|
||||
Stores->>DB: save user message
|
||||
Stores->>API: POST /v1/chat/completions (stream)
|
||||
loop streaming
|
||||
API-->>Stores: SSE chunks
|
||||
Stores-->>UI: reactive update
|
||||
end
|
||||
Stores->>DB: save assistant message
|
||||
```
|
||||
|
||||
### ROUTER Mode (Multi-Model)
|
||||
|
||||
See: [`docs/flows/data-flow-simplified-router-mode.md`](docs/flows/data-flow-simplified-router-mode.md)
|
||||
|
||||
```mermaid
|
||||
sequenceDiagram
|
||||
participant User
|
||||
participant UI
|
||||
participant Stores
|
||||
participant API as llama-server
|
||||
|
||||
Note over User,API: Initialization
|
||||
Stores->>API: GET /props
|
||||
API-->>Stores: {role: "router"}
|
||||
Stores->>API: GET /models
|
||||
API-->>Stores: models[] with status
|
||||
|
||||
Note over User,API: Model Selection
|
||||
User->>UI: select model
|
||||
alt model not loaded
|
||||
Stores->>API: POST /models/load
|
||||
loop poll status
|
||||
Stores->>API: GET /models
|
||||
end
|
||||
Stores->>API: GET /props?model=X
|
||||
end
|
||||
Stores->>Stores: validate modalities
|
||||
|
||||
Note over User,API: Chat Flow
|
||||
Stores->>API: POST /v1/chat/completions {model: X}
|
||||
loop streaming
|
||||
API-->>Stores: SSE chunks + model info
|
||||
end
|
||||
```
|
||||
|
||||
### Detailed Flow Diagrams
|
||||
|
||||
| Flow | Description | File |
|
||||
| ------------- | ------------------------------------------ | ----------------------------------------------------------- |
|
||||
| Chat | Message lifecycle, streaming, regeneration | [`chat-flow.md`](docs/flows/chat-flow.md) |
|
||||
| Models | Loading, unloading, modality caching | [`models-flow.md`](docs/flows/models-flow.md) |
|
||||
| Server | Props fetching, role detection | [`server-flow.md`](docs/flows/server-flow.md) |
|
||||
| Conversations | CRUD, branching, import/export | [`conversations-flow.md`](docs/flows/conversations-flow.md) |
|
||||
| Database | IndexedDB schema, operations | [`database-flow.md`](docs/flows/database-flow.md) |
|
||||
| Settings | Parameter sync, user overrides | [`settings-flow.md`](docs/flows/settings-flow.md) |
|
||||
|
||||
---
|
||||
|
||||
## Architectural Patterns
|
||||
|
||||
### 1. Reactive State with Svelte 5 Runes
|
||||
|
||||
All stores use Svelte 5's fine-grained reactivity:
|
||||
|
||||
```typescript
|
||||
// Store with reactive state
|
||||
class ChatStore {
|
||||
#isLoading = $state(false);
|
||||
#currentResponse = $state('');
|
||||
|
||||
// Derived values auto-update
|
||||
get isStreaming() {
|
||||
return $derived(this.#isLoading && this.#currentResponse.length > 0);
|
||||
}
|
||||
}
|
||||
|
||||
// Exported reactive accessors
|
||||
export const isLoading = () => chatStore.isLoading;
|
||||
export const currentResponse = () => chatStore.currentResponse;
|
||||
```
|
||||
|
||||
### 2. Unidirectional Data Flow
|
||||
|
||||
Data flows in one direction, making state predictable:
|
||||
|
||||
```mermaid
|
||||
flowchart LR
|
||||
subgraph UI["UI Layer"]
|
||||
A[User Action] --> B[Component]
|
||||
end
|
||||
|
||||
subgraph State["State Layer"]
|
||||
B --> C[Store Method]
|
||||
C --> D[State Update]
|
||||
end
|
||||
|
||||
subgraph IO["I/O Layer"]
|
||||
C --> E[Service]
|
||||
E --> F[API / IndexedDB]
|
||||
F -.->|Response| D
|
||||
end
|
||||
|
||||
D -->|Reactive| B
|
||||
```
|
||||
|
||||
Components dispatch actions to stores, stores coordinate with services for I/O, and state updates reactively propagate back to the UI.
|
||||
|
||||
### 3. Per-Conversation State
|
||||
|
||||
Enables concurrent streaming across multiple conversations:
|
||||
|
||||
```typescript
|
||||
class ChatStore {
|
||||
chatLoadingStates = new Map<string, boolean>();
|
||||
chatStreamingStates = new Map<string, { response: string; messageId: string }>();
|
||||
abortControllers = new Map<string, AbortController>();
|
||||
}
|
||||
```
|
||||
|
||||
### 4. Message Branching with Tree Structure
|
||||
|
||||
Conversations are stored as a tree, not a linear list:
|
||||
|
||||
```typescript
|
||||
interface DatabaseMessage {
|
||||
id: string;
|
||||
parent: string | null; // Points to parent message
|
||||
children: string[]; // List of child message IDs
|
||||
// ...
|
||||
}
|
||||
|
||||
interface DatabaseConversation {
|
||||
currentNode: string; // Currently viewed branch tip
|
||||
// ...
|
||||
}
|
||||
```
|
||||
|
||||
Navigation between branches updates `currentNode` without losing history.
|
||||
|
||||
### 5. Layered Service Architecture
|
||||
|
||||
Stores handle state; services handle I/O:
|
||||
|
||||
```text
|
||||
┌─────────────────┐
|
||||
│ Stores │ Business logic, state management
|
||||
├─────────────────┤
|
||||
│ Services │ API calls, database operations
|
||||
├─────────────────┤
|
||||
│ Storage/API │ IndexedDB, LocalStorage, HTTP
|
||||
└─────────────────┘
|
||||
```
|
||||
|
||||
### 6. Server Role Abstraction
|
||||
|
||||
Single codebase handles both MODEL and ROUTER modes:
|
||||
|
||||
```typescript
|
||||
// serverStore.ts
|
||||
get isRouterMode() {
|
||||
return this.role === ServerRole.ROUTER;
|
||||
}
|
||||
|
||||
// Components conditionally render based on mode
|
||||
{#if isRouterMode()}
|
||||
<ModelsSelector />
|
||||
{/if}
|
||||
```
|
||||
|
||||
### 7. Modality Validation
|
||||
|
||||
Prevents sending attachments to incompatible models:
|
||||
|
||||
```typescript
|
||||
// useModelChangeValidation hook
|
||||
const validate = (modelId: string) => {
|
||||
const modelModalities = modelsStore.getModelModalities(modelId);
|
||||
const conversationModalities = conversationsStore.usedModalities;
|
||||
|
||||
// Check if model supports all used modalities
|
||||
if (conversationModalities.hasImages && !modelModalities.vision) {
|
||||
return { valid: false, reason: 'Model does not support images' };
|
||||
}
|
||||
// ...
|
||||
};
|
||||
```
|
||||
|
||||
### 8. Persistent Storage Strategy
|
||||
|
||||
Data is persisted across sessions using two storage mechanisms:
|
||||
|
||||
```mermaid
|
||||
flowchart TB
|
||||
subgraph Browser["Browser Storage"]
|
||||
subgraph IDB["IndexedDB (Dexie)"]
|
||||
C[Conversations]
|
||||
M[Messages]
|
||||
end
|
||||
subgraph LS["LocalStorage"]
|
||||
S[Settings Config]
|
||||
O[User Overrides]
|
||||
T[Theme Preference]
|
||||
end
|
||||
end
|
||||
|
||||
subgraph Stores["Svelte Stores"]
|
||||
CS[conversationsStore] --> C
|
||||
CS --> M
|
||||
SS[settingsStore] --> S
|
||||
SS --> O
|
||||
SS --> T
|
||||
end
|
||||
```
|
||||
|
||||
- **IndexedDB**: Conversations and messages (large, structured data)
|
||||
- **LocalStorage**: Settings, user parameter overrides, theme (small key-value data)
|
||||
- **Memory only**: Server props, model list (fetched fresh on each session)
|
||||
|
||||
---
|
||||
|
||||
## Testing
|
||||
|
||||
### Test Types
|
||||
|
||||
| Type | Tool | Location | Command |
|
||||
| ------------- | ------------------ | -------------------------------- | ------------------- |
|
||||
| **E2E** | Playwright | `tests/e2e/` | `npm run test:e2e` |
|
||||
| **Unit** | Vitest | `tests/client/`, `tests/server/` | `npm run test:unit` |
|
||||
| **UI/Visual** | Storybook + Vitest | `tests/stories/` | `npm run test:ui` |
|
||||
|
||||
### Running Tests
|
||||
|
||||
```bash
|
||||
# All tests
|
||||
npm run test
|
||||
|
||||
# Individual test suites
|
||||
npm run test:e2e # End-to-end (requires llama-server)
|
||||
npm run test:client # Client-side unit tests
|
||||
npm run test:server # Server-side unit tests
|
||||
npm run test:ui # Storybook visual tests
|
||||
```
|
||||
|
||||
### Storybook Development
|
||||
|
||||
```bash
|
||||
npm run storybook # Start Storybook dev server on :6006
|
||||
npm run build-storybook # Build static Storybook
|
||||
```
|
||||
|
||||
### Linting and Formatting
|
||||
|
||||
```bash
|
||||
npm run lint # Check code style
|
||||
npm run format # Auto-format with Prettier
|
||||
npm run check # TypeScript type checking
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Project Structure
|
||||
|
||||
```text
|
||||
tools/server/webui/
|
||||
├── src/
|
||||
│ ├── lib/
|
||||
│ │ ├── components/ # UI components (app/, ui/)
|
||||
│ │ ├── hooks/ # Svelte hooks
|
||||
│ │ ├── stores/ # State management
|
||||
│ │ ├── services/ # API and database services
|
||||
│ │ ├── types/ # TypeScript interfaces
|
||||
│ │ └── utils/ # Utility functions
|
||||
│ ├── routes/ # SvelteKit routes
|
||||
│ └── styles/ # Global styles
|
||||
├── static/ # Static assets
|
||||
├── tests/ # Test files
|
||||
├── docs/ # Architecture diagrams
|
||||
│ ├── architecture/ # High-level architecture
|
||||
│ └── flows/ # Feature-specific flows
|
||||
└── .storybook/ # Storybook configuration
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Related Documentation
|
||||
|
||||
- [llama.cpp Server README](../README.md) - Full server documentation
|
||||
- [Multimodal Documentation](../../../docs/multimodal.md) - Image and audio support
|
||||
- [Function Calling](../../../docs/function-calling.md) - Tool use capabilities
|
||||
|
|
|
|||
|
|
@ -0,0 +1,102 @@
|
|||
```mermaid
flowchart TB
    subgraph Routes["📍 Routes"]
        R1["/ (Welcome)"]
        R2["/chat/[id]"]
        RL["+layout.svelte"]
    end

    subgraph Components["🧩 Components"]
        C_Sidebar["ChatSidebar"]
        C_Screen["ChatScreen"]
        C_Form["ChatForm"]
        C_Messages["ChatMessages"]
        C_ModelsSelector["ModelsSelector"]
        C_Settings["ChatSettings"]
    end

    subgraph Hooks["🪝 Hooks"]
        H1["useModelChangeValidation"]
        H2["useProcessingState"]
    end

    subgraph Stores["🗄️ Stores"]
        S1["chatStore<br/><i>Chat interactions & streaming</i>"]
        S2["conversationsStore<br/><i>Conversation data & messages</i>"]
        S3["modelsStore<br/><i>Model selection & loading</i>"]
        S4["serverStore<br/><i>Server props & role detection</i>"]
        S5["settingsStore<br/><i>User configuration</i>"]
    end

    subgraph Services["⚙️ Services"]
        SV1["ChatService"]
        SV2["ModelsService"]
        SV3["PropsService"]
        SV4["DatabaseService"]
        SV5["ParameterSyncService"]
    end

    subgraph Storage["💾 Storage"]
        ST1["IndexedDB<br/><i>conversations, messages</i>"]
        ST2["LocalStorage<br/><i>config, userOverrides</i>"]
    end

    subgraph APIs["🌐 llama-server API"]
        API1["/v1/chat/completions"]
        API2["/props"]
        API3["/models/*"]
        API4["/v1/models"]
    end

    %% Routes → Components
    R1 & R2 --> C_Screen
    RL --> C_Sidebar

    %% Component hierarchy
    C_Screen --> C_Form & C_Messages & C_Settings
    C_Form & C_Messages --> C_ModelsSelector

    %% Components → Hooks → Stores
    C_Form & C_Messages --> H1 & H2
    H1 --> S3 & S4
    H2 --> S1 & S5

    %% Components → Stores
    C_Screen --> S1 & S2
    C_Sidebar --> S2
    C_ModelsSelector --> S3 & S4
    C_Settings --> S5

    %% Stores → Services
    S1 --> SV1 & SV4
    S2 --> SV4
    S3 --> SV2 & SV3
    S4 --> SV3
    S5 --> SV5

    %% Services → Storage
    SV4 --> ST1
    SV5 --> ST2

    %% Services → APIs
    SV1 --> API1
    SV2 --> API3 & API4
    SV3 --> API2

    %% Styling
    classDef routeStyle fill:#e1f5fe,stroke:#01579b,stroke-width:2px
    classDef componentStyle fill:#f3e5f5,stroke:#7b1fa2,stroke-width:2px
    classDef hookStyle fill:#fff8e1,stroke:#ff8f00,stroke-width:2px
    classDef storeStyle fill:#fff3e0,stroke:#e65100,stroke-width:2px
    classDef serviceStyle fill:#e8f5e9,stroke:#2e7d32,stroke-width:2px
    classDef storageStyle fill:#fce4ec,stroke:#c2185b,stroke-width:2px
    classDef apiStyle fill:#e3f2fd,stroke:#1565c0,stroke-width:2px

    class R1,R2,RL routeStyle
    class C_Sidebar,C_Screen,C_Form,C_Messages,C_ModelsSelector,C_Settings componentStyle
    class H1,H2 hookStyle
    class S1,S2,S3,S4,S5 storeStyle
    class SV1,SV2,SV3,SV4,SV5 serviceStyle
    class ST1,ST2 storageStyle
    class API1,API2,API3,API4 apiStyle
```
|
||||
|
|
@ -0,0 +1,269 @@
|
|||
```mermaid
|
||||
flowchart TB
|
||||
subgraph Routes["📍 Routes"]
|
||||
R1["/ (+page.svelte)"]
|
||||
R2["/chat/[id]"]
|
||||
RL["+layout.svelte"]
|
||||
end
|
||||
|
||||
subgraph Components["🧩 Components"]
|
||||
direction TB
|
||||
subgraph LayoutComponents["Layout"]
|
||||
C_Sidebar["ChatSidebar"]
|
||||
C_Screen["ChatScreen"]
|
||||
end
|
||||
subgraph ChatUIComponents["Chat UI"]
|
||||
C_Form["ChatForm"]
|
||||
C_Messages["ChatMessages"]
|
||||
C_Message["ChatMessage"]
|
||||
C_Attach["ChatAttachments"]
|
||||
C_ModelsSelector["ModelsSelector"]
|
||||
C_Settings["ChatSettings"]
|
||||
end
|
||||
end
|
||||
|
||||
subgraph Hooks["🪝 Hooks"]
|
||||
H1["useModelChangeValidation"]
|
||||
H2["useProcessingState"]
|
||||
H3["isMobile"]
|
||||
end
|
||||
|
||||
subgraph Stores["🗄️ Stores"]
|
||||
direction TB
|
||||
subgraph S1["chatStore"]
|
||||
S1State["<b>State:</b><br/>isLoading, currentResponse<br/>errorDialogState<br/>activeProcessingState<br/>chatLoadingStates<br/>chatStreamingStates<br/>abortControllers<br/>processingStates<br/>activeConversationId<br/>isStreamingActive"]
|
||||
S1LoadState["<b>Loading State:</b><br/>setChatLoading()<br/>isChatLoading()<br/>syncLoadingStateForChat()<br/>clearUIState()<br/>isChatLoadingPublic()<br/>getAllLoadingChats()<br/>getAllStreamingChats()"]
|
||||
S1ProcState["<b>Processing State:</b><br/>setActiveProcessingConversation()<br/>getProcessingState()<br/>clearProcessingState()<br/>getActiveProcessingState()<br/>updateProcessingStateFromTimings()<br/>getCurrentProcessingStateSync()<br/>restoreProcessingStateFromMessages()"]
|
||||
S1Stream["<b>Streaming:</b><br/>streamChatCompletion()<br/>startStreaming()<br/>stopStreaming()<br/>stopGeneration()<br/>isStreaming()"]
|
||||
S1Error["<b>Error Handling:</b><br/>showErrorDialog()<br/>dismissErrorDialog()<br/>isAbortError()"]
|
||||
S1Msg["<b>Message Operations:</b><br/>addMessage()<br/>sendMessage()<br/>updateMessage()<br/>deleteMessage()<br/>getDeletionInfo()"]
|
||||
S1Regen["<b>Regeneration:</b><br/>regenerateMessage()<br/>regenerateMessageWithBranching()<br/>continueAssistantMessage()"]
|
||||
S1Edit["<b>Editing:</b><br/>editAssistantMessage()<br/>editUserMessagePreserveResponses()<br/>editMessageWithBranching()"]
|
||||
S1Utils["<b>Utilities:</b><br/>getApiOptions()<br/>parseTimingData()<br/>getOrCreateAbortController()<br/>getConversationModel()"]
|
||||
end
|
||||
subgraph S2["conversationsStore"]
|
||||
S2State["<b>State:</b><br/>conversations<br/>activeConversation<br/>activeMessages<br/>usedModalities<br/>isInitialized<br/>titleUpdateConfirmationCallback"]
|
||||
S2Modal["<b>Modalities:</b><br/>getModalitiesUpToMessage()<br/>calculateModalitiesFromMessages()"]
|
||||
S2Lifecycle["<b>Lifecycle:</b><br/>initialize()<br/>loadConversations()<br/>clearActiveConversation()"]
|
||||
S2ConvCRUD["<b>Conversation CRUD:</b><br/>createConversation()<br/>loadConversation()<br/>deleteConversation()<br/>updateConversationName()<br/>updateConversationTitleWithConfirmation()"]
|
||||
S2MsgMgmt["<b>Message Management:</b><br/>refreshActiveMessages()<br/>addMessageToActive()<br/>updateMessageAtIndex()<br/>findMessageIndex()<br/>sliceActiveMessages()<br/>removeMessageAtIndex()<br/>getConversationMessages()"]
|
||||
S2Nav["<b>Navigation:</b><br/>navigateToSibling()<br/>updateCurrentNode()<br/>updateConversationTimestamp()"]
|
||||
S2Export["<b>Import/Export:</b><br/>downloadConversation()<br/>exportAllConversations()<br/>importConversations()<br/>triggerDownload()"]
|
||||
S2Utils["<b>Utilities:</b><br/>setTitleUpdateConfirmationCallback()"]
|
||||
end
|
||||
subgraph S3["modelsStore"]
|
||||
S3State["<b>State:</b><br/>models, routerModels<br/>selectedModelId<br/>selectedModelName<br/>loading, updating, error<br/>modelLoadingStates<br/>modelPropsCache<br/>modelPropsFetching<br/>propsCacheVersion"]
|
||||
S3Getters["<b>Computed Getters:</b><br/>selectedModel<br/>loadedModelIds<br/>loadingModelIds<br/>singleModelName"]
|
||||
S3Modal["<b>Modalities:</b><br/>getModelModalities()<br/>modelSupportsVision()<br/>modelSupportsAudio()<br/>getModelModalitiesArray()<br/>getModelProps()<br/>updateModelModalities()"]
|
||||
S3Status["<b>Status Queries:</b><br/>isModelLoaded()<br/>isModelOperationInProgress()<br/>getModelStatus()<br/>isModelPropsFetching()"]
|
||||
S3Fetch["<b>Data Fetching:</b><br/>fetch()<br/>fetchRouterModels()<br/>fetchModelProps()<br/>fetchModalitiesForLoadedModels()"]
|
||||
S3Select["<b>Model Selection:</b><br/>selectModelById()<br/>selectModelByName()<br/>clearSelection()<br/>findModelByName()<br/>findModelById()<br/>hasModel()"]
|
||||
S3LoadUnload["<b>Loading/Unloading Models:</b><br/>loadModel()<br/>unloadModel()<br/>ensureModelLoaded()<br/>waitForModelStatus()<br/>pollForModelStatus()"]
|
||||
S3Utils["<b>Utilities:</b><br/>toDisplayName()<br/>clear()"]
|
||||
end
|
||||
subgraph S4["serverStore"]
|
||||
S4State["<b>State:</b><br/>props<br/>loading, error<br/>role<br/>fetchPromise"]
|
||||
S4Getters["<b>Getters:</b><br/>defaultParams<br/>contextSize<br/>isRouterMode<br/>isModelMode"]
|
||||
S4Data["<b>Data Handling:</b><br/>fetch()<br/>getErrorMessage()<br/>clear()"]
|
||||
S4Utils["<b>Utilities:</b><br/>detectRole()"]
|
||||
end
|
||||
subgraph S5["settingsStore"]
|
||||
S5State["<b>State:</b><br/>config<br/>theme<br/>isInitialized<br/>userOverrides"]
|
||||
S5Lifecycle["<b>Lifecycle:</b><br/>initialize()<br/>loadConfig()<br/>saveConfig()<br/>loadTheme()<br/>saveTheme()"]
|
||||
S5Update["<b>Config Updates:</b><br/>updateConfig()<br/>updateMultipleConfig()<br/>updateTheme()"]
|
||||
S5Reset["<b>Reset:</b><br/>resetConfig()<br/>resetTheme()<br/>resetAll()<br/>resetParameterToServerDefault()"]
|
||||
S5Sync["<b>Server Sync:</b><br/>syncWithServerDefaults()<br/>forceSyncWithServerDefaults()"]
|
||||
S5Utils["<b>Utilities:</b><br/>getConfig()<br/>getAllConfig()<br/>getParameterInfo()<br/>getParameterDiff()<br/>getServerDefaults()<br/>clearAllUserOverrides()"]
|
||||
end
|
||||
|
||||
subgraph ReactiveExports["⚡ Reactive Exports"]
|
||||
direction LR
|
||||
subgraph ChatExports["chatStore"]
|
||||
RE1["isLoading()"]
|
||||
RE2["currentResponse()"]
|
||||
RE3["errorDialog()"]
|
||||
RE4["activeProcessingState()"]
|
||||
RE5["isChatStreaming()"]
|
||||
RE6["isChatLoading()"]
|
||||
RE7["getChatStreaming()"]
|
||||
RE8["getAllLoadingChats()"]
|
||||
RE9["getAllStreamingChats()"]
|
||||
end
|
||||
subgraph ConvExports["conversationsStore"]
|
||||
RE10["conversations()"]
|
||||
RE11["activeConversation()"]
|
||||
RE12["activeMessages()"]
|
||||
RE13["isConversationsInitialized()"]
|
||||
RE14["usedModalities()"]
|
||||
end
|
||||
subgraph ModelsExports["modelsStore"]
|
||||
RE15["modelOptions()"]
|
||||
RE16["routerModels()"]
|
||||
RE17["modelsLoading()"]
|
||||
RE18["modelsUpdating()"]
|
||||
RE19["modelsError()"]
|
||||
RE20["selectedModelId()"]
|
||||
RE21["selectedModelName()"]
|
||||
RE22["selectedModelOption()"]
|
||||
RE23["loadedModelIds()"]
|
||||
RE24["loadingModelIds()"]
|
||||
RE25["propsCacheVersion()"]
|
||||
RE26["singleModelName()"]
|
||||
end
|
||||
subgraph ServerExports["serverStore"]
|
||||
RE27["serverProps()"]
|
||||
RE28["serverLoading()"]
|
||||
RE29["serverError()"]
|
||||
RE30["serverRole()"]
|
||||
RE31["defaultParams()"]
|
||||
RE32["contextSize()"]
|
||||
RE33["isRouterMode()"]
|
||||
RE34["isModelMode()"]
|
||||
end
|
||||
subgraph SettingsExports["settingsStore"]
|
||||
RE35["config()"]
|
||||
RE36["theme()"]
|
||||
RE37["isInitialized()"]
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
subgraph Services["⚙️ Services"]
|
||||
direction TB
|
||||
subgraph SV1["ChatService"]
|
||||
SV1Msg["<b>Messaging:</b><br/>sendMessage()"]
|
||||
SV1Stream["<b>Streaming:</b><br/>handleStreamResponse()<br/>parseSSEChunk()"]
|
||||
SV1Convert["<b>Conversion:</b><br/>convertMessageToChatData()<br/>convertExtraToApiFormat()"]
|
||||
SV1Utils["<b>Utilities:</b><br/>extractReasoningContent()<br/>getServerProps()<br/>getModels()"]
|
||||
end
|
||||
subgraph SV2["ModelsService"]
|
||||
SV2List["<b>Listing:</b><br/>list()<br/>listRouter()"]
|
||||
SV2LoadUnload["<b>Load/Unload:</b><br/>load()<br/>unload()"]
|
||||
SV2Status["<b>Status:</b><br/>isModelLoaded()<br/>isModelLoading()"]
|
||||
end
|
||||
subgraph SV3["PropsService"]
|
||||
SV3Fetch["<b>Fetching:</b><br/>fetch()<br/>fetchForModel()"]
|
||||
end
|
||||
subgraph SV4["DatabaseService"]
|
||||
SV4Conv["<b>Conversations:</b><br/>createConversation()<br/>getConversation()<br/>getAllConversations()<br/>updateConversation()<br/>deleteConversation()"]
|
||||
SV4Msg["<b>Messages:</b><br/>createMessageBranch()<br/>createRootMessage()<br/>getConversationMessages()<br/>updateMessage()<br/>deleteMessage()<br/>deleteMessageCascading()"]
|
||||
SV4Node["<b>Navigation:</b><br/>updateCurrentNode()"]
|
||||
SV4Import["<b>Import:</b><br/>importConversations()"]
|
||||
end
|
||||
subgraph SV5["ParameterSyncService"]
|
||||
SV5Extract["<b>Extraction:</b><br/>extractServerDefaults()"]
|
||||
SV5Merge["<b>Merging:</b><br/>mergeWithServerDefaults()"]
|
||||
SV5Info["<b>Info:</b><br/>getParameterInfo()<br/>canSyncParameter()<br/>getSyncableParameterKeys()<br/>validateServerParameter()"]
|
||||
SV5Diff["<b>Diff:</b><br/>createParameterDiff()"]
|
||||
end
|
||||
end
|
||||
|
||||
subgraph Storage["💾 Storage"]
|
||||
ST1["IndexedDB"]
|
||||
ST2["conversations"]
|
||||
ST3["messages"]
|
||||
ST5["LocalStorage"]
|
||||
ST6["config"]
|
||||
ST7["userOverrides"]
|
||||
end
|
||||
|
||||
subgraph APIs["🌐 llama-server API"]
|
||||
API1["/v1/chat/completions"]
|
||||
API2["/props<br/>/props?model="]
|
||||
API3["/models<br/>/models/load<br/>/models/unload"]
|
||||
API4["/v1/models"]
|
||||
end
|
||||
|
||||
%% Routes render Components
|
||||
R1 --> C_Screen
|
||||
R2 --> C_Screen
|
||||
RL --> C_Sidebar
|
||||
|
||||
%% Component hierarchy
|
||||
C_Screen --> C_Form & C_Messages & C_Settings
|
||||
C_Messages --> C_Message
|
||||
C_Message --> C_ModelsSelector
|
||||
C_Form --> C_ModelsSelector
|
||||
C_Form --> C_Attach
|
||||
C_Message --> C_Attach
|
||||
|
||||
%% Components use Hooks
|
||||
C_Form --> H1
|
||||
C_Message --> H1 & H2
|
||||
C_Screen --> H2
|
||||
|
||||
%% Hooks use Stores
|
||||
H1 --> S3 & S4
|
||||
H2 --> S1 & S5
|
||||
|
||||
%% Components use Stores
|
||||
C_Screen --> S1 & S2
|
||||
C_Messages --> S2
|
||||
C_Message --> S1 & S2 & S3
|
||||
C_Form --> S1 & S3
|
||||
C_Sidebar --> S2
|
||||
C_ModelsSelector --> S3 & S4
|
||||
C_Settings --> S5
|
||||
|
||||
%% Stores export Reactive State
|
||||
S1 -. exports .-> ChatExports
|
||||
S2 -. exports .-> ConvExports
|
||||
S3 -. exports .-> ModelsExports
|
||||
S4 -. exports .-> ServerExports
|
||||
S5 -. exports .-> SettingsExports
|
||||
|
||||
%% Stores use Services
|
||||
S1 --> SV1 & SV4
|
||||
S2 --> SV4
|
||||
S3 --> SV2 & SV3
|
||||
S4 --> SV3
|
||||
S5 --> SV5
|
||||
|
||||
%% Services to Storage
|
||||
SV4 --> ST1
|
||||
ST1 --> ST2 & ST3
|
||||
SV5 --> ST5
|
||||
ST5 --> ST6 & ST7
|
||||
|
||||
%% Services to APIs
|
||||
SV1 --> API1
|
||||
SV2 --> API3 & API4
|
||||
SV3 --> API2
|
||||
|
||||
%% Styling
|
||||
classDef routeStyle fill:#e1f5fe,stroke:#01579b,stroke-width:2px
|
||||
classDef componentStyle fill:#f3e5f5,stroke:#7b1fa2,stroke-width:2px
|
||||
classDef componentGroupStyle fill:#e1bee7,stroke:#7b1fa2,stroke-width:1px
|
||||
classDef storeStyle fill:#fff3e0,stroke:#e65100,stroke-width:2px
|
||||
classDef stateStyle fill:#ffe0b2,stroke:#e65100,stroke-width:1px
|
||||
classDef methodStyle fill:#ffecb3,stroke:#e65100,stroke-width:1px
|
||||
classDef reactiveStyle fill:#fffde7,stroke:#f9a825,stroke-width:1px
|
||||
classDef serviceStyle fill:#e8f5e9,stroke:#2e7d32,stroke-width:2px
|
||||
classDef serviceMStyle fill:#c8e6c9,stroke:#2e7d32,stroke-width:1px
|
||||
classDef storageStyle fill:#fce4ec,stroke:#c2185b,stroke-width:2px
|
||||
classDef apiStyle fill:#e3f2fd,stroke:#1565c0,stroke-width:2px
|
||||
|
||||
class R1,R2,RL routeStyle
|
||||
class C_Sidebar,C_Screen,C_Form,C_Messages,C_Message componentStyle
|
||||
class C_ModelsSelector,C_Settings componentStyle
|
||||
class C_Attach componentStyle
|
||||
class H1,H2,H3 methodStyle
|
||||
class LayoutComponents,ChatUIComponents componentGroupStyle
|
||||
class Hooks storeStyle
|
||||
class S1,S2,S3,S4,S5 storeStyle
|
||||
class S1State,S2State,S3State,S4State,S5State stateStyle
|
||||
class S1Msg,S1Regen,S1Edit,S1Stream,S1LoadState,S1ProcState,S1Error,S1Utils methodStyle
|
||||
class S2Lifecycle,S2ConvCRUD,S2MsgMgmt,S2Nav,S2Modal,S2Export,S2Utils methodStyle
|
||||
class S3Getters,S3Modal,S3Status,S3Fetch,S3Select,S3LoadUnload,S3Utils methodStyle
|
||||
class S4Getters,S4Data,S4Utils methodStyle
|
||||
class S5Lifecycle,S5Update,S5Reset,S5Sync,S5Utils methodStyle
|
||||
class ChatExports,ConvExports,ModelsExports,ServerExports,SettingsExports reactiveStyle
|
||||
class SV1,SV2,SV3,SV4,SV5 serviceStyle
|
||||
class SV1Msg,SV1Stream,SV1Convert,SV1Utils serviceMStyle
|
||||
class SV2List,SV2LoadUnload,SV2Status serviceMStyle
|
||||
class SV3Fetch serviceMStyle
|
||||
class SV4Conv,SV4Msg,SV4Node,SV4Import serviceMStyle
|
||||
class SV5Extract,SV5Merge,SV5Info,SV5Diff serviceMStyle
|
||||
class ST1,ST2,ST3,ST5,ST6,ST7 storageStyle
|
||||
class API1,API2,API3,API4 apiStyle
|
||||
```
|
||||
|
|
@ -0,0 +1,174 @@
|
|||
```mermaid
|
||||
sequenceDiagram
|
||||
participant UI as 🧩 ChatForm / ChatMessage
|
||||
participant chatStore as 🗄️ chatStore
|
||||
participant convStore as 🗄️ conversationsStore
|
||||
participant settingsStore as 🗄️ settingsStore
|
||||
participant ChatSvc as ⚙️ ChatService
|
||||
participant DbSvc as ⚙️ DatabaseService
|
||||
participant API as 🌐 /v1/chat/completions
|
||||
|
||||
Note over chatStore: State:<br/>isLoading, currentResponse<br/>errorDialogState, activeProcessingState<br/>chatLoadingStates (Map)<br/>chatStreamingStates (Map)<br/>abortControllers (Map)<br/>processingStates (Map)
|
||||
|
||||
%% ═══════════════════════════════════════════════════════════════════════════
|
||||
Note over UI,API: 💬 SEND MESSAGE
|
||||
%% ═══════════════════════════════════════════════════════════════════════════
|
||||
|
||||
UI->>chatStore: sendMessage(content, extras)
|
||||
activate chatStore
|
||||
|
||||
chatStore->>chatStore: setChatLoading(convId, true)
|
||||
chatStore->>chatStore: clearChatStreaming(convId)
|
||||
|
||||
alt no active conversation
|
||||
chatStore->>convStore: createConversation()
|
||||
Note over convStore: → see conversations-flow.mmd
|
||||
end
|
||||
|
||||
chatStore->>chatStore: addMessage("user", content, extras)
|
||||
chatStore->>DbSvc: createMessageBranch(userMsg, parentId)
|
||||
chatStore->>convStore: addMessageToActive(userMsg)
|
||||
chatStore->>convStore: updateCurrentNode(userMsg.id)
|
||||
|
||||
chatStore->>chatStore: createAssistantMessage(userMsg.id)
|
||||
chatStore->>DbSvc: createMessageBranch(assistantMsg, userMsg.id)
|
||||
chatStore->>convStore: addMessageToActive(assistantMsg)
|
||||
|
||||
chatStore->>chatStore: streamChatCompletion(messages, assistantMsg)
|
||||
deactivate chatStore
|
||||
|
||||
%% ═══════════════════════════════════════════════════════════════════════════
|
||||
Note over UI,API: 🌊 STREAMING
|
||||
%% ═══════════════════════════════════════════════════════════════════════════
|
||||
|
||||
activate chatStore
|
||||
chatStore->>chatStore: startStreaming()
|
||||
Note right of chatStore: isStreamingActive = true
|
||||
|
||||
chatStore->>chatStore: setActiveProcessingConversation(convId)
|
||||
chatStore->>chatStore: getOrCreateAbortController(convId)
|
||||
Note right of chatStore: abortControllers.set(convId, new AbortController())
|
||||
|
||||
chatStore->>chatStore: getApiOptions()
|
||||
Note right of chatStore: Merge from settingsStore.config:<br/>temperature, max_tokens, top_p, etc.
|
||||
|
||||
chatStore->>ChatSvc: sendMessage(messages, options, signal)
|
||||
activate ChatSvc
|
||||
|
||||
ChatSvc->>ChatSvc: convertMessageToChatData(messages)
|
||||
Note right of ChatSvc: DatabaseMessage[] → ApiChatMessageData[]<br/>Process attachments (images, PDFs, audio)
|
||||
|
||||
ChatSvc->>API: POST /v1/chat/completions
|
||||
Note right of API: {messages, model?, stream: true, ...params}
|
||||
|
||||
loop SSE chunks
|
||||
API-->>ChatSvc: data: {"choices":[{"delta":{...}}]}
|
||||
ChatSvc->>ChatSvc: parseSSEChunk(line)
|
||||
|
||||
alt content chunk
|
||||
ChatSvc-->>chatStore: onChunk(content)
|
||||
chatStore->>chatStore: setChatStreaming(convId, response, msgId)
|
||||
Note right of chatStore: currentResponse = $state(accumulated)
|
||||
chatStore->>convStore: updateMessageAtIndex(idx, {content})
|
||||
end
|
||||
|
||||
alt reasoning chunk
|
||||
ChatSvc-->>chatStore: onReasoningChunk(reasoning)
|
||||
chatStore->>convStore: updateMessageAtIndex(idx, {thinking})
|
||||
end
|
||||
|
||||
alt tool_calls chunk
|
||||
ChatSvc-->>chatStore: onToolCallChunk(toolCalls)
|
||||
chatStore->>convStore: updateMessageAtIndex(idx, {toolCalls})
|
||||
end
|
||||
|
||||
alt model info
|
||||
ChatSvc-->>chatStore: onModel(modelName)
|
||||
chatStore->>chatStore: recordModel(modelName)
|
||||
chatStore->>DbSvc: updateMessage(msgId, {model})
|
||||
end
|
||||
|
||||
alt timings (during stream)
|
||||
ChatSvc-->>chatStore: onTimings(timings, promptProgress)
|
||||
chatStore->>chatStore: updateProcessingStateFromTimings()
|
||||
end
|
||||
|
||||
chatStore-->>UI: reactive $state update
|
||||
end
|
||||
|
||||
API-->>ChatSvc: data: [DONE]
|
||||
ChatSvc-->>chatStore: onComplete(content, reasoning, timings, toolCalls)
|
||||
deactivate ChatSvc
|
||||
|
||||
chatStore->>chatStore: stopStreaming()
|
||||
chatStore->>DbSvc: updateMessage(msgId, {content, timings, model})
|
||||
chatStore->>convStore: updateCurrentNode(msgId)
|
||||
chatStore->>chatStore: setChatLoading(convId, false)
|
||||
chatStore->>chatStore: clearChatStreaming(convId)
|
||||
chatStore->>chatStore: clearProcessingState(convId)
|
||||
deactivate chatStore
|
||||
|
||||
%% ═══════════════════════════════════════════════════════════════════════════
|
||||
Note over UI,API: ⏹️ STOP GENERATION
|
||||
%% ═══════════════════════════════════════════════════════════════════════════
|
||||
|
||||
UI->>chatStore: stopGeneration()
|
||||
activate chatStore
|
||||
chatStore->>chatStore: savePartialResponseIfNeeded(convId)
|
||||
Note right of chatStore: Save currentResponse to DB if non-empty
|
||||
chatStore->>chatStore: abortControllers.get(convId).abort()
|
||||
Note right of chatStore: fetch throws AbortError → caught by isAbortError()
|
||||
chatStore->>chatStore: stopStreaming()
|
||||
chatStore->>chatStore: setChatLoading(convId, false)
|
||||
chatStore->>chatStore: clearChatStreaming(convId)
|
||||
chatStore->>chatStore: clearProcessingState(convId)
|
||||
deactivate chatStore
|
||||
|
||||
%% ═══════════════════════════════════════════════════════════════════════════
|
||||
Note over UI,API: 🔁 REGENERATE
|
||||
%% ═══════════════════════════════════════════════════════════════════════════
|
||||
|
||||
UI->>chatStore: regenerateMessageWithBranching(msgId, model?)
|
||||
activate chatStore
|
||||
chatStore->>convStore: findMessageIndex(msgId)
|
||||
chatStore->>chatStore: Get parent of target message
|
||||
chatStore->>chatStore: createAssistantMessage(parentId)
|
||||
chatStore->>DbSvc: createMessageBranch(newAssistantMsg, parentId)
|
||||
chatStore->>convStore: refreshActiveMessages()
|
||||
Note right of chatStore: Same streaming flow
|
||||
chatStore->>chatStore: streamChatCompletion(...)
|
||||
deactivate chatStore
|
||||
|
||||
%% ═══════════════════════════════════════════════════════════════════════════
|
||||
Note over UI,API: ➡️ CONTINUE
|
||||
%% ═══════════════════════════════════════════════════════════════════════════
|
||||
|
||||
UI->>chatStore: continueAssistantMessage(msgId)
|
||||
activate chatStore
|
||||
chatStore->>chatStore: Get existing content from message
|
||||
chatStore->>chatStore: streamChatCompletion(..., existingContent)
|
||||
Note right of chatStore: Appends to existing message content
|
||||
deactivate chatStore
|
||||
|
||||
%% ═══════════════════════════════════════════════════════════════════════════
|
||||
Note over UI,API: ✏️ EDIT USER MESSAGE
|
||||
%% ═══════════════════════════════════════════════════════════════════════════
|
||||
|
||||
UI->>chatStore: editUserMessagePreserveResponses(msgId, newContent)
|
||||
activate chatStore
|
||||
chatStore->>chatStore: Get parent of target message
|
||||
chatStore->>DbSvc: createMessageBranch(editedMsg, parentId)
|
||||
chatStore->>convStore: refreshActiveMessages()
|
||||
Note right of chatStore: Creates new branch, original preserved
|
||||
deactivate chatStore
|
||||
|
||||
%% ═══════════════════════════════════════════════════════════════════════════
|
||||
Note over UI,API: ❌ ERROR HANDLING
|
||||
%% ═══════════════════════════════════════════════════════════════════════════
|
||||
|
||||
Note over chatStore: On stream error (non-abort):
|
||||
chatStore->>chatStore: showErrorDialog(type, message)
|
||||
Note right of chatStore: errorDialogState = {type: 'timeout'|'server', message}
|
||||
chatStore->>convStore: removeMessageAtIndex(failedMsgIdx)
|
||||
chatStore->>DbSvc: deleteMessage(failedMsgId)
|
||||
```
|
||||
|
|
@ -0,0 +1,155 @@
|
|||
```mermaid
|
||||
sequenceDiagram
|
||||
participant UI as 🧩 ChatSidebar / ChatScreen
|
||||
participant convStore as 🗄️ conversationsStore
|
||||
participant chatStore as 🗄️ chatStore
|
||||
participant DbSvc as ⚙️ DatabaseService
|
||||
participant IDB as 💾 IndexedDB
|
||||
|
||||
Note over convStore: State:<br/>conversations: DatabaseConversation[]<br/>activeConversation: DatabaseConversation | null<br/>activeMessages: DatabaseMessage[]<br/>isInitialized: boolean<br/>usedModalities: $derived({vision, audio})
|
||||
|
||||
%% ═══════════════════════════════════════════════════════════════════════════
|
||||
Note over UI,IDB: 🚀 INITIALIZATION
|
||||
%% ═══════════════════════════════════════════════════════════════════════════
|
||||
|
||||
Note over convStore: Auto-initialized in constructor (browser only)
|
||||
convStore->>convStore: initialize()
|
||||
activate convStore
|
||||
convStore->>convStore: loadConversations()
|
||||
convStore->>DbSvc: getAllConversations()
|
||||
DbSvc->>IDB: SELECT * FROM conversations ORDER BY lastModified DESC
|
||||
IDB-->>DbSvc: Conversation[]
|
||||
DbSvc-->>convStore: conversations
|
||||
convStore->>convStore: conversations = $state(data)
|
||||
convStore->>convStore: isInitialized = true
|
||||
deactivate convStore
|
||||
|
||||
%% ═══════════════════════════════════════════════════════════════════════════
|
||||
Note over UI,IDB: ➕ CREATE CONVERSATION
|
||||
%% ═══════════════════════════════════════════════════════════════════════════
|
||||
|
||||
UI->>convStore: createConversation(name?)
|
||||
activate convStore
|
||||
convStore->>DbSvc: createConversation(name || "New Chat")
|
||||
DbSvc->>IDB: INSERT INTO conversations
|
||||
IDB-->>DbSvc: conversation {id, name, lastModified, currNode: ""}
|
||||
DbSvc-->>convStore: conversation
|
||||
convStore->>convStore: conversations.unshift(conversation)
|
||||
convStore->>convStore: activeConversation = $state(conversation)
|
||||
convStore->>convStore: activeMessages = $state([])
|
||||
deactivate convStore
|
||||
|
||||
%% ═══════════════════════════════════════════════════════════════════════════
|
||||
Note over UI,IDB: 📂 LOAD CONVERSATION
|
||||
%% ═══════════════════════════════════════════════════════════════════════════
|
||||
|
||||
UI->>convStore: loadConversation(convId)
|
||||
activate convStore
|
||||
convStore->>DbSvc: getConversation(convId)
|
||||
DbSvc->>IDB: SELECT * FROM conversations WHERE id = ?
|
||||
IDB-->>DbSvc: conversation
|
||||
convStore->>convStore: activeConversation = $state(conversation)
|
||||
|
||||
convStore->>convStore: refreshActiveMessages()
|
||||
convStore->>DbSvc: getConversationMessages(convId)
|
||||
DbSvc->>IDB: SELECT * FROM messages WHERE convId = ?
|
||||
IDB-->>DbSvc: allMessages[]
|
||||
convStore->>convStore: filterByLeafNodeId(allMessages, currNode)
|
||||
Note right of convStore: Filter to show only current branch path
|
||||
convStore->>convStore: activeMessages = $state(filtered)
|
||||
|
||||
convStore->>chatStore: syncLoadingStateForChat(convId)
|
||||
Note right of chatStore: Sync isLoading/currentResponse if streaming
|
||||
deactivate convStore
|
||||
|
||||
%% ═══════════════════════════════════════════════════════════════════════════
|
||||
Note over UI,IDB: 🌳 MESSAGE BRANCHING MODEL
|
||||
%% ═══════════════════════════════════════════════════════════════════════════
|
||||
|
||||
Note over IDB: Message Tree Structure:<br/>- Each message has parent (null for root)<br/>- Each message has children[] array<br/>- Conversation.currNode points to active leaf<br/>- filterByLeafNodeId() traverses from root to currNode
|
||||
|
||||
rect rgb(240, 240, 255)
|
||||
Note over convStore: Example Branch Structure:
|
||||
Note over convStore: root → user1 → assistant1 → user2 → assistant2a (currNode)<br/> ↘ assistant2b (alt branch)
|
||||
end
|
||||
|
||||
%% ═══════════════════════════════════════════════════════════════════════════
|
||||
Note over UI,IDB: ↔️ BRANCH NAVIGATION
|
||||
%% ═══════════════════════════════════════════════════════════════════════════
|
||||
|
||||
UI->>convStore: navigateToSibling(msgId, direction)
|
||||
activate convStore
|
||||
convStore->>convStore: Find message in activeMessages
|
||||
convStore->>convStore: Get parent message
|
||||
convStore->>convStore: Find sibling in parent.children[]
|
||||
convStore->>convStore: findLeafNode(siblingId, allMessages)
|
||||
Note right of convStore: Navigate to leaf of sibling branch
|
||||
convStore->>convStore: updateCurrentNode(leafId)
|
||||
convStore->>DbSvc: updateCurrentNode(convId, leafId)
|
||||
DbSvc->>IDB: UPDATE conversations SET currNode = ?
|
||||
convStore->>convStore: refreshActiveMessages()
|
||||
deactivate convStore
|
||||
|
||||
%% ═══════════════════════════════════════════════════════════════════════════
|
||||
Note over UI,IDB: 📝 UPDATE CONVERSATION
|
||||
%% ═══════════════════════════════════════════════════════════════════════════
|
||||
|
||||
UI->>convStore: updateConversationName(convId, newName)
|
||||
activate convStore
|
||||
convStore->>DbSvc: updateConversation(convId, {name: newName})
|
||||
DbSvc->>IDB: UPDATE conversations SET name = ?
|
||||
convStore->>convStore: Update in conversations array
|
||||
deactivate convStore
|
||||
|
||||
Note over convStore: Auto-title update (after first response):
|
||||
convStore->>convStore: updateConversationTitleWithConfirmation()
|
||||
convStore->>convStore: titleUpdateConfirmationCallback?()
|
||||
Note right of convStore: Shows dialog if title would change
|
||||
|
||||
%% ═══════════════════════════════════════════════════════════════════════════
|
||||
Note over UI,IDB: 🗑️ DELETE CONVERSATION
|
||||
%% ═══════════════════════════════════════════════════════════════════════════
|
||||
|
||||
UI->>convStore: deleteConversation(convId)
|
||||
activate convStore
|
||||
convStore->>DbSvc: deleteConversation(convId)
|
||||
DbSvc->>IDB: DELETE FROM conversations WHERE id = ?
|
||||
DbSvc->>IDB: DELETE FROM messages WHERE convId = ?
|
||||
convStore->>convStore: conversations.filter(c => c.id !== convId)
|
||||
alt deleted active conversation
|
||||
convStore->>convStore: clearActiveConversation()
|
||||
end
|
||||
deactivate convStore
|
||||
|
||||
%% ═══════════════════════════════════════════════════════════════════════════
|
||||
Note over UI,IDB: 📊 MODALITY TRACKING
|
||||
%% ═══════════════════════════════════════════════════════════════════════════
|
||||
|
||||
Note over convStore: usedModalities = $derived.by(() => {<br/> calculateModalitiesFromMessages(activeMessages)<br/>})
|
||||
|
||||
Note over convStore: Scans activeMessages for attachments:<br/>- IMAGE → vision: true<br/>- PDF (processedAsImages) → vision: true<br/>- AUDIO → audio: true
|
||||
|
||||
UI->>convStore: getModalitiesUpToMessage(msgId)
|
||||
Note right of convStore: Used for regeneration validation<br/>Only checks messages BEFORE target
|
||||
|
||||
%% ═══════════════════════════════════════════════════════════════════════════
|
||||
Note over UI,IDB: 📤 EXPORT / 📥 IMPORT
|
||||
%% ═══════════════════════════════════════════════════════════════════════════
|
||||
|
||||
UI->>convStore: exportAllConversations()
|
||||
activate convStore
|
||||
convStore->>DbSvc: getAllConversations()
|
||||
loop each conversation
|
||||
convStore->>DbSvc: getConversationMessages(convId)
|
||||
end
|
||||
convStore->>convStore: triggerDownload(JSON blob)
|
||||
deactivate convStore
|
||||
|
||||
UI->>convStore: importConversations(file)
|
||||
activate convStore
|
||||
convStore->>convStore: Parse JSON file
|
||||
convStore->>DbSvc: importConversations(parsed)
|
||||
DbSvc->>IDB: Bulk INSERT conversations + messages
|
||||
convStore->>convStore: loadConversations()
|
||||
deactivate convStore
|
||||
```
|
||||
|
|
@ -0,0 +1,45 @@
|
|||
```mermaid
|
||||
%% MODEL Mode Data Flow (single model)
|
||||
%% Detailed flows: ./flows/server-flow.mmd, ./flows/models-flow.mmd, ./flows/chat-flow.mmd
|
||||
|
||||
sequenceDiagram
|
||||
participant User as 👤 User
|
||||
participant UI as 🧩 UI
|
||||
participant Stores as 🗄️ Stores
|
||||
participant DB as 💾 IndexedDB
|
||||
participant API as 🌐 llama-server
|
||||
|
||||
Note over User,API: 🚀 Initialization (see: server-flow.mmd, models-flow.mmd)
|
||||
|
||||
UI->>Stores: initialize()
|
||||
Stores->>DB: load conversations
|
||||
Stores->>API: GET /props
|
||||
API-->>Stores: server config + modalities
|
||||
Stores->>API: GET /v1/models
|
||||
API-->>Stores: single model (auto-selected)
|
||||
|
||||
Note over User,API: 💬 Chat Flow (see: chat-flow.mmd)
|
||||
|
||||
User->>UI: send message
|
||||
UI->>Stores: sendMessage()
|
||||
Stores->>DB: save user message
|
||||
Stores->>API: POST /v1/chat/completions (stream)
|
||||
loop streaming
|
||||
API-->>Stores: SSE chunks
|
||||
Stores-->>UI: reactive update
|
||||
end
|
||||
API-->>Stores: done + timings
|
||||
Stores->>DB: save assistant message
|
||||
|
||||
Note over User,API: 🔁 Regenerate
|
||||
|
||||
User->>UI: regenerate
|
||||
Stores->>DB: create message branch
|
||||
Note right of Stores: same streaming flow
|
||||
|
||||
Note over User,API: ⏹️ Stop
|
||||
|
||||
User->>UI: stop
|
||||
Stores->>Stores: abort stream
|
||||
Stores->>DB: save partial response
|
||||
```
|
||||
|
|
@ -0,0 +1,77 @@
|
|||
```mermaid
|
||||
%% ROUTER Mode Data Flow (multi-model)
|
||||
%% Detailed flows: ./flows/server-flow.mmd, ./flows/models-flow.mmd, ./flows/chat-flow.mmd
|
||||
|
||||
sequenceDiagram
|
||||
participant User as 👤 User
|
||||
participant UI as 🧩 UI
|
||||
participant Stores as 🗄️ Stores
|
||||
participant DB as 💾 IndexedDB
|
||||
participant API as 🌐 llama-server
|
||||
|
||||
Note over User,API: 🚀 Initialization (see: server-flow.mmd, models-flow.mmd)
|
||||
|
||||
UI->>Stores: initialize()
|
||||
Stores->>DB: load conversations
|
||||
Stores->>API: GET /props
|
||||
API-->>Stores: {role: "router"}
|
||||
Stores->>API: GET /models
|
||||
API-->>Stores: models[] with status (loaded/available)
|
||||
loop each loaded model
|
||||
Stores->>API: GET /props?model=X
|
||||
API-->>Stores: modalities (vision/audio)
|
||||
end
|
||||
|
||||
Note over User,API: 🔄 Model Selection (see: models-flow.mmd)
|
||||
|
||||
User->>UI: select model
|
||||
alt model not loaded
|
||||
Stores->>API: POST /models/load
|
||||
loop poll status
|
||||
Stores->>API: GET /models
|
||||
API-->>Stores: check if loaded
|
||||
end
|
||||
Stores->>API: GET /props?model=X
|
||||
API-->>Stores: cache modalities
|
||||
end
|
||||
Stores->>Stores: validate modalities vs conversation
|
||||
alt valid
|
||||
Stores->>Stores: select model
|
||||
else invalid
|
||||
Stores->>API: POST /models/unload
|
||||
UI->>User: show error toast
|
||||
end
|
||||
|
||||
Note over User,API: 💬 Chat Flow (see: chat-flow.mmd)
|
||||
|
||||
User->>UI: send message
|
||||
UI->>Stores: sendMessage()
|
||||
Stores->>DB: save user message
|
||||
Stores->>API: POST /v1/chat/completions {model: X}
|
||||
Note right of API: router forwards to model
|
||||
loop streaming
|
||||
API-->>Stores: SSE chunks + model info
|
||||
Stores-->>UI: reactive update
|
||||
end
|
||||
API-->>Stores: done + timings
|
||||
Stores->>DB: save assistant message + model used
|
||||
|
||||
Note over User,API: 🔁 Regenerate (optional: different model)
|
||||
|
||||
User->>UI: regenerate
|
||||
Stores->>Stores: validate modalities up to this message
|
||||
Stores->>DB: create message branch
|
||||
Note right of Stores: same streaming flow
|
||||
|
||||
Note over User,API: ⏹️ Stop
|
||||
|
||||
User->>UI: stop
|
||||
Stores->>Stores: abort stream
|
||||
Stores->>DB: save partial response
|
||||
|
||||
Note over User,API: 🗑️ LRU Unloading
|
||||
|
||||
Note right of API: Server auto-unloads LRU models<br/>when cache full
|
||||
User->>UI: select unloaded model
|
||||
Note right of Stores: triggers load flow again
|
||||
```
|
||||
|
|
@ -0,0 +1,155 @@
|
|||
```mermaid
|
||||
sequenceDiagram
|
||||
participant Store as 🗄️ Stores
|
||||
participant DbSvc as ⚙️ DatabaseService
|
||||
participant Dexie as 📦 Dexie ORM
|
||||
participant IDB as 💾 IndexedDB
|
||||
|
||||
Note over DbSvc: Stateless service - all methods static<br/>Database: "LlamacppWebui"
|
||||
|
||||
%% ═══════════════════════════════════════════════════════════════════════════
|
||||
Note over Store,IDB: 📊 SCHEMA
|
||||
%% ═══════════════════════════════════════════════════════════════════════════
|
||||
|
||||
rect rgb(240, 248, 255)
|
||||
Note over IDB: conversations table:<br/>id (PK), lastModified, currNode, name
|
||||
end
|
||||
|
||||
rect rgb(255, 248, 240)
|
||||
Note over IDB: messages table:<br/>id (PK), convId (FK), type, role, timestamp,<br/>parent, children[], content, thinking,<br/>toolCalls, extra[], model, timings
|
||||
end
|
||||
|
||||
%% ═══════════════════════════════════════════════════════════════════════════
|
||||
Note over Store,IDB: 💬 CONVERSATIONS CRUD
|
||||
%% ═══════════════════════════════════════════════════════════════════════════
|
||||
|
||||
Store->>DbSvc: createConversation(name)
|
||||
activate DbSvc
|
||||
DbSvc->>DbSvc: Generate UUID
|
||||
DbSvc->>Dexie: db.conversations.add({id, name, lastModified, currNode: ""})
|
||||
Dexie->>IDB: INSERT
|
||||
IDB-->>Dexie: success
|
||||
DbSvc-->>Store: DatabaseConversation
|
||||
deactivate DbSvc
|
||||
|
||||
Store->>DbSvc: getConversation(convId)
|
||||
DbSvc->>Dexie: db.conversations.get(convId)
|
||||
Dexie->>IDB: SELECT WHERE id = ?
|
||||
IDB-->>DbSvc: DatabaseConversation
|
||||
|
||||
Store->>DbSvc: getAllConversations()
|
||||
DbSvc->>Dexie: db.conversations.orderBy('lastModified').reverse().toArray()
|
||||
Dexie->>IDB: SELECT ORDER BY lastModified DESC
|
||||
IDB-->>DbSvc: DatabaseConversation[]
|
||||
|
||||
Store->>DbSvc: updateConversation(convId, updates)
|
||||
DbSvc->>Dexie: db.conversations.update(convId, {...updates, lastModified})
|
||||
Dexie->>IDB: UPDATE
|
||||
|
||||
Store->>DbSvc: deleteConversation(convId)
|
||||
activate DbSvc
|
||||
DbSvc->>Dexie: db.conversations.delete(convId)
|
||||
Dexie->>IDB: DELETE FROM conversations
|
||||
DbSvc->>Dexie: db.messages.where('convId').equals(convId).delete()
|
||||
Dexie->>IDB: DELETE FROM messages WHERE convId = ?
|
||||
deactivate DbSvc
|
||||
|
||||
%% ═══════════════════════════════════════════════════════════════════════════
|
||||
Note over Store,IDB: 📝 MESSAGES CRUD
|
||||
%% ═══════════════════════════════════════════════════════════════════════════
|
||||
|
||||
Store->>DbSvc: createRootMessage(convId)
|
||||
activate DbSvc
|
||||
DbSvc->>DbSvc: Create root message {type: "root", parent: null}
|
||||
DbSvc->>Dexie: db.messages.add(rootMsg)
|
||||
Dexie->>IDB: INSERT
|
||||
DbSvc-->>Store: rootMessageId
|
||||
deactivate DbSvc
|
||||
|
||||
Store->>DbSvc: createMessageBranch(message, parentId)
|
||||
activate DbSvc
|
||||
DbSvc->>DbSvc: Generate UUID for new message
|
||||
DbSvc->>Dexie: db.messages.add({...message, id, parent: parentId})
|
||||
Dexie->>IDB: INSERT message
|
||||
|
||||
alt parentId exists
|
||||
DbSvc->>Dexie: db.messages.get(parentId)
|
||||
Dexie->>IDB: SELECT parent
|
||||
DbSvc->>DbSvc: parent.children.push(newId)
|
||||
DbSvc->>Dexie: db.messages.update(parentId, {children})
|
||||
Dexie->>IDB: UPDATE parent.children
|
||||
end
|
||||
|
||||
DbSvc->>Dexie: db.conversations.update(convId, {currNode: newId})
|
||||
Dexie->>IDB: UPDATE conversation.currNode
|
||||
DbSvc-->>Store: DatabaseMessage
|
||||
deactivate DbSvc
|
||||
|
||||
Store->>DbSvc: getConversationMessages(convId)
|
||||
DbSvc->>Dexie: db.messages.where('convId').equals(convId).toArray()
|
||||
Dexie->>IDB: SELECT WHERE convId = ?
|
||||
IDB-->>DbSvc: DatabaseMessage[]
|
||||
|
||||
Store->>DbSvc: updateMessage(msgId, updates)
|
||||
DbSvc->>Dexie: db.messages.update(msgId, updates)
|
||||
Dexie->>IDB: UPDATE
|
||||
|
||||
Store->>DbSvc: deleteMessage(msgId)
|
||||
DbSvc->>Dexie: db.messages.delete(msgId)
|
||||
Dexie->>IDB: DELETE
|
||||
|
||||
%% ═══════════════════════════════════════════════════════════════════════════
|
||||
Note over Store,IDB: 🌳 BRANCHING OPERATIONS
|
||||
%% ═══════════════════════════════════════════════════════════════════════════
|
||||
|
||||
Store->>DbSvc: updateCurrentNode(convId, nodeId)
|
||||
DbSvc->>Dexie: db.conversations.update(convId, {currNode: nodeId, lastModified})
|
||||
Dexie->>IDB: UPDATE
|
||||
|
||||
Store->>DbSvc: deleteMessageCascading(msgId)
|
||||
activate DbSvc
|
||||
DbSvc->>DbSvc: findDescendantMessages(msgId, allMessages)
|
||||
Note right of DbSvc: Recursively find all children
|
||||
loop each descendant
|
||||
DbSvc->>Dexie: db.messages.delete(descendantId)
|
||||
Dexie->>IDB: DELETE
|
||||
end
|
||||
DbSvc->>Dexie: db.messages.delete(msgId)
|
||||
Dexie->>IDB: DELETE target message
|
||||
deactivate DbSvc
|
||||
|
||||
%% ═══════════════════════════════════════════════════════════════════════════
|
||||
Note over Store,IDB: 📥 IMPORT
|
||||
%% ═══════════════════════════════════════════════════════════════════════════
|
||||
|
||||
Store->>DbSvc: importConversations(data)
|
||||
activate DbSvc
|
||||
loop each conversation in data
|
||||
DbSvc->>DbSvc: Generate new UUIDs (avoid conflicts)
|
||||
DbSvc->>Dexie: db.conversations.add(conversation)
|
||||
Dexie->>IDB: INSERT conversation
|
||||
loop each message
|
||||
DbSvc->>Dexie: db.messages.add(message)
|
||||
Dexie->>IDB: INSERT message
|
||||
end
|
||||
end
|
||||
deactivate DbSvc
|
||||
|
||||
%% ═══════════════════════════════════════════════════════════════════════════
|
||||
Note over Store,IDB: 🔗 MESSAGE TREE UTILITIES
|
||||
%% ═══════════════════════════════════════════════════════════════════════════
|
||||
|
||||
Note over DbSvc: Used by stores (imported from utils):
|
||||
|
||||
rect rgb(240, 255, 240)
|
||||
Note over DbSvc: filterByLeafNodeId(messages, leafId)<br/>→ Returns path from root to leaf<br/>→ Used to display current branch
|
||||
end
|
||||
|
||||
rect rgb(240, 255, 240)
|
||||
Note over DbSvc: findLeafNode(startId, messages)<br/>→ Traverse to deepest child<br/>→ Used for branch navigation
|
||||
end
|
||||
|
||||
rect rgb(240, 255, 240)
|
||||
Note over DbSvc: findDescendantMessages(msgId, messages)<br/>→ Find all children recursively<br/>→ Used for cascading deletes
|
||||
end
|
||||
```
|
||||
|
|
@ -0,0 +1,181 @@
|
|||
```mermaid
|
||||
sequenceDiagram
|
||||
participant UI as 🧩 ModelsSelector
|
||||
participant Hooks as 🪝 useModelChangeValidation
|
||||
participant modelsStore as 🗄️ modelsStore
|
||||
participant serverStore as 🗄️ serverStore
|
||||
participant convStore as 🗄️ conversationsStore
|
||||
participant ModelsSvc as ⚙️ ModelsService
|
||||
participant PropsSvc as ⚙️ PropsService
|
||||
participant API as 🌐 llama-server
|
||||
|
||||
Note over modelsStore: State:<br/>models: ModelOption[]<br/>routerModels: ApiModelDataEntry[]<br/>selectedModelId, selectedModelName<br/>loading, updating, error<br/>modelLoadingStates (Map)<br/>modelPropsCache (Map)<br/>propsCacheVersion
|
||||
|
||||
%% ═══════════════════════════════════════════════════════════════════════════
|
||||
Note over UI,API: 🚀 INITIALIZATION (MODEL mode)
|
||||
%% ═══════════════════════════════════════════════════════════════════════════
|
||||
|
||||
UI->>modelsStore: fetch()
|
||||
activate modelsStore
|
||||
modelsStore->>modelsStore: loading = true
|
||||
|
||||
alt serverStore.props not loaded
|
||||
modelsStore->>serverStore: fetch()
|
||||
Note over serverStore: → see server-flow.mmd
|
||||
end
|
||||
|
||||
modelsStore->>ModelsSvc: list()
|
||||
ModelsSvc->>API: GET /v1/models
|
||||
API-->>ModelsSvc: ApiModelListResponse {data: [model]}
|
||||
|
||||
modelsStore->>modelsStore: models = $state(mapped)
|
||||
Note right of modelsStore: Map to ModelOption[]:<br/>{id, name, model, description, capabilities}
|
||||
|
||||
Note over modelsStore: MODEL mode: Get modalities from serverStore.props
|
||||
modelsStore->>modelsStore: modelPropsCache.set(model.id, serverStore.props)
|
||||
modelsStore->>modelsStore: models[0].modalities = props.modalities
|
||||
|
||||
modelsStore->>modelsStore: Auto-select single model
|
||||
Note right of modelsStore: selectedModelId = models[0].id
|
||||
modelsStore->>modelsStore: loading = false
|
||||
deactivate modelsStore
|
||||
|
||||
%% ═══════════════════════════════════════════════════════════════════════════
|
||||
Note over UI,API: 🚀 INITIALIZATION (ROUTER mode)
|
||||
%% ═══════════════════════════════════════════════════════════════════════════
|
||||
|
||||
UI->>modelsStore: fetch()
|
||||
activate modelsStore
|
||||
modelsStore->>ModelsSvc: list()
|
||||
ModelsSvc->>API: GET /v1/models
|
||||
API-->>ModelsSvc: ApiModelListResponse
|
||||
modelsStore->>modelsStore: models = $state(mapped)
|
||||
deactivate modelsStore
|
||||
|
||||
Note over UI: After models loaded, layout triggers:
|
||||
UI->>modelsStore: fetchRouterModels()
|
||||
activate modelsStore
|
||||
modelsStore->>ModelsSvc: listRouter()
|
||||
ModelsSvc->>API: GET /models
|
||||
API-->>ModelsSvc: ApiRouterModelsListResponse
|
||||
Note right of API: {data: [{id, status, path, in_cache}]}
|
||||
modelsStore->>modelsStore: routerModels = $state(data)
|
||||
|
||||
modelsStore->>modelsStore: fetchModalitiesForLoadedModels()
|
||||
loop each model where status === "loaded"
|
||||
modelsStore->>PropsSvc: fetchForModel(modelId)
|
||||
PropsSvc->>API: GET /props?model={modelId}
|
||||
API-->>PropsSvc: ApiLlamaCppServerProps
|
||||
modelsStore->>modelsStore: modelPropsCache.set(modelId, props)
|
||||
end
|
||||
modelsStore->>modelsStore: propsCacheVersion++
|
||||
deactivate modelsStore
|
||||
|
||||
%% ═══════════════════════════════════════════════════════════════════════════
|
||||
Note over UI,API: 🔄 MODEL SELECTION (ROUTER mode)
|
||||
%% ═══════════════════════════════════════════════════════════════════════════
|
||||
|
||||
UI->>Hooks: useModelChangeValidation({getRequiredModalities, onSuccess?, onValidationFailure?})
|
||||
Note over Hooks: Hook configured per-component:<br/>ChatForm: getRequiredModalities = usedModalities<br/>ChatMessage: getRequiredModalities = getModalitiesUpToMessage(msgId)
|
||||
|
||||
UI->>Hooks: handleModelChange(modelId, modelName)
|
||||
activate Hooks
|
||||
Hooks->>Hooks: previousSelectedModelId = modelsStore.selectedModelId
|
||||
Hooks->>modelsStore: isModelLoaded(modelName)?
|
||||
|
||||
alt model NOT loaded
|
||||
Hooks->>modelsStore: loadModel(modelName)
|
||||
Note over modelsStore: → see LOAD MODEL section below
|
||||
end
|
||||
|
||||
Note over Hooks: Always fetch props (from cache or API)
|
||||
Hooks->>modelsStore: fetchModelProps(modelName)
|
||||
modelsStore-->>Hooks: props
|
||||
|
||||
Hooks->>convStore: getRequiredModalities()
|
||||
convStore-->>Hooks: {vision, audio}
|
||||
|
||||
Hooks->>Hooks: Validate: model.modalities ⊇ required?
|
||||
|
||||
alt validation PASSED
|
||||
Hooks->>modelsStore: selectModelById(modelId)
|
||||
Hooks-->>UI: return true
|
||||
else validation FAILED
|
||||
Hooks->>UI: toast.error("Model doesn't support required modalities")
|
||||
alt model was just loaded
|
||||
Hooks->>modelsStore: unloadModel(modelName)
|
||||
end
|
||||
alt onValidationFailure provided
|
||||
Hooks->>modelsStore: selectModelById(previousSelectedModelId)
|
||||
end
|
||||
Hooks-->>UI: return false
|
||||
end
|
||||
deactivate Hooks
|
||||
|
||||
%% ═══════════════════════════════════════════════════════════════════════════
|
||||
Note over UI,API: ⬆️ LOAD MODEL (ROUTER mode)
|
||||
%% ═══════════════════════════════════════════════════════════════════════════
|
||||
|
||||
modelsStore->>modelsStore: loadModel(modelId)
|
||||
activate modelsStore
|
||||
|
||||
alt already loaded
|
||||
modelsStore-->>modelsStore: return (no-op)
|
||||
end
|
||||
|
||||
modelsStore->>modelsStore: modelLoadingStates.set(modelId, true)
|
||||
modelsStore->>ModelsSvc: load(modelId)
|
||||
ModelsSvc->>API: POST /models/load {model: modelId}
|
||||
API-->>ModelsSvc: {status: "loading"}
|
||||
|
||||
modelsStore->>modelsStore: pollForModelStatus(modelId, LOADED)
|
||||
loop poll every 500ms (max 60 attempts)
|
||||
modelsStore->>modelsStore: fetchRouterModels()
|
||||
modelsStore->>ModelsSvc: listRouter()
|
||||
ModelsSvc->>API: GET /models
|
||||
API-->>ModelsSvc: models[]
|
||||
modelsStore->>modelsStore: getModelStatus(modelId)
|
||||
alt status === LOADED
|
||||
Note right of modelsStore: break loop
|
||||
else status === LOADING
|
||||
Note right of modelsStore: wait 500ms, continue
|
||||
end
|
||||
end
|
||||
|
||||
modelsStore->>modelsStore: updateModelModalities(modelId)
|
||||
modelsStore->>PropsSvc: fetchForModel(modelId)
|
||||
PropsSvc->>API: GET /props?model={modelId}
|
||||
API-->>PropsSvc: props with modalities
|
||||
modelsStore->>modelsStore: modelPropsCache.set(modelId, props)
|
||||
modelsStore->>modelsStore: propsCacheVersion++
|
||||
|
||||
modelsStore->>modelsStore: modelLoadingStates.set(modelId, false)
|
||||
deactivate modelsStore
|
||||
|
||||
%% ═══════════════════════════════════════════════════════════════════════════
|
||||
Note over UI,API: ⬇️ UNLOAD MODEL (ROUTER mode)
|
||||
%% ═══════════════════════════════════════════════════════════════════════════
|
||||
|
||||
modelsStore->>modelsStore: unloadModel(modelId)
|
||||
activate modelsStore
|
||||
modelsStore->>modelsStore: modelLoadingStates.set(modelId, true)
|
||||
modelsStore->>ModelsSvc: unload(modelId)
|
||||
ModelsSvc->>API: POST /models/unload {model: modelId}
|
||||
|
||||
modelsStore->>modelsStore: pollForModelStatus(modelId, UNLOADED)
|
||||
loop poll until unloaded
|
||||
modelsStore->>ModelsSvc: listRouter()
|
||||
ModelsSvc->>API: GET /models
|
||||
end
|
||||
|
||||
modelsStore->>modelsStore: modelLoadingStates.set(modelId, false)
|
||||
deactivate modelsStore
|
||||
|
||||
%% ═══════════════════════════════════════════════════════════════════════════
|
||||
Note over UI,API: 📊 COMPUTED GETTERS
|
||||
%% ═══════════════════════════════════════════════════════════════════════════
|
||||
|
||||
Note over modelsStore: Getters:<br/>- selectedModel: ModelOption | null<br/>- loadedModelIds: string[] (from routerModels)<br/>- loadingModelIds: string[] (from modelLoadingStates)<br/>- singleModelName: string | null (MODEL mode only)
|
||||
|
||||
Note over modelsStore: Modality helpers:<br/>- getModelModalities(modelId): {vision, audio}<br/>- modelSupportsVision(modelId): boolean<br/>- modelSupportsAudio(modelId): boolean
|
||||
```
|
||||
|
|
@ -0,0 +1,76 @@
|
|||
```mermaid
|
||||
sequenceDiagram
|
||||
participant UI as 🧩 +layout.svelte
|
||||
participant serverStore as 🗄️ serverStore
|
||||
participant PropsSvc as ⚙️ PropsService
|
||||
participant API as 🌐 llama-server
|
||||
|
||||
Note over serverStore: State:<br/>props: ApiLlamaCppServerProps | null<br/>loading, error<br/>role: ServerRole | null (MODEL | ROUTER)<br/>fetchPromise (deduplication)
|
||||
|
||||
%% ═══════════════════════════════════════════════════════════════════════════
|
||||
Note over UI,API: 🚀 INITIALIZATION
|
||||
%% ═══════════════════════════════════════════════════════════════════════════
|
||||
|
||||
UI->>serverStore: fetch()
|
||||
activate serverStore
|
||||
|
||||
alt fetchPromise exists (already fetching)
|
||||
serverStore-->>UI: return fetchPromise
|
||||
Note right of serverStore: Deduplicate concurrent calls
|
||||
end
|
||||
|
||||
serverStore->>serverStore: loading = true
|
||||
serverStore->>serverStore: fetchPromise = new Promise()
|
||||
|
||||
serverStore->>PropsSvc: fetch()
|
||||
PropsSvc->>API: GET /props
|
||||
API-->>PropsSvc: ApiLlamaCppServerProps
|
||||
Note right of API: {role, model_path, model_alias,<br/>modalities, default_generation_settings, ...}
|
||||
|
||||
PropsSvc-->>serverStore: props
|
||||
serverStore->>serverStore: props = $state(data)
|
||||
|
||||
serverStore->>serverStore: detectRole(props)
|
||||
Note right of serverStore: role = props.role === "router"<br/> ? ServerRole.ROUTER<br/> : ServerRole.MODEL
|
||||
|
||||
serverStore->>serverStore: loading = false
|
||||
serverStore->>serverStore: fetchPromise = null
|
||||
deactivate serverStore
|
||||
|
||||
%% ═══════════════════════════════════════════════════════════════════════════
|
||||
Note over UI,API: 📊 COMPUTED GETTERS
|
||||
%% ═══════════════════════════════════════════════════════════════════════════
|
||||
|
||||
Note over serverStore: Getters from props:
|
||||
|
||||
rect rgb(240, 255, 240)
|
||||
Note over serverStore: defaultParams<br/>→ props.default_generation_settings.params<br/>(temperature, top_p, top_k, etc.)
|
||||
end
|
||||
|
||||
rect rgb(240, 255, 240)
|
||||
Note over serverStore: contextSize<br/>→ props.default_generation_settings.n_ctx
|
||||
end
|
||||
|
||||
rect rgb(255, 240, 240)
|
||||
Note over serverStore: isRouterMode<br/>→ role === ServerRole.ROUTER
|
||||
end
|
||||
|
||||
rect rgb(255, 240, 240)
|
||||
Note over serverStore: isModelMode<br/>→ role === ServerRole.MODEL
|
||||
end
|
||||
|
||||
%% ═══════════════════════════════════════════════════════════════════════════
|
||||
Note over UI,API: 🔗 RELATIONSHIPS
|
||||
%% ═══════════════════════════════════════════════════════════════════════════
|
||||
|
||||
Note over serverStore: Used by:
|
||||
Note right of serverStore: - modelsStore: role detection, MODEL mode modalities<br/>- settingsStore: syncWithServerDefaults (defaultParams)<br/>- chatStore: contextSize for processing state<br/>- UI components: isRouterMode for conditional rendering
|
||||
|
||||
%% ═══════════════════════════════════════════════════════════════════════════
|
||||
Note over UI,API: ❌ ERROR HANDLING
|
||||
%% ═══════════════════════════════════════════════════════════════════════════
|
||||
|
||||
Note over serverStore: getErrorMessage(): string | null<br/>Returns formatted error for UI display
|
||||
|
||||
Note over serverStore: clear(): void<br/>Resets all state (props, error, loading, role)
|
||||
```
|
||||
|
|
@ -0,0 +1,144 @@
|
|||
```mermaid
|
||||
sequenceDiagram
|
||||
participant UI as 🧩 ChatSettings
|
||||
participant settingsStore as 🗄️ settingsStore
|
||||
participant serverStore as 🗄️ serverStore
|
||||
participant ParamSvc as ⚙️ ParameterSyncService
|
||||
participant LS as 💾 LocalStorage
|
||||
|
||||
Note over settingsStore: State:<br/>config: SettingsConfigType<br/>theme: string ("auto" | "light" | "dark")<br/>isInitialized: boolean<br/>userOverrides: Set<string>
|
||||
|
||||
%% ═══════════════════════════════════════════════════════════════════════════
|
||||
Note over UI,LS: 🚀 INITIALIZATION
|
||||
%% ═══════════════════════════════════════════════════════════════════════════
|
||||
|
||||
Note over settingsStore: Auto-initialized in constructor (browser only)
|
||||
settingsStore->>settingsStore: initialize()
|
||||
activate settingsStore
|
||||
|
||||
settingsStore->>settingsStore: loadConfig()
|
||||
settingsStore->>LS: get("llama-config")
|
||||
LS-->>settingsStore: StoredConfig | null
|
||||
|
||||
alt config exists
|
||||
settingsStore->>settingsStore: Merge with SETTING_CONFIG_DEFAULT
|
||||
Note right of settingsStore: Fill missing keys with defaults
|
||||
else no config
|
||||
settingsStore->>settingsStore: config = SETTING_CONFIG_DEFAULT
|
||||
end
|
||||
|
||||
settingsStore->>LS: get("llama-userOverrides")
|
||||
LS-->>settingsStore: string[] | null
|
||||
settingsStore->>settingsStore: userOverrides = new Set(data)
|
||||
|
||||
settingsStore->>settingsStore: loadTheme()
|
||||
settingsStore->>LS: get("llama-theme")
|
||||
LS-->>settingsStore: theme | "auto"
|
||||
|
||||
settingsStore->>settingsStore: isInitialized = true
|
||||
deactivate settingsStore
|
||||
|
||||
%% ═══════════════════════════════════════════════════════════════════════════
|
||||
Note over UI,LS: 🔄 SYNC WITH SERVER DEFAULTS
|
||||
%% ═══════════════════════════════════════════════════════════════════════════
|
||||
|
||||
Note over UI: Triggered from +layout.svelte when serverStore.props loaded
|
||||
UI->>settingsStore: syncWithServerDefaults()
|
||||
activate settingsStore
|
||||
|
||||
settingsStore->>serverStore: defaultParams
|
||||
serverStore-->>settingsStore: {temperature, top_p, top_k, ...}
|
||||
|
||||
settingsStore->>ParamSvc: extractServerDefaults(defaultParams)
|
||||
ParamSvc-->>settingsStore: Record<string, value>
|
||||
|
||||
settingsStore->>ParamSvc: mergeWithServerDefaults(config, serverDefaults)
|
||||
Note right of ParamSvc: For each syncable parameter:<br/>- If NOT in userOverrides → use server default<br/>- If in userOverrides → keep user value
|
||||
ParamSvc-->>settingsStore: mergedConfig
|
||||
|
||||
settingsStore->>settingsStore: config = mergedConfig
|
||||
settingsStore->>settingsStore: saveConfig()
|
||||
deactivate settingsStore
|
||||
|
||||
%% ═══════════════════════════════════════════════════════════════════════════
|
||||
Note over UI,LS: ⚙️ UPDATE CONFIG
|
||||
%% ═══════════════════════════════════════════════════════════════════════════
|
||||
|
||||
UI->>settingsStore: updateConfig(key, value)
|
||||
activate settingsStore
|
||||
settingsStore->>settingsStore: config[key] = value
|
||||
settingsStore->>settingsStore: userOverrides.add(key)
|
||||
Note right of settingsStore: Mark as user-modified (won't be overwritten by server)
|
||||
settingsStore->>settingsStore: saveConfig()
|
||||
settingsStore->>LS: set("llama-config", config)
|
||||
settingsStore->>LS: set("llama-userOverrides", [...userOverrides])
|
||||
deactivate settingsStore
|
||||
|
||||
UI->>settingsStore: updateMultipleConfig({key1: val1, key2: val2})
|
||||
activate settingsStore
|
||||
Note right of settingsStore: Batch update, single save
|
||||
settingsStore->>settingsStore: For each key: config[key] = value
|
||||
settingsStore->>settingsStore: For each key: userOverrides.add(key)
|
||||
settingsStore->>settingsStore: saveConfig()
|
||||
deactivate settingsStore
|
||||
|
||||
%% ═══════════════════════════════════════════════════════════════════════════
|
||||
Note over UI,LS: 🔄 RESET
|
||||
%% ═══════════════════════════════════════════════════════════════════════════
|
||||
|
||||
UI->>settingsStore: resetConfig()
|
||||
activate settingsStore
|
||||
settingsStore->>settingsStore: config = SETTING_CONFIG_DEFAULT
|
||||
settingsStore->>settingsStore: userOverrides.clear()
|
||||
settingsStore->>settingsStore: syncWithServerDefaults()
|
||||
Note right of settingsStore: Apply server defaults for syncable params
|
||||
settingsStore->>settingsStore: saveConfig()
|
||||
deactivate settingsStore
|
||||
|
||||
UI->>settingsStore: resetParameterToServerDefault(key)
|
||||
activate settingsStore
|
||||
settingsStore->>settingsStore: userOverrides.delete(key)
|
||||
settingsStore->>serverStore: defaultParams[key]
|
||||
settingsStore->>settingsStore: config[key] = serverDefault
|
||||
settingsStore->>settingsStore: saveConfig()
|
||||
deactivate settingsStore
|
||||
|
||||
%% ═══════════════════════════════════════════════════════════════════════════
|
||||
Note over UI,LS: 🎨 THEME
|
||||
%% ═══════════════════════════════════════════════════════════════════════════
|
||||
|
||||
UI->>settingsStore: updateTheme(newTheme)
|
||||
activate settingsStore
|
||||
settingsStore->>settingsStore: theme = newTheme
|
||||
settingsStore->>settingsStore: saveTheme()
|
||||
settingsStore->>LS: set("llama-theme", theme)
|
||||
deactivate settingsStore
|
||||
|
||||
%% ═══════════════════════════════════════════════════════════════════════════
|
||||
Note over UI,LS: 📊 PARAMETER INFO
|
||||
%% ═══════════════════════════════════════════════════════════════════════════
|
||||
|
||||
UI->>settingsStore: getParameterInfo(key)
|
||||
settingsStore->>ParamSvc: getParameterInfo(key, config, serverDefaults, userOverrides)
|
||||
ParamSvc-->>settingsStore: ParameterInfo
|
||||
Note right of ParamSvc: {<br/> currentValue,<br/> serverDefault,<br/> isUserOverride: boolean,<br/> canSync: boolean,<br/> isDifferentFromServer: boolean<br/>}
|
||||
|
||||
UI->>settingsStore: getParameterDiff()
|
||||
settingsStore->>ParamSvc: createParameterDiff(config, serverDefaults, userOverrides)
|
||||
ParamSvc-->>settingsStore: ParameterDiff[]
|
||||
Note right of ParamSvc: Array of parameters where user != server
|
||||
|
||||
%% ═══════════════════════════════════════════════════════════════════════════
|
||||
Note over UI,LS: 📋 CONFIG CATEGORIES
|
||||
%% ═══════════════════════════════════════════════════════════════════════════
|
||||
|
||||
Note over settingsStore: Syncable with server (from /props):
|
||||
rect rgb(240, 255, 240)
|
||||
Note over settingsStore: temperature, top_p, top_k, min_p<br/>repeat_penalty, presence_penalty, frequency_penalty<br/>dynatemp_range, dynatemp_exponent<br/>typ_p, xtc_probability, xtc_threshold<br/>dry_multiplier, dry_base, dry_allowed_length, dry_penalty_last_n
|
||||
end
|
||||
|
||||
Note over settingsStore: UI-only (not synced):
|
||||
rect rgb(255, 240, 240)
|
||||
Note over settingsStore: systemMessage, custom (JSON)<br/>showStatistics, enableContinueGeneration<br/>autoMicOnEmpty, disableAutoScroll<br/>apiKey, pdfAsImage, disableReasoningFormat
|
||||
end
|
||||
```
|
||||
|
|
@ -64,7 +64,7 @@
|
|||
"svelte": "^5.0.0",
|
||||
"svelte-check": "^4.0.0",
|
||||
"tailwind-merge": "^3.3.1",
|
||||
"tailwind-variants": "^1.0.0",
|
||||
"tailwind-variants": "^3.2.2",
|
||||
"tailwindcss": "^4.0.0",
|
||||
"tw-animate-css": "^1.3.5",
|
||||
"typescript": "^5.0.0",
|
||||
|
|
@ -8324,31 +8324,23 @@
|
|||
}
|
||||
},
|
||||
"node_modules/tailwind-variants": {
|
||||
"version": "1.0.0",
|
||||
"resolved": "https://registry.npmjs.org/tailwind-variants/-/tailwind-variants-1.0.0.tgz",
|
||||
"integrity": "sha512-2WSbv4ulEEyuBKomOunut65D8UZwxrHoRfYnxGcQNnHqlSCp2+B7Yz2W+yrNDrxRodOXtGD/1oCcKGNBnUqMqA==",
|
||||
"version": "3.2.2",
|
||||
"resolved": "https://registry.npmjs.org/tailwind-variants/-/tailwind-variants-3.2.2.tgz",
|
||||
"integrity": "sha512-Mi4kHeMTLvKlM98XPnK+7HoBPmf4gygdFmqQPaDivc3DpYS6aIY6KiG/PgThrGvii5YZJqRsPz0aPyhoFzmZgg==",
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"tailwind-merge": "3.0.2"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">=16.x",
|
||||
"pnpm": ">=7.x"
|
||||
},
|
||||
"peerDependencies": {
|
||||
"tailwind-merge": ">=3.0.0",
|
||||
"tailwindcss": "*"
|
||||
}
|
||||
},
|
||||
"node_modules/tailwind-variants/node_modules/tailwind-merge": {
|
||||
"version": "3.0.2",
|
||||
"resolved": "https://registry.npmjs.org/tailwind-merge/-/tailwind-merge-3.0.2.tgz",
|
||||
"integrity": "sha512-l7z+OYZ7mu3DTqrL88RiKrKIqO3NcpEO8V/Od04bNpvk0kiIFndGEoqfuzvj4yuhRkHKjRkII2z+KS2HfPcSxw==",
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"funding": {
|
||||
"type": "github",
|
||||
"url": "https://github.com/sponsors/dcastil"
|
||||
"peerDependenciesMeta": {
|
||||
"tailwind-merge": {
|
||||
"optional": true
|
||||
}
|
||||
}
|
||||
},
|
||||
"node_modules/tailwindcss": {
|
||||
|
|
|
|||
|
|
@ -66,7 +66,7 @@
|
|||
"svelte": "^5.0.0",
|
||||
"svelte-check": "^4.0.0",
|
||||
"tailwind-merge": "^3.3.1",
|
||||
"tailwind-variants": "^1.0.0",
|
||||
"tailwind-variants": "^3.2.2",
|
||||
"tailwindcss": "^4.0.0",
|
||||
"tw-animate-css": "^1.3.5",
|
||||
"typescript": "^5.0.0",
|
||||
|
|
|
|||
|
|
@ -7,5 +7,5 @@ export default defineConfig({
|
|||
timeout: 120000,
|
||||
reuseExistingServer: false
|
||||
},
|
||||
testDir: 'e2e'
|
||||
testDir: 'tests/e2e'
|
||||
});
|
||||
|
|
|
|||
|
|
@ -49,7 +49,9 @@ trap cleanup SIGINT SIGTERM
|
|||
echo "🚀 Starting development servers..."
|
||||
echo "📝 Note: Make sure to start llama-server separately if needed"
|
||||
cd tools/server/webui
|
||||
storybook dev -p 6006 --ci & vite dev --host 0.0.0.0 &
|
||||
# Use --insecure-http-parser to handle malformed HTTP responses from llama-server
|
||||
# (some responses have both Content-Length and Transfer-Encoding headers)
|
||||
storybook dev -p 6006 --ci & NODE_OPTIONS="--insecure-http-parser" vite dev --host 0.0.0.0 &
|
||||
|
||||
# Wait for all background processes
|
||||
wait
|
||||
|
|
|
|||
|
|
@ -29,7 +29,7 @@
|
|||
--chart-3: oklch(0.398 0.07 227.392);
|
||||
--chart-4: oklch(0.828 0.189 84.429);
|
||||
--chart-5: oklch(0.769 0.188 70.08);
|
||||
--sidebar: oklch(0.985 0 0);
|
||||
--sidebar: oklch(0.987 0 0);
|
||||
--sidebar-foreground: oklch(0.145 0 0);
|
||||
--sidebar-primary: oklch(0.205 0 0);
|
||||
--sidebar-primary-foreground: oklch(0.985 0 0);
|
||||
|
|
@ -66,7 +66,7 @@
|
|||
--chart-3: oklch(0.769 0.188 70.08);
|
||||
--chart-4: oklch(0.627 0.265 303.9);
|
||||
--chart-5: oklch(0.645 0.246 16.439);
|
||||
--sidebar: oklch(0.205 0 0);
|
||||
--sidebar: oklch(0.19 0 0);
|
||||
--sidebar-foreground: oklch(0.985 0 0);
|
||||
--sidebar-primary: oklch(0.488 0.243 264.376);
|
||||
--sidebar-primary-foreground: oklch(0.985 0 0);
|
||||
|
|
|
|||
|
|
@ -4,27 +4,38 @@
|
|||
// Import chat types from dedicated module
|
||||
|
||||
import type {
|
||||
// API types
|
||||
ApiChatCompletionRequest,
|
||||
ApiChatCompletionResponse,
|
||||
ApiChatCompletionStreamChunk,
|
||||
ApiChatCompletionToolCall,
|
||||
ApiChatCompletionToolCallDelta,
|
||||
ApiChatMessageData,
|
||||
ApiChatMessageContentPart,
|
||||
ApiContextSizeError,
|
||||
ApiErrorResponse,
|
||||
ApiLlamaCppServerProps,
|
||||
ApiProcessingState
|
||||
} from '$lib/types/api';
|
||||
|
||||
import type {
|
||||
ApiModelDataEntry,
|
||||
ApiModelListResponse,
|
||||
ApiProcessingState,
|
||||
ApiRouterModelMeta,
|
||||
ApiRouterModelsLoadRequest,
|
||||
ApiRouterModelsLoadResponse,
|
||||
ApiRouterModelsStatusRequest,
|
||||
ApiRouterModelsStatusResponse,
|
||||
ApiRouterModelsListResponse,
|
||||
ApiRouterModelsUnloadRequest,
|
||||
ApiRouterModelsUnloadResponse,
|
||||
// Chat types
|
||||
ChatAttachmentDisplayItem,
|
||||
ChatAttachmentPreviewItem,
|
||||
ChatMessageType,
|
||||
ChatRole,
|
||||
ChatUploadedFile,
|
||||
ChatMessageSiblingInfo,
|
||||
ChatMessagePromptProgress,
|
||||
ChatMessageTimings
|
||||
} from '$lib/types/chat';
|
||||
|
||||
import type {
|
||||
ChatMessageTimings,
|
||||
// Database types
|
||||
DatabaseConversation,
|
||||
DatabaseMessage,
|
||||
DatabaseMessageExtra,
|
||||
|
|
@ -32,14 +43,20 @@ import type {
|
|||
DatabaseMessageExtraImageFile,
|
||||
DatabaseMessageExtraTextFile,
|
||||
DatabaseMessageExtraPdfFile,
|
||||
DatabaseMessageExtraLegacyContext
|
||||
} from '$lib/types/database';
|
||||
|
||||
import type {
|
||||
DatabaseMessageExtraLegacyContext,
|
||||
ExportedConversation,
|
||||
ExportedConversations,
|
||||
// Model types
|
||||
ModelModalities,
|
||||
ModelOption,
|
||||
// Settings types
|
||||
SettingsChatServiceOptions,
|
||||
SettingsConfigValue,
|
||||
SettingsFieldConfig,
|
||||
SettingsConfigType
|
||||
} from '$lib/types/settings';
|
||||
} from '$lib/types';
|
||||
|
||||
import { ServerRole, ServerModelStatus, ModelModality } from '$lib/enums';
|
||||
|
||||
declare global {
|
||||
// namespace App {
|
||||
|
|
@ -51,22 +68,38 @@ declare global {
|
|||
// }
|
||||
|
||||
export {
|
||||
// API types
|
||||
ApiChatCompletionRequest,
|
||||
ApiChatCompletionResponse,
|
||||
ApiChatCompletionStreamChunk,
|
||||
ApiChatCompletionToolCall,
|
||||
ApiChatCompletionToolCallDelta,
|
||||
ApiChatMessageData,
|
||||
ApiChatMessageContentPart,
|
||||
ApiContextSizeError,
|
||||
ApiErrorResponse,
|
||||
ApiLlamaCppServerProps,
|
||||
ApiModelDataEntry,
|
||||
ApiModelListResponse,
|
||||
ApiProcessingState,
|
||||
ChatMessageData,
|
||||
ApiRouterModelMeta,
|
||||
ApiRouterModelsLoadRequest,
|
||||
ApiRouterModelsLoadResponse,
|
||||
ApiRouterModelsStatusRequest,
|
||||
ApiRouterModelsStatusResponse,
|
||||
ApiRouterModelsListResponse,
|
||||
ApiRouterModelsUnloadRequest,
|
||||
ApiRouterModelsUnloadResponse,
|
||||
// Chat types
|
||||
ChatAttachmentDisplayItem,
|
||||
ChatAttachmentPreviewItem,
|
||||
ChatMessagePromptProgress,
|
||||
ChatMessageSiblingInfo,
|
||||
ChatMessageTimings,
|
||||
ChatMessageType,
|
||||
ChatRole,
|
||||
ChatUploadedFile,
|
||||
// Database types
|
||||
DatabaseConversation,
|
||||
DatabaseMessage,
|
||||
DatabaseMessageExtra,
|
||||
|
|
@ -75,9 +108,19 @@ declare global {
|
|||
DatabaseMessageExtraTextFile,
|
||||
DatabaseMessageExtraPdfFile,
|
||||
DatabaseMessageExtraLegacyContext,
|
||||
ExportedConversation,
|
||||
ExportedConversations,
|
||||
// Enum types
|
||||
ModelModality,
|
||||
ServerRole,
|
||||
ServerModelStatus,
|
||||
// Model types
|
||||
ModelModalities,
|
||||
ModelOption,
|
||||
// Settings types
|
||||
SettingsChatServiceOptions,
|
||||
SettingsConfigValue,
|
||||
SettingsFieldConfig,
|
||||
SettingsConfigType,
|
||||
SettingsChatServiceOptions
|
||||
SettingsConfigType
|
||||
};
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,9 +1,17 @@
|
|||
<script lang="ts">
|
||||
import { FileText, Image, Music, FileIcon, Eye } from '@lucide/svelte';
|
||||
import { FileTypeCategory, MimeTypeApplication } from '$lib/enums/files';
|
||||
import { convertPDFToImage } from '$lib/utils/pdf-processing';
|
||||
import { Button } from '$lib/components/ui/button';
|
||||
import { getFileTypeCategory } from '$lib/utils/file-type';
|
||||
import * as Alert from '$lib/components/ui/alert';
|
||||
import { SyntaxHighlightedCode } from '$lib/components/app';
|
||||
import { FileText, Image, Music, FileIcon, Eye, Info } from '@lucide/svelte';
|
||||
import {
|
||||
isTextFile,
|
||||
isImageFile,
|
||||
isPdfFile,
|
||||
isAudioFile,
|
||||
getLanguageFromFilename
|
||||
} from '$lib/utils';
|
||||
import { convertPDFToImage } from '$lib/utils/browser-only';
|
||||
import { modelsStore } from '$lib/stores/models.svelte';
|
||||
|
||||
interface Props {
|
||||
// Either an uploaded file or a stored attachment
|
||||
|
|
@ -12,53 +20,36 @@
|
|||
// For uploaded files
|
||||
preview?: string;
|
||||
name?: string;
|
||||
type?: string;
|
||||
textContent?: string;
|
||||
// For checking vision modality
|
||||
activeModelId?: string;
|
||||
}
|
||||
|
||||
let { uploadedFile, attachment, preview, name, type, textContent }: Props = $props();
|
||||
let { uploadedFile, attachment, preview, name, textContent, activeModelId }: Props = $props();
|
||||
|
||||
let hasVisionModality = $derived(
|
||||
activeModelId ? modelsStore.modelSupportsVision(activeModelId) : false
|
||||
);
|
||||
|
||||
let displayName = $derived(uploadedFile?.name || attachment?.name || name || 'Unknown File');
|
||||
|
||||
let displayPreview = $derived(
|
||||
uploadedFile?.preview || (attachment?.type === 'imageFile' ? attachment.base64Url : preview)
|
||||
);
|
||||
// Determine file type from uploaded file or attachment
|
||||
let isAudio = $derived(isAudioFile(attachment, uploadedFile));
|
||||
let isImage = $derived(isImageFile(attachment, uploadedFile));
|
||||
let isPdf = $derived(isPdfFile(attachment, uploadedFile));
|
||||
let isText = $derived(isTextFile(attachment, uploadedFile));
|
||||
|
||||
let displayType = $derived(
|
||||
uploadedFile?.type ||
|
||||
(attachment?.type === 'imageFile'
|
||||
? 'image'
|
||||
: attachment?.type === 'textFile'
|
||||
? 'text'
|
||||
: attachment?.type === 'audioFile'
|
||||
? attachment.mimeType || 'audio'
|
||||
: attachment?.type === 'pdfFile'
|
||||
? MimeTypeApplication.PDF
|
||||
: type || 'unknown')
|
||||
let displayPreview = $derived(
|
||||
uploadedFile?.preview ||
|
||||
(isImage && attachment && 'base64Url' in attachment ? attachment.base64Url : preview)
|
||||
);
|
||||
|
||||
let displayTextContent = $derived(
|
||||
uploadedFile?.textContent ||
|
||||
(attachment?.type === 'textFile'
|
||||
? attachment.content
|
||||
: attachment?.type === 'pdfFile'
|
||||
? attachment.content
|
||||
: textContent)
|
||||
(attachment && 'content' in attachment ? attachment.content : textContent)
|
||||
);
|
||||
|
||||
let isAudio = $derived(
|
||||
getFileTypeCategory(displayType) === FileTypeCategory.AUDIO || displayType === 'audio'
|
||||
);
|
||||
|
||||
let isImage = $derived(
|
||||
getFileTypeCategory(displayType) === FileTypeCategory.IMAGE || displayType === 'image'
|
||||
);
|
||||
|
||||
let isPdf = $derived(displayType === MimeTypeApplication.PDF);
|
||||
|
||||
let isText = $derived(
|
||||
getFileTypeCategory(displayType) === FileTypeCategory.TEXT || displayType === 'text'
|
||||
);
|
||||
let language = $derived(getLanguageFromFilename(displayName));
|
||||
|
||||
let IconComponent = $derived(() => {
|
||||
if (isImage) return Image;
|
||||
|
|
@ -87,15 +78,20 @@
|
|||
|
||||
if (uploadedFile?.file) {
|
||||
file = uploadedFile.file;
|
||||
} else if (attachment?.type === 'pdfFile') {
|
||||
} else if (isPdf && attachment) {
|
||||
// Check if we have pre-processed images
|
||||
if (attachment.images && Array.isArray(attachment.images)) {
|
||||
if (
|
||||
'images' in attachment &&
|
||||
attachment.images &&
|
||||
Array.isArray(attachment.images) &&
|
||||
attachment.images.length > 0
|
||||
) {
|
||||
pdfImages = attachment.images;
|
||||
return;
|
||||
}
|
||||
|
||||
// Convert base64 back to File for processing
|
||||
if (attachment.base64Data) {
|
||||
if ('base64Data' in attachment && attachment.base64Data) {
|
||||
const base64Data = attachment.base64Data;
|
||||
const byteCharacters = atob(base64Data);
|
||||
const byteNumbers = new Array(byteCharacters.length);
|
||||
|
|
@ -103,7 +99,7 @@
|
|||
byteNumbers[i] = byteCharacters.charCodeAt(i);
|
||||
}
|
||||
const byteArray = new Uint8Array(byteNumbers);
|
||||
file = new File([byteArray], displayName, { type: MimeTypeApplication.PDF });
|
||||
file = new File([byteArray], displayName, { type: 'application/pdf' });
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -181,6 +177,24 @@
|
|||
/>
|
||||
</div>
|
||||
{:else if isPdf && pdfViewMode === 'pages'}
|
||||
{#if !hasVisionModality && activeModelId}
|
||||
<Alert.Root class="mb-4">
|
||||
<Info class="h-4 w-4" />
|
||||
<Alert.Title>Preview only</Alert.Title>
|
||||
<Alert.Description>
|
||||
<span class="inline-flex">
|
||||
The selected model does not support vision. Only the extracted
|
||||
<!-- svelte-ignore a11y_click_events_have_key_events -->
|
||||
<!-- svelte-ignore a11y_no_static_element_interactions -->
|
||||
<span class="mx-1 cursor-pointer underline" onclick={() => (pdfViewMode = 'text')}>
|
||||
text
|
||||
</span>
|
||||
will be sent to the model.
|
||||
</span>
|
||||
</Alert.Description>
|
||||
</Alert.Root>
|
||||
{/if}
|
||||
|
||||
{#if pdfImagesLoading}
|
||||
<div class="flex items-center justify-center p-8">
|
||||
<div class="text-center">
|
||||
|
|
@ -227,28 +241,24 @@
|
|||
</div>
|
||||
{/if}
|
||||
{:else if (isText || (isPdf && pdfViewMode === 'text')) && displayTextContent}
|
||||
<div
|
||||
class="max-h-[60vh] overflow-auto rounded-lg bg-muted p-4 font-mono text-sm break-words whitespace-pre-wrap"
|
||||
>
|
||||
{displayTextContent}
|
||||
</div>
|
||||
<SyntaxHighlightedCode code={displayTextContent} {language} maxWidth="69rem" />
|
||||
{:else if isAudio}
|
||||
<div class="flex items-center justify-center p-8">
|
||||
<div class="w-full max-w-md text-center">
|
||||
<Music class="mx-auto mb-4 h-16 w-16 text-muted-foreground" />
|
||||
|
||||
{#if attachment?.type === 'audioFile'}
|
||||
{#if uploadedFile?.preview}
|
||||
<audio controls class="mb-4 w-full" src={uploadedFile.preview}>
|
||||
Your browser does not support the audio element.
|
||||
</audio>
|
||||
{:else if isAudio && attachment && 'mimeType' in attachment && 'base64Data' in attachment}
|
||||
<audio
|
||||
controls
|
||||
class="mb-4 w-full"
|
||||
src="data:{attachment.mimeType};base64,{attachment.base64Data}"
|
||||
src={`data:${attachment.mimeType};base64,${attachment.base64Data}`}
|
||||
>
|
||||
Your browser does not support the audio element.
|
||||
</audio>
|
||||
{:else if uploadedFile?.preview}
|
||||
<audio controls class="mb-4 w-full" src={uploadedFile.preview}>
|
||||
Your browser does not support the audio element.
|
||||
</audio>
|
||||
{:else}
|
||||
<p class="mb-4 text-muted-foreground">Audio preview not available</p>
|
||||
{/if}
|
||||
|
|
|
|||
|
|
@ -1,7 +1,7 @@
|
|||
<script lang="ts">
|
||||
import { RemoveButton } from '$lib/components/app';
|
||||
import { formatFileSize, getFileTypeLabel, getPreviewText } from '$lib/utils/file-preview';
|
||||
import { FileTypeCategory, MimeTypeText } from '$lib/enums/files';
|
||||
import { getFileTypeLabel, getPreviewText, formatFileSize, isTextFile } from '$lib/utils';
|
||||
import { AttachmentType } from '$lib/enums';
|
||||
|
||||
interface Props {
|
||||
class?: string;
|
||||
|
|
@ -12,7 +12,9 @@
|
|||
readonly?: boolean;
|
||||
size?: number;
|
||||
textContent?: string;
|
||||
type: string;
|
||||
// Either uploaded file or stored attachment
|
||||
uploadedFile?: ChatUploadedFile;
|
||||
attachment?: DatabaseMessageExtra;
|
||||
}
|
||||
|
||||
let {
|
||||
|
|
@ -24,11 +26,41 @@
|
|||
readonly = false,
|
||||
size,
|
||||
textContent,
|
||||
type
|
||||
uploadedFile,
|
||||
attachment
|
||||
}: Props = $props();
|
||||
|
||||
let isText = $derived(isTextFile(attachment, uploadedFile));
|
||||
|
||||
let fileTypeLabel = $derived.by(() => {
|
||||
if (uploadedFile?.type) {
|
||||
return getFileTypeLabel(uploadedFile.type);
|
||||
}
|
||||
|
||||
if (attachment) {
|
||||
if ('mimeType' in attachment && attachment.mimeType) {
|
||||
return getFileTypeLabel(attachment.mimeType);
|
||||
}
|
||||
|
||||
if (attachment.type) {
|
||||
return getFileTypeLabel(attachment.type);
|
||||
}
|
||||
}
|
||||
|
||||
return getFileTypeLabel(name);
|
||||
});
|
||||
|
||||
let pdfProcessingMode = $derived.by(() => {
|
||||
if (attachment?.type === AttachmentType.PDF) {
|
||||
const pdfAttachment = attachment as DatabaseMessageExtraPdfFile;
|
||||
|
||||
return pdfAttachment.processedAsImages ? 'Sent as Image' : 'Sent as Text';
|
||||
}
|
||||
return null;
|
||||
});
|
||||
</script>
|
||||
|
||||
{#if type === MimeTypeText.PLAIN || type === FileTypeCategory.TEXT}
|
||||
{#if isText}
|
||||
{#if readonly}
|
||||
<!-- Readonly mode (ChatMessage) -->
|
||||
<button
|
||||
|
|
@ -45,7 +77,7 @@
|
|||
<span class="text-xs text-muted-foreground">{formatFileSize(size)}</span>
|
||||
{/if}
|
||||
|
||||
{#if textContent && type === 'text'}
|
||||
{#if textContent}
|
||||
<div class="relative mt-2 w-full">
|
||||
<div
|
||||
class="overflow-hidden font-mono text-xs leading-relaxed break-words whitespace-pre-wrap text-muted-foreground"
|
||||
|
|
@ -105,17 +137,21 @@
|
|||
<div
|
||||
class="flex h-8 w-8 items-center justify-center rounded bg-primary/10 text-xs font-medium text-primary"
|
||||
>
|
||||
{getFileTypeLabel(type)}
|
||||
{fileTypeLabel}
|
||||
</div>
|
||||
|
||||
<div class="flex flex-col gap-1">
|
||||
<div class="flex flex-col gap-0.5">
|
||||
<span
|
||||
class="max-w-24 truncate text-sm font-medium text-foreground group-hover:pr-6 md:max-w-32"
|
||||
class="max-w-24 truncate text-sm font-medium text-foreground {readonly
|
||||
? ''
|
||||
: 'group-hover:pr-6'} md:max-w-32"
|
||||
>
|
||||
{name}
|
||||
</span>
|
||||
|
||||
{#if size}
|
||||
{#if pdfProcessingMode}
|
||||
<span class="text-left text-xs text-muted-foreground">{pdfProcessingMode}</span>
|
||||
{:else if size}
|
||||
<span class="text-left text-xs text-muted-foreground">{formatFileSize(size)}</span>
|
||||
{/if}
|
||||
</div>
|
||||
|
|
|
|||
|
|
@ -30,7 +30,9 @@
|
|||
}: Props = $props();
|
||||
</script>
|
||||
|
||||
<div class="group relative overflow-hidden rounded-lg border border-border bg-muted {className}">
|
||||
<div
|
||||
class="group relative overflow-hidden rounded-lg bg-muted shadow-lg dark:border dark:border-muted {className}"
|
||||
>
|
||||
{#if onClick}
|
||||
<button
|
||||
type="button"
|
||||
|
|
|
|||
|
|
@ -2,10 +2,8 @@
|
|||
import { ChatAttachmentThumbnailImage, ChatAttachmentThumbnailFile } from '$lib/components/app';
|
||||
import { Button } from '$lib/components/ui/button';
|
||||
import { ChevronLeft, ChevronRight } from '@lucide/svelte';
|
||||
import { FileTypeCategory } from '$lib/enums/files';
|
||||
import { getFileTypeCategory } from '$lib/utils/file-type';
|
||||
import { DialogChatAttachmentPreview, DialogChatAttachmentsViewAll } from '$lib/components/app';
|
||||
import type { ChatAttachmentDisplayItem, ChatAttachmentPreviewItem } from '$lib/types/chat';
|
||||
import { getAttachmentDisplayItems } from '$lib/utils';
|
||||
|
||||
interface Props {
|
||||
class?: string;
|
||||
|
|
@ -22,6 +20,8 @@
|
|||
imageWidth?: string;
|
||||
// Limit display to single row with "+ X more" button
|
||||
limitToSingleRow?: boolean;
|
||||
// For vision modality check
|
||||
activeModelId?: string;
|
||||
}
|
||||
|
||||
let {
|
||||
|
|
@ -35,10 +35,11 @@
|
|||
imageClass = '',
|
||||
imageHeight = 'h-24',
|
||||
imageWidth = 'w-auto',
|
||||
limitToSingleRow = false
|
||||
limitToSingleRow = false,
|
||||
activeModelId
|
||||
}: Props = $props();
|
||||
|
||||
let displayItems = $derived(getDisplayItems());
|
||||
let displayItems = $derived(getAttachmentDisplayItems({ uploadedFiles, attachments }));
|
||||
|
||||
let canScrollLeft = $state(false);
|
||||
let canScrollRight = $state(false);
|
||||
|
|
@ -49,81 +50,6 @@
|
|||
let showViewAll = $derived(limitToSingleRow && displayItems.length > 0 && isScrollable);
|
||||
let viewAllDialogOpen = $state(false);
|
||||
|
||||
function getDisplayItems(): ChatAttachmentDisplayItem[] {
|
||||
const items: ChatAttachmentDisplayItem[] = [];
|
||||
|
||||
// Add uploaded files (ChatForm)
|
||||
for (const file of uploadedFiles) {
|
||||
items.push({
|
||||
id: file.id,
|
||||
name: file.name,
|
||||
size: file.size,
|
||||
preview: file.preview,
|
||||
type: file.type,
|
||||
isImage: getFileTypeCategory(file.type) === FileTypeCategory.IMAGE,
|
||||
uploadedFile: file,
|
||||
textContent: file.textContent
|
||||
});
|
||||
}
|
||||
|
||||
// Add stored attachments (ChatMessage)
|
||||
for (const [index, attachment] of attachments.entries()) {
|
||||
if (attachment.type === 'imageFile') {
|
||||
items.push({
|
||||
id: `attachment-${index}`,
|
||||
name: attachment.name,
|
||||
preview: attachment.base64Url,
|
||||
type: 'image',
|
||||
isImage: true,
|
||||
attachment,
|
||||
attachmentIndex: index
|
||||
});
|
||||
} else if (attachment.type === 'textFile') {
|
||||
items.push({
|
||||
id: `attachment-${index}`,
|
||||
name: attachment.name,
|
||||
type: 'text',
|
||||
isImage: false,
|
||||
attachment,
|
||||
attachmentIndex: index,
|
||||
textContent: attachment.content
|
||||
});
|
||||
} else if (attachment.type === 'context') {
|
||||
// Legacy format from old webui - treat as text file
|
||||
items.push({
|
||||
id: `attachment-${index}`,
|
||||
name: attachment.name,
|
||||
type: 'text',
|
||||
isImage: false,
|
||||
attachment,
|
||||
attachmentIndex: index,
|
||||
textContent: attachment.content
|
||||
});
|
||||
} else if (attachment.type === 'audioFile') {
|
||||
items.push({
|
||||
id: `attachment-${index}`,
|
||||
name: attachment.name,
|
||||
type: attachment.mimeType || 'audio',
|
||||
isImage: false,
|
||||
attachment,
|
||||
attachmentIndex: index
|
||||
});
|
||||
} else if (attachment.type === 'pdfFile') {
|
||||
items.push({
|
||||
id: `attachment-${index}`,
|
||||
name: attachment.name,
|
||||
type: 'application/pdf',
|
||||
isImage: false,
|
||||
attachment,
|
||||
attachmentIndex: index,
|
||||
textContent: attachment.content
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
return items.reverse();
|
||||
}
|
||||
|
||||
function openPreview(item: ChatAttachmentDisplayItem, event?: MouseEvent) {
|
||||
event?.stopPropagation();
|
||||
event?.preventDefault();
|
||||
|
|
@ -133,7 +59,6 @@
|
|||
attachment: item.attachment,
|
||||
preview: item.preview,
|
||||
name: item.name,
|
||||
type: item.type,
|
||||
size: item.size,
|
||||
textContent: item.textContent
|
||||
};
|
||||
|
|
@ -181,6 +106,7 @@
|
|||
|
||||
{#if displayItems.length > 0}
|
||||
<div class={className} {style}>
|
||||
{#if limitToSingleRow}
|
||||
<div class="relative">
|
||||
<button
|
||||
class="absolute top-1/2 left-4 z-10 flex h-6 w-6 -translate-y-1/2 items-center justify-center rounded-full bg-foreground/15 shadow-md backdrop-blur-xs transition-opacity hover:bg-foreground/35 {canScrollLeft
|
||||
|
|
@ -200,7 +126,9 @@
|
|||
{#each displayItems as item (item.id)}
|
||||
{#if item.isImage && item.preview}
|
||||
<ChatAttachmentThumbnailImage
|
||||
class="flex-shrink-0 cursor-pointer {limitToSingleRow ? 'first:ml-4 last:mr-4' : ''}"
|
||||
class="flex-shrink-0 cursor-pointer {limitToSingleRow
|
||||
? 'first:ml-4 last:mr-4'
|
||||
: ''}"
|
||||
id={item.id}
|
||||
name={item.name}
|
||||
preview={item.preview}
|
||||
|
|
@ -213,14 +141,17 @@
|
|||
/>
|
||||
{:else}
|
||||
<ChatAttachmentThumbnailFile
|
||||
class="flex-shrink-0 cursor-pointer {limitToSingleRow ? 'first:ml-4 last:mr-4' : ''}"
|
||||
class="flex-shrink-0 cursor-pointer {limitToSingleRow
|
||||
? 'first:ml-4 last:mr-4'
|
||||
: ''}"
|
||||
id={item.id}
|
||||
name={item.name}
|
||||
type={item.type}
|
||||
size={item.size}
|
||||
{readonly}
|
||||
onRemove={onFileRemove}
|
||||
textContent={item.textContent}
|
||||
attachment={item.attachment}
|
||||
uploadedFile={item.uploadedFile}
|
||||
onClick={(event) => openPreview(item, event)}
|
||||
/>
|
||||
{/if}
|
||||
|
|
@ -247,10 +178,43 @@
|
|||
class="h-6 text-xs text-muted-foreground hover:text-foreground"
|
||||
onclick={() => (viewAllDialogOpen = true)}
|
||||
>
|
||||
View all
|
||||
View all ({displayItems.length})
|
||||
</Button>
|
||||
</div>
|
||||
{/if}
|
||||
{:else}
|
||||
<div class="flex flex-wrap items-start justify-end gap-3">
|
||||
{#each displayItems as item (item.id)}
|
||||
{#if item.isImage && item.preview}
|
||||
<ChatAttachmentThumbnailImage
|
||||
class="cursor-pointer"
|
||||
id={item.id}
|
||||
name={item.name}
|
||||
preview={item.preview}
|
||||
{readonly}
|
||||
onRemove={onFileRemove}
|
||||
height={imageHeight}
|
||||
width={imageWidth}
|
||||
{imageClass}
|
||||
onClick={(event) => openPreview(item, event)}
|
||||
/>
|
||||
{:else}
|
||||
<ChatAttachmentThumbnailFile
|
||||
class="cursor-pointer"
|
||||
id={item.id}
|
||||
name={item.name}
|
||||
size={item.size}
|
||||
{readonly}
|
||||
onRemove={onFileRemove}
|
||||
textContent={item.textContent}
|
||||
attachment={item.attachment}
|
||||
uploadedFile={item.uploadedFile}
|
||||
onClick={(event?: MouseEvent) => openPreview(item, event)}
|
||||
/>
|
||||
{/if}
|
||||
{/each}
|
||||
</div>
|
||||
{/if}
|
||||
</div>
|
||||
{/if}
|
||||
|
||||
|
|
@ -261,9 +225,9 @@
|
|||
attachment={previewItem.attachment}
|
||||
preview={previewItem.preview}
|
||||
name={previewItem.name}
|
||||
type={previewItem.type}
|
||||
size={previewItem.size}
|
||||
textContent={previewItem.textContent}
|
||||
{activeModelId}
|
||||
/>
|
||||
{/if}
|
||||
|
||||
|
|
@ -275,4 +239,5 @@
|
|||
{onFileRemove}
|
||||
imageHeight="h-64"
|
||||
{imageClass}
|
||||
{activeModelId}
|
||||
/>
|
||||
|
|
|
|||
|
|
@ -4,9 +4,7 @@
|
|||
ChatAttachmentThumbnailFile,
|
||||
DialogChatAttachmentPreview
|
||||
} from '$lib/components/app';
|
||||
import { FileTypeCategory } from '$lib/enums/files';
|
||||
import { getFileTypeCategory } from '$lib/utils/file-type';
|
||||
import type { ChatAttachmentDisplayItem, ChatAttachmentPreviewItem } from '$lib/types/chat';
|
||||
import { getAttachmentDisplayItems } from '$lib/utils';
|
||||
|
||||
interface Props {
|
||||
uploadedFiles?: ChatUploadedFile[];
|
||||
|
|
@ -16,6 +14,7 @@
|
|||
imageHeight?: string;
|
||||
imageWidth?: string;
|
||||
imageClass?: string;
|
||||
activeModelId?: string;
|
||||
}
|
||||
|
||||
let {
|
||||
|
|
@ -25,89 +24,17 @@
|
|||
onFileRemove,
|
||||
imageHeight = 'h-24',
|
||||
imageWidth = 'w-auto',
|
||||
imageClass = ''
|
||||
imageClass = '',
|
||||
activeModelId
|
||||
}: Props = $props();
|
||||
|
||||
let previewDialogOpen = $state(false);
|
||||
let previewItem = $state<ChatAttachmentPreviewItem | null>(null);
|
||||
|
||||
let displayItems = $derived(getDisplayItems());
|
||||
let displayItems = $derived(getAttachmentDisplayItems({ uploadedFiles, attachments }));
|
||||
let imageItems = $derived(displayItems.filter((item) => item.isImage));
|
||||
let fileItems = $derived(displayItems.filter((item) => !item.isImage));
|
||||
|
||||
function getDisplayItems(): ChatAttachmentDisplayItem[] {
|
||||
const items: ChatAttachmentDisplayItem[] = [];
|
||||
|
||||
for (const file of uploadedFiles) {
|
||||
items.push({
|
||||
id: file.id,
|
||||
name: file.name,
|
||||
size: file.size,
|
||||
preview: file.preview,
|
||||
type: file.type,
|
||||
isImage: getFileTypeCategory(file.type) === FileTypeCategory.IMAGE,
|
||||
uploadedFile: file,
|
||||
textContent: file.textContent
|
||||
});
|
||||
}
|
||||
|
||||
for (const [index, attachment] of attachments.entries()) {
|
||||
if (attachment.type === 'imageFile') {
|
||||
items.push({
|
||||
id: `attachment-${index}`,
|
||||
name: attachment.name,
|
||||
preview: attachment.base64Url,
|
||||
type: 'image',
|
||||
isImage: true,
|
||||
attachment,
|
||||
attachmentIndex: index
|
||||
});
|
||||
} else if (attachment.type === 'textFile') {
|
||||
items.push({
|
||||
id: `attachment-${index}`,
|
||||
name: attachment.name,
|
||||
type: 'text',
|
||||
isImage: false,
|
||||
attachment,
|
||||
attachmentIndex: index,
|
||||
textContent: attachment.content
|
||||
});
|
||||
} else if (attachment.type === 'context') {
|
||||
// Legacy format from old webui - treat as text file
|
||||
items.push({
|
||||
id: `attachment-${index}`,
|
||||
name: attachment.name,
|
||||
type: 'text',
|
||||
isImage: false,
|
||||
attachment,
|
||||
attachmentIndex: index,
|
||||
textContent: attachment.content
|
||||
});
|
||||
} else if (attachment.type === 'audioFile') {
|
||||
items.push({
|
||||
id: `attachment-${index}`,
|
||||
name: attachment.name,
|
||||
type: attachment.mimeType || 'audio',
|
||||
isImage: false,
|
||||
attachment,
|
||||
attachmentIndex: index
|
||||
});
|
||||
} else if (attachment.type === 'pdfFile') {
|
||||
items.push({
|
||||
id: `attachment-${index}`,
|
||||
name: attachment.name,
|
||||
type: 'application/pdf',
|
||||
isImage: false,
|
||||
attachment,
|
||||
attachmentIndex: index,
|
||||
textContent: attachment.content
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
return items.reverse();
|
||||
}
|
||||
|
||||
function openPreview(item: (typeof displayItems)[0], event?: Event) {
|
||||
if (event) {
|
||||
event.preventDefault();
|
||||
|
|
@ -119,7 +46,6 @@
|
|||
attachment: item.attachment,
|
||||
preview: item.preview,
|
||||
name: item.name,
|
||||
type: item.type,
|
||||
size: item.size,
|
||||
textContent: item.textContent
|
||||
};
|
||||
|
|
@ -138,12 +64,13 @@
|
|||
class="cursor-pointer"
|
||||
id={item.id}
|
||||
name={item.name}
|
||||
type={item.type}
|
||||
size={item.size}
|
||||
{readonly}
|
||||
onRemove={onFileRemove}
|
||||
textContent={item.textContent}
|
||||
onClick={(event) => openPreview(item, event)}
|
||||
attachment={item.attachment}
|
||||
uploadedFile={item.uploadedFile}
|
||||
onClick={(event?: MouseEvent) => openPreview(item, event)}
|
||||
/>
|
||||
{/each}
|
||||
</div>
|
||||
|
|
@ -183,8 +110,8 @@
|
|||
attachment={previewItem.attachment}
|
||||
preview={previewItem.preview}
|
||||
name={previewItem.name}
|
||||
type={previewItem.type}
|
||||
size={previewItem.size}
|
||||
textContent={previewItem.textContent}
|
||||
{activeModelId}
|
||||
/>
|
||||
{/if}
|
||||
|
|
|
|||
|
|
@ -9,15 +9,13 @@
|
|||
} from '$lib/components/app';
|
||||
import { INPUT_CLASSES } from '$lib/constants/input-classes';
|
||||
import { config } from '$lib/stores/settings.svelte';
|
||||
import { FileTypeCategory, MimeTypeApplication } from '$lib/enums/files';
|
||||
import {
|
||||
AudioRecorder,
|
||||
convertToWav,
|
||||
createAudioFile,
|
||||
isAudioRecordingSupported
|
||||
} from '$lib/utils/audio-recording';
|
||||
import { onMount } from 'svelte';
|
||||
import { modelsStore, modelOptions, selectedModelId } from '$lib/stores/models.svelte';
|
||||
import { isRouterMode } from '$lib/stores/server.svelte';
|
||||
import { chatStore } from '$lib/stores/chat.svelte';
|
||||
import { activeMessages } from '$lib/stores/conversations.svelte';
|
||||
import {
|
||||
FileTypeCategory,
|
||||
MimeTypeApplication,
|
||||
FileExtensionAudio,
|
||||
FileExtensionImage,
|
||||
FileExtensionPdf,
|
||||
|
|
@ -25,8 +23,15 @@
|
|||
MimeTypeAudio,
|
||||
MimeTypeImage,
|
||||
MimeTypeText
|
||||
} from '$lib/enums/files';
|
||||
import { isIMEComposing } from '$lib/utils/is-ime-composing';
|
||||
} from '$lib/enums';
|
||||
import { isIMEComposing } from '$lib/utils';
|
||||
import {
|
||||
AudioRecorder,
|
||||
convertToWav,
|
||||
createAudioFile,
|
||||
isAudioRecordingSupported
|
||||
} from '$lib/utils/browser-only';
|
||||
import { onMount } from 'svelte';
|
||||
|
||||
interface Props {
|
||||
class?: string;
|
||||
|
|
@ -53,6 +58,7 @@
|
|||
}: Props = $props();
|
||||
|
||||
let audioRecorder: AudioRecorder | undefined;
|
||||
let chatFormActionsRef: ChatFormActions | undefined = $state(undefined);
|
||||
let currentConfig = $derived(config());
|
||||
let fileAcceptString = $state<string | undefined>(undefined);
|
||||
let fileInputRef: ChatFormFileInputInvisible | undefined = $state(undefined);
|
||||
|
|
@ -63,18 +69,97 @@
|
|||
let recordingSupported = $state(false);
|
||||
let textareaRef: ChatFormTextarea | undefined = $state(undefined);
|
||||
|
||||
// Check if model is selected (in ROUTER mode)
|
||||
let conversationModel = $derived(
|
||||
chatStore.getConversationModel(activeMessages() as DatabaseMessage[])
|
||||
);
|
||||
let isRouter = $derived(isRouterMode());
|
||||
let hasModelSelected = $derived(!isRouter || !!conversationModel || !!selectedModelId());
|
||||
|
||||
// Get active model ID for capability detection
|
||||
let activeModelId = $derived.by(() => {
|
||||
const options = modelOptions();
|
||||
|
||||
if (!isRouter) {
|
||||
return options.length > 0 ? options[0].model : null;
|
||||
}
|
||||
|
||||
// First try user-selected model
|
||||
const selectedId = selectedModelId();
|
||||
if (selectedId) {
|
||||
const model = options.find((m) => m.id === selectedId);
|
||||
if (model) return model.model;
|
||||
}
|
||||
|
||||
// Fallback to conversation model
|
||||
if (conversationModel) {
|
||||
const model = options.find((m) => m.model === conversationModel);
|
||||
if (model) return model.model;
|
||||
}
|
||||
|
||||
return null;
|
||||
});
|
||||
|
||||
// State for model props reactivity
|
||||
let modelPropsVersion = $state(0);
|
||||
|
||||
// Fetch model props when active model changes (works for both MODEL and ROUTER mode)
|
||||
$effect(() => {
|
||||
if (activeModelId) {
|
||||
const cached = modelsStore.getModelProps(activeModelId);
|
||||
if (!cached) {
|
||||
modelsStore.fetchModelProps(activeModelId).then(() => {
|
||||
modelPropsVersion++;
|
||||
});
|
||||
}
|
||||
}
|
||||
});
|
||||
|
||||
// Derive modalities from active model (works for both MODEL and ROUTER mode)
|
||||
let hasAudioModality = $derived.by(() => {
|
||||
if (activeModelId) {
|
||||
void modelPropsVersion; // Trigger reactivity on props fetch
|
||||
return modelsStore.modelSupportsAudio(activeModelId);
|
||||
}
|
||||
|
||||
return false;
|
||||
});
|
||||
|
||||
let hasVisionModality = $derived.by(() => {
|
||||
if (activeModelId) {
|
||||
void modelPropsVersion; // Trigger reactivity on props fetch
|
||||
return modelsStore.modelSupportsVision(activeModelId);
|
||||
}
|
||||
|
||||
return false;
|
||||
});
|
||||
|
||||
function checkModelSelected(): boolean {
|
||||
if (!hasModelSelected) {
|
||||
// Open the model selector
|
||||
chatFormActionsRef?.openModelSelector();
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
function getAcceptStringForFileType(fileType: FileTypeCategory): string {
|
||||
switch (fileType) {
|
||||
case FileTypeCategory.IMAGE:
|
||||
return [...Object.values(FileExtensionImage), ...Object.values(MimeTypeImage)].join(',');
|
||||
|
||||
case FileTypeCategory.AUDIO:
|
||||
return [...Object.values(FileExtensionAudio), ...Object.values(MimeTypeAudio)].join(',');
|
||||
|
||||
case FileTypeCategory.PDF:
|
||||
return [...Object.values(FileExtensionPdf), ...Object.values(MimeTypeApplication)].join(
|
||||
','
|
||||
);
|
||||
|
||||
case FileTypeCategory.TEXT:
|
||||
return [...Object.values(FileExtensionText), MimeTypeText.PLAIN].join(',');
|
||||
|
||||
default:
|
||||
return '';
|
||||
}
|
||||
|
|
@ -103,6 +188,9 @@
|
|||
|
||||
if ((!message.trim() && uploadedFiles.length === 0) || disabled || isLoading) return;
|
||||
|
||||
// Check if model is selected first
|
||||
if (!checkModelSelected()) return;
|
||||
|
||||
const messageToSend = message.trim();
|
||||
const filesToSend = [...uploadedFiles];
|
||||
|
||||
|
|
@ -131,6 +219,7 @@
|
|||
if (files.length > 0) {
|
||||
event.preventDefault();
|
||||
onFileUpload?.(files);
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
|
|
@ -154,6 +243,7 @@
|
|||
async function handleMicClick() {
|
||||
if (!audioRecorder || !recordingSupported) {
|
||||
console.warn('Audio recording not supported');
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
|
|
@ -187,6 +277,9 @@
|
|||
event.preventDefault();
|
||||
if ((!message.trim() && uploadedFiles.length === 0) || disabled || isLoading) return;
|
||||
|
||||
// Check if model is selected first
|
||||
if (!checkModelSelected()) return;
|
||||
|
||||
const messageToSend = message.trim();
|
||||
const filesToSend = [...uploadedFiles];
|
||||
|
||||
|
|
@ -225,12 +318,16 @@
|
|||
<ChatFormFileInputInvisible
|
||||
bind:this={fileInputRef}
|
||||
bind:accept={fileAcceptString}
|
||||
{hasAudioModality}
|
||||
{hasVisionModality}
|
||||
onFileSelect={handleFileSelect}
|
||||
/>
|
||||
|
||||
<form
|
||||
onsubmit={handleSubmit}
|
||||
class="{INPUT_CLASSES} border-radius-bottom-none mx-auto max-w-[48rem] overflow-hidden rounded-3xl backdrop-blur-md {className}"
|
||||
class="{INPUT_CLASSES} border-radius-bottom-none mx-auto max-w-[48rem] overflow-hidden rounded-3xl backdrop-blur-md {disabled
|
||||
? 'cursor-not-allowed opacity-60'
|
||||
: ''} {className}"
|
||||
>
|
||||
<ChatAttachmentsList
|
||||
bind:uploadedFiles
|
||||
|
|
@ -238,6 +335,7 @@
|
|||
limitToSingleRow
|
||||
class="py-5"
|
||||
style="scroll-padding: 1rem;"
|
||||
activeModelId={activeModelId ?? undefined}
|
||||
/>
|
||||
|
||||
<div
|
||||
|
|
@ -252,10 +350,13 @@
|
|||
/>
|
||||
|
||||
<ChatFormActions
|
||||
bind:this={chatFormActionsRef}
|
||||
canSend={message.trim().length > 0 || uploadedFiles.length > 0}
|
||||
hasText={message.trim().length > 0}
|
||||
{disabled}
|
||||
{isLoading}
|
||||
{isRecording}
|
||||
{uploadedFiles}
|
||||
onFileUpload={handleFileUpload}
|
||||
onMicClick={handleMicClick}
|
||||
onStop={handleStop}
|
||||
|
|
|
|||
|
|
@ -1,22 +1,29 @@
|
|||
<script lang="ts">
|
||||
import { Paperclip, Image, FileText, File, Volume2 } from '@lucide/svelte';
|
||||
import { Paperclip } from '@lucide/svelte';
|
||||
import { Button } from '$lib/components/ui/button';
|
||||
import * as DropdownMenu from '$lib/components/ui/dropdown-menu';
|
||||
import * as Tooltip from '$lib/components/ui/tooltip';
|
||||
import { TOOLTIP_DELAY_DURATION } from '$lib/constants/tooltip-config';
|
||||
import { FileTypeCategory } from '$lib/enums/files';
|
||||
import { supportsAudio, supportsVision } from '$lib/stores/server.svelte';
|
||||
import { FILE_TYPE_ICONS } from '$lib/constants/icons';
|
||||
import { FileTypeCategory } from '$lib/enums';
|
||||
|
||||
interface Props {
|
||||
class?: string;
|
||||
disabled?: boolean;
|
||||
hasAudioModality?: boolean;
|
||||
hasVisionModality?: boolean;
|
||||
onFileUpload?: (fileType?: FileTypeCategory) => void;
|
||||
}
|
||||
|
||||
let { class: className = '', disabled = false, onFileUpload }: Props = $props();
|
||||
let {
|
||||
class: className = '',
|
||||
disabled = false,
|
||||
hasAudioModality = false,
|
||||
hasVisionModality = false,
|
||||
onFileUpload
|
||||
}: Props = $props();
|
||||
|
||||
const fileUploadTooltipText = $derived.by(() => {
|
||||
return !supportsVision()
|
||||
return !hasVisionModality
|
||||
? 'Text files and PDFs supported. Images, audio, and video require vision models.'
|
||||
: 'Attach files';
|
||||
});
|
||||
|
|
@ -29,7 +36,7 @@
|
|||
<div class="flex items-center gap-1 {className}">
|
||||
<DropdownMenu.Root>
|
||||
<DropdownMenu.Trigger name="Attach files">
|
||||
<Tooltip.Root delayDuration={TOOLTIP_DELAY_DURATION}>
|
||||
<Tooltip.Root>
|
||||
<Tooltip.Trigger>
|
||||
<Button
|
||||
class="file-upload-button h-8 w-8 rounded-full bg-transparent p-0 text-muted-foreground hover:bg-foreground/10 hover:text-foreground"
|
||||
|
|
@ -49,40 +56,40 @@
|
|||
</DropdownMenu.Trigger>
|
||||
|
||||
<DropdownMenu.Content align="start" class="w-48">
|
||||
<Tooltip.Root delayDuration={TOOLTIP_DELAY_DURATION}>
|
||||
<Tooltip.Root>
|
||||
<Tooltip.Trigger class="w-full">
|
||||
<DropdownMenu.Item
|
||||
class="images-button flex cursor-pointer items-center gap-2"
|
||||
disabled={!supportsVision()}
|
||||
disabled={!hasVisionModality}
|
||||
onclick={() => handleFileUpload(FileTypeCategory.IMAGE)}
|
||||
>
|
||||
<Image class="h-4 w-4" />
|
||||
<FILE_TYPE_ICONS.image class="h-4 w-4" />
|
||||
|
||||
<span>Images</span>
|
||||
</DropdownMenu.Item>
|
||||
</Tooltip.Trigger>
|
||||
|
||||
{#if !supportsVision()}
|
||||
{#if !hasVisionModality}
|
||||
<Tooltip.Content>
|
||||
<p>Images require vision models to be processed</p>
|
||||
</Tooltip.Content>
|
||||
{/if}
|
||||
</Tooltip.Root>
|
||||
|
||||
<Tooltip.Root delayDuration={TOOLTIP_DELAY_DURATION}>
|
||||
<Tooltip.Root>
|
||||
<Tooltip.Trigger class="w-full">
|
||||
<DropdownMenu.Item
|
||||
class="audio-button flex cursor-pointer items-center gap-2"
|
||||
disabled={!supportsAudio()}
|
||||
disabled={!hasAudioModality}
|
||||
onclick={() => handleFileUpload(FileTypeCategory.AUDIO)}
|
||||
>
|
||||
<Volume2 class="h-4 w-4" />
|
||||
<FILE_TYPE_ICONS.audio class="h-4 w-4" />
|
||||
|
||||
<span>Audio Files</span>
|
||||
</DropdownMenu.Item>
|
||||
</Tooltip.Trigger>
|
||||
|
||||
{#if !supportsAudio()}
|
||||
{#if !hasAudioModality}
|
||||
<Tooltip.Content>
|
||||
<p>Audio files require audio models to be processed</p>
|
||||
</Tooltip.Content>
|
||||
|
|
@ -93,24 +100,24 @@
|
|||
class="flex cursor-pointer items-center gap-2"
|
||||
onclick={() => handleFileUpload(FileTypeCategory.TEXT)}
|
||||
>
|
||||
<FileText class="h-4 w-4" />
|
||||
<FILE_TYPE_ICONS.text class="h-4 w-4" />
|
||||
|
||||
<span>Text Files</span>
|
||||
</DropdownMenu.Item>
|
||||
|
||||
<Tooltip.Root delayDuration={TOOLTIP_DELAY_DURATION}>
|
||||
<Tooltip.Root>
|
||||
<Tooltip.Trigger class="w-full">
|
||||
<DropdownMenu.Item
|
||||
class="flex cursor-pointer items-center gap-2"
|
||||
onclick={() => handleFileUpload(FileTypeCategory.PDF)}
|
||||
>
|
||||
<File class="h-4 w-4" />
|
||||
<FILE_TYPE_ICONS.pdf class="h-4 w-4" />
|
||||
|
||||
<span>PDF Files</span>
|
||||
</DropdownMenu.Item>
|
||||
</Tooltip.Trigger>
|
||||
|
||||
{#if !supportsVision()}
|
||||
{#if !hasVisionModality}
|
||||
<Tooltip.Content>
|
||||
<p>PDFs will be converted to text. Image-based PDFs may not work properly.</p>
|
||||
</Tooltip.Content>
|
||||
|
|
|
|||
|
|
@ -1,12 +1,12 @@
|
|||
<script lang="ts">
|
||||
import { Mic } from '@lucide/svelte';
|
||||
import { Mic, Square } from '@lucide/svelte';
|
||||
import { Button } from '$lib/components/ui/button';
|
||||
import * as Tooltip from '$lib/components/ui/tooltip';
|
||||
import { supportsAudio } from '$lib/stores/server.svelte';
|
||||
|
||||
interface Props {
|
||||
class?: string;
|
||||
disabled?: boolean;
|
||||
hasAudioModality?: boolean;
|
||||
isLoading?: boolean;
|
||||
isRecording?: boolean;
|
||||
onMicClick?: () => void;
|
||||
|
|
@ -15,6 +15,7 @@
|
|||
let {
|
||||
class: className = '',
|
||||
disabled = false,
|
||||
hasAudioModality = false,
|
||||
isLoading = false,
|
||||
isRecording = false,
|
||||
onMicClick
|
||||
|
|
@ -22,25 +23,27 @@
|
|||
</script>
|
||||
|
||||
<div class="flex items-center gap-1 {className}">
|
||||
<Tooltip.Root delayDuration={100}>
|
||||
<Tooltip.Root>
|
||||
<Tooltip.Trigger>
|
||||
<Button
|
||||
class="h-8 w-8 rounded-full p-0 {isRecording
|
||||
? 'animate-pulse bg-red-500 text-white hover:bg-red-600'
|
||||
: 'bg-transparent text-muted-foreground hover:bg-foreground/10 hover:text-foreground'} {!supportsAudio()
|
||||
? 'cursor-not-allowed opacity-50'
|
||||
: ''}"
|
||||
disabled={disabled || isLoading || !supportsAudio()}
|
||||
disabled={disabled || isLoading || !hasAudioModality}
|
||||
onclick={onMicClick}
|
||||
type="button"
|
||||
>
|
||||
<span class="sr-only">{isRecording ? 'Stop recording' : 'Start recording'}</span>
|
||||
|
||||
{#if isRecording}
|
||||
<Square class="h-4 w-4 animate-pulse fill-white" />
|
||||
{:else}
|
||||
<Mic class="h-4 w-4" />
|
||||
{/if}
|
||||
</Button>
|
||||
</Tooltip.Trigger>
|
||||
|
||||
{#if !supportsAudio()}
|
||||
{#if !hasAudioModality}
|
||||
<Tooltip.Content>
|
||||
<p>Current model does not support audio</p>
|
||||
</Tooltip.Content>
|
||||
|
|
|
|||
|
|
@ -0,0 +1,55 @@
|
|||
<script lang="ts">
|
||||
import { ArrowUp } from '@lucide/svelte';
|
||||
import { Button } from '$lib/components/ui/button';
|
||||
import * as Tooltip from '$lib/components/ui/tooltip';
|
||||
import { cn } from '$lib/components/ui/utils';
|
||||
|
||||
interface Props {
|
||||
canSend?: boolean;
|
||||
disabled?: boolean;
|
||||
isLoading?: boolean;
|
||||
showErrorState?: boolean;
|
||||
tooltipLabel?: string;
|
||||
}
|
||||
|
||||
let {
|
||||
canSend = false,
|
||||
disabled = false,
|
||||
isLoading = false,
|
||||
showErrorState = false,
|
||||
tooltipLabel
|
||||
}: Props = $props();
|
||||
|
||||
let isDisabled = $derived(!canSend || disabled || isLoading);
|
||||
</script>
|
||||
|
||||
{#snippet submitButton(props = {})}
|
||||
<Button
|
||||
type="submit"
|
||||
disabled={isDisabled}
|
||||
class={cn(
|
||||
'h-8 w-8 rounded-full p-0',
|
||||
showErrorState
|
||||
? 'bg-red-400/10 text-red-400 hover:bg-red-400/20 hover:text-red-400 disabled:opacity-100'
|
||||
: ''
|
||||
)}
|
||||
{...props}
|
||||
>
|
||||
<span class="sr-only">Send</span>
|
||||
<ArrowUp class="h-12 w-12" />
|
||||
</Button>
|
||||
{/snippet}
|
||||
|
||||
{#if tooltipLabel}
|
||||
<Tooltip.Root>
|
||||
<Tooltip.Trigger>
|
||||
{@render submitButton()}
|
||||
</Tooltip.Trigger>
|
||||
|
||||
<Tooltip.Content>
|
||||
<p>{tooltipLabel}</p>
|
||||
</Tooltip.Content>
|
||||
</Tooltip.Root>
|
||||
{:else}
|
||||
{@render submitButton()}
|
||||
{/if}
|
||||
|
|
@ -1,13 +1,20 @@
|
|||
<script lang="ts">
|
||||
import { Square, ArrowUp } from '@lucide/svelte';
|
||||
import { Square } from '@lucide/svelte';
|
||||
import { Button } from '$lib/components/ui/button';
|
||||
import {
|
||||
ChatFormActionFileAttachments,
|
||||
ChatFormActionRecord,
|
||||
ChatFormModelSelector
|
||||
ChatFormActionSubmit,
|
||||
ModelsSelector
|
||||
} from '$lib/components/app';
|
||||
import { FileTypeCategory } from '$lib/enums';
|
||||
import { getFileTypeCategory } from '$lib/utils';
|
||||
import { config } from '$lib/stores/settings.svelte';
|
||||
import type { FileTypeCategory } from '$lib/enums/files';
|
||||
import { modelsStore, modelOptions, selectedModelId } from '$lib/stores/models.svelte';
|
||||
import { isRouterMode } from '$lib/stores/server.svelte';
|
||||
import { chatStore } from '$lib/stores/chat.svelte';
|
||||
import { activeMessages, usedModalities } from '$lib/stores/conversations.svelte';
|
||||
import { useModelChangeValidation } from '$lib/hooks/use-model-change-validation.svelte';
|
||||
|
||||
interface Props {
|
||||
canSend?: boolean;
|
||||
|
|
@ -15,6 +22,8 @@
|
|||
disabled?: boolean;
|
||||
isLoading?: boolean;
|
||||
isRecording?: boolean;
|
||||
hasText?: boolean;
|
||||
uploadedFiles?: ChatUploadedFile[];
|
||||
onFileUpload?: (fileType?: FileTypeCategory) => void;
|
||||
onMicClick?: () => void;
|
||||
onStop?: () => void;
|
||||
|
|
@ -26,20 +35,150 @@
|
|||
disabled = false,
|
||||
isLoading = false,
|
||||
isRecording = false,
|
||||
hasText = false,
|
||||
uploadedFiles = [],
|
||||
onFileUpload,
|
||||
onMicClick,
|
||||
onStop
|
||||
}: Props = $props();
|
||||
|
||||
let currentConfig = $derived(config());
|
||||
let isRouter = $derived(isRouterMode());
|
||||
|
||||
let conversationModel = $derived(
|
||||
chatStore.getConversationModel(activeMessages() as DatabaseMessage[])
|
||||
);
|
||||
|
||||
let previousConversationModel: string | null = null;
|
||||
|
||||
$effect(() => {
|
||||
if (conversationModel && conversationModel !== previousConversationModel) {
|
||||
previousConversationModel = conversationModel;
|
||||
modelsStore.selectModelByName(conversationModel);
|
||||
}
|
||||
});
|
||||
|
||||
let activeModelId = $derived.by(() => {
|
||||
const options = modelOptions();
|
||||
|
||||
if (!isRouter) {
|
||||
return options.length > 0 ? options[0].model : null;
|
||||
}
|
||||
|
||||
const selectedId = selectedModelId();
|
||||
if (selectedId) {
|
||||
const model = options.find((m) => m.id === selectedId);
|
||||
if (model) return model.model;
|
||||
}
|
||||
|
||||
if (conversationModel) {
|
||||
const model = options.find((m) => m.model === conversationModel);
|
||||
if (model) return model.model;
|
||||
}
|
||||
|
||||
return null;
|
||||
});
|
||||
|
||||
let modelPropsVersion = $state(0); // Used to trigger reactivity after fetch
|
||||
|
||||
$effect(() => {
|
||||
if (activeModelId) {
|
||||
const cached = modelsStore.getModelProps(activeModelId);
|
||||
|
||||
if (!cached) {
|
||||
modelsStore.fetchModelProps(activeModelId).then(() => {
|
||||
modelPropsVersion++;
|
||||
});
|
||||
}
|
||||
}
|
||||
});
|
||||
|
||||
let hasAudioModality = $derived.by(() => {
|
||||
if (activeModelId) {
|
||||
void modelPropsVersion;
|
||||
|
||||
return modelsStore.modelSupportsAudio(activeModelId);
|
||||
}
|
||||
|
||||
return false;
|
||||
});
|
||||
|
||||
let hasVisionModality = $derived.by(() => {
|
||||
if (activeModelId) {
|
||||
void modelPropsVersion;
|
||||
|
||||
return modelsStore.modelSupportsVision(activeModelId);
|
||||
}
|
||||
|
||||
return false;
|
||||
});
|
||||
|
||||
let hasAudioAttachments = $derived(
|
||||
uploadedFiles.some((file) => getFileTypeCategory(file.type) === FileTypeCategory.AUDIO)
|
||||
);
|
||||
let shouldShowRecordButton = $derived(
|
||||
hasAudioModality && !hasText && !hasAudioAttachments && currentConfig.autoMicOnEmpty
|
||||
);
|
||||
|
||||
let hasModelSelected = $derived(!isRouter || !!conversationModel || !!selectedModelId());
|
||||
|
||||
let isSelectedModelInCache = $derived.by(() => {
|
||||
if (!isRouter) return true;
|
||||
|
||||
if (conversationModel) {
|
||||
return modelOptions().some((option) => option.model === conversationModel);
|
||||
}
|
||||
|
||||
const currentModelId = selectedModelId();
|
||||
if (!currentModelId) return false;
|
||||
|
||||
return modelOptions().some((option) => option.id === currentModelId);
|
||||
});
|
||||
|
||||
let submitTooltip = $derived.by(() => {
|
||||
if (!hasModelSelected) {
|
||||
return 'Please select a model first';
|
||||
}
|
||||
|
||||
if (!isSelectedModelInCache) {
|
||||
return 'Selected model is not available, please select another';
|
||||
}
|
||||
|
||||
return '';
|
||||
});
|
||||
|
||||
let selectorModelRef: ModelsSelector | undefined = $state(undefined);
|
||||
|
||||
export function openModelSelector() {
|
||||
selectorModelRef?.open();
|
||||
}
|
||||
|
||||
const { handleModelChange } = useModelChangeValidation({
|
||||
getRequiredModalities: () => usedModalities(),
|
||||
onValidationFailure: async (previousModelId) => {
|
||||
if (previousModelId) {
|
||||
await modelsStore.selectModelById(previousModelId);
|
||||
}
|
||||
}
|
||||
});
|
||||
</script>
|
||||
|
||||
<div class="flex w-full items-center gap-2 {className}">
|
||||
<ChatFormActionFileAttachments class="mr-auto" {disabled} {onFileUpload} />
|
||||
<div class="flex w-full items-center gap-3 {className}" style="container-type: inline-size">
|
||||
<ChatFormActionFileAttachments
|
||||
class="mr-auto"
|
||||
{disabled}
|
||||
{hasAudioModality}
|
||||
{hasVisionModality}
|
||||
{onFileUpload}
|
||||
/>
|
||||
|
||||
{#if currentConfig.modelSelectorEnabled}
|
||||
<ChatFormModelSelector class="shrink-0" />
|
||||
{/if}
|
||||
<ModelsSelector
|
||||
bind:this={selectorModelRef}
|
||||
currentModel={conversationModel}
|
||||
forceForegroundText={true}
|
||||
useGlobalSelection={true}
|
||||
onModelChange={handleModelChange}
|
||||
/>
|
||||
|
||||
{#if isLoading}
|
||||
<Button
|
||||
|
|
@ -50,16 +189,15 @@
|
|||
<span class="sr-only">Stop</span>
|
||||
<Square class="h-8 w-8 fill-destructive stroke-destructive" />
|
||||
</Button>
|
||||
{:else if shouldShowRecordButton}
|
||||
<ChatFormActionRecord {disabled} {hasAudioModality} {isLoading} {isRecording} {onMicClick} />
|
||||
{:else}
|
||||
<ChatFormActionRecord {disabled} {isLoading} {isRecording} {onMicClick} />
|
||||
|
||||
<Button
|
||||
type="submit"
|
||||
disabled={!canSend || disabled || isLoading}
|
||||
class="h-8 w-8 rounded-full p-0"
|
||||
>
|
||||
<span class="sr-only">Send</span>
|
||||
<ArrowUp class="h-12 w-12" />
|
||||
</Button>
|
||||
<ChatFormActionSubmit
|
||||
canSend={canSend && hasModelSelected && isSelectedModelInCache}
|
||||
{disabled}
|
||||
{isLoading}
|
||||
tooltipLabel={submitTooltip}
|
||||
showErrorState={hasModelSelected && !isSelectedModelInCache}
|
||||
/>
|
||||
{/if}
|
||||
</div>
|
||||
|
|
|
|||
|
|
@ -1,9 +1,11 @@
|
|||
<script lang="ts">
|
||||
import { generateModalityAwareAcceptString } from '$lib/utils/modality-file-validation';
|
||||
import { generateModalityAwareAcceptString } from '$lib/utils';
|
||||
|
||||
interface Props {
|
||||
accept?: string;
|
||||
class?: string;
|
||||
hasAudioModality?: boolean;
|
||||
hasVisionModality?: boolean;
|
||||
multiple?: boolean;
|
||||
onFileSelect?: (files: File[]) => void;
|
||||
}
|
||||
|
|
@ -11,6 +13,8 @@
|
|||
let {
|
||||
accept = $bindable(),
|
||||
class: className = '',
|
||||
hasAudioModality = false,
|
||||
hasVisionModality = false,
|
||||
multiple = true,
|
||||
onFileSelect
|
||||
}: Props = $props();
|
||||
|
|
@ -18,7 +22,13 @@
|
|||
let fileInputElement: HTMLInputElement | undefined;
|
||||
|
||||
// Use modality-aware accept string by default, but allow override
|
||||
let finalAccept = $derived(accept ?? generateModalityAwareAcceptString());
|
||||
let finalAccept = $derived(
|
||||
accept ??
|
||||
generateModalityAwareAcceptString({
|
||||
hasVision: hasVisionModality,
|
||||
hasAudio: hasAudioModality
|
||||
})
|
||||
);
|
||||
|
||||
export function click() {
|
||||
fileInputElement?.click();
|
||||
|
|
|
|||
|
|
@ -1,352 +0,0 @@
|
|||
<script lang="ts">
|
||||
import { onMount, tick } from 'svelte';
|
||||
import { ChevronDown, Loader2 } from '@lucide/svelte';
|
||||
import { cn } from '$lib/components/ui/utils';
|
||||
import { portalToBody } from '$lib/utils/portal-to-body';
|
||||
import {
|
||||
fetchModels,
|
||||
modelOptions,
|
||||
modelsError,
|
||||
modelsLoading,
|
||||
modelsUpdating,
|
||||
selectModel,
|
||||
selectedModelId
|
||||
} from '$lib/stores/models.svelte';
|
||||
import type { ModelOption } from '$lib/types/models';
|
||||
|
||||
interface Props {
|
||||
class?: string;
|
||||
}
|
||||
|
||||
let { class: className = '' }: Props = $props();
|
||||
|
||||
let options = $derived(modelOptions());
|
||||
let loading = $derived(modelsLoading());
|
||||
let updating = $derived(modelsUpdating());
|
||||
let error = $derived(modelsError());
|
||||
let activeId = $derived(selectedModelId());
|
||||
|
||||
let isMounted = $state(false);
|
||||
let isOpen = $state(false);
|
||||
let container: HTMLDivElement | null = null;
|
||||
let triggerButton = $state<HTMLButtonElement | null>(null);
|
||||
let menuRef = $state<HTMLDivElement | null>(null);
|
||||
let menuPosition = $state<{
|
||||
top: number;
|
||||
left: number;
|
||||
width: number;
|
||||
placement: 'top' | 'bottom';
|
||||
maxHeight: number;
|
||||
} | null>(null);
|
||||
let lockedWidth: number | null = null;
|
||||
|
||||
onMount(async () => {
|
||||
try {
|
||||
await fetchModels();
|
||||
} catch (error) {
|
||||
console.error('Unable to load models:', error);
|
||||
} finally {
|
||||
isMounted = true;
|
||||
}
|
||||
});
|
||||
|
||||
function handlePointerDown(event: PointerEvent) {
|
||||
if (!container) return;
|
||||
|
||||
const target = event.target as Node | null;
|
||||
|
||||
if (target && !container.contains(target) && !(menuRef && menuRef.contains(target))) {
|
||||
closeMenu();
|
||||
}
|
||||
}
|
||||
|
||||
function handleKeydown(event: KeyboardEvent) {
|
||||
if (event.key === 'Escape') {
|
||||
closeMenu();
|
||||
}
|
||||
}
|
||||
|
||||
function handleResize() {
|
||||
if (isOpen) {
|
||||
updateMenuPosition();
|
||||
}
|
||||
}
|
||||
|
||||
async function handleSelect(value: string | undefined) {
|
||||
if (!value) return;
|
||||
|
||||
const option = options.find((item) => item.id === value);
|
||||
if (!option) {
|
||||
console.error('Model is no longer available');
|
||||
return;
|
||||
}
|
||||
|
||||
try {
|
||||
await selectModel(option.id);
|
||||
} catch (error) {
|
||||
console.error('Failed to switch model:', error);
|
||||
}
|
||||
}
|
||||
|
||||
const VIEWPORT_GUTTER = 8;
|
||||
const MENU_OFFSET = 6;
|
||||
const MENU_MAX_WIDTH = 320;
|
||||
|
||||
async function openMenu() {
|
||||
if (loading || updating) return;
|
||||
|
||||
isOpen = true;
|
||||
await tick();
|
||||
updateMenuPosition();
|
||||
requestAnimationFrame(() => updateMenuPosition());
|
||||
}
|
||||
|
||||
function toggleOpen() {
|
||||
if (loading || updating) return;
|
||||
|
||||
if (isOpen) {
|
||||
closeMenu();
|
||||
} else {
|
||||
void openMenu();
|
||||
}
|
||||
}
|
||||
|
||||
function closeMenu() {
|
||||
if (!isOpen) return;
|
||||
|
||||
isOpen = false;
|
||||
menuPosition = null;
|
||||
lockedWidth = null;
|
||||
}
|
||||
|
||||
async function handleOptionSelect(optionId: string) {
|
||||
try {
|
||||
await handleSelect(optionId);
|
||||
} finally {
|
||||
closeMenu();
|
||||
}
|
||||
}
|
||||
|
||||
$effect(() => {
|
||||
if (loading || updating) {
|
||||
closeMenu();
|
||||
}
|
||||
});
|
||||
|
||||
$effect(() => {
|
||||
const optionCount = options.length;
|
||||
|
||||
if (!isOpen || optionCount <= 0) return;
|
||||
|
||||
queueMicrotask(() => updateMenuPosition());
|
||||
});
|
||||
|
||||
function updateMenuPosition() {
|
||||
if (!isOpen || !triggerButton || !menuRef) return;
|
||||
|
||||
const triggerRect = triggerButton.getBoundingClientRect();
|
||||
const viewportWidth = window.innerWidth;
|
||||
const viewportHeight = window.innerHeight;
|
||||
|
||||
if (viewportWidth === 0 || viewportHeight === 0) return;
|
||||
|
||||
const scrollWidth = menuRef.scrollWidth;
|
||||
const scrollHeight = menuRef.scrollHeight;
|
||||
|
||||
const availableWidth = Math.max(0, viewportWidth - VIEWPORT_GUTTER * 2);
|
||||
const constrainedMaxWidth = Math.min(MENU_MAX_WIDTH, availableWidth || MENU_MAX_WIDTH);
|
||||
const safeMaxWidth =
|
||||
constrainedMaxWidth > 0 ? constrainedMaxWidth : Math.min(MENU_MAX_WIDTH, viewportWidth);
|
||||
const desiredMinWidth = Math.min(160, safeMaxWidth || 160);
|
||||
|
||||
let width = lockedWidth;
|
||||
if (width === null) {
|
||||
const naturalWidth = Math.min(scrollWidth, safeMaxWidth);
|
||||
const baseWidth = Math.max(triggerRect.width, naturalWidth, desiredMinWidth);
|
||||
width = Math.min(baseWidth, safeMaxWidth || baseWidth);
|
||||
lockedWidth = width;
|
||||
} else {
|
||||
width = Math.min(Math.max(width, desiredMinWidth), safeMaxWidth || width);
|
||||
}
|
||||
|
||||
if (width > 0) {
|
||||
menuRef.style.width = `${width}px`;
|
||||
}
|
||||
|
||||
const availableBelow = Math.max(
|
||||
0,
|
||||
viewportHeight - VIEWPORT_GUTTER - triggerRect.bottom - MENU_OFFSET
|
||||
);
|
||||
const availableAbove = Math.max(0, triggerRect.top - VIEWPORT_GUTTER - MENU_OFFSET);
|
||||
const viewportAllowance = Math.max(0, viewportHeight - VIEWPORT_GUTTER * 2);
|
||||
const fallbackAllowance = Math.max(1, viewportAllowance > 0 ? viewportAllowance : scrollHeight);
|
||||
|
||||
function computePlacement(placement: 'top' | 'bottom') {
|
||||
const available = placement === 'bottom' ? availableBelow : availableAbove;
|
||||
const allowedHeight =
|
||||
available > 0 ? Math.min(available, fallbackAllowance) : fallbackAllowance;
|
||||
const maxHeight = Math.min(scrollHeight, allowedHeight);
|
||||
const height = Math.max(0, maxHeight);
|
||||
|
||||
let top: number;
|
||||
if (placement === 'bottom') {
|
||||
const rawTop = triggerRect.bottom + MENU_OFFSET;
|
||||
const minTop = VIEWPORT_GUTTER;
|
||||
const maxTop = viewportHeight - VIEWPORT_GUTTER - height;
|
||||
if (maxTop < minTop) {
|
||||
top = minTop;
|
||||
} else {
|
||||
top = Math.min(Math.max(rawTop, minTop), maxTop);
|
||||
}
|
||||
} else {
|
||||
const rawTop = triggerRect.top - MENU_OFFSET - height;
|
||||
const minTop = VIEWPORT_GUTTER;
|
||||
const maxTop = viewportHeight - VIEWPORT_GUTTER - height;
|
||||
if (maxTop < minTop) {
|
||||
top = minTop;
|
||||
} else {
|
||||
top = Math.max(Math.min(rawTop, maxTop), minTop);
|
||||
}
|
||||
}
|
||||
|
||||
return { placement, top, height, maxHeight };
|
||||
}
|
||||
|
||||
const belowMetrics = computePlacement('bottom');
|
||||
const aboveMetrics = computePlacement('top');
|
||||
|
||||
let metrics = belowMetrics;
|
||||
if (scrollHeight > belowMetrics.maxHeight && aboveMetrics.maxHeight > belowMetrics.maxHeight) {
|
||||
metrics = aboveMetrics;
|
||||
}
|
||||
|
||||
menuRef.style.maxHeight = metrics.maxHeight > 0 ? `${Math.round(metrics.maxHeight)}px` : '';
|
||||
|
||||
let left = triggerRect.right - width;
|
||||
const maxLeft = viewportWidth - VIEWPORT_GUTTER - width;
|
||||
if (maxLeft < VIEWPORT_GUTTER) {
|
||||
left = VIEWPORT_GUTTER;
|
||||
} else {
|
||||
if (left > maxLeft) {
|
||||
left = maxLeft;
|
||||
}
|
||||
if (left < VIEWPORT_GUTTER) {
|
||||
left = VIEWPORT_GUTTER;
|
||||
}
|
||||
}
|
||||
|
||||
menuPosition = {
|
||||
top: Math.round(metrics.top),
|
||||
left: Math.round(left),
|
||||
width: Math.round(width),
|
||||
placement: metrics.placement,
|
||||
maxHeight: Math.round(metrics.maxHeight)
|
||||
};
|
||||
}
|
||||
|
||||
function getDisplayOption(): ModelOption | undefined {
|
||||
if (activeId) {
|
||||
return options.find((option) => option.id === activeId);
|
||||
}
|
||||
|
||||
return options[0];
|
||||
}
|
||||
</script>
|
||||
|
||||
<svelte:window onresize={handleResize} />
|
||||
|
||||
<svelte:document onpointerdown={handlePointerDown} onkeydown={handleKeydown} />
|
||||
|
||||
<div
|
||||
class={cn('relative z-10 flex max-w-[200px] min-w-[120px] flex-col items-end gap-1', className)}
|
||||
bind:this={container}
|
||||
>
|
||||
{#if loading && options.length === 0 && !isMounted}
|
||||
<div class="flex items-center gap-2 text-xs text-muted-foreground">
|
||||
<Loader2 class="h-4 w-4 animate-spin" />
|
||||
Loading models…
|
||||
</div>
|
||||
{:else if options.length === 0}
|
||||
<p class="text-xs text-muted-foreground">No models available.</p>
|
||||
{:else}
|
||||
{@const selectedOption = getDisplayOption()}
|
||||
|
||||
<div class="relative w-full">
|
||||
<button
|
||||
type="button"
|
||||
class={cn(
|
||||
'flex w-full items-center justify-end gap-2 rounded-md px-2 py-1 text-sm text-muted-foreground transition hover:text-foreground focus:outline-none focus-visible:ring-2 focus-visible:ring-ring focus-visible:ring-offset-2 disabled:cursor-not-allowed disabled:opacity-60',
|
||||
isOpen ? 'text-foreground' : ''
|
||||
)}
|
||||
aria-haspopup="listbox"
|
||||
aria-expanded={isOpen}
|
||||
onclick={toggleOpen}
|
||||
bind:this={triggerButton}
|
||||
disabled={loading || updating}
|
||||
>
|
||||
<span class="max-w-[160px] truncate text-right font-medium">
|
||||
{selectedOption?.name || 'Select model'}
|
||||
</span>
|
||||
|
||||
{#if updating}
|
||||
<Loader2 class="h-3.5 w-3.5 animate-spin text-muted-foreground" />
|
||||
{:else}
|
||||
<ChevronDown
|
||||
class={cn(
|
||||
'h-4 w-4 text-muted-foreground transition-transform',
|
||||
isOpen ? 'rotate-180 text-foreground' : ''
|
||||
)}
|
||||
/>
|
||||
{/if}
|
||||
</button>
|
||||
|
||||
{#if isOpen}
|
||||
<div
|
||||
bind:this={menuRef}
|
||||
use:portalToBody
|
||||
class={cn(
|
||||
'fixed z-[1000] overflow-hidden rounded-md border bg-popover shadow-lg transition-opacity',
|
||||
menuPosition ? 'opacity-100' : 'pointer-events-none opacity-0'
|
||||
)}
|
||||
role="listbox"
|
||||
style:top={menuPosition ? `${menuPosition.top}px` : undefined}
|
||||
style:left={menuPosition ? `${menuPosition.left}px` : undefined}
|
||||
style:width={menuPosition ? `${menuPosition.width}px` : undefined}
|
||||
data-placement={menuPosition?.placement ?? 'bottom'}
|
||||
>
|
||||
<div
|
||||
class="overflow-y-auto py-1"
|
||||
style:max-height={menuPosition && menuPosition.maxHeight > 0
|
||||
? `${menuPosition.maxHeight}px`
|
||||
: undefined}
|
||||
>
|
||||
{#each options as option (option.id)}
|
||||
<button
|
||||
type="button"
|
||||
class={cn(
|
||||
'flex w-full flex-col items-start gap-0.5 px-3 py-2 text-left text-sm transition hover:bg-muted focus:bg-muted focus:outline-none',
|
||||
option.id === selectedOption?.id ? 'bg-accent text-accent-foreground' : ''
|
||||
)}
|
||||
role="option"
|
||||
aria-selected={option.id === selectedOption?.id}
|
||||
onclick={() => handleOptionSelect(option.id)}
|
||||
>
|
||||
<span class="block w-full truncate font-medium" title={option.name}>
|
||||
{option.name}
|
||||
</span>
|
||||
|
||||
{#if option.description}
|
||||
<span class="text-xs text-muted-foreground">{option.description}</span>
|
||||
{/if}
|
||||
</button>
|
||||
{/each}
|
||||
</div>
|
||||
</div>
|
||||
{/if}
|
||||
</div>
|
||||
{/if}
|
||||
|
||||
{#if error}
|
||||
<p class="text-xs text-destructive">{error}</p>
|
||||
{/if}
|
||||
</div>
|
||||
|
|
@ -1,5 +1,5 @@
|
|||
<script lang="ts">
|
||||
import autoResizeTextarea from '$lib/utils/autoresize-textarea';
|
||||
import { autoResizeTextarea } from '$lib/utils';
|
||||
import { onMount } from 'svelte';
|
||||
|
||||
interface Props {
|
||||
|
|
|
|||
|
|
@ -1,8 +1,6 @@
|
|||
<script lang="ts">
|
||||
import { getDeletionInfo } from '$lib/stores/chat.svelte';
|
||||
import { copyToClipboard } from '$lib/utils/copy';
|
||||
import { isIMEComposing } from '$lib/utils/is-ime-composing';
|
||||
import type { ApiChatCompletionToolCall } from '$lib/types/api';
|
||||
import { chatStore } from '$lib/stores/chat.svelte';
|
||||
import { copyToClipboard, isIMEComposing } from '$lib/utils';
|
||||
import ChatMessageAssistant from './ChatMessageAssistant.svelte';
|
||||
import ChatMessageUser from './ChatMessageUser.svelte';
|
||||
|
||||
|
|
@ -20,7 +18,7 @@
|
|||
) => void;
|
||||
onEditUserMessagePreserveResponses?: (message: DatabaseMessage, newContent: string) => void;
|
||||
onNavigateToSibling?: (siblingId: string) => void;
|
||||
onRegenerateWithBranching?: (message: DatabaseMessage) => void;
|
||||
onRegenerateWithBranching?: (message: DatabaseMessage, modelOverride?: string) => void;
|
||||
siblingInfo?: ChatMessageSiblingInfo | null;
|
||||
}
|
||||
|
||||
|
|
@ -98,7 +96,7 @@
|
|||
}
|
||||
|
||||
async function handleDelete() {
|
||||
deletionInfo = await getDeletionInfo(message.id);
|
||||
deletionInfo = await chatStore.getDeletionInfo(message.id);
|
||||
showDeleteDialog = true;
|
||||
}
|
||||
|
||||
|
|
@ -133,8 +131,8 @@
|
|||
}
|
||||
}
|
||||
|
||||
function handleRegenerate() {
|
||||
onRegenerateWithBranching?.(message);
|
||||
function handleRegenerate(modelOverride?: string) {
|
||||
onRegenerateWithBranching?.(message, modelOverride);
|
||||
}
|
||||
|
||||
function handleContinue() {
|
||||
|
|
|
|||
|
|
@ -71,7 +71,7 @@
|
|||
{/if}
|
||||
|
||||
{#if role === 'assistant' && onRegenerate}
|
||||
<ActionButton icon={RefreshCw} tooltip="Regenerate" onclick={onRegenerate} />
|
||||
<ActionButton icon={RefreshCw} tooltip="Regenerate" onclick={() => onRegenerate()} />
|
||||
{/if}
|
||||
|
||||
{#if role === 'assistant' && onContinue}
|
||||
|
|
|
|||
|
|
@ -1,29 +1,26 @@
|
|||
<script lang="ts">
|
||||
import { ChatMessageThinkingBlock, MarkdownContent } from '$lib/components/app';
|
||||
import { useProcessingState } from '$lib/hooks/use-processing-state.svelte';
|
||||
import { isLoading } from '$lib/stores/chat.svelte';
|
||||
import autoResizeTextarea from '$lib/utils/autoresize-textarea';
|
||||
import { fade } from 'svelte/transition';
|
||||
import {
|
||||
Check,
|
||||
Copy,
|
||||
Package,
|
||||
X,
|
||||
Gauge,
|
||||
Clock,
|
||||
WholeWord,
|
||||
ChartNoAxesColumn,
|
||||
Wrench
|
||||
} from '@lucide/svelte';
|
||||
ModelBadge,
|
||||
ChatMessageActions,
|
||||
ChatMessageStatistics,
|
||||
ChatMessageThinkingBlock,
|
||||
CopyToClipboardIcon,
|
||||
MarkdownContent,
|
||||
ModelsSelector
|
||||
} from '$lib/components/app';
|
||||
import { useProcessingState } from '$lib/hooks/use-processing-state.svelte';
|
||||
import { useModelChangeValidation } from '$lib/hooks/use-model-change-validation.svelte';
|
||||
import { isLoading } from '$lib/stores/chat.svelte';
|
||||
import { autoResizeTextarea, copyToClipboard } from '$lib/utils';
|
||||
import { fade } from 'svelte/transition';
|
||||
import { Check, X, Wrench } from '@lucide/svelte';
|
||||
import { Button } from '$lib/components/ui/button';
|
||||
import { Checkbox } from '$lib/components/ui/checkbox';
|
||||
import { INPUT_CLASSES } from '$lib/constants/input-classes';
|
||||
import ChatMessageActions from './ChatMessageActions.svelte';
|
||||
import Label from '$lib/components/ui/label/label.svelte';
|
||||
import { config } from '$lib/stores/settings.svelte';
|
||||
import { modelName as serverModelName } from '$lib/stores/server.svelte';
|
||||
import { copyToClipboard } from '$lib/utils/copy';
|
||||
import type { ApiChatCompletionToolCall } from '$lib/types/api';
|
||||
import { conversationsStore } from '$lib/stores/conversations.svelte';
|
||||
import { isRouterMode } from '$lib/stores/server.svelte';
|
||||
|
||||
interface Props {
|
||||
class?: string;
|
||||
|
|
@ -46,7 +43,7 @@
|
|||
onEditKeydown?: (event: KeyboardEvent) => void;
|
||||
onEditedContentChange?: (content: string) => void;
|
||||
onNavigateToSibling?: (siblingId: string) => void;
|
||||
onRegenerate: () => void;
|
||||
onRegenerate: (modelOverride?: string) => void;
|
||||
onSaveEdit?: () => void;
|
||||
onShowDeleteDialogChange: (show: boolean) => void;
|
||||
onShouldBranchAfterEditChange?: (value: boolean) => void;
|
||||
|
|
@ -93,15 +90,18 @@
|
|||
|
||||
const processingState = useProcessingState();
|
||||
let currentConfig = $derived(config());
|
||||
let serverModel = $derived(serverModelName());
|
||||
let isRouter = $derived(isRouterMode());
|
||||
let displayedModel = $derived((): string | null => {
|
||||
if (!currentConfig.showModelInfo) return null;
|
||||
|
||||
if (message.model) {
|
||||
return message.model;
|
||||
}
|
||||
|
||||
return serverModel;
|
||||
return null;
|
||||
});
|
||||
|
||||
const { handleModelChange } = useModelChangeValidation({
|
||||
getRequiredModalities: () => conversationsStore.getModalitiesUpToMessage(message.id),
|
||||
onSuccess: (modelName) => onRegenerate(modelName)
|
||||
});
|
||||
|
||||
function handleCopyModel() {
|
||||
|
|
@ -244,21 +244,24 @@
|
|||
|
||||
<div class="info my-6 grid gap-4">
|
||||
{#if displayedModel()}
|
||||
<span class="inline-flex items-center gap-2 text-xs text-muted-foreground">
|
||||
<span class="inline-flex items-center gap-1">
|
||||
<Package class="h-3.5 w-3.5" />
|
||||
<span class="inline-flex flex-wrap items-center gap-2 text-xs text-muted-foreground">
|
||||
{#if isRouter}
|
||||
<ModelsSelector
|
||||
currentModel={displayedModel()}
|
||||
onModelChange={handleModelChange}
|
||||
disabled={isLoading()}
|
||||
upToMessageId={message.id}
|
||||
/>
|
||||
{:else}
|
||||
<ModelBadge model={displayedModel() || undefined} onclick={handleCopyModel} />
|
||||
{/if}
|
||||
|
||||
<span>Model used:</span>
|
||||
</span>
|
||||
|
||||
<button
|
||||
class="inline-flex cursor-pointer items-center gap-1 rounded-sm bg-muted-foreground/15 px-1.5 py-0.75"
|
||||
onclick={handleCopyModel}
|
||||
>
|
||||
{displayedModel()}
|
||||
|
||||
<Copy class="ml-1 h-3 w-3 " />
|
||||
</button>
|
||||
{#if currentConfig.showMessageStats && message.timings && message.timings.predicted_n && message.timings.predicted_ms}
|
||||
<ChatMessageStatistics
|
||||
predictedTokens={message.timings.predicted_n}
|
||||
predictedMs={message.timings.predicted_ms}
|
||||
/>
|
||||
{/if}
|
||||
</span>
|
||||
{/if}
|
||||
|
||||
|
|
@ -282,8 +285,10 @@
|
|||
onclick={() => handleCopyToolCall(badge.copyValue)}
|
||||
>
|
||||
{badge.label}
|
||||
|
||||
<Copy class="ml-1 h-3 w-3" />
|
||||
<CopyToClipboardIcon
|
||||
text={badge.copyValue}
|
||||
ariaLabel={`Copy tool call ${badge.label}`}
|
||||
/>
|
||||
</button>
|
||||
{/each}
|
||||
{:else if fallbackToolCalls}
|
||||
|
|
@ -295,45 +300,12 @@
|
|||
onclick={() => handleCopyToolCall(fallbackToolCalls)}
|
||||
>
|
||||
{fallbackToolCalls}
|
||||
|
||||
<Copy class="ml-1 h-3 w-3" />
|
||||
<CopyToClipboardIcon text={fallbackToolCalls} ariaLabel="Copy tool call payload" />
|
||||
</button>
|
||||
{/if}
|
||||
</span>
|
||||
{/if}
|
||||
{/if}
|
||||
|
||||
{#if currentConfig.showMessageStats && message.timings && message.timings.predicted_n && message.timings.predicted_ms}
|
||||
{@const tokensPerSecond = (message.timings.predicted_n / message.timings.predicted_ms) * 1000}
|
||||
<span class="inline-flex items-center gap-2 text-xs text-muted-foreground">
|
||||
<span class="inline-flex items-center gap-1">
|
||||
<ChartNoAxesColumn class="h-3.5 w-3.5" />
|
||||
|
||||
<span>Statistics:</span>
|
||||
</span>
|
||||
|
||||
<div class="inline-flex flex-wrap items-center gap-2 text-xs text-muted-foreground">
|
||||
<span
|
||||
class="inline-flex items-center gap-1 rounded-sm bg-muted-foreground/15 px-1.5 py-0.75"
|
||||
>
|
||||
<Gauge class="h-3 w-3" />
|
||||
{tokensPerSecond.toFixed(2)} tokens/s
|
||||
</span>
|
||||
<span
|
||||
class="inline-flex items-center gap-1 rounded-sm bg-muted-foreground/15 px-1.5 py-0.75"
|
||||
>
|
||||
<WholeWord class="h-3 w-3" />
|
||||
{message.timings.predicted_n} tokens
|
||||
</span>
|
||||
<span
|
||||
class="inline-flex items-center gap-1 rounded-sm bg-muted-foreground/15 px-1.5 py-0.75"
|
||||
>
|
||||
<Clock class="h-3 w-3" />
|
||||
{(message.timings.predicted_ms / 1000).toFixed(2)}s
|
||||
</span>
|
||||
</div>
|
||||
</span>
|
||||
{/if}
|
||||
</div>
|
||||
|
||||
{#if message.timestamp && !isEditing}
|
||||
|
|
|
|||
|
|
@ -0,0 +1,20 @@
|
|||
<script lang="ts">
|
||||
import { Clock, Gauge, WholeWord } from '@lucide/svelte';
|
||||
import { BadgeChatStatistic } from '$lib/components/app';
|
||||
|
||||
interface Props {
|
||||
predictedTokens: number;
|
||||
predictedMs: number;
|
||||
}
|
||||
|
||||
let { predictedTokens, predictedMs }: Props = $props();
|
||||
|
||||
let tokensPerSecond = $derived((predictedTokens / predictedMs) * 1000);
|
||||
let timeInSeconds = $derived((predictedMs / 1000).toFixed(2));
|
||||
</script>
|
||||
|
||||
<BadgeChatStatistic icon={WholeWord} value="{predictedTokens} tokens" />
|
||||
|
||||
<BadgeChatStatistic icon={Clock} value="{timeInSeconds}s" />
|
||||
|
||||
<BadgeChatStatistic icon={Gauge} value="{tokensPerSecond.toFixed(2)} tokens/s" />
|
||||
|
|
@ -5,7 +5,7 @@
|
|||
import { ChatAttachmentsList, MarkdownContent } from '$lib/components/app';
|
||||
import { INPUT_CLASSES } from '$lib/constants/input-classes';
|
||||
import { config } from '$lib/stores/settings.svelte';
|
||||
import autoResizeTextarea from '$lib/utils/autoresize-textarea';
|
||||
import { autoResizeTextarea } from '$lib/utils';
|
||||
import ChatMessageActions from './ChatMessageActions.svelte';
|
||||
|
||||
interface Props {
|
||||
|
|
|
|||
|
|
@ -1,17 +1,9 @@
|
|||
<script lang="ts">
|
||||
import { ChatMessage } from '$lib/components/app';
|
||||
import { DatabaseStore } from '$lib/stores/database';
|
||||
import {
|
||||
activeConversation,
|
||||
continueAssistantMessage,
|
||||
deleteMessage,
|
||||
editAssistantMessage,
|
||||
editMessageWithBranching,
|
||||
editUserMessagePreserveResponses,
|
||||
navigateToSibling,
|
||||
regenerateMessageWithBranching
|
||||
} from '$lib/stores/chat.svelte';
|
||||
import { getMessageSiblings } from '$lib/utils/branching';
|
||||
import { DatabaseService } from '$lib/services/database';
|
||||
import { chatStore } from '$lib/stores/chat.svelte';
|
||||
import { conversationsStore, activeConversation } from '$lib/stores/conversations.svelte';
|
||||
import { getMessageSiblings } from '$lib/utils';
|
||||
|
||||
interface Props {
|
||||
class?: string;
|
||||
|
|
@ -27,7 +19,7 @@
|
|||
const conversation = activeConversation();
|
||||
|
||||
if (conversation) {
|
||||
DatabaseStore.getConversationMessages(conversation.id).then((messages) => {
|
||||
DatabaseService.getConversationMessages(conversation.id).then((messages) => {
|
||||
allConversationMessages = messages;
|
||||
});
|
||||
} else {
|
||||
|
|
@ -65,13 +57,13 @@
|
|||
});
|
||||
|
||||
async function handleNavigateToSibling(siblingId: string) {
|
||||
await navigateToSibling(siblingId);
|
||||
await conversationsStore.navigateToSibling(siblingId);
|
||||
}
|
||||
|
||||
async function handleEditWithBranching(message: DatabaseMessage, newContent: string) {
|
||||
onUserAction?.();
|
||||
|
||||
await editMessageWithBranching(message.id, newContent);
|
||||
await chatStore.editMessageWithBranching(message.id, newContent);
|
||||
|
||||
refreshAllMessages();
|
||||
}
|
||||
|
|
@ -83,15 +75,15 @@
|
|||
) {
|
||||
onUserAction?.();
|
||||
|
||||
await editAssistantMessage(message.id, newContent, shouldBranch);
|
||||
await chatStore.editAssistantMessage(message.id, newContent, shouldBranch);
|
||||
|
||||
refreshAllMessages();
|
||||
}
|
||||
|
||||
async function handleRegenerateWithBranching(message: DatabaseMessage) {
|
||||
async function handleRegenerateWithBranching(message: DatabaseMessage, modelOverride?: string) {
|
||||
onUserAction?.();
|
||||
|
||||
await regenerateMessageWithBranching(message.id);
|
||||
await chatStore.regenerateMessageWithBranching(message.id, modelOverride);
|
||||
|
||||
refreshAllMessages();
|
||||
}
|
||||
|
|
@ -99,7 +91,7 @@
|
|||
async function handleContinueAssistantMessage(message: DatabaseMessage) {
|
||||
onUserAction?.();
|
||||
|
||||
await continueAssistantMessage(message.id);
|
||||
await chatStore.continueAssistantMessage(message.id);
|
||||
|
||||
refreshAllMessages();
|
||||
}
|
||||
|
|
@ -110,13 +102,13 @@
|
|||
) {
|
||||
onUserAction?.();
|
||||
|
||||
await editUserMessagePreserveResponses(message.id, newContent);
|
||||
await chatStore.editUserMessagePreserveResponses(message.id, newContent);
|
||||
|
||||
refreshAllMessages();
|
||||
}
|
||||
|
||||
async function handleDeleteMessage(message: DatabaseMessage) {
|
||||
await deleteMessage(message.id);
|
||||
await chatStore.deleteMessage(message.id);
|
||||
|
||||
refreshAllMessages();
|
||||
}
|
||||
|
|
|
|||
|
|
@ -3,47 +3,34 @@
|
|||
import {
|
||||
ChatForm,
|
||||
ChatScreenHeader,
|
||||
ChatScreenWarning,
|
||||
ChatMessages,
|
||||
ChatScreenProcessingInfo,
|
||||
DialogEmptyFileAlert,
|
||||
DialogChatError,
|
||||
ServerErrorSplash,
|
||||
ServerInfo,
|
||||
ServerLoadingSplash,
|
||||
DialogConfirmation
|
||||
} from '$lib/components/app';
|
||||
import * as Alert from '$lib/components/ui/alert';
|
||||
import * as AlertDialog from '$lib/components/ui/alert-dialog';
|
||||
import {
|
||||
AUTO_SCROLL_AT_BOTTOM_THRESHOLD,
|
||||
AUTO_SCROLL_INTERVAL,
|
||||
INITIAL_SCROLL_DELAY
|
||||
} from '$lib/constants/auto-scroll';
|
||||
import { chatStore, errorDialog, isLoading } from '$lib/stores/chat.svelte';
|
||||
import {
|
||||
conversationsStore,
|
||||
activeMessages,
|
||||
activeConversation,
|
||||
deleteConversation,
|
||||
dismissErrorDialog,
|
||||
errorDialog,
|
||||
isLoading,
|
||||
sendMessage,
|
||||
stopGeneration
|
||||
} from '$lib/stores/chat.svelte';
|
||||
activeConversation
|
||||
} from '$lib/stores/conversations.svelte';
|
||||
import { config } from '$lib/stores/settings.svelte';
|
||||
import {
|
||||
supportsVision,
|
||||
supportsAudio,
|
||||
serverLoading,
|
||||
serverWarning,
|
||||
serverStore
|
||||
} from '$lib/stores/server.svelte';
|
||||
import { parseFilesToMessageExtras } from '$lib/utils/convert-files-to-extra';
|
||||
import { isFileTypeSupported } from '$lib/utils/file-type';
|
||||
import { filterFilesByModalities } from '$lib/utils/modality-file-validation';
|
||||
import { processFilesToChatUploaded } from '$lib/utils/process-uploaded-files';
|
||||
import { serverLoading, serverError, serverStore, isRouterMode } from '$lib/stores/server.svelte';
|
||||
import { modelsStore, modelOptions, selectedModelId } from '$lib/stores/models.svelte';
|
||||
import { isFileTypeSupported, filterFilesByModalities } from '$lib/utils';
|
||||
import { parseFilesToMessageExtras, processFilesToChatUploaded } from '$lib/utils/browser-only';
|
||||
import { onMount } from 'svelte';
|
||||
import { fade, fly, slide } from 'svelte/transition';
|
||||
import { Trash2 } from '@lucide/svelte';
|
||||
import { Trash2, AlertTriangle, RefreshCw } from '@lucide/svelte';
|
||||
import ChatScreenDragOverlay from './ChatScreenDragOverlay.svelte';
|
||||
|
||||
let { showCenteredEmpty = false } = $props();
|
||||
|
|
@ -84,20 +71,84 @@
|
|||
|
||||
let activeErrorDialog = $derived(errorDialog());
|
||||
let isServerLoading = $derived(serverLoading());
|
||||
let hasPropsError = $derived(!!serverError());
|
||||
|
||||
let isCurrentConversationLoading = $derived(isLoading());
|
||||
|
||||
let isRouter = $derived(isRouterMode());
|
||||
|
||||
let conversationModel = $derived(
|
||||
chatStore.getConversationModel(activeMessages() as DatabaseMessage[])
|
||||
);
|
||||
|
||||
let activeModelId = $derived.by(() => {
|
||||
const options = modelOptions();
|
||||
|
||||
if (!isRouter) {
|
||||
return options.length > 0 ? options[0].model : null;
|
||||
}
|
||||
|
||||
const selectedId = selectedModelId();
|
||||
if (selectedId) {
|
||||
const model = options.find((m) => m.id === selectedId);
|
||||
if (model) return model.model;
|
||||
}
|
||||
|
||||
if (conversationModel) {
|
||||
const model = options.find((m) => m.model === conversationModel);
|
||||
if (model) return model.model;
|
||||
}
|
||||
|
||||
return null;
|
||||
});
|
||||
|
||||
let modelPropsVersion = $state(0);
|
||||
|
||||
$effect(() => {
|
||||
if (activeModelId) {
|
||||
const cached = modelsStore.getModelProps(activeModelId);
|
||||
if (!cached) {
|
||||
modelsStore.fetchModelProps(activeModelId).then(() => {
|
||||
modelPropsVersion++;
|
||||
});
|
||||
}
|
||||
}
|
||||
});
|
||||
|
||||
let hasAudioModality = $derived.by(() => {
|
||||
if (activeModelId) {
|
||||
void modelPropsVersion;
|
||||
return modelsStore.modelSupportsAudio(activeModelId);
|
||||
}
|
||||
|
||||
return false;
|
||||
});
|
||||
|
||||
let hasVisionModality = $derived.by(() => {
|
||||
if (activeModelId) {
|
||||
void modelPropsVersion;
|
||||
|
||||
return modelsStore.modelSupportsVision(activeModelId);
|
||||
}
|
||||
|
||||
return false;
|
||||
});
|
||||
|
||||
async function handleDeleteConfirm() {
|
||||
const conversation = activeConversation();
|
||||
|
||||
if (conversation) {
|
||||
await deleteConversation(conversation.id);
|
||||
await conversationsStore.deleteConversation(conversation.id);
|
||||
}
|
||||
|
||||
showDeleteDialog = false;
|
||||
}
|
||||
|
||||
function handleDragEnter(event: DragEvent) {
|
||||
event.preventDefault();
|
||||
|
||||
dragCounter++;
|
||||
|
||||
if (event.dataTransfer?.types.includes('Files')) {
|
||||
isDragOver = true;
|
||||
}
|
||||
|
|
@ -105,7 +156,9 @@
|
|||
|
||||
function handleDragLeave(event: DragEvent) {
|
||||
event.preventDefault();
|
||||
|
||||
dragCounter--;
|
||||
|
||||
if (dragCounter === 0) {
|
||||
isDragOver = false;
|
||||
}
|
||||
|
|
@ -113,7 +166,7 @@
|
|||
|
||||
function handleErrorDialogOpenChange(open: boolean) {
|
||||
if (!open) {
|
||||
dismissErrorDialog();
|
||||
chatStore.dismissErrorDialog();
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -123,6 +176,7 @@
|
|||
|
||||
function handleDrop(event: DragEvent) {
|
||||
event.preventDefault();
|
||||
|
||||
isDragOver = false;
|
||||
dragCounter = 0;
|
||||
|
||||
|
|
@ -180,7 +234,9 @@
|
|||
}
|
||||
|
||||
async function handleSendMessage(message: string, files?: ChatUploadedFile[]): Promise<boolean> {
|
||||
const result = files ? await parseFilesToMessageExtras(files) : undefined;
|
||||
const result = files
|
||||
? await parseFilesToMessageExtras(files, activeModelId ?? undefined)
|
||||
: undefined;
|
||||
|
||||
if (result?.emptyFiles && result.emptyFiles.length > 0) {
|
||||
emptyFileNames = result.emptyFiles;
|
||||
|
|
@ -200,7 +256,7 @@
|
|||
userScrolledUp = false;
|
||||
autoScrollEnabled = true;
|
||||
}
|
||||
await sendMessage(message, extras);
|
||||
await chatStore.sendMessage(message, extras);
|
||||
scrollChatToBottom();
|
||||
|
||||
return true;
|
||||
|
|
@ -218,16 +274,20 @@
|
|||
}
|
||||
}
|
||||
|
||||
const { supportedFiles, unsupportedFiles, modalityReasons } =
|
||||
filterFilesByModalities(generallySupported);
|
||||
// Use model-specific capabilities for file validation
|
||||
const capabilities = { hasVision: hasVisionModality, hasAudio: hasAudioModality };
|
||||
const { supportedFiles, unsupportedFiles, modalityReasons } = filterFilesByModalities(
|
||||
generallySupported,
|
||||
capabilities
|
||||
);
|
||||
|
||||
const allUnsupportedFiles = [...generallyUnsupported, ...unsupportedFiles];
|
||||
|
||||
if (allUnsupportedFiles.length > 0) {
|
||||
const supportedTypes: string[] = ['text files', 'PDFs'];
|
||||
|
||||
if (supportsVision()) supportedTypes.push('images');
|
||||
if (supportsAudio()) supportedTypes.push('audio files');
|
||||
if (hasVisionModality) supportedTypes.push('images');
|
||||
if (hasAudioModality) supportedTypes.push('audio files');
|
||||
|
||||
fileErrorData = {
|
||||
generallyUnsupported,
|
||||
|
|
@ -239,7 +299,10 @@
|
|||
}
|
||||
|
||||
if (supportedFiles.length > 0) {
|
||||
const processed = await processFilesToChatUploaded(supportedFiles);
|
||||
const processed = await processFilesToChatUploaded(
|
||||
supportedFiles,
|
||||
activeModelId ?? undefined
|
||||
);
|
||||
uploadedFiles = [...uploadedFiles, ...processed];
|
||||
}
|
||||
}
|
||||
|
|
@ -322,17 +385,37 @@
|
|||
>
|
||||
<ChatScreenProcessingInfo />
|
||||
|
||||
{#if serverWarning()}
|
||||
<ChatScreenWarning class="pointer-events-auto mx-auto max-w-[48rem] px-4" />
|
||||
{#if hasPropsError}
|
||||
<div
|
||||
class="pointer-events-auto mx-auto mb-4 max-w-[48rem] px-1"
|
||||
in:fly={{ y: 10, duration: 250 }}
|
||||
>
|
||||
<Alert.Root variant="destructive">
|
||||
<AlertTriangle class="h-4 w-4" />
|
||||
<Alert.Title class="flex items-center justify-between">
|
||||
<span>Server unavailable</span>
|
||||
<button
|
||||
onclick={() => serverStore.fetch()}
|
||||
disabled={isServerLoading}
|
||||
class="flex items-center gap-1.5 rounded-lg bg-destructive/20 px-2 py-1 text-xs font-medium hover:bg-destructive/30 disabled:opacity-50"
|
||||
>
|
||||
<RefreshCw class="h-3 w-3 {isServerLoading ? 'animate-spin' : ''}" />
|
||||
{isServerLoading ? 'Retrying...' : 'Retry'}
|
||||
</button>
|
||||
</Alert.Title>
|
||||
<Alert.Description>{serverError()}</Alert.Description>
|
||||
</Alert.Root>
|
||||
</div>
|
||||
{/if}
|
||||
|
||||
<div class="conversation-chat-form pointer-events-auto rounded-t-3xl pb-4">
|
||||
<ChatForm
|
||||
disabled={hasPropsError}
|
||||
isLoading={isCurrentConversationLoading}
|
||||
onFileRemove={handleFileRemove}
|
||||
onFileUpload={handleFileUpload}
|
||||
onSend={handleSendMessage}
|
||||
onStop={() => stopGeneration()}
|
||||
onStop={() => chatStore.stopGeneration()}
|
||||
showHelperText={false}
|
||||
bind:uploadedFiles
|
||||
/>
|
||||
|
|
@ -342,9 +425,7 @@
|
|||
{:else if isServerLoading}
|
||||
<!-- Server Loading State -->
|
||||
<ServerLoadingSplash />
|
||||
{:else if serverStore.error && !serverStore.modelName}
|
||||
<ServerErrorSplash error={serverStore.error} />
|
||||
{:else if serverStore.modelName}
|
||||
{:else}
|
||||
<div
|
||||
aria-label="Welcome screen with file drop zone"
|
||||
class="flex h-full items-center justify-center"
|
||||
|
|
@ -355,27 +436,44 @@
|
|||
role="main"
|
||||
>
|
||||
<div class="w-full max-w-[48rem] px-4">
|
||||
<div class="mb-8 text-center" in:fade={{ duration: 300 }}>
|
||||
<h1 class="mb-2 text-3xl font-semibold tracking-tight">llama.cpp</h1>
|
||||
<div class="mb-10 text-center" in:fade={{ duration: 300 }}>
|
||||
<h1 class="mb-4 text-3xl font-semibold tracking-tight">llama.cpp</h1>
|
||||
|
||||
<p class="text-lg text-muted-foreground">How can I help you today?</p>
|
||||
<p class="text-lg text-muted-foreground">
|
||||
{serverStore.props?.modalities?.audio
|
||||
? 'Record audio, type a message '
|
||||
: 'Type a message'} or upload files to get started
|
||||
</p>
|
||||
</div>
|
||||
|
||||
<div class="mb-6 flex justify-center" in:fly={{ y: 10, duration: 300, delay: 200 }}>
|
||||
<ServerInfo />
|
||||
{#if hasPropsError}
|
||||
<div class="mb-4" in:fly={{ y: 10, duration: 250 }}>
|
||||
<Alert.Root variant="destructive">
|
||||
<AlertTriangle class="h-4 w-4" />
|
||||
<Alert.Title class="flex items-center justify-between">
|
||||
<span>Server unavailable</span>
|
||||
<button
|
||||
onclick={() => serverStore.fetch()}
|
||||
disabled={isServerLoading}
|
||||
class="flex items-center gap-1.5 rounded-lg bg-destructive/20 px-2 py-1 text-xs font-medium hover:bg-destructive/30 disabled:opacity-50"
|
||||
>
|
||||
<RefreshCw class="h-3 w-3 {isServerLoading ? 'animate-spin' : ''}" />
|
||||
{isServerLoading ? 'Retrying...' : 'Retry'}
|
||||
</button>
|
||||
</Alert.Title>
|
||||
<Alert.Description>{serverError()}</Alert.Description>
|
||||
</Alert.Root>
|
||||
</div>
|
||||
|
||||
{#if serverWarning()}
|
||||
<ChatScreenWarning />
|
||||
{/if}
|
||||
|
||||
<div in:fly={{ y: 10, duration: 250, delay: 300 }}>
|
||||
<div in:fly={{ y: 10, duration: 250, delay: hasPropsError ? 0 : 300 }}>
|
||||
<ChatForm
|
||||
disabled={hasPropsError}
|
||||
isLoading={isCurrentConversationLoading}
|
||||
onFileRemove={handleFileRemove}
|
||||
onFileUpload={handleFileUpload}
|
||||
onSend={handleSendMessage}
|
||||
onStop={() => stopGeneration()}
|
||||
onStop={() => chatStore.stopGeneration()}
|
||||
showHelperText={true}
|
||||
bind:uploadedFiles
|
||||
/>
|
||||
|
|
|
|||
|
|
@ -1,34 +1,47 @@
|
|||
<script lang="ts">
|
||||
import { untrack } from 'svelte';
|
||||
import { PROCESSING_INFO_TIMEOUT } from '$lib/constants/processing-info';
|
||||
import { useProcessingState } from '$lib/hooks/use-processing-state.svelte';
|
||||
import { slotsService } from '$lib/services/slots';
|
||||
import { isLoading, activeMessages, activeConversation } from '$lib/stores/chat.svelte';
|
||||
import { chatStore, isLoading, isChatStreaming } from '$lib/stores/chat.svelte';
|
||||
import { activeMessages, activeConversation } from '$lib/stores/conversations.svelte';
|
||||
import { config } from '$lib/stores/settings.svelte';
|
||||
|
||||
const processingState = useProcessingState();
|
||||
|
||||
let isCurrentConversationLoading = $derived(isLoading());
|
||||
let isStreaming = $derived(isChatStreaming());
|
||||
let hasProcessingData = $derived(processingState.processingState !== null);
|
||||
let processingDetails = $derived(processingState.getProcessingDetails());
|
||||
let showSlotsInfo = $derived(isCurrentConversationLoading || config().keepStatsVisible);
|
||||
|
||||
// Track loading state reactively by checking if conversation ID is in loading conversations array
|
||||
let showProcessingInfo = $derived(
|
||||
isCurrentConversationLoading || isStreaming || config().keepStatsVisible || hasProcessingData
|
||||
);
|
||||
|
||||
$effect(() => {
|
||||
const conversation = activeConversation();
|
||||
|
||||
untrack(() => chatStore.setActiveProcessingConversation(conversation?.id ?? null));
|
||||
});
|
||||
|
||||
$effect(() => {
|
||||
const keepStatsVisible = config().keepStatsVisible;
|
||||
const shouldMonitor = keepStatsVisible || isCurrentConversationLoading || isStreaming;
|
||||
|
||||
if (keepStatsVisible || isCurrentConversationLoading) {
|
||||
if (shouldMonitor) {
|
||||
processingState.startMonitoring();
|
||||
}
|
||||
|
||||
if (!isCurrentConversationLoading && !keepStatsVisible) {
|
||||
setTimeout(() => {
|
||||
if (!config().keepStatsVisible) {
|
||||
if (!isCurrentConversationLoading && !isStreaming && !keepStatsVisible) {
|
||||
const timeout = setTimeout(() => {
|
||||
if (!config().keepStatsVisible && !isChatStreaming()) {
|
||||
processingState.stopMonitoring();
|
||||
}
|
||||
}, PROCESSING_INFO_TIMEOUT);
|
||||
|
||||
return () => clearTimeout(timeout);
|
||||
}
|
||||
});
|
||||
|
||||
// Update processing state from stored timings
|
||||
$effect(() => {
|
||||
const conversation = activeConversation();
|
||||
const messages = activeMessages() as DatabaseMessage[];
|
||||
|
|
@ -36,47 +49,18 @@
|
|||
|
||||
if (keepStatsVisible && conversation) {
|
||||
if (messages.length === 0) {
|
||||
slotsService.clearConversationState(conversation.id);
|
||||
untrack(() => chatStore.clearProcessingState(conversation.id));
|
||||
return;
|
||||
}
|
||||
|
||||
// Search backwards through messages to find most recent assistant message with timing data
|
||||
// Using reverse iteration for performance - avoids array copy and stops at first match
|
||||
let foundTimingData = false;
|
||||
|
||||
for (let i = messages.length - 1; i >= 0; i--) {
|
||||
const message = messages[i];
|
||||
if (message.role === 'assistant' && message.timings) {
|
||||
foundTimingData = true;
|
||||
|
||||
slotsService
|
||||
.updateFromTimingData(
|
||||
{
|
||||
prompt_n: message.timings.prompt_n || 0,
|
||||
predicted_n: message.timings.predicted_n || 0,
|
||||
predicted_per_second:
|
||||
message.timings.predicted_n && message.timings.predicted_ms
|
||||
? (message.timings.predicted_n / message.timings.predicted_ms) * 1000
|
||||
: 0,
|
||||
cache_n: message.timings.cache_n || 0
|
||||
},
|
||||
conversation.id
|
||||
)
|
||||
.catch((error) => {
|
||||
console.warn('Failed to update processing state from stored timings:', error);
|
||||
});
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (!foundTimingData) {
|
||||
slotsService.clearConversationState(conversation.id);
|
||||
if (!isCurrentConversationLoading && !isStreaming) {
|
||||
untrack(() => chatStore.restoreProcessingStateFromMessages(messages, conversation.id));
|
||||
}
|
||||
}
|
||||
});
|
||||
</script>
|
||||
|
||||
<div class="chat-processing-info-container pointer-events-none" class:visible={showSlotsInfo}>
|
||||
<div class="chat-processing-info-container pointer-events-none" class:visible={showProcessingInfo}>
|
||||
<div class="chat-processing-info-content">
|
||||
{#each processingDetails as detail (detail)}
|
||||
<span class="chat-processing-info-detail pointer-events-auto">{detail}</span>
|
||||
|
|
|
|||
|
|
@ -1,38 +0,0 @@
|
|||
<script lang="ts">
|
||||
import { AlertTriangle, RefreshCw } from '@lucide/svelte';
|
||||
import { serverLoading, serverStore } from '$lib/stores/server.svelte';
|
||||
import { fly } from 'svelte/transition';
|
||||
|
||||
interface Props {
|
||||
class?: string;
|
||||
}
|
||||
|
||||
let { class: className = '' }: Props = $props();
|
||||
|
||||
function handleRefreshServer() {
|
||||
serverStore.fetchServerProps();
|
||||
}
|
||||
</script>
|
||||
|
||||
<div class="mb-3 {className}" in:fly={{ y: 10, duration: 250 }}>
|
||||
<div
|
||||
class="rounded-md border border-yellow-200 bg-yellow-50 px-3 py-2 dark:border-yellow-800 dark:bg-yellow-950"
|
||||
>
|
||||
<div class="flex items-center justify-between">
|
||||
<div class="flex items-center">
|
||||
<AlertTriangle class="h-4 w-4 text-yellow-600 dark:text-yellow-400" />
|
||||
<p class="ml-2 text-sm text-yellow-800 dark:text-yellow-200">
|
||||
Server `/props` endpoint not available - using cached data
|
||||
</p>
|
||||
</div>
|
||||
<button
|
||||
onclick={handleRefreshServer}
|
||||
disabled={serverLoading()}
|
||||
class="ml-3 flex items-center gap-1.5 rounded bg-yellow-100 px-2 py-1 text-xs font-medium text-yellow-800 hover:bg-yellow-200 disabled:opacity-50 dark:bg-yellow-900 dark:text-yellow-200 dark:hover:bg-yellow-800"
|
||||
>
|
||||
<RefreshCw class="h-3 w-3 {serverLoading() ? 'animate-spin' : ''}" />
|
||||
{serverLoading() ? 'Checking...' : 'Retry'}
|
||||
</button>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
|
@ -17,7 +17,7 @@
|
|||
ChatSettingsFields
|
||||
} from '$lib/components/app';
|
||||
import { ScrollArea } from '$lib/components/ui/scroll-area';
|
||||
import { config, updateMultipleConfig } from '$lib/stores/settings.svelte';
|
||||
import { config, settingsStore } from '$lib/stores/settings.svelte';
|
||||
import { setMode } from 'mode-watcher';
|
||||
import type { Component } from 'svelte';
|
||||
|
||||
|
|
@ -79,19 +79,14 @@
|
|||
title: 'Display',
|
||||
icon: Monitor,
|
||||
fields: [
|
||||
{
|
||||
key: 'showThoughtInProgress',
|
||||
label: 'Show thought in progress',
|
||||
type: 'checkbox'
|
||||
},
|
||||
{
|
||||
key: 'showMessageStats',
|
||||
label: 'Show message generation statistics',
|
||||
type: 'checkbox'
|
||||
},
|
||||
{
|
||||
key: 'showTokensPerSecond',
|
||||
label: 'Show tokens per second',
|
||||
key: 'showThoughtInProgress',
|
||||
label: 'Show thought in progress',
|
||||
type: 'checkbox'
|
||||
},
|
||||
{
|
||||
|
|
@ -100,19 +95,20 @@
|
|||
type: 'checkbox'
|
||||
},
|
||||
{
|
||||
key: 'showModelInfo',
|
||||
label: 'Show model information',
|
||||
key: 'autoMicOnEmpty',
|
||||
label: 'Show microphone on empty input',
|
||||
type: 'checkbox',
|
||||
isExperimental: true
|
||||
},
|
||||
{
|
||||
key: 'renderUserContentAsMarkdown',
|
||||
label: 'Render user content as Markdown',
|
||||
type: 'checkbox'
|
||||
},
|
||||
{
|
||||
key: 'disableAutoScroll',
|
||||
label: 'Disable automatic scroll',
|
||||
type: 'checkbox'
|
||||
},
|
||||
{
|
||||
key: 'renderUserContentAsMarkdown',
|
||||
label: 'Render user content as Markdown',
|
||||
type: 'checkbox'
|
||||
}
|
||||
]
|
||||
},
|
||||
|
|
@ -237,11 +233,6 @@
|
|||
title: 'Developer',
|
||||
icon: Code,
|
||||
fields: [
|
||||
{
|
||||
key: 'modelSelectorEnabled',
|
||||
label: 'Enable model selector',
|
||||
type: 'checkbox'
|
||||
},
|
||||
{
|
||||
key: 'showToolCalls',
|
||||
label: 'Show tool call labels',
|
||||
|
|
@ -347,7 +338,7 @@
|
|||
}
|
||||
}
|
||||
|
||||
updateMultipleConfig(processedConfig);
|
||||
settingsStore.updateMultipleConfig(processedConfig);
|
||||
onSave?.();
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -6,8 +6,7 @@
|
|||
import * as Select from '$lib/components/ui/select';
|
||||
import { Textarea } from '$lib/components/ui/textarea';
|
||||
import { SETTING_CONFIG_DEFAULT, SETTING_CONFIG_INFO } from '$lib/constants/settings-config';
|
||||
import { supportsVision } from '$lib/stores/server.svelte';
|
||||
import { getParameterInfo, resetParameterToServerDefault } from '$lib/stores/settings.svelte';
|
||||
import { settingsStore } from '$lib/stores/settings.svelte';
|
||||
import { ParameterSyncService } from '$lib/services/parameter-sync';
|
||||
import { ChatSettingsParameterSourceIndicator } from '$lib/components/app';
|
||||
import type { Component } from 'svelte';
|
||||
|
|
@ -27,7 +26,7 @@
|
|||
return null;
|
||||
}
|
||||
|
||||
return getParameterInfo(key);
|
||||
return settingsStore.getParameterInfo(key);
|
||||
}
|
||||
</script>
|
||||
|
||||
|
|
@ -82,7 +81,7 @@
|
|||
<button
|
||||
type="button"
|
||||
onclick={() => {
|
||||
resetParameterToServerDefault(field.key);
|
||||
settingsStore.resetParameterToServerDefault(field.key);
|
||||
// Trigger UI update by calling onConfigChange with the default value
|
||||
const defaultValue = propsDefault ?? SETTING_CONFIG_DEFAULT[field.key];
|
||||
onConfigChange(field.key, String(defaultValue));
|
||||
|
|
@ -175,7 +174,7 @@
|
|||
<button
|
||||
type="button"
|
||||
onclick={() => {
|
||||
resetParameterToServerDefault(field.key);
|
||||
settingsStore.resetParameterToServerDefault(field.key);
|
||||
// Trigger UI update by calling onConfigChange with the default value
|
||||
const defaultValue = propsDefault ?? SETTING_CONFIG_DEFAULT[field.key];
|
||||
onConfigChange(field.key, String(defaultValue));
|
||||
|
|
@ -210,14 +209,10 @@
|
|||
</p>
|
||||
{/if}
|
||||
{:else if field.type === 'checkbox'}
|
||||
{@const pdfDisabled = field.key === 'pdfAsImage' && !supportsVision()}
|
||||
{@const isDisabled = pdfDisabled}
|
||||
|
||||
<div class="flex items-start space-x-3">
|
||||
<Checkbox
|
||||
id={field.key}
|
||||
checked={Boolean(localConfig[field.key])}
|
||||
disabled={isDisabled}
|
||||
onCheckedChange={(checked) => onConfigChange(field.key, checked)}
|
||||
class="mt-1"
|
||||
/>
|
||||
|
|
@ -225,9 +220,7 @@
|
|||
<div class="space-y-1">
|
||||
<label
|
||||
for={field.key}
|
||||
class="cursor-pointer text-sm leading-none font-medium {isDisabled
|
||||
? 'text-muted-foreground'
|
||||
: ''} flex items-center gap-1.5"
|
||||
class="flex cursor-pointer items-center gap-1.5 pt-1 pb-0.5 text-sm leading-none font-medium"
|
||||
>
|
||||
{field.label}
|
||||
|
||||
|
|
@ -240,11 +233,6 @@
|
|||
<p class="text-xs text-muted-foreground">
|
||||
{field.help || SETTING_CONFIG_INFO[field.key]}
|
||||
</p>
|
||||
{:else if pdfDisabled}
|
||||
<p class="text-xs text-muted-foreground">
|
||||
PDF-to-image processing requires a vision-capable model. PDFs will be processed as
|
||||
text.
|
||||
</p>
|
||||
{/if}
|
||||
</div>
|
||||
</div>
|
||||
|
|
|
|||
|
|
@ -1,7 +1,7 @@
|
|||
<script lang="ts">
|
||||
import { Button } from '$lib/components/ui/button';
|
||||
import * as AlertDialog from '$lib/components/ui/alert-dialog';
|
||||
import { forceSyncWithServerDefaults } from '$lib/stores/settings.svelte';
|
||||
import { settingsStore } from '$lib/stores/settings.svelte';
|
||||
import { RotateCcw } from '@lucide/svelte';
|
||||
|
||||
interface Props {
|
||||
|
|
@ -18,7 +18,7 @@
|
|||
}
|
||||
|
||||
function handleConfirmReset() {
|
||||
forceSyncWithServerDefaults();
|
||||
settingsStore.forceSyncWithServerDefaults();
|
||||
onReset?.();
|
||||
|
||||
showResetDialog = false;
|
||||
|
|
|
|||
|
|
@ -2,10 +2,9 @@
|
|||
import { Download, Upload } from '@lucide/svelte';
|
||||
import { Button } from '$lib/components/ui/button';
|
||||
import { DialogConversationSelection } from '$lib/components/app';
|
||||
import { DatabaseStore } from '$lib/stores/database';
|
||||
import type { ExportedConversations } from '$lib/types/database';
|
||||
import { createMessageCountMap } from '$lib/utils/conversation-utils';
|
||||
import { chatStore } from '$lib/stores/chat.svelte';
|
||||
import { DatabaseService } from '$lib/services/database';
|
||||
import { createMessageCountMap } from '$lib/utils';
|
||||
import { conversationsStore } from '$lib/stores/conversations.svelte';
|
||||
|
||||
let exportedConversations = $state<DatabaseConversation[]>([]);
|
||||
let importedConversations = $state<DatabaseConversation[]>([]);
|
||||
|
|
@ -22,7 +21,7 @@
|
|||
|
||||
async function handleExportClick() {
|
||||
try {
|
||||
const allConversations = await DatabaseStore.getAllConversations();
|
||||
const allConversations = await DatabaseService.getAllConversations();
|
||||
if (allConversations.length === 0) {
|
||||
alert('No conversations to export');
|
||||
return;
|
||||
|
|
@ -30,7 +29,7 @@
|
|||
|
||||
const conversationsWithMessages = await Promise.all(
|
||||
allConversations.map(async (conv) => {
|
||||
const messages = await DatabaseStore.getConversationMessages(conv.id);
|
||||
const messages = await DatabaseService.getConversationMessages(conv.id);
|
||||
return { conv, messages };
|
||||
})
|
||||
);
|
||||
|
|
@ -48,7 +47,7 @@
|
|||
try {
|
||||
const allData: ExportedConversations = await Promise.all(
|
||||
selectedConversations.map(async (conv) => {
|
||||
const messages = await DatabaseStore.getConversationMessages(conv.id);
|
||||
const messages = await DatabaseService.getConversationMessages(conv.id);
|
||||
return { conv: $state.snapshot(conv), messages: $state.snapshot(messages) };
|
||||
})
|
||||
);
|
||||
|
|
@ -136,9 +135,9 @@
|
|||
.snapshot(fullImportData)
|
||||
.filter((item) => selectedIds.has(item.conv.id));
|
||||
|
||||
await DatabaseStore.importConversations(selectedData);
|
||||
await DatabaseService.importConversations(selectedData);
|
||||
|
||||
await chatStore.loadConversations();
|
||||
await conversationsStore.loadConversations();
|
||||
|
||||
importedConversations = selectedConversations;
|
||||
showImportSummary = true;
|
||||
|
|
|
|||
|
|
@ -7,11 +7,7 @@
|
|||
import * as Sidebar from '$lib/components/ui/sidebar';
|
||||
import * as AlertDialog from '$lib/components/ui/alert-dialog';
|
||||
import Input from '$lib/components/ui/input/input.svelte';
|
||||
import {
|
||||
conversations,
|
||||
deleteConversation,
|
||||
updateConversationName
|
||||
} from '$lib/stores/chat.svelte';
|
||||
import { conversationsStore, conversations } from '$lib/stores/conversations.svelte';
|
||||
import ChatSidebarActions from './ChatSidebarActions.svelte';
|
||||
|
||||
const sidebar = Sidebar.useSidebar();
|
||||
|
|
@ -56,7 +52,7 @@
|
|||
showDeleteDialog = false;
|
||||
|
||||
setTimeout(() => {
|
||||
deleteConversation(selectedConversation.id);
|
||||
conversationsStore.deleteConversation(selectedConversation.id);
|
||||
selectedConversation = null;
|
||||
}, 100); // Wait for animation to finish
|
||||
}
|
||||
|
|
@ -67,7 +63,7 @@
|
|||
|
||||
showEditDialog = false;
|
||||
|
||||
updateConversationName(selectedConversation.id, editedName);
|
||||
conversationsStore.updateConversationName(selectedConversation.id, editedName);
|
||||
selectedConversation = null;
|
||||
}
|
||||
|
||||
|
|
@ -105,7 +101,7 @@
|
|||
</script>
|
||||
|
||||
<ScrollArea class="h-[100vh]">
|
||||
<Sidebar.Header class=" top-0 z-10 gap-6 bg-sidebar/50 px-4 pt-4 pb-2 backdrop-blur-lg md:sticky">
|
||||
<Sidebar.Header class=" top-0 z-10 gap-6 bg-sidebar/50 px-4 py-4 pb-2 backdrop-blur-lg md:sticky">
|
||||
<a href="#/" onclick={handleMobileSidebarItemClick}>
|
||||
<h1 class="inline-flex items-center gap-1 px-2 text-xl font-semibold">llama.cpp</h1>
|
||||
</a>
|
||||
|
|
@ -154,8 +150,6 @@
|
|||
</Sidebar.Menu>
|
||||
</Sidebar.GroupContent>
|
||||
</Sidebar.Group>
|
||||
|
||||
<div class="bottom-0 z-10 bg-sidebar bg-sidebar/50 px-4 py-4 backdrop-blur-lg md:sticky"></div>
|
||||
</ScrollArea>
|
||||
|
||||
<DialogConfirmation
|
||||
|
|
|
|||
|
|
@ -1,7 +1,8 @@
|
|||
<script lang="ts">
|
||||
import { Trash2, Pencil, MoreHorizontal, Download, Loader2 } from '@lucide/svelte';
|
||||
import { ActionDropdown } from '$lib/components/app';
|
||||
import { downloadConversation, getAllLoadingConversations } from '$lib/stores/chat.svelte';
|
||||
import { getAllLoadingChats } from '$lib/stores/chat.svelte';
|
||||
import { conversationsStore } from '$lib/stores/conversations.svelte';
|
||||
import { onMount } from 'svelte';
|
||||
|
||||
interface Props {
|
||||
|
|
@ -25,7 +26,7 @@
|
|||
let renderActionsDropdown = $state(false);
|
||||
let dropdownOpen = $state(false);
|
||||
|
||||
let isLoading = $derived(getAllLoadingConversations().includes(conversation.id));
|
||||
let isLoading = $derived(getAllLoadingChats().includes(conversation.id));
|
||||
|
||||
function handleEdit(event: Event) {
|
||||
event.stopPropagation();
|
||||
|
|
@ -114,7 +115,7 @@
|
|||
label: 'Export',
|
||||
onclick: (e) => {
|
||||
e.stopPropagation();
|
||||
downloadConversation(conversation.id);
|
||||
conversationsStore.downloadConversation(conversation.id);
|
||||
},
|
||||
shortcut: ['shift', 'cmd', 's']
|
||||
},
|
||||
|
|
|
|||
|
|
@ -1,49 +1,39 @@
|
|||
<script lang="ts">
|
||||
import * as Dialog from '$lib/components/ui/dialog';
|
||||
import { ChatAttachmentPreview } from '$lib/components/app';
|
||||
import { formatFileSize } from '$lib/utils/file-preview';
|
||||
import { formatFileSize } from '$lib/utils';
|
||||
|
||||
interface Props {
|
||||
open: boolean;
|
||||
onOpenChange?: (open: boolean) => void;
|
||||
// Either an uploaded file or a stored attachment
|
||||
uploadedFile?: ChatUploadedFile;
|
||||
attachment?: DatabaseMessageExtra;
|
||||
// For uploaded files
|
||||
preview?: string;
|
||||
name?: string;
|
||||
type?: string;
|
||||
size?: number;
|
||||
textContent?: string;
|
||||
// For vision modality check
|
||||
activeModelId?: string;
|
||||
}
|
||||
|
||||
let {
|
||||
open = $bindable(),
|
||||
onOpenChange,
|
||||
uploadedFile,
|
||||
attachment,
|
||||
preview,
|
||||
name,
|
||||
type,
|
||||
size,
|
||||
textContent
|
||||
textContent,
|
||||
activeModelId
|
||||
}: Props = $props();
|
||||
|
||||
let chatAttachmentPreviewRef: ChatAttachmentPreview | undefined = $state();
|
||||
|
||||
let displayName = $derived(uploadedFile?.name || attachment?.name || name || 'Unknown File');
|
||||
|
||||
let displayType = $derived(
|
||||
uploadedFile?.type ||
|
||||
(attachment?.type === 'imageFile'
|
||||
? 'image'
|
||||
: attachment?.type === 'textFile'
|
||||
? 'text'
|
||||
: attachment?.type === 'audioFile'
|
||||
? attachment.mimeType || 'audio'
|
||||
: attachment?.type === 'pdfFile'
|
||||
? 'application/pdf'
|
||||
: type || 'unknown')
|
||||
);
|
||||
|
||||
let displaySize = $derived(uploadedFile?.size || size);
|
||||
|
||||
$effect(() => {
|
||||
|
|
@ -53,14 +43,13 @@
|
|||
});
|
||||
</script>
|
||||
|
||||
<Dialog.Root bind:open>
|
||||
<Dialog.Root bind:open {onOpenChange}>
|
||||
<Dialog.Content class="grid max-h-[90vh] max-w-5xl overflow-hidden sm:w-auto sm:max-w-6xl">
|
||||
<Dialog.Header>
|
||||
<Dialog.Title>{displayName}</Dialog.Title>
|
||||
<Dialog.Title class="pr-8">{displayName}</Dialog.Title>
|
||||
<Dialog.Description>
|
||||
{displayType}
|
||||
{#if displaySize}
|
||||
• {formatFileSize(displaySize)}
|
||||
{formatFileSize(displaySize)}
|
||||
{/if}
|
||||
</Dialog.Description>
|
||||
</Dialog.Header>
|
||||
|
|
@ -70,9 +59,9 @@
|
|||
{uploadedFile}
|
||||
{attachment}
|
||||
{preview}
|
||||
{name}
|
||||
{type}
|
||||
name={displayName}
|
||||
{textContent}
|
||||
{activeModelId}
|
||||
/>
|
||||
</Dialog.Content>
|
||||
</Dialog.Root>
|
||||
|
|
|
|||
|
|
@ -11,6 +11,7 @@
|
|||
imageHeight?: string;
|
||||
imageWidth?: string;
|
||||
imageClass?: string;
|
||||
activeModelId?: string;
|
||||
}
|
||||
|
||||
let {
|
||||
|
|
@ -21,7 +22,8 @@
|
|||
onFileRemove,
|
||||
imageHeight = 'h-24',
|
||||
imageWidth = 'w-auto',
|
||||
imageClass = ''
|
||||
imageClass = '',
|
||||
activeModelId
|
||||
}: Props = $props();
|
||||
|
||||
let totalCount = $derived(uploadedFiles.length + attachments.length);
|
||||
|
|
@ -45,6 +47,7 @@
|
|||
{imageHeight}
|
||||
{imageWidth}
|
||||
{imageClass}
|
||||
{activeModelId}
|
||||
/>
|
||||
</Dialog.Content>
|
||||
</Dialog.Portal>
|
||||
|
|
|
|||
|
|
@ -0,0 +1,226 @@
|
|||
<script lang="ts">
|
||||
import * as Dialog from '$lib/components/ui/dialog';
|
||||
import * as Table from '$lib/components/ui/table';
|
||||
import { BadgeModality, CopyToClipboardIcon } from '$lib/components/app';
|
||||
import { serverStore } from '$lib/stores/server.svelte';
|
||||
import { modelsStore } from '$lib/stores/models.svelte';
|
||||
import { ChatService } from '$lib/services/chat';
|
||||
import { formatFileSize, formatParameters, formatNumber } from '$lib/utils';
|
||||
|
||||
interface Props {
|
||||
open?: boolean;
|
||||
onOpenChange?: (open: boolean) => void;
|
||||
}
|
||||
|
||||
let { open = $bindable(), onOpenChange }: Props = $props();
|
||||
|
||||
let serverProps = $derived(serverStore.props);
|
||||
let modelName = $derived(modelsStore.singleModelName);
|
||||
|
||||
// Get modalities from modelStore using the model ID from the first model
|
||||
// For now it supports only for single-model mode, will be extended with further improvements for multi-model functioanlities
|
||||
let modalities = $derived.by(() => {
|
||||
if (!modelsData?.data?.[0]?.id) return [];
|
||||
|
||||
return modelsStore.getModelModalitiesArray(modelsData.data[0].id);
|
||||
});
|
||||
|
||||
let modelsData = $state<ApiModelListResponse | null>(null);
|
||||
let isLoadingModels = $state(false);
|
||||
|
||||
// Fetch models data when dialog opens
|
||||
$effect(() => {
|
||||
if (open && !modelsData) {
|
||||
loadModelsData();
|
||||
}
|
||||
});
|
||||
|
||||
async function loadModelsData() {
|
||||
isLoadingModels = true;
|
||||
|
||||
try {
|
||||
modelsData = await ChatService.getModels();
|
||||
} catch (error) {
|
||||
console.error('Failed to load models data:', error);
|
||||
// Set empty data to prevent infinite loading
|
||||
modelsData = { object: 'list', data: [] };
|
||||
} finally {
|
||||
isLoadingModels = false;
|
||||
}
|
||||
}
|
||||
</script>
|
||||
|
||||
<Dialog.Root bind:open {onOpenChange}>
|
||||
<Dialog.Content class="@container z-9999 !max-w-[60rem] max-w-full">
|
||||
<style>
|
||||
@container (max-width: 56rem) {
|
||||
.resizable-text-container {
|
||||
max-width: calc(100vw - var(--threshold));
|
||||
}
|
||||
}
|
||||
</style>
|
||||
|
||||
<Dialog.Header>
|
||||
<Dialog.Title>Model Information</Dialog.Title>
|
||||
<Dialog.Description>Current model details and capabilities</Dialog.Description>
|
||||
</Dialog.Header>
|
||||
|
||||
<div class="space-y-6 py-4">
|
||||
{#if isLoadingModels}
|
||||
<div class="flex items-center justify-center py-8">
|
||||
<div class="text-sm text-muted-foreground">Loading model information...</div>
|
||||
</div>
|
||||
{:else if modelsData && modelsData.data.length > 0}
|
||||
{@const modelMeta = modelsData.data[0].meta}
|
||||
|
||||
{#if serverProps}
|
||||
<Table.Root>
|
||||
<Table.Header>
|
||||
<Table.Row>
|
||||
<Table.Head class="w-[10rem]">Model</Table.Head>
|
||||
|
||||
<Table.Head>
|
||||
<div class="inline-flex items-center gap-2">
|
||||
<span
|
||||
class="resizable-text-container min-w-0 flex-1 truncate"
|
||||
style:--threshold="12rem"
|
||||
>
|
||||
{modelName}
|
||||
</span>
|
||||
|
||||
<CopyToClipboardIcon
|
||||
text={modelName || ''}
|
||||
canCopy={!!modelName}
|
||||
ariaLabel="Copy model name to clipboard"
|
||||
/>
|
||||
</div>
|
||||
</Table.Head>
|
||||
</Table.Row>
|
||||
</Table.Header>
|
||||
<Table.Body>
|
||||
<!-- Model Path -->
|
||||
<Table.Row>
|
||||
<Table.Cell class="h-10 align-middle font-medium">File Path</Table.Cell>
|
||||
|
||||
<Table.Cell
|
||||
class="inline-flex h-10 items-center gap-2 align-middle font-mono text-xs"
|
||||
>
|
||||
<span
|
||||
class="resizable-text-container min-w-0 flex-1 truncate"
|
||||
style:--threshold="14rem"
|
||||
>
|
||||
{serverProps.model_path}
|
||||
</span>
|
||||
|
||||
<CopyToClipboardIcon
|
||||
text={serverProps.model_path}
|
||||
ariaLabel="Copy model path to clipboard"
|
||||
/>
|
||||
</Table.Cell>
|
||||
</Table.Row>
|
||||
|
||||
<!-- Context Size -->
|
||||
<Table.Row>
|
||||
<Table.Cell class="h-10 align-middle font-medium">Context Size</Table.Cell>
|
||||
<Table.Cell
|
||||
>{formatNumber(serverProps.default_generation_settings.n_ctx)} tokens</Table.Cell
|
||||
>
|
||||
</Table.Row>
|
||||
|
||||
<!-- Training Context -->
|
||||
{#if modelMeta?.n_ctx_train}
|
||||
<Table.Row>
|
||||
<Table.Cell class="h-10 align-middle font-medium">Training Context</Table.Cell>
|
||||
<Table.Cell>{formatNumber(modelMeta.n_ctx_train)} tokens</Table.Cell>
|
||||
</Table.Row>
|
||||
{/if}
|
||||
|
||||
<!-- Model Size -->
|
||||
{#if modelMeta?.size}
|
||||
<Table.Row>
|
||||
<Table.Cell class="h-10 align-middle font-medium">Model Size</Table.Cell>
|
||||
<Table.Cell>{formatFileSize(modelMeta.size)}</Table.Cell>
|
||||
</Table.Row>
|
||||
{/if}
|
||||
|
||||
<!-- Parameters -->
|
||||
{#if modelMeta?.n_params}
|
||||
<Table.Row>
|
||||
<Table.Cell class="h-10 align-middle font-medium">Parameters</Table.Cell>
|
||||
<Table.Cell>{formatParameters(modelMeta.n_params)}</Table.Cell>
|
||||
</Table.Row>
|
||||
{/if}
|
||||
|
||||
<!-- Embedding Size -->
|
||||
{#if modelMeta?.n_embd}
|
||||
<Table.Row>
|
||||
<Table.Cell class="align-middle font-medium">Embedding Size</Table.Cell>
|
||||
<Table.Cell>{formatNumber(modelMeta.n_embd)}</Table.Cell>
|
||||
</Table.Row>
|
||||
{/if}
|
||||
|
||||
<!-- Vocabulary Size -->
|
||||
{#if modelMeta?.n_vocab}
|
||||
<Table.Row>
|
||||
<Table.Cell class="align-middle font-medium">Vocabulary Size</Table.Cell>
|
||||
<Table.Cell>{formatNumber(modelMeta.n_vocab)} tokens</Table.Cell>
|
||||
</Table.Row>
|
||||
{/if}
|
||||
|
||||
<!-- Vocabulary Type -->
|
||||
{#if modelMeta?.vocab_type}
|
||||
<Table.Row>
|
||||
<Table.Cell class="align-middle font-medium">Vocabulary Type</Table.Cell>
|
||||
<Table.Cell class="align-middle capitalize">{modelMeta.vocab_type}</Table.Cell>
|
||||
</Table.Row>
|
||||
{/if}
|
||||
|
||||
<!-- Total Slots -->
|
||||
<Table.Row>
|
||||
<Table.Cell class="align-middle font-medium">Parallel Slots</Table.Cell>
|
||||
<Table.Cell>{serverProps.total_slots}</Table.Cell>
|
||||
</Table.Row>
|
||||
|
||||
<!-- Modalities -->
|
||||
{#if modalities.length > 0}
|
||||
<Table.Row>
|
||||
<Table.Cell class="align-middle font-medium">Modalities</Table.Cell>
|
||||
<Table.Cell>
|
||||
<div class="flex flex-wrap gap-1">
|
||||
<BadgeModality {modalities} />
|
||||
</div>
|
||||
</Table.Cell>
|
||||
</Table.Row>
|
||||
{/if}
|
||||
|
||||
<!-- Build Info -->
|
||||
<Table.Row>
|
||||
<Table.Cell class="align-middle font-medium">Build Info</Table.Cell>
|
||||
<Table.Cell class="align-middle font-mono text-xs"
|
||||
>{serverProps.build_info}</Table.Cell
|
||||
>
|
||||
</Table.Row>
|
||||
|
||||
<!-- Chat Template -->
|
||||
{#if serverProps.chat_template}
|
||||
<Table.Row>
|
||||
<Table.Cell class="align-middle font-medium">Chat Template</Table.Cell>
|
||||
<Table.Cell class="py-10">
|
||||
<div class="max-h-120 overflow-y-auto rounded-md bg-muted p-4">
|
||||
<pre
|
||||
class="font-mono text-xs whitespace-pre-wrap">{serverProps.chat_template}</pre>
|
||||
</div>
|
||||
</Table.Cell>
|
||||
</Table.Row>
|
||||
{/if}
|
||||
</Table.Body>
|
||||
</Table.Root>
|
||||
{/if}
|
||||
{:else if !isLoadingModels}
|
||||
<div class="flex items-center justify-center py-8">
|
||||
<div class="text-sm text-muted-foreground">No model information available</div>
|
||||
</div>
|
||||
{/if}
|
||||
</div>
|
||||
</Dialog.Content>
|
||||
</Dialog.Root>
|
||||
|
|
@ -0,0 +1,76 @@
|
|||
<script lang="ts">
|
||||
import * as AlertDialog from '$lib/components/ui/alert-dialog';
|
||||
import { AlertTriangle, ArrowRight } from '@lucide/svelte';
|
||||
import { goto } from '$app/navigation';
|
||||
import { page } from '$app/state';
|
||||
|
||||
interface Props {
|
||||
open: boolean;
|
||||
modelName: string;
|
||||
availableModels?: string[];
|
||||
onOpenChange?: (open: boolean) => void;
|
||||
}
|
||||
|
||||
let { open = $bindable(), modelName, availableModels = [], onOpenChange }: Props = $props();
|
||||
|
||||
function handleOpenChange(newOpen: boolean) {
|
||||
open = newOpen;
|
||||
onOpenChange?.(newOpen);
|
||||
}
|
||||
|
||||
function handleSelectModel(model: string) {
|
||||
// Build URL with selected model, preserving other params
|
||||
const url = new URL(page.url);
|
||||
url.searchParams.set('model', model);
|
||||
|
||||
handleOpenChange(false);
|
||||
goto(url.toString());
|
||||
}
|
||||
</script>
|
||||
|
||||
<AlertDialog.Root {open} onOpenChange={handleOpenChange}>
|
||||
<AlertDialog.Content class="max-w-lg">
|
||||
<AlertDialog.Header>
|
||||
<AlertDialog.Title class="flex items-center gap-2">
|
||||
<AlertTriangle class="h-5 w-5 text-amber-500" />
|
||||
Model Not Available
|
||||
</AlertDialog.Title>
|
||||
|
||||
<AlertDialog.Description>
|
||||
The requested model could not be found. Select an available model to continue.
|
||||
</AlertDialog.Description>
|
||||
</AlertDialog.Header>
|
||||
|
||||
<div class="space-y-3">
|
||||
<div class="rounded-lg border border-amber-500/40 bg-amber-500/10 px-4 py-3 text-sm">
|
||||
<p class="font-medium text-amber-600 dark:text-amber-400">
|
||||
Requested: <code class="rounded bg-amber-500/20 px-1.5 py-0.5">{modelName}</code>
|
||||
</p>
|
||||
</div>
|
||||
|
||||
{#if availableModels.length > 0}
|
||||
<div class="text-sm">
|
||||
<p class="mb-2 font-medium text-muted-foreground">Select an available model:</p>
|
||||
<div class="max-h-48 space-y-1 overflow-y-auto rounded-md border p-1">
|
||||
{#each availableModels as model (model)}
|
||||
<button
|
||||
type="button"
|
||||
class="group flex w-full items-center justify-between gap-2 rounded-sm px-3 py-2 text-left text-sm transition-colors hover:bg-accent hover:text-accent-foreground"
|
||||
onclick={() => handleSelectModel(model)}
|
||||
>
|
||||
<span class="min-w-0 truncate font-mono text-xs">{model}</span>
|
||||
<ArrowRight
|
||||
class="h-4 w-4 shrink-0 text-muted-foreground opacity-0 transition-opacity group-hover:opacity-100"
|
||||
/>
|
||||
</button>
|
||||
{/each}
|
||||
</div>
|
||||
</div>
|
||||
{/if}
|
||||
</div>
|
||||
|
||||
<AlertDialog.Footer>
|
||||
<AlertDialog.Action onclick={() => handleOpenChange(false)}>Cancel</AlertDialog.Action>
|
||||
</AlertDialog.Footer>
|
||||
</AlertDialog.Content>
|
||||
</AlertDialog.Root>
|
||||
|
|
@ -10,20 +10,21 @@ export { default as ChatForm } from './chat/ChatForm/ChatForm.svelte';
|
|||
export { default as ChatFormActionFileAttachments } from './chat/ChatForm/ChatFormActions/ChatFormActionFileAttachments.svelte';
|
||||
export { default as ChatFormActionRecord } from './chat/ChatForm/ChatFormActions/ChatFormActionRecord.svelte';
|
||||
export { default as ChatFormActions } from './chat/ChatForm/ChatFormActions/ChatFormActions.svelte';
|
||||
export { default as ChatFormActionSubmit } from './chat/ChatForm/ChatFormActions/ChatFormActionSubmit.svelte';
|
||||
export { default as ChatFormFileInputInvisible } from './chat/ChatForm/ChatFormFileInputInvisible.svelte';
|
||||
export { default as ChatFormHelperText } from './chat/ChatForm/ChatFormHelperText.svelte';
|
||||
export { default as ChatFormModelSelector } from './chat/ChatForm/ChatFormModelSelector.svelte';
|
||||
export { default as ChatFormTextarea } from './chat/ChatForm/ChatFormTextarea.svelte';
|
||||
|
||||
export { default as ChatMessage } from './chat/ChatMessages/ChatMessage.svelte';
|
||||
export { default as ChatMessages } from './chat/ChatMessages/ChatMessages.svelte';
|
||||
export { default as ChatMessageActions } from './chat/ChatMessages/ChatMessageActions.svelte';
|
||||
export { default as ChatMessageBranchingControls } from './chat/ChatMessages/ChatMessageBranchingControls.svelte';
|
||||
export { default as ChatMessageStatistics } from './chat/ChatMessages/ChatMessageStatistics.svelte';
|
||||
export { default as ChatMessageThinkingBlock } from './chat/ChatMessages/ChatMessageThinkingBlock.svelte';
|
||||
export { default as ChatMessages } from './chat/ChatMessages/ChatMessages.svelte';
|
||||
|
||||
export { default as ChatScreen } from './chat/ChatScreen/ChatScreen.svelte';
|
||||
export { default as ChatScreenHeader } from './chat/ChatScreen/ChatScreenHeader.svelte';
|
||||
export { default as ChatScreenProcessingInfo } from './chat/ChatScreen/ChatScreenProcessingInfo.svelte';
|
||||
export { default as ChatScreenWarning } from './chat/ChatScreen/ChatScreenWarning.svelte';
|
||||
|
||||
export { default as ChatSettings } from './chat/ChatSettings/ChatSettings.svelte';
|
||||
export { default as ChatSettingsFooter } from './chat/ChatSettings/ChatSettingsFooter.svelte';
|
||||
|
|
@ -45,19 +46,27 @@ export { default as DialogConfirmation } from './dialogs/DialogConfirmation.svel
|
|||
export { default as DialogConversationSelection } from './dialogs/DialogConversationSelection.svelte';
|
||||
export { default as DialogConversationTitleUpdate } from './dialogs/DialogConversationTitleUpdate.svelte';
|
||||
export { default as DialogEmptyFileAlert } from './dialogs/DialogEmptyFileAlert.svelte';
|
||||
export { default as DialogModelInformation } from './dialogs/DialogModelInformation.svelte';
|
||||
export { default as DialogModelNotAvailable } from './dialogs/DialogModelNotAvailable.svelte';
|
||||
|
||||
// Miscellanous
|
||||
|
||||
export { default as ActionButton } from './misc/ActionButton.svelte';
|
||||
export { default as ActionDropdown } from './misc/ActionDropdown.svelte';
|
||||
export { default as BadgeChatStatistic } from './misc/BadgeChatStatistic.svelte';
|
||||
export { default as BadgeInfo } from './misc/BadgeInfo.svelte';
|
||||
export { default as ModelBadge } from './models/ModelBadge.svelte';
|
||||
export { default as BadgeModality } from './misc/BadgeModality.svelte';
|
||||
export { default as ConversationSelection } from './misc/ConversationSelection.svelte';
|
||||
export { default as CopyToClipboardIcon } from './misc/CopyToClipboardIcon.svelte';
|
||||
export { default as KeyboardShortcutInfo } from './misc/KeyboardShortcutInfo.svelte';
|
||||
export { default as MarkdownContent } from './misc/MarkdownContent.svelte';
|
||||
export { default as RemoveButton } from './misc/RemoveButton.svelte';
|
||||
export { default as SyntaxHighlightedCode } from './misc/SyntaxHighlightedCode.svelte';
|
||||
export { default as ModelsSelector } from './models/ModelsSelector.svelte';
|
||||
|
||||
// Server
|
||||
|
||||
export { default as ServerStatus } from './server/ServerStatus.svelte';
|
||||
export { default as ServerErrorSplash } from './server/ServerErrorSplash.svelte';
|
||||
export { default as ServerLoadingSplash } from './server/ServerLoadingSplash.svelte';
|
||||
export { default as ServerInfo } from './server/ServerInfo.svelte';
|
||||
|
|
|
|||
|
|
@ -1,7 +1,6 @@
|
|||
<script lang="ts">
|
||||
import { Button } from '$lib/components/ui/button';
|
||||
import * as Tooltip from '$lib/components/ui/tooltip';
|
||||
import { TOOLTIP_DELAY_DURATION } from '$lib/constants/tooltip-config';
|
||||
import type { Component } from 'svelte';
|
||||
|
||||
interface Props {
|
||||
|
|
@ -27,7 +26,7 @@
|
|||
}: Props = $props();
|
||||
</script>
|
||||
|
||||
<Tooltip.Root delayDuration={TOOLTIP_DELAY_DURATION}>
|
||||
<Tooltip.Root>
|
||||
<Tooltip.Trigger>
|
||||
<Button
|
||||
{variant}
|
||||
|
|
|
|||
|
|
@ -2,7 +2,6 @@
|
|||
import * as DropdownMenu from '$lib/components/ui/dropdown-menu';
|
||||
import * as Tooltip from '$lib/components/ui/tooltip';
|
||||
import { KeyboardShortcutInfo } from '$lib/components/app';
|
||||
import { TOOLTIP_DELAY_DURATION } from '$lib/constants/tooltip-config';
|
||||
import type { Component } from 'svelte';
|
||||
|
||||
interface ActionItem {
|
||||
|
|
@ -40,7 +39,7 @@
|
|||
onclick={(e) => e.stopPropagation()}
|
||||
>
|
||||
{#if triggerTooltip}
|
||||
<Tooltip.Root delayDuration={TOOLTIP_DELAY_DURATION}>
|
||||
<Tooltip.Root>
|
||||
<Tooltip.Trigger>
|
||||
{@render iconComponent(triggerIcon, 'h-3 w-3')}
|
||||
<span class="sr-only">{triggerTooltip}</span>
|
||||
|
|
|
|||
|
|
@ -0,0 +1,25 @@
|
|||
<script lang="ts">
|
||||
import { BadgeInfo } from '$lib/components/app';
|
||||
import { copyToClipboard } from '$lib/utils';
|
||||
import type { Component } from 'svelte';
|
||||
|
||||
interface Props {
|
||||
class?: string;
|
||||
icon: Component;
|
||||
value: string | number;
|
||||
}
|
||||
|
||||
let { class: className = '', icon: Icon, value }: Props = $props();
|
||||
|
||||
function handleClick() {
|
||||
void copyToClipboard(String(value));
|
||||
}
|
||||
</script>
|
||||
|
||||
<BadgeInfo class={className} onclick={handleClick}>
|
||||
{#snippet icon()}
|
||||
<Icon class="h-3 w-3" />
|
||||
{/snippet}
|
||||
|
||||
{value}
|
||||
</BadgeInfo>
|
||||
|
|
@ -0,0 +1,27 @@
|
|||
<script lang="ts">
|
||||
import { cn } from '$lib/components/ui/utils';
|
||||
import type { Snippet } from 'svelte';
|
||||
|
||||
interface Props {
|
||||
children: Snippet;
|
||||
class?: string;
|
||||
icon?: Snippet;
|
||||
onclick?: () => void;
|
||||
}
|
||||
|
||||
let { children, class: className = '', icon, onclick }: Props = $props();
|
||||
</script>
|
||||
|
||||
<button
|
||||
class={cn(
|
||||
'inline-flex cursor-pointer items-center gap-1 rounded-sm bg-muted-foreground/15 px-1.5 py-0.75',
|
||||
className
|
||||
)}
|
||||
{onclick}
|
||||
>
|
||||
{#if icon}
|
||||
{@render icon()}
|
||||
{/if}
|
||||
|
||||
{@render children()}
|
||||
</button>
|
||||
|
|
@ -0,0 +1,39 @@
|
|||
<script lang="ts">
|
||||
import { ModelModality } from '$lib/enums';
|
||||
import { MODALITY_ICONS, MODALITY_LABELS } from '$lib/constants/icons';
|
||||
import { cn } from '$lib/components/ui/utils';
|
||||
|
||||
type DisplayableModality = ModelModality.VISION | ModelModality.AUDIO;
|
||||
|
||||
interface Props {
|
||||
modalities: ModelModality[];
|
||||
class?: string;
|
||||
}
|
||||
|
||||
let { modalities, class: className = '' }: Props = $props();
|
||||
|
||||
// Filter to only modalities that have icons (VISION, AUDIO)
|
||||
const displayableModalities = $derived(
|
||||
modalities.filter(
|
||||
(m): m is DisplayableModality => m === ModelModality.VISION || m === ModelModality.AUDIO
|
||||
)
|
||||
);
|
||||
</script>
|
||||
|
||||
{#each displayableModalities as modality, index (index)}
|
||||
{@const IconComponent = MODALITY_ICONS[modality]}
|
||||
{@const label = MODALITY_LABELS[modality]}
|
||||
|
||||
<span
|
||||
class={cn(
|
||||
'inline-flex items-center gap-1 rounded-md bg-muted px-2 py-1 text-xs font-medium',
|
||||
className
|
||||
)}
|
||||
>
|
||||
{#if IconComponent}
|
||||
<IconComponent class="h-3 w-3" />
|
||||
{/if}
|
||||
|
||||
{label}
|
||||
</span>
|
||||
{/each}
|
||||
|
|
@ -0,0 +1,18 @@
|
|||
<script lang="ts">
|
||||
import { Copy } from '@lucide/svelte';
|
||||
import { copyToClipboard } from '$lib/utils';
|
||||
|
||||
interface Props {
|
||||
ariaLabel?: string;
|
||||
canCopy?: boolean;
|
||||
text: string;
|
||||
}
|
||||
|
||||
let { ariaLabel = 'Copy to clipboard', canCopy = true, text }: Props = $props();
|
||||
</script>
|
||||
|
||||
<Copy
|
||||
class="h-3 w-3 flex-shrink-0 cursor-{canCopy ? 'pointer' : 'not-allowed'}"
|
||||
aria-label={ariaLabel}
|
||||
onclick={() => canCopy && copyToClipboard(text)}
|
||||
/>
|
||||
|
|
@ -7,9 +7,8 @@
|
|||
import remarkRehype from 'remark-rehype';
|
||||
import rehypeKatex from 'rehype-katex';
|
||||
import rehypeStringify from 'rehype-stringify';
|
||||
import { copyCodeToClipboard } from '$lib/utils/copy';
|
||||
import { copyCodeToClipboard, preprocessLaTeX } from '$lib/utils';
|
||||
import { rehypeRestoreTableHtml } from '$lib/markdown/table-html-restorer';
|
||||
import { preprocessLaTeX } from '$lib/utils/latex-protection';
|
||||
import { browser } from '$app/environment';
|
||||
import '$styles/katex-custom.scss';
|
||||
|
||||
|
|
|
|||
|
|
@ -0,0 +1,96 @@
|
|||
<script lang="ts">
|
||||
import hljs from 'highlight.js';
|
||||
import { browser } from '$app/environment';
|
||||
import { mode } from 'mode-watcher';
|
||||
|
||||
import githubDarkCss from 'highlight.js/styles/github-dark.css?inline';
|
||||
import githubLightCss from 'highlight.js/styles/github.css?inline';
|
||||
|
||||
interface Props {
|
||||
code: string;
|
||||
language?: string;
|
||||
class?: string;
|
||||
maxHeight?: string;
|
||||
maxWidth?: string;
|
||||
}
|
||||
|
||||
let {
|
||||
code,
|
||||
language = 'text',
|
||||
class: className = '',
|
||||
maxHeight = '60vh',
|
||||
maxWidth = ''
|
||||
}: Props = $props();
|
||||
|
||||
let highlightedHtml = $state('');
|
||||
|
||||
function loadHighlightTheme(isDark: boolean) {
|
||||
if (!browser) return;
|
||||
|
||||
const existingThemes = document.querySelectorAll('style[data-highlight-theme-preview]');
|
||||
existingThemes.forEach((style) => style.remove());
|
||||
|
||||
const style = document.createElement('style');
|
||||
style.setAttribute('data-highlight-theme-preview', 'true');
|
||||
style.textContent = isDark ? githubDarkCss : githubLightCss;
|
||||
|
||||
document.head.appendChild(style);
|
||||
}
|
||||
|
||||
$effect(() => {
|
||||
const currentMode = mode.current;
|
||||
const isDark = currentMode === 'dark';
|
||||
|
||||
loadHighlightTheme(isDark);
|
||||
});
|
||||
|
||||
$effect(() => {
|
||||
if (!code) {
|
||||
highlightedHtml = '';
|
||||
return;
|
||||
}
|
||||
|
||||
try {
|
||||
// Check if the language is supported
|
||||
const lang = language.toLowerCase();
|
||||
const isSupported = hljs.getLanguage(lang);
|
||||
|
||||
if (isSupported) {
|
||||
const result = hljs.highlight(code, { language: lang });
|
||||
highlightedHtml = result.value;
|
||||
} else {
|
||||
// Try auto-detection or fallback to plain text
|
||||
const result = hljs.highlightAuto(code);
|
||||
highlightedHtml = result.value;
|
||||
}
|
||||
} catch {
|
||||
// Fallback to escaped plain text
|
||||
highlightedHtml = code.replace(/&/g, '&').replace(/</g, '<').replace(/>/g, '>');
|
||||
}
|
||||
});
|
||||
</script>
|
||||
|
||||
<div
|
||||
class="code-preview-wrapper overflow-auto rounded-lg border border-border bg-muted {className}"
|
||||
style="max-height: {maxHeight};"
|
||||
>
|
||||
<pre class="m-0 overflow-x-auto p-4 max-w-[{maxWidth}]"><code class="hljs text-sm leading-relaxed"
|
||||
>{@html highlightedHtml}</code
|
||||
></pre>
|
||||
</div>
|
||||
|
||||
<style>
|
||||
.code-preview-wrapper {
|
||||
font-family:
|
||||
ui-monospace, SFMono-Regular, 'SF Mono', Monaco, 'Cascadia Code', 'Roboto Mono', Consolas,
|
||||
'Liberation Mono', Menlo, monospace;
|
||||
}
|
||||
|
||||
.code-preview-wrapper pre {
|
||||
background: transparent;
|
||||
}
|
||||
|
||||
.code-preview-wrapper code {
|
||||
background: transparent;
|
||||
}
|
||||
</style>
|
||||
|
|
@ -0,0 +1,56 @@
|
|||
<script lang="ts">
|
||||
import { Package } from '@lucide/svelte';
|
||||
import { BadgeInfo, CopyToClipboardIcon } from '$lib/components/app';
|
||||
import { modelsStore } from '$lib/stores/models.svelte';
|
||||
import { serverStore } from '$lib/stores/server.svelte';
|
||||
import * as Tooltip from '$lib/components/ui/tooltip';
|
||||
|
||||
interface Props {
|
||||
class?: string;
|
||||
model?: string;
|
||||
onclick?: () => void;
|
||||
showCopyIcon?: boolean;
|
||||
showTooltip?: boolean;
|
||||
}
|
||||
|
||||
let {
|
||||
class: className = '',
|
||||
model: modelProp,
|
||||
onclick,
|
||||
showCopyIcon = false,
|
||||
showTooltip = false
|
||||
}: Props = $props();
|
||||
|
||||
let model = $derived(modelProp || modelsStore.singleModelName);
|
||||
let isModelMode = $derived(serverStore.isModelMode);
|
||||
</script>
|
||||
|
||||
{#snippet badgeContent()}
|
||||
<BadgeInfo class={className} {onclick}>
|
||||
{#snippet icon()}
|
||||
<Package class="h-3 w-3" />
|
||||
{/snippet}
|
||||
|
||||
{model}
|
||||
|
||||
{#if showCopyIcon}
|
||||
<CopyToClipboardIcon text={model || ''} ariaLabel="Copy model name" />
|
||||
{/if}
|
||||
</BadgeInfo>
|
||||
{/snippet}
|
||||
|
||||
{#if model && isModelMode}
|
||||
{#if showTooltip}
|
||||
<Tooltip.Root>
|
||||
<Tooltip.Trigger>
|
||||
{@render badgeContent()}
|
||||
</Tooltip.Trigger>
|
||||
|
||||
<Tooltip.Content>
|
||||
{onclick ? 'Click for model details' : model}
|
||||
</Tooltip.Content>
|
||||
</Tooltip.Root>
|
||||
{:else}
|
||||
{@render badgeContent()}
|
||||
{/if}
|
||||
{/if}
|
||||
|
|
@ -0,0 +1,596 @@
|
|||
<script lang="ts">
|
||||
import { onMount, tick } from 'svelte';
|
||||
import { ChevronDown, EyeOff, Loader2, MicOff, Package, Power } from '@lucide/svelte';
|
||||
import * as Tooltip from '$lib/components/ui/tooltip';
|
||||
import { cn } from '$lib/components/ui/utils';
|
||||
import { portalToBody } from '$lib/utils';
|
||||
import {
|
||||
modelsStore,
|
||||
modelOptions,
|
||||
modelsLoading,
|
||||
modelsUpdating,
|
||||
selectedModelId,
|
||||
routerModels,
|
||||
propsCacheVersion,
|
||||
singleModelName
|
||||
} from '$lib/stores/models.svelte';
|
||||
import { usedModalities, conversationsStore } from '$lib/stores/conversations.svelte';
|
||||
import { ServerModelStatus } from '$lib/enums';
|
||||
import { isRouterMode } from '$lib/stores/server.svelte';
|
||||
import { DialogModelInformation } from '$lib/components/app';
|
||||
import {
|
||||
MENU_MAX_WIDTH,
|
||||
MENU_OFFSET,
|
||||
VIEWPORT_GUTTER
|
||||
} from '$lib/constants/floating-ui-constraints';
|
||||
|
||||
interface Props {
|
||||
class?: string;
|
||||
currentModel?: string | null;
|
||||
/** Callback when model changes. Return false to keep menu open (e.g., for validation failures) */
|
||||
onModelChange?: (modelId: string, modelName: string) => Promise<boolean> | boolean | void;
|
||||
disabled?: boolean;
|
||||
forceForegroundText?: boolean;
|
||||
/** When true, user's global selection takes priority over currentModel (for form selector) */
|
||||
useGlobalSelection?: boolean;
|
||||
/**
|
||||
* When provided, only consider modalities from messages BEFORE this message.
|
||||
* Used for regeneration - allows selecting models that don't support modalities
|
||||
* used in later messages.
|
||||
*/
|
||||
upToMessageId?: string;
|
||||
}
|
||||
|
||||
let {
|
||||
class: className = '',
|
||||
currentModel = null,
|
||||
onModelChange,
|
||||
disabled = false,
|
||||
forceForegroundText = false,
|
||||
useGlobalSelection = false,
|
||||
upToMessageId
|
||||
}: Props = $props();
|
||||
|
||||
let options = $derived(modelOptions());
|
||||
let loading = $derived(modelsLoading());
|
||||
let updating = $derived(modelsUpdating());
|
||||
let activeId = $derived(selectedModelId());
|
||||
let isRouter = $derived(isRouterMode());
|
||||
let serverModel = $derived(singleModelName());
|
||||
|
||||
// Reactive router models state - needed for proper reactivity of status checks
|
||||
let currentRouterModels = $derived(routerModels());
|
||||
|
||||
let requiredModalities = $derived(
|
||||
upToMessageId ? conversationsStore.getModalitiesUpToMessage(upToMessageId) : usedModalities()
|
||||
);
|
||||
|
||||
function getModelStatus(modelId: string): ServerModelStatus | null {
|
||||
const model = currentRouterModels.find((m) => m.id === modelId);
|
||||
return (model?.status?.value as ServerModelStatus) ?? null;
|
||||
}
|
||||
|
||||
/**
|
||||
* Checks if a model supports all modalities used in the conversation.
|
||||
* Returns true if the model can be selected, false if it should be disabled.
|
||||
*/
|
||||
function isModelCompatible(option: ModelOption): boolean {
|
||||
void propsCacheVersion();
|
||||
|
||||
const modelModalities = modelsStore.getModelModalities(option.model);
|
||||
|
||||
if (!modelModalities) {
|
||||
const status = getModelStatus(option.model);
|
||||
|
||||
if (status === ServerModelStatus.LOADED) {
|
||||
if (requiredModalities.vision || requiredModalities.audio) return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
if (requiredModalities.vision && !modelModalities.vision) return false;
|
||||
if (requiredModalities.audio && !modelModalities.audio) return false;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets missing modalities for a model.
|
||||
* Returns object with vision/audio booleans indicating what's missing.
|
||||
*/
|
||||
function getMissingModalities(option: ModelOption): { vision: boolean; audio: boolean } | null {
|
||||
void propsCacheVersion();
|
||||
|
||||
const modelModalities = modelsStore.getModelModalities(option.model);
|
||||
|
||||
if (!modelModalities) {
|
||||
const status = getModelStatus(option.model);
|
||||
|
||||
if (status === ServerModelStatus.LOADED) {
|
||||
const missing = {
|
||||
vision: requiredModalities.vision,
|
||||
audio: requiredModalities.audio
|
||||
};
|
||||
|
||||
if (missing.vision || missing.audio) return missing;
|
||||
}
|
||||
|
||||
return null;
|
||||
}
|
||||
|
||||
const missing = {
|
||||
vision: requiredModalities.vision && !modelModalities.vision,
|
||||
audio: requiredModalities.audio && !modelModalities.audio
|
||||
};
|
||||
|
||||
if (!missing.vision && !missing.audio) return null;
|
||||
|
||||
return missing;
|
||||
}
|
||||
|
||||
let isHighlightedCurrentModelActive = $derived(
|
||||
!isRouter || !currentModel
|
||||
? false
|
||||
: (() => {
|
||||
const currentOption = options.find((option) => option.model === currentModel);
|
||||
|
||||
return currentOption ? currentOption.id === activeId : false;
|
||||
})()
|
||||
);
|
||||
|
||||
let isCurrentModelInCache = $derived(() => {
|
||||
if (!isRouter || !currentModel) return true;
|
||||
|
||||
return options.some((option) => option.model === currentModel);
|
||||
});
|
||||
|
||||
let isOpen = $state(false);
|
||||
let showModelDialog = $state(false);
|
||||
let container: HTMLDivElement | null = null;
|
||||
let menuRef = $state<HTMLDivElement | null>(null);
|
||||
let triggerButton = $state<HTMLButtonElement | null>(null);
|
||||
let menuPosition = $state<{
|
||||
top: number;
|
||||
left: number;
|
||||
width: number;
|
||||
placement: 'top' | 'bottom';
|
||||
maxHeight: number;
|
||||
} | null>(null);
|
||||
|
||||
onMount(async () => {
|
||||
try {
|
||||
await modelsStore.fetch();
|
||||
} catch (error) {
|
||||
console.error('Unable to load models:', error);
|
||||
}
|
||||
});
|
||||
|
||||
function toggleOpen() {
|
||||
if (loading || updating) return;
|
||||
|
||||
if (isRouter) {
|
||||
// Router mode: show dropdown
|
||||
if (isOpen) {
|
||||
closeMenu();
|
||||
} else {
|
||||
openMenu();
|
||||
}
|
||||
} else {
|
||||
// Single model mode: show dialog
|
||||
showModelDialog = true;
|
||||
}
|
||||
}
|
||||
|
||||
async function openMenu() {
|
||||
if (loading || updating) return;
|
||||
|
||||
isOpen = true;
|
||||
await tick();
|
||||
updateMenuPosition();
|
||||
requestAnimationFrame(() => updateMenuPosition());
|
||||
|
||||
if (isRouter) {
|
||||
modelsStore.fetchRouterModels().then(() => {
|
||||
modelsStore.fetchModalitiesForLoadedModels();
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
export function open() {
|
||||
if (isRouter) {
|
||||
openMenu();
|
||||
} else {
|
||||
showModelDialog = true;
|
||||
}
|
||||
}
|
||||
|
||||
function closeMenu() {
|
||||
if (!isOpen) return;
|
||||
|
||||
isOpen = false;
|
||||
menuPosition = null;
|
||||
}
|
||||
|
||||
function handlePointerDown(event: PointerEvent) {
|
||||
if (!container) return;
|
||||
|
||||
const target = event.target as Node | null;
|
||||
|
||||
if (target && !container.contains(target) && !(menuRef && menuRef.contains(target))) {
|
||||
closeMenu();
|
||||
}
|
||||
}
|
||||
|
||||
function handleKeydown(event: KeyboardEvent) {
|
||||
if (event.key === 'Escape') {
|
||||
closeMenu();
|
||||
}
|
||||
}
|
||||
|
||||
function handleResize() {
|
||||
if (isOpen) {
|
||||
updateMenuPosition();
|
||||
}
|
||||
}
|
||||
|
||||
function updateMenuPosition() {
|
||||
if (!isOpen || !triggerButton || !menuRef) return;
|
||||
|
||||
const triggerRect = triggerButton.getBoundingClientRect();
|
||||
const viewportWidth = window.innerWidth;
|
||||
const viewportHeight = window.innerHeight;
|
||||
|
||||
if (viewportWidth === 0 || viewportHeight === 0) return;
|
||||
|
||||
const scrollWidth = menuRef.scrollWidth;
|
||||
const scrollHeight = menuRef.scrollHeight;
|
||||
|
||||
const availableWidth = Math.max(0, viewportWidth - VIEWPORT_GUTTER * 2);
|
||||
const constrainedMaxWidth = Math.min(MENU_MAX_WIDTH, availableWidth || MENU_MAX_WIDTH);
|
||||
const safeMaxWidth =
|
||||
constrainedMaxWidth > 0 ? constrainedMaxWidth : Math.min(MENU_MAX_WIDTH, viewportWidth);
|
||||
const desiredMinWidth = Math.min(160, safeMaxWidth || 160);
|
||||
|
||||
let width = Math.min(
|
||||
Math.max(triggerRect.width, scrollWidth, desiredMinWidth),
|
||||
safeMaxWidth || 320
|
||||
);
|
||||
|
||||
const availableBelow = Math.max(
|
||||
0,
|
||||
viewportHeight - VIEWPORT_GUTTER - triggerRect.bottom - MENU_OFFSET
|
||||
);
|
||||
const availableAbove = Math.max(0, triggerRect.top - VIEWPORT_GUTTER - MENU_OFFSET);
|
||||
const viewportAllowance = Math.max(0, viewportHeight - VIEWPORT_GUTTER * 2);
|
||||
const fallbackAllowance = Math.max(1, viewportAllowance > 0 ? viewportAllowance : scrollHeight);
|
||||
|
||||
function computePlacement(placement: 'top' | 'bottom') {
|
||||
const available = placement === 'bottom' ? availableBelow : availableAbove;
|
||||
const allowedHeight =
|
||||
available > 0 ? Math.min(available, fallbackAllowance) : fallbackAllowance;
|
||||
const maxHeight = Math.min(scrollHeight, allowedHeight);
|
||||
const height = Math.max(0, maxHeight);
|
||||
|
||||
let top: number;
|
||||
if (placement === 'bottom') {
|
||||
const rawTop = triggerRect.bottom + MENU_OFFSET;
|
||||
const minTop = VIEWPORT_GUTTER;
|
||||
const maxTop = viewportHeight - VIEWPORT_GUTTER - height;
|
||||
if (maxTop < minTop) {
|
||||
top = minTop;
|
||||
} else {
|
||||
top = Math.min(Math.max(rawTop, minTop), maxTop);
|
||||
}
|
||||
} else {
|
||||
const rawTop = triggerRect.top - MENU_OFFSET - height;
|
||||
const minTop = VIEWPORT_GUTTER;
|
||||
const maxTop = viewportHeight - VIEWPORT_GUTTER - height;
|
||||
if (maxTop < minTop) {
|
||||
top = minTop;
|
||||
} else {
|
||||
top = Math.max(Math.min(rawTop, maxTop), minTop);
|
||||
}
|
||||
}
|
||||
|
||||
return { placement, top, height, maxHeight };
|
||||
}
|
||||
|
||||
const belowMetrics = computePlacement('bottom');
|
||||
const aboveMetrics = computePlacement('top');
|
||||
|
||||
let metrics = belowMetrics;
|
||||
if (scrollHeight > belowMetrics.maxHeight && aboveMetrics.maxHeight > belowMetrics.maxHeight) {
|
||||
metrics = aboveMetrics;
|
||||
}
|
||||
|
||||
let left = triggerRect.right - width;
|
||||
const maxLeft = viewportWidth - VIEWPORT_GUTTER - width;
|
||||
if (maxLeft < VIEWPORT_GUTTER) {
|
||||
left = VIEWPORT_GUTTER;
|
||||
} else {
|
||||
if (left > maxLeft) {
|
||||
left = maxLeft;
|
||||
}
|
||||
if (left < VIEWPORT_GUTTER) {
|
||||
left = VIEWPORT_GUTTER;
|
||||
}
|
||||
}
|
||||
|
||||
menuPosition = {
|
||||
top: Math.round(metrics.top),
|
||||
left: Math.round(left),
|
||||
width: Math.round(width),
|
||||
placement: metrics.placement,
|
||||
maxHeight: Math.round(metrics.maxHeight)
|
||||
};
|
||||
}
|
||||
|
||||
async function handleSelect(modelId: string) {
|
||||
const option = options.find((opt) => opt.id === modelId);
|
||||
if (!option) return;
|
||||
|
||||
let shouldCloseMenu = true;
|
||||
|
||||
if (onModelChange) {
|
||||
// If callback provided, use it (for regenerate functionality)
|
||||
const result = await onModelChange(option.id, option.model);
|
||||
|
||||
// If callback returns false, keep menu open (validation failed)
|
||||
if (result === false) {
|
||||
shouldCloseMenu = false;
|
||||
}
|
||||
} else {
|
||||
// Update global selection
|
||||
await modelsStore.selectModelById(option.id);
|
||||
|
||||
// Load the model if not already loaded (router mode)
|
||||
if (isRouter && getModelStatus(option.model) !== ServerModelStatus.LOADED) {
|
||||
try {
|
||||
await modelsStore.loadModel(option.model);
|
||||
} catch (error) {
|
||||
console.error('Failed to load model:', error);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (shouldCloseMenu) {
|
||||
closeMenu();
|
||||
}
|
||||
}
|
||||
|
||||
function getDisplayOption(): ModelOption | undefined {
|
||||
if (!isRouter) {
|
||||
if (serverModel) {
|
||||
return {
|
||||
id: 'current',
|
||||
model: serverModel,
|
||||
name: serverModel.split('/').pop() || serverModel,
|
||||
capabilities: [] // Empty array for single model mode
|
||||
};
|
||||
}
|
||||
|
||||
return undefined;
|
||||
}
|
||||
|
||||
// When useGlobalSelection is true (form selector), prioritize user selection
|
||||
// Otherwise (message display), prioritize currentModel
|
||||
if (useGlobalSelection && activeId) {
|
||||
const selected = options.find((option) => option.id === activeId);
|
||||
if (selected) return selected;
|
||||
}
|
||||
|
||||
// Show currentModel (from message payload or conversation)
|
||||
if (currentModel) {
|
||||
if (!isCurrentModelInCache()) {
|
||||
return {
|
||||
id: 'not-in-cache',
|
||||
model: currentModel,
|
||||
name: currentModel.split('/').pop() || currentModel,
|
||||
capabilities: []
|
||||
};
|
||||
}
|
||||
|
||||
return options.find((option) => option.model === currentModel);
|
||||
}
|
||||
|
||||
// Fallback to user selection (for new chats before first message)
|
||||
if (activeId) {
|
||||
return options.find((option) => option.id === activeId);
|
||||
}
|
||||
|
||||
// No selection - return undefined to show "Select model"
|
||||
return undefined;
|
||||
}
|
||||
</script>
|
||||
|
||||
<svelte:window onresize={handleResize} />
|
||||
<svelte:document onpointerdown={handlePointerDown} onkeydown={handleKeydown} />
|
||||
|
||||
<div class={cn('relative inline-flex flex-col items-end gap-1', className)} bind:this={container}>
|
||||
{#if loading && options.length === 0 && isRouter}
|
||||
<div class="flex items-center gap-2 text-xs text-muted-foreground">
|
||||
<Loader2 class="h-3.5 w-3.5 animate-spin" />
|
||||
Loading models…
|
||||
</div>
|
||||
{:else if options.length === 0 && isRouter}
|
||||
<p class="text-xs text-muted-foreground">No models available.</p>
|
||||
{:else}
|
||||
{@const selectedOption = getDisplayOption()}
|
||||
|
||||
<div class="relative">
|
||||
<button
|
||||
type="button"
|
||||
class={cn(
|
||||
`inline-flex cursor-pointer items-center gap-1.5 rounded-sm bg-muted-foreground/10 px-1.5 py-1 text-xs transition hover:text-foreground focus:outline-none focus-visible:ring-2 focus-visible:ring-ring focus-visible:ring-offset-2 disabled:cursor-not-allowed disabled:opacity-60`,
|
||||
!isCurrentModelInCache()
|
||||
? 'bg-red-400/10 !text-red-400 hover:bg-red-400/20 hover:text-red-400'
|
||||
: forceForegroundText
|
||||
? 'text-foreground'
|
||||
: isHighlightedCurrentModelActive
|
||||
? 'text-foreground'
|
||||
: 'text-muted-foreground',
|
||||
isOpen ? 'text-foreground' : '',
|
||||
className
|
||||
)}
|
||||
style="max-width: min(calc(100cqw - 6.5rem), 32rem)"
|
||||
aria-haspopup={isRouter ? 'listbox' : undefined}
|
||||
aria-expanded={isRouter ? isOpen : undefined}
|
||||
onclick={toggleOpen}
|
||||
bind:this={triggerButton}
|
||||
disabled={disabled || updating}
|
||||
>
|
||||
<Package class="h-3.5 w-3.5" />
|
||||
|
||||
<span class="truncate font-medium">
|
||||
{selectedOption?.model || 'Select model'}
|
||||
</span>
|
||||
|
||||
{#if updating}
|
||||
<Loader2 class="h-3 w-3.5 animate-spin" />
|
||||
{:else if isRouter}
|
||||
<ChevronDown class="h-3 w-3.5" />
|
||||
{/if}
|
||||
</button>
|
||||
|
||||
{#if isOpen && isRouter}
|
||||
<div
|
||||
bind:this={menuRef}
|
||||
use:portalToBody
|
||||
class={cn(
|
||||
'fixed z-[1000] overflow-hidden rounded-md border bg-popover shadow-lg transition-opacity',
|
||||
menuPosition ? 'opacity-100' : 'pointer-events-none opacity-0'
|
||||
)}
|
||||
role="listbox"
|
||||
style:top={menuPosition ? `${menuPosition.top}px` : undefined}
|
||||
style:left={menuPosition ? `${menuPosition.left}px` : undefined}
|
||||
style:width={menuPosition ? `${menuPosition.width}px` : undefined}
|
||||
data-placement={menuPosition?.placement ?? 'bottom'}
|
||||
>
|
||||
<div
|
||||
class="overflow-y-auto py-1"
|
||||
style:max-height={menuPosition && menuPosition.maxHeight > 0
|
||||
? `${menuPosition.maxHeight}px`
|
||||
: undefined}
|
||||
>
|
||||
{#if !isCurrentModelInCache() && currentModel}
|
||||
<!-- Show unavailable model as first option (disabled) -->
|
||||
<button
|
||||
type="button"
|
||||
class="flex w-full cursor-not-allowed items-center bg-red-400/10 px-3 py-2 text-left text-sm text-red-400"
|
||||
role="option"
|
||||
aria-selected="true"
|
||||
aria-disabled="true"
|
||||
disabled
|
||||
>
|
||||
<span class="truncate">{selectedOption?.name || currentModel}</span>
|
||||
<span class="ml-2 text-xs whitespace-nowrap opacity-70">(not available)</span>
|
||||
</button>
|
||||
<div class="my-1 h-px bg-border"></div>
|
||||
{/if}
|
||||
{#each options as option (option.id)}
|
||||
{@const status = getModelStatus(option.model)}
|
||||
{@const isLoaded = status === ServerModelStatus.LOADED}
|
||||
{@const isLoading = status === ServerModelStatus.LOADING}
|
||||
{@const isSelected = currentModel === option.model || activeId === option.id}
|
||||
{@const isCompatible = isModelCompatible(option)}
|
||||
{@const missingModalities = getMissingModalities(option)}
|
||||
<div
|
||||
class={cn(
|
||||
'group flex w-full items-center gap-2 px-3 py-2 text-left text-sm transition focus:outline-none',
|
||||
isCompatible
|
||||
? 'cursor-pointer hover:bg-muted focus:bg-muted'
|
||||
: 'cursor-not-allowed opacity-50',
|
||||
isSelected
|
||||
? 'bg-accent text-accent-foreground'
|
||||
: isCompatible
|
||||
? 'hover:bg-accent hover:text-accent-foreground'
|
||||
: '',
|
||||
isLoaded ? 'text-popover-foreground' : 'text-muted-foreground'
|
||||
)}
|
||||
role="option"
|
||||
aria-selected={isSelected}
|
||||
aria-disabled={!isCompatible}
|
||||
tabindex={isCompatible ? 0 : -1}
|
||||
onclick={() => isCompatible && handleSelect(option.id)}
|
||||
onkeydown={(e) => {
|
||||
if (isCompatible && (e.key === 'Enter' || e.key === ' ')) {
|
||||
e.preventDefault();
|
||||
handleSelect(option.id);
|
||||
}
|
||||
}}
|
||||
>
|
||||
<span class="min-w-0 flex-1 truncate">{option.model}</span>
|
||||
|
||||
{#if missingModalities}
|
||||
<span class="flex shrink-0 items-center gap-1 text-muted-foreground/70">
|
||||
{#if missingModalities.vision}
|
||||
<Tooltip.Root>
|
||||
<Tooltip.Trigger>
|
||||
<EyeOff class="h-3.5 w-3.5" />
|
||||
</Tooltip.Trigger>
|
||||
<Tooltip.Content class="z-[9999]">
|
||||
<p>No vision support</p>
|
||||
</Tooltip.Content>
|
||||
</Tooltip.Root>
|
||||
{/if}
|
||||
{#if missingModalities.audio}
|
||||
<Tooltip.Root>
|
||||
<Tooltip.Trigger>
|
||||
<MicOff class="h-3.5 w-3.5" />
|
||||
</Tooltip.Trigger>
|
||||
<Tooltip.Content class="z-[9999]">
|
||||
<p>No audio support</p>
|
||||
</Tooltip.Content>
|
||||
</Tooltip.Root>
|
||||
{/if}
|
||||
</span>
|
||||
{/if}
|
||||
|
||||
{#if isLoading}
|
||||
<Tooltip.Root>
|
||||
<Tooltip.Trigger>
|
||||
<Loader2 class="h-4 w-4 shrink-0 animate-spin text-muted-foreground" />
|
||||
</Tooltip.Trigger>
|
||||
<Tooltip.Content class="z-[9999]">
|
||||
<p>Loading model...</p>
|
||||
</Tooltip.Content>
|
||||
</Tooltip.Root>
|
||||
{:else if isLoaded}
|
||||
<Tooltip.Root>
|
||||
<Tooltip.Trigger>
|
||||
<button
|
||||
type="button"
|
||||
class="relative ml-2 flex h-4 w-4 shrink-0 items-center justify-center"
|
||||
onclick={(e) => {
|
||||
e.stopPropagation();
|
||||
modelsStore.unloadModel(option.model);
|
||||
}}
|
||||
>
|
||||
<span
|
||||
class="mr-2 h-2 w-2 rounded-full bg-green-500 transition-opacity group-hover:opacity-0"
|
||||
></span>
|
||||
<Power
|
||||
class="absolute mr-2 h-4 w-4 text-red-500 opacity-0 transition-opacity group-hover:opacity-100 hover:text-red-600"
|
||||
/>
|
||||
</button>
|
||||
</Tooltip.Trigger>
|
||||
<Tooltip.Content class="z-[9999]">
|
||||
<p>Unload model</p>
|
||||
</Tooltip.Content>
|
||||
</Tooltip.Root>
|
||||
{:else}
|
||||
<span class="mx-2 h-2 w-2 rounded-full bg-muted-foreground/50"></span>
|
||||
{/if}
|
||||
</div>
|
||||
{/each}
|
||||
</div>
|
||||
</div>
|
||||
{/if}
|
||||
</div>
|
||||
{/if}
|
||||
</div>
|
||||
|
||||
{#if showModelDialog && !isRouter}
|
||||
<DialogModelInformation bind:open={showModelDialog} />
|
||||
{/if}
|
||||
|
|
@ -5,7 +5,7 @@
|
|||
import { Input } from '$lib/components/ui/input';
|
||||
import Label from '$lib/components/ui/label/label.svelte';
|
||||
import { serverStore, serverLoading } from '$lib/stores/server.svelte';
|
||||
import { config, updateConfig } from '$lib/stores/settings.svelte';
|
||||
import { config, settingsStore } from '$lib/stores/settings.svelte';
|
||||
import { fade, fly, scale } from 'svelte/transition';
|
||||
|
||||
interface Props {
|
||||
|
|
@ -42,7 +42,7 @@
|
|||
if (onRetry) {
|
||||
onRetry();
|
||||
} else {
|
||||
serverStore.fetchServerProps();
|
||||
serverStore.fetch();
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -61,7 +61,7 @@
|
|||
|
||||
try {
|
||||
// Update the API key in settings first
|
||||
updateConfig('apiKey', apiKeyInput.trim());
|
||||
settingsStore.updateConfig('apiKey', apiKeyInput.trim());
|
||||
|
||||
// Test the API key by making a real request to the server
|
||||
const response = await fetch('./props', {
|
||||
|
|
|
|||
|
|
@ -1,43 +0,0 @@
|
|||
<script lang="ts">
|
||||
import { Server, Eye, Mic } from '@lucide/svelte';
|
||||
import { Badge } from '$lib/components/ui/badge';
|
||||
import { serverStore } from '$lib/stores/server.svelte';
|
||||
|
||||
let modalities = $derived(serverStore.supportedModalities);
|
||||
let model = $derived(serverStore.modelName);
|
||||
let props = $derived(serverStore.serverProps);
|
||||
</script>
|
||||
|
||||
{#if props}
|
||||
<div class="flex flex-wrap items-center justify-center gap-4 text-sm text-muted-foreground">
|
||||
{#if model}
|
||||
<Badge variant="outline" class="text-xs">
|
||||
<Server class="mr-1 h-3 w-3" />
|
||||
|
||||
<span class="block max-w-[50vw] truncate">{model}</span>
|
||||
</Badge>
|
||||
{/if}
|
||||
|
||||
<div class="flex gap-4">
|
||||
{#if props.default_generation_settings.n_ctx}
|
||||
<Badge variant="secondary" class="text-xs">
|
||||
ctx: {props.default_generation_settings.n_ctx.toLocaleString()}
|
||||
</Badge>
|
||||
{/if}
|
||||
|
||||
{#if modalities.length > 0}
|
||||
{#each modalities as modality (modality)}
|
||||
<Badge variant="secondary" class="text-xs">
|
||||
{#if modality === 'vision'}
|
||||
<Eye class="mr-1 h-3 w-3" />
|
||||
{:else if modality === 'audio'}
|
||||
<Mic class="mr-1 h-3 w-3" />
|
||||
{/if}
|
||||
|
||||
{modality}
|
||||
</Badge>
|
||||
{/each}
|
||||
{/if}
|
||||
</div>
|
||||
</div>
|
||||
{/if}
|
||||
|
|
@ -2,7 +2,8 @@
|
|||
import { AlertTriangle, Server } from '@lucide/svelte';
|
||||
import { Badge } from '$lib/components/ui/badge';
|
||||
import { Button } from '$lib/components/ui/button';
|
||||
import { serverProps, serverLoading, serverError, modelName } from '$lib/stores/server.svelte';
|
||||
import { serverProps, serverLoading, serverError } from '$lib/stores/server.svelte';
|
||||
import { singleModelName } from '$lib/stores/models.svelte';
|
||||
|
||||
interface Props {
|
||||
class?: string;
|
||||
|
|
@ -13,7 +14,7 @@
|
|||
|
||||
let error = $derived(serverError());
|
||||
let loading = $derived(serverLoading());
|
||||
let model = $derived(modelName());
|
||||
let model = $derived(singleModelName());
|
||||
let serverData = $derived(serverProps());
|
||||
|
||||
function getStatusColor() {
|
||||
|
|
|
|||
|
|
@ -0,0 +1,23 @@
|
|||
<script lang="ts">
|
||||
import type { HTMLAttributes } from 'svelte/elements';
|
||||
import { cn, type WithElementRef } from '$lib/components/ui/utils.js';
|
||||
|
||||
let {
|
||||
ref = $bindable(null),
|
||||
class: className,
|
||||
children,
|
||||
...restProps
|
||||
}: WithElementRef<HTMLAttributes<HTMLDivElement>> = $props();
|
||||
</script>
|
||||
|
||||
<div
|
||||
bind:this={ref}
|
||||
data-slot="alert-description"
|
||||
class={cn(
|
||||
'col-start-2 grid justify-items-start gap-1 text-sm text-muted-foreground [&_p]:leading-relaxed',
|
||||
className
|
||||
)}
|
||||
{...restProps}
|
||||
>
|
||||
{@render children?.()}
|
||||
</div>
|
||||
|
|
@ -0,0 +1,20 @@
|
|||
<script lang="ts">
|
||||
import type { HTMLAttributes } from 'svelte/elements';
|
||||
import { cn, type WithElementRef } from '$lib/components/ui/utils.js';
|
||||
|
||||
let {
|
||||
ref = $bindable(null),
|
||||
class: className,
|
||||
children,
|
||||
...restProps
|
||||
}: WithElementRef<HTMLAttributes<HTMLDivElement>> = $props();
|
||||
</script>
|
||||
|
||||
<div
|
||||
bind:this={ref}
|
||||
data-slot="alert-title"
|
||||
class={cn('col-start-2 line-clamp-1 min-h-4 font-medium tracking-tight', className)}
|
||||
{...restProps}
|
||||
>
|
||||
{@render children?.()}
|
||||
</div>
|
||||
|
|
@ -0,0 +1,44 @@
|
|||
<script lang="ts" module>
|
||||
import { type VariantProps, tv } from 'tailwind-variants';
|
||||
|
||||
export const alertVariants = tv({
|
||||
base: 'relative grid w-full grid-cols-[0_1fr] items-start gap-y-0.5 rounded-lg border px-4 py-3 text-sm has-[>svg]:grid-cols-[calc(var(--spacing)*4)_1fr] has-[>svg]:gap-x-3 [&>svg]:size-4 [&>svg]:translate-y-0.5 [&>svg]:text-current',
|
||||
variants: {
|
||||
variant: {
|
||||
default: 'bg-card text-card-foreground',
|
||||
destructive:
|
||||
'text-destructive bg-card *:data-[slot=alert-description]:text-destructive/90 [&>svg]:text-current'
|
||||
}
|
||||
},
|
||||
defaultVariants: {
|
||||
variant: 'default'
|
||||
}
|
||||
});
|
||||
|
||||
export type AlertVariant = VariantProps<typeof alertVariants>['variant'];
|
||||
</script>
|
||||
|
||||
<script lang="ts">
|
||||
import type { HTMLAttributes } from 'svelte/elements';
|
||||
import { cn, type WithElementRef } from '$lib/components/ui/utils.js';
|
||||
|
||||
let {
|
||||
ref = $bindable(null),
|
||||
class: className,
|
||||
variant = 'default',
|
||||
children,
|
||||
...restProps
|
||||
}: WithElementRef<HTMLAttributes<HTMLDivElement>> & {
|
||||
variant?: AlertVariant;
|
||||
} = $props();
|
||||
</script>
|
||||
|
||||
<div
|
||||
bind:this={ref}
|
||||
data-slot="alert"
|
||||
class={cn(alertVariants({ variant }), className)}
|
||||
{...restProps}
|
||||
role="alert"
|
||||
>
|
||||
{@render children?.()}
|
||||
</div>
|
||||
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue