Merge remote-tracking branch 'upstream/master'
This commit is contained in:
commit
2abcdd6379
|
|
@ -49,7 +49,7 @@ RUN source /usr/local/Ascend/ascend-toolkit/set_env.sh --force \
|
|||
# -- Organize build artifacts for copying in later stages --
|
||||
# Create a lib directory to store all .so files
|
||||
RUN mkdir -p /app/lib && \
|
||||
find build -name "*.so" -exec cp {} /app/lib \;
|
||||
find build -name "*.so*" -exec cp -P {} /app/lib \;
|
||||
|
||||
# Create a full directory to store all executables and Python scripts
|
||||
RUN mkdir -p /app/full && \
|
||||
|
|
|
|||
|
|
@ -20,7 +20,7 @@ RUN if [ "$TARGETARCH" = "amd64" ] || [ "$TARGETARCH" = "arm64" ]; then \
|
|||
cmake --build build -j $(nproc)
|
||||
|
||||
RUN mkdir -p /app/lib && \
|
||||
find build -name "*.so" -exec cp {} /app/lib \;
|
||||
find build -name "*.so*" -exec cp -P {} /app/lib \;
|
||||
|
||||
RUN mkdir -p /app/full \
|
||||
&& cp build/bin/* /app/full \
|
||||
|
|
|
|||
|
|
@ -25,7 +25,7 @@ RUN if [ "${CUDA_DOCKER_ARCH}" != "default" ]; then \
|
|||
cmake --build build --config Release -j$(nproc)
|
||||
|
||||
RUN mkdir -p /app/lib && \
|
||||
find build -name "*.so" -exec cp {} /app/lib \;
|
||||
find build -name "*.so*" -exec cp -P {} /app/lib \;
|
||||
|
||||
RUN mkdir -p /app/full \
|
||||
&& cp build/bin/* /app/full \
|
||||
|
|
|
|||
|
|
@ -21,7 +21,7 @@ RUN if [ "${GGML_SYCL_F16}" = "ON" ]; then \
|
|||
cmake --build build --config Release -j$(nproc)
|
||||
|
||||
RUN mkdir -p /app/lib && \
|
||||
find build -name "*.so" -exec cp {} /app/lib \;
|
||||
find build -name "*.so*" -exec cp -P {} /app/lib \;
|
||||
|
||||
RUN mkdir -p /app/full \
|
||||
&& cp build/bin/* /app/full \
|
||||
|
|
|
|||
|
|
@ -32,7 +32,7 @@ RUN if [ "${MUSA_DOCKER_ARCH}" != "default" ]; then \
|
|||
cmake --build build --config Release -j$(nproc)
|
||||
|
||||
RUN mkdir -p /app/lib && \
|
||||
find build -name "*.so" -exec cp {} /app/lib \;
|
||||
find build -name "*.so*" -exec cp -P {} /app/lib \;
|
||||
|
||||
RUN mkdir -p /app/full \
|
||||
&& cp build/bin/* /app/full \
|
||||
|
|
|
|||
|
|
@ -45,7 +45,7 @@ RUN HIPCXX="$(hipconfig -l)/clang" HIP_PATH="$(hipconfig -R)" \
|
|||
&& cmake --build build --config Release -j$(nproc)
|
||||
|
||||
RUN mkdir -p /app/lib \
|
||||
&& find build -name "*.so" -exec cp {} /app/lib \;
|
||||
&& find build -name "*.so*" -exec cp -P {} /app/lib \;
|
||||
|
||||
RUN mkdir -p /app/full \
|
||||
&& cp build/bin/* /app/full \
|
||||
|
|
|
|||
|
|
@ -20,7 +20,7 @@ RUN cmake -B build -DGGML_NATIVE=OFF -DGGML_VULKAN=ON -DLLAMA_BUILD_TESTS=OFF -D
|
|||
cmake --build build --config Release -j$(nproc)
|
||||
|
||||
RUN mkdir -p /app/lib && \
|
||||
find build -name "*.so" -exec cp {} /app/lib \;
|
||||
find build -name "*.so*" -exec cp -P {} /app/lib \;
|
||||
|
||||
RUN mkdir -p /app/full \
|
||||
&& cp build/bin/* /app/full \
|
||||
|
|
|
|||
|
|
@ -0,0 +1,52 @@
|
|||
# Verifies that the files under vendor/ match what scripts/sync_vendor.py
# produces: the script is re-run on a fresh checkout and the job fails if it
# leaves the working tree modified.
name: Check vendor

on:
  workflow_dispatch: # allows manual triggering
  push:
    branches:
      - master
    paths: [
      'vendor/**',
      'scripts/sync_vendor.py'
    ]

  pull_request:
    types: [opened, synchronize, reopened]
    paths: [
      'vendor/**',
      'scripts/sync_vendor.py'
    ]

jobs:
  check-vendor:
    runs-on: ubuntu-latest

    steps:
      - name: Checkout
        uses: actions/checkout@v4
        with:
          fetch-depth: 0

      - name: Setup Python
        uses: actions/setup-python@v4
        with:
          python-version: '3.x'

      - name: Run vendor sync
        run: |
          set -euo pipefail
          python3 scripts/sync_vendor.py

      - name: Check for changes
        run: |
          set -euo pipefail
          # detect modified or untracked files; no "|| true" here, so a failing
          # git invocation aborts the step instead of looking like "no changes"
          changed=$(git status --porcelain --untracked-files=all)
          if [ -n "$changed" ]; then
            echo "Vendor sync modified files:"
            # porcelain v1 lines are "XY <path>": drop the 3-character status
            # prefix so paths with spaces and rename targets are printed whole
            printf '%s\n' "$changed" | cut -c4-
            echo "Failing because vendor files mismatch. Please update scripts/sync_vendor.py"
            exit 1
          else
            echo "Vendor files are up-to-date."
          fi
|
||||
|
|
@ -209,7 +209,7 @@ jobs:
|
|||
working-directory: tools/server/webui
|
||||
|
||||
- name: Run UI tests
|
||||
run: npm run test:ui
|
||||
run: npm run test:ui -- --testTimeout=60000
|
||||
working-directory: tools/server/webui
|
||||
|
||||
- name: Run E2E tests
|
||||
|
|
|
|||
|
|
@ -102,6 +102,7 @@ option(LLAMA_TOOLS_INSTALL "llama: install tools" ${LLAMA_TOOLS_INSTALL_
|
|||
|
||||
# 3rd party libs
|
||||
option(LLAMA_CURL "llama: use libcurl to download model from an URL" ON)
|
||||
option(LLAMA_HTTPLIB "llama: if libcurl is disabled, use httplib to download model from an URL" ON)
|
||||
option(LLAMA_OPENSSL "llama: use openssl to support HTTPS" OFF)
|
||||
option(LLAMA_LLGUIDANCE "llama-common: include LLGuidance library for structured output in common utils" OFF)
|
||||
|
||||
|
|
@ -210,7 +211,9 @@ endif()
|
|||
|
||||
if (LLAMA_BUILD_COMMON)
|
||||
add_subdirectory(common)
|
||||
add_subdirectory(vendor/cpp-httplib)
|
||||
if (LLAMA_HTTPLIB)
|
||||
add_subdirectory(vendor/cpp-httplib)
|
||||
endif()
|
||||
endif()
|
||||
|
||||
if (LLAMA_BUILD_COMMON AND LLAMA_BUILD_TESTS AND NOT CMAKE_JS_VERSION)
|
||||
|
|
|
|||
|
|
@ -454,6 +454,8 @@ cmake -B build-visionos -G Xcode \
|
|||
-DCMAKE_C_FLAGS="-D_XOPEN_SOURCE=700 ${COMMON_C_FLAGS}" \
|
||||
-DCMAKE_CXX_FLAGS="-D_XOPEN_SOURCE=700 ${COMMON_CXX_FLAGS}" \
|
||||
-DLLAMA_CURL=OFF \
|
||||
-DLLAMA_HTTPLIB=OFF \
|
||||
-DLLAMA_BUILD_SERVER=OFF \
|
||||
-S .
|
||||
cmake --build build-visionos --config Release -- -quiet
|
||||
|
||||
|
|
@ -468,6 +470,8 @@ cmake -B build-visionos-sim -G Xcode \
|
|||
-DCMAKE_C_FLAGS="-D_XOPEN_SOURCE=700 ${COMMON_C_FLAGS}" \
|
||||
-DCMAKE_CXX_FLAGS="-D_XOPEN_SOURCE=700 ${COMMON_CXX_FLAGS}" \
|
||||
-DLLAMA_CURL=OFF \
|
||||
-DLLAMA_HTTPLIB=OFF \
|
||||
-DLLAMA_BUILD_SERVER=OFF \
|
||||
-S .
|
||||
cmake --build build-visionos-sim --config Release -- -quiet
|
||||
|
||||
|
|
|
|||
|
|
@ -91,47 +91,12 @@ if (LLAMA_CURL)
|
|||
target_compile_definitions(${TARGET} PUBLIC LLAMA_USE_CURL)
|
||||
include_directories(${CURL_INCLUDE_DIRS})
|
||||
set(LLAMA_COMMON_EXTRA_LIBS ${LLAMA_COMMON_EXTRA_LIBS} ${CURL_LIBRARIES})
|
||||
else()
|
||||
elseif (LLAMA_HTTPLIB)
|
||||
# otherwise, use cpp-httplib
|
||||
target_compile_definitions(${TARGET} PUBLIC LLAMA_USE_HTTPLIB)
|
||||
set(LLAMA_COMMON_EXTRA_LIBS ${LLAMA_COMMON_EXTRA_LIBS} cpp-httplib)
|
||||
endif()
|
||||
|
||||
# Optional HTTPS support for cpp-httplib.
#
# When LLAMA_OPENSSL is ON, look for OpenSSL and compile a small probe against
# <openssl/opensslv.h> to check the version number. Only when the probe
# succeeds is CPPHTTPLIB_OPENSSL_SUPPORT defined on ${TARGET} and the OpenSSL
# imported targets linked. Every path that leaves SSL disabled prints a STATUS
# message so the outcome is visible in the configure log.
if (LLAMA_OPENSSL)
    find_package(OpenSSL)
    if (OpenSSL_FOUND)
        include(CheckCSourceCompiles)

        # Point the probe at the discovered headers, then restore the caller's value.
        set(SAVED_CMAKE_REQUIRED_INCLUDES "${CMAKE_REQUIRED_INCLUDES}")
        set(CMAKE_REQUIRED_INCLUDES "${OPENSSL_INCLUDE_DIR}")
        check_c_source_compiles("
            #include <openssl/opensslv.h>
            #if defined(OPENSSL_IS_BORINGSSL) || defined(LIBRESSL_VERSION_NUMBER)
            # if OPENSSL_VERSION_NUMBER < 0x1010107f
            # error bad version
            # endif
            #else
            # if OPENSSL_VERSION_NUMBER < 0x30000000L
            # error bad version
            # endif
            #endif
            int main() { return 0; }
        " OPENSSL_VERSION_SUPPORTED)
        set(CMAKE_REQUIRED_INCLUDES "${SAVED_CMAKE_REQUIRED_INCLUDES}")

        if (OPENSSL_VERSION_SUPPORTED)
            message(STATUS "OpenSSL found: ${OPENSSL_VERSION}")
            target_compile_definitions(${TARGET} PUBLIC CPPHTTPLIB_OPENSSL_SUPPORT)
            target_link_libraries(${TARGET} PUBLIC OpenSSL::SSL OpenSSL::Crypto)
            if (APPLE AND CMAKE_SYSTEM_NAME STREQUAL "Darwin")
                # On macOS, additionally enable loading certificates from the keychain,
                # which needs the CoreFoundation and Security frameworks.
                target_compile_definitions(${TARGET} PUBLIC CPPHTTPLIB_USE_CERTS_FROM_MACOSX_KEYCHAIN)
                find_library(CORE_FOUNDATION_FRAMEWORK CoreFoundation REQUIRED)
                find_library(SECURITY_FRAMEWORK Security REQUIRED)
                target_link_libraries(${TARGET} PUBLIC ${CORE_FOUNDATION_FRAMEWORK} ${SECURITY_FRAMEWORK})
            endif()
        else()
            # Previously this case was silent, leaving no hint why HTTPS was unavailable.
            message(STATUS "OpenSSL found (${OPENSSL_VERSION}) but the version is not supported, SSL support disabled")
        endif()
    else()
        message(STATUS "OpenSSL not found, SSL support disabled")
    endif()
endif()
|
||||
|
||||
if (LLAMA_LLGUIDANCE)
|
||||
include(ExternalProject)
|
||||
set(LLGUIDANCE_SRC ${CMAKE_BINARY_DIR}/llguidance/source)
|
||||
|
|
|
|||
|
|
@ -20,7 +20,7 @@
|
|||
#if defined(LLAMA_USE_CURL)
|
||||
#include <curl/curl.h>
|
||||
#include <curl/easy.h>
|
||||
#else
|
||||
#elif defined(LLAMA_USE_HTTPLIB)
|
||||
#include "http.h"
|
||||
#endif
|
||||
|
||||
|
|
@ -470,7 +470,7 @@ std::pair<long, std::vector<char>> common_remote_get_content(const std::string &
|
|||
return { res_code, std::move(res_buffer) };
|
||||
}
|
||||
|
||||
#else
|
||||
#elif defined(LLAMA_USE_HTTPLIB)
|
||||
|
||||
static bool is_output_a_tty() {
|
||||
#if defined(_WIN32)
|
||||
|
|
@ -716,6 +716,8 @@ std::pair<long, std::vector<char>> common_remote_get_content(const std::string
|
|||
|
||||
#endif // LLAMA_USE_CURL
|
||||
|
||||
#if defined(LLAMA_USE_CURL) || defined(LLAMA_USE_HTTPLIB)
|
||||
|
||||
static bool common_download_file_single(const std::string & url,
|
||||
const std::string & path,
|
||||
const std::string & bearer_token,
|
||||
|
|
@ -910,33 +912,6 @@ common_hf_file_res common_get_hf_file(const std::string & hf_repo_with_tag, cons
|
|||
return { hf_repo, ggufFile, mmprojFile };
|
||||
}
|
||||
|
||||
std::vector<common_cached_model_info> common_list_cached_models() {
|
||||
std::vector<common_cached_model_info> models;
|
||||
const std::string cache_dir = fs_get_cache_directory();
|
||||
const std::vector<common_file_info> files = fs_list_files(cache_dir);
|
||||
for (const auto & file : files) {
|
||||
if (string_starts_with(file.name, "manifest=") && string_ends_with(file.name, ".json")) {
|
||||
common_cached_model_info model_info;
|
||||
model_info.manifest_path = file.path;
|
||||
std::string fname = file.name;
|
||||
string_replace_all(fname, ".json", ""); // remove extension
|
||||
auto parts = string_split<std::string>(fname, '=');
|
||||
if (parts.size() == 4) {
|
||||
// expect format: manifest=<user>=<model>=<tag>=<other>
|
||||
model_info.user = parts[1];
|
||||
model_info.model = parts[2];
|
||||
model_info.tag = parts[3];
|
||||
} else {
|
||||
// invalid format
|
||||
continue;
|
||||
}
|
||||
model_info.size = 0; // TODO: get GGUF size, not manifest size
|
||||
models.push_back(model_info);
|
||||
}
|
||||
}
|
||||
return models;
|
||||
}
|
||||
|
||||
//
|
||||
// Docker registry functions
|
||||
//
|
||||
|
|
@ -1055,3 +1030,46 @@ std::string common_docker_resolve_model(const std::string & docker) {
|
|||
throw;
|
||||
}
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
// Stub used on the #else side of the download-support guard (the closing
// #endif is annotated "LLAMA_USE_CURL || LLAMA_USE_HTTPLIB"): the arguments
// are ignored and the call always throws.
common_hf_file_res common_get_hf_file(const std::string &, const std::string &, bool) {
    const char * const reason = "download functionality is not enabled in this build";
    throw std::runtime_error(reason);
}
|
||||
|
||||
// Stub used on the #else side of the download-support guard (the closing
// #endif is annotated "LLAMA_USE_CURL || LLAMA_USE_HTTPLIB"): nothing is
// downloaded and the call always throws.
bool common_download_model(const common_params_model &, const std::string &, bool) {
    const char * const reason = "download functionality is not enabled in this build";
    throw std::runtime_error(reason);
}
|
||||
|
||||
// Stub used on the #else side of the download-support guard (the closing
// #endif is annotated "LLAMA_USE_CURL || LLAMA_USE_HTTPLIB"): the argument is
// ignored and the call always throws.
std::string common_docker_resolve_model(const std::string &) {
    const char * const reason = "download functionality is not enabled in this build";
    throw std::runtime_error(reason);
}
|
||||
|
||||
#endif // LLAMA_USE_CURL || LLAMA_USE_HTTPLIB
|
||||
|
||||
std::vector<common_cached_model_info> common_list_cached_models() {
|
||||
std::vector<common_cached_model_info> models;
|
||||
const std::string cache_dir = fs_get_cache_directory();
|
||||
const std::vector<common_file_info> files = fs_list_files(cache_dir);
|
||||
for (const auto & file : files) {
|
||||
if (string_starts_with(file.name, "manifest=") && string_ends_with(file.name, ".json")) {
|
||||
common_cached_model_info model_info;
|
||||
model_info.manifest_path = file.path;
|
||||
std::string fname = file.name;
|
||||
string_replace_all(fname, ".json", ""); // remove extension
|
||||
auto parts = string_split<std::string>(fname, '=');
|
||||
if (parts.size() == 4) {
|
||||
// expect format: manifest=<user>=<model>=<tag>=<other>
|
||||
model_info.user = parts[1];
|
||||
model_info.model = parts[2];
|
||||
model_info.tag = parts[3];
|
||||
} else {
|
||||
// invalid format
|
||||
continue;
|
||||
}
|
||||
model_info.size = 0; // TODO: get GGUF size, not manifest size
|
||||
models.push_back(model_info);
|
||||
}
|
||||
}
|
||||
return models;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -313,7 +313,12 @@ Converting the matmul weight format from ND to NZ to improve performance. Enable
|
|||
|
||||
### GGML_CANN_ACL_GRAPH
|
||||
|
||||
Operators are executed using ACL graph execution, rather than in op-by-op (eager) mode. Enabled by default.
|
||||
Operators are executed using ACL graph execution, rather than in op-by-op (eager) mode. Enabled by default. This option is only effective if `USE_ACL_GRAPH` was enabled at compilation time. To enable it, recompile using:
|
||||
|
||||
```sh
|
||||
cmake -B build -DGGML_CANN=on -DCMAKE_BUILD_TYPE=release -DUSE_ACL_GRAPH=ON
|
||||
cmake --build build --config release
|
||||
```
|
||||
|
||||
### GGML_CANN_GRAPH_CACHE_CAPACITY
|
||||
|
||||
|
|
|
|||
50
docs/ops.md
50
docs/ops.md
|
|
@ -18,17 +18,17 @@ Legend:
|
|||
| ACC | ❌ | ✅ | ✅ | ✅ | ✅ | ❌ | ✅ | ✅ | ❌ |
|
||||
| ADD | ❌ | ✅ | ✅ | ✅ | 🟡 | 🟡 | ✅ | ✅ | ❌ |
|
||||
| ADD1 | ❌ | ✅ | ✅ | ✅ | ❌ | ❌ | ✅ | ❌ | ❌ |
|
||||
| ADD_ID | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ |
|
||||
| ARANGE | ❌ | ✅ | ✅ | ✅ | ✅ | ❌ | ❌ | ❌ | ❌ |
|
||||
| ADD_ID | ❌ | ❌ | ✅ | ✅ | ❌ | ❌ | ❌ | ❌ | ❌ |
|
||||
| ARANGE | ❌ | ✅ | ✅ | ✅ | ✅ | ❌ | ✅ | ❌ | ❌ |
|
||||
| ARGMAX | ❌ | ✅ | ✅ | ✅ | ✅ | ❌ | ✅ | ✅ | ❌ |
|
||||
| ARGSORT | ❌ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❌ |
|
||||
| CEIL | ❌ | ❌ | ✅ | 🟡 | ❌ | ❌ | ✅ | ❌ | ❌ |
|
||||
| CEIL | ❌ | ❌ | ✅ | 🟡 | ❌ | ❌ | 🟡 | ❌ | ❌ |
|
||||
| CLAMP | ❌ | ✅ | ✅ | ✅ | 🟡 | 🟡 | ✅ | 🟡 | ❌ |
|
||||
| CONCAT | ❌ | ✅ | ✅ | 🟡 | ✅ | 🟡 | ✅ | ✅ | ❌ |
|
||||
| CONT | ❌ | 🟡 | ✅ | ✅ | ✅ | 🟡 | 🟡 | 🟡 | ❌ |
|
||||
| CONV_2D | ❌ | ❌ | ✅ | 🟡 | ❌ | ✅ | ❌ | ✅ | ❌ |
|
||||
| CONV_2D | ❌ | ❌ | ✅ | ✅ | ❌ | ✅ | ❌ | ✅ | ❌ |
|
||||
| CONV_2D_DW | ❌ | ❌ | ✅ | ✅ | ❌ | ❌ | ❌ | ✅ | ❌ |
|
||||
| CONV_3D | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ |
|
||||
| CONV_3D | ❌ | ❌ | ✅ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ |
|
||||
| CONV_TRANSPOSE_1D | ❌ | ✅ | ✅ | ✅ | ✅ | ❌ | ✅ | ✅ | ❌ |
|
||||
| CONV_TRANSPOSE_2D | ❌ | ❌ | ✅ | ✅ | ❌ | ❌ | ❌ | ❌ | ❌ |
|
||||
| COS | ❌ | ✅ | ✅ | ✅ | 🟡 | ❌ | ✅ | 🟡 | ❌ |
|
||||
|
|
@ -36,13 +36,16 @@ Legend:
|
|||
| CPY | ❌ | 🟡 | 🟡 | 🟡 | 🟡 | 🟡 | 🟡 | 🟡 | ❌ |
|
||||
| CROSS_ENTROPY_LOSS | ❌ | ❌ | ✅ | ✅ | ❌ | ❌ | ❌ | ❌ | ❌ |
|
||||
| CROSS_ENTROPY_LOSS_BACK | ❌ | ❌ | ✅ | ✅ | ❌ | ❌ | ❌ | ❌ | ❌ |
|
||||
| CUMSUM | ❌ | ❌ | ✅ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ |
|
||||
| DIAG_MASK_INF | ❌ | ✅ | ✅ | ✅ | 🟡 | 🟡 | ✅ | ✅ | ❌ |
|
||||
| DIV | ❌ | ✅ | ✅ | ✅ | 🟡 | 🟡 | ✅ | ✅ | ❌ |
|
||||
| DUP | ❌ | ✅ | ✅ | 🟡 | 🟡 | 🟡 | ✅ | 🟡 | ❌ |
|
||||
| ELU | ❌ | ✅ | ✅ | 🟡 | 🟡 | ❌ | 🟡 | ❌ | ❌ |
|
||||
| EXP | ❌ | ✅ | ✅ | 🟡 | 🟡 | ❌ | 🟡 | ❌ | ❌ |
|
||||
| EXPM1 | ❌ | ❌ | ✅ | 🟡 | ❌ | ❌ | ❌ | ❌ | ❌ |
|
||||
| FILL | ❌ | ❌ | ✅ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ |
|
||||
| FLASH_ATTN_EXT | ❌ | 🟡 | ✅ | 🟡 | 🟡 | ❌ | ❌ | 🟡 | ❌ |
|
||||
| FLOOR | ❌ | ❌ | ✅ | 🟡 | ❌ | ❌ | ✅ | ❌ | ❌ |
|
||||
| FLOOR | ❌ | ❌ | ✅ | 🟡 | ❌ | ❌ | 🟡 | ❌ | ❌ |
|
||||
| GATED_LINEAR_ATTN | ❌ | ❌ | ✅ | ✅ | ❌ | ❌ | ✅ | ❌ | ❌ |
|
||||
| GEGLU | ❌ | ✅ | ✅ | ✅ | 🟡 | ✅ | ✅ | 🟡 | ❌ |
|
||||
| GEGLU_ERF | ❌ | ✅ | ✅ | ✅ | 🟡 | ✅ | ✅ | 🟡 | ❌ |
|
||||
|
|
@ -57,11 +60,11 @@ Legend:
|
|||
| HARDSIGMOID | ❌ | ✅ | ✅ | 🟡 | 🟡 | ❌ | 🟡 | ❌ | ❌ |
|
||||
| HARDSWISH | ❌ | ✅ | ✅ | 🟡 | 🟡 | ❌ | 🟡 | ❌ | ❌ |
|
||||
| IM2COL | ❌ | ✅ | ✅ | ✅ | 🟡 | ✅ | ✅ | ✅ | ❌ |
|
||||
| IM2COL_3D | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ |
|
||||
| IM2COL_3D | ❌ | ❌ | ✅ | ✅ | ❌ | ❌ | ❌ | ❌ | ❌ |
|
||||
| L2_NORM | ❌ | ❌ | ✅ | ✅ | ✅ | ❌ | ✅ | ✅ | ❌ |
|
||||
| LEAKY_RELU | ❌ | ✅ | ✅ | ✅ | ✅ | ❌ | ✅ | ✅ | ❌ |
|
||||
| LOG | ❌ | ✅ | ✅ | ✅ | ❌ | ❌ | ✅ | ❌ | ❌ |
|
||||
| MEAN | ❌ | ✅ | ✅ | ✅ | ✅ | ❌ | ❌ | ❌ | ❌ |
|
||||
| MEAN | ❌ | ✅ | ✅ | ✅ | ✅ | ❌ | ✅ | ❌ | ❌ |
|
||||
| MUL | ❌ | ✅ | ✅ | ✅ | 🟡 | 🟡 | ✅ | ✅ | ❌ |
|
||||
| MUL_MAT | 🟡 | 🟡 | 🟡 | 🟡 | 🟡 | 🟡 | 🟡 | 🟡 | 🟡 |
|
||||
| MUL_MAT_ID | ❌ | 🟡 | ✅ | ✅ | ✅ | 🟡 | 🟡 | ✅ | ❌ |
|
||||
|
|
@ -69,26 +72,26 @@ Legend:
|
|||
| NORM | ❌ | ✅ | ✅ | ✅ | 🟡 | ✅ | ✅ | 🟡 | ❌ |
|
||||
| NORM_MUL_ADD | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ✅ | ❌ | ❌ |
|
||||
| OPT_STEP_ADAMW | ❌ | ❌ | ✅ | ✅ | ❌ | ❌ | ❌ | ✅ | ❌ |
|
||||
| OPT_STEP_SGD | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ |
|
||||
| OPT_STEP_SGD | ❌ | ❌ | ✅ | ✅ | ❌ | ❌ | ❌ | ❌ | ❌ |
|
||||
| OUT_PROD | 🟡 | ❌ | 🟡 | 🟡 | ❌ | ❌ | 🟡 | ❌ | ❌ |
|
||||
| PAD | ❌ | ✅ | ✅ | ✅ | ✅ | ✅ | 🟡 | ✅ | ❌ |
|
||||
| PAD_REFLECT_1D | ❌ | ✅ | ✅ | ❌ | ✅ | ❌ | ✅ | ❌ | ❌ |
|
||||
| PAD | ❌ | ✅ | ✅ | 🟡 | ✅ | ✅ | 🟡 | ✅ | ❌ |
|
||||
| PAD_REFLECT_1D | ❌ | ✅ | ✅ | ✅ | ✅ | ❌ | ✅ | ❌ | ❌ |
|
||||
| POOL_2D | ❌ | 🟡 | ✅ | ✅ | ✅ | ❌ | ✅ | ✅ | ❌ |
|
||||
| REGLU | ❌ | ✅ | ✅ | ✅ | 🟡 | ✅ | ✅ | 🟡 | ❌ |
|
||||
| RELU | ❌ | ✅ | ✅ | 🟡 | 🟡 | 🟡 | 🟡 | 🟡 | ❌ |
|
||||
| REPEAT | ❌ | ✅ | ✅ | 🟡 | ✅ | 🟡 | ✅ | 🟡 | ❌ |
|
||||
| REPEAT_BACK | ❌ | ❌ | ✅ | ✅ | ❌ | ❌ | ❌ | ✅ | ❌ |
|
||||
| REPEAT_BACK | ❌ | ❌ | ✅ | ✅ | ❌ | ❌ | ✅ | ✅ | ❌ |
|
||||
| RMS_NORM | ❌ | ✅ | ✅ | ✅ | 🟡 | ✅ | ✅ | ✅ | ❌ |
|
||||
| RMS_NORM_BACK | ❌ | ❌ | ✅ | ✅ | ❌ | ❌ | ✅ | ✅ | ❌ |
|
||||
| RMS_NORM_MUL_ADD | ❌ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❌ |
|
||||
| ROLL | ❌ | ❌ | ✅ | ❌ | ❌ | ❌ | ❌ | ✅ | ❌ |
|
||||
| RMS_NORM_MUL_ADD | ❌ | ✅ | ❌ | ❌ | ✅ | ✅ | ✅ | ✅ | ❌ |
|
||||
| ROLL | ❌ | ❌ | ✅ | ✅ | ❌ | ❌ | ✅ | ✅ | ❌ |
|
||||
| ROPE | ❌ | 🟡 | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❌ |
|
||||
| ROPE_BACK | ❌ | ❌ | ✅ | ✅ | ❌ | ❌ | ❌ | ✅ | ❌ |
|
||||
| ROUND | ❌ | ❌ | ✅ | 🟡 | ❌ | ❌ | ✅ | ❌ | ❌ |
|
||||
| ROUND | ❌ | ❌ | ✅ | 🟡 | ❌ | ❌ | 🟡 | ❌ | ❌ |
|
||||
| RWKV_WKV6 | ❌ | ❌ | ✅ | ✅ | ✅ | ❌ | ✅ | ✅ | ❌ |
|
||||
| RWKV_WKV7 | ❌ | ❌ | ✅ | ✅ | ✅ | ❌ | ✅ | ✅ | ❌ |
|
||||
| SCALE | ❌ | 🟡 | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❌ |
|
||||
| SET | ❌ | ❌ | ✅ | ❌ | ✅ | ❌ | ❌ | ❌ | ❌ |
|
||||
| SET | ❌ | ❌ | ✅ | ✅ | ✅ | ❌ | 🟡 | ❌ | ❌ |
|
||||
| SET_ROWS | ❌ | ❌ | 🟡 | 🟡 | 🟡 | 🟡 | 🟡 | 🟡 | ❌ |
|
||||
| SGN | ❌ | ✅ | ✅ | 🟡 | 🟡 | ❌ | 🟡 | ❌ | ❌ |
|
||||
| SIGMOID | ❌ | ✅ | ✅ | 🟡 | 🟡 | 🟡 | 🟡 | 🟡 | ❌ |
|
||||
|
|
@ -96,21 +99,24 @@ Legend:
|
|||
| SILU_BACK | ❌ | ❌ | ✅ | ✅ | ❌ | ❌ | ❌ | ✅ | ❌ |
|
||||
| SIN | ❌ | ✅ | ✅ | ✅ | 🟡 | ❌ | ✅ | 🟡 | ❌ |
|
||||
| SOFTCAP | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ✅ | ❌ | ❌ |
|
||||
| SOFTPLUS | ❌ | ❌ | ✅ | 🟡 | ❌ | ❌ | ❌ | ❌ | ❌ |
|
||||
| SOFT_MAX | ❌ | 🟡 | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❌ |
|
||||
| SOFT_MAX_BACK | ❌ | ❌ | 🟡 | 🟡 | ❌ | ❌ | 🟡 | ✅ | ❌ |
|
||||
| SOLVE_TRI | ❌ | ❌ | ✅ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ |
|
||||
| SQR | ❌ | ✅ | ✅ | ✅ | 🟡 | ❌ | ✅ | 🟡 | ❌ |
|
||||
| SQRT | ❌ | ✅ | ✅ | ✅ | 🟡 | ❌ | ✅ | ❌ | ❌ |
|
||||
| SSM_CONV | ❌ | ❌ | ✅ | ✅ | ✅ | ❌ | ❌ | ✅ | ❌ |
|
||||
| SSM_CONV | ❌ | ❌ | ✅ | ✅ | ✅ | ❌ | ✅ | ✅ | ❌ |
|
||||
| SSM_SCAN | ❌ | ❌ | ✅ | ✅ | ✅ | ❌ | ❌ | ✅ | ❌ |
|
||||
| STEP | ❌ | ✅ | ✅ | 🟡 | 🟡 | ❌ | 🟡 | ❌ | ❌ |
|
||||
| SUB | ❌ | ✅ | ✅ | ✅ | 🟡 | 🟡 | ✅ | ✅ | ❌ |
|
||||
| SUM | ❌ | ✅ | ✅ | ✅ | ❌ | ❌ | ✅ | ✅ | ❌ |
|
||||
| SUM_ROWS | ❌ | ✅ | ✅ | ✅ | ✅ | ✅ | 🟡 | ✅ | ❌ |
|
||||
| SUM | ❌ | ✅ | ✅ | 🟡 | ❌ | ❌ | 🟡 | ✅ | ❌ |
|
||||
| SUM_ROWS | ❌ | ✅ | ✅ | 🟡 | ✅ | ✅ | 🟡 | ✅ | ❌ |
|
||||
| SWIGLU | ❌ | ✅ | ✅ | ✅ | 🟡 | ✅ | ✅ | 🟡 | ❌ |
|
||||
| SWIGLU_OAI | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ |
|
||||
| SWIGLU_OAI | ❌ | ❌ | ✅ | ✅ | ❌ | ❌ | ❌ | ❌ | ❌ |
|
||||
| TANH | ❌ | ✅ | ✅ | 🟡 | 🟡 | ✅ | 🟡 | 🟡 | ❌ |
|
||||
| TIMESTEP_EMBEDDING | ❌ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❌ |
|
||||
| TOPK_MOE | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ✅ | ❌ | ❌ |
|
||||
| TRUNC | ❌ | ❌ | ✅ | 🟡 | ❌ | ❌ | ✅ | ❌ | ❌ |
|
||||
| TRI | ❌ | ❌ | ✅ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ |
|
||||
| TRUNC | ❌ | ❌ | ✅ | 🟡 | ❌ | ❌ | 🟡 | ❌ | ❌ |
|
||||
| UPSCALE | ❌ | 🟡 | ✅ | ✅ | 🟡 | ✅ | 🟡 | ✅ | ❌ |
|
||||
| XIELU | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ |
|
||||
| XIELU | ❌ | ❌ | ✅ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ |
|
||||
|
|
|
|||
21200
docs/ops/CPU.csv
21200
docs/ops/CPU.csv
File diff suppressed because it is too large
Load Diff
23118
docs/ops/CUDA.csv
23118
docs/ops/CUDA.csv
File diff suppressed because it is too large
Load Diff
4693
docs/ops/SYCL.csv
4693
docs/ops/SYCL.csv
File diff suppressed because it is too large
Load Diff
|
|
@ -475,6 +475,7 @@ extern "C" {
|
|||
GGML_OP_COS,
|
||||
GGML_OP_SUM,
|
||||
GGML_OP_SUM_ROWS,
|
||||
GGML_OP_CUMSUM,
|
||||
GGML_OP_MEAN,
|
||||
GGML_OP_ARGMAX,
|
||||
GGML_OP_COUNT_EQUAL,
|
||||
|
|
@ -530,6 +531,8 @@ extern "C" {
|
|||
GGML_OP_TIMESTEP_EMBEDDING,
|
||||
GGML_OP_ARGSORT,
|
||||
GGML_OP_LEAKY_RELU,
|
||||
GGML_OP_TRI,
|
||||
GGML_OP_FILL,
|
||||
|
||||
GGML_OP_FLASH_ATTN_EXT,
|
||||
GGML_OP_FLASH_ATTN_BACK,
|
||||
|
|
@ -542,6 +545,7 @@ extern "C" {
|
|||
GGML_OP_RWKV_WKV6,
|
||||
GGML_OP_GATED_LINEAR_ATTN,
|
||||
GGML_OP_RWKV_WKV7,
|
||||
GGML_OP_SOLVE_TRI,
|
||||
|
||||
GGML_OP_UNARY,
|
||||
|
||||
|
|
@ -576,6 +580,8 @@ extern "C" {
|
|||
GGML_UNARY_OP_HARDSWISH,
|
||||
GGML_UNARY_OP_HARDSIGMOID,
|
||||
GGML_UNARY_OP_EXP,
|
||||
GGML_UNARY_OP_EXPM1,
|
||||
GGML_UNARY_OP_SOFTPLUS,
|
||||
GGML_UNARY_OP_GELU_ERF,
|
||||
GGML_UNARY_OP_XIELU,
|
||||
GGML_UNARY_OP_FLOOR,
|
||||
|
|
@ -620,6 +626,13 @@ extern "C" {
|
|||
GGML_TENSOR_FLAG_LOSS = 8, // ...defines loss for numerical optimization (multiple loss tensors add up)
|
||||
};
|
||||
|
||||
// Selects which triangle of a matrix is meant.
// NOTE(review): presumably the *_DIAG variants include the main diagonal and
// the plain variants are strict (diagonal excluded) - confirm against the
// implementation.
enum ggml_tri_type {
    GGML_TRI_TYPE_UPPER_DIAG = 0,
    GGML_TRI_TYPE_UPPER      = 1,
    GGML_TRI_TYPE_LOWER_DIAG = 2,
    GGML_TRI_TYPE_LOWER      = 3
};
|
||||
|
||||
struct ggml_init_params {
|
||||
// memory pool
|
||||
size_t mem_size; // bytes
|
||||
|
|
@ -957,6 +970,22 @@ extern "C" {
|
|||
struct ggml_context * ctx,
|
||||
struct ggml_tensor * a);
|
||||
|
||||
GGML_API struct ggml_tensor * ggml_expm1(
|
||||
struct ggml_context * ctx,
|
||||
struct ggml_tensor * a);
|
||||
|
||||
GGML_API struct ggml_tensor * ggml_expm1_inplace(
|
||||
struct ggml_context * ctx,
|
||||
struct ggml_tensor * a);
|
||||
|
||||
GGML_API struct ggml_tensor * ggml_softplus(
|
||||
struct ggml_context * ctx,
|
||||
struct ggml_tensor * a);
|
||||
|
||||
GGML_API struct ggml_tensor * ggml_softplus_inplace(
|
||||
struct ggml_context * ctx,
|
||||
struct ggml_tensor * a);
|
||||
|
||||
GGML_API struct ggml_tensor * ggml_sin(
|
||||
struct ggml_context * ctx,
|
||||
struct ggml_tensor * a);
|
||||
|
|
@ -983,6 +1012,10 @@ extern "C" {
|
|||
struct ggml_context * ctx,
|
||||
struct ggml_tensor * a);
|
||||
|
||||
GGML_API struct ggml_tensor * ggml_cumsum(
|
||||
struct ggml_context * ctx,
|
||||
struct ggml_tensor * a);
|
||||
|
||||
// mean along rows
|
||||
GGML_API struct ggml_tensor * ggml_mean(
|
||||
struct ggml_context * ctx,
|
||||
|
|
@ -2187,6 +2220,23 @@ extern "C" {
|
|||
int shift2,
|
||||
int shift3);
|
||||
|
||||
// Convert matrix into a triangular one (upper, strict upper, lower or strict lower) by writing
|
||||
// zeroes everywhere outside the masked area
|
||||
GGML_API struct ggml_tensor * ggml_tri(
|
||||
struct ggml_context * ctx,
|
||||
struct ggml_tensor * a,
|
||||
enum ggml_tri_type type);
|
||||
|
||||
// Fill tensor a with constant c
|
||||
GGML_API struct ggml_tensor * ggml_fill(
|
||||
struct ggml_context * ctx,
|
||||
struct ggml_tensor * a,
|
||||
float c);
|
||||
|
||||
GGML_API struct ggml_tensor * ggml_fill_inplace(
|
||||
struct ggml_context * ctx,
|
||||
struct ggml_tensor * a,
|
||||
float c);
|
||||
|
||||
// Ref: https://github.com/CompVis/stable-diffusion/blob/main/ldm/modules/diffusionmodules/util.py#L151
|
||||
// timesteps: [N,]
|
||||
|
|
@ -2356,6 +2406,27 @@ extern "C" {
|
|||
struct ggml_tensor * b,
|
||||
struct ggml_tensor * state);
|
||||
|
||||
/* Solves a specific equation of the form Ax=B, where A is a triangular matrix
|
||||
* without zeroes on the diagonal (i.e. invertible).
|
||||
* B can have any number of columns, but must have the same number of rows as A
|
||||
* If A is [n, n] and B is [n, m], then the result will be [n, m] as well
|
||||
* Has O(n^3) complexity (unlike most matrix ops out there), so use on cases
|
||||
* where n > 100 sparingly, pre-chunk if necessary.
|
||||
*
|
||||
* If left = false, solves xA=B instead
|
||||
* If lower = false, assumes upper triangular instead
|
||||
* If uni = true, assumes diagonal of A to be all ones (will override actual values)
|
||||
*
|
||||
* TODO: currently only lower, right, non-unitriangular variant is implemented
|
||||
*/
|
||||
GGML_API struct ggml_tensor * ggml_solve_tri(
|
||||
struct ggml_context * ctx,
|
||||
struct ggml_tensor * a,
|
||||
struct ggml_tensor * b,
|
||||
bool left,
|
||||
bool lower,
|
||||
bool uni);
|
||||
|
||||
// custom operators
|
||||
|
||||
typedef void (*ggml_custom1_op_t)(struct ggml_tensor * dst , const struct ggml_tensor * a, int ith, int nth, void * userdata);
|
||||
|
|
|
|||
|
|
@ -1698,8 +1698,6 @@ bool ggml_backend_sched_reserve(ggml_backend_sched_t sched, struct ggml_cgraph *
|
|||
GGML_ASSERT(sched);
|
||||
GGML_ASSERT((int)sched->hash_set.size >= measure_graph->n_nodes + measure_graph->n_leafs);
|
||||
|
||||
ggml_backend_sched_reset(sched);
|
||||
|
||||
ggml_backend_sched_synchronize(sched);
|
||||
|
||||
ggml_backend_sched_split_graph(sched, measure_graph);
|
||||
|
|
|
|||
|
|
@ -448,6 +448,121 @@ void ggml_cann_norm(ggml_backend_cann_context & ctx, ggml_tensor * dst) {
|
|||
ggml_cann_release_resources(ctx, norm, acl_src, acl_dst);
|
||||
}
|
||||
|
||||
// L2-normalizes dst->src[0] into dst in two ACL ops:
//   1. Norm (p = 2, keepdim = true) over ACL dim 3 into a float scratch tensor
//      of shape {1, ne[1], ne[2], ne[3]};
//   2. Div of the source by that scratch tensor, written to dst.
//
// @param ctx  CANN backend context; supplies the memory pool and executes the ops.
// @param dst  Destination tensor; its first source operand is the input.
void ggml_cann_l2_norm(ggml_backend_cann_context & ctx, ggml_tensor * dst) {
    ggml_tensor * src = dst->src[0];

    aclTensor * acl_src = ggml_cann_create_tensor(src);
    aclTensor * acl_dst = ggml_cann_create_tensor(dst);

    // The scratch tensor is declared ACL_FLOAT, so its buffer size, element
    // size and strides must all be expressed in sizeof(float). Using the
    // source element size here would under-allocate for narrower source types.
    const size_t  norm_elem_size = sizeof(float);
    const int64_t n_bytes        = src->ne[3] * src->ne[2] * src->ne[1] * norm_elem_size;
    ggml_cann_pool_alloc temp_buffer_allocator(ctx.pool(), n_bytes);
    void * buffer = temp_buffer_allocator.get();

    // One norm value per row: dimension 0 collapses to 1, contiguous strides.
    int64_t div_ne[] = { 1, src->ne[1], src->ne[2], src->ne[3] };
    size_t  div_nb[GGML_MAX_DIMS];
    div_nb[0] = norm_elem_size;
    for (int i = 1; i < GGML_MAX_DIMS; ++i) {
        div_nb[i] = div_nb[i - 1] * div_ne[i - 1];
    }
    aclTensor * acl_div = ggml_cann_create_tensor(buffer, ACL_FLOAT, norm_elem_size, div_ne, div_nb, GGML_MAX_DIMS);

    std::vector<int64_t> norm_dims  = { 3 };
    aclIntArray *        dims_array = aclCreateIntArray(norm_dims.data(), norm_dims.size());

    float       p_value  = 2.0f;
    aclScalar * p_scalar = aclCreateScalar(&p_value, aclDataType::ACL_FLOAT);

    GGML_CANN_CALL_ACLNN_OP(ctx, Norm, acl_src, p_scalar, dims_array, true, acl_div);
    GGML_CANN_CALL_ACLNN_OP(ctx, Div, acl_src, acl_div, acl_dst);
    ggml_cann_release_resources(ctx, dims_array, p_scalar, acl_src, acl_dst, acl_div);
}
|
||||
|
||||
// Cross-entropy loss of dst->src[0] (logits) against dst->src[1] (labels),
// written as a single float to dst. Pipeline:
//   LogSoftmax(logits, dim 1) -> Mul with labels -> ReduceSum over dim 1
//   -> ReduceSum over dim 0 -> Muls by -1/nr.
// All intermediates are float scratch buffers taken from the context pool.
void ggml_cann_cross_entropy_loss(ggml_backend_cann_context & ctx, ggml_tensor * dst) {
    ggml_tensor * src0 = dst->src[0];
    ggml_tensor * src1 = dst->src[1];

    const int64_t nc = src0->ne[0];
    const int64_t nr = ggml_nrows(src0);

    const size_t f32_size = sizeof(float);

    // Every 2-D view below has the same {nc, nr} shape.
    int64_t ne_2d[] = { nc, nr };

    // Contiguous float strides, shared by the 2-D scratch tensors.
    size_t nb_2d[2];
    nb_2d[0] = f32_size;
    nb_2d[1] = nb_2d[0] * nc;

    // Shape/strides of a one-element float tensor (total sum and dst);
    // the stride is also reused for the per-row sums.
    int64_t ne_scalar[] = { 1 };
    size_t  nb_1d[1];
    nb_1d[0] = f32_size;

    // logits: 2-D view over src0's data
    size_t logits_nb[2];
    logits_nb[0] = ggml_type_size(src0->type);
    logits_nb[1] = logits_nb[0] * nc;
    aclTensor * acl_logits = ggml_cann_create_tensor(src0->data, ACL_FLOAT, sizeof(float), ne_2d, logits_nb, 2);

    // log_softmax(logits) along ACL dim 1
    const int64_t lsm_bytes = nr * nc * f32_size;
    ggml_cann_pool_alloc lsm_alloc(ctx.pool(), lsm_bytes);
    aclTensor * acl_log_softmax = ggml_cann_create_tensor(lsm_alloc.get(), ACL_FLOAT, f32_size, ne_2d, nb_2d, 2);

    GGML_CANN_CALL_ACLNN_OP(ctx, LogSoftmax, acl_logits, 1, acl_log_softmax);

    // labels: 2-D view over src1's data
    size_t labels_nb[2];
    labels_nb[0] = ggml_type_size(src1->type);
    labels_nb[1] = labels_nb[0] * nc;
    aclTensor * acl_labels = ggml_cann_create_tensor(src1->data, ACL_FLOAT, sizeof(float), ne_2d, labels_nb, 2);

    // element-wise log_softmax * labels
    const int64_t prod_bytes = nr * nc * f32_size;
    ggml_cann_pool_alloc prod_alloc(ctx.pool(), prod_bytes);
    aclTensor * acl_mul_result = ggml_cann_create_tensor(prod_alloc.get(), ACL_FLOAT, f32_size, ne_2d, nb_2d, 2);

    GGML_CANN_CALL_ACLNN_OP(ctx, Mul, acl_log_softmax, acl_labels, acl_mul_result);

    // per-row sums: reduce ACL dim 1 into nr floats
    const int64_t row_sum_bytes = nr * f32_size;
    ggml_cann_pool_alloc row_sum_alloc(ctx.pool(), row_sum_bytes);
    int64_t ne_rows[] = { nr };
    aclTensor * acl_sum_per_sample = ggml_cann_create_tensor(row_sum_alloc.get(), ACL_FLOAT, f32_size, ne_rows, nb_1d, 1);

    std::vector<int64_t> row_reduce_dims = { 1 };
    aclIntArray * dims_array = aclCreateIntArray(row_reduce_dims.data(), row_reduce_dims.size());
    bool keep_dims = false;

    GGML_CANN_CALL_ACLNN_OP(ctx, ReduceSum, acl_mul_result, dims_array, keep_dims, ACL_FLOAT, acl_sum_per_sample);

    // total: reduce the per-row sums into a single float
    const int64_t total_bytes = 1 * f32_size;
    ggml_cann_pool_alloc total_alloc(ctx.pool(), total_bytes);
    aclTensor * acl_total_sum = ggml_cann_create_tensor(total_alloc.get(), ACL_FLOAT, f32_size, ne_scalar, nb_1d, 1);

    std::vector<int64_t> total_reduce_dims = { 0 };
    aclIntArray * total_sum_dims_array = aclCreateIntArray(total_reduce_dims.data(), total_reduce_dims.size());

    GGML_CANN_CALL_ACLNN_OP(ctx, ReduceSum, acl_sum_per_sample, total_sum_dims_array, keep_dims, ACL_FLOAT, acl_total_sum);

    // dst = total * (-1 / nr)
    float value = -1.0f / static_cast<float>(nr);
    aclScalar * scale_factor = aclCreateScalar(&value, aclDataType::ACL_FLOAT);
    aclTensor * acl_dst = ggml_cann_create_tensor(dst->data, ACL_FLOAT, sizeof(float), ne_scalar, nb_1d, 1);

    GGML_CANN_CALL_ACLNN_OP(ctx, Muls, acl_total_sum, scale_factor, acl_dst);

    ggml_cann_release_resources(ctx, acl_logits, acl_log_softmax, acl_labels, acl_mul_result, acl_sum_per_sample, acl_total_sum, acl_dst, scale_factor, dims_array, total_sum_dims_array);
}
|
||||
|
||||
void ggml_cann_group_norm(ggml_backend_cann_context & ctx, ggml_tensor * dst) {
|
||||
ggml_tensor * src = dst->src[0];
|
||||
|
||||
|
|
|
|||
|
|
@ -46,6 +46,8 @@
|
|||
#include <aclnnop/aclnn_cos.h>
|
||||
#include <aclnnop/aclnn_log.h>
|
||||
#include <aclnnop/aclnn_sign.h>
|
||||
#include <aclnnop/aclnn_norm.h>
|
||||
#include <aclnnop/aclnn_logsoftmax.h>
|
||||
#include "acl_tensor.h"
|
||||
#include "common.h"
|
||||
|
||||
|
|
@ -187,6 +189,66 @@ void ggml_cann_argsort(ggml_backend_cann_context & ctx, ggml_tensor * dst);
|
|||
*/
|
||||
void ggml_cann_norm(ggml_backend_cann_context & ctx, ggml_tensor * dst);
|
||||
|
||||
/**
|
||||
* @brief Computes the L2 Normalization for a ggml tensor using the CANN
|
||||
* backend.
|
||||
*
|
||||
* @details This function applies the L2 Normalization operation on the
|
||||
* input tensor `src` and stores the result in the destination tensor
|
||||
* `dst`. L2 Normalization scales the input tensor such that the
|
||||
* L2 norm along the specified dimension equals 1. This operation
|
||||
* is commonly used in neural networks for feature normalization
|
||||
* and vector scaling.
|
||||
* The operation is defined as:
|
||||
* \f[
|
||||
* \text{out} = \frac{x}{\sqrt{\sum{x^2}}}
|
||||
* \f]
|
||||
* The normalization is performed along the last dimension by default.
|
||||
*
|
||||
* @param ctx The CANN context used for operations.
|
||||
* @param dst The destination tensor where the normalized values will be stored.
|
||||
* @attention The normalization is performed along the last dimension of the
|
||||
* input tensor by default.
|
||||
*/
|
||||
void ggml_cann_l2_norm(ggml_backend_cann_context & ctx, ggml_tensor * dst);
|
||||
|
||||
/**
|
||||
* @brief Computes the Cross Entropy Loss for a ggml tensor using the CANN
|
||||
* backend.
|
||||
*
|
||||
* @details This function computes the cross entropy loss between the predicted
|
||||
* logits and target probability distributions. The operation follows
|
||||
* the same computation pattern as the CPU implementation:
|
||||
* 1. Applies log_softmax to the logits along the class dimension
|
||||
* 2. Element-wise multiplication with target distributions
|
||||
* 3. Summation along the class dimension to get per-sample losses
|
||||
* 4. Global summation and scaling by -1/nr to get final loss
|
||||
*
|
||||
* The computation can be expressed as:
|
||||
* \f[
|
||||
* \text{loss} = -\frac{1}{N} \sum_{i=1}^{N} \sum_{j=1}^{C} y_{ij} \cdot \log(\text{softmax}(x_{ij}))
|
||||
* \f]
|
||||
* where \f$N\f$ is the total number of samples, \f$C\f$ is the number
|
||||
* of classes, \f$x\f$ are the logits, and \f$y\f$ are the target
|
||||
* probability distributions.
|
||||
*
|
||||
* @param ctx The CANN context used for operations.
|
||||
* @param dst The destination tensor where the computed loss will be stored.
|
||||
* This should be a scalar tensor containing the final loss value.
|
||||
*
|
||||
* @note This implementation computes cross entropy between probability
|
||||
* distributions, not the typical classification cross entropy that
|
||||
* expects class indices as targets. Both input tensors (src0 and src1)
|
||||
* should have the same shape and represent probability distributions
|
||||
* over the class dimension.
|
||||
* @note The function expects two source tensors:
|
||||
* - dst->src[0]: Logits tensor (before softmax)
|
||||
* - dst->src[1]: Target probability distributions tensor
|
||||
* @note The computation is performed using CANN backend operators including
|
||||
* LogSoftmax, Mul, ReduceSum, and Muls for the final scaling.
|
||||
*/
|
||||
void ggml_cann_cross_entropy_loss(ggml_backend_cann_context & ctx, ggml_tensor * dst);
|
||||
|
||||
/**
|
||||
* @brief Computes the Group Normalization for a ggml tensor using the CANN
|
||||
* backend.
|
||||
|
|
|
|||
|
|
@ -1777,6 +1777,12 @@ static bool ggml_cann_compute_forward(ggml_backend_cann_context & ctx, struct gg
|
|||
case GGML_OP_GROUP_NORM:
|
||||
ggml_cann_group_norm(ctx, dst);
|
||||
break;
|
||||
case GGML_OP_L2_NORM:
|
||||
ggml_cann_l2_norm(ctx, dst);
|
||||
break;
|
||||
case GGML_OP_CROSS_ENTROPY_LOSS:
|
||||
ggml_cann_cross_entropy_loss(ctx, dst);
|
||||
break;
|
||||
case GGML_OP_CONCAT:
|
||||
ggml_cann_concat(ctx, dst);
|
||||
break;
|
||||
|
|
@ -2515,6 +2521,8 @@ static bool ggml_backend_cann_supports_op(ggml_backend_dev_t dev, const ggml_ten
|
|||
// value of paddingW should be at most half of kernelW
|
||||
return (p0 <= (k0 / 2)) && (p1 <= (k1 / 2));
|
||||
}
|
||||
case GGML_OP_L2_NORM:
|
||||
case GGML_OP_CROSS_ENTROPY_LOSS:
|
||||
case GGML_OP_DUP:
|
||||
case GGML_OP_SUM:
|
||||
case GGML_OP_IM2COL:
|
||||
|
|
|
|||
|
|
@ -1731,6 +1731,10 @@ static void ggml_compute_forward(struct ggml_compute_params * params, struct ggm
|
|||
{
|
||||
ggml_compute_forward_sum_rows(params, tensor);
|
||||
} break;
|
||||
case GGML_OP_CUMSUM:
|
||||
{
|
||||
ggml_compute_forward_cumsum(params, tensor);
|
||||
} break;
|
||||
case GGML_OP_MEAN:
|
||||
{
|
||||
ggml_compute_forward_mean(params, tensor);
|
||||
|
|
@ -1927,6 +1931,14 @@ static void ggml_compute_forward(struct ggml_compute_params * params, struct ggm
|
|||
{
|
||||
ggml_compute_forward_leaky_relu(params, tensor);
|
||||
} break;
|
||||
case GGML_OP_TRI:
|
||||
{
|
||||
ggml_compute_forward_tri(params, tensor);
|
||||
} break;
|
||||
case GGML_OP_FILL:
|
||||
{
|
||||
ggml_compute_forward_fill(params, tensor);
|
||||
} break;
|
||||
case GGML_OP_FLASH_ATTN_EXT:
|
||||
{
|
||||
ggml_compute_forward_flash_attn_ext(params, tensor);
|
||||
|
|
@ -1982,6 +1994,10 @@ static void ggml_compute_forward(struct ggml_compute_params * params, struct ggm
|
|||
{
|
||||
ggml_compute_forward_rwkv_wkv7(params, tensor);
|
||||
} break;
|
||||
case GGML_OP_SOLVE_TRI:
|
||||
{
|
||||
ggml_compute_forward_solve_tri(params, tensor);
|
||||
} break;
|
||||
case GGML_OP_MAP_CUSTOM1:
|
||||
{
|
||||
ggml_compute_forward_map_custom1(params, tensor);
|
||||
|
|
@ -2140,6 +2156,9 @@ static int ggml_get_n_tasks(struct ggml_tensor * node, int n_threads) {
|
|||
case GGML_OP_ADD_ID:
|
||||
case GGML_OP_ADD1:
|
||||
case GGML_OP_ACC:
|
||||
case GGML_OP_CUMSUM:
|
||||
case GGML_OP_TRI:
|
||||
case GGML_OP_FILL:
|
||||
{
|
||||
n_tasks = n_threads;
|
||||
} break;
|
||||
|
|
@ -2157,6 +2176,7 @@ static int ggml_get_n_tasks(struct ggml_tensor * node, int n_threads) {
|
|||
n_tasks = 1;
|
||||
} break;
|
||||
case GGML_OP_COUNT_EQUAL:
|
||||
case GGML_OP_SOLVE_TRI:
|
||||
{
|
||||
n_tasks = n_threads;
|
||||
} break;
|
||||
|
|
@ -2179,6 +2199,8 @@ static int ggml_get_n_tasks(struct ggml_tensor * node, int n_threads) {
|
|||
case GGML_UNARY_OP_HARDSWISH:
|
||||
case GGML_UNARY_OP_HARDSIGMOID:
|
||||
case GGML_UNARY_OP_EXP:
|
||||
case GGML_UNARY_OP_SOFTPLUS:
|
||||
case GGML_UNARY_OP_EXPM1:
|
||||
case GGML_UNARY_OP_FLOOR:
|
||||
case GGML_UNARY_OP_CEIL:
|
||||
case GGML_UNARY_OP_ROUND:
|
||||
|
|
|
|||
|
|
@ -7,8 +7,10 @@
|
|||
#include "unary-ops.h"
|
||||
#include "vec.h"
|
||||
|
||||
#include <float.h>
|
||||
#include <cfloat>
|
||||
#include <algorithm>
|
||||
#include <cmath>
|
||||
#include <functional>
|
||||
|
||||
// ggml_compute_forward_dup
|
||||
|
||||
|
|
@ -1394,6 +1396,56 @@ void ggml_compute_forward_sum(
|
|||
}
|
||||
}
|
||||
|
||||
// ggml_compute_forward_cumsum
|
||||
|
||||
static void ggml_compute_forward_cumsum_f32(
|
||||
const ggml_compute_params * params,
|
||||
ggml_tensor * dst) {
|
||||
|
||||
const ggml_tensor * src0 = dst->src[0];
|
||||
|
||||
GGML_ASSERT(src0->nb[0] == sizeof(float));
|
||||
GGML_ASSERT(dst->nb[0] == sizeof(float));
|
||||
|
||||
GGML_TENSOR_UNARY_OP_LOCALS
|
||||
|
||||
GGML_ASSERT(ne0 == ne00);
|
||||
GGML_ASSERT(ne1 == ne01);
|
||||
GGML_ASSERT(ne2 == ne02);
|
||||
GGML_ASSERT(ne3 == ne03);
|
||||
|
||||
const auto [ir0, ir1] = get_thread_range(params, src0);
|
||||
|
||||
for (int64_t ir = ir0; ir < ir1; ++ir) {
|
||||
const int64_t i03 = ir/(ne02*ne01);
|
||||
const int64_t i02 = (ir - i03*ne02*ne01)/ne01;
|
||||
const int64_t i01 = (ir - i03*ne02*ne01 - i02*ne01);
|
||||
|
||||
float * src_row = (float *) ((char *) src0->data + i01*nb01 + i02*nb02 + i03*nb03);
|
||||
float * dst_row = (float *) ((char *) dst->data + i01*nb1 + i02*nb2 + i03*nb3);
|
||||
|
||||
ggml_vec_cumsum_f32(ne00, dst_row, src_row);
|
||||
}
|
||||
}
|
||||
|
||||
void ggml_compute_forward_cumsum(
|
||||
const ggml_compute_params * params,
|
||||
ggml_tensor * dst) {
|
||||
|
||||
const ggml_tensor * src0 = dst->src[0];
|
||||
|
||||
switch (src0->type) {
|
||||
case GGML_TYPE_F32:
|
||||
{
|
||||
ggml_compute_forward_cumsum_f32(params, dst);
|
||||
} break;
|
||||
default:
|
||||
{
|
||||
GGML_ABORT("fatal error");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// ggml_compute_forward_sum_rows
|
||||
|
||||
static void ggml_compute_forward_sum_rows_f32(
|
||||
|
|
@ -2140,6 +2192,83 @@ static void ggml_compute_forward_gelu(
|
|||
}
|
||||
}
|
||||
|
||||
// ggml_compute_forward_fill
|
||||
|
||||
static void ggml_compute_forward_fill_f32(const ggml_compute_params * params, ggml_tensor * dst) {
|
||||
const float c = ggml_get_op_params_f32(dst, 0);
|
||||
|
||||
GGML_TENSOR_LOCALS(int64_t, ne, dst, ne);
|
||||
GGML_TENSOR_LOCALS(size_t, nb, dst, nb);
|
||||
|
||||
const auto [ir0, ir1] = get_thread_range(params, dst);
|
||||
|
||||
for (int64_t ir = ir0; ir < ir1; ++ir) {
|
||||
const int64_t i03 = ir/(ne2*ne1);
|
||||
const int64_t i02 = (ir - i03*ne2*ne1)/ne1;
|
||||
const int64_t i01 = (ir - i03*ne2*ne1 - i02*ne1);
|
||||
|
||||
float * dst_ptr = (float *) ((char *) dst->data + i03*nb3 + i02*nb2 + i01*nb1);
|
||||
|
||||
ggml_vec_set_f32(ne0, dst_ptr, c);
|
||||
}
|
||||
}
|
||||
|
||||
void ggml_compute_forward_fill(const ggml_compute_params * params, ggml_tensor * dst) {
|
||||
ggml_compute_forward_fill_f32(params, dst);
|
||||
}
|
||||
|
||||
// ggml_compute_forward_tri
|
||||
|
||||
static void ggml_compute_forward_tri_f32(const ggml_compute_params * params, ggml_tensor * dst) {
|
||||
const ggml_tensor * src0 = dst->src[0];
|
||||
|
||||
const ggml_tri_type ttype = (ggml_tri_type) ggml_get_op_params_i32(dst, 0);
|
||||
|
||||
GGML_ASSERT(ggml_is_contiguous(src0));
|
||||
|
||||
GGML_TENSOR_UNARY_OP_LOCALS
|
||||
|
||||
const auto [ir0, ir1] = get_thread_range(params, src0);
|
||||
|
||||
bool (*bipred)(int, int);
|
||||
|
||||
switch (ttype) {
|
||||
case GGML_TRI_TYPE_LOWER: bipred = [](int i, int r) { return i < r; }; break;
|
||||
case GGML_TRI_TYPE_LOWER_DIAG: bipred = [](int i, int r) { return i <= r; }; break;
|
||||
case GGML_TRI_TYPE_UPPER: bipred = [](int i, int r) { return i > r; }; break;
|
||||
case GGML_TRI_TYPE_UPPER_DIAG: bipred = [](int i, int r) { return i >= r; }; break;
|
||||
default: GGML_ABORT("invalid tri type");
|
||||
}
|
||||
|
||||
for (int64_t ir = ir0; ir < ir1; ++ir) {
|
||||
const int64_t i03 = ir/(ne02*ne01);
|
||||
const int64_t i02 = (ir - i03*ne02*ne01)/ne01;
|
||||
const int64_t i01 = (ir - i03*ne02*ne01 - i02*ne01);
|
||||
|
||||
const float * src_ptr = (const float *) ((const char *) src0->data + i03*nb03 + i02*nb02 + i01*nb01);
|
||||
float * dst_ptr = ( float *) (( char *) dst->data + i03*nb3 + i02*nb2 + i01*nb1);
|
||||
|
||||
for (int i0 = 0; i0 < ne0; ++i0) {
|
||||
dst_ptr[i0] = bipred(i0, i01) ? src_ptr[i0] : 0.0f;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void ggml_compute_forward_tri(const ggml_compute_params * params, ggml_tensor * dst) {
|
||||
const ggml_tensor * src0 = dst->src[0];
|
||||
|
||||
switch (src0->type) {
|
||||
case GGML_TYPE_F32:
|
||||
{
|
||||
ggml_compute_forward_tri_f32(params, dst);
|
||||
} break;
|
||||
default:
|
||||
{
|
||||
GGML_ABORT("fatal error");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// ggml_compute_forward_gelu_erf
|
||||
|
||||
static void ggml_compute_forward_gelu_erf_f32(
|
||||
|
|
@ -7664,6 +7793,18 @@ void ggml_compute_forward_timestep_embedding(
|
|||
|
||||
// ggml_compute_forward_argsort
|
||||
|
||||
template<enum ggml_sort_order order>
|
||||
struct argsort_cmp {
|
||||
const float * data;
|
||||
bool operator()(int32_t a, int32_t b) const {
|
||||
if constexpr (order == GGML_SORT_ORDER_ASC) {
|
||||
return data[a] < data[b];
|
||||
} else {
|
||||
return data[a] > data[b];
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
static void ggml_compute_forward_argsort_f32(
|
||||
const ggml_compute_params * params,
|
||||
ggml_tensor * dst) {
|
||||
|
|
@ -7682,23 +7823,25 @@ static void ggml_compute_forward_argsort_f32(
|
|||
ggml_sort_order order = (ggml_sort_order) ggml_get_op_params_i32(dst, 0);
|
||||
|
||||
for (int64_t i = ith; i < nr; i += nth) {
|
||||
int32_t * dst_data = (int32_t *)((char *) dst->data + i*nb1);
|
||||
const float * src_data = (float *)((char *) src0->data + i*nb01);
|
||||
|
||||
int32_t * dst_data = (int32_t *)((char *) dst->data + i*nb1);
|
||||
|
||||
for (int64_t j = 0; j < ne0; j++) {
|
||||
dst_data[j] = j;
|
||||
}
|
||||
|
||||
// C doesn't have a functional sort, so we do a bubble sort instead
|
||||
for (int64_t j = 0; j < ne0; j++) {
|
||||
for (int64_t k = j + 1; k < ne0; k++) {
|
||||
if ((order == GGML_SORT_ORDER_ASC && src_data[dst_data[j]] > src_data[dst_data[k]]) ||
|
||||
(order == GGML_SORT_ORDER_DESC && src_data[dst_data[j]] < src_data[dst_data[k]])) {
|
||||
int32_t tmp = dst_data[j];
|
||||
dst_data[j] = dst_data[k];
|
||||
dst_data[k] = tmp;
|
||||
}
|
||||
}
|
||||
switch (order) {
|
||||
case GGML_SORT_ORDER_ASC:
|
||||
std::sort(dst_data, dst_data + ne0, argsort_cmp<GGML_SORT_ORDER_ASC>{src_data});
|
||||
break;
|
||||
|
||||
case GGML_SORT_ORDER_DESC:
|
||||
std::sort(dst_data, dst_data + ne0, argsort_cmp<GGML_SORT_ORDER_DESC>{src_data});
|
||||
break;
|
||||
|
||||
default:
|
||||
GGML_ABORT("invalid sort order");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -8521,7 +8664,7 @@ static void ggml_compute_forward_ssm_scan_f32(
|
|||
// n_head
|
||||
for (int h = ih0; h < ih1; ++h) {
|
||||
// ref: https://github.com/state-spaces/mamba/blob/62db608da60f6fc790b8ed9f4b3225e95ca15fde/mamba_ssm/ops/triton/softplus.py#L16
|
||||
const float dt_soft_plus = ggml_softplus(dt[h]);
|
||||
const float dt_soft_plus = ggml_compute_softplus_f32(dt[h]);
|
||||
const float dA = expf(dt_soft_plus * A[h]);
|
||||
const int g = h / (nh / ng); // repeat_interleave
|
||||
|
||||
|
|
@ -8618,7 +8761,7 @@ static void ggml_compute_forward_ssm_scan_f32(
|
|||
// n_head
|
||||
for (int h = ih0; h < ih1; ++h) {
|
||||
// ref: https://github.com/state-spaces/mamba/blob/62db608da60f6fc790b8ed9f4b3225e95ca15fde/mamba_ssm/ops/triton/softplus.py#L16
|
||||
const float dt_soft_plus = ggml_softplus(dt[h]);
|
||||
const float dt_soft_plus = ggml_compute_softplus_f32(dt[h]);
|
||||
const int g = h / (nh / ng); // repeat_interleave
|
||||
|
||||
// dim
|
||||
|
|
@ -8901,6 +9044,14 @@ void ggml_compute_forward_unary(
|
|||
{
|
||||
ggml_compute_forward_xielu(params, dst);
|
||||
} break;
|
||||
case GGML_UNARY_OP_EXPM1:
|
||||
{
|
||||
ggml_compute_forward_expm1(params, dst);
|
||||
} break;
|
||||
case GGML_UNARY_OP_SOFTPLUS:
|
||||
{
|
||||
ggml_compute_forward_softplus(params, dst);
|
||||
} break;
|
||||
default:
|
||||
{
|
||||
GGML_ABORT("fatal error");
|
||||
|
|
@ -9497,6 +9648,76 @@ void ggml_compute_forward_gla(
|
|||
}
|
||||
}
|
||||
|
||||
static void ggml_compute_forward_solve_tri_f32(const struct ggml_compute_params * params, struct ggml_tensor * dst) {
|
||||
const struct ggml_tensor * src0 = dst->src[0]; // A (lower triangular)
|
||||
const struct ggml_tensor * src1 = dst->src[1]; // B (RHS)
|
||||
|
||||
GGML_TENSOR_BINARY_OP_LOCALS;
|
||||
|
||||
GGML_ASSERT(src0->type == GGML_TYPE_F32);
|
||||
GGML_ASSERT(src1->type == GGML_TYPE_F32);
|
||||
GGML_ASSERT(dst->type == GGML_TYPE_F32);
|
||||
|
||||
GGML_ASSERT(ne00 == ne01); // A must be square
|
||||
GGML_ASSERT(ne0 == ne10); // solution cols == B cols
|
||||
GGML_ASSERT(ne1 == ne11); // solution rows == B rows
|
||||
|
||||
GGML_ASSERT(ne02 == ne12 && ne12 == ne2);
|
||||
GGML_ASSERT(ne03 == ne13 && ne13 == ne3);
|
||||
|
||||
const int ith = params->ith;
|
||||
const int nth = params->nth;
|
||||
|
||||
const int64_t k = ne10; // number of RHS columns
|
||||
const int64_t n = ne11; // A is n×n
|
||||
const int64_t nr = ne02 * ne03 * k; // we're parallelizing on columns here, so seq x token x column will be the unit
|
||||
|
||||
// chunks per thread
|
||||
const int64_t dr = (nr + nth - 1)/nth;
|
||||
|
||||
// chunk range for this thread
|
||||
const int64_t ir0 = dr*ith;
|
||||
const int64_t ir1 = MIN(ir0 + dr, nr);
|
||||
|
||||
const float * A = (const float *) src0->data; // [n, n, B1, B2]
|
||||
const float * B = (const float *) src1->data; // [n, k, B1, B2]
|
||||
float * X = ( float *) dst->data; // [n, k, B1, B2]
|
||||
|
||||
for (int64_t ir = ir0; ir < ir1; ++ir) {
|
||||
const int64_t i03 = ir/(ne02*k);
|
||||
const int64_t i02 = (ir - i03*ne02*k)/k;
|
||||
const int64_t i01 = (ir - i03*ne02*k - i02*k);
|
||||
|
||||
const float * A_batch = A + i02 * nb02 / sizeof(float) + i03 * nb03 / sizeof(float);
|
||||
const float * B_batch = B + i02 * nb12 / sizeof(float) + i03 * nb13 / sizeof(float);
|
||||
|
||||
float * X_batch = X + i02 * nb2 / sizeof(float) + i03 * nb3 / sizeof(float);
|
||||
|
||||
for (int64_t i00 = 0; i00 < n; ++i00) {
|
||||
float sum = 0.0f;
|
||||
for (int64_t t = 0; t < i00; ++t) {
|
||||
sum += A_batch[i00 * n + t] * X_batch[i01 * n + t];
|
||||
}
|
||||
|
||||
const float diag = A_batch[i00 * n + i00];
|
||||
GGML_ASSERT(diag != 0.0f && "Zero diagonal in triangular matrix");
|
||||
|
||||
X_batch[i01 * n + i00] = (B_batch[i00 * k + i01] - sum) / diag;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void ggml_compute_forward_solve_tri(const struct ggml_compute_params * params, struct ggml_tensor * dst) {
|
||||
const ggml_tensor * src0 = dst->src[0];
|
||||
const ggml_tensor * src1 = dst->src[1];
|
||||
|
||||
if (src0->type == GGML_TYPE_F32 && src1->type == GGML_TYPE_F32) {
|
||||
ggml_compute_forward_solve_tri_f32(params, dst);
|
||||
} else {
|
||||
GGML_ABORT("fatal error");
|
||||
}
|
||||
}
|
||||
|
||||
// ggml_compute_forward_rwkv_wkv7
|
||||
|
||||
static void ggml_compute_forward_rwkv_wkv7_f32(
|
||||
|
|
|
|||
|
|
@ -34,6 +34,7 @@ void ggml_compute_forward_add1(const struct ggml_compute_params * params, struct
|
|||
void ggml_compute_forward_acc(const struct ggml_compute_params * params, struct ggml_tensor * dst);
|
||||
void ggml_compute_forward_sum(const struct ggml_compute_params * params, struct ggml_tensor * dst);
|
||||
void ggml_compute_forward_sum_rows(const struct ggml_compute_params * params, struct ggml_tensor * dst);
|
||||
void ggml_compute_forward_cumsum(const struct ggml_compute_params * params, struct ggml_tensor * dst);
|
||||
void ggml_compute_forward_mean(const struct ggml_compute_params * params, struct ggml_tensor * dst);
|
||||
void ggml_compute_forward_argmax(const struct ggml_compute_params * params, struct ggml_tensor * dst);
|
||||
void ggml_compute_forward_count_equal(const struct ggml_compute_params * params, struct ggml_tensor * dst);
|
||||
|
|
@ -81,6 +82,8 @@ void ggml_compute_forward_arange(const struct ggml_compute_params * params, stru
|
|||
void ggml_compute_forward_timestep_embedding(const struct ggml_compute_params * params, struct ggml_tensor * dst);
|
||||
void ggml_compute_forward_argsort(const struct ggml_compute_params * params, struct ggml_tensor * dst);
|
||||
void ggml_compute_forward_leaky_relu(const struct ggml_compute_params * params, struct ggml_tensor * dst);
|
||||
void ggml_compute_forward_tri(const struct ggml_compute_params * params, struct ggml_tensor * dst);
|
||||
void ggml_compute_forward_fill(const struct ggml_compute_params * params, struct ggml_tensor * dst);
|
||||
void ggml_compute_forward_flash_attn_ext(const struct ggml_compute_params * params, struct ggml_tensor * dst);
|
||||
void ggml_compute_forward_flash_attn_back(
|
||||
const struct ggml_compute_params * params,
|
||||
|
|
@ -96,6 +99,7 @@ void ggml_compute_forward_get_rel_pos(const struct ggml_compute_params * params,
|
|||
void ggml_compute_forward_add_rel_pos(const struct ggml_compute_params * params, struct ggml_tensor * dst);
|
||||
void ggml_compute_forward_rwkv_wkv6(const struct ggml_compute_params * params, struct ggml_tensor * dst);
|
||||
void ggml_compute_forward_rwkv_wkv7(const struct ggml_compute_params * params, struct ggml_tensor * dst);
|
||||
void ggml_compute_forward_solve_tri(const struct ggml_compute_params * params, struct ggml_tensor * dst);
|
||||
void ggml_compute_forward_gla(const struct ggml_compute_params * params, struct ggml_tensor * dst);
|
||||
void ggml_compute_forward_map_custom1(const struct ggml_compute_params * params, struct ggml_tensor * dst);
|
||||
void ggml_compute_forward_map_custom2(const struct ggml_compute_params * params, struct ggml_tensor * dst);
|
||||
|
|
|
|||
|
|
@ -1600,29 +1600,52 @@ template <typename BLOC_TYPE, int64_t INTER_SIZE, int64_t NB_COLS, ggml_type PAR
|
|||
return false;
|
||||
}
|
||||
|
||||
void forward_mul_mat_one_chunk(ggml_compute_params * params, ggml_tensor * op, int64_t src0_start, int64_t src0_end) {
|
||||
void forward_mul_mat_one_chunk(ggml_compute_params * params,
|
||||
ggml_tensor * op,
|
||||
int64_t src0_start,
|
||||
int64_t src0_end,
|
||||
int64_t src1_start,
|
||||
int64_t src1_end) {
|
||||
const ggml_tensor * src0 = op->src[0];
|
||||
const ggml_tensor * src1 = op->src[1];
|
||||
ggml_tensor * dst = op;
|
||||
|
||||
GGML_TENSOR_BINARY_OP_LOCALS
|
||||
|
||||
const void * src1_wdata = params->wdata;
|
||||
const size_t src1_col_stride = ggml_row_size(PARAM_TYPE, ne10);
|
||||
|
||||
GGML_ASSERT(ne03 == 1 && ne13 == 1);
|
||||
GGML_ASSERT(ne12 % ne02 == 0);
|
||||
const int64_t r2 = ne12 / ne02;
|
||||
|
||||
const int64_t i12 = src1_start / ne1;
|
||||
const int64_t i11 = src1_start - i12 * ne1;
|
||||
|
||||
// Determine batch index
|
||||
const int64_t i02 = i12 / r2;
|
||||
|
||||
const int64_t i1 = i11;
|
||||
const int64_t i2 = i12;
|
||||
|
||||
const char * src0_ptr = (const char *) src0->data + i02 * nb02;
|
||||
const char * src1_ptr = (const char *) params->wdata + (i11 + i12 * ne11) * src1_col_stride;
|
||||
char * dst_ptr = ((char *) dst->data + (i1 * nb1 + i2 * nb2));
|
||||
|
||||
const int64_t nrows = src1_end - src1_start;
|
||||
const int64_t ncols = src0_end - src0_start;
|
||||
|
||||
GGML_ASSERT(src1_ptr + src1_col_stride * nrows <= (const char *) params->wdata + params->wsize);
|
||||
|
||||
// If there are more than three rows in src1, use gemm; otherwise, use gemv.
|
||||
if (ne11 > 3) {
|
||||
gemm<BLOC_TYPE, INTER_SIZE, NB_COLS, PARAM_TYPE>(ne00,
|
||||
(float *) ((char *) dst->data) + src0_start, ne01,
|
||||
(const char *) src0->data + src0_start * nb01,
|
||||
(const char *) src1_wdata, ne11 - ne11 % 4, src0_end - src0_start);
|
||||
if (nrows > 3) {
|
||||
gemm<BLOC_TYPE, INTER_SIZE, NB_COLS, PARAM_TYPE>(ne00, (float *) (dst_ptr) + src0_start, nb1 / nb0,
|
||||
src0_ptr + src0_start * nb01, src1_ptr,
|
||||
nrows - (nrows % 4), ncols);
|
||||
}
|
||||
for (int iter = ne11 - ne11 % 4; iter < ne11; iter++) {
|
||||
gemv<BLOC_TYPE, INTER_SIZE, NB_COLS, PARAM_TYPE>(ne00,
|
||||
(float *) ((char *) dst->data + (iter * nb1)) + src0_start, ne01,
|
||||
(const char *) src0->data + src0_start * nb01,
|
||||
(const char *) src1_wdata + (src1_col_stride * iter), 1,
|
||||
src0_end - src0_start);
|
||||
for (int iter = nrows - (nrows % 4); iter < nrows; iter++) {
|
||||
gemv<BLOC_TYPE, INTER_SIZE, NB_COLS, PARAM_TYPE>(ne00, (float *) (dst_ptr + (iter * nb1)) + src0_start,
|
||||
ne01, src0_ptr + src0_start * nb01,
|
||||
src1_ptr + (src1_col_stride * iter), 1 /* nrows */, ncols);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -1647,6 +1670,12 @@ template <typename BLOC_TYPE, int64_t INTER_SIZE, int64_t NB_COLS, ggml_type PAR
|
|||
GGML_ASSERT(nb1 <= nb2);
|
||||
GGML_ASSERT(nb2 <= nb3);
|
||||
|
||||
// TODO: General batched mul mat for 4D tensors
|
||||
// Currently only supports 3D tensors
|
||||
GGML_ASSERT(ne03 == 1);
|
||||
GGML_ASSERT(ne13 == 1);
|
||||
GGML_ASSERT(ne3 == 1);
|
||||
|
||||
GGML_ASSERT(src1->type == GGML_TYPE_F32);
|
||||
|
||||
GGML_ASSERT(ggml_n_dims(op->src[0]) == 2);
|
||||
|
|
@ -1654,47 +1683,64 @@ template <typename BLOC_TYPE, int64_t INTER_SIZE, int64_t NB_COLS, ggml_type PAR
|
|||
|
||||
char * wdata = static_cast<char *>(params->wdata);
|
||||
const size_t nbw1 = ggml_row_size(PARAM_TYPE, ne10);
|
||||
const size_t nbw2 = nbw1 * ne11;
|
||||
|
||||
assert(params->wsize >= nbw1 * ne11);
|
||||
assert(params->wsize >= nbw2 * ne12);
|
||||
|
||||
const ggml_from_float_t from_float = ggml_get_type_traits_cpu(PARAM_TYPE)->from_float;
|
||||
|
||||
int64_t i11_processed = 0;
|
||||
for (int64_t i11 = ith * 4; i11 < ne11 - ne11 % 4; i11 += nth * 4) {
|
||||
ggml_quantize_mat_t<INTER_SIZE, PARAM_TYPE>((float *) ((char *) src1->data + i11 * nb11), (void *) (wdata + i11 * nbw1), 4, ne10);
|
||||
}
|
||||
// INFO: Quantization is done in planes to avoid extra complexity in chunking.
|
||||
// Flattening dimensions not multiple of INTER_SIZE would require extra handling depending on how
|
||||
// the planes are broadcast.
|
||||
for (int64_t i12 = 0; i12 < ne12; i12++) {
|
||||
char * data_ptr = (char *) src1->data + i12 * nb12;
|
||||
char * wdata_ptr = wdata + i12 * nbw2;
|
||||
|
||||
i11_processed = ne11 - ne11 % 4;
|
||||
for (int64_t i11 = i11_processed + ith; i11 < ne11; i11 += nth) {
|
||||
from_float((float *) ((char *) src1->data + i11 * nb11), (void *) (wdata + i11 * nbw1), ne10);
|
||||
for (int64_t i11 = ith * 4; i11 < ne11 - ne11 % 4; i11 += nth * 4) {
|
||||
ggml_quantize_mat_t<INTER_SIZE, PARAM_TYPE>((float *) (data_ptr + i11 * nb11),
|
||||
(void *) (wdata_ptr + i11 * nbw1), 4, ne10);
|
||||
}
|
||||
|
||||
const int64_t i11_processed = ne11 - ne11 % 4;
|
||||
for (int64_t i11 = i11_processed + ith; i11 < ne11; i11 += nth) {
|
||||
from_float((float *) (data_ptr + i11 * nb11), (void *) (wdata_ptr + i11 * nbw1), ne10);
|
||||
}
|
||||
}
|
||||
|
||||
// disable for NUMA
|
||||
const bool disable_chunking = ggml_is_numa();
|
||||
|
||||
// 4x chunks per thread
|
||||
int64_t nr = ggml_nrows(op->src[0]);
|
||||
int nth_scaled = nth * 4;
|
||||
int64_t chunk_size = (nr + nth_scaled - 1) / nth_scaled;
|
||||
int64_t nchunk = (nr + chunk_size - 1) / chunk_size;
|
||||
const int64_t nr0 = ggml_nrows(op->src[0]);
|
||||
|
||||
int nth_scaled = nth * 4;
|
||||
int64_t chunk_size0 = (nr0 + nth_scaled - 1) / nth_scaled;
|
||||
int64_t nchunk0 = (nr0 + chunk_size0 - 1) / chunk_size0;
|
||||
|
||||
// src1 is chunked only by full planes.
|
||||
// When we flatten we need to address dimensions not multiple of the q8 INTER_SIZE
|
||||
// to route them through GEMV.
|
||||
// nchunk1 = ne12 also avoids messing the chunking for models with no 3d tensors
|
||||
// to avoid affecting their performance
|
||||
int64_t nchunk1 = ne12;
|
||||
|
||||
// Ensure minimum chunk size to avoid alignment issues with high thread counts
|
||||
// Minimum chunk size should be at least NB_COLS to prevent overlapping chunks after alignment
|
||||
const int64_t min_chunk_size = NB_COLS;
|
||||
if (nchunk > 0 && (nr / nchunk) < min_chunk_size && nr >= min_chunk_size) {
|
||||
nchunk = (nr + min_chunk_size - 1) / min_chunk_size;
|
||||
if (nchunk0 > 0 && (nr0 / nchunk0) < min_chunk_size && nr0 >= min_chunk_size) {
|
||||
nchunk0 = (nr0 + min_chunk_size - 1) / min_chunk_size;
|
||||
}
|
||||
|
||||
if (nth == 1 || nchunk < nth || disable_chunking) {
|
||||
nchunk = nth;
|
||||
if (nth == 1 || nchunk0 < nth || disable_chunking) {
|
||||
nchunk0 = nth;
|
||||
}
|
||||
|
||||
const int64_t dr0 = (nr0 + nchunk0 - 1) / nchunk0;
|
||||
|
||||
// Ensure nchunk doesn't exceed the number of rows divided by minimum chunk size
|
||||
// This prevents creating too many tiny chunks that could overlap after alignment
|
||||
const int64_t max_nchunk = (nr + min_chunk_size - 1) / min_chunk_size;
|
||||
if (nchunk > max_nchunk) {
|
||||
nchunk = max_nchunk;
|
||||
}
|
||||
const int64_t max_nchunk = (nr0 + min_chunk_size - 1) / min_chunk_size;
|
||||
nchunk0 = MIN(nchunk0, max_nchunk);
|
||||
|
||||
if (ith == 0) {
|
||||
// Every thread starts at ith, so the first unprocessed chunk is nth. This saves a bit of coordination right at the start.
|
||||
|
|
@ -1706,23 +1752,30 @@ template <typename BLOC_TYPE, int64_t INTER_SIZE, int64_t NB_COLS, ggml_type PAR
|
|||
// The first chunk comes from our thread_id, the rest will get auto-assigned.
|
||||
int current_chunk = ith;
|
||||
|
||||
while (current_chunk < nchunk) {
|
||||
int64_t src0_start = (current_chunk * ne01) / nchunk;
|
||||
int64_t src0_end = ((current_chunk + 1) * ne01) / nchunk;
|
||||
while (current_chunk < nchunk0 * nchunk1) {
|
||||
const int64_t ith0 = current_chunk % nchunk0;
|
||||
const int64_t ith1 = current_chunk / nchunk0;
|
||||
|
||||
int64_t src0_start = dr0 * ith0;
|
||||
int64_t src0_end = MIN(src0_start + dr0, nr0);
|
||||
|
||||
// full-plane range for src1
|
||||
int64_t src1_start = ith1 * ne11;
|
||||
int64_t src1_end = (ith1 + 1) * ne11;
|
||||
|
||||
// Align boundaries to NB_COLS - round up to ensure all data is included
|
||||
// The chunk size limiting above ensures chunks are large enough to prevent overlaps
|
||||
src0_start = (src0_start % NB_COLS) ? src0_start + NB_COLS - (src0_start % NB_COLS) : src0_start;
|
||||
src0_end = (src0_end % NB_COLS) ? src0_end + NB_COLS - (src0_end % NB_COLS) : src0_end;
|
||||
if (src0_end > ne01) {
|
||||
src0_end = ne01;
|
||||
}
|
||||
src0_end = (src0_end % NB_COLS) ? src0_end + NB_COLS - (src0_end % NB_COLS) : src0_end;
|
||||
src0_end = MIN(src0_end, ne01);
|
||||
|
||||
// Make sure current plane is the last one before exiting
|
||||
if (src0_start >= src0_end) {
|
||||
break;
|
||||
current_chunk = ggml_threadpool_chunk_add(params->threadpool, 1);
|
||||
continue;
|
||||
}
|
||||
|
||||
forward_mul_mat_one_chunk(params, dst, src0_start, src0_end);
|
||||
forward_mul_mat_one_chunk(params, dst, src0_start, src0_end, src1_start, src1_end);
|
||||
|
||||
current_chunk = ggml_threadpool_chunk_add(params->threadpool, 1);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -73,6 +73,14 @@ static inline float op_log(float x) {
|
|||
return logf(x);
|
||||
}
|
||||
|
||||
// Scalar kernel for the EXPM1 unary op: returns e^x - 1.
// Uses expm1f() rather than expf(x) - 1.0f so that inputs close to zero keep
// their precision instead of cancelling to 0.
static inline float op_expm1(float x) {
    return expm1f(x);
}
|
||||
|
||||
// Scalar kernel for the SOFTPLUS unary op: returns log(1 + e^x).
// Above x = 20 the result is x itself (the same shortcut as before).
// Below that, log1pf() keeps the tiny results for very negative x, which
// logf(1.0f + expf(x)) would round to exactly 0.
static inline float op_softplus(float x) {
    if (x > 20.0f) {
        return x;
    }
    return log1pf(expf(x));
}
|
||||
|
||||
// Scalar kernel for the FLOOR unary op: returns floorf(x).
static inline float op_floor(float x) {
|
||||
return floorf(x);
|
||||
}
|
||||
|
|
@ -290,6 +298,14 @@ void ggml_compute_forward_log(const ggml_compute_params * params, ggml_tensor *
|
|||
unary_op<op_log>(params, dst);
|
||||
}
|
||||
|
||||
// Entry point for the EXPM1 unary op: forwards to the unary_op template
// instantiated with the op_expm1 scalar kernel.
void ggml_compute_forward_expm1(const ggml_compute_params * params, ggml_tensor * dst) {
|
||||
unary_op<op_expm1>(params, dst);
|
||||
}
|
||||
|
||||
// Entry point for the SOFTPLUS unary op: forwards to the unary_op template
// instantiated with the op_softplus scalar kernel.
void ggml_compute_forward_softplus(const ggml_compute_params * params, ggml_tensor * dst) {
|
||||
unary_op<op_softplus>(params, dst);
|
||||
}
|
||||
|
||||
// Entry point for the FLOOR unary op: forwards to the unary_op template
// instantiated with the op_floor scalar kernel.
void ggml_compute_forward_floor(const ggml_compute_params * params, ggml_tensor * dst) {
|
||||
unary_op<op_floor>(params, dst);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -22,6 +22,8 @@ void ggml_compute_forward_sqrt(const struct ggml_compute_params * params, struct
|
|||
void ggml_compute_forward_sin(const struct ggml_compute_params * params, struct ggml_tensor * dst);
|
||||
void ggml_compute_forward_cos(const struct ggml_compute_params * params, struct ggml_tensor * dst);
|
||||
void ggml_compute_forward_log(const struct ggml_compute_params * params, struct ggml_tensor * dst);
|
||||
void ggml_compute_forward_expm1(const struct ggml_compute_params * params, struct ggml_tensor * dst);
|
||||
void ggml_compute_forward_softplus(const struct ggml_compute_params * params, struct ggml_tensor * dst);
|
||||
void ggml_compute_forward_floor(const struct ggml_compute_params * params, struct ggml_tensor * dst);
|
||||
void ggml_compute_forward_ceil(const struct ggml_compute_params * params, struct ggml_tensor * dst);
|
||||
void ggml_compute_forward_round(const struct ggml_compute_params * params, struct ggml_tensor * dst);
|
||||
|
|
|
|||
|
|
@ -360,6 +360,13 @@ void ggml_vec_silu_f32(const int n, float * y, const float * x) {
|
|||
for (; i + 3 < n; i += 4) {
|
||||
vst1q_f32(y + i, ggml_v_silu(vld1q_f32(x + i)));
|
||||
}
|
||||
#elif defined(__riscv_v_intrinsic)
|
||||
for (int vl; i < n; i += vl) {
|
||||
vl = __riscv_vsetvl_e32m2(n - i);
|
||||
vfloat32m2_t vx = __riscv_vle32_v_f32m2(&x[i], vl);
|
||||
vfloat32m2_t vy = ggml_v_silu_m2(vx, vl);
|
||||
__riscv_vse32_v_f32m2(&y[i], vy, vl);
|
||||
}
|
||||
#endif
|
||||
for (; i < n; ++i) {
|
||||
y[i] = ggml_silu_f32(x[i]);
|
||||
|
|
@ -460,6 +467,16 @@ ggml_float ggml_vec_cvar_f32(const int n, float * y, const float * x, const floa
|
|||
val = vec_mul(val, val);
|
||||
sum += (ggml_float)vec_hsum_f32x4(val);
|
||||
}
|
||||
#elif defined(__riscv_v_intrinsic)
|
||||
vfloat64m1_t vsum = __riscv_vfmv_v_f_f64m1(0, 1);
|
||||
for (int vl; i < n; i += vl) {
|
||||
vl = __riscv_vsetvl_e32m2(n - i);
|
||||
vfloat32m2_t val = __riscv_vfsub_vf_f32m2(__riscv_vle32_v_f32m2(&x[i], vl), mean, vl);
|
||||
__riscv_vse32_v_f32m2(&y[i], val, vl);
|
||||
val = __riscv_vfmul_vv_f32m2(val, val, vl);
|
||||
vsum = __riscv_vfwredusum_vs_f32m2_f64m1(val, vsum, vl);
|
||||
}
|
||||
sum = (ggml_float)__riscv_vfmv_f_s_f64m1_f64(vsum);
|
||||
#endif
|
||||
for (; i < n; ++i) {
|
||||
float val = x[i] - mean;
|
||||
|
|
|
|||
|
|
@ -1416,6 +1416,16 @@ inline static void ggml_vec_sum_f32(const int n, float * s, const float * x) {
|
|||
#endif
|
||||
}
|
||||
|
||||
// Inclusive running sum over a float vector: y[0] = x[0], y[i] = y[i - 1] + x[i].
//   n - number of elements to process; nothing is written when n <= 0
//   y - output, receives n partial sums
//   x - input, n values
// The accumulation is done in float by reading back the previously written y[i - 1].
inline static void ggml_vec_cumsum_f32(const int n, float * y, const float * x) {
|
||||
for (int i = 0; i < n; ++i) {
|
||||
if (i == 0) {
|
||||
// first element has no predecessor: copy it through
y[i] = x[i];
|
||||
} else {
|
||||
y[i] = y[i - 1] + x[i];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
inline static void ggml_vec_sum_f32_ggf(const int n, ggml_float * s, const float * x) {
|
||||
ggml_float sum = 0.0;
|
||||
for (int i = 0; i < n; ++i) {
|
||||
|
|
|
|||
|
|
@ -586,6 +586,12 @@ static __device__ __forceinline__ void ggml_cuda_mad(half2 & acc, const half2 v,
|
|||
// If dst and src point at different address spaces then they are guaranteed to not be aliased.
|
||||
template <int nbytes, int alignment = 0>
|
||||
static __device__ __forceinline__ void ggml_cuda_memcpy_1(void * __restrict__ dst, const void * __restrict__ src) {
|
||||
static_assert(
|
||||
nbytes <= ggml_cuda_get_max_cpy_bytes() || alignment == 0,
|
||||
"You are misusing the alignment parameter for ggml_cuda_memcpy_1. "
|
||||
"The intent is for the parameter is only as a workaround if either one of the pointers is not properly aligned. "
|
||||
"If you use it to do more bytes per copy than ggml_cuda_max_cpy_bytes() the reads and writes may not be coalesced. "
|
||||
"Call ggml_cuda_memcpy_1 in a loop instead.");
|
||||
if constexpr (alignment != 0) {
|
||||
static_assert(nbytes % alignment == 0, "bad alignment");
|
||||
}
|
||||
|
|
|
|||
|
|
@ -2527,6 +2527,12 @@ static bool ggml_cuda_compute_forward(ggml_backend_cuda_context & ctx, struct gg
|
|||
case GGML_UNARY_OP_TRUNC:
|
||||
ggml_cuda_op_trunc(ctx, dst);
|
||||
break;
|
||||
case GGML_UNARY_OP_EXPM1:
|
||||
ggml_cuda_op_expm1(ctx, dst);
|
||||
break;
|
||||
case GGML_UNARY_OP_SOFTPLUS:
|
||||
ggml_cuda_op_softplus(ctx, dst);
|
||||
break;
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
|
|
@ -2992,6 +2998,36 @@ static void update_cuda_graph_executable(ggml_backend_cuda_context * cuda_ctx) {
|
|||
}
|
||||
#endif
|
||||
|
||||
// Decides whether a ROPE -> VIEW -> SET_ROWS node triple may be executed as one fused rope kernel.
//   rope     - the ROPE node
//   view     - the VIEW node that consumes the rope output
//   set_rows - the SET_ROWS node that consumes the view
// Returns true only when every check below passes; otherwise false.
static bool ggml_cuda_should_fuse_rope_set_rows(const ggml_tensor * rope,
|
||||
const ggml_tensor * view,
|
||||
const ggml_tensor * set_rows) {
|
||||
// Fusion is restricted to rope inputs with ne[3] == 1; other values are not tested
|
||||
if (rope->src[0]->ne[3] != 1) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// The fused destination (the SET_ROWS output) must be F32 or F16
if (set_rows->type != GGML_TYPE_F32 && set_rows->type != GGML_TYPE_F16) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// The row indices (src[1] of SET_ROWS) must be 64-bit integers
if (set_rows->src[1]->type != GGML_TYPE_I64) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// The view should flatten two dims of rope into one dim
|
||||
if (!ggml_is_contiguous(view) || view->ne[0] != rope->ne[0] * rope->ne[1]) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// Only the norm/neox rope kernels have the fusion code; the mode is read from op_params[2]
|
||||
const int mode = ((const int32_t *) rope->op_params)[2];
|
||||
if (mode != GGML_ROPE_TYPE_NORMAL && mode != GGML_ROPE_TYPE_NEOX) {
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
static bool ggml_cuda_can_fuse(const struct ggml_cgraph * cgraph, int node_idx, std::initializer_list<enum ggml_op> ops, std::initializer_list<enum ggml_unary_op> unary_ops) {
|
||||
#ifndef NDEBUG
|
||||
const size_t num_unary = std::count(ops.begin(), ops.end(), GGML_OP_UNARY);
|
||||
|
|
@ -3067,6 +3103,16 @@ static bool ggml_cuda_can_fuse(const struct ggml_cgraph * cgraph, int node_idx,
|
|||
}
|
||||
}
|
||||
|
||||
if (ops.size() == 3 && ggml_can_fuse_subgraph(cgraph, node_idx, ops, { node_idx + 2 })) {
|
||||
const ggml_tensor * rope = cgraph->nodes[node_idx];
|
||||
const ggml_tensor * view = cgraph->nodes[node_idx + 1];
|
||||
const ggml_tensor * set_rows = cgraph->nodes[node_idx + 2];
|
||||
|
||||
if (ggml_cuda_should_fuse_rope_set_rows(rope, view, set_rows)) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
if (!ggml_can_fuse(cgraph, node_idx, ops)) {
|
||||
return false;
|
||||
}
|
||||
|
|
@ -3196,6 +3242,15 @@ static void evaluate_and_capture_cuda_graph(ggml_backend_cuda_context * cuda_ctx
|
|||
continue;
|
||||
}
|
||||
|
||||
if (ggml_cuda_can_fuse(cgraph, i, { GGML_OP_ROPE, GGML_OP_VIEW, GGML_OP_SET_ROWS }, {})) {
|
||||
ggml_tensor * rope = cgraph->nodes[i];
|
||||
ggml_tensor * set_rows = cgraph->nodes[i + 2];
|
||||
|
||||
ggml_cuda_op_rope_fused(*cuda_ctx, rope, set_rows);
|
||||
i += 2;
|
||||
continue;
|
||||
}
|
||||
|
||||
if (node->op == GGML_OP_ADD) {
|
||||
int n_fuse = 0;
|
||||
ggml_op ops[8];
|
||||
|
|
@ -3780,6 +3835,8 @@ static bool ggml_backend_cuda_device_supports_op(ggml_backend_dev_t dev, const g
|
|||
case GGML_UNARY_OP_GELU_QUICK:
|
||||
case GGML_UNARY_OP_TANH:
|
||||
case GGML_UNARY_OP_EXP:
|
||||
case GGML_UNARY_OP_EXPM1:
|
||||
case GGML_UNARY_OP_SOFTPLUS:
|
||||
case GGML_UNARY_OP_ELU:
|
||||
case GGML_UNARY_OP_FLOOR:
|
||||
case GGML_UNARY_OP_CEIL:
|
||||
|
|
|
|||
|
|
@ -1,3 +1,6 @@
|
|||
#include "convert.cuh"
|
||||
#include "ggml-cuda/common.cuh"
|
||||
#include "ggml.h"
|
||||
#include "rope.cuh"
|
||||
|
||||
struct rope_corr_dims {
|
||||
|
|
@ -37,11 +40,23 @@ static __device__ void rope_yarn(
|
|||
}
|
||||
}
|
||||
|
||||
template<bool forward, bool has_ff, typename T>
|
||||
static __global__ void rope_norm(
|
||||
const T * x, T * dst, const int ne0, const int ne1, const int s1, const int s2, const int n_dims,
|
||||
const int32_t * pos, const float freq_scale, const float ext_factor, const float attn_factor,
|
||||
const rope_corr_dims corr_dims, const float theta_scale, const float * freq_factors) {
|
||||
template <bool forward, bool has_ff, typename T, typename D>
|
||||
static __global__ void rope_norm(const T * x,
|
||||
D * dst,
|
||||
const int ne0,
|
||||
const int ne1,
|
||||
const int s1,
|
||||
const int s2,
|
||||
const int n_dims,
|
||||
const int32_t * pos,
|
||||
const float freq_scale,
|
||||
const float ext_factor,
|
||||
const float attn_factor,
|
||||
const rope_corr_dims corr_dims,
|
||||
const float theta_scale,
|
||||
const float * freq_factors,
|
||||
const int64_t * row_indices,
|
||||
const int set_rows_stride) {
|
||||
const int i0 = 2*(blockDim.y*blockIdx.y + threadIdx.y);
|
||||
|
||||
if (i0 >= ne0) {
|
||||
|
|
@ -53,13 +68,27 @@ static __global__ void rope_norm(
|
|||
const int row_x = row_dst % ne1;
|
||||
const int channel_x = row_dst / ne1;
|
||||
|
||||
const int idst = row_dst*ne0 + i0;
|
||||
int idst = row_dst * ne0 + i0;
|
||||
const int ix = channel_x*s2 + row_x*s1 + i0;
|
||||
|
||||
if (i0 >= n_dims) {
|
||||
dst[idst + 0] = x[ix + 0];
|
||||
dst[idst + 1] = x[ix + 1];
|
||||
// Fusion optimization: ROPE + VIEW + SET_ROWS.
|
||||
// The rope output is viewed as a 1D tensor and offset based on a row index in row_indices.
|
||||
if (set_rows_stride != 0) {
|
||||
idst = row_x * ne0 + i0;
|
||||
idst += row_indices[channel_x] * set_rows_stride;
|
||||
}
|
||||
|
||||
const auto & store_coaelsced = [&](float x0, float x1) {
|
||||
if constexpr (std::is_same_v<float, D>) {
|
||||
float2 v = make_float2(x0, x1);
|
||||
ggml_cuda_memcpy_1<8>(dst + idst, &v);
|
||||
} else if constexpr (std::is_same_v<half, D>) {
|
||||
half2 v = make_half2(x0, x1);
|
||||
ggml_cuda_memcpy_1<4>(dst + idst, &v);
|
||||
}
|
||||
};
|
||||
if (i0 >= n_dims) {
|
||||
store_coaelsced(x[ix + 0], x[ix + 1]);
|
||||
return;
|
||||
}
|
||||
|
||||
|
|
@ -75,15 +104,26 @@ static __global__ void rope_norm(
|
|||
const float x0 = x[ix + 0];
|
||||
const float x1 = x[ix + 1];
|
||||
|
||||
dst[idst + 0] = x0*cos_theta - x1*sin_theta;
|
||||
dst[idst + 1] = x0*sin_theta + x1*cos_theta;
|
||||
store_coaelsced(x0 * cos_theta - x1 * sin_theta, x0 * sin_theta + x1 * cos_theta);
|
||||
}
|
||||
|
||||
template<bool forward, bool has_ff, typename T>
|
||||
static __global__ void rope_neox(
|
||||
const T * x, T * dst, const int ne0, const int ne1, const int s1, const int s2, const int n_dims,
|
||||
const int32_t * pos, const float freq_scale, const float ext_factor, const float attn_factor,
|
||||
const rope_corr_dims corr_dims, const float theta_scale, const float * freq_factors) {
|
||||
template <bool forward, bool has_ff, typename T, typename D>
|
||||
static __global__ void rope_neox(const T * x,
|
||||
D * dst,
|
||||
const int ne0,
|
||||
const int ne1,
|
||||
const int s1,
|
||||
const int s2,
|
||||
const int n_dims,
|
||||
const int32_t * pos,
|
||||
const float freq_scale,
|
||||
const float ext_factor,
|
||||
const float attn_factor,
|
||||
const rope_corr_dims corr_dims,
|
||||
const float theta_scale,
|
||||
const float * freq_factors,
|
||||
const int64_t * row_indices,
|
||||
const int set_rows_stride) {
|
||||
const int i0 = 2*(blockDim.y*blockIdx.y + threadIdx.y);
|
||||
|
||||
if (i0 >= ne0) {
|
||||
|
|
@ -95,12 +135,19 @@ static __global__ void rope_neox(
|
|||
const int row_x = row_dst % ne1;
|
||||
const int channel_x = row_dst / ne1;
|
||||
|
||||
const int idst = row_dst*ne0 + i0/2;
|
||||
int idst = row_dst * ne0 + i0 / 2;
|
||||
const int ix = channel_x*s2 + row_x*s1 + i0/2;
|
||||
|
||||
// Fusion optimization: ROPE + VIEW + SET_ROWS.
|
||||
// The rope output is viewed as a 1D tensor and offset based on a row index in row_indices.
|
||||
if (set_rows_stride != 0) {
|
||||
idst = row_x * ne0 + i0 / 2;
|
||||
idst += row_indices[channel_x] * set_rows_stride;
|
||||
}
|
||||
|
||||
if (i0 >= n_dims) {
|
||||
dst[idst + i0/2 + 0] = x[ix + i0/2 + 0];
|
||||
dst[idst + i0/2 + 1] = x[ix + i0/2 + 1];
|
||||
dst[idst + i0 / 2 + 0] = ggml_cuda_cast<D>(x[ix + i0 / 2 + 0]);
|
||||
dst[idst + i0 / 2 + 1] = ggml_cuda_cast<D>(x[ix + i0 / 2 + 1]);
|
||||
|
||||
return;
|
||||
}
|
||||
|
|
@ -117,8 +164,8 @@ static __global__ void rope_neox(
|
|||
const float x0 = x[ix + 0];
|
||||
const float x1 = x[ix + n_dims/2];
|
||||
|
||||
dst[idst + 0] = x0*cos_theta - x1*sin_theta;
|
||||
dst[idst + n_dims/2] = x0*sin_theta + x1*cos_theta;
|
||||
dst[idst + 0] = ggml_cuda_cast<D>(x0 * cos_theta - x1 * sin_theta);
|
||||
dst[idst + n_dims / 2] = ggml_cuda_cast<D>(x0 * sin_theta + x1 * cos_theta);
|
||||
}
|
||||
|
||||
template<bool forward, bool has_ff, typename T>
|
||||
|
|
@ -238,11 +285,25 @@ static __global__ void rope_vision(
|
|||
dst[idst + n_dims] = x0*sin_theta + x1*cos_theta;
|
||||
}
|
||||
|
||||
template<bool forward, typename T>
|
||||
static void rope_norm_cuda(
|
||||
const T * x, T * dst, const int ne0, const int ne1, const int s1, const int s2, const int n_dims, const int nr,
|
||||
const int32_t * pos, const float freq_scale, const float freq_base, const float ext_factor, const float attn_factor,
|
||||
const rope_corr_dims corr_dims, const float * freq_factors, cudaStream_t stream) {
|
||||
template <bool forward, typename T, typename D>
|
||||
static void rope_norm_cuda(const T * x,
|
||||
D * dst,
|
||||
const int ne0,
|
||||
const int ne1,
|
||||
const int s1,
|
||||
const int s2,
|
||||
const int n_dims,
|
||||
const int nr,
|
||||
const int32_t * pos,
|
||||
const float freq_scale,
|
||||
const float freq_base,
|
||||
const float ext_factor,
|
||||
const float attn_factor,
|
||||
const rope_corr_dims corr_dims,
|
||||
const float * freq_factors,
|
||||
const int64_t * row_indices,
|
||||
const int set_rows_stride,
|
||||
cudaStream_t stream) {
|
||||
GGML_ASSERT(ne0 % 2 == 0);
|
||||
const dim3 block_dims(1, CUDA_ROPE_BLOCK_SIZE, 1);
|
||||
const int n_blocks_x = (ne0 + 2*CUDA_ROPE_BLOCK_SIZE - 1) / (2*CUDA_ROPE_BLOCK_SIZE);
|
||||
|
|
@ -252,20 +313,34 @@ static void rope_norm_cuda(
|
|||
|
||||
if (freq_factors == nullptr) {
|
||||
rope_norm<forward, false><<<block_nums, block_dims, 0, stream>>>(
|
||||
x, dst, ne0, ne1, s1, s2, n_dims, pos, freq_scale, ext_factor,
|
||||
attn_factor, corr_dims, theta_scale, freq_factors);
|
||||
x, dst, ne0, ne1, s1, s2, n_dims, pos, freq_scale, ext_factor, attn_factor, corr_dims, theta_scale,
|
||||
freq_factors, row_indices, set_rows_stride);
|
||||
} else {
|
||||
rope_norm<forward, true><<<block_nums, block_dims, 0, stream>>>(
|
||||
x, dst, ne0, ne1, s1, s2, n_dims, pos, freq_scale, ext_factor,
|
||||
attn_factor, corr_dims, theta_scale, freq_factors);
|
||||
x, dst, ne0, ne1, s1, s2, n_dims, pos, freq_scale, ext_factor, attn_factor, corr_dims, theta_scale,
|
||||
freq_factors, row_indices, set_rows_stride);
|
||||
}
|
||||
}
|
||||
|
||||
template<bool forward, typename T>
|
||||
static void rope_neox_cuda(
|
||||
const T * x, T * dst, const int ne0, const int ne1, const int s1, const int s2, const int n_dims, const int nr,
|
||||
const int32_t * pos, const float freq_scale, const float freq_base, const float ext_factor, const float attn_factor,
|
||||
const rope_corr_dims corr_dims, const float * freq_factors, cudaStream_t stream) {
|
||||
template <bool forward, typename T, typename D>
|
||||
static void rope_neox_cuda(const T * x,
|
||||
D * dst,
|
||||
const int ne0,
|
||||
const int ne1,
|
||||
const int s1,
|
||||
const int s2,
|
||||
const int n_dims,
|
||||
const int nr,
|
||||
const int32_t * pos,
|
||||
const float freq_scale,
|
||||
const float freq_base,
|
||||
const float ext_factor,
|
||||
const float attn_factor,
|
||||
const rope_corr_dims corr_dims,
|
||||
const float * freq_factors,
|
||||
const int64_t * row_indices,
|
||||
const int set_rows_stride,
|
||||
cudaStream_t stream) {
|
||||
GGML_ASSERT(ne0 % 2 == 0);
|
||||
const dim3 block_dims(1, CUDA_ROPE_BLOCK_SIZE, 1);
|
||||
const int n_blocks_x = (ne0 + 2*CUDA_ROPE_BLOCK_SIZE - 1) / (2*CUDA_ROPE_BLOCK_SIZE);
|
||||
|
|
@ -274,13 +349,13 @@ static void rope_neox_cuda(
|
|||
const float theta_scale = powf(freq_base, -2.0f/n_dims);
|
||||
|
||||
if (freq_factors == nullptr) {
|
||||
rope_neox<forward, false, T><<<block_nums, block_dims, 0, stream>>>(
|
||||
x, dst, ne0, ne1, s1, s2, n_dims, pos, freq_scale, ext_factor,
|
||||
attn_factor, corr_dims, theta_scale, freq_factors);
|
||||
rope_neox<forward, false><<<block_nums, block_dims, 0, stream>>>(
|
||||
x, dst, ne0, ne1, s1, s2, n_dims, pos, freq_scale, ext_factor, attn_factor, corr_dims, theta_scale,
|
||||
freq_factors, row_indices, set_rows_stride);
|
||||
} else {
|
||||
rope_neox<forward, true, T><<<block_nums, block_dims, 0, stream>>>(
|
||||
x, dst, ne0, ne1, s1, s2, n_dims, pos, freq_scale, ext_factor,
|
||||
attn_factor, corr_dims, theta_scale, freq_factors);
|
||||
rope_neox<forward, true><<<block_nums, block_dims, 0, stream>>>(
|
||||
x, dst, ne0, ne1, s1, s2, n_dims, pos, freq_scale, ext_factor, attn_factor, corr_dims, theta_scale,
|
||||
freq_factors, row_indices, set_rows_stride);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -333,7 +408,9 @@ static void rope_vision_cuda(
|
|||
}
|
||||
|
||||
template <bool forward>
|
||||
void ggml_cuda_op_rope_impl(ggml_backend_cuda_context & ctx, ggml_tensor * dst) {
|
||||
void ggml_cuda_op_rope_impl(ggml_backend_cuda_context & ctx,
|
||||
ggml_tensor * dst,
|
||||
const ggml_tensor * set_rows = nullptr) {
|
||||
const ggml_tensor * src0 = dst->src[0];
|
||||
const ggml_tensor * src1 = dst->src[1];
|
||||
const ggml_tensor * src2 = dst->src[2];
|
||||
|
|
@ -341,12 +418,25 @@ void ggml_cuda_op_rope_impl(ggml_backend_cuda_context & ctx, ggml_tensor * dst)
|
|||
const float * src0_d = (const float *)src0->data;
|
||||
const float * src1_d = (const float *)src1->data;
|
||||
|
||||
float * dst_d = (float *)dst->data;
|
||||
void * dst_d = dst->data;
|
||||
const int64_t * row_indices = nullptr;
|
||||
ggml_type dst_type = dst->type;
|
||||
int set_rows_stride = 0;
|
||||
|
||||
if (set_rows != nullptr) {
|
||||
GGML_ASSERT(forward);
|
||||
dst_d = set_rows->data;
|
||||
row_indices = (const int64_t *) set_rows->src[1]->data;
|
||||
dst_type = set_rows->type;
|
||||
set_rows_stride = set_rows->nb[1] / ggml_type_size(set_rows->type);
|
||||
}
|
||||
cudaStream_t stream = ctx.stream();
|
||||
|
||||
GGML_ASSERT(src0->type == GGML_TYPE_F32 || src0->type == GGML_TYPE_F16);
|
||||
GGML_ASSERT( dst->type == GGML_TYPE_F32 || dst->type == GGML_TYPE_F16);
|
||||
GGML_ASSERT(src0->type == dst->type);
|
||||
// When not fused, src0 and dst types must match
|
||||
// When fused (ROPE+VIEW+SET_ROWS), src0 may be F32 and dst may be F16
|
||||
GGML_ASSERT(src0->type == dst->type || (src0->type == GGML_TYPE_F32 && dst->type == GGML_TYPE_F16));
|
||||
|
||||
const int64_t ne00 = src0->ne[0]; // head dims
|
||||
const int64_t ne01 = src0->ne[1]; // num heads
|
||||
|
|
@ -404,14 +494,18 @@ void ggml_cuda_op_rope_impl(ggml_backend_cuda_context & ctx, ggml_tensor * dst)
|
|||
|
||||
// compute
|
||||
if (is_neox) {
|
||||
if (src0->type == GGML_TYPE_F32) {
|
||||
rope_neox_cuda<forward>(
|
||||
(const float *) src0_d, (float *) dst_d, ne00, ne01, s01, s02, n_dims, nr, pos, freq_scale,
|
||||
freq_base, ext_factor, attn_factor, corr_dims, freq_factors, stream);
|
||||
} else if (src0->type == GGML_TYPE_F16) {
|
||||
rope_neox_cuda<forward>(
|
||||
(const half *) src0_d, (half *) dst_d, ne00, ne01, s01, s02, n_dims, nr, pos, freq_scale,
|
||||
freq_base, ext_factor, attn_factor, corr_dims, freq_factors, stream);
|
||||
if (src0->type == GGML_TYPE_F32 && dst_type == GGML_TYPE_F32) {
|
||||
rope_neox_cuda<forward, float, float>((const float *) src0_d, (float *) dst_d, ne00, ne01, s01, s02, n_dims,
|
||||
nr, pos, freq_scale, freq_base, ext_factor, attn_factor, corr_dims,
|
||||
freq_factors, row_indices, set_rows_stride, stream);
|
||||
} else if (src0->type == GGML_TYPE_F32 && dst_type == GGML_TYPE_F16) {
|
||||
rope_neox_cuda<forward, float, half>((const float *) src0_d, (half *) dst_d, ne00, ne01, s01, s02, n_dims,
|
||||
nr, pos, freq_scale, freq_base, ext_factor, attn_factor, corr_dims,
|
||||
freq_factors, row_indices, set_rows_stride, stream);
|
||||
} else if (src0->type == GGML_TYPE_F16 && dst_type == GGML_TYPE_F16) {
|
||||
rope_neox_cuda<forward, half, half>((const half *) src0_d, (half *) dst_d, ne00, ne01, s01, s02, n_dims, nr,
|
||||
pos, freq_scale, freq_base, ext_factor, attn_factor, corr_dims,
|
||||
freq_factors, row_indices, set_rows_stride, stream);
|
||||
} else {
|
||||
GGML_ABORT("fatal error");
|
||||
}
|
||||
|
|
@ -440,14 +534,18 @@ void ggml_cuda_op_rope_impl(ggml_backend_cuda_context & ctx, ggml_tensor * dst)
|
|||
GGML_ABORT("fatal error");
|
||||
}
|
||||
} else {
|
||||
if (src0->type == GGML_TYPE_F32) {
|
||||
rope_norm_cuda<forward>(
|
||||
(const float *) src0_d, (float *) dst_d, ne00, ne01, s01, s02, n_dims, nr, pos, freq_scale,
|
||||
freq_base, ext_factor, attn_factor, corr_dims, freq_factors, stream);
|
||||
} else if (src0->type == GGML_TYPE_F16) {
|
||||
rope_norm_cuda<forward>(
|
||||
(const half *) src0_d, (half *) dst_d, ne00, ne01, s01, s02, n_dims, nr, pos, freq_scale,
|
||||
freq_base, ext_factor, attn_factor, corr_dims, freq_factors, stream);
|
||||
if (src0->type == GGML_TYPE_F32 && dst_type == GGML_TYPE_F32) {
|
||||
rope_norm_cuda<forward, float, float>((const float *) src0_d, (float *) dst_d, ne00, ne01, s01, s02, n_dims,
|
||||
nr, pos, freq_scale, freq_base, ext_factor, attn_factor, corr_dims,
|
||||
freq_factors, row_indices, set_rows_stride, stream);
|
||||
} else if (src0->type == GGML_TYPE_F32 && dst_type == GGML_TYPE_F16) {
|
||||
rope_norm_cuda<forward, float, half>((const float *) src0_d, (half *) dst_d, ne00, ne01, s01, s02, n_dims,
|
||||
nr, pos, freq_scale, freq_base, ext_factor, attn_factor, corr_dims,
|
||||
freq_factors, row_indices, set_rows_stride, stream);
|
||||
} else if (src0->type == GGML_TYPE_F16 && dst_type == GGML_TYPE_F16) {
|
||||
rope_norm_cuda<forward, half, half>((const half *) src0_d, (half *) dst_d, ne00, ne01, s01, s02, n_dims, nr,
|
||||
pos, freq_scale, freq_base, ext_factor, attn_factor, corr_dims,
|
||||
freq_factors, row_indices, set_rows_stride, stream);
|
||||
} else {
|
||||
GGML_ABORT("fatal error");
|
||||
}
|
||||
|
|
@ -461,3 +559,7 @@ void ggml_cuda_op_rope(ggml_backend_cuda_context & ctx, ggml_tensor * dst) {
|
|||
void ggml_cuda_op_rope_back(ggml_backend_cuda_context & ctx, ggml_tensor * dst) {
|
||||
ggml_cuda_op_rope_impl<false>(ctx, dst);
|
||||
}
|
||||
|
||||
// Entry point for the fused ROPE + VIEW + SET_ROWS path.
// Runs the forward rope implementation (template argument true) on `rope`, passing `set_rows`
// so the implementation can target the SET_ROWS tensor instead of the rope tensor's own data.
void ggml_cuda_op_rope_fused(ggml_backend_cuda_context & ctx, ggml_tensor * rope, ggml_tensor * set_rows) {
|
||||
ggml_cuda_op_rope_impl<true>(ctx, rope, set_rows);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -5,3 +5,5 @@
|
|||
void ggml_cuda_op_rope(ggml_backend_cuda_context & ctx, ggml_tensor * dst);
|
||||
|
||||
void ggml_cuda_op_rope_back(ggml_backend_cuda_context & ctx, ggml_tensor * dst);
|
||||
|
||||
void ggml_cuda_op_rope_fused(ggml_backend_cuda_context & ctx, ggml_tensor * dst, ggml_tensor * set_rows);
|
||||
|
|
|
|||
|
|
@ -81,6 +81,14 @@ static __device__ __forceinline__ float op_log(float x) {
|
|||
return logf(x);
|
||||
}
|
||||
|
||||
// Device-side element op: returns expm1f(x), i.e. exp(x) - 1.
static __device__ __forceinline__ float op_expm1(float x) {
|
||||
return expm1f(x);
|
||||
}
|
||||
|
||||
// Device-side element op: softplus, log(1 + exp(x)).
// For x > 20 the input is returned unchanged instead of evaluating the expression
// (presumably because the result is indistinguishable from x in float there — confirm).
static __device__ __forceinline__ float op_softplus(float x) {
|
||||
return (x > 20.0f) ? x : logf(1.0f + expf(x));
|
||||
}
|
||||
|
||||
// Device-side element op: ELU. Returns x for x > 0, otherwise expm1f(x).
static __device__ __forceinline__ float op_elu(float x) {
|
||||
return (x > 0.f) ? x : expm1f(x);
|
||||
}
|
||||
|
|
@ -233,6 +241,14 @@ void ggml_cuda_op_round(ggml_backend_cuda_context & ctx, ggml_tensor * dst) {
|
|||
void ggml_cuda_op_trunc(ggml_backend_cuda_context & ctx, ggml_tensor * dst) {
|
||||
ggml_cuda_op_unary<op_trunc>(ctx, dst);
|
||||
}
|
||||
|
||||
// CUDA backend handler for GGML_UNARY_OP_EXPM1: runs the generic unary launcher with op_expm1 on dst.
void ggml_cuda_op_expm1(ggml_backend_cuda_context & ctx, ggml_tensor * dst) {
|
||||
ggml_cuda_op_unary<op_expm1>(ctx, dst);
|
||||
}
|
||||
|
||||
// CUDA backend handler for GGML_UNARY_OP_SOFTPLUS: runs the generic unary launcher with op_softplus on dst.
void ggml_cuda_op_softplus(ggml_backend_cuda_context & ctx, ggml_tensor * dst) {
|
||||
ggml_cuda_op_unary<op_softplus>(ctx, dst);
|
||||
}
|
||||
/* gated ops */
|
||||
|
||||
template <float (*op)(float), typename T>
|
||||
|
|
|
|||
|
|
@ -61,6 +61,10 @@ void ggml_cuda_op_cos(ggml_backend_cuda_context & ctx, ggml_tensor * dst);
|
|||
|
||||
void ggml_cuda_op_log(ggml_backend_cuda_context & ctx, ggml_tensor * dst);
|
||||
|
||||
void ggml_cuda_op_expm1(ggml_backend_cuda_context & ctx, ggml_tensor * dst);
|
||||
|
||||
void ggml_cuda_op_softplus(ggml_backend_cuda_context & ctx, ggml_tensor * dst);
|
||||
|
||||
void ggml_cuda_op_elu(ggml_backend_cuda_context & ctx, ggml_tensor * dst);
|
||||
|
||||
void ggml_cuda_op_floor(ggml_backend_cuda_context & ctx, ggml_tensor * dst);
|
||||
|
|
|
|||
|
|
@ -102,7 +102,7 @@ static bool ggml_op_is_empty(enum ggml_op op) {
|
|||
}
|
||||
}
|
||||
|
||||
static inline float ggml_softplus(float input) {
|
||||
static inline float ggml_compute_softplus_f32(float input) {
|
||||
return (input > 20.0f) ? input : logf(1 + expf(input));
|
||||
}
|
||||
//
|
||||
|
|
|
|||
|
|
@ -1438,6 +1438,30 @@ ggml_metal_pipeline_t ggml_metal_library_get_pipeline_conv_transpose_2d(ggml_met
|
|||
return res;
|
||||
}
|
||||
|
||||
// Returns the Metal compute pipeline for a GGML_OP_CONV_2D node.
//   lib - Metal library to look the pipeline up in (and compile into, if missing)
//   op  - the CONV_2D node; src[0] must be contiguous and F16 or F32, src[1] and the
//         output must be F32 (enforced by the assertions below)
// The pipeline name is "kernel_conv_2d_<src0 type>_<src1 type>". An already registered
// pipeline is returned as-is; otherwise it is compiled under that name.
ggml_metal_pipeline_t ggml_metal_library_get_pipeline_conv_2d(ggml_metal_library_t lib, const ggml_tensor * op) {
|
||||
assert(op->op == GGML_OP_CONV_2D);
|
||||
|
||||
GGML_ASSERT(ggml_is_contiguous(op->src[0]));
|
||||
GGML_ASSERT(op->src[0]->type == GGML_TYPE_F16 || op->src[0]->type == GGML_TYPE_F32);
|
||||
GGML_ASSERT(op->src[1]->type == GGML_TYPE_F32);
|
||||
GGML_ASSERT(op->type == GGML_TYPE_F32);
|
||||
|
||||
char base[256];
|
||||
char name[256];
|
||||
|
||||
snprintf(base, 256, "kernel_conv_2d_%s_%s", ggml_type_name(op->src[0]->type), ggml_type_name(op->src[1]->type));
|
||||
// no per-op specialization here: the lookup name is just a copy of the base kernel name
snprintf(name, 256, "%s", base);
|
||||
|
||||
ggml_metal_pipeline_t res = ggml_metal_library_get_pipeline(lib, name);
|
||||
if (res) {
|
||||
return res;
|
||||
}
|
||||
|
||||
// not found: compile it (nullptr is passed as the last argument)
res = ggml_metal_library_compile_pipeline(lib, base, name, nullptr);
|
||||
|
||||
return res;
|
||||
}
|
||||
|
||||
ggml_metal_pipeline_t ggml_metal_library_get_pipeline_upscale(ggml_metal_library_t lib, const ggml_tensor * op) {
|
||||
assert(op->op == GGML_OP_UPSCALE);
|
||||
|
||||
|
|
|
|||
|
|
@ -133,6 +133,7 @@ ggml_metal_pipeline_t ggml_metal_library_get_pipeline_rope (ggml_me
|
|||
ggml_metal_pipeline_t ggml_metal_library_get_pipeline_im2col (ggml_metal_library_t lib, const struct ggml_tensor * op);
|
||||
ggml_metal_pipeline_t ggml_metal_library_get_pipeline_conv_transpose_1d (ggml_metal_library_t lib, const struct ggml_tensor * op);
|
||||
ggml_metal_pipeline_t ggml_metal_library_get_pipeline_conv_transpose_2d (ggml_metal_library_t lib, const struct ggml_tensor * op);
|
||||
ggml_metal_pipeline_t ggml_metal_library_get_pipeline_conv_2d (ggml_metal_library_t lib, const struct ggml_tensor * op);
|
||||
ggml_metal_pipeline_t ggml_metal_library_get_pipeline_upscale (ggml_metal_library_t lib, const struct ggml_tensor * op);
|
||||
ggml_metal_pipeline_t ggml_metal_library_get_pipeline_pad (ggml_metal_library_t lib, const struct ggml_tensor * op);
|
||||
ggml_metal_pipeline_t ggml_metal_library_get_pipeline_pad_reflect_1d (ggml_metal_library_t lib, const struct ggml_tensor * op);
|
||||
|
|
|
|||
|
|
@ -885,6 +885,11 @@ bool ggml_metal_device_supports_op(ggml_metal_device_t dev, const struct ggml_te
|
|||
return true;
|
||||
case GGML_OP_IM2COL:
|
||||
return ggml_is_contiguous(op->src[1]) && op->src[1]->type == GGML_TYPE_F32 && (op->type == GGML_TYPE_F16 || op->type == GGML_TYPE_F32);
|
||||
case GGML_OP_CONV_2D:
|
||||
return ggml_is_contiguous(op->src[0]) &&
|
||||
op->src[1]->type == GGML_TYPE_F32 &&
|
||||
op->type == GGML_TYPE_F32 &&
|
||||
(op->src[0]->type == GGML_TYPE_F16 || op->src[0]->type == GGML_TYPE_F32);
|
||||
case GGML_OP_POOL_1D:
|
||||
return false;
|
||||
case GGML_OP_UPSCALE:
|
||||
|
|
|
|||
|
|
@ -528,6 +528,36 @@ typedef struct {
|
|||
uint64_t nb2;
|
||||
} ggml_metal_kargs_conv_transpose_2d;
|
||||
|
||||
// Kernel arguments for kernel_conv_2d, as populated by ggml_metal_op_conv_2d:
//   nb00..nb03 - nb[] of op->src[0] (bound to the kernel's `weights` buffer)
//   nb10..nb13 - nb[] of op->src[1] (bound to the kernel's `src` buffer)
//   nb0..nb3   - nb[] of the output tensor
//   IW, IH     - ne[0], ne[1] of src[1]
//   KW, KH     - ne[0], ne[1] of src[0]
//   IC, OC     - ne[2], ne[3] of src[0]
//   OW, OH, N  - ne[0], ne[1], ne[3] of the output
//   s0/s1, p0/p1, d0/d1 - op_params[0..5]; the kernel uses them as
//                stride, padding and dilation along x (0) and y (1)
typedef struct {
|
||||
uint64_t nb00;
|
||||
uint64_t nb01;
|
||||
uint64_t nb02;
|
||||
uint64_t nb03;
|
||||
uint64_t nb10;
|
||||
uint64_t nb11;
|
||||
uint64_t nb12;
|
||||
uint64_t nb13;
|
||||
uint64_t nb0;
|
||||
uint64_t nb1;
|
||||
uint64_t nb2;
|
||||
uint64_t nb3;
|
||||
int32_t IW;
|
||||
int32_t IH;
|
||||
int32_t KW;
|
||||
int32_t KH;
|
||||
int32_t IC;
|
||||
int32_t OC;
|
||||
int32_t OW;
|
||||
int32_t OH;
|
||||
int32_t N;
|
||||
int32_t s0;
|
||||
int32_t s1;
|
||||
int32_t p0;
|
||||
int32_t p1;
|
||||
int32_t d0;
|
||||
int32_t d1;
|
||||
} ggml_metal_kargs_conv_2d;
|
||||
|
||||
typedef struct {
|
||||
uint64_t ofs0;
|
||||
uint64_t ofs1;
|
||||
|
|
|
|||
|
|
@ -10,6 +10,7 @@
|
|||
|
||||
#include <cassert>
|
||||
#include <algorithm>
|
||||
#include <limits>
|
||||
|
||||
static ggml_metal_buffer_id ggml_metal_get_buffer_id(const ggml_tensor * t) {
|
||||
if (!t) {
|
||||
|
|
@ -364,6 +365,10 @@ static int ggml_metal_op_encode_impl(ggml_metal_op_t ctx, int idx) {
|
|||
{
|
||||
n_fuse = ggml_metal_op_im2col(ctx, idx);
|
||||
} break;
|
||||
case GGML_OP_CONV_2D:
|
||||
{
|
||||
n_fuse = ggml_metal_op_conv_2d(ctx, idx);
|
||||
} break;
|
||||
case GGML_OP_CONV_TRANSPOSE_1D:
|
||||
{
|
||||
n_fuse = ggml_metal_op_conv_transpose_1d(ctx, idx);
|
||||
|
|
@ -1036,11 +1041,6 @@ int ggml_metal_op_set_rows(ggml_metal_op_t ctx, int idx) {
|
|||
|
||||
nth = std::min(nth, nk0);
|
||||
|
||||
if (nth*nrptg > ggml_metal_pipeline_max_theads_per_threadgroup(pipeline)) {
|
||||
nth = ggml_metal_pipeline_max_theads_per_threadgroup(pipeline);
|
||||
nrptg = 1;
|
||||
}
|
||||
|
||||
ggml_metal_kargs_set_rows args = {
|
||||
/*.nk0 =*/ nk0,
|
||||
/*.ne01 =*/ ne01,
|
||||
|
|
@ -3082,6 +3082,84 @@ int ggml_metal_op_im2col(ggml_metal_op_t ctx, int idx) {
|
|||
return 1;
|
||||
}
|
||||
|
||||
// Encodes a GGML_OP_CONV_2D node into the Metal command encoder of `ctx`.
//   ctx - encoding context providing the graph node, the Metal library and the encoder
//   idx - index of the node to encode
// Returns 1; the caller stores this value in n_fuse (presumably the number of graph
// nodes consumed — confirm against ggml_metal_op_encode_impl).
int ggml_metal_op_conv_2d(ggml_metal_op_t ctx, int idx) {
|
||||
ggml_tensor * op = ctx->node(idx);
|
||||
|
||||
ggml_metal_library_t lib = ctx->lib;
|
||||
ggml_metal_encoder_t enc = ctx->enc;
|
||||
|
||||
// ne0x/nb0x come from src[0], ne1x/nb1x from src[1], nex/nbx from the output tensor
GGML_TENSOR_LOCALS( int32_t, ne0, op->src[0], ne);
|
||||
GGML_TENSOR_LOCALS(uint64_t, nb0, op->src[0], nb);
|
||||
GGML_TENSOR_LOCALS( int32_t, ne1, op->src[1], ne);
|
||||
GGML_TENSOR_LOCALS(uint64_t, nb1, op->src[1], nb);
|
||||
GGML_TENSOR_LOCALS( int32_t, ne, op, ne);
|
||||
GGML_TENSOR_LOCALS(uint64_t, nb, op, nb);
|
||||
|
||||
GGML_ASSERT(ggml_is_contiguous(op->src[0]));
|
||||
GGML_ASSERT(op->src[1]->type == GGML_TYPE_F32);
|
||||
GGML_ASSERT(op->type == GGML_TYPE_F32);
|
||||
GGML_ASSERT(op->src[0]->type == GGML_TYPE_F16 || op->src[0]->type == GGML_TYPE_F32);
|
||||
|
||||
// op_params layout: [0..1] -> s0/s1, [2..3] -> p0/p1, [4..5] -> d0/d1
const int32_t s0 = ((const int32_t *) op->op_params)[0];
|
||||
const int32_t s1 = ((const int32_t *) op->op_params)[1];
|
||||
const int32_t p0 = ((const int32_t *) op->op_params)[2];
|
||||
const int32_t p1 = ((const int32_t *) op->op_params)[3];
|
||||
const int32_t d0 = ((const int32_t *) op->op_params)[4];
|
||||
const int32_t d1 = ((const int32_t *) op->op_params)[5];
|
||||
|
||||
ggml_metal_kargs_conv_2d args = {
|
||||
/*.nb00 =*/ nb00,
|
||||
/*.nb01 =*/ nb01,
|
||||
/*.nb02 =*/ nb02,
|
||||
/*.nb03 =*/ nb03,
|
||||
/*.nb10 =*/ nb10,
|
||||
/*.nb11 =*/ nb11,
|
||||
/*.nb12 =*/ nb12,
|
||||
/*.nb13 =*/ nb13,
|
||||
/*.nb0 =*/ nb0,
|
||||
/*.nb1 =*/ nb1,
|
||||
/*.nb2 =*/ nb2,
|
||||
/*.nb3 =*/ nb3,
|
||||
/*.IW =*/ ne10,
|
||||
/*.IH =*/ ne11,
|
||||
/*.KW =*/ ne00,
|
||||
/*.KH =*/ ne01,
|
||||
/*.IC =*/ ne02,
|
||||
/*.OC =*/ ne03,
|
||||
/*.OW =*/ ne0,
|
||||
/*.OH =*/ ne1,
|
||||
/*.N =*/ ne3,
|
||||
/*.s0 =*/ s0,
|
||||
/*.s1 =*/ s1,
|
||||
/*.p0 =*/ p0,
|
||||
/*.p1 =*/ p1,
|
||||
/*.d0 =*/ d0,
|
||||
/*.d1 =*/ d1,
|
||||
};
|
||||
|
||||
ggml_metal_pipeline_t pipeline = ggml_metal_library_get_pipeline_conv_2d(lib, op);
|
||||
|
||||
// threads per threadgroup: the pipeline's limit, clamped to the range [1, 256]
int nth = ggml_metal_pipeline_max_theads_per_threadgroup(pipeline);
|
||||
nth = std::min(nth, 256);
|
||||
nth = std::max(nth, 1);
|
||||
|
||||
const uint64_t n_out = ggml_nelements(op);
|
||||
|
||||
// number of threadgroups: ceil(n_out / nth), clamped to [1, INT_MAX].
// kernel_conv_2d walks the outputs in steps of the total thread count, so a clamped
// threadgroup count still covers every output element.
uint64_t tg = (n_out + nth - 1)/nth;
|
||||
tg = std::max<uint64_t>(tg, 1);
|
||||
tg = std::min<uint64_t>(tg, (uint64_t) std::numeric_limits<int>::max());
|
||||
|
||||
// binding slots: 0 = args, 1 = src[0], 2 = src[1], 3 = output
ggml_metal_encoder_set_pipeline(enc, pipeline);
|
||||
ggml_metal_encoder_set_bytes (enc, &args, sizeof(args), 0);
|
||||
ggml_metal_encoder_set_buffer (enc, ggml_metal_get_buffer_id(op->src[0]), 1);
|
||||
ggml_metal_encoder_set_buffer (enc, ggml_metal_get_buffer_id(op->src[1]), 2);
|
||||
ggml_metal_encoder_set_buffer (enc, ggml_metal_get_buffer_id(op), 3);
|
||||
|
||||
// 1D dispatch: tg threadgroups of nth threads each
ggml_metal_encoder_dispatch_threadgroups(enc, tg, 1, 1, nth, 1, 1);
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
int ggml_metal_op_conv_transpose_1d(ggml_metal_op_t ctx, int idx) {
|
||||
ggml_tensor * op = ctx->node(idx);
|
||||
|
||||
|
|
|
|||
|
|
@ -70,6 +70,7 @@ int ggml_metal_op_group_norm (ggml_metal_op_t ctx, int idx);
|
|||
int ggml_metal_op_norm (ggml_metal_op_t ctx, int idx);
|
||||
int ggml_metal_op_rope (ggml_metal_op_t ctx, int idx);
|
||||
int ggml_metal_op_im2col (ggml_metal_op_t ctx, int idx);
|
||||
int ggml_metal_op_conv_2d (ggml_metal_op_t ctx, int idx);
|
||||
int ggml_metal_op_conv_transpose_1d (ggml_metal_op_t ctx, int idx);
|
||||
int ggml_metal_op_conv_transpose_2d (ggml_metal_op_t ctx, int idx);
|
||||
int ggml_metal_op_upscale (ggml_metal_op_t ctx, int idx);
|
||||
|
|
|
|||
|
|
@ -4146,6 +4146,120 @@ template [[host_name("kernel_im2col_f16")]] kernel im2col_t kernel_im2col<half>;
|
|||
//template [[host_name("kernel_im2col_ext_f32")]] kernel im2col_ext_t kernel_im2col_ext<float>;
|
||||
//template [[host_name("kernel_im2col_ext_f16")]] kernel im2col_ext_t kernel_im2col_ext<half>;
|
||||
|
||||
template <typename TK>
|
||||
kernel void kernel_conv_2d(
|
||||
constant ggml_metal_kargs_conv_2d & args,
|
||||
device const char * weights,
|
||||
device const char * src,
|
||||
device char * dst,
|
||||
uint3 tgpig[[threadgroup_position_in_grid]],
|
||||
uint3 tgpg[[threadgroups_per_grid]],
|
||||
uint3 tpitg[[thread_position_in_threadgroup]],
|
||||
uint3 ntg[[threads_per_threadgroup]]) {
|
||||
|
||||
const uint threads_per_tg = ntg.x * ntg.y * ntg.z;
|
||||
const uint tg_index = (tgpig.z * tgpg.y + tgpig.y) * tgpg.x + tgpig.x;
|
||||
const uint local_thread = tpitg.z * (ntg.x * ntg.y) + tpitg.y * ntg.x + tpitg.x;
|
||||
const uint thread_index = tg_index * threads_per_tg + local_thread;
|
||||
const uint64_t total_threads = (uint64_t) threads_per_tg * tgpg.x * tgpg.y * tgpg.z;
|
||||
const uint64_t total_outputs = (uint64_t) args.N * args.OC * args.OH * args.OW;
|
||||
|
||||
for (uint64_t index = thread_index; index < total_outputs; index += total_threads) {
|
||||
uint64_t tmp = index;
|
||||
|
||||
const int32_t ow = tmp % args.OW; tmp /= args.OW;
|
||||
const int32_t oh = tmp % args.OH; tmp /= args.OH;
|
||||
const int32_t oc = tmp % args.OC; tmp /= args.OC;
|
||||
const int32_t n = tmp;
|
||||
|
||||
float acc = 0.0f;
|
||||
|
||||
const int32_t base_x = ow*args.s0 - args.p0;
|
||||
const int32_t base_y = oh*args.s1 - args.p1;
|
||||
|
||||
int32_t ky_start = 0;
|
||||
if (base_y < 0) {
|
||||
ky_start = (-base_y + args.d1 - 1)/args.d1;
|
||||
}
|
||||
int32_t ky_end = args.KH;
|
||||
const int32_t y_max = args.IH - 1 - base_y;
|
||||
if (y_max < 0) {
|
||||
ky_end = ky_start;
|
||||
} else if (base_y + (args.KH - 1)*args.d1 >= args.IH) {
|
||||
ky_end = min(ky_end, y_max/args.d1 + 1);
|
||||
}
|
||||
|
||||
int32_t kx_start = 0;
|
||||
if (base_x < 0) {
|
||||
kx_start = (-base_x + args.d0 - 1)/args.d0;
|
||||
}
|
||||
int32_t kx_end = args.KW;
|
||||
const int32_t x_max = args.IW - 1 - base_x;
|
||||
if (x_max < 0) {
|
||||
kx_end = kx_start;
|
||||
} else if (base_x + (args.KW - 1)*args.d0 >= args.IW) {
|
||||
kx_end = min(kx_end, x_max/args.d0 + 1);
|
||||
}
|
||||
|
||||
if (ky_start < ky_end && kx_start < kx_end) {
|
||||
const uint64_t src_base_n = (uint64_t) n * args.nb13;
|
||||
const uint64_t w_base_oc = (uint64_t) oc * args.nb03;
|
||||
|
||||
for (int32_t ic = 0; ic < args.IC; ++ic) {
|
||||
const uint64_t src_base_nc = src_base_n + (uint64_t) ic * args.nb12;
|
||||
const uint64_t w_base_ocic = w_base_oc + (uint64_t) ic * args.nb02;
|
||||
|
||||
for (int32_t ky = ky_start; ky < ky_end; ++ky) {
|
||||
const int32_t iy = base_y + ky*args.d1;
|
||||
const uint64_t src_base_row = src_base_nc + (uint64_t) iy * args.nb11;
|
||||
const uint64_t w_base_row = w_base_ocic + (uint64_t) ky * args.nb01;
|
||||
|
||||
for (int32_t kx = kx_start; kx < kx_end; ++kx) {
|
||||
const int32_t ix = base_x + kx*args.d0;
|
||||
const uint64_t src_offs = src_base_row + (uint64_t) ix * args.nb10;
|
||||
const uint64_t w_offs = w_base_row + (uint64_t) kx * args.nb00;
|
||||
|
||||
const float x = *(device const float *)(src + src_offs);
|
||||
const float w = (float) (*(device const TK *)(weights + w_offs));
|
||||
|
||||
acc += x * w;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
const uint64_t dst_offs =
|
||||
(uint64_t) n * args.nb3 +
|
||||
(uint64_t) oc * args.nb2 +
|
||||
(uint64_t) oh * args.nb1 +
|
||||
(uint64_t) ow * args.nb0;
|
||||
|
||||
*(device float *)(dst + dst_offs) = acc;
|
||||
}
|
||||
}
|
||||
|
||||
template [[host_name("kernel_conv_2d_f32_f32")]]
|
||||
kernel void kernel_conv_2d<float>(
|
||||
constant ggml_metal_kargs_conv_2d & args,
|
||||
device const char * weights,
|
||||
device const char * src,
|
||||
device char * dst,
|
||||
uint3 tgpig[[threadgroup_position_in_grid]],
|
||||
uint3 tgpg[[threadgroups_per_grid]],
|
||||
uint3 tpitg[[thread_position_in_threadgroup]],
|
||||
uint3 ntg[[threads_per_threadgroup]]);
|
||||
|
||||
template [[host_name("kernel_conv_2d_f16_f32")]]
|
||||
kernel void kernel_conv_2d<half>(
|
||||
constant ggml_metal_kargs_conv_2d & args,
|
||||
device const char * weights,
|
||||
device const char * src,
|
||||
device char * dst,
|
||||
uint3 tgpig[[threadgroup_position_in_grid]],
|
||||
uint3 tgpg[[threadgroups_per_grid]],
|
||||
uint3 tpitg[[thread_position_in_threadgroup]],
|
||||
uint3 ntg[[threads_per_threadgroup]]);
|
||||
|
||||
typedef void (conv_transpose_1d_t)(
|
||||
constant ggml_metal_kargs_conv_transpose_1d & args,
|
||||
device const float * src0,
|
||||
|
|
|
|||
|
|
@ -3933,6 +3933,7 @@ static bool ggml_sycl_compute_forward(ggml_backend_sycl_context & ctx, struct gg
|
|||
break;
|
||||
case GGML_OP_SSM_CONV:
|
||||
ggml_sycl_ssm_conv(ctx, dst);
|
||||
break;
|
||||
case GGML_OP_ROLL:
|
||||
ggml_sycl_roll(ctx, dst);
|
||||
break;
|
||||
|
|
|
|||
|
|
@ -76,7 +76,7 @@ enum MatMulIdType {
|
|||
|
||||
namespace {
|
||||
|
||||
void execute_command(const std::string& command, std::string& stdout_str, std::string& stderr_str) {
|
||||
void execute_command(std::vector<std::string>& command, std::string& stdout_str, std::string& stderr_str) {
|
||||
#ifdef _WIN32
|
||||
HANDLE stdout_read, stdout_write;
|
||||
HANDLE stderr_read, stderr_write;
|
||||
|
|
@ -99,8 +99,10 @@ void execute_command(const std::string& command, std::string& stdout_str, std::s
|
|||
si.hStdOutput = stdout_write;
|
||||
si.hStdError = stderr_write;
|
||||
|
||||
std::vector<char> cmd(command.begin(), command.end());
|
||||
cmd.push_back('\0');
|
||||
std::string cmd;
|
||||
for (const auto& part : command) {
|
||||
cmd += part + " ";
|
||||
}
|
||||
|
||||
if (!CreateProcessA(NULL, cmd.data(), NULL, NULL, TRUE, 0, NULL, NULL, &si, &pi)) {
|
||||
throw std::runtime_error("Failed to create process");
|
||||
|
|
@ -138,6 +140,12 @@ void execute_command(const std::string& command, std::string& stdout_str, std::s
|
|||
throw std::runtime_error("Failed to fork process");
|
||||
}
|
||||
|
||||
std::vector<char*> argv;
|
||||
for (std::string& part : command) {
|
||||
argv.push_back(part.data());
|
||||
}
|
||||
argv.push_back(nullptr);
|
||||
|
||||
if (pid == 0) {
|
||||
close(stdout_pipe[0]);
|
||||
close(stderr_pipe[0]);
|
||||
|
|
@ -145,7 +153,7 @@ void execute_command(const std::string& command, std::string& stdout_str, std::s
|
|||
dup2(stderr_pipe[1], STDERR_FILENO);
|
||||
close(stdout_pipe[1]);
|
||||
close(stderr_pipe[1]);
|
||||
execl("/bin/sh", "sh", "-c", command.c_str(), (char*) nullptr);
|
||||
execvp(argv[0], argv.data());
|
||||
_exit(EXIT_FAILURE);
|
||||
} else {
|
||||
close(stdout_pipe[1]);
|
||||
|
|
@ -316,21 +324,27 @@ compile_count_guard acquire_compile_slot() {
|
|||
void string_to_spv_func(std::string name, std::string in_path, std::string out_path, std::map<std::string, std::string> defines, bool coopmat, bool dep_file, compile_count_guard slot) {
|
||||
std::string target_env = (name.find("_cm2") != std::string::npos) ? "--target-env=vulkan1.3" : "--target-env=vulkan1.2";
|
||||
|
||||
#ifdef _WIN32
|
||||
std::vector<std::string> cmd = {GLSLC, "-fshader-stage=compute", target_env, "\"" + in_path + "\"", "-o", "\"" + out_path + "\""};
|
||||
#else
|
||||
std::vector<std::string> cmd = {GLSLC, "-fshader-stage=compute", target_env, in_path, "-o", out_path};
|
||||
#endif
|
||||
|
||||
// disable spirv-opt for coopmat shaders for https://github.com/ggerganov/llama.cpp/issues/10734
|
||||
// disable spirv-opt for bf16 shaders for https://github.com/ggml-org/llama.cpp/issues/15344
|
||||
// disable spirv-opt for rope shaders for https://github.com/ggml-org/llama.cpp/issues/16860
|
||||
std::string opt_level = (coopmat || name.find("bf16") != std::string::npos || name.find("rope") != std::string::npos) ? "" : "-O";
|
||||
|
||||
#ifdef _WIN32
|
||||
std::vector<std::string> cmd = {GLSLC, "-fshader-stage=compute", target_env, opt_level, "\"" + in_path + "\"", "-o", "\"" + out_path + "\""};
|
||||
#else
|
||||
std::vector<std::string> cmd = {GLSLC, "-fshader-stage=compute", target_env, opt_level, in_path, "-o", out_path};
|
||||
#endif
|
||||
if (!coopmat && name.find("bf16") == std::string::npos && name.find("rope") == std::string::npos) {
|
||||
cmd.push_back("-O");
|
||||
}
|
||||
|
||||
if (dep_file) {
|
||||
cmd.push_back("-MD");
|
||||
cmd.push_back("-MF");
|
||||
#ifdef _WIN32
|
||||
cmd.push_back("\"" + target_cpp + ".d\"");
|
||||
#else
|
||||
cmd.push_back(target_cpp + ".d");
|
||||
#endif
|
||||
}
|
||||
|
||||
#ifdef GGML_VULKAN_SHADER_DEBUG_INFO
|
||||
|
|
@ -354,9 +368,13 @@ void string_to_spv_func(std::string name, std::string in_path, std::string out_p
|
|||
// }
|
||||
// std::cout << std::endl;
|
||||
|
||||
execute_command(command, stdout_str, stderr_str);
|
||||
execute_command(cmd, stdout_str, stderr_str);
|
||||
if (!stderr_str.empty()) {
|
||||
std::cerr << "cannot compile " << name << "\n\n" << command << "\n\n" << stderr_str << std::endl;
|
||||
std::cerr << "cannot compile " << name << "\n\n";
|
||||
for (const auto& part : cmd) {
|
||||
std::cerr << part << " ";
|
||||
}
|
||||
std::cerr << "\n\n" << stderr_str << std::endl;
|
||||
return;
|
||||
}
|
||||
|
||||
|
|
@ -430,7 +448,7 @@ void matmul_shaders(bool fp16, MatMulIdType matmul_id_type, bool coopmat, bool c
|
|||
base_dict["ACC_TYPE" ] = f16acc ? "float16_t" : "float";
|
||||
base_dict["ACC_TYPE_VEC2"] = f16acc ? "f16vec2" : "vec2";
|
||||
if (f16acc) {
|
||||
base_dict["ACC_TYPE_MAX"] = "\"float16_t(65504.0)\"";
|
||||
base_dict["ACC_TYPE_MAX"] = "float16_t(65504.0)";
|
||||
}
|
||||
|
||||
if (coopmat) {
|
||||
|
|
@ -610,7 +628,7 @@ void process_shaders() {
|
|||
fa_base_dict["ACC_TYPE"] = f16acc ? "float16_t" : "float";
|
||||
fa_base_dict["ACC_TYPEV4"] = f16acc ? "f16vec4" : "vec4";
|
||||
if (f16acc) {
|
||||
fa_base_dict["ACC_TYPE_MAX"] = "\"float16_t(65504.0)\"";
|
||||
fa_base_dict["ACC_TYPE_MAX"] = "float16_t(65504.0)";
|
||||
}
|
||||
|
||||
for (const auto& tname : type_names) {
|
||||
|
|
@ -1081,11 +1099,6 @@ int main(int argc, char** argv) {
|
|||
|
||||
if (args.find("--glslc") != args.end()) {
|
||||
GLSLC = args["--glslc"]; // Path to glslc
|
||||
|
||||
if (!std::filesystem::exists(GLSLC) || !std::filesystem::is_regular_file(GLSLC)) {
|
||||
std::cerr << "Error: glslc not found at " << GLSLC << std::endl;
|
||||
return EXIT_FAILURE;
|
||||
}
|
||||
}
|
||||
if (args.find("--source") != args.end()) {
|
||||
input_filepath = args["--source"]; // The shader source file to compile
|
||||
|
|
|
|||
159
ggml/src/ggml.c
159
ggml/src/ggml.c
|
|
@ -935,6 +935,7 @@ static const char * GGML_OP_NAME[GGML_OP_COUNT] = {
|
|||
"COS",
|
||||
"SUM",
|
||||
"SUM_ROWS",
|
||||
"CUMSUM",
|
||||
"MEAN",
|
||||
"ARGMAX",
|
||||
"COUNT_EQUAL",
|
||||
|
|
@ -990,6 +991,8 @@ static const char * GGML_OP_NAME[GGML_OP_COUNT] = {
|
|||
"TIMESTEP_EMBEDDING",
|
||||
"ARGSORT",
|
||||
"LEAKY_RELU",
|
||||
"TRI",
|
||||
"FILL",
|
||||
|
||||
"FLASH_ATTN_EXT",
|
||||
"FLASH_ATTN_BACK",
|
||||
|
|
@ -1002,6 +1005,7 @@ static const char * GGML_OP_NAME[GGML_OP_COUNT] = {
|
|||
"RWKV_WKV6",
|
||||
"GATED_LINEAR_ATTN",
|
||||
"RWKV_WKV7",
|
||||
"SOLVE_TRI",
|
||||
|
||||
"UNARY",
|
||||
|
||||
|
|
@ -1019,7 +1023,7 @@ static const char * GGML_OP_NAME[GGML_OP_COUNT] = {
|
|||
"GLU",
|
||||
};
|
||||
|
||||
static_assert(GGML_OP_COUNT == 90, "GGML_OP_COUNT != 90");
|
||||
static_assert(GGML_OP_COUNT == 94, "GGML_OP_COUNT != 94");
|
||||
|
||||
static const char * GGML_OP_SYMBOL[GGML_OP_COUNT] = {
|
||||
"none",
|
||||
|
|
@ -1039,6 +1043,7 @@ static const char * GGML_OP_SYMBOL[GGML_OP_COUNT] = {
|
|||
"cos(x)",
|
||||
"Σx",
|
||||
"Σx_k",
|
||||
"cumsum(x)",
|
||||
"Σx/n",
|
||||
"argmax(x)",
|
||||
"count_equal(x)",
|
||||
|
|
@ -1094,6 +1099,8 @@ static const char * GGML_OP_SYMBOL[GGML_OP_COUNT] = {
|
|||
"timestep_embedding(timesteps, dim, max_period)",
|
||||
"argsort(x)",
|
||||
"leaky_relu(x)",
|
||||
"tri(x)",
|
||||
"fill(x, c)",
|
||||
|
||||
"flash_attn_ext(x)",
|
||||
"flash_attn_back(x)",
|
||||
|
|
@ -1106,6 +1113,7 @@ static const char * GGML_OP_SYMBOL[GGML_OP_COUNT] = {
|
|||
"rwkv_wkv6(k, v, r, tf, td, s)",
|
||||
"gated_linear_attn(k, v, q, gate, s)",
|
||||
"rwkv_wkv7(r, w, k, v, a, b, s)",
|
||||
"A X = B, A triangular, solve X",
|
||||
|
||||
"unary(x)",
|
||||
|
||||
|
|
@ -1123,7 +1131,7 @@ static const char * GGML_OP_SYMBOL[GGML_OP_COUNT] = {
|
|||
"glu(x)",
|
||||
};
|
||||
|
||||
static_assert(GGML_OP_COUNT == 90, "GGML_OP_COUNT != 90");
|
||||
static_assert(GGML_OP_COUNT == 94, "GGML_OP_COUNT != 94");
|
||||
|
||||
static_assert(GGML_OP_POOL_COUNT == 2, "GGML_OP_POOL_COUNT != 2");
|
||||
|
||||
|
|
@ -1142,6 +1150,8 @@ static const char * GGML_UNARY_OP_NAME[GGML_UNARY_OP_COUNT] = {
|
|||
"HARDSWISH",
|
||||
"HARDSIGMOID",
|
||||
"EXP",
|
||||
"EXPM1",
|
||||
"SOFTPLUS",
|
||||
"GELU_ERF",
|
||||
"XIELU",
|
||||
"FLOOR",
|
||||
|
|
@ -1150,7 +1160,7 @@ static const char * GGML_UNARY_OP_NAME[GGML_UNARY_OP_COUNT] = {
|
|||
"TRUNC",
|
||||
};
|
||||
|
||||
static_assert(GGML_UNARY_OP_COUNT == 20, "GGML_UNARY_OP_COUNT != 20");
|
||||
static_assert(GGML_UNARY_OP_COUNT == 22, "GGML_UNARY_OP_COUNT != 22");
|
||||
|
||||
static const char * GGML_GLU_OP_NAME[GGML_GLU_OP_COUNT] = {
|
||||
"REGLU",
|
||||
|
|
@ -2258,6 +2268,30 @@ struct ggml_tensor * ggml_log_inplace(
|
|||
return ggml_log_impl(ctx, a, true);
|
||||
}
|
||||
|
||||
// Thin wrapper: returns ggml_unary(ctx, a, GGML_UNARY_OP_EXPM1).
// NOTE(review): EXPM1 presumably denotes exp(x) - 1 - confirm against the backend kernels.
struct ggml_tensor * ggml_expm1(
|
||||
struct ggml_context * ctx,
|
||||
struct ggml_tensor * a) {
|
||||
return ggml_unary(ctx, a, GGML_UNARY_OP_EXPM1);
|
||||
}
|
||||
|
||||
// Thin wrapper: returns ggml_unary_inplace(ctx, a, GGML_UNARY_OP_EXPM1).
struct ggml_tensor * ggml_expm1_inplace(
|
||||
struct ggml_context * ctx,
|
||||
struct ggml_tensor * a) {
|
||||
return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_EXPM1);
|
||||
}
|
||||
|
||||
// Thin wrapper: returns ggml_unary(ctx, a, GGML_UNARY_OP_SOFTPLUS).
struct ggml_tensor * ggml_softplus(
|
||||
struct ggml_context * ctx,
|
||||
struct ggml_tensor * a) {
|
||||
return ggml_unary(ctx, a, GGML_UNARY_OP_SOFTPLUS);
|
||||
}
|
||||
|
||||
// Thin wrapper: returns ggml_unary_inplace(ctx, a, GGML_UNARY_OP_SOFTPLUS).
struct ggml_tensor * ggml_softplus_inplace(
|
||||
struct ggml_context * ctx,
|
||||
struct ggml_tensor * a) {
|
||||
return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_SOFTPLUS);
|
||||
}
|
||||
|
||||
// ggml_sin
|
||||
|
||||
static struct ggml_tensor * ggml_sin_impl(
|
||||
|
|
@ -2341,6 +2375,21 @@ struct ggml_tensor * ggml_sum_rows(
|
|||
return result;
|
||||
}
|
||||
|
||||
// ggml_cumsum
|
||||
|
||||
// Creates a GGML_OP_CUMSUM graph node whose single source is `a`.
// `a` must be F32 (asserted). The result tensor is obtained from ggml_dup_tensor(ctx, a).
// NOTE(review): the dimension along which the sum accumulates is decided by the
// backend implementation and is not visible here - confirm before relying on it.
struct ggml_tensor * ggml_cumsum(
|
||||
struct ggml_context * ctx,
|
||||
struct ggml_tensor * a) {
|
||||
GGML_ASSERT(a->type == GGML_TYPE_F32);
|
||||
|
||||
struct ggml_tensor * result = ggml_dup_tensor(ctx, a);
|
||||
|
||||
result->op = GGML_OP_CUMSUM;
|
||||
result->src[0] = a;
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
// ggml_mean
|
||||
|
||||
struct ggml_tensor * ggml_mean(
|
||||
|
|
@ -2668,8 +2717,8 @@ struct ggml_tensor * ggml_xielu(
|
|||
struct ggml_tensor * result = ggml_dup_tensor(ctx, a);
|
||||
|
||||
ggml_set_op_params_i32(result, 0, (int32_t) GGML_UNARY_OP_XIELU);
|
||||
ggml_set_op_params_f32(result, 1, beta + ggml_softplus(alpha_n));
|
||||
ggml_set_op_params_f32(result, 2, ggml_softplus(alpha_p));
|
||||
ggml_set_op_params_f32(result, 1, beta + ggml_compute_softplus_f32(alpha_n));
|
||||
ggml_set_op_params_f32(result, 2, ggml_compute_softplus_f32(alpha_p));
|
||||
ggml_set_op_params_f32(result, 3, beta);
|
||||
ggml_set_op_params_f32(result, 4, eps);
|
||||
|
||||
|
|
@ -5028,6 +5077,61 @@ struct ggml_tensor * ggml_timestep_embedding(
|
|||
return result;
|
||||
}
|
||||
|
||||
// ggml_tri
|
||||
|
||||
// Creates a GGML_OP_TRI graph node whose single source is `a`.
// Preconditions (asserted): `a` is F32, contiguous, and square in its first two
// dimensions (ne[0] == ne[1]).
// `type` selects the triangular variant and is stored as int32 in op-param slot 0.
struct ggml_tensor * ggml_tri(
|
||||
struct ggml_context * ctx,
|
||||
struct ggml_tensor * a,
|
||||
enum ggml_tri_type type) {
|
||||
GGML_ASSERT(a->type == GGML_TYPE_F32);
|
||||
|
||||
GGML_ASSERT(ggml_is_contiguous(a));
|
||||
GGML_ASSERT(a->ne[0] == a->ne[1]);
|
||||
|
||||
struct ggml_tensor * result = ggml_dup_tensor(ctx, a);
|
||||
|
||||
ggml_set_op_params_i32(result, 0, type);
|
||||
|
||||
result->op = GGML_OP_TRI;
|
||||
result->src[0] = a;
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
// ggml_fill
|
||||
|
||||
// Shared implementation behind ggml_fill / ggml_fill_inplace.
// Creates a GGML_OP_FILL node with source `a`; the constant `c` is stored as f32 in
// op-param slot 0. `a` must be F32 and contiguous (asserted).
// When `inplace` is true the result is ggml_view_tensor(ctx, a), otherwise
// ggml_dup_tensor(ctx, a).
static struct ggml_tensor * ggml_fill_impl(
|
||||
struct ggml_context * ctx,
|
||||
struct ggml_tensor * a,
|
||||
float c,
|
||||
bool inplace) {
|
||||
GGML_ASSERT(a->type == GGML_TYPE_F32);
|
||||
GGML_ASSERT(ggml_is_contiguous(a));
|
||||
|
||||
struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
|
||||
|
||||
ggml_set_op_params_f32(result, 0, c);
|
||||
|
||||
result->op = GGML_OP_FILL;
|
||||
result->src[0] = a;
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
// Out-of-place variant: calls ggml_fill_impl with inplace = false.
struct ggml_tensor * ggml_fill(
|
||||
struct ggml_context * ctx,
|
||||
struct ggml_tensor * a,
|
||||
float c) {
|
||||
return ggml_fill_impl(ctx, a, c, false);
|
||||
}
|
||||
|
||||
// In-place variant: calls ggml_fill_impl with inplace = true.
struct ggml_tensor * ggml_fill_inplace(
|
||||
struct ggml_context * ctx,
|
||||
struct ggml_tensor * a,
|
||||
float c) {
|
||||
return ggml_fill_impl(ctx, a, c, true);
|
||||
}
|
||||
|
||||
// ggml_argsort
|
||||
|
||||
struct ggml_tensor * ggml_argsort(
|
||||
|
|
@ -5882,6 +5986,41 @@ struct ggml_tensor * ggml_opt_step_sgd(
|
|||
return result;
|
||||
}
|
||||
|
||||
// solve_tri
|
||||
|
||||
// Creates a GGML_OP_SOLVE_TRI node with src[0] = a and src[1] = b (the op symbol
// table describes it as "A X = B, A triangular, solve X").
// The result is a new F32 tensor with the same four dimensions as `b`.
// `left`, `lower` and `uni` are only validated here: they are not written to the
// node's op params, and only the combination left && lower && !uni is accepted.
struct ggml_tensor * ggml_solve_tri(
|
||||
struct ggml_context * ctx,
|
||||
struct ggml_tensor * a,
|
||||
struct ggml_tensor * b,
|
||||
bool left,
|
||||
bool lower,
|
||||
bool uni) {
|
||||
GGML_ASSERT(a->type == GGML_TYPE_F32);
|
||||
GGML_ASSERT(b->type == GGML_TYPE_F32);
|
||||
|
||||
// A must be square (only the shape is checked here, not that A is actually lower triangular)
|
||||
GGML_ASSERT(a->ne[0] == a->ne[1]);
|
||||
// B must have same outer dimension as A
|
||||
GGML_ASSERT(a->ne[1] == b->ne[1]);
|
||||
|
||||
// batch dimensions must be equal
|
||||
GGML_ASSERT(a->ne[2] == b->ne[2]);
|
||||
GGML_ASSERT(a->ne[3] == b->ne[3]);
|
||||
|
||||
GGML_ASSERT(ggml_is_contiguous(a));
|
||||
GGML_ASSERT(ggml_is_contiguous(b));
|
||||
|
||||
GGML_ASSERT(lower && left && !uni); // TODO: support other variants
|
||||
|
||||
struct ggml_tensor * result = ggml_new_tensor_4d(ctx, GGML_TYPE_F32, b->ne[0], b->ne[1], b->ne[2], b->ne[3]);
|
||||
|
||||
result->op = GGML_OP_SOLVE_TRI;
|
||||
result->src[0] = a;
|
||||
result->src[1] = b;
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
struct ggml_hash_set ggml_hash_set_new(size_t size) {
|
||||
|
|
@ -6454,6 +6593,16 @@ static void ggml_compute_backward(
|
|||
ggml_add_or_set(ctx, cgraph, isrc0, ggml_mul(ctx, tensor, grad));
|
||||
}
|
||||
} break;
|
||||
case GGML_UNARY_OP_EXPM1: {
|
||||
if (src0_needs_grads) {
|
||||
ggml_add_or_set(ctx, cgraph, isrc0, ggml_mul(ctx, grad, ggml_exp(ctx, src0)));
|
||||
}
|
||||
} break;
|
||||
case GGML_UNARY_OP_SOFTPLUS: {
|
||||
if (src0_needs_grads) {
|
||||
ggml_add_or_set(ctx, cgraph, isrc0, ggml_mul(ctx, grad, ggml_sigmoid(ctx, src0)));
|
||||
}
|
||||
} break;
|
||||
default: {
|
||||
fprintf(stderr, "%s: unsupported unary op for backward pass: %s\n",
|
||||
__func__, ggml_unary_op_name(ggml_get_unary_op(tensor)));
|
||||
|
|
|
|||
|
|
@ -12,9 +12,11 @@ vendor = {
|
|||
|
||||
"https://raw.githubusercontent.com/nothings/stb/refs/heads/master/stb_image.h": "vendor/stb/stb_image.h",
|
||||
|
||||
"https://github.com/mackron/miniaudio/raw/refs/tags/0.11.22/miniaudio.h": "vendor/miniaudio/miniaudio.h",
|
||||
# not using latest tag to avoid this issue: https://github.com/ggml-org/llama.cpp/pull/17179#discussion_r2515877926
|
||||
# "https://github.com/mackron/miniaudio/raw/refs/tags/0.11.23/miniaudio.h": "vendor/miniaudio/miniaudio.h",
|
||||
"https://github.com/mackron/miniaudio/raw/669ed3e844524fcd883231b13095baee9f6de304/miniaudio.h": "vendor/miniaudio/miniaudio.h",
|
||||
|
||||
"https://raw.githubusercontent.com/yhirose/cpp-httplib/refs/tags/v0.20.1/httplib.h": "vendor/cpp-httplib/httplib.h",
|
||||
"https://raw.githubusercontent.com/yhirose/cpp-httplib/refs/tags/v0.27.0/httplib.h": "vendor/cpp-httplib/httplib.h",
|
||||
}
|
||||
|
||||
for url, filename in vendor.items():
|
||||
|
|
|
|||
|
|
@ -1592,9 +1592,10 @@ ggml_tensor * llm_graph_context::build_attn(
|
|||
int il) const {
|
||||
// these nodes are added to the graph together so that they are not reordered
|
||||
// by doing so, the number of splits in the graph is reduced
|
||||
// expand k later to enable rope fusion which directly writes into k-v cache
|
||||
ggml_build_forward_expand(gf, q_cur);
|
||||
ggml_build_forward_expand(gf, k_cur);
|
||||
ggml_build_forward_expand(gf, v_cur);
|
||||
ggml_build_forward_expand(gf, k_cur);
|
||||
|
||||
const auto * mctx_cur = inp->mctx;
|
||||
|
||||
|
|
|
|||
|
|
@ -1013,7 +1013,7 @@ private:
|
|||
}
|
||||
private:
|
||||
uint32_t get_node(size_t index) {
|
||||
if (index > xcda_array_size) {
|
||||
if (index >= xcda_array_size) {
|
||||
throw std::runtime_error("Index out of array bounds in XCDA array!");
|
||||
}
|
||||
return xcda_array[index];
|
||||
|
|
|
|||
|
|
@ -192,6 +192,38 @@ static void init_tensor_kq_mask(ggml_tensor * tensor, float min = -1.0f, float m
|
|||
ggml_backend_tensor_set(tensor, data_f16.data(), 0, data_f16.size()*sizeof(ggml_fp16_t));
|
||||
}
|
||||
|
||||
// generate a lower triangular matrix
|
||||
// Fills `tensor` with a random lower triangular matrix per (i2, i3) slice:
// elements with i0 <= i1 (diagonal included) are drawn uniformly from [min, max),
// all other elements are set to 0.
//
// Requirements (asserted): F32 data, ne[0] == ne[1], contiguous layout. The
// contiguity check matters because the host buffer is sized by element count and
// uploaded as one flat byte range, while elements are addressed through the
// tensor's byte strides - with padded strides that index would leave the buffer.
static void init_tensor_tril(ggml_tensor * tensor, float min = -1.0f, float max = 1.0f) {
    GGML_ASSERT(tensor->type == GGML_TYPE_F32);
    GGML_ASSERT(tensor->ne[0] == tensor->ne[1]);
    GGML_ASSERT(ggml_is_contiguous(tensor));

    // 64-bit extents: ggml stores ne[] as int64_t, narrowing them could overflow
    GGML_TENSOR_LOCALS(int64_t, ne, tensor, ne);
    GGML_TENSOR_LOCALS(size_t,  nb, tensor, nb);

    std::vector<float> data_f32(ggml_nelements(tensor));

    std::random_device rd;
    std::mt19937 gen(rd());
    std::uniform_real_distribution<float> dis(min, max);

    for (int64_t i3 = 0; i3 < ne3; i3++) {
        for (int64_t i2 = 0; i2 < ne2; i2++) {
            for (int64_t i1 = 0; i1 < ne1; i1++) {
                for (int64_t i0 = 0; i0 < ne0; i0++) {
                    // byte offset from the strides, converted to a float index
                    int64_t idx = (i0 * nb0 + i1 * nb1 + i2 * nb2 + i3 * nb3) / sizeof(float);
                    if (i0 <= i1) {
                        data_f32[idx] = dis(gen);
                    } else {
                        data_f32[idx] = 0.0f;
                    }
                }
            }
        }
    }

    ggml_backend_tensor_set(tensor, data_f32.data(), 0, ggml_nbytes(tensor));
}
|
||||
|
||||
static std::vector<float> tensor_to_float(const ggml_tensor * t) {
|
||||
std::vector<float> tv;
|
||||
tv.reserve(ggml_nelements(t));
|
||||
|
|
@ -1821,7 +1853,8 @@ struct test_unary : public test_case {
|
|||
|
||||
ggml_tensor * build_graph(ggml_context * ctx) override {
|
||||
const bool grad_supported = op == GGML_UNARY_OP_ABS || op == GGML_UNARY_OP_SGN || op == GGML_UNARY_OP_NEG ||
|
||||
op == GGML_UNARY_OP_STEP || op == GGML_UNARY_OP_RELU || op == GGML_UNARY_OP_SILU;
|
||||
op == GGML_UNARY_OP_STEP || op == GGML_UNARY_OP_RELU || op == GGML_UNARY_OP_SILU ||
|
||||
op == GGML_UNARY_OP_EXPM1 || op == GGML_UNARY_OP_SOFTPLUS;
|
||||
|
||||
ggml_tensor * a;
|
||||
if (v & 1) {
|
||||
|
|
@ -2796,7 +2829,7 @@ struct test_bin_bcast : public test_case {
|
|||
const std::array<int, 4> nr;
|
||||
int nf; // number of fused ops, nf == 1 -> single op (no fusion)
|
||||
|
||||
bool run_whole_graph() override { return true; }
|
||||
bool run_whole_graph() override { return nf > 1; }
|
||||
|
||||
std::string vars() override {
|
||||
return VARS_TO_STR4(type, ne, nr, nf);
|
||||
|
|
@ -5412,6 +5445,7 @@ struct test_pad : public test_case {
|
|||
}
|
||||
};
|
||||
|
||||
// GGML_OP_PAD (with extension)
|
||||
struct test_pad_ext : public test_case {
|
||||
const ggml_type type;
|
||||
const std::array<int64_t, 4> ne_a;
|
||||
|
|
@ -5819,6 +5853,7 @@ struct test_opt_step_adamw : public test_case {
|
|||
}
|
||||
};
|
||||
|
||||
// GGML_OP_OPT_STEP_SGD
|
||||
struct test_opt_step_sgd : public test_case {
|
||||
const ggml_type type;
|
||||
const std::array<int64_t, 4> ne;
|
||||
|
|
@ -5858,6 +5893,170 @@ struct test_opt_step_sgd : public test_case {
|
|||
}
|
||||
};
|
||||
|
||||
// GGML_OP_CUMSUM
|
||||
// Test case that builds a graph consisting of a single ggml_cumsum over a 4D tensor.
struct test_cumsum : public test_case {
|
||||
// element type of the input tensor
const ggml_type type;
|
||||
// extents of the input tensor
const std::array<int64_t, 4> ne;
|
||||
|
||||
std::string vars() override { return VARS_TO_STR2(type, ne); }
|
||||
|
||||
test_cumsum(ggml_type type = GGML_TYPE_F32,
|
||||
std::array<int64_t, 4> ne = { 10, 5, 4, 3 })
|
||||
: type(type), ne(ne) {}
|
||||
|
||||
// Creates input "a" (marked as a parameter) and returns cumsum(a), named "out".
ggml_tensor * build_graph(ggml_context * ctx) override {
|
||||
ggml_tensor * a = ggml_new_tensor_4d(ctx, type, ne[0], ne[1], ne[2], ne[3]);
|
||||
ggml_set_param(a);
|
||||
ggml_set_name(a, "a");
|
||||
|
||||
ggml_tensor * out = ggml_cumsum(ctx, a);
|
||||
|
||||
ggml_set_name(out, "out");
|
||||
|
||||
return out;
|
||||
}
|
||||
|
||||
// Every tensor in the context is initialised via init_tensor_uniform with bounds -1 and 1.
void initialize_tensors(ggml_context * ctx) override {
|
||||
for (ggml_tensor * t = ggml_get_first_tensor(ctx); t != NULL; t = ggml_get_next_tensor(ctx, t)) {
|
||||
init_tensor_uniform(t, -1.0f, 1.0f);
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
// GGML_OP_XIELU
|
||||
// Test case that builds a graph consisting of a single ggml_xielu over a 4D tensor,
// using fixed alpha_n / alpha_p / beta / eps values.
struct test_xielu : public test_case {
|
||||
// element type of the input tensor
const ggml_type type;
|
||||
// extents of the input tensor
const std::array<int64_t, 4> ne;
|
||||
|
||||
std::string vars() override { return VARS_TO_STR2(type, ne); }
|
||||
|
||||
test_xielu(ggml_type type = GGML_TYPE_F32,
|
||||
std::array<int64_t, 4> ne = { 10, 5, 4, 3 })
|
||||
: type(type), ne(ne) {}
|
||||
|
||||
// Creates input "a" (marked as a parameter) and returns xielu(a, ...), named "out".
ggml_tensor * build_graph(ggml_context * ctx) override {
|
||||
ggml_tensor * a = ggml_new_tensor_4d(ctx, type, ne[0], ne[1], ne[2], ne[3]);
|
||||
ggml_set_param(a);
|
||||
ggml_set_name(a, "a");
|
||||
|
||||
// hard-coded activation parameters forwarded unchanged to ggml_xielu
float alpha_n = 4.0f;
|
||||
float alpha_p = 20.0f;
|
||||
float beta = 0.5f;
|
||||
float eps = 0.0000001f;
|
||||
|
||||
ggml_tensor * out = ggml_xielu(ctx, a, alpha_n, alpha_p, beta, eps);
|
||||
|
||||
ggml_set_name(out, "out");
|
||||
|
||||
return out;
|
||||
}
|
||||
|
||||
// Every tensor in the context is initialised via init_tensor_uniform with bounds -1 and 1.
void initialize_tensors(ggml_context * ctx) override {
|
||||
for (ggml_tensor * t = ggml_get_first_tensor(ctx); t != NULL; t = ggml_get_next_tensor(ctx, t)) {
|
||||
init_tensor_uniform(t, -1.0f, 1.0f);
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
// GGML_OP_TRI
|
||||
// Test case that builds a graph consisting of a single ggml_tri over a 4D tensor
// whose first two extents are equal.
struct test_tri : public test_case {
|
||||
// element type of the input tensor
const ggml_type type;
|
||||
// extents of the input tensor; ne[0] must equal ne[1]
const std::array<int64_t, 4> ne;
|
||||
// triangular variant passed to ggml_tri
const ggml_tri_type tri_type;
|
||||
|
||||
std::string vars() override { return VARS_TO_STR3(type, ne, tri_type); }
|
||||
|
||||
// Note: tri_type comes first and has no default; the square-shape requirement
// is checked at construction time.
test_tri(ggml_tri_type tri_type, ggml_type type = GGML_TYPE_F32,
|
||||
std::array<int64_t, 4> ne = { 10, 10, 4, 3 })
|
||||
: type(type), ne(ne), tri_type(tri_type) {
|
||||
GGML_ASSERT(ne[0] == ne[1]);
|
||||
}
|
||||
|
||||
// Creates input "a" (marked as a parameter) and returns tri(a, tri_type), named "out".
ggml_tensor * build_graph(ggml_context * ctx) override {
|
||||
ggml_tensor * a = ggml_new_tensor_4d(ctx, type, ne[0], ne[1], ne[2], ne[3]);
|
||||
ggml_set_param(a);
|
||||
ggml_set_name(a, "a");
|
||||
|
||||
ggml_tensor * out = ggml_tri(ctx, a, tri_type);
|
||||
|
||||
ggml_set_name(out, "out");
|
||||
|
||||
return out;
|
||||
}
|
||||
|
||||
// Every tensor in the context is initialised via init_tensor_uniform with bounds -1 and 1.
void initialize_tensors(ggml_context * ctx) override {
|
||||
for (ggml_tensor * t = ggml_get_first_tensor(ctx); t != NULL; t = ggml_get_next_tensor(ctx, t)) {
|
||||
init_tensor_uniform(t, -1.0f, 1.0f);
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
// GGML_OP_FILL
|
||||
// Test case that builds a graph consisting of a single ggml_fill(a, c) over a 4D tensor.
// It does not override initialize_tensors.
struct test_fill : public test_case {
|
||||
// element type of the input tensor
const ggml_type type;
|
||||
// extents of the input tensor
const std::array<int64_t, 4> ne;
|
||||
// constant passed to ggml_fill
float c;
|
||||
|
||||
std::string vars() override { return VARS_TO_STR3(type, ne, c); }
|
||||
|
||||
// Note: the fill constant comes first and has no default.
test_fill(float c, ggml_type type = GGML_TYPE_F32,
|
||||
std::array<int64_t, 4> ne = { 10, 10, 4, 3 })
|
||||
: type(type), ne(ne), c(c) {}
|
||||
|
||||
// Creates input "a" (marked as a parameter) and returns fill(a, c), named "out".
ggml_tensor * build_graph(ggml_context * ctx) override {
|
||||
ggml_tensor * a = ggml_new_tensor_4d(ctx, type, ne[0], ne[1], ne[2], ne[3]);
|
||||
ggml_set_param(a);
|
||||
ggml_set_name(a, "a");
|
||||
|
||||
ggml_tensor * out = ggml_fill(ctx, a, c);
|
||||
|
||||
ggml_set_name(out, "out");
|
||||
|
||||
return out;
|
||||
}
|
||||
};
|
||||
|
||||
// GGML_OP_SOLVE_TRI
|
||||
// Test case that builds a graph consisting of a single
// ggml_solve_tri(a, b, left = true, lower = true, uni = false).
struct test_solve_tri : public test_case {
|
||||
// element type used for both inputs
const ggml_type type;
|
||||
// extents of tensor "a"
const std::array<int64_t, 4> ne_lhs;
|
||||
// extents of tensor "b"
const std::array<int64_t, 4> ne_rhs;
|
||||
|
||||
std::string vars() override { return VARS_TO_STR3(type, ne_lhs, ne_rhs); }
|
||||
|
||||
test_solve_tri(ggml_type type = GGML_TYPE_F32,
|
||||
std::array<int64_t, 4> ne_lhs = { 10, 10, 4, 3 },
|
||||
std::array<int64_t, 4> ne_rhs = { 3, 10, 4, 3 }
|
||||
)
|
||||
: type(type), ne_lhs(ne_lhs), ne_rhs(ne_rhs) {}
|
||||
|
||||
// Creates inputs "a" and "b" (both marked as parameters) and returns the
// solve_tri node, named "out".
ggml_tensor * build_graph(ggml_context * ctx) override {
|
||||
ggml_tensor * a = ggml_new_tensor_4d(ctx, type, ne_lhs[0], ne_lhs[1], ne_lhs[2], ne_lhs[3]);
|
||||
ggml_set_param(a);
|
||||
ggml_set_name(a, "a");
|
||||
|
||||
ggml_tensor * b = ggml_new_tensor_4d(ctx, type, ne_rhs[0], ne_rhs[1], ne_rhs[2], ne_rhs[3]);
|
||||
ggml_set_param(b);
|
||||
ggml_set_name(b, "b");
|
||||
|
||||
ggml_tensor * out = ggml_solve_tri(ctx, a, b, true, true, false);
|
||||
ggml_set_name(out, "out");
|
||||
|
||||
return out;
|
||||
}
|
||||
|
||||
// The tensor named "a" is initialised with init_tensor_tril using a strictly
// positive lower bound; every other tensor goes through init_tensor_uniform.
void initialize_tensors(ggml_context * ctx) override {
|
||||
for (ggml_tensor * t = ggml_get_first_tensor(ctx); t != NULL; t = ggml_get_next_tensor(ctx, t)) {
|
||||
if (strcmp(t->name, "a") == 0) {
|
||||
// note: avoid zeros in the diagonal
|
||||
init_tensor_tril(t, 0.1, 1.0f);
|
||||
} else {
|
||||
init_tensor_uniform(t, -1.0f, 1.0f);
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
enum llm_norm_type {
|
||||
LLM_NORM,
|
||||
LLM_NORM_RMS,
|
||||
|
|
@ -6299,6 +6498,9 @@ static std::vector<std::unique_ptr<test_case>> make_test_cases_eval() {
|
|||
for (ggml_type type : {GGML_TYPE_F16, GGML_TYPE_F32}) {
|
||||
for (int v : {0, 1}) {
|
||||
for (int op = 0; op < GGML_UNARY_OP_COUNT; op++) {
|
||||
if (op == GGML_UNARY_OP_XIELU) {
|
||||
continue; // need extra params, separate test
|
||||
}
|
||||
test_cases.emplace_back(new test_unary((ggml_unary_op) op, type, { 128, 2, 2, 2 }, v));
|
||||
test_cases.emplace_back(new test_unary((ggml_unary_op) op, type, { 5, 7, 11, 13 }, v));
|
||||
}
|
||||
|
|
@ -7356,6 +7558,26 @@ static std::vector<std::unique_ptr<test_case>> make_test_cases_eval() {
|
|||
test_cases.emplace_back(new test_arange());
|
||||
test_cases.emplace_back(new test_timestep_embedding());
|
||||
test_cases.emplace_back(new test_leaky_relu());
|
||||
test_cases.emplace_back(new test_cumsum());
|
||||
|
||||
test_cases.emplace_back(new test_xielu());
|
||||
|
||||
test_cases.emplace_back(new test_tri(GGML_TRI_TYPE_LOWER));
|
||||
test_cases.emplace_back(new test_tri(GGML_TRI_TYPE_LOWER_DIAG));
|
||||
test_cases.emplace_back(new test_tri(GGML_TRI_TYPE_UPPER));
|
||||
test_cases.emplace_back(new test_tri(GGML_TRI_TYPE_UPPER_DIAG));
|
||||
|
||||
test_cases.emplace_back(new test_fill(0.0f));
|
||||
test_cases.emplace_back(new test_fill(2.0f, GGML_TYPE_F32, { 303, 207, 11, 3 }));
|
||||
test_cases.emplace_back(new test_fill(-152.0f, GGML_TYPE_F32, { 800, 600, 4, 4 }));
|
||||
|
||||
test_cases.emplace_back(new test_solve_tri());
|
||||
test_cases.emplace_back(new test_solve_tri(GGML_TYPE_F32, { 11, 11, 1, 1 }, { 5, 11, 1, 1 }));
|
||||
test_cases.emplace_back(new test_solve_tri(GGML_TYPE_F32, { 17, 17, 2, 4 }, { 9, 17, 2, 4 }));
|
||||
test_cases.emplace_back(new test_solve_tri(GGML_TYPE_F32, { 30, 30, 7, 1 }, { 8, 30, 7, 1 }));
|
||||
test_cases.emplace_back(new test_solve_tri(GGML_TYPE_F32, { 42, 42, 5, 2 }, { 10, 42, 5, 2 }));
|
||||
test_cases.emplace_back(new test_solve_tri(GGML_TYPE_F32, { 64, 64, 2, 2 }, { 10, 64, 2, 2 }));
|
||||
test_cases.emplace_back(new test_solve_tri(GGML_TYPE_F32, { 100, 100, 4, 4 }, { 41, 100, 4, 4 }));
|
||||
|
||||
for (bool v : {false, true}) {
|
||||
test_cases.emplace_back(new test_pad_ext(GGML_TYPE_F32, {512, 512, 1, 1}, 0, 1, 0, 1, 0, 0, 0, 0, v));
|
||||
|
|
@ -7648,6 +7870,8 @@ static std::vector<std::unique_ptr<test_case>> make_test_cases_perf() {
|
|||
test_cases.emplace_back(new test_sum(GGML_TYPE_F32, it));
|
||||
}
|
||||
|
||||
test_cases.emplace_back(new test_argsort(GGML_TYPE_F32, {65000, 16, 1, 1}));
|
||||
|
||||
return test_cases;
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -7,6 +7,10 @@ if (MINGW)
|
|||
add_compile_definitions(_WIN32_WINNT=${GGML_WIN_VER})
|
||||
endif()
|
||||
|
||||
# Abort configuration when LLAMA_HTTPLIB is disabled: per the error text below,
# llama-server cannot be built without it. Users who do not need the server can
# pass -DLLAMA_BUILD_SERVER=OFF instead.
if (NOT LLAMA_HTTPLIB)
|
||||
message(FATAL_ERROR "LLAMA_HTTPLIB is OFF, cannot build llama-server. Hint: to skip building server, set -DLLAMA_BUILD_SERVER=OFF")
|
||||
endif()
|
||||
|
||||
set(TARGET_SRCS
|
||||
server.cpp
|
||||
utils.hpp
|
||||
|
|
|
|||
Binary file not shown.
|
|
@ -684,7 +684,7 @@ struct server_task_result {
|
|||
}
|
||||
virtual bool is_stop() {
|
||||
// only used by server_task_result_cmpl_*
|
||||
return false;
|
||||
return true;
|
||||
}
|
||||
virtual int get_index() {
|
||||
return -1;
|
||||
|
|
@ -3238,105 +3238,6 @@ struct server_context {
|
|||
queue_results.send(std::move(res));
|
||||
}
|
||||
|
||||
//
|
||||
// Functions to create new task(s) and receive result(s)
|
||||
//
|
||||
|
||||
void cancel_tasks(const std::unordered_set<int> & id_tasks) {
|
||||
std::vector<server_task> cancel_tasks;
|
||||
cancel_tasks.reserve(id_tasks.size());
|
||||
for (const auto & id_task : id_tasks) {
|
||||
SRV_WRN("cancel task, id_task = %d\n", id_task);
|
||||
|
||||
server_task task(SERVER_TASK_TYPE_CANCEL);
|
||||
task.id_target = id_task;
|
||||
queue_results.remove_waiting_task_id(id_task);
|
||||
cancel_tasks.push_back(std::move(task));
|
||||
}
|
||||
// push to beginning of the queue, so it has highest priority
|
||||
queue_tasks.post(std::move(cancel_tasks), true);
|
||||
}
|
||||
|
||||
// receive the results from task(s)
|
||||
void receive_multi_results(
|
||||
const std::unordered_set<int> & id_tasks,
|
||||
const std::function<void(std::vector<server_task_result_ptr>&)> & result_handler,
|
||||
const std::function<void(json)> & error_handler,
|
||||
const std::function<bool()> & is_connection_closed) {
|
||||
std::vector<server_task_result_ptr> results(id_tasks.size());
|
||||
for (int i = 0; i < (int)id_tasks.size(); i++) {
|
||||
server_task_result_ptr result = queue_results.recv_with_timeout(id_tasks, HTTP_POLLING_SECONDS);
|
||||
|
||||
if (is_connection_closed()) {
|
||||
cancel_tasks(id_tasks);
|
||||
return;
|
||||
}
|
||||
|
||||
if (result == nullptr) {
|
||||
i--; // retry
|
||||
continue;
|
||||
}
|
||||
|
||||
if (result->is_error()) {
|
||||
error_handler(result->to_json());
|
||||
cancel_tasks(id_tasks);
|
||||
return;
|
||||
}
|
||||
|
||||
GGML_ASSERT(
|
||||
dynamic_cast<server_task_result_cmpl_final*>(result.get()) != nullptr
|
||||
|| dynamic_cast<server_task_result_embd*>(result.get()) != nullptr
|
||||
|| dynamic_cast<server_task_result_rerank*>(result.get()) != nullptr
|
||||
);
|
||||
const size_t idx = result->get_index();
|
||||
GGML_ASSERT(idx < results.size() && "index out of range");
|
||||
results[idx] = std::move(result);
|
||||
}
|
||||
result_handler(results);
|
||||
}
|
||||
|
||||
// receive the results from task(s), in stream mode
|
||||
void receive_cmpl_results_stream(
|
||||
const std::unordered_set<int> & id_tasks,
|
||||
const std::function<bool(server_task_result_ptr&)> & result_handler,
|
||||
const std::function<void(json)> & error_handler,
|
||||
const std::function<bool()> & is_connection_closed) {
|
||||
size_t n_finished = 0;
|
||||
while (true) {
|
||||
server_task_result_ptr result = queue_results.recv_with_timeout(id_tasks, HTTP_POLLING_SECONDS);
|
||||
|
||||
if (is_connection_closed()) {
|
||||
cancel_tasks(id_tasks);
|
||||
return;
|
||||
}
|
||||
|
||||
if (result == nullptr) {
|
||||
continue; // retry
|
||||
}
|
||||
|
||||
if (result->is_error()) {
|
||||
error_handler(result->to_json());
|
||||
cancel_tasks(id_tasks);
|
||||
return;
|
||||
}
|
||||
|
||||
GGML_ASSERT(
|
||||
dynamic_cast<server_task_result_cmpl_partial*>(result.get()) != nullptr
|
||||
|| dynamic_cast<server_task_result_cmpl_final*>(result.get()) != nullptr
|
||||
);
|
||||
if (!result_handler(result)) {
|
||||
cancel_tasks(id_tasks);
|
||||
break;
|
||||
}
|
||||
|
||||
if (result->is_stop()) {
|
||||
if (++n_finished == id_tasks.size()) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
//
|
||||
// Functions to process the task
|
||||
//
|
||||
|
|
@ -4418,6 +4319,104 @@ struct server_context {
|
|||
}
|
||||
};
|
||||
|
||||
// generator-like API for server responses, support pooling connection state and aggregating results
|
||||
struct server_response_reader {
|
||||
std::unordered_set<int> id_tasks;
|
||||
server_context & ctx_server;
|
||||
size_t received_count = 0;
|
||||
bool cancelled = false;
|
||||
|
||||
server_response_reader(server_context & ctx_server) : ctx_server(ctx_server) {}
|
||||
~server_response_reader() {
|
||||
stop();
|
||||
}
|
||||
|
||||
void post_tasks(std::vector<server_task> && tasks) {
|
||||
id_tasks = server_task::get_list_id(tasks);
|
||||
ctx_server.queue_results.add_waiting_tasks(tasks);
|
||||
ctx_server.queue_tasks.post(std::move(tasks));
|
||||
}
|
||||
|
||||
bool has_next() {
|
||||
return !cancelled && received_count < id_tasks.size();
|
||||
}
|
||||
|
||||
// return nullptr if should_stop() is true before receiving a result
|
||||
// note: if one error is received, it will stop further processing and return error result
|
||||
server_task_result_ptr next(const std::function<bool()> & should_stop) {
|
||||
while (true) {
|
||||
server_task_result_ptr result = ctx_server.queue_results.recv_with_timeout(id_tasks, HTTP_POLLING_SECONDS);
|
||||
if (result == nullptr) {
|
||||
// timeout, check stop condition
|
||||
if (should_stop()) {
|
||||
SRV_DBG("%s", "stopping wait for next result due to should_stop condition\n");
|
||||
return nullptr;
|
||||
}
|
||||
} else {
|
||||
if (result->is_error()) {
|
||||
stop(); // cancel remaining tasks
|
||||
SRV_DBG("%s", "received error result, stopping further processing\n");
|
||||
return result;
|
||||
}
|
||||
if (result->is_stop()) {
|
||||
received_count++;
|
||||
}
|
||||
return result;
|
||||
}
|
||||
}
|
||||
|
||||
// should not reach here
|
||||
}
|
||||
|
||||
struct batch_response {
|
||||
bool is_terminated = false; // if true, indicates that processing was stopped before all results were received
|
||||
std::vector<server_task_result_ptr> results;
|
||||
server_task_result_ptr error; // nullptr if no error
|
||||
};
|
||||
|
||||
batch_response wait_for_all(const std::function<bool()> & should_stop) {
|
||||
batch_response batch_res;
|
||||
batch_res.results.resize(id_tasks.size());
|
||||
while (has_next()) {
|
||||
auto res = next(should_stop);
|
||||
if (res == nullptr) {
|
||||
batch_res.is_terminated = true;
|
||||
return batch_res;
|
||||
}
|
||||
if (res->is_error()) {
|
||||
batch_res.error = std::move(res);
|
||||
return batch_res;
|
||||
}
|
||||
const size_t idx = res->get_index();
|
||||
GGML_ASSERT(idx < batch_res.results.size() && "index out of range");
|
||||
GGML_ASSERT(batch_res.results[idx] == nullptr && "duplicate result received");
|
||||
batch_res.results[idx] = std::move(res);
|
||||
}
|
||||
return batch_res;
|
||||
}
|
||||
|
||||
void stop() {
|
||||
ctx_server.queue_results.remove_waiting_task_ids(id_tasks);
|
||||
if (has_next() && !cancelled) {
|
||||
// if tasks is not finished yet, cancel them
|
||||
cancelled = true;
|
||||
std::vector<server_task> cancel_tasks;
|
||||
cancel_tasks.reserve(id_tasks.size());
|
||||
for (const auto & id_task : id_tasks) {
|
||||
SRV_WRN("cancel task, id_task = %d\n", id_task);
|
||||
server_task task(SERVER_TASK_TYPE_CANCEL);
|
||||
task.id_target = id_task;
|
||||
ctx_server.queue_results.remove_waiting_task_id(id_task);
|
||||
cancel_tasks.push_back(std::move(task));
|
||||
}
|
||||
// push to beginning of the queue, so it has highest priority
|
||||
ctx_server.queue_tasks.post(std::move(cancel_tasks), true);
|
||||
} else {
|
||||
SRV_DBG("%s", "all tasks already finished, no need to cancel\n");
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
static void log_server_request(const httplib::Request & req, const httplib::Response & res) {
|
||||
// skip GH copilot requests when using default port
|
||||
if (req.path == "/v1/health") {
|
||||
|
|
@ -4432,6 +4431,17 @@ static void log_server_request(const httplib::Request & req, const httplib::Resp
|
|||
SRV_DBG("response: %s\n", res.body.c_str());
|
||||
}
|
||||
|
||||
// Write an error response: the body is {"error": error_data} serialized as JSON,
// and the HTTP status is taken from error_data's "code" field via json_value()
// (500 is passed as the default — presumably used when the field is missing).
static void res_err(httplib::Response & res, const json & error_data) {
    const json envelope {{"error", error_data}};

    res.set_content(safe_json_to_str(envelope), MIMETYPE_JSON);
    res.status = json_value(error_data, "code", 500);
}
|
||||
|
||||
// Write a success response: `data` serialized as the JSON body, HTTP status 200.
static void res_ok(httplib::Response & res, const json & data) {
    const auto body = safe_json_to_str(data);

    res.set_content(body, MIMETYPE_JSON);
    res.status = 200;
}
|
||||
|
||||
std::function<void(int)> shutdown_handler;
|
||||
std::atomic_flag is_terminating = ATOMIC_FLAG_INIT;
|
||||
|
||||
|
|
@ -4501,19 +4511,7 @@ int main(int argc, char ** argv) {
|
|||
|
||||
svr->set_default_headers({{"Server", "llama.cpp"}});
|
||||
svr->set_logger(log_server_request);
|
||||
|
||||
auto res_error = [](httplib::Response & res, const json & error_data) {
|
||||
json final_response {{"error", error_data}};
|
||||
res.set_content(safe_json_to_str(final_response), MIMETYPE_JSON);
|
||||
res.status = json_value(error_data, "code", 500);
|
||||
};
|
||||
|
||||
auto res_ok = [](httplib::Response & res, const json & data) {
|
||||
res.set_content(safe_json_to_str(data), MIMETYPE_JSON);
|
||||
res.status = 200;
|
||||
};
|
||||
|
||||
svr->set_exception_handler([&res_error](const httplib::Request &, httplib::Response & res, const std::exception_ptr & ep) {
|
||||
svr->set_exception_handler([](const httplib::Request &, httplib::Response & res, const std::exception_ptr & ep) {
|
||||
std::string message;
|
||||
try {
|
||||
std::rethrow_exception(ep);
|
||||
|
|
@ -4526,17 +4524,17 @@ int main(int argc, char ** argv) {
|
|||
try {
|
||||
json formatted_error = format_error_response(message, ERROR_TYPE_SERVER);
|
||||
LOG_WRN("got exception: %s\n", formatted_error.dump().c_str());
|
||||
res_error(res, formatted_error);
|
||||
res_err(res, formatted_error);
|
||||
} catch (const std::exception & e) {
|
||||
LOG_ERR("got another exception: %s | while hanlding exception: %s\n", e.what(), message.c_str());
|
||||
}
|
||||
});
|
||||
|
||||
svr->set_error_handler([&res_error](const httplib::Request &, httplib::Response & res) {
|
||||
svr->set_error_handler([](const httplib::Request &, httplib::Response & res) {
|
||||
if (res.status == 404) {
|
||||
res_error(res, format_error_response("File Not Found", ERROR_TYPE_NOT_FOUND));
|
||||
res_err(res, format_error_response("File Not Found", ERROR_TYPE_NOT_FOUND));
|
||||
}
|
||||
// for other error codes, we skip processing here because it's already done by res_error()
|
||||
// for other error codes, we skip processing here because it's already done by res_err()
|
||||
});
|
||||
|
||||
// set timeouts and change hostname and port
|
||||
|
|
@ -4562,7 +4560,7 @@ int main(int argc, char ** argv) {
|
|||
// Middlewares
|
||||
//
|
||||
|
||||
auto middleware_validate_api_key = [¶ms, &res_error](const httplib::Request & req, httplib::Response & res) {
|
||||
auto middleware_validate_api_key = [¶ms](const httplib::Request & req, httplib::Response & res) {
|
||||
static const std::unordered_set<std::string> public_endpoints = {
|
||||
"/health",
|
||||
"/v1/health",
|
||||
|
|
@ -4593,14 +4591,14 @@ int main(int argc, char ** argv) {
|
|||
}
|
||||
|
||||
// API key is invalid or not provided
|
||||
res_error(res, format_error_response("Invalid API Key", ERROR_TYPE_AUTHENTICATION));
|
||||
res_err(res, format_error_response("Invalid API Key", ERROR_TYPE_AUTHENTICATION));
|
||||
|
||||
LOG_WRN("Unauthorized: Invalid API Key\n");
|
||||
|
||||
return false;
|
||||
};
|
||||
|
||||
auto middleware_server_state = [&res_error, &state](const httplib::Request & req, httplib::Response & res) {
|
||||
auto middleware_server_state = [&state](const httplib::Request & req, httplib::Response & res) {
|
||||
server_state current_state = state.load();
|
||||
if (current_state == SERVER_STATE_LOADING_MODEL) {
|
||||
auto tmp = string_split<std::string>(req.path, '.');
|
||||
|
|
@ -4611,7 +4609,7 @@ int main(int argc, char ** argv) {
|
|||
// allow the models endpoint to be accessed during loading
|
||||
return true;
|
||||
} else {
|
||||
res_error(res, format_error_response("Loading model", ERROR_TYPE_UNAVAILABLE));
|
||||
res_err(res, format_error_response("Loading model", ERROR_TYPE_UNAVAILABLE));
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
|
@ -4650,7 +4648,7 @@ int main(int argc, char ** argv) {
|
|||
|
||||
const auto handle_slots = [&](const httplib::Request & req, httplib::Response & res) {
|
||||
if (!params.endpoint_slots) {
|
||||
res_error(res, format_error_response("This server does not support slots endpoint. Start it with `--slots`", ERROR_TYPE_NOT_SUPPORTED));
|
||||
res_err(res, format_error_response("This server does not support slots endpoint. Start it with `--slots`", ERROR_TYPE_NOT_SUPPORTED));
|
||||
return;
|
||||
}
|
||||
|
||||
|
|
@ -4668,7 +4666,7 @@ int main(int argc, char ** argv) {
|
|||
ctx_server.queue_results.remove_waiting_task_id(task_id);
|
||||
|
||||
if (result->is_error()) {
|
||||
res_error(res, result->to_json());
|
||||
res_err(res, result->to_json());
|
||||
return;
|
||||
}
|
||||
|
||||
|
|
@ -4679,7 +4677,7 @@ int main(int argc, char ** argv) {
|
|||
// optionally return "fail_on_no_slot" error
|
||||
if (req.has_param("fail_on_no_slot")) {
|
||||
if (res_task->n_idle_slots == 0) {
|
||||
res_error(res, format_error_response("no slot available", ERROR_TYPE_UNAVAILABLE));
|
||||
res_err(res, format_error_response("no slot available", ERROR_TYPE_UNAVAILABLE));
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
|
@ -4689,7 +4687,7 @@ int main(int argc, char ** argv) {
|
|||
|
||||
const auto handle_metrics = [&](const httplib::Request &, httplib::Response & res) {
|
||||
if (!params.endpoint_metrics) {
|
||||
res_error(res, format_error_response("This server does not support metrics endpoint. Start it with `--metrics`", ERROR_TYPE_NOT_SUPPORTED));
|
||||
res_err(res, format_error_response("This server does not support metrics endpoint. Start it with `--metrics`", ERROR_TYPE_NOT_SUPPORTED));
|
||||
return;
|
||||
}
|
||||
|
||||
|
|
@ -4707,7 +4705,7 @@ int main(int argc, char ** argv) {
|
|||
ctx_server.queue_results.remove_waiting_task_id(task_id);
|
||||
|
||||
if (result->is_error()) {
|
||||
res_error(res, result->to_json());
|
||||
res_err(res, result->to_json());
|
||||
return;
|
||||
}
|
||||
|
||||
|
|
@ -4788,11 +4786,11 @@ int main(int argc, char ** argv) {
|
|||
res.status = 200; // HTTP OK
|
||||
};
|
||||
|
||||
const auto handle_slots_save = [&ctx_server, &res_error, &res_ok, ¶ms](const httplib::Request & req, httplib::Response & res, int id_slot) {
|
||||
const auto handle_slots_save = [&ctx_server, ¶ms](const httplib::Request & req, httplib::Response & res, int id_slot) {
|
||||
json request_data = json::parse(req.body);
|
||||
std::string filename = request_data.at("filename");
|
||||
if (!fs_validate_filename(filename)) {
|
||||
res_error(res, format_error_response("Invalid filename", ERROR_TYPE_INVALID_REQUEST));
|
||||
res_err(res, format_error_response("Invalid filename", ERROR_TYPE_INVALID_REQUEST));
|
||||
return;
|
||||
}
|
||||
std::string filepath = params.slot_save_path + filename;
|
||||
|
|
@ -4813,18 +4811,18 @@ int main(int argc, char ** argv) {
|
|||
ctx_server.queue_results.remove_waiting_task_id(task_id);
|
||||
|
||||
if (result->is_error()) {
|
||||
res_error(res, result->to_json());
|
||||
res_err(res, result->to_json());
|
||||
return;
|
||||
}
|
||||
|
||||
res_ok(res, result->to_json());
|
||||
};
|
||||
|
||||
const auto handle_slots_restore = [&ctx_server, &res_error, &res_ok, ¶ms](const httplib::Request & req, httplib::Response & res, int id_slot) {
|
||||
const auto handle_slots_restore = [&ctx_server, ¶ms](const httplib::Request & req, httplib::Response & res, int id_slot) {
|
||||
json request_data = json::parse(req.body);
|
||||
std::string filename = request_data.at("filename");
|
||||
if (!fs_validate_filename(filename)) {
|
||||
res_error(res, format_error_response("Invalid filename", ERROR_TYPE_INVALID_REQUEST));
|
||||
res_err(res, format_error_response("Invalid filename", ERROR_TYPE_INVALID_REQUEST));
|
||||
return;
|
||||
}
|
||||
std::string filepath = params.slot_save_path + filename;
|
||||
|
|
@ -4845,7 +4843,7 @@ int main(int argc, char ** argv) {
|
|||
ctx_server.queue_results.remove_waiting_task_id(task_id);
|
||||
|
||||
if (result->is_error()) {
|
||||
res_error(res, result->to_json());
|
||||
res_err(res, result->to_json());
|
||||
return;
|
||||
}
|
||||
|
||||
|
|
@ -4853,7 +4851,7 @@ int main(int argc, char ** argv) {
|
|||
res_ok(res, result->to_json());
|
||||
};
|
||||
|
||||
const auto handle_slots_erase = [&ctx_server, &res_error, &res_ok](const httplib::Request & /* req */, httplib::Response & res, int id_slot) {
|
||||
const auto handle_slots_erase = [&ctx_server](const httplib::Request & /* req */, httplib::Response & res, int id_slot) {
|
||||
int task_id = ctx_server.queue_tasks.get_new_id();
|
||||
{
|
||||
server_task task(SERVER_TASK_TYPE_SLOT_ERASE);
|
||||
|
|
@ -4868,7 +4866,7 @@ int main(int argc, char ** argv) {
|
|||
ctx_server.queue_results.remove_waiting_task_id(task_id);
|
||||
|
||||
if (result->is_error()) {
|
||||
res_error(res, result->to_json());
|
||||
res_err(res, result->to_json());
|
||||
return;
|
||||
}
|
||||
|
||||
|
|
@ -4876,9 +4874,9 @@ int main(int argc, char ** argv) {
|
|||
res_ok(res, result->to_json());
|
||||
};
|
||||
|
||||
const auto handle_slots_action = [¶ms, &res_error, &handle_slots_save, &handle_slots_restore, &handle_slots_erase](const httplib::Request & req, httplib::Response & res) {
|
||||
const auto handle_slots_action = [¶ms, &handle_slots_save, &handle_slots_restore, &handle_slots_erase](const httplib::Request & req, httplib::Response & res) {
|
||||
if (params.slot_save_path.empty()) {
|
||||
res_error(res, format_error_response("This server does not support slots action. Start it with `--slot-save-path`", ERROR_TYPE_NOT_SUPPORTED));
|
||||
res_err(res, format_error_response("This server does not support slots action. Start it with `--slot-save-path`", ERROR_TYPE_NOT_SUPPORTED));
|
||||
return;
|
||||
}
|
||||
|
||||
|
|
@ -4888,7 +4886,7 @@ int main(int argc, char ** argv) {
|
|||
try {
|
||||
id_slot = std::stoi(id_slot_str);
|
||||
} catch (const std::exception &) {
|
||||
res_error(res, format_error_response("Invalid slot ID", ERROR_TYPE_INVALID_REQUEST));
|
||||
res_err(res, format_error_response("Invalid slot ID", ERROR_TYPE_INVALID_REQUEST));
|
||||
return;
|
||||
}
|
||||
|
||||
|
|
@ -4901,11 +4899,11 @@ int main(int argc, char ** argv) {
|
|||
} else if (action == "erase") {
|
||||
handle_slots_erase(req, res, id_slot);
|
||||
} else {
|
||||
res_error(res, format_error_response("Invalid action", ERROR_TYPE_INVALID_REQUEST));
|
||||
res_err(res, format_error_response("Invalid action", ERROR_TYPE_INVALID_REQUEST));
|
||||
}
|
||||
};
|
||||
|
||||
const auto handle_props = [¶ms, &ctx_server, &res_ok](const httplib::Request &, httplib::Response & res) {
|
||||
const auto handle_props = [¶ms, &ctx_server](const httplib::Request &, httplib::Response & res) {
|
||||
json default_generation_settings_for_props;
|
||||
|
||||
{
|
||||
|
|
@ -4947,9 +4945,9 @@ int main(int argc, char ** argv) {
|
|||
res_ok(res, data);
|
||||
};
|
||||
|
||||
const auto handle_props_change = [&ctx_server, &res_error, &res_ok](const httplib::Request & req, httplib::Response & res) {
|
||||
const auto handle_props_change = [&ctx_server](const httplib::Request & req, httplib::Response & res) {
|
||||
if (!ctx_server.params_base.endpoint_props) {
|
||||
res_error(res, format_error_response("This server does not support changing global properties. Start it with `--props`", ERROR_TYPE_NOT_SUPPORTED));
|
||||
res_err(res, format_error_response("This server does not support changing global properties. Start it with `--props`", ERROR_TYPE_NOT_SUPPORTED));
|
||||
return;
|
||||
}
|
||||
|
||||
|
|
@ -4960,7 +4958,7 @@ int main(int argc, char ** argv) {
|
|||
res_ok(res, {{ "success", true }});
|
||||
};
|
||||
|
||||
const auto handle_api_show = [&ctx_server, &res_ok](const httplib::Request &, httplib::Response & res) {
|
||||
const auto handle_api_show = [&ctx_server](const httplib::Request &, httplib::Response & res) {
|
||||
bool has_mtmd = ctx_server.mctx != nullptr;
|
||||
json data = {
|
||||
{
|
||||
|
|
@ -4991,7 +4989,7 @@ int main(int argc, char ** argv) {
|
|||
|
||||
// handle completion-like requests (completion, chat, infill)
|
||||
// we can optionally provide a custom format for partial results and final results
|
||||
const auto handle_completions_impl = [&ctx_server, &res_error, &res_ok](
|
||||
const auto handle_completions_impl = [&ctx_server](
|
||||
server_task_type type,
|
||||
json & data,
|
||||
const std::vector<raw_buffer> & files,
|
||||
|
|
@ -5001,7 +4999,10 @@ int main(int argc, char ** argv) {
|
|||
GGML_ASSERT(type == SERVER_TASK_TYPE_COMPLETION || type == SERVER_TASK_TYPE_INFILL);
|
||||
|
||||
auto completion_id = gen_chatcmplid();
|
||||
std::unordered_set<int> task_ids;
|
||||
// need to store the reader as a pointer, so that it won't be destroyed when the handle returns
|
||||
// use shared_ptr as it's shared between the chunked_content_provider() and on_complete()
|
||||
const auto rd = std::make_shared<server_response_reader>(ctx_server);
|
||||
|
||||
try {
|
||||
std::vector<server_task> tasks;
|
||||
|
||||
|
|
@ -5019,17 +5020,8 @@ int main(int argc, char ** argv) {
|
|||
// Everything else, including multimodal completions.
|
||||
inputs = tokenize_input_prompts(ctx_server.vocab, ctx_server.mctx, prompt, true, true);
|
||||
}
|
||||
const size_t n_ctx_slot = ctx_server.slots.front().n_ctx;
|
||||
tasks.reserve(inputs.size());
|
||||
for (size_t i = 0; i < inputs.size(); i++) {
|
||||
auto n_prompt_tokens = inputs[i].size();
|
||||
if (n_prompt_tokens >= n_ctx_slot) {
|
||||
json error_data = format_error_response("the request exceeds the available context size, try increasing it", ERROR_TYPE_EXCEED_CONTEXT_SIZE);
|
||||
error_data["n_prompt_tokens"] = n_prompt_tokens;
|
||||
error_data["n_ctx"] = n_ctx_slot;
|
||||
res_error(res, error_data);
|
||||
return;
|
||||
}
|
||||
server_task task = server_task(type);
|
||||
|
||||
task.id = ctx_server.queue_tasks.get_new_id();
|
||||
|
|
@ -5050,65 +5042,104 @@ int main(int argc, char ** argv) {
|
|||
tasks.push_back(std::move(task));
|
||||
}
|
||||
|
||||
task_ids = server_task::get_list_id(tasks);
|
||||
ctx_server.queue_results.add_waiting_tasks(tasks);
|
||||
ctx_server.queue_tasks.post(std::move(tasks));
|
||||
rd->post_tasks(std::move(tasks));
|
||||
} catch (const std::exception & e) {
|
||||
res_error(res, format_error_response(e.what(), ERROR_TYPE_INVALID_REQUEST));
|
||||
res_err(res, format_error_response(e.what(), ERROR_TYPE_INVALID_REQUEST));
|
||||
return;
|
||||
}
|
||||
|
||||
bool stream = json_value(data, "stream", false);
|
||||
|
||||
if (!stream) {
|
||||
ctx_server.receive_multi_results(task_ids, [&](std::vector<server_task_result_ptr> & results) {
|
||||
if (results.size() == 1) {
|
||||
// single result
|
||||
res_ok(res, results[0]->to_json());
|
||||
} else {
|
||||
// multiple results (multitask)
|
||||
json arr = json::array();
|
||||
for (auto & res : results) {
|
||||
arr.push_back(res->to_json());
|
||||
}
|
||||
res_ok(res, arr);
|
||||
// non-stream, wait for the results
|
||||
auto all_results = rd->wait_for_all(is_connection_closed);
|
||||
if (all_results.is_terminated) {
|
||||
return; // connection is closed
|
||||
} else if (all_results.error) {
|
||||
res_err(res, all_results.error->to_json());
|
||||
return;
|
||||
} else {
|
||||
json arr = json::array();
|
||||
for (auto & res : all_results.results) {
|
||||
GGML_ASSERT(dynamic_cast<server_task_result_cmpl_final*>(res.get()) != nullptr);
|
||||
arr.push_back(res->to_json());
|
||||
}
|
||||
}, [&](const json & error_data) {
|
||||
res_error(res, error_data);
|
||||
}, is_connection_closed);
|
||||
// if single request, return single object instead of array
|
||||
res_ok(res, arr.size() == 1 ? arr[0] : arr);
|
||||
}
|
||||
|
||||
ctx_server.queue_results.remove_waiting_task_ids(task_ids);
|
||||
} else {
|
||||
const auto chunked_content_provider = [task_ids, &ctx_server, oaicompat](size_t, httplib::DataSink & sink) {
|
||||
ctx_server.receive_cmpl_results_stream(task_ids, [&](server_task_result_ptr & result) -> bool {
|
||||
json res_json = result->to_json();
|
||||
if (res_json.is_array()) {
|
||||
for (const auto & res : res_json) {
|
||||
if (!server_sent_event(sink, res)) {
|
||||
// sending failed (HTTP connection closed), cancel the generation
|
||||
return false;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
} else {
|
||||
return server_sent_event(sink, res_json);
|
||||
// in streaming mode, the first error must be treated as non-stream response
|
||||
// this is to match the OAI API behavior
|
||||
// ref: https://github.com/ggml-org/llama.cpp/pull/16486#discussion_r2419657309
|
||||
server_task_result_ptr first_result = rd->next(is_connection_closed);
|
||||
if (first_result == nullptr) {
|
||||
return; // connection is closed
|
||||
} else if (first_result->is_error()) {
|
||||
res_err(res, first_result->to_json());
|
||||
return;
|
||||
} else {
|
||||
GGML_ASSERT(
|
||||
dynamic_cast<server_task_result_cmpl_partial*>(first_result.get()) != nullptr
|
||||
|| dynamic_cast<server_task_result_cmpl_final*>(first_result.get()) != nullptr
|
||||
);
|
||||
}
|
||||
|
||||
// next responses are streamed
|
||||
json first_result_json = first_result->to_json();
|
||||
const auto chunked_content_provider = [first_result_json, rd, oaicompat](size_t, httplib::DataSink & sink) mutable -> bool {
|
||||
// flush the first result as it's not an error
|
||||
if (!first_result_json.empty()) {
|
||||
if (!server_sent_event(sink, first_result_json)) {
|
||||
sink.done();
|
||||
return false; // sending failed, go to on_complete()
|
||||
}
|
||||
}, [&](const json & error_data) {
|
||||
server_sent_event(sink, json{{"error", error_data}});
|
||||
}, [&sink]() {
|
||||
// note: do not use req.is_connection_closed here because req is already destroyed
|
||||
return !sink.is_writable();
|
||||
});
|
||||
if (oaicompat != OAICOMPAT_TYPE_NONE) {
|
||||
static const std::string ev_done = "data: [DONE]\n\n";
|
||||
sink.write(ev_done.data(), ev_done.size());
|
||||
first_result_json.clear(); // mark as sent
|
||||
}
|
||||
sink.done();
|
||||
return false;
|
||||
|
||||
// receive subsequent results
|
||||
auto result = rd->next([&sink]{ return !sink.is_writable(); });
|
||||
if (result == nullptr) {
|
||||
sink.done();
|
||||
return false; // connection is closed, go to on_complete()
|
||||
}
|
||||
|
||||
// send the results
|
||||
json res_json = result->to_json();
|
||||
bool ok = false;
|
||||
if (result->is_error()) {
|
||||
ok = server_sent_event(sink, json {{ "error", result->to_json() }});
|
||||
sink.done();
|
||||
return false; // go to on_complete()
|
||||
} else {
|
||||
GGML_ASSERT(
|
||||
dynamic_cast<server_task_result_cmpl_partial*>(result.get()) != nullptr
|
||||
|| dynamic_cast<server_task_result_cmpl_final*>(result.get()) != nullptr
|
||||
);
|
||||
ok = server_sent_event(sink, res_json);
|
||||
}
|
||||
|
||||
if (!ok) {
|
||||
sink.done();
|
||||
return false; // sending failed, go to on_complete()
|
||||
}
|
||||
|
||||
// check if there is more data
|
||||
if (!rd->has_next()) {
|
||||
if (oaicompat != OAICOMPAT_TYPE_NONE) {
|
||||
static const std::string ev_done = "data: [DONE]\n\n";
|
||||
sink.write(ev_done.data(), ev_done.size());
|
||||
}
|
||||
sink.done();
|
||||
return false; // no more data, go to on_complete()
|
||||
}
|
||||
|
||||
// has next data, continue
|
||||
return true;
|
||||
};
|
||||
|
||||
auto on_complete = [task_ids, &ctx_server] (bool) {
|
||||
ctx_server.queue_results.remove_waiting_task_ids(task_ids);
|
||||
auto on_complete = [rd](bool) {
|
||||
rd->stop();
|
||||
};
|
||||
|
||||
res.set_chunked_content_provider("text/event-stream", chunked_content_provider, on_complete);
|
||||
|
|
@ -5139,7 +5170,7 @@ int main(int argc, char ** argv) {
|
|||
OAICOMPAT_TYPE_COMPLETION);
|
||||
};
|
||||
|
||||
const auto handle_infill = [&ctx_server, &res_error, &handle_completions_impl](const httplib::Request & req, httplib::Response & res) {
|
||||
const auto handle_infill = [&ctx_server, &handle_completions_impl](const httplib::Request & req, httplib::Response & res) {
|
||||
// check model compatibility
|
||||
std::string err;
|
||||
if (llama_vocab_fim_pre(ctx_server.vocab) == LLAMA_TOKEN_NULL) {
|
||||
|
|
@ -5152,7 +5183,7 @@ int main(int argc, char ** argv) {
|
|||
err += "middle token is missing. ";
|
||||
}
|
||||
if (!err.empty()) {
|
||||
res_error(res, format_error_response(string_format("Infill is not supported by this model: %s", err.c_str()), ERROR_TYPE_NOT_SUPPORTED));
|
||||
res_err(res, format_error_response(string_format("Infill is not supported by this model: %s", err.c_str()), ERROR_TYPE_NOT_SUPPORTED));
|
||||
return;
|
||||
}
|
||||
|
||||
|
|
@ -5161,20 +5192,20 @@ int main(int argc, char ** argv) {
|
|||
// validate input
|
||||
if (data.contains("prompt") && !data.at("prompt").is_string()) {
|
||||
// prompt is optional
|
||||
res_error(res, format_error_response("\"prompt\" must be a string", ERROR_TYPE_INVALID_REQUEST));
|
||||
res_err(res, format_error_response("\"prompt\" must be a string", ERROR_TYPE_INVALID_REQUEST));
|
||||
}
|
||||
|
||||
if (!data.contains("input_prefix")) {
|
||||
res_error(res, format_error_response("\"input_prefix\" is required", ERROR_TYPE_INVALID_REQUEST));
|
||||
res_err(res, format_error_response("\"input_prefix\" is required", ERROR_TYPE_INVALID_REQUEST));
|
||||
}
|
||||
|
||||
if (!data.contains("input_suffix")) {
|
||||
res_error(res, format_error_response("\"input_suffix\" is required", ERROR_TYPE_INVALID_REQUEST));
|
||||
res_err(res, format_error_response("\"input_suffix\" is required", ERROR_TYPE_INVALID_REQUEST));
|
||||
}
|
||||
|
||||
if (data.contains("input_extra") && !data.at("input_extra").is_array()) {
|
||||
// input_extra is optional
|
||||
res_error(res, format_error_response("\"input_extra\" must be an array of {\"filename\": string, \"text\": string}", ERROR_TYPE_INVALID_REQUEST));
|
||||
res_err(res, format_error_response("\"input_extra\" must be an array of {\"filename\": string, \"text\": string}", ERROR_TYPE_INVALID_REQUEST));
|
||||
return;
|
||||
}
|
||||
|
||||
|
|
@ -5182,12 +5213,12 @@ int main(int argc, char ** argv) {
|
|||
for (const auto & chunk : input_extra) {
|
||||
// { "text": string, "filename": string }
|
||||
if (!chunk.contains("text") || !chunk.at("text").is_string()) {
|
||||
res_error(res, format_error_response("extra_context chunk must contain a \"text\" field with a string value", ERROR_TYPE_INVALID_REQUEST));
|
||||
res_err(res, format_error_response("extra_context chunk must contain a \"text\" field with a string value", ERROR_TYPE_INVALID_REQUEST));
|
||||
return;
|
||||
}
|
||||
// filename is optional
|
||||
if (chunk.contains("filename") && !chunk.at("filename").is_string()) {
|
||||
res_error(res, format_error_response("extra_context chunk's \"filename\" field must be a string", ERROR_TYPE_INVALID_REQUEST));
|
||||
res_err(res, format_error_response("extra_context chunk's \"filename\" field must be a string", ERROR_TYPE_INVALID_REQUEST));
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
|
@ -5238,7 +5269,7 @@ int main(int argc, char ** argv) {
|
|||
};
|
||||
|
||||
// same with handle_chat_completions, but without inference part
|
||||
const auto handle_apply_template = [&ctx_server, &res_ok](const httplib::Request & req, httplib::Response & res) {
|
||||
const auto handle_apply_template = [&ctx_server](const httplib::Request & req, httplib::Response & res) {
|
||||
auto body = json::parse(req.body);
|
||||
std::vector<raw_buffer> files; // dummy, unused
|
||||
json data = oaicompat_chat_params_parse(
|
||||
|
|
@ -5248,7 +5279,7 @@ int main(int argc, char ** argv) {
|
|||
res_ok(res, {{ "prompt", std::move(data.at("prompt")) }});
|
||||
};
|
||||
|
||||
const auto handle_models = [¶ms, &ctx_server, &state, &res_ok](const httplib::Request &, httplib::Response & res) {
|
||||
const auto handle_models = [¶ms, &ctx_server, &state](const httplib::Request &, httplib::Response & res) {
|
||||
server_state current_state = state.load();
|
||||
json model_meta = nullptr;
|
||||
if (current_state == SERVER_STATE_READY) {
|
||||
|
|
@ -5293,7 +5324,7 @@ int main(int argc, char ** argv) {
|
|||
res_ok(res, models);
|
||||
};
|
||||
|
||||
const auto handle_tokenize = [&ctx_server, &res_ok](const httplib::Request & req, httplib::Response & res) {
|
||||
const auto handle_tokenize = [&ctx_server](const httplib::Request & req, httplib::Response & res) {
|
||||
const json body = json::parse(req.body);
|
||||
|
||||
json tokens_response = json::array();
|
||||
|
|
@ -5334,7 +5365,7 @@ int main(int argc, char ** argv) {
|
|||
res_ok(res, data);
|
||||
};
|
||||
|
||||
const auto handle_detokenize = [&ctx_server, &res_ok](const httplib::Request & req, httplib::Response & res) {
|
||||
const auto handle_detokenize = [&ctx_server](const httplib::Request & req, httplib::Response & res) {
|
||||
const json body = json::parse(req.body);
|
||||
|
||||
std::string content;
|
||||
|
|
@ -5347,14 +5378,14 @@ int main(int argc, char ** argv) {
|
|||
res_ok(res, data);
|
||||
};
|
||||
|
||||
const auto handle_embeddings_impl = [&ctx_server, &res_error, &res_ok](const httplib::Request & req, httplib::Response & res, oaicompat_type oaicompat) {
|
||||
const auto handle_embeddings_impl = [&ctx_server](const httplib::Request & req, httplib::Response & res, oaicompat_type oaicompat) {
|
||||
if (!ctx_server.params_base.embedding) {
|
||||
res_error(res, format_error_response("This server does not support embeddings. Start it with `--embeddings`", ERROR_TYPE_NOT_SUPPORTED));
|
||||
res_err(res, format_error_response("This server does not support embeddings. Start it with `--embeddings`", ERROR_TYPE_NOT_SUPPORTED));
|
||||
return;
|
||||
}
|
||||
|
||||
if (oaicompat != OAICOMPAT_TYPE_NONE && llama_pooling_type(ctx_server.ctx) == LLAMA_POOLING_TYPE_NONE) {
|
||||
res_error(res, format_error_response("Pooling type 'none' is not OAI compatible. Please use a different pooling type", ERROR_TYPE_INVALID_REQUEST));
|
||||
res_err(res, format_error_response("Pooling type 'none' is not OAI compatible. Please use a different pooling type", ERROR_TYPE_INVALID_REQUEST));
|
||||
return;
|
||||
}
|
||||
|
||||
|
|
@ -5368,7 +5399,7 @@ int main(int argc, char ** argv) {
|
|||
oaicompat = OAICOMPAT_TYPE_NONE; // "content" field is not OAI compatible
|
||||
prompt = body.at("content");
|
||||
} else {
|
||||
res_error(res, format_error_response("\"input\" or \"content\" must be provided", ERROR_TYPE_INVALID_REQUEST));
|
||||
res_err(res, format_error_response("\"input\" or \"content\" must be provided", ERROR_TYPE_INVALID_REQUEST));
|
||||
return;
|
||||
}
|
||||
|
||||
|
|
@ -5378,7 +5409,7 @@ int main(int argc, char ** argv) {
|
|||
if (format == "base64") {
|
||||
use_base64 = true;
|
||||
} else if (format != "float") {
|
||||
res_error(res, format_error_response("The format to return the embeddings in. Can be either float or base64", ERROR_TYPE_INVALID_REQUEST));
|
||||
res_err(res, format_error_response("The format to return the embeddings in. Can be either float or base64", ERROR_TYPE_INVALID_REQUEST));
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
|
@ -5387,7 +5418,7 @@ int main(int argc, char ** argv) {
|
|||
for (const auto & tokens : tokenized_prompts) {
|
||||
// this check is necessary for models that do not add BOS token to the input
|
||||
if (tokens.empty()) {
|
||||
res_error(res, format_error_response("Input content cannot be empty", ERROR_TYPE_INVALID_REQUEST));
|
||||
res_err(res, format_error_response("Input content cannot be empty", ERROR_TYPE_INVALID_REQUEST));
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
|
@ -5402,8 +5433,7 @@ int main(int argc, char ** argv) {
|
|||
|
||||
// create and queue the task
|
||||
json responses = json::array();
|
||||
bool error = false;
|
||||
std::unordered_set<int> task_ids;
|
||||
server_response_reader rd(ctx_server);
|
||||
{
|
||||
std::vector<server_task> tasks;
|
||||
for (size_t i = 0; i < tokenized_prompts.size(); i++) {
|
||||
|
|
@ -5419,27 +5449,23 @@ int main(int argc, char ** argv) {
|
|||
|
||||
tasks.push_back(std::move(task));
|
||||
}
|
||||
|
||||
task_ids = server_task::get_list_id(tasks);
|
||||
ctx_server.queue_results.add_waiting_tasks(tasks);
|
||||
ctx_server.queue_tasks.post(std::move(tasks));
|
||||
rd.post_tasks(std::move(tasks));
|
||||
}
|
||||
|
||||
// get the result
|
||||
ctx_server.receive_multi_results(task_ids, [&](std::vector<server_task_result_ptr> & results) {
|
||||
for (auto & res : results) {
|
||||
// wait for the results
|
||||
auto all_results = rd.wait_for_all(req.is_connection_closed);
|
||||
|
||||
// collect results
|
||||
if (all_results.is_terminated) {
|
||||
return; // connection is closed
|
||||
} else if (all_results.error) {
|
||||
res_err(res, all_results.error->to_json());
|
||||
return;
|
||||
} else {
|
||||
for (auto & res : all_results.results) {
|
||||
GGML_ASSERT(dynamic_cast<server_task_result_embd*>(res.get()) != nullptr);
|
||||
responses.push_back(res->to_json());
|
||||
}
|
||||
}, [&](const json & error_data) {
|
||||
res_error(res, error_data);
|
||||
error = true;
|
||||
}, req.is_connection_closed);
|
||||
|
||||
ctx_server.queue_results.remove_waiting_task_ids(task_ids);
|
||||
|
||||
if (error) {
|
||||
return;
|
||||
}
|
||||
|
||||
// write JSON response
|
||||
|
|
@ -5457,9 +5483,9 @@ int main(int argc, char ** argv) {
|
|||
handle_embeddings_impl(req, res, OAICOMPAT_TYPE_EMBEDDING);
|
||||
};
|
||||
|
||||
const auto handle_rerank = [&ctx_server, &res_error, &res_ok](const httplib::Request & req, httplib::Response & res) {
|
||||
const auto handle_rerank = [&ctx_server](const httplib::Request & req, httplib::Response & res) {
|
||||
if (!ctx_server.params_base.embedding || ctx_server.params_base.pooling_type != LLAMA_POOLING_TYPE_RANK) {
|
||||
res_error(res, format_error_response("This server does not support reranking. Start it with `--reranking`", ERROR_TYPE_NOT_SUPPORTED));
|
||||
res_err(res, format_error_response("This server does not support reranking. Start it with `--reranking`", ERROR_TYPE_NOT_SUPPORTED));
|
||||
return;
|
||||
}
|
||||
|
||||
|
|
@ -5474,18 +5500,18 @@ int main(int argc, char ** argv) {
|
|||
if (body.count("query") == 1) {
|
||||
query = body.at("query");
|
||||
if (!query.is_string()) {
|
||||
res_error(res, format_error_response("\"query\" must be a string", ERROR_TYPE_INVALID_REQUEST));
|
||||
res_err(res, format_error_response("\"query\" must be a string", ERROR_TYPE_INVALID_REQUEST));
|
||||
return;
|
||||
}
|
||||
} else {
|
||||
res_error(res, format_error_response("\"query\" must be provided", ERROR_TYPE_INVALID_REQUEST));
|
||||
res_err(res, format_error_response("\"query\" must be provided", ERROR_TYPE_INVALID_REQUEST));
|
||||
return;
|
||||
}
|
||||
|
||||
std::vector<std::string> documents = json_value(body, "documents",
|
||||
json_value(body, "texts", std::vector<std::string>()));
|
||||
if (documents.empty()) {
|
||||
res_error(res, format_error_response("\"documents\" must be a non-empty string array", ERROR_TYPE_INVALID_REQUEST));
|
||||
res_err(res, format_error_response("\"documents\" must be a non-empty string array", ERROR_TYPE_INVALID_REQUEST));
|
||||
return;
|
||||
}
|
||||
|
||||
|
|
@ -5493,8 +5519,7 @@ int main(int argc, char ** argv) {
|
|||
|
||||
// create and queue the task
|
||||
json responses = json::array();
|
||||
bool error = false;
|
||||
std::unordered_set<int> task_ids;
|
||||
server_response_reader rd(ctx_server);
|
||||
{
|
||||
std::vector<server_task> tasks;
|
||||
tasks.reserve(documents.size());
|
||||
|
|
@ -5506,24 +5531,23 @@ int main(int argc, char ** argv) {
|
|||
task.tokens = std::move(tmp);
|
||||
tasks.push_back(std::move(task));
|
||||
}
|
||||
|
||||
task_ids = server_task::get_list_id(tasks);
|
||||
ctx_server.queue_results.add_waiting_tasks(tasks);
|
||||
ctx_server.queue_tasks.post(std::move(tasks));
|
||||
rd.post_tasks(std::move(tasks));
|
||||
}
|
||||
|
||||
ctx_server.receive_multi_results(task_ids, [&](std::vector<server_task_result_ptr> & results) {
|
||||
for (auto & res : results) {
|
||||
// wait for the results
|
||||
auto all_results = rd.wait_for_all(req.is_connection_closed);
|
||||
|
||||
// collect results
|
||||
if (all_results.is_terminated) {
|
||||
return; // connection is closed
|
||||
} else if (all_results.error) {
|
||||
res_err(res, all_results.error->to_json());
|
||||
return;
|
||||
} else {
|
||||
for (auto & res : all_results.results) {
|
||||
GGML_ASSERT(dynamic_cast<server_task_result_rerank*>(res.get()) != nullptr);
|
||||
responses.push_back(res->to_json());
|
||||
}
|
||||
}, [&](const json & error_data) {
|
||||
res_error(res, error_data);
|
||||
error = true;
|
||||
}, req.is_connection_closed);
|
||||
|
||||
if (error) {
|
||||
return;
|
||||
}
|
||||
|
||||
// write JSON response
|
||||
|
|
@ -5570,7 +5594,7 @@ int main(int argc, char ** argv) {
|
|||
const auto handle_lora_adapters_apply = [&](const httplib::Request & req, httplib::Response & res) {
|
||||
const json body = json::parse(req.body);
|
||||
if (!body.is_array()) {
|
||||
res_error(res, format_error_response("Request body must be an array", ERROR_TYPE_INVALID_REQUEST));
|
||||
res_err(res, format_error_response("Request body must be an array", ERROR_TYPE_INVALID_REQUEST));
|
||||
return;
|
||||
}
|
||||
|
||||
|
|
@ -5588,7 +5612,7 @@ int main(int argc, char ** argv) {
|
|||
ctx_server.queue_results.remove_waiting_task_id(task_id);
|
||||
|
||||
if (result->is_error()) {
|
||||
res_error(res, result->to_json());
|
||||
res_err(res, result->to_json());
|
||||
return;
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -453,15 +453,29 @@ static std::string tokens_to_output_formatted_string(const llama_context * ctx,
|
|||
return out;
|
||||
}
|
||||
|
||||
// note: if data is a json array, it will be sent as multiple events, one per item
|
||||
static bool server_sent_event(httplib::DataSink & sink, const json & data) {
|
||||
const std::string str =
|
||||
"data: " +
|
||||
data.dump(-1, ' ', false, json::error_handler_t::replace) +
|
||||
"\n\n"; // required by RFC 8895 - A message is terminated by a blank line (two line terminators in a row).
|
||||
static auto send_single = [](httplib::DataSink & sink, const json & data) -> bool {
|
||||
const std::string str =
|
||||
"data: " +
|
||||
data.dump(-1, ' ', false, json::error_handler_t::replace) +
|
||||
"\n\n"; // required by RFC 8895 - A message is terminated by a blank line (two line terminators in a row).
|
||||
|
||||
LOG_DBG("data stream, to_send: %s", str.c_str());
|
||||
LOG_DBG("data stream, to_send: %s", str.c_str());
|
||||
return sink.write(str.c_str(), str.size());
|
||||
};
|
||||
|
||||
return sink.write(str.c_str(), str.size());
|
||||
if (data.is_array()) {
|
||||
for (const auto & item : data) {
|
||||
if (!send_single(sink, item)) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
return send_single(sink, data);
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
//
|
||||
|
|
|
|||
|
|
@ -11,8 +11,16 @@ const preview: Preview = {
|
|||
date: /Date$/i
|
||||
}
|
||||
},
|
||||
|
||||
backgrounds: {
|
||||
disable: true
|
||||
},
|
||||
|
||||
a11y: {
|
||||
// 'todo' - show a11y violations in the test UI only
|
||||
// 'error' - fail CI on a11y violations
|
||||
// 'off' - skip a11y checks entirely
|
||||
test: 'todo'
|
||||
}
|
||||
},
|
||||
decorators: [
|
||||
|
|
|
|||
|
|
@ -1,8 +1,9 @@
|
|||
import * as a11yAddonAnnotations from '@storybook/addon-a11y/preview';
|
||||
import { setProjectAnnotations } from '@storybook/sveltekit';
|
||||
import * as previewAnnotations from './preview';
|
||||
import { beforeAll } from 'vitest';
|
||||
|
||||
const project = setProjectAnnotations([previewAnnotations]);
|
||||
const project = setProjectAnnotations([a11yAddonAnnotations, previewAnnotations]);
|
||||
|
||||
beforeAll(async () => {
|
||||
if (project.beforeAll) {
|
||||
|
|
|
|||
|
|
@ -22,20 +22,20 @@
|
|||
"unist-util-visit": "^5.0.0"
|
||||
},
|
||||
"devDependencies": {
|
||||
"@chromatic-com/storybook": "^4.0.1",
|
||||
"@chromatic-com/storybook": "^4.1.2",
|
||||
"@eslint/compat": "^1.2.5",
|
||||
"@eslint/js": "^9.18.0",
|
||||
"@internationalized/date": "^3.8.2",
|
||||
"@lucide/svelte": "^0.515.0",
|
||||
"@playwright/test": "^1.49.1",
|
||||
"@storybook/addon-a11y": "^9.0.17",
|
||||
"@storybook/addon-docs": "^9.0.17",
|
||||
"@storybook/addon-svelte-csf": "^5.0.7",
|
||||
"@storybook/addon-vitest": "^9.0.17",
|
||||
"@storybook/sveltekit": "^9.0.17",
|
||||
"@sveltejs/adapter-static": "^3.0.8",
|
||||
"@sveltejs/kit": "^2.22.0",
|
||||
"@sveltejs/vite-plugin-svelte": "^6.0.0",
|
||||
"@storybook/addon-a11y": "^10.0.7",
|
||||
"@storybook/addon-docs": "^10.0.7",
|
||||
"@storybook/addon-svelte-csf": "^5.0.10",
|
||||
"@storybook/addon-vitest": "^10.0.7",
|
||||
"@storybook/sveltekit": "^10.0.7",
|
||||
"@sveltejs/adapter-static": "^3.0.10",
|
||||
"@sveltejs/kit": "^2.48.4",
|
||||
"@sveltejs/vite-plugin-svelte": "^6.2.1",
|
||||
"@tailwindcss/forms": "^0.5.9",
|
||||
"@tailwindcss/typography": "^0.5.15",
|
||||
"@tailwindcss/vite": "^4.0.0",
|
||||
|
|
@ -46,21 +46,21 @@
|
|||
"dexie": "^4.0.11",
|
||||
"eslint": "^9.18.0",
|
||||
"eslint-config-prettier": "^10.0.1",
|
||||
"eslint-plugin-storybook": "^9.0.17",
|
||||
"eslint-plugin-storybook": "^10.0.7",
|
||||
"eslint-plugin-svelte": "^3.0.0",
|
||||
"fflate": "^0.8.2",
|
||||
"globals": "^16.0.0",
|
||||
"http-server": "^14.1.1",
|
||||
"mdast": "^3.0.0",
|
||||
"mdsvex": "^0.12.3",
|
||||
"playwright": "^1.53.0",
|
||||
"playwright": "^1.56.1",
|
||||
"prettier": "^3.4.2",
|
||||
"prettier-plugin-svelte": "^3.3.3",
|
||||
"prettier-plugin-tailwindcss": "^0.6.11",
|
||||
"rehype-katex": "^7.0.1",
|
||||
"remark-math": "^6.0.0",
|
||||
"sass": "^1.93.3",
|
||||
"storybook": "^9.0.17",
|
||||
"storybook": "^10.0.7",
|
||||
"svelte": "^5.0.0",
|
||||
"svelte-check": "^4.0.0",
|
||||
"tailwind-merge": "^3.3.1",
|
||||
|
|
@ -71,7 +71,7 @@
|
|||
"typescript-eslint": "^8.20.0",
|
||||
"unified": "^11.0.5",
|
||||
"uuid": "^13.0.0",
|
||||
"vite": "^7.0.4",
|
||||
"vite": "^7.2.2",
|
||||
"vite-plugin-devtools-json": "^0.2.0",
|
||||
"vitest": "^3.2.3",
|
||||
"vitest-browser-svelte": "^0.1.0"
|
||||
|
|
@ -133,9 +133,9 @@
|
|||
}
|
||||
},
|
||||
"node_modules/@chromatic-com/storybook": {
|
||||
"version": "4.0.1",
|
||||
"resolved": "https://registry.npmjs.org/@chromatic-com/storybook/-/storybook-4.0.1.tgz",
|
||||
"integrity": "sha512-GQXe5lyZl3yLewLJQyFXEpOp2h+mfN2bPrzYaOFNCJjO4Js9deKbRHTOSaiP2FRwZqDLdQwy2+SEGeXPZ94yYw==",
|
||||
"version": "4.1.2",
|
||||
"resolved": "https://registry.npmjs.org/@chromatic-com/storybook/-/storybook-4.1.2.tgz",
|
||||
"integrity": "sha512-QAWGtHwib0qsP5CcO64aJCF75zpFgpKK3jNpxILzQiPK3sVo4EmnVGJVdwcZWpWrGdH8E4YkncGoitw4EXzKMg==",
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
|
|
@ -150,7 +150,7 @@
|
|||
"yarn": ">=1.22.18"
|
||||
},
|
||||
"peerDependencies": {
|
||||
"storybook": "^0.0.0-0 || ^9.0.0 || ^9.1.0-0"
|
||||
"storybook": "^0.0.0-0 || ^9.0.0 || ^9.1.0-0 || ^9.2.0-0 || ^10.0.0-0 || ^10.1.0-0 || ^10.2.0-0 || ^10.3.0-0"
|
||||
}
|
||||
},
|
||||
"node_modules/@esbuild/aix-ppc64": {
|
||||
|
|
@ -894,6 +894,17 @@
|
|||
"@jridgewell/trace-mapping": "^0.3.24"
|
||||
}
|
||||
},
|
||||
"node_modules/@jridgewell/remapping": {
|
||||
"version": "2.3.5",
|
||||
"resolved": "https://registry.npmjs.org/@jridgewell/remapping/-/remapping-2.3.5.tgz",
|
||||
"integrity": "sha512-LI9u/+laYG4Ds1TDKSJW2YPrIlcVYOwi2fUC6xB43lueCjgxV4lffOCZCtYFiH6TNOX+tQKXx97T4IKHbhyHEQ==",
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"@jridgewell/gen-mapping": "^0.3.5",
|
||||
"@jridgewell/trace-mapping": "^0.3.24"
|
||||
}
|
||||
},
|
||||
"node_modules/@jridgewell/resolve-uri": {
|
||||
"version": "3.1.2",
|
||||
"resolved": "https://registry.npmjs.org/@jridgewell/resolve-uri/-/resolve-uri-3.1.2.tgz",
|
||||
|
|
@ -1502,13 +1513,13 @@
|
|||
}
|
||||
},
|
||||
"node_modules/@playwright/test": {
|
||||
"version": "1.54.1",
|
||||
"resolved": "https://registry.npmjs.org/@playwright/test/-/test-1.54.1.tgz",
|
||||
"integrity": "sha512-FS8hQ12acieG2dYSksmLOF7BNxnVf2afRJdCuM1eMSxj6QTSE6G4InGF7oApGgDb65MX7AwMVlIkpru0yZA4Xw==",
|
||||
"version": "1.56.1",
|
||||
"resolved": "https://registry.npmjs.org/@playwright/test/-/test-1.56.1.tgz",
|
||||
"integrity": "sha512-vSMYtL/zOcFpvJCW71Q/OEGQb7KYBPAdKh35WNSkaZA75JlAO8ED8UN6GUNTm3drWomcbcqRPFqQbLae8yBTdg==",
|
||||
"dev": true,
|
||||
"license": "Apache-2.0",
|
||||
"dependencies": {
|
||||
"playwright": "1.54.1"
|
||||
"playwright": "1.56.1"
|
||||
},
|
||||
"bin": {
|
||||
"playwright": "cli.js"
|
||||
|
|
@ -1812,9 +1823,9 @@
|
|||
"license": "MIT"
|
||||
},
|
||||
"node_modules/@storybook/addon-a11y": {
|
||||
"version": "9.0.17",
|
||||
"resolved": "https://registry.npmjs.org/@storybook/addon-a11y/-/addon-a11y-9.0.17.tgz",
|
||||
"integrity": "sha512-9cXNK3q/atx3hwJAt9HkJbd9vUxCXfKKiNNuSACbf8h9/j6u3jktulKOf6Xjc3B8lwn6ZpdK/x1HHZN2kTqsvg==",
|
||||
"version": "10.0.7",
|
||||
"resolved": "https://registry.npmjs.org/@storybook/addon-a11y/-/addon-a11y-10.0.7.tgz",
|
||||
"integrity": "sha512-JsYPpZ/n67/2bI1XJeyrAWHHQkHemPkPHjCA0tAUnMz1Shlo/LV2q1Ahgpxoihx4strbHwZz71bcS4MqkHBduA==",
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
|
|
@ -1826,20 +1837,20 @@
|
|||
"url": "https://opencollective.com/storybook"
|
||||
},
|
||||
"peerDependencies": {
|
||||
"storybook": "^9.0.17"
|
||||
"storybook": "^10.0.7"
|
||||
}
|
||||
},
|
||||
"node_modules/@storybook/addon-docs": {
|
||||
"version": "9.0.17",
|
||||
"resolved": "https://registry.npmjs.org/@storybook/addon-docs/-/addon-docs-9.0.17.tgz",
|
||||
"integrity": "sha512-LOX/kKgQGnyulrqZHsvf77+ZoH/nSUaplGr5hvZglW/U6ak6fO9seJyXAzVKEnC6p+F8n02kFBZbi3s+znQhSg==",
|
||||
"version": "10.0.7",
|
||||
"resolved": "https://registry.npmjs.org/@storybook/addon-docs/-/addon-docs-10.0.7.tgz",
|
||||
"integrity": "sha512-qQQMoeYZC4W+/8ubfOZiTrE8nYC/f4wWP1uq4peRyDy1N2nIN9SwhyxwMn0m3VpeGmRBga5dLvJY9ko6SnJekg==",
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"@mdx-js/react": "^3.0.0",
|
||||
"@storybook/csf-plugin": "9.0.17",
|
||||
"@storybook/icons": "^1.2.12",
|
||||
"@storybook/react-dom-shim": "9.0.17",
|
||||
"@storybook/csf-plugin": "10.0.7",
|
||||
"@storybook/icons": "^1.6.0",
|
||||
"@storybook/react-dom-shim": "10.0.7",
|
||||
"react": "^16.8.0 || ^17.0.0 || ^18.0.0 || ^19.0.0",
|
||||
"react-dom": "^16.8.0 || ^17.0.0 || ^18.0.0 || ^19.0.0",
|
||||
"ts-dedent": "^2.0.0"
|
||||
|
|
@ -1849,13 +1860,13 @@
|
|||
"url": "https://opencollective.com/storybook"
|
||||
},
|
||||
"peerDependencies": {
|
||||
"storybook": "^9.0.17"
|
||||
"storybook": "^10.0.7"
|
||||
}
|
||||
},
|
||||
"node_modules/@storybook/addon-svelte-csf": {
|
||||
"version": "5.0.7",
|
||||
"resolved": "https://registry.npmjs.org/@storybook/addon-svelte-csf/-/addon-svelte-csf-5.0.7.tgz",
|
||||
"integrity": "sha512-6Zmy5HjOlrrG6OoKRTGDr9LR6zRK4/Sa7raFzQRKHGASgMlfKsMdNTNO0sxnMUWCu2JMS6HsuoLtB3Ma8SlYtg==",
|
||||
"version": "5.0.10",
|
||||
"resolved": "https://registry.npmjs.org/@storybook/addon-svelte-csf/-/addon-svelte-csf-5.0.10.tgz",
|
||||
"integrity": "sha512-poSvTS7VdaQ42ZoqW5e4+2Hv1iLO0mekH9fwn/QuBNse48R4WlTyR8XFbHRTfatl9gdc9ZYC4uWzazrmV6zGIA==",
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
|
|
@ -1868,22 +1879,22 @@
|
|||
"zimmerframe": "^1.1.2"
|
||||
},
|
||||
"peerDependencies": {
|
||||
"@storybook/svelte": "^0.0.0-0 || ^8.2.0 || ^9.0.0 || ^9.1.0-0",
|
||||
"@storybook/svelte": "^0.0.0-0 || ^8.2.0 || ^9.0.0 || ^9.1.0-0 || ^10.0.0-0",
|
||||
"@sveltejs/vite-plugin-svelte": "^4.0.0 || ^5.0.0 || ^6.0.0",
|
||||
"storybook": "^0.0.0-0 || ^8.2.0 || ^9.0.0 || ^9.1.0-0",
|
||||
"storybook": "^0.0.0-0 || ^8.2.0 || ^9.0.0 || ^9.1.0-0 || ^10.0.0-0",
|
||||
"svelte": "^5.0.0",
|
||||
"vite": "^5.0.0 || ^6.0.0 || ^7.0.0"
|
||||
}
|
||||
},
|
||||
"node_modules/@storybook/addon-vitest": {
|
||||
"version": "9.0.17",
|
||||
"resolved": "https://registry.npmjs.org/@storybook/addon-vitest/-/addon-vitest-9.0.17.tgz",
|
||||
"integrity": "sha512-eogqcGbACR1sTedBSE2SP/4QV1ruicHYEhYjBtoPIjvYgymN1g5KSuQNysLx4f0SvAzczrcNjX2WVVLX2DVyzA==",
|
||||
"version": "10.0.7",
|
||||
"resolved": "https://registry.npmjs.org/@storybook/addon-vitest/-/addon-vitest-10.0.7.tgz",
|
||||
"integrity": "sha512-i6v/mAl+elrUxb+1f4NdnM17t/fg+KGJWL1U9quflXTd3KiLY0xJB4LwNP6yYo7Imc5NIO2fRkJbGvNqLBRe2Q==",
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"@storybook/global": "^5.0.0",
|
||||
"@storybook/icons": "^1.4.0",
|
||||
"@storybook/icons": "^1.6.0",
|
||||
"prompts": "^2.4.0",
|
||||
"ts-dedent": "^2.2.0"
|
||||
},
|
||||
|
|
@ -1892,15 +1903,19 @@
|
|||
"url": "https://opencollective.com/storybook"
|
||||
},
|
||||
"peerDependencies": {
|
||||
"@vitest/browser": "^3.0.0",
|
||||
"@vitest/runner": "^3.0.0",
|
||||
"storybook": "^9.0.17",
|
||||
"vitest": "^3.0.0"
|
||||
"@vitest/browser": "^3.0.0 || ^4.0.0",
|
||||
"@vitest/browser-playwright": "^4.0.0",
|
||||
"@vitest/runner": "^3.0.0 || ^4.0.0",
|
||||
"storybook": "^10.0.7",
|
||||
"vitest": "^3.0.0 || ^4.0.0"
|
||||
},
|
||||
"peerDependenciesMeta": {
|
||||
"@vitest/browser": {
|
||||
"optional": true
|
||||
},
|
||||
"@vitest/browser-playwright": {
|
||||
"optional": true
|
||||
},
|
||||
"@vitest/runner": {
|
||||
"optional": true
|
||||
},
|
||||
|
|
@ -1910,13 +1925,13 @@
|
|||
}
|
||||
},
|
||||
"node_modules/@storybook/builder-vite": {
|
||||
"version": "9.0.17",
|
||||
"resolved": "https://registry.npmjs.org/@storybook/builder-vite/-/builder-vite-9.0.17.tgz",
|
||||
"integrity": "sha512-lyuvgGhb0NaVk1tdB4xwzky6+YXQfxlxfNQqENYZ9uYQZdPfErMa4ZTXVQTV+CQHAa2NL+p/dG2JPAeu39e9UA==",
|
||||
"version": "10.0.7",
|
||||
"resolved": "https://registry.npmjs.org/@storybook/builder-vite/-/builder-vite-10.0.7.tgz",
|
||||
"integrity": "sha512-wk2TAoUY5+9t78GWVBndu9rEo9lo6Ec3SRrLT4VpIlcS2GPK+5f26UC2uvIBwOF/N7JrUUKq/zWDZ3m+do9QDg==",
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"@storybook/csf-plugin": "9.0.17",
|
||||
"@storybook/csf-plugin": "10.0.7",
|
||||
"ts-dedent": "^2.0.0"
|
||||
},
|
||||
"funding": {
|
||||
|
|
@ -1924,7 +1939,7 @@
|
|||
"url": "https://opencollective.com/storybook"
|
||||
},
|
||||
"peerDependencies": {
|
||||
"storybook": "^9.0.17",
|
||||
"storybook": "^10.0.7",
|
||||
"vite": "^5.0.0 || ^6.0.0 || ^7.0.0"
|
||||
}
|
||||
},
|
||||
|
|
@ -1939,20 +1954,38 @@
|
|||
}
|
||||
},
|
||||
"node_modules/@storybook/csf-plugin": {
|
||||
"version": "9.0.17",
|
||||
"resolved": "https://registry.npmjs.org/@storybook/csf-plugin/-/csf-plugin-9.0.17.tgz",
|
||||
"integrity": "sha512-6Q4eo1ObrLlsnB6bIt6T8+45XAb4to2pQGNrI7QPkLQRLrZinrJcNbLY7AGkyIoCOEsEbq08n09/nClQUbu8HA==",
|
||||
"version": "10.0.7",
|
||||
"resolved": "https://registry.npmjs.org/@storybook/csf-plugin/-/csf-plugin-10.0.7.tgz",
|
||||
"integrity": "sha512-YaYYlCyJBwxaMk7yREOdz+9MDSgxIYGdeJ9EIq/bUndmkoj9SRo1P9/0lC5dseWQoiGy4T3PbZiWruD8uM5m3g==",
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"unplugin": "^1.3.1"
|
||||
"unplugin": "^2.3.5"
|
||||
},
|
||||
"funding": {
|
||||
"type": "opencollective",
|
||||
"url": "https://opencollective.com/storybook"
|
||||
},
|
||||
"peerDependencies": {
|
||||
"storybook": "^9.0.17"
|
||||
"esbuild": "*",
|
||||
"rollup": "*",
|
||||
"storybook": "^10.0.7",
|
||||
"vite": "*",
|
||||
"webpack": "*"
|
||||
},
|
||||
"peerDependenciesMeta": {
|
||||
"esbuild": {
|
||||
"optional": true
|
||||
},
|
||||
"rollup": {
|
||||
"optional": true
|
||||
},
|
||||
"vite": {
|
||||
"optional": true
|
||||
},
|
||||
"webpack": {
|
||||
"optional": true
|
||||
}
|
||||
}
|
||||
},
|
||||
"node_modules/@storybook/global": {
|
||||
|
|
@ -1963,9 +1996,9 @@
|
|||
"license": "MIT"
|
||||
},
|
||||
"node_modules/@storybook/icons": {
|
||||
"version": "1.4.0",
|
||||
"resolved": "https://registry.npmjs.org/@storybook/icons/-/icons-1.4.0.tgz",
|
||||
"integrity": "sha512-Td73IeJxOyalzvjQL+JXx72jlIYHgs+REaHiREOqfpo3A2AYYG71AUbcv+lg7mEDIweKVCxsMQ0UKo634c8XeA==",
|
||||
"version": "1.6.0",
|
||||
"resolved": "https://registry.npmjs.org/@storybook/icons/-/icons-1.6.0.tgz",
|
||||
"integrity": "sha512-hcFZIjW8yQz8O8//2WTIXylm5Xsgc+lW9ISLgUk1xGmptIJQRdlhVIXCpSyLrQaaRiyhQRaVg7l3BD9S216BHw==",
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"engines": {
|
||||
|
|
@ -1977,9 +2010,9 @@
|
|||
}
|
||||
},
|
||||
"node_modules/@storybook/react-dom-shim": {
|
||||
"version": "9.0.17",
|
||||
"resolved": "https://registry.npmjs.org/@storybook/react-dom-shim/-/react-dom-shim-9.0.17.tgz",
|
||||
"integrity": "sha512-ak/x/m6MDDxdE6rCDymTltaiQF3oiKrPHSwfM+YPgQR6MVmzTTs4+qaPfeev7FZEHq23IkfDMTmSTTJtX7Vs9A==",
|
||||
"version": "10.0.7",
|
||||
"resolved": "https://registry.npmjs.org/@storybook/react-dom-shim/-/react-dom-shim-10.0.7.tgz",
|
||||
"integrity": "sha512-bp4OnMtZGwPJQDqNRi4K5iibLbZ2TZZMkWW7oSw5jjPFpGSreSjCe8LH9yj/lDnK8Ox9bGMCBFE5RV5XuML29w==",
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"funding": {
|
||||
|
|
@ -1987,126 +2020,75 @@
|
|||
"url": "https://opencollective.com/storybook"
|
||||
},
|
||||
"peerDependencies": {
|
||||
"react": "^16.8.0 || ^17.0.0 || ^18.0.0 || ^19.0.0-beta",
|
||||
"react-dom": "^16.8.0 || ^17.0.0 || ^18.0.0 || ^19.0.0-beta",
|
||||
"storybook": "^9.0.17"
|
||||
"react": "^16.8.0 || ^17.0.0 || ^18.0.0 || ^19.0.0",
|
||||
"react-dom": "^16.8.0 || ^17.0.0 || ^18.0.0 || ^19.0.0",
|
||||
"storybook": "^10.0.7"
|
||||
}
|
||||
},
|
||||
"node_modules/@storybook/svelte": {
|
||||
"version": "9.0.17",
|
||||
"resolved": "https://registry.npmjs.org/@storybook/svelte/-/svelte-9.0.17.tgz",
|
||||
"integrity": "sha512-RwOswdq7S3+ZOuoM/oRrcmlsKdjcd/3wMHbuirzYoAhdwsjubSuRepMV64O9RnlXd3x7rZw4fXpq1M/SVo5XiQ==",
|
||||
"version": "10.0.7",
|
||||
"resolved": "https://registry.npmjs.org/@storybook/svelte/-/svelte-10.0.7.tgz",
|
||||
"integrity": "sha512-rO+YQhHucy47Vh67z318pALmd6x+K1Kj30Fb4a6oOEw4xn4zCo9KTmkMWs24c4oduEXD/eJu3badlRmsVXzyfA==",
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"ts-dedent": "^2.0.0",
|
||||
"type-fest": "~2.19"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">=20.0.0"
|
||||
},
|
||||
"funding": {
|
||||
"type": "opencollective",
|
||||
"url": "https://opencollective.com/storybook"
|
||||
},
|
||||
"peerDependencies": {
|
||||
"storybook": "^9.0.17",
|
||||
"storybook": "^10.0.7",
|
||||
"svelte": "^5.0.0"
|
||||
}
|
||||
},
|
||||
"node_modules/@storybook/sveltekit": {
|
||||
"version": "9.0.17",
|
||||
"resolved": "https://registry.npmjs.org/@storybook/sveltekit/-/sveltekit-9.0.17.tgz",
|
||||
"integrity": "sha512-CUOATuW5Qk3SjNvmjH+wyx2GCsMF1cvw3gwkujV9kehPebzV20NhgHpbzSoepvwF7+Bj6jl8V6UxiMWk0jJFmA==",
|
||||
"node_modules/@storybook/svelte-vite": {
|
||||
"version": "10.0.7",
|
||||
"resolved": "https://registry.npmjs.org/@storybook/svelte-vite/-/svelte-vite-10.0.7.tgz",
|
||||
"integrity": "sha512-q9/RtrhX1CnznO6AO9MDEy1bsccbGeRxW28FLpgUrztV4IGZ/dFUrFIFurKRyuA3/nFsbtzp1F5jFt3RExmmTw==",
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"@storybook/builder-vite": "9.0.17",
|
||||
"@storybook/svelte": "9.0.17",
|
||||
"@storybook/svelte-vite": "9.0.17"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">=20.0.0"
|
||||
},
|
||||
"funding": {
|
||||
"type": "opencollective",
|
||||
"url": "https://opencollective.com/storybook"
|
||||
},
|
||||
"peerDependencies": {
|
||||
"storybook": "^9.0.17",
|
||||
"svelte": "^5.0.0",
|
||||
"vite": "^5.0.0 || ^6.0.0 || ^7.0.0"
|
||||
}
|
||||
},
|
||||
"node_modules/@storybook/sveltekit/node_modules/@storybook/svelte-vite": {
|
||||
"version": "9.0.17",
|
||||
"resolved": "https://registry.npmjs.org/@storybook/svelte-vite/-/svelte-vite-9.0.17.tgz",
|
||||
"integrity": "sha512-fRIxOZy9IRI6BfL1LgFn+B+IckGOlT1SstD01y9ddO4pVKWih/l+vb44bnZs+Z0faJZbrG/LgfnXTOPj052Z8g==",
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"@storybook/builder-vite": "9.0.17",
|
||||
"@storybook/svelte": "9.0.17",
|
||||
"@storybook/builder-vite": "10.0.7",
|
||||
"@storybook/svelte": "10.0.7",
|
||||
"magic-string": "^0.30.0",
|
||||
"svelte2tsx": "^0.7.35",
|
||||
"svelte2tsx": "^0.7.44",
|
||||
"typescript": "^4.9.4 || ^5.0.0"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">=20.0.0"
|
||||
"funding": {
|
||||
"type": "opencollective",
|
||||
"url": "https://opencollective.com/storybook"
|
||||
},
|
||||
"peerDependencies": {
|
||||
"@sveltejs/vite-plugin-svelte": "^2.0.0 || ^3.0.0 || ^4.0.0 || ^5.0.0 || ^6.0.0",
|
||||
"storybook": "^10.0.7",
|
||||
"svelte": "^5.0.0",
|
||||
"vite": "^5.0.0 || ^6.0.0 || ^7.0.0"
|
||||
}
|
||||
},
|
||||
"node_modules/@storybook/sveltekit": {
|
||||
"version": "10.0.7",
|
||||
"resolved": "https://registry.npmjs.org/@storybook/sveltekit/-/sveltekit-10.0.7.tgz",
|
||||
"integrity": "sha512-ujTW7PfWvgBrzd7jzaZe9JgjUeM5YvBKm+xru6t7Dr4bdfmkKqlZHPRdXn/sy+fQNyfg6JL2WKy2KIIeA+RvSg==",
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"@storybook/builder-vite": "10.0.7",
|
||||
"@storybook/svelte": "10.0.7",
|
||||
"@storybook/svelte-vite": "10.0.7"
|
||||
},
|
||||
"funding": {
|
||||
"type": "opencollective",
|
||||
"url": "https://opencollective.com/storybook"
|
||||
},
|
||||
"peerDependencies": {
|
||||
"@sveltejs/vite-plugin-svelte": "^2.0.0 || ^3.0.0 || ^4.0.0 || ^5.0.0",
|
||||
"storybook": "^9.0.17",
|
||||
"storybook": "^10.0.7",
|
||||
"svelte": "^5.0.0",
|
||||
"vite": "^5.0.0 || ^6.0.0 || ^7.0.0"
|
||||
}
|
||||
},
|
||||
"node_modules/@storybook/sveltekit/node_modules/@sveltejs/vite-plugin-svelte": {
|
||||
"version": "5.1.1",
|
||||
"resolved": "https://registry.npmjs.org/@sveltejs/vite-plugin-svelte/-/vite-plugin-svelte-5.1.1.tgz",
|
||||
"integrity": "sha512-Y1Cs7hhTc+a5E9Va/xwKlAJoariQyHY+5zBgCZg4PFWNYQ1nMN9sjK1zhw1gK69DuqVP++sht/1GZg1aRwmAXQ==",
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"peer": true,
|
||||
"dependencies": {
|
||||
"@sveltejs/vite-plugin-svelte-inspector": "^4.0.1",
|
||||
"debug": "^4.4.1",
|
||||
"deepmerge": "^4.3.1",
|
||||
"kleur": "^4.1.5",
|
||||
"magic-string": "^0.30.17",
|
||||
"vitefu": "^1.0.6"
|
||||
},
|
||||
"engines": {
|
||||
"node": "^18.0.0 || ^20.0.0 || >=22"
|
||||
},
|
||||
"peerDependencies": {
|
||||
"svelte": "^5.0.0",
|
||||
"vite": "^6.0.0"
|
||||
}
|
||||
},
|
||||
"node_modules/@storybook/sveltekit/node_modules/@sveltejs/vite-plugin-svelte/node_modules/@sveltejs/vite-plugin-svelte-inspector": {
|
||||
"version": "4.0.1",
|
||||
"resolved": "https://registry.npmjs.org/@sveltejs/vite-plugin-svelte-inspector/-/vite-plugin-svelte-inspector-4.0.1.tgz",
|
||||
"integrity": "sha512-J/Nmb2Q2y7mck2hyCX4ckVHcR5tu2J+MtBEQqpDrrgELZ2uvraQcK/ioCV61AqkdXFgriksOKIceDcQmqnGhVw==",
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"peer": true,
|
||||
"dependencies": {
|
||||
"debug": "^4.3.7"
|
||||
},
|
||||
"engines": {
|
||||
"node": "^18.0.0 || ^20.0.0 || >=22"
|
||||
},
|
||||
"peerDependencies": {
|
||||
"@sveltejs/vite-plugin-svelte": "^5.0.0",
|
||||
"svelte": "^5.0.0",
|
||||
"vite": "^6.0.0"
|
||||
}
|
||||
},
|
||||
"node_modules/@sveltejs/acorn-typescript": {
|
||||
"version": "1.0.5",
|
||||
"resolved": "https://registry.npmjs.org/@sveltejs/acorn-typescript/-/acorn-typescript-1.0.5.tgz",
|
||||
|
|
@ -2117,9 +2099,9 @@
|
|||
}
|
||||
},
|
||||
"node_modules/@sveltejs/adapter-static": {
|
||||
"version": "3.0.9",
|
||||
"resolved": "https://registry.npmjs.org/@sveltejs/adapter-static/-/adapter-static-3.0.9.tgz",
|
||||
"integrity": "sha512-aytHXcMi7lb9ljsWUzXYQ0p5X1z9oWud2olu/EpmH7aCu4m84h7QLvb5Wp+CFirKcwoNnYvYWhyP/L8Vh1ztdw==",
|
||||
"version": "3.0.10",
|
||||
"resolved": "https://registry.npmjs.org/@sveltejs/adapter-static/-/adapter-static-3.0.10.tgz",
|
||||
"integrity": "sha512-7D9lYFWJmB7zxZyTE/qxjksvMqzMuYrrsyh1f4AlZqeZeACPRySjbC3aFiY55wb1tWUaKOQG9PVbm74JcN2Iew==",
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"peerDependencies": {
|
||||
|
|
@ -2127,9 +2109,9 @@
|
|||
}
|
||||
},
|
||||
"node_modules/@sveltejs/kit": {
|
||||
"version": "2.37.0",
|
||||
"resolved": "https://registry.npmjs.org/@sveltejs/kit/-/kit-2.37.0.tgz",
|
||||
"integrity": "sha512-xgKtpjQ6Ry4mdShd01ht5AODUsW7+K1iValPDq7QX8zI1hWOKREH9GjG8SRCN5tC4K7UXmMhuQam7gbLByVcnw==",
|
||||
"version": "2.48.4",
|
||||
"resolved": "https://registry.npmjs.org/@sveltejs/kit/-/kit-2.48.4.tgz",
|
||||
"integrity": "sha512-TGFX1pZUt9qqY20Cv5NyYvy0iLWHf2jXi8s+eCGsig7jQMdwZWKUFMR6TbvFNhfDSUpc1sH/Y5EHv20g3HHA3g==",
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
|
|
@ -2166,16 +2148,15 @@
|
|||
}
|
||||
},
|
||||
"node_modules/@sveltejs/vite-plugin-svelte": {
|
||||
"version": "6.1.0",
|
||||
"resolved": "https://registry.npmjs.org/@sveltejs/vite-plugin-svelte/-/vite-plugin-svelte-6.1.0.tgz",
|
||||
"integrity": "sha512-+U6lz1wvGEG/BvQyL4z/flyNdQ9xDNv5vrh+vWBWTHaebqT0c9RNggpZTo/XSPoHsSCWBlYaTlRX8pZ9GATXCw==",
|
||||
"version": "6.2.1",
|
||||
"resolved": "https://registry.npmjs.org/@sveltejs/vite-plugin-svelte/-/vite-plugin-svelte-6.2.1.tgz",
|
||||
"integrity": "sha512-YZs/OSKOQAQCnJvM/P+F1URotNnYNeU3P2s4oIpzm1uFaqUEqRxUB0g5ejMjEb5Gjb9/PiBI5Ktrq4rUUF8UVQ==",
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"@sveltejs/vite-plugin-svelte-inspector": "^5.0.0-next.1",
|
||||
"@sveltejs/vite-plugin-svelte-inspector": "^5.0.0",
|
||||
"debug": "^4.4.1",
|
||||
"deepmerge": "^4.3.1",
|
||||
"kleur": "^4.1.5",
|
||||
"magic-string": "^0.30.17",
|
||||
"vitefu": "^1.1.1"
|
||||
},
|
||||
|
|
@ -3361,19 +3342,6 @@
|
|||
"node": ">= 0.8"
|
||||
}
|
||||
},
|
||||
"node_modules/better-opn": {
|
||||
"version": "3.0.2",
|
||||
"resolved": "https://registry.npmjs.org/better-opn/-/better-opn-3.0.2.tgz",
|
||||
"integrity": "sha512-aVNobHnJqLiUelTaHat9DZ1qM2w0C0Eym4LPI/3JxOnSokGVdsl1T1kN7TFvsEAD8G47A6VKQ0TVHqbBnYMJlQ==",
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"open": "^8.0.4"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">=12.0.0"
|
||||
}
|
||||
},
|
||||
"node_modules/bits-ui": {
|
||||
"version": "2.8.11",
|
||||
"resolved": "https://registry.npmjs.org/bits-ui/-/bits-ui-2.8.11.tgz",
|
||||
|
|
@ -3844,16 +3812,6 @@
|
|||
"node": ">=0.10.0"
|
||||
}
|
||||
},
|
||||
"node_modules/define-lazy-prop": {
|
||||
"version": "2.0.0",
|
||||
"resolved": "https://registry.npmjs.org/define-lazy-prop/-/define-lazy-prop-2.0.0.tgz",
|
||||
"integrity": "sha512-Ds09qNh8yw3khSjiJjiUInaGX9xlqZDY7JVryGxdxV7NPeuqQfplOpQ66yJFZut3jLa5zOwkXw1g9EI2uKh4Og==",
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"engines": {
|
||||
"node": ">=8"
|
||||
}
|
||||
},
|
||||
"node_modules/dequal": {
|
||||
"version": "2.0.3",
|
||||
"resolved": "https://registry.npmjs.org/dequal/-/dequal-2.0.3.tgz",
|
||||
|
|
@ -4042,19 +4000,6 @@
|
|||
"@esbuild/win32-x64": "0.25.8"
|
||||
}
|
||||
},
|
||||
"node_modules/esbuild-register": {
|
||||
"version": "3.6.0",
|
||||
"resolved": "https://registry.npmjs.org/esbuild-register/-/esbuild-register-3.6.0.tgz",
|
||||
"integrity": "sha512-H2/S7Pm8a9CL1uhp9OvjwrBh5Pvx0H8qVOxNu8Wed9Y7qv56MPtq+GGM8RJpq6glYJn9Wspr8uw7l55uyinNeg==",
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"debug": "^4.3.4"
|
||||
},
|
||||
"peerDependencies": {
|
||||
"esbuild": ">=0.12 <1"
|
||||
}
|
||||
},
|
||||
"node_modules/escape-string-regexp": {
|
||||
"version": "4.0.0",
|
||||
"resolved": "https://registry.npmjs.org/escape-string-regexp/-/escape-string-regexp-4.0.0.tgz",
|
||||
|
|
@ -4146,20 +4091,17 @@
|
|||
}
|
||||
},
|
||||
"node_modules/eslint-plugin-storybook": {
|
||||
"version": "9.0.17",
|
||||
"resolved": "https://registry.npmjs.org/eslint-plugin-storybook/-/eslint-plugin-storybook-9.0.17.tgz",
|
||||
"integrity": "sha512-IuTdlwCEwoDNobdygRCxNhlKXHmsDfPtPvHGcsY35x2Bx8KItrjfekO19gJrjc1VT2CMfcZMYF8OBKaxHELupw==",
|
||||
"version": "10.0.7",
|
||||
"resolved": "https://registry.npmjs.org/eslint-plugin-storybook/-/eslint-plugin-storybook-10.0.7.tgz",
|
||||
"integrity": "sha512-qOQq9KdT1jsBgT3qsxUH2n67aj1WR8D1XCoER8Q6yuVlS5TimNwk1mZeWkXVf/o4RQQT6flT2y5cG2gPLZPvJA==",
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"@typescript-eslint/utils": "^8.8.1"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">=20.0.0"
|
||||
},
|
||||
"peerDependencies": {
|
||||
"eslint": ">=8",
|
||||
"storybook": "^9.0.17"
|
||||
"storybook": "^10.0.7"
|
||||
}
|
||||
},
|
||||
"node_modules/eslint-plugin-svelte": {
|
||||
|
|
@ -4405,11 +4347,14 @@
|
|||
}
|
||||
},
|
||||
"node_modules/fdir": {
|
||||
"version": "6.4.6",
|
||||
"resolved": "https://registry.npmjs.org/fdir/-/fdir-6.4.6.tgz",
|
||||
"integrity": "sha512-hiFoqpyZcfNm1yc4u8oWCf9A2c4D3QjCrks3zmoVKVxpQRzmPNar1hUJcBG2RQHvEVGDN+Jm81ZheVLAQMK6+w==",
|
||||
"version": "6.5.0",
|
||||
"resolved": "https://registry.npmjs.org/fdir/-/fdir-6.5.0.tgz",
|
||||
"integrity": "sha512-tIbYtZbucOs0BRGqPJkshJUYdL+SDH7dVM8gjy+ERp3WAUjLEFJE+02kanyHtwjWOnwrKYBiwAmM0p4kLJAnXg==",
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"engines": {
|
||||
"node": ">=12.0.0"
|
||||
},
|
||||
"peerDependencies": {
|
||||
"picomatch": "^3 || ^4"
|
||||
},
|
||||
|
|
@ -5072,22 +5017,6 @@
|
|||
"integrity": "sha512-0aO8FkhNZlj/ZIbNi7Lxxr12obT7cL1moPfE4tg1LkX7LlLfC6DeX4l2ZEud1ukP9jNQyNnfzQVqwbwmAATY4Q==",
|
||||
"license": "MIT"
|
||||
},
|
||||
"node_modules/is-docker": {
|
||||
"version": "2.2.1",
|
||||
"resolved": "https://registry.npmjs.org/is-docker/-/is-docker-2.2.1.tgz",
|
||||
"integrity": "sha512-F+i2BKsFrH66iaUFc0woD8sLy8getkwTwtOBjvs56Cx4CgJDeKQeqfz8wAYiSb8JOprWhHH5p77PbmYCvvUuXQ==",
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"bin": {
|
||||
"is-docker": "cli.js"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">=8"
|
||||
},
|
||||
"funding": {
|
||||
"url": "https://github.com/sponsors/sindresorhus"
|
||||
}
|
||||
},
|
||||
"node_modules/is-extglob": {
|
||||
"version": "2.1.1",
|
||||
"resolved": "https://registry.npmjs.org/is-extglob/-/is-extglob-2.1.1.tgz",
|
||||
|
|
@ -5133,19 +5062,6 @@
|
|||
"url": "https://github.com/sponsors/sindresorhus"
|
||||
}
|
||||
},
|
||||
"node_modules/is-wsl": {
|
||||
"version": "2.2.0",
|
||||
"resolved": "https://registry.npmjs.org/is-wsl/-/is-wsl-2.2.0.tgz",
|
||||
"integrity": "sha512-fKzAra0rGJUUBwGBgNkHZuToZcn+TtXHpeCgmkMJMMYx1sQDYaCSyjJBSCa2nH1DGm7s3n1oBnohoVTBaN7Lww==",
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"is-docker": "^2.0.0"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">=8"
|
||||
}
|
||||
},
|
||||
"node_modules/isexe": {
|
||||
"version": "2.0.0",
|
||||
"resolved": "https://registry.npmjs.org/isexe/-/isexe-2.0.0.tgz",
|
||||
|
|
@ -5591,16 +5507,6 @@
|
|||
"dev": true,
|
||||
"license": "MIT"
|
||||
},
|
||||
"node_modules/lower-case": {
|
||||
"version": "2.0.2",
|
||||
"resolved": "https://registry.npmjs.org/lower-case/-/lower-case-2.0.2.tgz",
|
||||
"integrity": "sha512-7fm3l3NAF9WfN6W3JOmf5drwpVqX78JtoGJ3A6W0a6ZnldM41w2fV5D490psKFTpMds8TJse/eHLFFsNHHjHgg==",
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"tslib": "^2.0.3"
|
||||
}
|
||||
},
|
||||
"node_modules/lowlight": {
|
||||
"version": "3.3.0",
|
||||
"resolved": "https://registry.npmjs.org/lowlight/-/lowlight-3.3.0.tgz",
|
||||
|
|
@ -6783,17 +6689,6 @@
|
|||
"dev": true,
|
||||
"license": "MIT"
|
||||
},
|
||||
"node_modules/no-case": {
|
||||
"version": "3.0.4",
|
||||
"resolved": "https://registry.npmjs.org/no-case/-/no-case-3.0.4.tgz",
|
||||
"integrity": "sha512-fgAN3jGAh+RoxUGZHTSOLJIqUc2wmoBwGR4tbpNAKmmovFoWq0OdRkb0VkldReO2a2iBT/OEulG9XSUc10r3zg==",
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"lower-case": "^2.0.2",
|
||||
"tslib": "^2.0.3"
|
||||
}
|
||||
},
|
||||
"node_modules/node-addon-api": {
|
||||
"version": "7.1.1",
|
||||
"resolved": "https://registry.npmjs.org/node-addon-api/-/node-addon-api-7.1.1.tgz",
|
||||
|
|
@ -6815,24 +6710,6 @@
|
|||
"url": "https://github.com/sponsors/ljharb"
|
||||
}
|
||||
},
|
||||
"node_modules/open": {
|
||||
"version": "8.4.2",
|
||||
"resolved": "https://registry.npmjs.org/open/-/open-8.4.2.tgz",
|
||||
"integrity": "sha512-7x81NCL719oNbsq/3mh+hVrAWmFuEYUqrq/Iw3kUzH8ReypT9QQ0BLoJS7/G9k6N81XjW4qHWtjWwe/9eLy1EQ==",
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"define-lazy-prop": "^2.0.0",
|
||||
"is-docker": "^2.1.1",
|
||||
"is-wsl": "^2.2.0"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">=12"
|
||||
},
|
||||
"funding": {
|
||||
"url": "https://github.com/sponsors/sindresorhus"
|
||||
}
|
||||
},
|
||||
"node_modules/opener": {
|
||||
"version": "1.5.2",
|
||||
"resolved": "https://registry.npmjs.org/opener/-/opener-1.5.2.tgz",
|
||||
|
|
@ -6919,17 +6796,6 @@
|
|||
"url": "https://github.com/inikulin/parse5?sponsor=1"
|
||||
}
|
||||
},
|
||||
"node_modules/pascal-case": {
|
||||
"version": "3.1.2",
|
||||
"resolved": "https://registry.npmjs.org/pascal-case/-/pascal-case-3.1.2.tgz",
|
||||
"integrity": "sha512-uWlGT3YSnK9x3BQJaOdcZwrnV6hPpd8jFH1/ucpiLRPh/2zCVJKS19E4GvYHvaCcACn3foXZ0cLB9Wrx1KGe5g==",
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"no-case": "^3.0.4",
|
||||
"tslib": "^2.0.3"
|
||||
}
|
||||
},
|
||||
"node_modules/path-exists": {
|
||||
"version": "4.0.0",
|
||||
"resolved": "https://registry.npmjs.org/path-exists/-/path-exists-4.0.0.tgz",
|
||||
|
|
@ -7000,13 +6866,13 @@
|
|||
}
|
||||
},
|
||||
"node_modules/playwright": {
|
||||
"version": "1.54.1",
|
||||
"resolved": "https://registry.npmjs.org/playwright/-/playwright-1.54.1.tgz",
|
||||
"integrity": "sha512-peWpSwIBmSLi6aW2auvrUtf2DqY16YYcCMO8rTVx486jKmDTJg7UAhyrraP98GB8BoPURZP8+nxO7TSd4cPr5g==",
|
||||
"version": "1.56.1",
|
||||
"resolved": "https://registry.npmjs.org/playwright/-/playwright-1.56.1.tgz",
|
||||
"integrity": "sha512-aFi5B0WovBHTEvpM3DzXTUaeN6eN0qWnTkKx4NQaH4Wvcmc153PdaY2UBdSYKaGYw+UyWXSVyxDUg5DoPEttjw==",
|
||||
"dev": true,
|
||||
"license": "Apache-2.0",
|
||||
"dependencies": {
|
||||
"playwright-core": "1.54.1"
|
||||
"playwright-core": "1.56.1"
|
||||
},
|
||||
"bin": {
|
||||
"playwright": "cli.js"
|
||||
|
|
@ -7019,9 +6885,9 @@
|
|||
}
|
||||
},
|
||||
"node_modules/playwright-core": {
|
||||
"version": "1.54.1",
|
||||
"resolved": "https://registry.npmjs.org/playwright-core/-/playwright-core-1.54.1.tgz",
|
||||
"integrity": "sha512-Nbjs2zjj0htNhzgiy5wu+3w09YetDx5pkrpI/kZotDlDUaYk0HVA5xrBVPdow4SAUIlhgKcJeJg4GRKW6xHusA==",
|
||||
"version": "1.56.1",
|
||||
"resolved": "https://registry.npmjs.org/playwright-core/-/playwright-core-1.56.1.tgz",
|
||||
"integrity": "sha512-hutraynyn31F+Bifme+Ps9Vq59hKuUCz7H1kDOcBs+2oGguKkWTU50bBWrtz34OUWmIwpBTWDxaRPXrIXkgvmQ==",
|
||||
"dev": true,
|
||||
"license": "Apache-2.0",
|
||||
"bin": {
|
||||
|
|
@ -7852,6 +7718,13 @@
|
|||
"dev": true,
|
||||
"license": "MIT"
|
||||
},
|
||||
"node_modules/scule": {
|
||||
"version": "1.3.0",
|
||||
"resolved": "https://registry.npmjs.org/scule/-/scule-1.3.0.tgz",
|
||||
"integrity": "sha512-6FtHJEvt+pVMIB9IBY+IcCJ6Z5f1iQnytgyfKMhDKgmzYG+TeH/wx1y3l27rshSbLiSanrR9ffZDrEsmjlQF2g==",
|
||||
"dev": true,
|
||||
"license": "MIT"
|
||||
},
|
||||
"node_modules/secure-compare": {
|
||||
"version": "3.0.1",
|
||||
"resolved": "https://registry.npmjs.org/secure-compare/-/secure-compare-3.0.1.tgz",
|
||||
|
|
@ -8052,26 +7925,26 @@
|
|||
"license": "MIT"
|
||||
},
|
||||
"node_modules/storybook": {
|
||||
"version": "9.0.17",
|
||||
"resolved": "https://registry.npmjs.org/storybook/-/storybook-9.0.17.tgz",
|
||||
"integrity": "sha512-O+9jgJ+Trlq9VGD1uY4OBLKQWHHDKM/A/pA8vMW6PVehhGHNvpzcIC1bngr6mL5gGHZP2nBv+9XG8pTMcggMmg==",
|
||||
"version": "10.0.7",
|
||||
"resolved": "https://registry.npmjs.org/storybook/-/storybook-10.0.7.tgz",
|
||||
"integrity": "sha512-7smAu0o+kdm378Q2uIddk32pn0UdIbrtTVU+rXRVtTVTCrK/P2cCui2y4JH+Bl3NgEq1bbBQpCAF/HKrDjk2Qw==",
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"@storybook/global": "^5.0.0",
|
||||
"@storybook/icons": "^1.6.0",
|
||||
"@testing-library/jest-dom": "^6.6.3",
|
||||
"@testing-library/user-event": "^14.6.1",
|
||||
"@vitest/expect": "3.2.4",
|
||||
"@vitest/mocker": "3.2.4",
|
||||
"@vitest/spy": "3.2.4",
|
||||
"better-opn": "^3.0.2",
|
||||
"esbuild": "^0.18.0 || ^0.19.0 || ^0.20.0 || ^0.21.0 || ^0.22.0 || ^0.23.0 || ^0.24.0 || ^0.25.0",
|
||||
"esbuild-register": "^3.5.0",
|
||||
"recast": "^0.23.5",
|
||||
"semver": "^7.6.2",
|
||||
"ws": "^8.18.0"
|
||||
},
|
||||
"bin": {
|
||||
"storybook": "bin/index.cjs"
|
||||
"storybook": "dist/bin/dispatcher.js"
|
||||
},
|
||||
"funding": {
|
||||
"type": "opencollective",
|
||||
|
|
@ -8418,14 +8291,14 @@
|
|||
}
|
||||
},
|
||||
"node_modules/svelte2tsx": {
|
||||
"version": "0.7.41",
|
||||
"resolved": "https://registry.npmjs.org/svelte2tsx/-/svelte2tsx-0.7.41.tgz",
|
||||
"integrity": "sha512-/TUwpyn/Qc1wcGuayf2GSwvZ7htdAOzpo0JFFm96srKnRXoTD0gy4n06g+XgH8w016S3lPtyFVtFAm+0yJ0BZw==",
|
||||
"version": "0.7.45",
|
||||
"resolved": "https://registry.npmjs.org/svelte2tsx/-/svelte2tsx-0.7.45.tgz",
|
||||
"integrity": "sha512-cSci+mYGygYBHIZLHlm/jYlEc1acjAHqaQaDFHdEBpUueM9kSTnPpvPtSl5VkJOU1qSJ7h1K+6F/LIUYiqC8VA==",
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"dedent-js": "^1.0.1",
|
||||
"pascal-case": "^3.1.1"
|
||||
"scule": "^1.3.0"
|
||||
},
|
||||
"peerDependencies": {
|
||||
"svelte": "^3.55 || ^4.0.0-next.0 || ^4.0 || ^5.0.0-next.0",
|
||||
|
|
@ -8535,14 +8408,14 @@
|
|||
"license": "MIT"
|
||||
},
|
||||
"node_modules/tinyglobby": {
|
||||
"version": "0.2.14",
|
||||
"resolved": "https://registry.npmjs.org/tinyglobby/-/tinyglobby-0.2.14.tgz",
|
||||
"integrity": "sha512-tX5e7OM1HnYr2+a2C/4V0htOcSQcoSTH9KgJnVvNm5zm/cyEWKJ7j7YutsH9CxMdtOkkLFy2AHrMci9IM8IPZQ==",
|
||||
"version": "0.2.15",
|
||||
"resolved": "https://registry.npmjs.org/tinyglobby/-/tinyglobby-0.2.15.tgz",
|
||||
"integrity": "sha512-j2Zq4NyQYG5XMST4cbs02Ak8iJUdxRM0XI5QyxXuZOzKOINmWurp3smXu3y5wDcJrptwpSjgXHzIQxR0omXljQ==",
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"fdir": "^6.4.4",
|
||||
"picomatch": "^4.0.2"
|
||||
"fdir": "^6.5.0",
|
||||
"picomatch": "^4.0.3"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">=12.0.0"
|
||||
|
|
@ -8918,17 +8791,19 @@
|
|||
}
|
||||
},
|
||||
"node_modules/unplugin": {
|
||||
"version": "1.16.1",
|
||||
"resolved": "https://registry.npmjs.org/unplugin/-/unplugin-1.16.1.tgz",
|
||||
"integrity": "sha512-4/u/j4FrCKdi17jaxuJA0jClGxB1AvU2hw/IuayPc4ay1XGaJs/rbb4v5WKwAjNifjmXK9PIFyuPiaK8azyR9w==",
|
||||
"version": "2.3.10",
|
||||
"resolved": "https://registry.npmjs.org/unplugin/-/unplugin-2.3.10.tgz",
|
||||
"integrity": "sha512-6NCPkv1ClwH+/BGE9QeoTIl09nuiAt0gS28nn1PvYXsGKRwM2TCbFA2QiilmehPDTXIe684k4rZI1yl3A1PCUw==",
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"acorn": "^8.14.0",
|
||||
"@jridgewell/remapping": "^2.3.5",
|
||||
"acorn": "^8.15.0",
|
||||
"picomatch": "^4.0.3",
|
||||
"webpack-virtual-modules": "^0.6.2"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">=14.0.0"
|
||||
"node": ">=18.12.0"
|
||||
}
|
||||
},
|
||||
"node_modules/uri-js": {
|
||||
|
|
@ -9054,18 +8929,18 @@
|
|||
}
|
||||
},
|
||||
"node_modules/vite": {
|
||||
"version": "7.0.5",
|
||||
"resolved": "https://registry.npmjs.org/vite/-/vite-7.0.5.tgz",
|
||||
"integrity": "sha512-1mncVwJxy2C9ThLwz0+2GKZyEXuC3MyWtAAlNftlZZXZDP3AJt5FmwcMit/IGGaNZ8ZOB2BNO/HFUB+CpN0NQw==",
|
||||
"version": "7.2.2",
|
||||
"resolved": "https://registry.npmjs.org/vite/-/vite-7.2.2.tgz",
|
||||
"integrity": "sha512-BxAKBWmIbrDgrokdGZH1IgkIk/5mMHDreLDmCJ0qpyJaAteP8NvMhkwr/ZCQNqNH97bw/dANTE9PDzqwJghfMQ==",
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"esbuild": "^0.25.0",
|
||||
"fdir": "^6.4.6",
|
||||
"picomatch": "^4.0.2",
|
||||
"fdir": "^6.5.0",
|
||||
"picomatch": "^4.0.3",
|
||||
"postcss": "^8.5.6",
|
||||
"rollup": "^4.40.0",
|
||||
"tinyglobby": "^0.2.14"
|
||||
"rollup": "^4.43.0",
|
||||
"tinyglobby": "^0.2.15"
|
||||
},
|
||||
"bin": {
|
||||
"vite": "bin/vite.js"
|
||||
|
|
|
|||
|
|
@ -24,20 +24,20 @@
|
|||
"cleanup": "rm -rf .svelte-kit build node_modules test-results"
|
||||
},
|
||||
"devDependencies": {
|
||||
"@chromatic-com/storybook": "^4.0.1",
|
||||
"@chromatic-com/storybook": "^4.1.2",
|
||||
"@eslint/compat": "^1.2.5",
|
||||
"@eslint/js": "^9.18.0",
|
||||
"@internationalized/date": "^3.8.2",
|
||||
"@lucide/svelte": "^0.515.0",
|
||||
"@playwright/test": "^1.49.1",
|
||||
"@storybook/addon-a11y": "^9.0.17",
|
||||
"@storybook/addon-docs": "^9.0.17",
|
||||
"@storybook/addon-svelte-csf": "^5.0.7",
|
||||
"@storybook/addon-vitest": "^9.0.17",
|
||||
"@storybook/sveltekit": "^9.0.17",
|
||||
"@sveltejs/adapter-static": "^3.0.8",
|
||||
"@sveltejs/kit": "^2.22.0",
|
||||
"@sveltejs/vite-plugin-svelte": "^6.0.0",
|
||||
"@storybook/addon-a11y": "^10.0.7",
|
||||
"@storybook/addon-docs": "^10.0.7",
|
||||
"@storybook/addon-svelte-csf": "^5.0.10",
|
||||
"@storybook/addon-vitest": "^10.0.7",
|
||||
"@storybook/sveltekit": "^10.0.7",
|
||||
"@sveltejs/adapter-static": "^3.0.10",
|
||||
"@sveltejs/kit": "^2.48.4",
|
||||
"@sveltejs/vite-plugin-svelte": "^6.2.1",
|
||||
"@tailwindcss/forms": "^0.5.9",
|
||||
"@tailwindcss/typography": "^0.5.15",
|
||||
"@tailwindcss/vite": "^4.0.0",
|
||||
|
|
@ -48,21 +48,21 @@
|
|||
"dexie": "^4.0.11",
|
||||
"eslint": "^9.18.0",
|
||||
"eslint-config-prettier": "^10.0.1",
|
||||
"eslint-plugin-storybook": "^9.0.17",
|
||||
"eslint-plugin-storybook": "^10.0.7",
|
||||
"eslint-plugin-svelte": "^3.0.0",
|
||||
"fflate": "^0.8.2",
|
||||
"globals": "^16.0.0",
|
||||
"http-server": "^14.1.1",
|
||||
"mdast": "^3.0.0",
|
||||
"mdsvex": "^0.12.3",
|
||||
"playwright": "^1.53.0",
|
||||
"playwright": "^1.56.1",
|
||||
"prettier": "^3.4.2",
|
||||
"prettier-plugin-svelte": "^3.3.3",
|
||||
"prettier-plugin-tailwindcss": "^0.6.11",
|
||||
"rehype-katex": "^7.0.1",
|
||||
"remark-math": "^6.0.0",
|
||||
"sass": "^1.93.3",
|
||||
"storybook": "^9.0.17",
|
||||
"storybook": "^10.0.7",
|
||||
"svelte": "^5.0.0",
|
||||
"svelte-check": "^4.0.0",
|
||||
"tailwind-merge": "^3.3.1",
|
||||
|
|
@ -73,7 +73,7 @@
|
|||
"typescript-eslint": "^8.20.0",
|
||||
"unified": "^11.0.5",
|
||||
"uuid": "^13.0.0",
|
||||
"vite": "^7.0.4",
|
||||
"vite": "^7.2.2",
|
||||
"vite-plugin-devtools-json": "^0.2.0",
|
||||
"vitest": "^3.2.3",
|
||||
"vitest-browser-svelte": "^0.1.0"
|
||||
|
|
|
|||
|
|
@ -1,6 +1,5 @@
|
|||
<script lang="ts">
|
||||
import { X } from '@lucide/svelte';
|
||||
import { Button } from '$lib/components/ui/button';
|
||||
import { RemoveButton } from '$lib/components/app';
|
||||
import { formatFileSize, getFileTypeLabel, getPreviewText } from '$lib/utils/file-preview';
|
||||
import { FileTypeCategory, MimeTypeText } from '$lib/enums/files';
|
||||
|
||||
|
|
@ -66,17 +65,15 @@
|
|||
</button>
|
||||
{:else}
|
||||
<!-- Non-readonly mode (ChatForm) -->
|
||||
<div class="relative rounded-lg border border-border bg-muted p-3 {className} w-64">
|
||||
<Button
|
||||
type="button"
|
||||
variant="ghost"
|
||||
size="sm"
|
||||
class="absolute top-2 right-2 h-6 w-6 bg-white/20 p-0 hover:bg-white/30"
|
||||
onclick={() => onRemove?.(id)}
|
||||
aria-label="Remove file"
|
||||
>
|
||||
<X class="h-3 w-3" />
|
||||
</Button>
|
||||
<button
|
||||
class="group relative rounded-lg border border-border bg-muted p-3 {className} {textContent
|
||||
? 'max-h-24 max-w-72'
|
||||
: 'max-w-36'} cursor-pointer text-left"
|
||||
onclick={onClick}
|
||||
>
|
||||
<div class="absolute top-2 right-2 opacity-0 transition-opacity group-hover:opacity-100">
|
||||
<RemoveButton {id} {onRemove} />
|
||||
</div>
|
||||
|
||||
<div class="pr-8">
|
||||
<span class="mb-3 block truncate text-sm font-medium text-foreground">{name}</span>
|
||||
|
|
@ -85,7 +82,7 @@
|
|||
<div class="relative">
|
||||
<div
|
||||
class="overflow-hidden font-mono text-xs leading-relaxed break-words whitespace-pre-wrap text-muted-foreground"
|
||||
style="max-height: 3.6em; line-height: 1.2em;"
|
||||
style="max-height: 3rem; line-height: 1.2em;"
|
||||
>
|
||||
{getPreviewText(textContent)}
|
||||
</div>
|
||||
|
|
@ -98,11 +95,11 @@
|
|||
</div>
|
||||
{/if}
|
||||
</div>
|
||||
</div>
|
||||
</button>
|
||||
{/if}
|
||||
{:else}
|
||||
<button
|
||||
class="flex items-center gap-2 gap-3 rounded-lg border border-border bg-muted p-3 {className}"
|
||||
class="group flex items-center gap-3 rounded-lg border border-border bg-muted p-3 {className} relative"
|
||||
onclick={onClick}
|
||||
>
|
||||
<div
|
||||
|
|
@ -112,7 +109,9 @@
|
|||
</div>
|
||||
|
||||
<div class="flex flex-col gap-1">
|
||||
<span class="max-w-36 truncate text-sm font-medium text-foreground md:max-w-72">
|
||||
<span
|
||||
class="max-w-24 truncate text-sm font-medium text-foreground group-hover:pr-6 md:max-w-32"
|
||||
>
|
||||
{name}
|
||||
</span>
|
||||
|
||||
|
|
@ -122,18 +121,9 @@
|
|||
</div>
|
||||
|
||||
{#if !readonly}
|
||||
<Button
|
||||
type="button"
|
||||
variant="ghost"
|
||||
size="sm"
|
||||
class="h-6 w-6 p-0"
|
||||
onclick={(e) => {
|
||||
e.stopPropagation();
|
||||
onRemove?.(id);
|
||||
}}
|
||||
>
|
||||
<X class="h-3 w-3" />
|
||||
</Button>
|
||||
<div class="absolute top-2 right-2 opacity-0 transition-opacity group-hover:opacity-100">
|
||||
<RemoveButton {id} {onRemove} />
|
||||
</div>
|
||||
{/if}
|
||||
</button>
|
||||
{/if}
|
||||
|
|
|
|||
|
|
@ -1,6 +1,5 @@
|
|||
<script lang="ts">
|
||||
import { X } from '@lucide/svelte';
|
||||
import { Button } from '$lib/components/ui/button';
|
||||
import { RemoveButton } from '$lib/components/app';
|
||||
|
||||
interface Props {
|
||||
id: string;
|
||||
|
|
@ -26,12 +25,12 @@
|
|||
class: className = '',
|
||||
// Default to small size for form previews
|
||||
width = 'w-auto',
|
||||
height = 'h-24',
|
||||
height = 'h-16',
|
||||
imageClass = ''
|
||||
}: Props = $props();
|
||||
</script>
|
||||
|
||||
<div class="relative overflow-hidden rounded-lg border border-border bg-muted {className}">
|
||||
<div class="group relative overflow-hidden rounded-lg border border-border bg-muted {className}">
|
||||
{#if onClick}
|
||||
<button
|
||||
type="button"
|
||||
|
|
@ -55,17 +54,9 @@
|
|||
|
||||
{#if !readonly}
|
||||
<div
|
||||
class="absolute top-1 right-1 flex items-center justify-center opacity-0 transition-opacity hover:opacity-100"
|
||||
class="absolute top-1 right-1 flex items-center justify-center opacity-0 transition-opacity group-hover:opacity-100"
|
||||
>
|
||||
<Button
|
||||
type="button"
|
||||
variant="ghost"
|
||||
size="sm"
|
||||
class="h-6 w-6 bg-white/20 p-0 text-white hover:bg-white/30"
|
||||
onclick={() => onRemove?.(id)}
|
||||
>
|
||||
<X class="h-3 w-3" />
|
||||
</Button>
|
||||
<RemoveButton {id} {onRemove} class="text-white" />
|
||||
</div>
|
||||
{/if}
|
||||
</div>
|
||||
|
|
|
|||
|
|
@ -153,7 +153,7 @@
|
|||
<Dialog.Root bind:open>
|
||||
<Dialog.Content class="grid max-h-[90vh] max-w-5xl overflow-hidden !p-10 sm:w-auto sm:max-w-6xl">
|
||||
<Dialog.Header class="flex-shrink-0">
|
||||
<div class="flex items-center justify-between">
|
||||
<div class="flex items-center justify-between gap-6">
|
||||
<div class="flex items-center gap-3">
|
||||
{#if IconComponent}
|
||||
<IconComponent class="h-5 w-5 text-muted-foreground" />
|
||||
|
|
|
|||
|
|
@ -1,11 +1,16 @@
|
|||
<script lang="ts">
|
||||
import { ChatAttachmentImagePreview, ChatAttachmentFilePreview } from '$lib/components/app';
|
||||
import { Button } from '$lib/components/ui/button';
|
||||
import { ChevronLeft, ChevronRight } from '@lucide/svelte';
|
||||
import { FileTypeCategory } from '$lib/enums/files';
|
||||
import { getFileTypeCategory } from '$lib/utils/file-type';
|
||||
import ChatAttachmentPreviewDialog from './ChatAttachmentPreviewDialog.svelte';
|
||||
import ChatAttachmentsViewAllDialog from './ChatAttachmentsViewAllDialog.svelte';
|
||||
import type { ChatAttachmentDisplayItem, ChatAttachmentPreviewItem } from '$lib/types/chat';
|
||||
|
||||
interface Props {
|
||||
class?: string;
|
||||
style?: string;
|
||||
// For ChatMessage - stored attachments
|
||||
attachments?: DatabaseMessageExtra[];
|
||||
readonly?: boolean;
|
||||
|
|
@ -16,10 +21,13 @@
|
|||
imageClass?: string;
|
||||
imageHeight?: string;
|
||||
imageWidth?: string;
|
||||
// Limit display to single row with "+ X more" button
|
||||
limitToSingleRow?: boolean;
|
||||
}
|
||||
|
||||
let {
|
||||
class: className = '',
|
||||
style = '',
|
||||
attachments = [],
|
||||
readonly = false,
|
||||
onFileRemove,
|
||||
|
|
@ -27,36 +35,23 @@
|
|||
// Default to small size for form previews
|
||||
imageClass = '',
|
||||
imageHeight = 'h-24',
|
||||
imageWidth = 'w-auto'
|
||||
imageWidth = 'w-auto',
|
||||
limitToSingleRow = false
|
||||
}: Props = $props();
|
||||
|
||||
let displayItems = $derived(getDisplayItems());
|
||||
|
||||
// Preview dialog state
|
||||
let canScrollLeft = $state(false);
|
||||
let canScrollRight = $state(false);
|
||||
let isScrollable = $state(false);
|
||||
let previewDialogOpen = $state(false);
|
||||
let previewItem = $state<{
|
||||
uploadedFile?: ChatUploadedFile;
|
||||
attachment?: DatabaseMessageExtra;
|
||||
preview?: string;
|
||||
name?: string;
|
||||
type?: string;
|
||||
size?: number;
|
||||
textContent?: string;
|
||||
} | null>(null);
|
||||
let previewItem = $state<ChatAttachmentPreviewItem | null>(null);
|
||||
let scrollContainer: HTMLDivElement | undefined = $state();
|
||||
let showViewAll = $derived(limitToSingleRow && displayItems.length > 0 && isScrollable);
|
||||
let viewAllDialogOpen = $state(false);
|
||||
|
||||
function getDisplayItems() {
|
||||
const items: Array<{
|
||||
id: string;
|
||||
name: string;
|
||||
size?: number;
|
||||
preview?: string;
|
||||
type: string;
|
||||
isImage: boolean;
|
||||
uploadedFile?: ChatUploadedFile;
|
||||
attachment?: DatabaseMessageExtra;
|
||||
attachmentIndex?: number;
|
||||
textContent?: string;
|
||||
}> = [];
|
||||
function getDisplayItems(): ChatAttachmentDisplayItem[] {
|
||||
const items: ChatAttachmentDisplayItem[] = [];
|
||||
|
||||
// Add uploaded files (ChatForm)
|
||||
for (const file of uploadedFiles) {
|
||||
|
|
@ -127,14 +122,12 @@
|
|||
}
|
||||
}
|
||||
|
||||
return items;
|
||||
return items.reverse();
|
||||
}
|
||||
|
||||
function openPreview(item: (typeof displayItems)[0], event?: Event) {
|
||||
if (event) {
|
||||
event.preventDefault();
|
||||
event.stopPropagation();
|
||||
}
|
||||
function openPreview(item: ChatAttachmentDisplayItem, event?: MouseEvent) {
|
||||
event?.stopPropagation();
|
||||
event?.preventDefault();
|
||||
|
||||
previewItem = {
|
||||
uploadedFile: item.uploadedFile,
|
||||
|
|
@ -147,38 +140,118 @@
|
|||
};
|
||||
previewDialogOpen = true;
|
||||
}
|
||||
|
||||
function scrollLeft(event?: MouseEvent) {
|
||||
event?.stopPropagation();
|
||||
event?.preventDefault();
|
||||
|
||||
if (!scrollContainer) return;
|
||||
|
||||
scrollContainer.scrollBy({ left: scrollContainer.clientWidth * -0.67, behavior: 'smooth' });
|
||||
}
|
||||
|
||||
function scrollRight(event?: MouseEvent) {
|
||||
event?.stopPropagation();
|
||||
event?.preventDefault();
|
||||
|
||||
if (!scrollContainer) return;
|
||||
|
||||
scrollContainer.scrollBy({ left: scrollContainer.clientWidth * 0.67, behavior: 'smooth' });
|
||||
}
|
||||
|
||||
function updateScrollButtons() {
|
||||
if (!scrollContainer) return;
|
||||
|
||||
const { scrollLeft, scrollWidth, clientWidth } = scrollContainer;
|
||||
|
||||
canScrollLeft = scrollLeft > 0;
|
||||
canScrollRight = scrollLeft < scrollWidth - clientWidth - 1;
|
||||
isScrollable = scrollWidth > clientWidth;
|
||||
}
|
||||
|
||||
$effect(() => {
|
||||
if (scrollContainer && displayItems.length) {
|
||||
scrollContainer.scrollLeft = 0;
|
||||
|
||||
setTimeout(() => {
|
||||
updateScrollButtons();
|
||||
}, 0);
|
||||
}
|
||||
});
|
||||
</script>
|
||||
|
||||
{#if displayItems.length > 0}
|
||||
<div class="flex flex-wrap items-start {readonly ? 'justify-end' : ''} gap-3 {className}">
|
||||
{#each displayItems as item (item.id)}
|
||||
{#if item.isImage && item.preview}
|
||||
<ChatAttachmentImagePreview
|
||||
class="cursor-pointer"
|
||||
id={item.id}
|
||||
name={item.name}
|
||||
preview={item.preview}
|
||||
{readonly}
|
||||
onRemove={onFileRemove}
|
||||
height={imageHeight}
|
||||
width={imageWidth}
|
||||
{imageClass}
|
||||
onClick={(event) => openPreview(item, event)}
|
||||
/>
|
||||
{:else}
|
||||
<ChatAttachmentFilePreview
|
||||
class="cursor-pointer"
|
||||
id={item.id}
|
||||
name={item.name}
|
||||
type={item.type}
|
||||
size={item.size}
|
||||
{readonly}
|
||||
onRemove={onFileRemove}
|
||||
textContent={item.textContent}
|
||||
onClick={(event) => openPreview(item, event)}
|
||||
/>
|
||||
{/if}
|
||||
{/each}
|
||||
<div class={className} {style}>
|
||||
<div class="relative">
|
||||
<button
|
||||
class="absolute top-1/2 left-4 z-10 flex h-6 w-6 -translate-y-1/2 items-center justify-center rounded-full bg-foreground/15 shadow-md backdrop-blur-xs transition-opacity hover:bg-foreground/35 {canScrollLeft
|
||||
? 'opacity-100'
|
||||
: 'pointer-events-none opacity-0'}"
|
||||
onclick={scrollLeft}
|
||||
aria-label="Scroll left"
|
||||
>
|
||||
<ChevronLeft class="h-4 w-4" />
|
||||
</button>
|
||||
|
||||
<div
|
||||
class="scrollbar-hide flex items-start gap-3 overflow-x-auto"
|
||||
bind:this={scrollContainer}
|
||||
onscroll={updateScrollButtons}
|
||||
>
|
||||
{#each displayItems as item (item.id)}
|
||||
{#if item.isImage && item.preview}
|
||||
<ChatAttachmentImagePreview
|
||||
class="flex-shrink-0 cursor-pointer {limitToSingleRow ? 'first:ml-4 last:mr-4' : ''}"
|
||||
id={item.id}
|
||||
name={item.name}
|
||||
preview={item.preview}
|
||||
{readonly}
|
||||
onRemove={onFileRemove}
|
||||
height={imageHeight}
|
||||
width={imageWidth}
|
||||
{imageClass}
|
||||
onClick={(event) => openPreview(item, event)}
|
||||
/>
|
||||
{:else}
|
||||
<ChatAttachmentFilePreview
|
||||
class="flex-shrink-0 cursor-pointer {limitToSingleRow ? 'first:ml-4 last:mr-4' : ''}"
|
||||
id={item.id}
|
||||
name={item.name}
|
||||
type={item.type}
|
||||
size={item.size}
|
||||
{readonly}
|
||||
onRemove={onFileRemove}
|
||||
textContent={item.textContent}
|
||||
onClick={(event) => openPreview(item, event)}
|
||||
/>
|
||||
{/if}
|
||||
{/each}
|
||||
</div>
|
||||
|
||||
<button
|
||||
class="absolute top-1/2 right-4 z-10 flex h-6 w-6 -translate-y-1/2 items-center justify-center rounded-full bg-foreground/15 shadow-md backdrop-blur-xs transition-opacity hover:bg-foreground/35 {canScrollRight
|
||||
? 'opacity-100'
|
||||
: 'pointer-events-none opacity-0'}"
|
||||
onclick={scrollRight}
|
||||
aria-label="Scroll right"
|
||||
>
|
||||
<ChevronRight class="h-4 w-4" />
|
||||
</button>
|
||||
</div>
|
||||
|
||||
{#if showViewAll}
|
||||
<div class="mt-2 -mr-2 flex justify-end px-4">
|
||||
<Button
|
||||
type="button"
|
||||
variant="ghost"
|
||||
size="sm"
|
||||
class="h-6 text-xs text-muted-foreground hover:text-foreground"
|
||||
onclick={() => (viewAllDialogOpen = true)}
|
||||
>
|
||||
View all
|
||||
</Button>
|
||||
</div>
|
||||
{/if}
|
||||
</div>
|
||||
{/if}
|
||||
|
||||
|
|
@ -194,3 +267,13 @@
|
|||
textContent={previewItem.textContent}
|
||||
/>
|
||||
{/if}
|
||||
|
||||
<ChatAttachmentsViewAllDialog
|
||||
bind:open={viewAllDialogOpen}
|
||||
{uploadedFiles}
|
||||
{attachments}
|
||||
{readonly}
|
||||
{onFileRemove}
|
||||
imageHeight="h-64"
|
||||
{imageClass}
|
||||
/>
|
||||
|
|
|
|||
|
|
@ -0,0 +1,203 @@
|
|||
<script lang="ts">
|
||||
import * as Dialog from '$lib/components/ui/dialog';
|
||||
import { ChatAttachmentImagePreview, ChatAttachmentFilePreview } from '$lib/components/app';
|
||||
import { FileTypeCategory } from '$lib/enums/files';
|
||||
import { getFileTypeCategory } from '$lib/utils/file-type';
|
||||
import ChatAttachmentPreviewDialog from './ChatAttachmentPreviewDialog.svelte';
|
||||
import type { ChatAttachmentDisplayItem, ChatAttachmentPreviewItem } from '$lib/types/chat';
|
||||
|
||||
/** Props accepted by the "view all attachments" dialog. */
interface Props {
	/** Whether the dialog is shown; bound to the dialog root's open state. */
	open?: boolean;
	/** Uploaded files to list; these are added to the list before `attachments`. */
	uploadedFiles?: ChatUploadedFile[];
	/** Stored message attachments to list, identified by their position in this array. */
	attachments?: DatabaseMessageExtra[];
	/** Forwarded unchanged to every file and image preview. */
	readonly?: boolean;
	/** Forwarded to every preview as its `onRemove` callback. */
	onFileRemove?: (fileId: string) => void;
	/** Forwarded to image previews as `height`. */
	imageHeight?: string;
	/** Forwarded to image previews as `width`. */
	imageWidth?: string;
	/** Forwarded to image previews as `imageClass`. */
	imageClass?: string;
}
|
||||
|
||||
// Component props with their defaults; `open` is bindable so the parent can drive the dialog.
let {
	open = $bindable(false),
	uploadedFiles = [],
	attachments = [],
	readonly = false,
	onFileRemove,
	imageHeight = 'h-24',
	imageWidth = 'w-auto',
	imageClass = ''
}: Props = $props();

// State of the single-item preview dialog that is opened from inside this dialog.
let previewItem = $state<ChatAttachmentPreviewItem | null>(null);
let previewDialogOpen = $state(false);

// Flat item list, plus the two partitions rendered as the "Images" and "Files" sections.
let displayItems = $derived(getDisplayItems());
let imageItems = $derived(displayItems.filter((entry) => entry.isImage));
let fileItems = $derived(displayItems.filter((entry) => !entry.isImage));
|
||||
|
||||
/**
 * Builds the flat list of items shown in the dialog.
 *
 * Uploaded files are added first, followed by the stored attachments; the
 * combined list is reversed before it is returned. Attachments whose `type`
 * is not one of the cases handled below are left out.
 */
function getDisplayItems(): ChatAttachmentDisplayItem[] {
	const collected: ChatAttachmentDisplayItem[] = uploadedFiles.map((file) => ({
		id: file.id,
		name: file.name,
		size: file.size,
		preview: file.preview,
		type: file.type,
		isImage: getFileTypeCategory(file.type) === FileTypeCategory.IMAGE,
		uploadedFile: file,
		textContent: file.textContent
	}));

	for (const [index, attachment] of attachments.entries()) {
		// Stored attachments carry no id here, so one is derived from the array position.
		const id = `attachment-${index}`;

		switch (attachment.type) {
			case 'imageFile':
				collected.push({
					id,
					name: attachment.name,
					preview: attachment.base64Url,
					type: 'image',
					isImage: true,
					attachment,
					attachmentIndex: index
				});
				break;

			case 'textFile':
			case 'context':
				// 'context' is the legacy format from the old webui - treated as a text file
				collected.push({
					id,
					name: attachment.name,
					type: 'text',
					isImage: false,
					attachment,
					attachmentIndex: index,
					textContent: attachment.content
				});
				break;

			case 'audioFile':
				collected.push({
					id,
					name: attachment.name,
					type: attachment.mimeType || 'audio',
					isImage: false,
					attachment,
					attachmentIndex: index
				});
				break;

			case 'pdfFile':
				collected.push({
					id,
					name: attachment.name,
					type: 'application/pdf',
					isImage: false,
					attachment,
					attachmentIndex: index,
					textContent: attachment.content
				});
				break;
		}
	}

	return collected.reverse();
}
|
||||
|
||||
/**
 * Opens the single-item preview dialog for `item`.
 *
 * When an event is supplied, its default action and its propagation are
 * cancelled before the preview state is updated.
 */
function openPreview(item: (typeof displayItems)[0], event?: Event) {
	event?.preventDefault();
	event?.stopPropagation();

	// Copy only the fields the preview dialog consumes.
	const { uploadedFile, attachment, preview, name, type, size, textContent } = item;

	previewItem = { uploadedFile, attachment, preview, name, type, size, textContent };
	previewDialogOpen = true;
}
|
||||
</script>
|
||||
|
||||
<!-- Dialog listing every attachment, grouped into a "Files" and an "Images" section -->
<Dialog.Root bind:open>
	<Dialog.Portal>
		<Dialog.Overlay />

		<Dialog.Content class="flex !max-h-[90vh] !max-w-6xl flex-col">
			<Dialog.Header>
				<Dialog.Title>All Attachments ({displayItems.length})</Dialog.Title>
				<Dialog.Description class="text-sm text-muted-foreground">
					View and manage all attached files
				</Dialog.Description>
			</Dialog.Header>

			<!-- Scrollable body; a section is rendered only when it has at least one item -->
			<div class="min-h-0 flex-1 space-y-6 overflow-y-auto px-1">
				{#if fileItems.length > 0}
					<div>
						<h3 class="mb-3 text-sm font-medium text-foreground">Files ({fileItems.length})</h3>
						<div class="flex flex-wrap items-start gap-3">
							{#each fileItems as fileItem (fileItem.id)}
								<ChatAttachmentFilePreview
									id={fileItem.id}
									name={fileItem.name}
									type={fileItem.type}
									size={fileItem.size}
									textContent={fileItem.textContent}
									class="cursor-pointer"
									{readonly}
									onRemove={onFileRemove}
									onClick={(event) => openPreview(fileItem, event)}
								/>
							{/each}
						</div>
					</div>
				{/if}

				{#if imageItems.length > 0}
					<div>
						<h3 class="mb-3 text-sm font-medium text-foreground">Images ({imageItems.length})</h3>
						<div class="flex flex-wrap items-start gap-3">
							{#each imageItems as imageItem (imageItem.id)}
								<!-- Images without a preview source are not rendered -->
								{#if imageItem.preview}
									<ChatAttachmentImagePreview
										id={imageItem.id}
										name={imageItem.name}
										preview={imageItem.preview}
										height={imageHeight}
										width={imageWidth}
										{imageClass}
										class="cursor-pointer"
										{readonly}
										onRemove={onFileRemove}
										onClick={(event) => openPreview(imageItem, event)}
									/>
								{/if}
							{/each}
						</div>
					</div>
				{/if}
			</div>
		</Dialog.Content>
	</Dialog.Portal>
</Dialog.Root>
|
||||
|
||||
<!-- Single-item preview; mounted only once an item has been selected via openPreview -->
{#if previewItem}
	<ChatAttachmentPreviewDialog
		bind:open={previewDialogOpen}
		name={previewItem.name}
		type={previewItem.type}
		size={previewItem.size}
		preview={previewItem.preview}
		textContent={previewItem.textContent}
		uploadedFile={previewItem.uploadedFile}
		attachment={previewItem.attachment}
	/>
{/if}
|
||||
|
|
@ -232,7 +232,13 @@
|
|||
onsubmit={handleSubmit}
|
||||
class="{INPUT_CLASSES} border-radius-bottom-none mx-auto max-w-[48rem] overflow-hidden rounded-3xl backdrop-blur-md {className}"
|
||||
>
|
||||
<ChatAttachmentsList bind:uploadedFiles {onFileRemove} class="mb-3 px-5 pt-5" />
|
||||
<ChatAttachmentsList
|
||||
bind:uploadedFiles
|
||||
{onFileRemove}
|
||||
limitToSingleRow
|
||||
class="py-5"
|
||||
style="scroll-padding: 1rem;"
|
||||
/>
|
||||
|
||||
<div
|
||||
class="flex-column relative min-h-[48px] items-center rounded-3xl px-5 py-3 shadow-sm transition-all focus-within:shadow-md"
|
||||
|
|
|
|||
|
|
@ -333,7 +333,7 @@
|
|||
ondrop={handleDrop}
|
||||
role="main"
|
||||
>
|
||||
<div class="w-full max-w-2xl px-4">
|
||||
<div class="w-full max-w-[48rem] px-4">
|
||||
<div class="mb-8 text-center" in:fade={{ duration: 300 }}>
|
||||
<h1 class="mb-2 text-3xl font-semibold tracking-tight">llama.cpp</h1>
|
||||
|
||||
|
|
@ -368,7 +368,7 @@
|
|||
<AlertDialog.Portal>
|
||||
<AlertDialog.Overlay />
|
||||
|
||||
<AlertDialog.Content class="max-w-md">
|
||||
<AlertDialog.Content class="flex max-w-md flex-col">
|
||||
<AlertDialog.Header>
|
||||
<AlertDialog.Title>File Upload Error</AlertDialog.Title>
|
||||
|
||||
|
|
@ -377,7 +377,7 @@
|
|||
</AlertDialog.Description>
|
||||
</AlertDialog.Header>
|
||||
|
||||
<div class="space-y-4">
|
||||
<div class="!max-h-[50vh] min-h-0 flex-1 space-y-4 overflow-y-auto">
|
||||
{#if fileErrorData.generallyUnsupported.length > 0}
|
||||
<div class="space-y-2">
|
||||
<h4 class="text-sm font-medium text-destructive">Unsupported File Types</h4>
|
||||
|
|
@ -398,8 +398,6 @@
|
|||
|
||||
{#if fileErrorData.modalityUnsupported.length > 0}
|
||||
<div class="space-y-2">
|
||||
<h4 class="text-sm font-medium text-destructive">Model Compatibility Issues</h4>
|
||||
|
||||
<div class="space-y-1">
|
||||
{#each fileErrorData.modalityUnsupported as file (file.name)}
|
||||
<div class="rounded-md bg-destructive/10 px-3 py-2">
|
||||
|
|
@ -415,14 +413,14 @@
|
|||
</div>
|
||||
</div>
|
||||
{/if}
|
||||
</div>
|
||||
|
||||
<div class="rounded-md bg-muted/50 p-3">
|
||||
<h4 class="mb-2 text-sm font-medium">This model supports:</h4>
|
||||
<div class="rounded-md bg-muted/50 p-3">
|
||||
<h4 class="mb-2 text-sm font-medium">This model supports:</h4>
|
||||
|
||||
<p class="text-sm text-muted-foreground">
|
||||
{fileErrorData.supportedTypes.join(', ')}
|
||||
</p>
|
||||
</div>
|
||||
<p class="text-sm text-muted-foreground">
|
||||
{fileErrorData.supportedTypes.join(', ')}
|
||||
</p>
|
||||
</div>
|
||||
|
||||
<AlertDialog.Footer>
|
||||
|
|
|
|||
|
|
@ -2,6 +2,7 @@ export { default as ChatAttachmentsList } from './chat/ChatAttachments/ChatAttac
|
|||
export { default as ChatAttachmentFilePreview } from './chat/ChatAttachments/ChatAttachmentFilePreview.svelte';
|
||||
export { default as ChatAttachmentImagePreview } from './chat/ChatAttachments/ChatAttachmentImagePreview.svelte';
|
||||
export { default as ChatAttachmentPreviewDialog } from './chat/ChatAttachments/ChatAttachmentPreviewDialog.svelte';
|
||||
export { default as ChatAttachmentsViewAllDialog } from './chat/ChatAttachments/ChatAttachmentsViewAllDialog.svelte';
|
||||
|
||||
export { default as ChatForm } from './chat/ChatForm/ChatForm.svelte';
|
||||
export { default as ChatFormTextarea } from './chat/ChatForm/ChatFormTextarea.svelte';
|
||||
|
|
@ -42,6 +43,8 @@ export { default as KeyboardShortcutInfo } from './misc/KeyboardShortcutInfo.sve
|
|||
|
||||
export { default as MarkdownContent } from './misc/MarkdownContent.svelte';
|
||||
|
||||
export { default as RemoveButton } from './misc/RemoveButton.svelte';
|
||||
|
||||
export { default as ServerStatus } from './server/ServerStatus.svelte';
|
||||
export { default as ServerErrorSplash } from './server/ServerErrorSplash.svelte';
|
||||
export { default as ServerLoadingSplash } from './server/ServerLoadingSplash.svelte';
|
||||
|
|
|
|||
|
|
@ -0,0 +1,26 @@
|
|||
<script lang="ts">
|
||||
import { X } from '@lucide/svelte';
|
||||
import { Button } from '$lib/components/ui/button';
|
||||
|
||||
/** Props for the small icon button that removes an item. */
interface Props {
	/** Identifier handed back to `onRemove` when the button is clicked. */
	id: string;
	/** Optional callback invoked with `id` on click. */
	onRemove?: (id: string) => void;
	/** Extra CSS classes appended after the button's own classes. */
	class?: string;
}

let { id, onRemove, class: className = '' }: Props = $props();

// Keep the click from reaching ancestor handlers, then report the removal.
function handleClick(event: Event) {
	event.stopPropagation();
	onRemove?.(id);
}
</script>

<Button
	type="button"
	variant="ghost"
	size="sm"
	class="h-6 w-6 bg-white/20 p-0 hover:bg-white/30 {className}"
	onclick={handleClick}
	aria-label="Remove file"
>
	<X class="h-3 w-3" />
</Button>
|
||||
|
|
@ -11,6 +11,29 @@ export interface ChatUploadedFile {
|
|||
textContent?: string;
|
||||
}
|
||||
|
||||
/**
 * One entry of an attachment list, built either from an uploaded file or
 * from a stored message attachment.
 */
export interface ChatAttachmentDisplayItem {
	/** Key used to identify the entry in lists and in remove callbacks. */
	id: string;
	/** File name shown for the entry. */
	name: string;
	/** File size, when known. */
	size?: number;
	/** Image source used for the thumbnail, when available. */
	preview?: string;
	/** Type label or MIME type of the entry. */
	type: string;
	/** True when the entry should be rendered as an image. */
	isImage: boolean;
	/** Originating uploaded file, when the entry comes from an upload. */
	uploadedFile?: ChatUploadedFile;
	/** Originating stored attachment, when the entry comes from a message. */
	attachment?: DatabaseMessageExtra;
	/** Position of `attachment` within its source array. */
	attachmentIndex?: number;
	/** Text content associated with the entry, when available. */
	textContent?: string;
}
|
||||
|
||||
/**
 * Data handed to the single-attachment preview dialog.
 * Every field is optional.
 */
export interface ChatAttachmentPreviewItem {
	/** Originating uploaded file, if any. */
	uploadedFile?: ChatUploadedFile;
	/** Originating stored attachment, if any. */
	attachment?: DatabaseMessageExtra;
	/** Image source for the preview, if any. */
	preview?: string;
	/** File name. */
	name?: string;
	/** Type label or MIME type. */
	type?: string;
	/** File size, when known. */
	size?: number;
	/** Text content associated with the item, if any. */
	textContent?: string;
}
|
||||
|
||||
export interface ChatMessageSiblingInfo {
|
||||
message: DatabaseMessage;
|
||||
siblingIds: string[];
|
||||
|
|
|
|||
|
|
@ -1,7 +1,7 @@
|
|||
<script module lang="ts">
|
||||
import { defineMeta } from '@storybook/addon-svelte-csf';
|
||||
import ChatForm from '$lib/components/app/chat/ChatForm/ChatForm.svelte';
|
||||
import { expect } from 'storybook/internal/test';
|
||||
import { expect } from 'storybook/test';
|
||||
import { mockServerProps, mockConfigs } from './fixtures/storybook-mocks';
|
||||
import jpgAsset from './fixtures/assets/1.jpg?url';
|
||||
import svgAsset from './fixtures/assets/hf-logo.svg?url';
|
||||
|
|
|
|||
|
|
@ -1,7 +1,7 @@
|
|||
<script module lang="ts">
|
||||
import { defineMeta } from '@storybook/addon-svelte-csf';
|
||||
import ChatSidebar from '$lib/components/app/chat/ChatSidebar/ChatSidebar.svelte';
|
||||
import { waitFor } from 'storybook/internal/test';
|
||||
import { waitFor } from 'storybook/test';
|
||||
import { screen } from 'storybook/test';
|
||||
|
||||
const { Story } = defineMeta({
|
||||
|
|
|
|||
|
|
@ -1,5 +1,6 @@
|
|||
<script module lang="ts">
|
||||
import { defineMeta } from '@storybook/addon-svelte-csf';
|
||||
import { expect } from 'storybook/test';
|
||||
import { MarkdownContent } from '$lib/components/app';
|
||||
import { AI_TUTORIAL_MD } from './fixtures/ai-tutorial.js';
|
||||
import { API_DOCS_MD } from './fixtures/api-docs.js';
|
||||
|
|
@ -68,64 +69,62 @@ All links should have \`target="_blank"\` and \`rel="noopener noreferrer"\` attr
|
|||
class: 'max-w-[56rem] w-[calc(100vw-2rem)]'
|
||||
}}
|
||||
play={async ({ canvasElement }) => {
|
||||
const { expect } = await import('storybook/internal/test');
|
||||
|
||||
// Wait for component to render
|
||||
await new Promise(resolve => setTimeout(resolve, 100));
|
||||
|
||||
await new Promise((resolve) => setTimeout(resolve, 100));
|
||||
|
||||
// Find all links in the rendered content
|
||||
const links = canvasElement.querySelectorAll('a[href]');
|
||||
|
||||
|
||||
// Test that we have the expected number of links
|
||||
expect(links.length).toBeGreaterThan(0);
|
||||
|
||||
|
||||
// Test each link for proper attributes
|
||||
links.forEach((link) => {
|
||||
const href = link.getAttribute('href');
|
||||
|
||||
|
||||
// Test that external links have proper security attributes
|
||||
if (href && (href.startsWith('http://') || href.startsWith('https://'))) {
|
||||
expect(link.getAttribute('target')).toBe('_blank');
|
||||
expect(link.getAttribute('rel')).toBe('noopener noreferrer');
|
||||
}
|
||||
});
|
||||
|
||||
|
||||
// Test specific links exist
|
||||
const hugginFaceLink = Array.from(links).find(link =>
|
||||
link.getAttribute('href') === 'https://huggingface.co'
|
||||
const hugginFaceLink = Array.from(links).find(
|
||||
(link) => link.getAttribute('href') === 'https://huggingface.co'
|
||||
);
|
||||
expect(hugginFaceLink).toBeTruthy();
|
||||
expect(hugginFaceLink?.textContent).toBe('Hugging Face Homepage');
|
||||
|
||||
const githubLink = Array.from(links).find(link =>
|
||||
link.getAttribute('href') === 'https://github.com/ggml-org/llama.cpp'
|
||||
|
||||
const githubLink = Array.from(links).find(
|
||||
(link) => link.getAttribute('href') === 'https://github.com/ggml-org/llama.cpp'
|
||||
);
|
||||
expect(githubLink).toBeTruthy();
|
||||
expect(githubLink?.textContent).toBe('GitHub Repository');
|
||||
|
||||
const openaiLink = Array.from(links).find(link =>
|
||||
link.getAttribute('href') === 'https://openai.com'
|
||||
|
||||
const openaiLink = Array.from(links).find(
|
||||
(link) => link.getAttribute('href') === 'https://openai.com'
|
||||
);
|
||||
expect(openaiLink).toBeTruthy();
|
||||
expect(openaiLink?.textContent).toBe('OpenAI Website');
|
||||
|
||||
const googleLink = Array.from(links).find(link =>
|
||||
link.getAttribute('href') === 'https://www.google.com'
|
||||
|
||||
const googleLink = Array.from(links).find(
|
||||
(link) => link.getAttribute('href') === 'https://www.google.com'
|
||||
);
|
||||
expect(googleLink).toBeTruthy();
|
||||
expect(googleLink?.textContent).toBe('Google Search');
|
||||
|
||||
|
||||
// Test inline links (auto-linked URLs)
|
||||
const exampleLink = Array.from(links).find(link =>
|
||||
link.getAttribute('href') === 'https://example.com'
|
||||
const exampleLink = Array.from(links).find(
|
||||
(link) => link.getAttribute('href') === 'https://example.com'
|
||||
);
|
||||
expect(exampleLink).toBeTruthy();
|
||||
|
||||
const pythonDocsLink = Array.from(links).find(link =>
|
||||
link.getAttribute('href') === 'https://docs.python.org'
|
||||
|
||||
const pythonDocsLink = Array.from(links).find(
|
||||
(link) => link.getAttribute('href') === 'https://docs.python.org'
|
||||
);
|
||||
expect(pythonDocsLink).toBeTruthy();
|
||||
|
||||
|
||||
console.log(`✅ URL Links test passed - Found ${links.length} links with proper attributes`);
|
||||
}}
|
||||
/>
|
||||
|
|
|
|||
|
|
@ -22,7 +22,39 @@ target_compile_definitions(${TARGET} PRIVATE
|
|||
CPPHTTPLIB_TCP_NODELAY=1
|
||||
)
|
||||
|
||||
if (${CMAKE_SYSTEM_NAME} MATCHES "visionOS")
|
||||
# quick fix for https://github.com/ggml-org/llama.cpp/actions/runs/19247291428/job/55024294176?pr=17150
|
||||
target_compile_definitions(${TARGET} PRIVATE NI_MAXHOST=1025)
|
||||
# Optional OpenSSL support, attempted only when LLAMA_OPENSSL is enabled.
#
# OpenSSL is looked up without REQUIRED, so a missing or unsupported
# installation just leaves SSL support off and prints a status message.
# On success ${TARGET} gets CPPHTTPLIB_OPENSSL_SUPPORT and links
# OpenSSL::SSL / OpenSSL::Crypto, all PUBLIC so consumers inherit them.
if (LLAMA_OPENSSL)
    find_package(OpenSSL)
    if (OpenSSL_FOUND)
        include(CheckCSourceCompiles)

        # Compile a probe against the OpenSSL headers only; the previous
        # CMAKE_REQUIRED_INCLUDES value is put back right after the check.
        set(SAVED_CMAKE_REQUIRED_INCLUDES "${CMAKE_REQUIRED_INCLUDES}")
        set(CMAKE_REQUIRED_INCLUDES "${OPENSSL_INCLUDE_DIR}")

        # The probe fails to compile when OPENSSL_VERSION_NUMBER is below
        # 0x1010107f for BoringSSL/LibreSSL, or below 0x30000000L otherwise.
        check_c_source_compiles("
            #include <openssl/opensslv.h>
            #if defined(OPENSSL_IS_BORINGSSL) || defined(LIBRESSL_VERSION_NUMBER)
            #    if OPENSSL_VERSION_NUMBER < 0x1010107f
            #        error bad version
            #    endif
            #else
            #    if OPENSSL_VERSION_NUMBER < 0x30000000L
            #        error bad version
            #    endif
            #endif
            int main() { return 0; }
        " OPENSSL_VERSION_SUPPORTED)

        set(CMAKE_REQUIRED_INCLUDES "${SAVED_CMAKE_REQUIRED_INCLUDES}")

        if (OPENSSL_VERSION_SUPPORTED)
            message(STATUS "OpenSSL found: ${OPENSSL_VERSION}")
            target_compile_definitions(${TARGET} PUBLIC CPPHTTPLIB_OPENSSL_SUPPORT)
            target_link_libraries(${TARGET} PUBLIC OpenSSL::SSL OpenSSL::Crypto)

            # Darwin only: also enable the keychain certificate define and
            # link the two frameworks it is paired with here.
            if (APPLE AND CMAKE_SYSTEM_NAME STREQUAL "Darwin")
                target_compile_definitions(${TARGET} PUBLIC CPPHTTPLIB_USE_CERTS_FROM_MACOSX_KEYCHAIN)
                find_library(CORE_FOUNDATION_FRAMEWORK CoreFoundation REQUIRED)
                find_library(SECURITY_FRAMEWORK Security REQUIRED)
                target_link_libraries(${TARGET} PUBLIC ${CORE_FOUNDATION_FRAMEWORK} ${SECURITY_FRAMEWORK})
            endif()
        else()
            # Previously this case was silent, which made a missing SSL
            # feature hard to diagnose from the configure log.
            message(STATUS "OpenSSL found (${OPENSSL_VERSION}) but the version is not supported, SSL support disabled")
        endif()
    else()
        message(STATUS "OpenSSL not found, SSL support disabled")
    endif()
endif()
|
||||
|
||||
|
|
|
|||
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
Loading…
Reference in New Issue