Merge remote-tracking branch 'upstream/master' into allozaur/20677-webui-server-tools

This commit is contained in:
Aleksander Grygier 2026-04-03 15:08:38 +02:00
commit d24e0ed6db
8 changed files with 76 additions and 122 deletions

View File

@ -1,97 +0,0 @@
# Multi-stage CUDA image definition.
#
# Stages:
#   build  - compiles the project with CUDA enabled and collects artifacts
#   base   - CUDA runtime image plus the shared libraries produced by `build`
#   full   - base + all binaries, Python scripts and their Python requirements
#   light  - base + llama-cli and llama-completion only
#   server - base + llama-server only, with an HTTP health check

ARG UBUNTU_VERSION=24.04
# This needs to generally match the container host's environment.
ARG CUDA_VERSION=13.1.1
# Target the CUDA build image
ARG BASE_CUDA_DEV_CONTAINER=nvidia/cuda:${CUDA_VERSION}-devel-ubuntu${UBUNTU_VERSION}
ARG BASE_CUDA_RUN_CONTAINER=nvidia/cuda:${CUDA_VERSION}-runtime-ubuntu${UBUNTU_VERSION}

FROM ${BASE_CUDA_DEV_CONTAINER} AS build

# CUDA architecture to build for (defaults to all supported archs)
ARG CUDA_DOCKER_ARCH=default

RUN apt-get update && \
    apt-get install -y gcc-14 g++-14 build-essential cmake python3 python3-pip git libssl-dev libgomp1

# Use GCC 14 for C, C++ and as the CUDA host compiler.
ENV CC=gcc-14 CXX=g++-14 CUDAHOSTCXX=g++-14

WORKDIR /app

COPY . .

# Only pass -DCMAKE_CUDA_ARCHITECTURES when a specific architecture was requested;
# with the "default" value CMAKE_ARGS stays unset and nothing extra is passed.
RUN if [ "${CUDA_DOCKER_ARCH}" != "default" ]; then \
        export CMAKE_ARGS="-DCMAKE_CUDA_ARCHITECTURES=${CUDA_DOCKER_ARCH}"; \
    fi && \
    cmake -B build -DGGML_NATIVE=OFF -DGGML_CUDA=ON -DGGML_BACKEND_DL=ON -DGGML_CPU_ALL_VARIANTS=ON -DLLAMA_BUILD_TESTS=OFF ${CMAKE_ARGS} -DCMAKE_EXE_LINKER_FLAGS=-Wl,--allow-shlib-undefined . && \
    cmake --build build --config Release -j$(nproc)

# Collect every shared library (cp -P keeps symlinks as symlinks) into /app/lib.
RUN mkdir -p /app/lib && \
    find build -name "*.so*" -exec cp -P {} /app/lib \;

# Collect binaries, Python scripts and requirement files for the `full` image.
RUN mkdir -p /app/full \
    && cp build/bin/* /app/full \
    && cp *.py /app/full \
    && cp -r gguf-py /app/full \
    && cp -r requirements /app/full \
    && cp requirements.txt /app/full \
    && cp .devops/tools.sh /app/full/tools.sh

## Base image
FROM ${BASE_CUDA_RUN_CONTAINER} AS base

# apt-get is used throughout (instead of the interactive `apt` front-end),
# because apt's command-line interface is not guaranteed stable for scripts.
RUN apt-get update \
    && apt-get install -y libgomp1 curl \
    && apt-get autoremove -y \
    && apt-get clean -y \
    && rm -rf /tmp/* /var/tmp/* \
    && find /var/cache/apt/archives /var/lib/apt/lists -not -name lock -type f -delete \
    && find /var/cache -type f -delete

COPY --from=build /app/lib/ /app

### Full
FROM base AS full

COPY --from=build /app/full /app

WORKDIR /app

# --break-system-packages lets pip install into the system Python environment.
RUN apt-get update \
    && apt-get install -y \
    git \
    python3 \
    python3-pip \
    python3-wheel \
    && pip install --break-system-packages --upgrade setuptools \
    && pip install --break-system-packages -r requirements.txt \
    && apt-get autoremove -y \
    && apt-get clean -y \
    && rm -rf /tmp/* /var/tmp/* \
    && find /var/cache/apt/archives /var/lib/apt/lists -not -name lock -type f -delete \
    && find /var/cache -type f -delete

ENTRYPOINT ["/app/tools.sh"]

### Light, CLI only
FROM base AS light

COPY --from=build /app/full/llama-cli /app/full/llama-completion /app

WORKDIR /app

ENTRYPOINT [ "/app/llama-cli" ]

### Server, Server only
FROM base AS server

# Sets the server's host argument to 0.0.0.0 through the environment.
ENV LLAMA_ARG_HOST=0.0.0.0

COPY --from=build /app/full/llama-server /app

WORKDIR /app

# curl -f exits non-zero on an HTTP error status, marking the container unhealthy.
HEALTHCHECK CMD [ "curl", "-f", "http://localhost:8080/health" ]

ENTRYPOINT [ "/app/llama-server" ]

View File

@ -73,10 +73,10 @@ jobs:
{ "tag": "cpu", "dockerfile": ".devops/cpu.Dockerfile", "platforms": "linux/amd64", "full": true, "light": true, "server": true, "free_disk_space": false, "runs_on": "ubuntu-24.04" },
{ "tag": "cpu", "dockerfile": ".devops/cpu.Dockerfile", "platforms": "linux/arm64", "full": true, "light": true, "server": true, "free_disk_space": false, "runs_on": "ubuntu-24.04-arm" },
{ "tag": "cpu", "dockerfile": ".devops/s390x.Dockerfile", "platforms": "linux/s390x", "full": true, "light": true, "server": true, "free_disk_space": false, "runs_on": "ubuntu-24.04-s390x" },
{ "tag": "cuda cuda12", "dockerfile": ".devops/cuda.Dockerfile", "platforms": "linux/amd64", "full": true, "light": true, "server": true, "free_disk_space": true, "runs_on": "ubuntu-24.04" },
{ "tag": "cuda cuda12", "dockerfile": ".devops/cuda.Dockerfile", "platforms": "linux/arm64", "full": true, "light": true, "server": true, "free_disk_space": true, "runs_on": "ubuntu-24.04-arm" },
{ "tag": "cuda13", "dockerfile": ".devops/cuda-new.Dockerfile", "platforms": "linux/amd64", "full": true, "light": true, "server": true, "free_disk_space": true, "runs_on": "ubuntu-24.04" },
{ "tag": "cuda13", "dockerfile": ".devops/cuda-new.Dockerfile", "platforms": "linux/arm64", "full": true, "light": true, "server": true, "free_disk_space": true, "runs_on": "ubuntu-24.04-arm" },
{ "tag": "cuda cuda12", "dockerfile": ".devops/cuda.Dockerfile", "cuda_version": "12.9.1", "platforms": "linux/amd64", "full": true, "light": true, "server": true, "free_disk_space": true, "runs_on": "ubuntu-24.04" },
{ "tag": "cuda cuda12", "dockerfile": ".devops/cuda.Dockerfile", "cuda_version": "12.9.1", "platforms": "linux/arm64", "full": true, "light": true, "server": true, "free_disk_space": true, "runs_on": "ubuntu-24.04-arm" },
{ "tag": "cuda13", "dockerfile": ".devops/cuda.Dockerfile", "cuda_version": "13.1.1", "platforms": "linux/amd64", "full": true, "light": true, "server": true, "free_disk_space": true, "runs_on": "ubuntu-24.04" },
{ "tag": "cuda13", "dockerfile": ".devops/cuda.Dockerfile", "cuda_version": "13.1.1", "platforms": "linux/arm64", "full": true, "light": true, "server": true, "free_disk_space": true, "runs_on": "ubuntu-24.04-arm" },
{ "tag": "musa", "dockerfile": ".devops/musa.Dockerfile", "platforms": "linux/amd64", "full": true, "light": true, "server": true, "free_disk_space": true, "runs_on": "ubuntu-24.04" },
{ "tag": "intel", "dockerfile": ".devops/intel.Dockerfile", "platforms": "linux/amd64", "full": true, "light": true, "server": true, "free_disk_space": true, "runs_on": "ubuntu-24.04" },
{ "tag": "vulkan", "dockerfile": ".devops/vulkan.Dockerfile", "platforms": "linux/amd64", "full": true, "light": true, "server": true, "free_disk_space": false, "runs_on": "ubuntu-24.04" },

View File

@ -221,7 +221,7 @@ function gg_run_ctest_debug {
set -e
# Check cmake and ctest are installed
# Check required binaries are installed
gg_check_build_requirements
(cmake -G "${CMAKE_GENERATOR}" -DCMAKE_BUILD_TYPE=Debug ${CMAKE_EXTRA} .. ) 2>&1 | tee -a $OUT/${ci}-cmake.log
@ -252,7 +252,7 @@ function gg_run_ctest_release {
set -e
# Check cmake and ctest are installed
# Check required binaries are installed
gg_check_build_requirements
(cmake -G "${CMAKE_GENERATOR}" -DCMAKE_BUILD_TYPE=Release ${CMAKE_EXTRA} .. ) 2>&1 | tee -a $OUT/${ci}-cmake.log
@ -627,10 +627,38 @@ function gg_sum_rerank_tiny {
}
function gg_check_build_requirements {
if ! command -v git &> /dev/null; then
gg_printf 'git not found, please install'
fi
if ! command -v git-lfs &> /dev/null; then
gg_printf 'git-lfs not found, please install'
fi
if ! command -v wget &> /dev/null; then
gg_printf 'wget not found, please install'
fi
if ! command -v python3 &> /dev/null; then
gg_printf 'python3 not found, please install'
fi
if ! command -v pip3 &> /dev/null; then
gg_printf 'pip3 not found, please install'
fi
if ! python3 -m ensurepip --help &> /dev/null; then
gg_printf 'ensurepip not found, please install python3-venv package'
fi
if ! command -v cmake &> /dev/null; then
gg_printf 'cmake not found, please install'
fi
if ! command -v ccache &> /dev/null; then
gg_printf 'ccache not found, please consider installing for faster builds'
fi
if ! command -v ctest &> /dev/null; then
gg_printf 'ctest not found, please install'
fi

View File

@ -306,6 +306,19 @@ value filter_expression::execute_impl(context & ctx) {
filter_id = "strip"; // alias
}
JJ_DEBUG("Applying filter '%s' to %s", filter_id.c_str(), input->type().c_str());
// TODO: Refactor filters so this coercion can be done automatically
if (!input->is_undefined() && !is_val<value_string>(input) && (
filter_id == "capitalize" ||
filter_id == "lower" ||
filter_id == "replace" ||
filter_id == "strip" ||
filter_id == "title" ||
filter_id == "upper" ||
filter_id == "wordcount"
)) {
JJ_DEBUG("Coercing %s to String for '%s' filter", input->type().c_str(), filter_id.c_str());
input = mk_val<value_string>(input->as_string());
}
return try_builtin_func(ctx, filter_id, input)->invoke(func_args(ctx));
} else if (is_stmt<call_expression>(filter)) {

View File

@ -465,8 +465,9 @@ const func_builtins & value_int_t::get_builtins() const {
double val = static_cast<double>(args.get_pos(0)->as_int());
return mk_val<value_float>(val);
}},
{"tojson", tojson},
{"safe", tojson},
{"string", tojson},
{"tojson", tojson},
};
return builtins;
}
@ -485,8 +486,9 @@ const func_builtins & value_float_t::get_builtins() const {
int64_t val = static_cast<int64_t>(args.get_pos(0)->as_float());
return mk_val<value_int>(val);
}},
{"tojson", tojson},
{"safe", tojson},
{"string", tojson},
{"tojson", tojson},
};
return builtins;
}
@ -771,6 +773,11 @@ const func_builtins & value_string_t::get_builtins() const {
const func_builtins & value_bool_t::get_builtins() const {
static const func_handler tostring = [](const func_args & args) -> value {
args.ensure_vals<value_bool>();
bool val = args.get_pos(0)->as_bool();
return mk_val<value_string>(val ? "True" : "False");
};
static const func_builtins builtins = {
{"default", default_value},
{"int", [](const func_args & args) -> value {
@ -783,11 +790,8 @@ const func_builtins & value_bool_t::get_builtins() const {
bool val = args.get_pos(0)->as_bool();
return mk_val<value_float>(val ? 1.0 : 0.0);
}},
{"string", [](const func_args & args) -> value {
args.ensure_vals<value_bool>();
bool val = args.get_pos(0)->as_bool();
return mk_val<value_string>(val ? "True" : "False");
}},
{"safe", tostring},
{"string", tostring},
{"tojson", tojson},
};
return builtins;
@ -1100,18 +1104,14 @@ const func_builtins & value_object_t::get_builtins() const {
}
const func_builtins & value_none_t::get_builtins() const {
static const func_handler tostring = [](const func_args &) -> value {
return mk_val<value_string>("None");
};
static const func_builtins builtins = {
{"default", default_value},
{"tojson", tojson},
{"string", [](const func_args &) -> value {
return mk_val<value_string>("None");
}},
{"safe", [](const func_args &) -> value {
return mk_val<value_string>("None");
}},
{"strip", [](const func_args &) -> value {
return mk_val<value_string>("None");
}},
{"string", tostring},
{"safe", tostring},
{"items", empty_value_fn<value_array>},
{"map", empty_value_fn<value_array>},
{"reject", empty_value_fn<value_array>},

View File

@ -389,7 +389,7 @@ You can download it from your Linux distro's package manager or from here: [ROCm
The environment variable [`HIP_VISIBLE_DEVICES`](https://rocm.docs.amd.com/en/latest/understand/gpu_isolation.html#hip-visible-devices) can be used to specify which GPU(s) will be used.
If your GPU is not officially supported you can use the environment variable [`HSA_OVERRIDE_GFX_VERSION`] set to a similar GPU, for example 10.3.0 on RDNA2 (e.g. gfx1030, gfx1031, or gfx1035) or 11.0.0 on RDNA3.
If your GPU is not officially supported you can use the environment variable [`HSA_OVERRIDE_GFX_VERSION`] set to a similar GPU, for example 10.3.0 on RDNA2 (e.g. gfx1030, gfx1031, or gfx1035) or 11.0.0 on RDNA3. Note that [`HSA_OVERRIDE_GFX_VERSION`] is [not supported on Windows](https://github.com/ROCm/ROCm/issues/2654).
### Unified Memory

View File

@ -3712,9 +3712,7 @@ int llama_vocab::max_token_len() const {
int llama_vocab::find_bpe_rank(const std::string & token_left, const std::string & token_right) const {
GGML_ASSERT(token_left.find(' ') == std::string::npos);
GGML_ASSERT(token_left.find('\n') == std::string::npos);
GGML_ASSERT(token_right.find(' ') == std::string::npos);
GGML_ASSERT(token_right.find('\n') == std::string::npos);
auto it = pimpl->bpe_ranks.find(std::make_pair(token_left, token_right));
if (it == pimpl->bpe_ranks.end()) {

View File

@ -523,6 +523,18 @@ static void test_filters(testing & t) {
"hello"
);
test_template(t, "upper array",
"{{ items|upper }}",
{{"items", json::array({"hello", "world"})}},
"['HELLO', 'WORLD']"
);
test_template(t, "upper dict",
"{{ items|upper }}",
{{"items", {{"hello", "world"}}}},
"{'HELLO': 'WORLD'}"
);
test_template(t, "capitalize",
"{{ 'heLlo World'|capitalize }}",
json::object(),