diff --git a/.devops/cuda-new.Dockerfile b/.devops/cuda-new.Dockerfile deleted file mode 100644 index 890230cd26..0000000000 --- a/.devops/cuda-new.Dockerfile +++ /dev/null @@ -1,97 +0,0 @@ -ARG UBUNTU_VERSION=24.04 -# This needs to generally match the container host's environment. -ARG CUDA_VERSION=13.1.1 -# Target the CUDA build image -ARG BASE_CUDA_DEV_CONTAINER=nvidia/cuda:${CUDA_VERSION}-devel-ubuntu${UBUNTU_VERSION} - -ARG BASE_CUDA_RUN_CONTAINER=nvidia/cuda:${CUDA_VERSION}-runtime-ubuntu${UBUNTU_VERSION} - -FROM ${BASE_CUDA_DEV_CONTAINER} AS build - -# CUDA architecture to build for (defaults to all supported archs) -ARG CUDA_DOCKER_ARCH=default - -RUN apt-get update && \ - apt-get install -y gcc-14 g++-14 build-essential cmake python3 python3-pip git libssl-dev libgomp1 - -ENV CC=gcc-14 CXX=g++-14 CUDAHOSTCXX=g++-14 - -WORKDIR /app - -COPY . . - -RUN if [ "${CUDA_DOCKER_ARCH}" != "default" ]; then \ - export CMAKE_ARGS="-DCMAKE_CUDA_ARCHITECTURES=${CUDA_DOCKER_ARCH}"; \ - fi && \ - cmake -B build -DGGML_NATIVE=OFF -DGGML_CUDA=ON -DGGML_BACKEND_DL=ON -DGGML_CPU_ALL_VARIANTS=ON -DLLAMA_BUILD_TESTS=OFF ${CMAKE_ARGS} -DCMAKE_EXE_LINKER_FLAGS=-Wl,--allow-shlib-undefined . 
&& \ - cmake --build build --config Release -j$(nproc) - -RUN mkdir -p /app/lib && \ - find build -name "*.so*" -exec cp -P {} /app/lib \; - -RUN mkdir -p /app/full \ - && cp build/bin/* /app/full \ - && cp *.py /app/full \ - && cp -r gguf-py /app/full \ - && cp -r requirements /app/full \ - && cp requirements.txt /app/full \ - && cp .devops/tools.sh /app/full/tools.sh - -## Base image -FROM ${BASE_CUDA_RUN_CONTAINER} AS base - -RUN apt-get update \ - && apt-get install -y libgomp1 curl \ - && apt autoremove -y \ - && apt clean -y \ - && rm -rf /tmp/* /var/tmp/* \ - && find /var/cache/apt/archives /var/lib/apt/lists -not -name lock -type f -delete \ - && find /var/cache -type f -delete - -COPY --from=build /app/lib/ /app - -### Full -FROM base AS full - -COPY --from=build /app/full /app - -WORKDIR /app - -RUN apt-get update \ - && apt-get install -y \ - git \ - python3 \ - python3-pip \ - python3-wheel \ - && pip install --break-system-packages --upgrade setuptools \ - && pip install --break-system-packages -r requirements.txt \ - && apt autoremove -y \ - && apt clean -y \ - && rm -rf /tmp/* /var/tmp/* \ - && find /var/cache/apt/archives /var/lib/apt/lists -not -name lock -type f -delete \ - && find /var/cache -type f -delete - - -ENTRYPOINT ["/app/tools.sh"] - -### Light, CLI only -FROM base AS light - -COPY --from=build /app/full/llama-cli /app/full/llama-completion /app - -WORKDIR /app - -ENTRYPOINT [ "/app/llama-cli" ] - -### Server, Server only -FROM base AS server - -ENV LLAMA_ARG_HOST=0.0.0.0 - -COPY --from=build /app/full/llama-server /app - -WORKDIR /app - -HEALTHCHECK CMD [ "curl", "-f", "http://localhost:8080/health" ] - -ENTRYPOINT [ "/app/llama-server" ] diff --git a/.github/workflows/docker.yml b/.github/workflows/docker.yml index 569039c421..1d7d6438c7 100644 --- a/.github/workflows/docker.yml +++ b/.github/workflows/docker.yml @@ -73,10 +73,10 @@ jobs: { "tag": "cpu", "dockerfile": ".devops/cpu.Dockerfile", "platforms": "linux/amd64", "full": true, 
"light": true, "server": true, "free_disk_space": false, "runs_on": "ubuntu-24.04" }, { "tag": "cpu", "dockerfile": ".devops/cpu.Dockerfile", "platforms": "linux/arm64", "full": true, "light": true, "server": true, "free_disk_space": false, "runs_on": "ubuntu-24.04-arm" }, { "tag": "cpu", "dockerfile": ".devops/s390x.Dockerfile", "platforms": "linux/s390x", "full": true, "light": true, "server": true, "free_disk_space": false, "runs_on": "ubuntu-24.04-s390x" }, - { "tag": "cuda cuda12", "dockerfile": ".devops/cuda.Dockerfile", "platforms": "linux/amd64", "full": true, "light": true, "server": true, "free_disk_space": true, "runs_on": "ubuntu-24.04" }, - { "tag": "cuda cuda12", "dockerfile": ".devops/cuda.Dockerfile", "platforms": "linux/arm64", "full": true, "light": true, "server": true, "free_disk_space": true, "runs_on": "ubuntu-24.04-arm" }, - { "tag": "cuda13", "dockerfile": ".devops/cuda-new.Dockerfile", "platforms": "linux/amd64", "full": true, "light": true, "server": true, "free_disk_space": true, "runs_on": "ubuntu-24.04" }, - { "tag": "cuda13", "dockerfile": ".devops/cuda-new.Dockerfile", "platforms": "linux/arm64", "full": true, "light": true, "server": true, "free_disk_space": true, "runs_on": "ubuntu-24.04-arm" }, + { "tag": "cuda cuda12", "dockerfile": ".devops/cuda.Dockerfile", "cuda_version": "12.9.1", "platforms": "linux/amd64", "full": true, "light": true, "server": true, "free_disk_space": true, "runs_on": "ubuntu-24.04" }, + { "tag": "cuda cuda12", "dockerfile": ".devops/cuda.Dockerfile", "cuda_version": "12.9.1", "platforms": "linux/arm64", "full": true, "light": true, "server": true, "free_disk_space": true, "runs_on": "ubuntu-24.04-arm" }, + { "tag": "cuda13", "dockerfile": ".devops/cuda.Dockerfile", "cuda_version": "13.1.1", "platforms": "linux/amd64", "full": true, "light": true, "server": true, "free_disk_space": true, "runs_on": "ubuntu-24.04" }, + { "tag": "cuda13", "dockerfile": ".devops/cuda.Dockerfile", "cuda_version": "13.1.1", 
"platforms": "linux/arm64", "full": true, "light": true, "server": true, "free_disk_space": true, "runs_on": "ubuntu-24.04-arm" }, { "tag": "musa", "dockerfile": ".devops/musa.Dockerfile", "platforms": "linux/amd64", "full": true, "light": true, "server": true, "free_disk_space": true, "runs_on": "ubuntu-24.04" }, { "tag": "intel", "dockerfile": ".devops/intel.Dockerfile", "platforms": "linux/amd64", "full": true, "light": true, "server": true, "free_disk_space": true, "runs_on": "ubuntu-24.04" }, { "tag": "vulkan", "dockerfile": ".devops/vulkan.Dockerfile", "platforms": "linux/amd64", "full": true, "light": true, "server": true, "free_disk_space": false, "runs_on": "ubuntu-24.04" }, diff --git a/ci/run.sh b/ci/run.sh index e6702a43bd..252a30d47d 100755 --- a/ci/run.sh +++ b/ci/run.sh @@ -221,7 +221,7 @@ function gg_run_ctest_debug { set -e - # Check cmake and ctest are installed + # Check required binaries are installed gg_check_build_requirements (cmake -G "${CMAKE_GENERATOR}" -DCMAKE_BUILD_TYPE=Debug ${CMAKE_EXTRA} .. ) 2>&1 | tee -a $OUT/${ci}-cmake.log @@ -252,7 +252,7 @@ function gg_run_ctest_release { set -e - # Check cmake and ctest are installed + # Check required binaries are installed gg_check_build_requirements (cmake -G "${CMAKE_GENERATOR}" -DCMAKE_BUILD_TYPE=Release ${CMAKE_EXTRA} .. ) 2>&1 | tee -a $OUT/${ci}-cmake.log @@ -627,10 +627,38 @@ function gg_sum_rerank_tiny { } function gg_check_build_requirements { + if ! command -v git &> /dev/null; then + gg_printf 'git not found, please install' + fi + + if ! command -v git-lfs &> /dev/null; then + gg_printf 'git-lfs not found, please install' + fi + + if ! command -v wget &> /dev/null; then + gg_printf 'wget not found, please install' + fi + + if ! command -v python3 &> /dev/null; then + gg_printf 'python3 not found, please install' + fi + + if ! command -v pip3 &> /dev/null; then + gg_printf 'pip3 not found, please install' + fi + + if ! 
python3 -m ensurepip --help &> /dev/null; then + gg_printf 'ensurepip not found, please install python3-venv package' + fi + if ! command -v cmake &> /dev/null; then gg_printf 'cmake not found, please install' fi + if ! command -v ccache &> /dev/null; then + gg_printf 'ccache not found, please consider installing for faster builds' + fi + if ! command -v ctest &> /dev/null; then gg_printf 'ctest not found, please install' fi diff --git a/common/jinja/runtime.cpp b/common/jinja/runtime.cpp index 2232790c31..5b51427aa0 100644 --- a/common/jinja/runtime.cpp +++ b/common/jinja/runtime.cpp @@ -306,6 +306,19 @@ value filter_expression::execute_impl(context & ctx) { filter_id = "strip"; // alias } JJ_DEBUG("Applying filter '%s' to %s", filter_id.c_str(), input->type().c_str()); + // TODO: Refactor filters so this coercion can be done automatically + if (!input->is_undefined() && !is_val(input) && ( + filter_id == "capitalize" || + filter_id == "lower" || + filter_id == "replace" || + filter_id == "strip" || + filter_id == "title" || + filter_id == "upper" || + filter_id == "wordcount" + )) { + JJ_DEBUG("Coercing %s to String for '%s' filter", input->type().c_str(), filter_id.c_str()); + input = mk_val(input->as_string()); + } return try_builtin_func(ctx, filter_id, input)->invoke(func_args(ctx)); } else if (is_stmt(filter)) { diff --git a/common/jinja/value.cpp b/common/jinja/value.cpp index 749113124b..7dc1d65407 100644 --- a/common/jinja/value.cpp +++ b/common/jinja/value.cpp @@ -465,8 +465,9 @@ const func_builtins & value_int_t::get_builtins() const { double val = static_cast(args.get_pos(0)->as_int()); return mk_val(val); }}, - {"tojson", tojson}, + {"safe", tojson}, {"string", tojson}, + {"tojson", tojson}, }; return builtins; } @@ -485,8 +486,9 @@ const func_builtins & value_float_t::get_builtins() const { int64_t val = static_cast(args.get_pos(0)->as_float()); return mk_val(val); }}, - {"tojson", tojson}, + {"safe", tojson}, {"string", tojson}, + {"tojson", 
tojson}, }; return builtins; } @@ -771,6 +773,11 @@ const func_builtins & value_string_t::get_builtins() const { const func_builtins & value_bool_t::get_builtins() const { + static const func_handler tostring = [](const func_args & args) -> value { + args.ensure_vals(); + bool val = args.get_pos(0)->as_bool(); + return mk_val(val ? "True" : "False"); + }; static const func_builtins builtins = { {"default", default_value}, {"int", [](const func_args & args) -> value { @@ -783,11 +790,8 @@ const func_builtins & value_bool_t::get_builtins() const { bool val = args.get_pos(0)->as_bool(); return mk_val(val ? 1.0 : 0.0); }}, - {"string", [](const func_args & args) -> value { - args.ensure_vals(); - bool val = args.get_pos(0)->as_bool(); - return mk_val(val ? "True" : "False"); - }}, + {"safe", tostring}, + {"string", tostring}, {"tojson", tojson}, }; return builtins; @@ -1100,18 +1104,14 @@ const func_builtins & value_object_t::get_builtins() const { } const func_builtins & value_none_t::get_builtins() const { + static const func_handler tostring = [](const func_args &) -> value { + return mk_val("None"); + }; static const func_builtins builtins = { {"default", default_value}, {"tojson", tojson}, - {"string", [](const func_args &) -> value { - return mk_val("None"); - }}, - {"safe", [](const func_args &) -> value { - return mk_val("None"); - }}, - {"strip", [](const func_args &) -> value { - return mk_val("None"); - }}, + {"string", tostring}, + {"safe", tostring}, {"items", empty_value_fn}, {"map", empty_value_fn}, {"reject", empty_value_fn}, diff --git a/docs/build.md b/docs/build.md index ef086ff434..616a838def 100644 --- a/docs/build.md +++ b/docs/build.md @@ -389,7 +389,7 @@ You can download it from your Linux distro's package manager or from here: [ROCm The environment variable [`HIP_VISIBLE_DEVICES`](https://rocm.docs.amd.com/en/latest/understand/gpu_isolation.html#hip-visible-devices) can be used to specify which GPU(s) will be used. 
-If your GPU is not officially supported you can use the environment variable [`HSA_OVERRIDE_GFX_VERSION`] set to a similar GPU, for example 10.3.0 on RDNA2 (e.g. gfx1030, gfx1031, or gfx1035) or 11.0.0 on RDNA3. +If your GPU is not officially supported you can use the environment variable [`HSA_OVERRIDE_GFX_VERSION`] set to a similar GPU, for example 10.3.0 on RDNA2 (e.g. gfx1030, gfx1031, or gfx1035) or 11.0.0 on RDNA3. Note that [`HSA_OVERRIDE_GFX_VERSION`] is [not supported on Windows](https://github.com/ROCm/ROCm/issues/2654). ### Unified Memory diff --git a/src/llama-vocab.cpp b/src/llama-vocab.cpp index 5bce88aab4..cbd361b4b9 100644 --- a/src/llama-vocab.cpp +++ b/src/llama-vocab.cpp @@ -3712,9 +3712,7 @@ int llama_vocab::max_token_len() const { int llama_vocab::find_bpe_rank(const std::string & token_left, const std::string & token_right) const { GGML_ASSERT(token_left.find(' ') == std::string::npos); - GGML_ASSERT(token_left.find('\n') == std::string::npos); GGML_ASSERT(token_right.find(' ') == std::string::npos); - GGML_ASSERT(token_right.find('\n') == std::string::npos); auto it = pimpl->bpe_ranks.find(std::make_pair(token_left, token_right)); if (it == pimpl->bpe_ranks.end()) { diff --git a/tests/test-jinja.cpp b/tests/test-jinja.cpp index 5d4b2806ac..ce3008f4c7 100644 --- a/tests/test-jinja.cpp +++ b/tests/test-jinja.cpp @@ -523,6 +523,18 @@ static void test_filters(testing & t) { "hello" ); + test_template(t, "upper array", + "{{ items|upper }}", + {{"items", json::array({"hello", "world"})}}, + "['HELLO', 'WORLD']" + ); + + test_template(t, "upper dict", + "{{ items|upper }}", + {{"items", {{"hello", "world"}}}}, + "{'HELLO': 'WORLD'}" + ); + test_template(t, "capitalize", "{{ 'heLlo World'|capitalize }}", json::object(),