diff --git a/.devops/cann.Dockerfile b/.devops/cann.Dockerfile index 97ee3eedb6..6de22215e4 100644 --- a/.devops/cann.Dockerfile +++ b/.devops/cann.Dockerfile @@ -42,6 +42,7 @@ RUN source /usr/local/Ascend/ascend-toolkit/set_env.sh --force \ -DGGML_CANN=ON \ -DCMAKE_BUILD_TYPE=Release \ -DSOC_TYPE=ascend${CHIP_TYPE} \ + -DUSE_ACL_GRAPH=ON \ . && \ cmake --build build --config Release -j$(nproc) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index e3b120fcda..187c861437 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -1394,6 +1394,11 @@ jobs: arch: [x86, aarch64] chip_type: ['910b', '310p'] build: ['Release'] + use_acl_graph: ['on', 'off'] + exclude: + # 310P does not support USE_ACL_GRAPH=on + - chip_type: '310p' + use_acl_graph: 'on' runs-on: ${{ matrix.arch == 'aarch64' && 'ubuntu-24.04-arm' || 'ubuntu-24.04' }} steps: - name: Checkout @@ -1419,6 +1424,7 @@ jobs: env: BUILD_TYPE: ${{ matrix.build }} SOC_TYPE: ascend${{ matrix.chip_type }} + USE_ACL_GRAPH: ${{ matrix.use_acl_graph }} run: | HOST_UID=$(id -u) HOST_GID=$(id -g) @@ -1428,6 +1434,7 @@ jobs: -w /workspace \ -e SOC_TYPE=${SOC_TYPE} \ -e BUILD_TYPE=${BUILD_TYPE} \ + -e USE_ACL_GRAPH=${USE_ACL_GRAPH} \ "${{ steps.cann-image.outputs.image }}" \ bash -lc ' set -e @@ -1438,7 +1445,8 @@ jobs: cmake -S . -B build \ -DCMAKE_BUILD_TYPE=${BUILD_TYPE} \ -DGGML_CANN=on \ - -DSOC_TYPE=${SOC_TYPE} + -DSOC_TYPE=${SOC_TYPE} \ + -DUSE_ACL_GRAPH=${USE_ACL_GRAPH} cmake --build build -j $(nproc) chown -R '"${HOST_UID}"':'"${HOST_GID}"' /workspace/build diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 272701fb9e..d8b3b95df0 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -681,9 +681,25 @@ jobs: openEuler-cann: strategy: matrix: - arch: [x86, aarch64] - chip_type: ['910b', '310p'] - build: ['Release'] + include: + # 910b with aclgraph (both architectures) + - arch: x86 + chip_type: '910b' + build: 'Release' + use_acl_graph: 'on' + - arch: aarch64 + chip_type: '910b' + build: 'Release' + use_acl_graph: 'on' + # 310p without aclgraph (both architectures) + - arch: x86 + chip_type: '310p' + build: 'Release' + use_acl_graph: 'off' + - arch: aarch64 + chip_type: '310p' + build: 'Release' + use_acl_graph: 'off' runs-on: ${{ matrix.arch == 'aarch64' && 'ubuntu-24.04-arm' || 'ubuntu-24.04' }} steps: - name: Checkout @@ -709,6 +725,7 @@ jobs: env: BUILD_TYPE: ${{ matrix.build }} SOC_TYPE: ascend${{ matrix.chip_type }} + USE_ACL_GRAPH: ${{ matrix.use_acl_graph }} run: | HOST_UID=$(id -u) HOST_GID=$(id -g) @@ -718,6 +735,7 @@ jobs: -w /workspace \ -e SOC_TYPE=${SOC_TYPE} \ -e BUILD_TYPE=${BUILD_TYPE} \ + -e USE_ACL_GRAPH=${USE_ACL_GRAPH} \ "${{ steps.cann-image.outputs.image }}" \ bash -lc ' set -e @@ -728,7 +746,8 @@ jobs: cmake -S . -B build \ -DCMAKE_BUILD_TYPE=${BUILD_TYPE} \ -DGGML_CANN=on \ - -DSOC_TYPE=${SOC_TYPE} + -DSOC_TYPE=${SOC_TYPE} \ + -DUSE_ACL_GRAPH=${USE_ACL_GRAPH} cmake --build build -j $(nproc) chown -R '"${HOST_UID}"':'"${HOST_GID}"' /workspace/build @@ -741,13 +760,13 @@ jobs: - name: Pack artifacts run: | cp LICENSE ./build/bin/ - tar -czvf llama-${{ steps.tag.outputs.name }}-bin-${{ matrix.chip_type }}-openEuler-${{ matrix.arch }}.tar.gz --transform "s,./,llama-${{ steps.tag.outputs.name }}/," -C ./build/bin . + tar -czvf llama-${{ steps.tag.outputs.name }}-bin-${{ matrix.chip_type }}-openEuler-${{ matrix.arch }}${{ matrix.use_acl_graph == 'on' && '-aclgraph' || '' }}.tar.gz --transform "s,./,llama-${{ steps.tag.outputs.name }}/," -C ./build/bin . - name: Upload artifacts uses: actions/upload-artifact@v4 with: - path: llama-${{ steps.tag.outputs.name }}-bin-${{ matrix.chip_type }}-openEuler-${{ matrix.arch }}.tar.gz - name: llama-bin-${{ matrix.chip_type }}-openEuler-${{ matrix.arch }}.tar.gz + path: llama-${{ steps.tag.outputs.name }}-bin-${{ matrix.chip_type }}-openEuler-${{ matrix.arch }}${{ matrix.use_acl_graph == 'on' && '-aclgraph' || '' }}.tar.gz + name: llama-bin-${{ matrix.chip_type }}-openEuler-${{ matrix.arch }}${{ matrix.use_acl_graph == 'on' && '-aclgraph' || '' }}.tar.gz release: if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }} @@ -862,9 +881,9 @@ jobs: **openEuler:** - [openEuler x86 (310p)](https://github.com/ggml-org/llama.cpp/releases/download/${{ steps.tag.outputs.name }}/llama-${{ steps.tag.outputs.name }}-bin-310p-openEuler-x86.tar.gz) - - [openEuler x86 (910b)](https://github.com/ggml-org/llama.cpp/releases/download/${{ steps.tag.outputs.name }}/llama-${{ steps.tag.outputs.name }}-bin-910b-openEuler-x86.tar.gz) + - [openEuler x86 (910b, ACL Graph)](https://github.com/ggml-org/llama.cpp/releases/download/${{ steps.tag.outputs.name }}/llama-${{ steps.tag.outputs.name }}-bin-910b-openEuler-x86-aclgraph.tar.gz) - [openEuler aarch64 (310p)](https://github.com/ggml-org/llama.cpp/releases/download/${{ steps.tag.outputs.name }}/llama-${{ steps.tag.outputs.name }}-bin-310p-openEuler-aarch64.tar.gz) - - [openEuler aarch64 (910b)](https://github.com/ggml-org/llama.cpp/releases/download/${{ steps.tag.outputs.name }}/llama-${{ steps.tag.outputs.name }}-bin-910b-openEuler-aarch64.tar.gz) + - [openEuler aarch64 (910b, ACL Graph)](https://github.com/ggml-org/llama.cpp/releases/download/${{ steps.tag.outputs.name }}/llama-${{ steps.tag.outputs.name }}-bin-910b-openEuler-aarch64-aclgraph.tar.gz) - name: Upload release id: upload_release diff --git a/ggml/src/ggml-cann/common.h b/ggml/src/ggml-cann/common.h index 6895349b20..70d3f2b225 100644 --- a/ggml/src/ggml-cann/common.h +++ b/ggml/src/ggml-cann/common.h @@ -382,7 +382,7 @@ struct ggml_cann_graph_lru_cache { std::list cache_list; /**< List storing cached graphs as raw pointers. */ - ggml_cann_graph_lru_cache() { capacity = parse_integer(get_env("GGML_CANN_GRAPH_CACHE_CAPACITY").value_or("12")); } + ggml_cann_graph_lru_cache() { capacity = parse_integer(get_env_as_lowercase("GGML_CANN_GRAPH_CACHE_CAPACITY").value_or("12")); } /** * @brief Push a new graph to the front of the cache. @@ -574,7 +574,7 @@ struct ggml_backend_cann_context { description = aclrtGetSocName(); #ifdef USE_ACL_GRAPH - acl_graph_mode = parse_bool(get_env("GGML_CANN_ACL_GRAPH").value_or("on")); + acl_graph_mode = parse_bool(get_env_as_lowercase("GGML_CANN_ACL_GRAPH").value_or("on")); GGML_LOG_INFO("%s: device %d execution mode is %s (%s)\n", __func__, device, acl_graph_mode ? "GRAPH" : "EAGER", acl_graph_mode ? "acl graph enabled" : "acl graph disabled"); #endif