From 908eef83ca0ed9f2119aa5264ffd6a31abfe34ff Mon Sep 17 00:00:00 2001 From: Yuri Khrustalev Date: Thu, 20 Nov 2025 07:33:56 -0500 Subject: [PATCH 01/11] Add llama-bench for android (#1) --- .../extra-android-android-arm64-tools.yml | 66 +++++++++++++++++++ 1 file changed, 66 insertions(+) create mode 100644 .github/workflows/extra-android-android-arm64-tools.yml diff --git a/.github/workflows/extra-android-android-arm64-tools.yml b/.github/workflows/extra-android-android-arm64-tools.yml new file mode 100644 index 00000000000..90858383981 --- /dev/null +++ b/.github/workflows/extra-android-android-arm64-tools.yml @@ -0,0 +1,66 @@ +name: Android ARM64 llama-bench Build + +on: + workflow_dispatch: + push: + branches: + - benchmarks + + pull_request: + branches: + - benchmarks + +concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: true + +env: + # https://developer.android.com/ndk/downloads + NDK_VERSION: "29.0.14206865" + +jobs: + android-arm64-llama-bench: + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v5 + + - uses: ggml-org/ccache-action@v1.2.16 + with: + key: android-arm64-llama-bench + + - uses: actions/setup-java@v3 + with: + java-version: "17" + distribution: "temurin" + + - uses: android-actions/setup-android@v3 + with: + log-accepted-android-sdk-licenses: false + + - run: | + sdkmanager "ndk;${{ env.NDK_VERSION }}" + echo "ANDROID_NDK=${ANDROID_SDK_ROOT}/ndk/${{ env.NDK_VERSION }}" >> $GITHUB_ENV + + - run: | + cmake -B build -G Ninja \ + -DCMAKE_TOOLCHAIN_FILE=${ANDROID_NDK}/build/cmake/android.toolchain.cmake \ + -DANDROID_ABI=arm64-v8a \ + -DANDROID_PLATFORM=android-28 \ + -DCMAKE_BUILD_TYPE=Release \ + -DGGML_NATIVE=OFF \ + -DGGML_BACKEND_DL=ON \ + -DGGML_CPU_ALL_VARIANTS=ON \ + -DLLAMA_CURL=OFF \ + -DGGML_OPENMP=OFF \ + -DLLAMA_BUILD_TOOLS=ON \ + -DLLAMA_BUILD_EXAMPLES=OFF \ + -DLLAMA_BUILD_TESTS=OFF \ + -DLLAMA_BUILD_SERVER=OFF + cmake --build build --config Release --target llama-bench + + - uses: 
actions/upload-artifact@v4 + with: + name: llama-bench-android-arm64-v8a + path: build/bin/* + if-no-files-found: error From 0626fa061ff2ff8e3a6be3b0069beeef0d192526 Mon Sep 17 00:00:00 2001 From: Yuri Khrustalev Date: Thu, 20 Nov 2025 07:54:33 -0500 Subject: [PATCH 02/11] Add Vulkan build (#2) --- .../extra-android-android-arm64-tools.yml | 66 -------- .github/workflows/extra_benchmark_tools.yml | 148 ++++++++++++++++++ 2 files changed, 148 insertions(+), 66 deletions(-) delete mode 100644 .github/workflows/extra-android-android-arm64-tools.yml create mode 100644 .github/workflows/extra_benchmark_tools.yml diff --git a/.github/workflows/extra-android-android-arm64-tools.yml b/.github/workflows/extra-android-android-arm64-tools.yml deleted file mode 100644 index 90858383981..00000000000 --- a/.github/workflows/extra-android-android-arm64-tools.yml +++ /dev/null @@ -1,66 +0,0 @@ -name: Android ARM64 llama-bench Build - -on: - workflow_dispatch: - push: - branches: - - benchmarks - - pull_request: - branches: - - benchmarks - -concurrency: - group: ${{ github.workflow }}-${{ github.ref }} - cancel-in-progress: true - -env: - # https://developer.android.com/ndk/downloads - NDK_VERSION: "29.0.14206865" - -jobs: - android-arm64-llama-bench: - runs-on: ubuntu-latest - - steps: - - uses: actions/checkout@v5 - - - uses: ggml-org/ccache-action@v1.2.16 - with: - key: android-arm64-llama-bench - - - uses: actions/setup-java@v3 - with: - java-version: "17" - distribution: "temurin" - - - uses: android-actions/setup-android@v3 - with: - log-accepted-android-sdk-licenses: false - - - run: | - sdkmanager "ndk;${{ env.NDK_VERSION }}" - echo "ANDROID_NDK=${ANDROID_SDK_ROOT}/ndk/${{ env.NDK_VERSION }}" >> $GITHUB_ENV - - - run: | - cmake -B build -G Ninja \ - -DCMAKE_TOOLCHAIN_FILE=${ANDROID_NDK}/build/cmake/android.toolchain.cmake \ - -DANDROID_ABI=arm64-v8a \ - -DANDROID_PLATFORM=android-28 \ - -DCMAKE_BUILD_TYPE=Release \ - -DGGML_NATIVE=OFF \ - -DGGML_BACKEND_DL=ON \ - 
-DGGML_CPU_ALL_VARIANTS=ON \ - -DLLAMA_CURL=OFF \ - -DGGML_OPENMP=OFF \ - -DLLAMA_BUILD_TOOLS=ON \ - -DLLAMA_BUILD_EXAMPLES=OFF \ - -DLLAMA_BUILD_TESTS=OFF \ - -DLLAMA_BUILD_SERVER=OFF - cmake --build build --config Release --target llama-bench - - - uses: actions/upload-artifact@v4 - with: - name: llama-bench-android-arm64-v8a - path: build/bin/* - if-no-files-found: error diff --git a/.github/workflows/extra_benchmark_tools.yml b/.github/workflows/extra_benchmark_tools.yml new file mode 100644 index 00000000000..9c90343b44e --- /dev/null +++ b/.github/workflows/extra_benchmark_tools.yml @@ -0,0 +1,148 @@ +name: Extra Benchmark Tools + +on: + workflow_dispatch: + push: + branches: + - benchmarks + + pull_request: + branches: + - benchmarks + +concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: true + +env: + # https://developer.android.com/ndk/downloads + NDK_VERSION: "29.0.14206865" + +jobs: + android-arm64: + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v5 + + - uses: ggml-org/ccache-action@v1.2.16 + with: + key: android-arm64-llama-bench + + - uses: actions/setup-java@v3 + with: + java-version: "17" + distribution: "temurin" + + - uses: android-actions/setup-android@v3 + with: + log-accepted-android-sdk-licenses: false + + - run: | + sdkmanager "ndk;${{ env.NDK_VERSION }}" + echo "ANDROID_NDK=${ANDROID_SDK_ROOT}/ndk/${{ env.NDK_VERSION }}" >> $GITHUB_ENV + + - run: | + cmake -B build -G Ninja \ + -DCMAKE_TOOLCHAIN_FILE=${ANDROID_NDK}/build/cmake/android.toolchain.cmake \ + -DANDROID_ABI=arm64-v8a \ + -DANDROID_PLATFORM=android-28 \ + -DCMAKE_BUILD_TYPE=Release \ + -DGGML_NATIVE=OFF \ + -DGGML_BACKEND_DL=ON \ + -DGGML_CPU_ALL_VARIANTS=ON \ + -DLLAMA_CURL=OFF \ + -DGGML_OPENMP=OFF \ + -DLLAMA_BUILD_TOOLS=ON \ + -DLLAMA_BUILD_EXAMPLES=OFF \ + -DLLAMA_BUILD_TESTS=OFF \ + -DLLAMA_BUILD_SERVER=OFF + cmake --build build --config Release --target llama-bench + + - uses: actions/upload-artifact@v4 + with: + name: 
llama-bench-android-arm64-v8a + path: build/bin/* + if-no-files-found: error + + windows: + runs-on: windows-2025 + + env: + OPENBLAS_VERSION: 0.3.23 + VULKAN_VERSION: 1.4.313.2 + + strategy: + matrix: + include: + - backend: 'vulkan' + arch: 'x64' + defines: '-DGGML_VULKAN=ON' + target: 'ggml-vulkan' + - backend: 'opencl-adreno' + arch: 'arm64' + defines: '-G "Ninja Multi-Config" -D CMAKE_TOOLCHAIN_FILE=cmake/arm64-windows-llvm.cmake -DCMAKE_PREFIX_PATH="$env:RUNNER_TEMP/opencl-arm64-release" -DGGML_OPENCL=ON -DGGML_OPENCL_USE_ADRENO_KERNELS=ON' + target: 'ggml-opencl' + + steps: + - name: Clone + id: checkout + uses: actions/checkout@v4 + + - name: ccache + uses: ggml-org/ccache-action@v1.2.16 + with: + key: windows-latest-cmake-${{ matrix.backend }}-${{ matrix.arch }} + variant: ccache + evict-old-files: 1d + + - name: Install Vulkan SDK + id: get_vulkan + if: ${{ matrix.backend == 'vulkan' }} + run: | + curl.exe -o $env:RUNNER_TEMP/VulkanSDK-Installer.exe -L "https://sdk.lunarg.com/sdk/download/${env:VULKAN_VERSION}/windows/vulkansdk-windows-X64-${env:VULKAN_VERSION}.exe" + & "$env:RUNNER_TEMP\VulkanSDK-Installer.exe" --accept-licenses --default-answer --confirm-command install + Add-Content $env:GITHUB_ENV "VULKAN_SDK=C:\VulkanSDK\${env:VULKAN_VERSION}" + Add-Content $env:GITHUB_PATH "C:\VulkanSDK\${env:VULKAN_VERSION}\bin" + + - name: Install Ninja + id: install_ninja + run: | + choco install ninja + + - name: Install OpenCL Headers and Libs + id: install_opencl + if: ${{ matrix.backend == 'opencl-adreno' && matrix.arch == 'arm64' }} + run: | + git clone https://github.com/KhronosGroup/OpenCL-Headers + cd OpenCL-Headers + cmake -B build ` + -DBUILD_TESTING=OFF ` + -DOPENCL_HEADERS_BUILD_TESTING=OFF ` + -DOPENCL_HEADERS_BUILD_CXX_TESTS=OFF ` + -DCMAKE_INSTALL_PREFIX="$env:RUNNER_TEMP/opencl-arm64-release" + cmake --build build --target install + git clone https://github.com/KhronosGroup/OpenCL-ICD-Loader + cd OpenCL-ICD-Loader + cmake -B build-arm64-release ` 
+ -A arm64 ` + -DCMAKE_PREFIX_PATH="$env:RUNNER_TEMP/opencl-arm64-release" ` + -DCMAKE_INSTALL_PREFIX="$env:RUNNER_TEMP/opencl-arm64-release" + cmake --build build-arm64-release --target install --config release + + - name: Build + id: cmake_build + run: | + cmake -S . -B build ${{ matrix.defines }} -DGGML_NATIVE=OFF -DGGML_CPU=OFF -DGGML_BACKEND_DL=ON -DLLAMA_CURL=OFF -DLLAMA_BUILD_TOOLS=ON -DLLAMA_BUILD_EXAMPLES=OFF -DLLAMA_BUILD_TESTS=OFF -DLLAMA_BUILD_SERVER=OFF + cmake --build build --config Release --target llama-bench + + - name: Pack artifacts + id: pack_artifacts + run: | + 7z a llama-bench-win-${{ matrix.backend }}-${{ matrix.arch }}.zip .\build\bin\Release\* + + - name: Upload artifacts + uses: actions/upload-artifact@v4 + with: + path: llama-bench-win-${{ matrix.backend }}-${{ matrix.arch }}.zip + name: llama-bench-win-${{ matrix.backend }}-${{ matrix.arch }}.zip From 1a2643deeaf63510ddbd85f7142404448039b403 Mon Sep 17 00:00:00 2001 From: Yuri Khrustalev Date: Thu, 20 Nov 2025 08:22:56 -0500 Subject: [PATCH 03/11] Add release button (#5) --- .github/workflows/extra_benchmark_tools.yml | 89 +++++++++++++++++++++ 1 file changed, 89 insertions(+) diff --git a/.github/workflows/extra_benchmark_tools.yml b/.github/workflows/extra_benchmark_tools.yml index 9c90343b44e..33b4c3d3a33 100644 --- a/.github/workflows/extra_benchmark_tools.yml +++ b/.github/workflows/extra_benchmark_tools.yml @@ -2,6 +2,11 @@ name: Extra Benchmark Tools on: workflow_dispatch: + inputs: + create_release: + description: 'Create new release' + required: true + type: boolean push: branches: - benchmarks @@ -146,3 +151,87 @@ jobs: with: path: llama-bench-win-${{ matrix.backend }}-${{ matrix.arch }}.zip name: llama-bench-win-${{ matrix.backend }}-${{ matrix.arch }}.zip + + release: + if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/benchmarks' ) || github.event.inputs.create_release == 'true' }} + + # Fine-grained permissions + #
https://docs.github.com/en/actions/security-for-github-actions/security-guides/automatic-token-authentication#modifying-the-permissions-for-the-github_token + permissions: + contents: write # for creating release + + runs-on: ubuntu-latest + + needs: + - android-arm64 + - windows + + steps: + - name: Clone + id: checkout + uses: actions/checkout@v4 + with: + fetch-depth: 0 + + - name: Determine tag name + id: tag + uses: ./.github/actions/get-tag-name + + - name: Download artifacts + id: download-artifact + uses: actions/download-artifact@v4 + with: + path: ./artifact + merge-multiple: true + + - name: Move artifacts + id: move_artifacts + run: | + mkdir -p release + mv -v artifact/*.zip release/ || true + + # Package Android artifacts + if [ -d "artifact/llama-bench-android-arm64-v8a" ]; then + cd artifact + zip -r ../release/llama-bench-${{ steps.tag.outputs.name }}-android-arm64-v8a.zip llama-bench-android-arm64-v8a/ + cd .. + fi + + # Rename Windows artifacts to include tag + cd release + for f in llama-bench-win-*.zip; do + if [ -f "$f" ]; then + newname="llama-bench-${{ steps.tag.outputs.name }}-${f#llama-bench-}" + mv "$f" "$newname" + fi + done + + - name: Create release + id: create_release + uses: ggml-org/action-create-release@v1 + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + with: + tag_name: ${{ steps.tag.outputs.name }} + + - name: Upload release + id: upload_release + uses: actions/github-script@v3 + with: + github-token: ${{secrets.GITHUB_TOKEN}} + script: | + const path = require('path'); + const fs = require('fs'); + const release_id = '${{ steps.create_release.outputs.id }}'; + for (let file of await fs.readdirSync('./release')) { + if (path.extname(file) === '.zip') { + console.log('uploadReleaseAsset', file); + await github.repos.uploadReleaseAsset({ + owner: context.repo.owner, + repo: context.repo.repo, + release_id: release_id, + name: file, + data: await fs.readFileSync(`./release/${file}`) + }); + } + } From 
bb7e07a20e582a4ace7f48b8640e75b5cd745ae5 Mon Sep 17 00:00:00 2001 From: Yuri Khrustalev Date: Thu, 20 Nov 2025 09:29:24 -0500 Subject: [PATCH 04/11] Add release button, take 2 (#6) --- .github/workflows/extra_benchmark_tools.yml | 21 ++++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) diff --git a/.github/workflows/extra_benchmark_tools.yml b/.github/workflows/extra_benchmark_tools.yml index 33b4c3d3a33..1a9f4e97cc9 100644 --- a/.github/workflows/extra_benchmark_tools.yml +++ b/.github/workflows/extra_benchmark_tools.yml @@ -175,7 +175,26 @@ jobs: - name: Determine tag name id: tag - uses: ./.github/actions/get-tag-name + run: | + # Try to get existing tag + if git describe --tags --exact-match 2>/dev/null; then + TAG_NAME=$(git describe --tags --exact-match) + echo "tag_exists=true" >> $GITHUB_OUTPUT + else + # Generate tag name based on date and short SHA + TAG_NAME="bench-tools-$(date +%Y%m%d)-$(git rev-parse --short HEAD)" + echo "tag_exists=false" >> $GITHUB_OUTPUT + fi + echo "name=${TAG_NAME}" >> $GITHUB_OUTPUT + echo "Generated tag name: ${TAG_NAME}" + + - name: Create tag if needed + if: steps.tag.outputs.tag_exists == 'false' + run: | + git config user.name "github-actions[bot]" + git config user.email "github-actions[bot]@users.noreply.github.com" + git tag -a "${{ steps.tag.outputs.name }}" -m "Benchmark tools release ${{ steps.tag.outputs.name }}" + git push origin "${{ steps.tag.outputs.name }}" - name: Download artifacts id: download-artifact From 934f1dc31b652374a5451bcf7b31f1b4d284871f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alberto=20Cabrera=20P=C3=A9rez?= <1478977+Alcpz@users.noreply.github.com> Date: Thu, 20 Nov 2025 14:43:53 +0000 Subject: [PATCH 05/11] llama-bench: Enabled codepath to skip depth computation (#7) Implement conditional prefill computation skipping in llama-bench: disable computation for --depth prefill while keeping it enabled for prefill benchmarks. 
- Default behavior (no flag): Depth prefill skips computation - With `--enable-depth-computation`: Depth prefill performs full computation - `-p` benchmarks: Always perform computation (not affected by this flag) --- include/llama.h | 3 +++ src/llama-context.cpp | 20 ++++++++++++++++++++ src/llama-context.h | 5 +++++ tools/llama-bench/llama-bench.cpp | 8 ++++++++ 4 files changed, 36 insertions(+) diff --git a/include/llama.h b/include/llama.h index 8547226ff21..4e0b1d2093d 100644 --- a/include/llama.h +++ b/include/llama.h @@ -906,6 +906,9 @@ extern "C" { // If true, all model tensors are activated during llama_decode() to load and cache their weights. LLAMA_API void llama_set_warmup(struct llama_context * ctx, bool warmup); + LLAMA_API void llama_set_skip_batched_compute(struct llama_context * ctx, bool skip); + LLAMA_API bool llama_get_skip_batched_compute(struct llama_context * ctx); + // Set abort callback LLAMA_API void llama_set_abort_callback(struct llama_context * ctx, ggml_abort_callback abort_callback, void * abort_callback_data); diff --git a/src/llama-context.cpp b/src/llama-context.cpp index 70a3ec62dfc..dd4f28a9512 100644 --- a/src/llama-context.cpp +++ b/src/llama-context.cpp @@ -715,6 +715,14 @@ void llama_context::set_warmup(bool value) { cparams.warmup = value; } +void llama_context::set_skip_batched_compute(bool value) { + skip_batched_compute = value; +} + +bool llama_context::get_skip_batched_compute() const { + return skip_batched_compute; +} + void llama_context::set_adapter_lora( llama_adapter_lora * adapter, float scale) { @@ -1465,6 +1473,10 @@ llm_graph_params llama_context::graph_params( ggml_status llama_context::graph_compute( ggml_cgraph * gf, bool batched) { + if (batched && skip_batched_compute) { + return GGML_STATUS_SUCCESS; + } + int n_threads = batched ? cparams.n_threads_batch : cparams.n_threads; ggml_threadpool_t tp = batched ? 
threadpool_batch : threadpool; @@ -2465,6 +2477,14 @@ void llama_set_warmup(llama_context * ctx, bool warmup) { ctx->set_warmup(warmup); } +void llama_set_skip_batched_compute(llama_context * ctx, bool skip) { + ctx->set_skip_batched_compute(skip); +} + +bool llama_get_skip_batched_compute(llama_context * ctx) { + return ctx->get_skip_batched_compute(); +} + void llama_synchronize(llama_context * ctx) { ctx->synchronize(); } diff --git a/src/llama-context.h b/src/llama-context.h index 20cbd789554..ec7a06860d7 100644 --- a/src/llama-context.h +++ b/src/llama-context.h @@ -79,6 +79,8 @@ struct llama_context { void set_embeddings (bool value); void set_causal_attn(bool value); void set_warmup(bool value); + void set_skip_batched_compute(bool value); + bool get_skip_batched_compute() const; void set_adapter_lora( llama_adapter_lora * adapter, @@ -296,6 +298,9 @@ struct llama_context { // env: LLAMA_GRAPH_REUSE_DISABLE bool graph_reuse_disable = false; + // skip batched compute (used for depth prefill in benchmarks) + bool skip_batched_compute = false; + // perf mutable int64_t t_start_us = 0; mutable int64_t t_load_us = 0; diff --git a/tools/llama-bench/llama-bench.cpp b/tools/llama-bench/llama-bench.cpp index 852a512451d..3de18fe709e 100644 --- a/tools/llama-bench/llama-bench.cpp +++ b/tools/llama-bench/llama-bench.cpp @@ -344,6 +344,7 @@ struct cmd_params { bool verbose; bool progress; bool no_warmup; + bool enable_depth_computation; output_formats output_format; output_formats output_format_stderr; }; @@ -382,6 +383,7 @@ static const cmd_params cmd_params_defaults = { /* verbose */ false, /* progress */ false, /* no_warmup */ false, + /* enable_depth_computation */ false, /* output_format */ MARKDOWN, /* output_format_stderr */ NONE, }; @@ -406,6 +408,7 @@ static void print_usage(int /* argc */, char ** argv) { printf(" -v, --verbose verbose output\n"); printf(" --progress print test progress indicators\n"); printf(" --no-warmup skip warmup runs before 
benchmarking\n"); + printf(" --enable-depth-computation enable computation during depth prefill (disabled by default)\n"); if (llama_supports_rpc()) { printf(" -rpc, --rpc register RPC devices (comma separated)\n"); } @@ -509,6 +512,7 @@ static cmd_params parse_cmd_params(int argc, char ** argv) { params.delay = cmd_params_defaults.delay; params.progress = cmd_params_defaults.progress; params.no_warmup = cmd_params_defaults.no_warmup; + params.enable_depth_computation = cmd_params_defaults.enable_depth_computation; for (int i = 1; i < argc; i++) { arg = argv[i]; @@ -933,6 +937,8 @@ static cmd_params parse_cmd_params(int argc, char ** argv) { params.progress = true; } else if (arg == "--no-warmup") { params.no_warmup = true; + } else if (arg == "--enable-depth-computation") { + params.enable_depth_computation = true; } else { invalid_param = true; break; @@ -2160,7 +2166,9 @@ int main(int argc, char ** argv) { fprintf(stderr, "llama-bench: benchmark %d/%zu: depth run %d/%d\n", params_idx, params_count, i + 1, params.reps); } + llama_set_skip_batched_compute(ctx, !params.enable_depth_computation); bool res = test_prompt(ctx, t.n_depth, t.n_batch, t.n_threads); + llama_set_skip_batched_compute(ctx, false); if (!res) { fprintf(stderr, "%s: error: failed to run depth\n", __func__); exit(1); From a956f73a0d8855c27f330c879496f28aff0d134c Mon Sep 17 00:00:00 2001 From: Yuri Khrustalev Date: Thu, 20 Nov 2025 10:00:28 -0500 Subject: [PATCH 06/11] Add release button, take 3 (#8) --- .github/workflows/extra_benchmark_tools.yml | 21 ++++++++++----------- 1 file changed, 10 insertions(+), 11 deletions(-) diff --git a/.github/workflows/extra_benchmark_tools.yml b/.github/workflows/extra_benchmark_tools.yml index 1a9f4e97cc9..fdee3d6aa90 100644 --- a/.github/workflows/extra_benchmark_tools.yml +++ b/.github/workflows/extra_benchmark_tools.yml @@ -64,10 +64,16 @@ jobs: -DLLAMA_BUILD_SERVER=OFF cmake --build build --config Release --target llama-bench + - name: Pack artifacts + id: 
pack_artifacts + run: | + cd build/bin + zip -r ../../llama-bench-android-arm64-v8a.zip ./* + - uses: actions/upload-artifact@v4 with: - name: llama-bench-android-arm64-v8a - path: build/bin/* + name: llama-bench-android-arm64-v8a.zip + path: llama-bench-android-arm64-v8a.zip if-no-files-found: error windows: @@ -209,16 +215,9 @@ jobs: mkdir -p release mv -v artifact/*.zip release/ || true - # Package Android artifacts - if [ -d "artifact/llama-bench-android-arm64-v8a" ]; then - cd artifact - zip -r ../release/llama-bench-${{ steps.tag.outputs.name }}-android-arm64-v8a.zip llama-bench-android-arm64-v8a/ - cd .. - fi - - # Rename Windows artifacts to include tag + # Rename all artifacts to include tag cd release - for f in llama-bench-win-*.zip; do + for f in llama-bench-*.zip; do if [ -f "$f" ]; then newname="llama-bench-${{ steps.tag.outputs.name }}-${f#llama-bench-}" mv "$f" "$newname" From 2d985fa8d5bac003f836c02e20f0d752d57ce052 Mon Sep 17 00:00:00 2001 From: Yuri Khrustalev Date: Thu, 20 Nov 2025 10:16:32 -0500 Subject: [PATCH 07/11] Add macos build (#9) --- .github/workflows/extra_benchmark_tools.yml | 48 +++++++++++++++++++++ 1 file changed, 48 insertions(+) diff --git a/.github/workflows/extra_benchmark_tools.yml b/.github/workflows/extra_benchmark_tools.yml index fdee3d6aa90..87a0dd30fb4 100644 --- a/.github/workflows/extra_benchmark_tools.yml +++ b/.github/workflows/extra_benchmark_tools.yml @@ -76,6 +76,53 @@ jobs: path: llama-bench-android-arm64-v8a.zip if-no-files-found: error + macOS-arm64: + runs-on: macos-14 + + steps: + - name: Clone + id: checkout + uses: actions/checkout@v4 + with: + fetch-depth: 0 + + - name: ccache + uses: ggml-org/ccache-action@v1.2.16 + with: + key: macOS-latest-cmake-arm64-bench + evict-old-files: 1d + + - name: Build + id: cmake_build + run: | + sysctl -a + cmake -B build \ + -DCMAKE_INSTALL_RPATH='@loader_path' \ + -DCMAKE_BUILD_WITH_INSTALL_RPATH=ON \ + -DLLAMA_FATAL_WARNINGS=ON \ + -DGGML_METAL_USE_BF16=ON \ + 
-DGGML_METAL_EMBED_LIBRARY=ON \ + -DGGML_RPC=ON \ + -DGGML_NATIVE=OFF \ + -DLLAMA_CURL=OFF \ + -DLLAMA_BUILD_TOOLS=ON \ + -DLLAMA_BUILD_EXAMPLES=OFF \ + -DLLAMA_BUILD_TESTS=OFF \ + -DLLAMA_BUILD_SERVER=OFF + cmake --build build --config Release -j $(sysctl -n hw.logicalcpu) --target llama-bench + + - name: Pack artifacts + id: pack_artifacts + run: | + cd build/bin + zip -r ../../llama-bench-macos-arm64.zip ./* + + - name: Upload artifacts + uses: actions/upload-artifact@v4 + with: + path: llama-bench-macos-arm64.zip + name: llama-bench-macos-arm64.zip + windows: runs-on: windows-2025 @@ -170,6 +217,7 @@ jobs: needs: - android-arm64 + - macOS-arm64 - windows steps: From 8a656aa6fa31f7a08d379a0bcbdad7d926fd1cbc Mon Sep 17 00:00:00 2001 From: Yuri Khrustalev Date: Thu, 20 Nov 2025 11:23:53 -0500 Subject: [PATCH 08/11] Add Windows CUDA bench build job to extra_benchmark_tools workflow (#10) Added a new windows-cuda job that: - Uses Windows 2022 runner with CUDA 12.4 - Installs CUDA toolkit and Ninja build system - Builds llama-bench with CUDA support enabled - Packages and uploads the benchmark tool artifacts - Follows the same pattern as the release.yml windows-cuda job Updated the release job to depend on the new windows-cuda job. 
*Make sure to read the [contributing guidelines](https://github.com/ggml-org/llama.cpp/blob/master/CONTRIBUTING.md) before submitting a PR* Co-authored-by: Claude --- .github/workflows/extra_benchmark_tools.yml | 58 +++++++++++++++++++++ 1 file changed, 58 insertions(+) diff --git a/.github/workflows/extra_benchmark_tools.yml b/.github/workflows/extra_benchmark_tools.yml index 87a0dd30fb4..e7777db9ff2 100644 --- a/.github/workflows/extra_benchmark_tools.yml +++ b/.github/workflows/extra_benchmark_tools.yml @@ -205,6 +205,63 @@ jobs: path: llama-bench-win-${{ matrix.backend }}-${{ matrix.arch }}.zip name: llama-bench-win-${{ matrix.backend }}-${{ matrix.arch }}.zip + windows-cuda: + runs-on: windows-2022 + + strategy: + matrix: + cuda: ['12.4'] + + steps: + - name: Clone + id: checkout + uses: actions/checkout@v4 + + - name: Install ccache + uses: ggml-org/ccache-action@v1.2.16 + with: + key: windows-cuda-bench-${{ matrix.cuda }} + variant: ccache + evict-old-files: 1d + + - name: Install Cuda Toolkit + uses: ./.github/actions/windows-setup-cuda + with: + cuda_version: ${{ matrix.cuda }} + + - name: Install Ninja + id: install_ninja + run: | + choco install ninja + + - name: Build + id: cmake_build + shell: cmd + run: | + call "C:\Program Files\Microsoft Visual Studio\2022\Enterprise\VC\Auxiliary\Build\vcvarsall.bat" x64 + cmake -S . 
-B build -G "Ninja Multi-Config" ^ + -DGGML_BACKEND_DL=ON ^ + -DGGML_NATIVE=OFF ^ + -DGGML_CUDA=ON ^ + -DLLAMA_CURL=OFF ^ + -DLLAMA_BUILD_TOOLS=ON ^ + -DLLAMA_BUILD_EXAMPLES=OFF ^ + -DLLAMA_BUILD_TESTS=OFF ^ + -DLLAMA_BUILD_SERVER=OFF + set /A NINJA_JOBS=%NUMBER_OF_PROCESSORS%-1 + cmake --build build --config Release -j %NINJA_JOBS% --target llama-bench + + - name: Pack artifacts + id: pack_artifacts + run: | + 7z a llama-bench-win-cuda-${{ matrix.cuda }}-x64.zip .\build\bin\Release\* + + - name: Upload artifacts + uses: actions/upload-artifact@v4 + with: + path: llama-bench-win-cuda-${{ matrix.cuda }}-x64.zip + name: llama-bench-win-cuda-${{ matrix.cuda }}-x64.zip + release: if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/benchmarks' ) || github.event.inputs.create_release == 'true' }} @@ -219,6 +276,7 @@ jobs: - android-arm64 - macOS-arm64 - windows + - windows-cuda steps: - name: Clone From d1f2d38a9dab8d20296be2fedda22bda834d2035 Mon Sep 17 00:00:00 2001 From: Yuri Khrustalev Date: Sat, 22 Nov 2025 22:06:22 -0500 Subject: [PATCH 09/11] Add win cpu variants (#11) --- .github/workflows/extra_benchmark_tools.yml | 136 ++++++++++++++++++-- 1 file changed, 126 insertions(+), 10 deletions(-) diff --git a/.github/workflows/extra_benchmark_tools.yml b/.github/workflows/extra_benchmark_tools.yml index e7777db9ff2..b71b2dfac59 100644 --- a/.github/workflows/extra_benchmark_tools.yml +++ b/.github/workflows/extra_benchmark_tools.yml @@ -123,6 +123,72 @@ jobs: path: llama-bench-macos-arm64.zip name: llama-bench-macos-arm64.zip + windows-cpu: + runs-on: windows-2025 + + strategy: + matrix: + include: + - arch: 'x64' + - arch: 'arm64' + + steps: + - name: Clone + uses: actions/checkout@v4 + with: + fetch-depth: 0 + + - name: ccache + uses: ggml-org/ccache-action@v1.2.16 + with: + key: windows-latest-cmake-cpu-bench-${{ matrix.arch }} + variant: ccache + evict-old-files: 1d + + - name: Install Ninja + run: | + choco install ninja + + - name: libCURL + id: 
get_libcurl + uses: ./.github/actions/windows-setup-curl + with: + architecture: ${{ matrix.arch == 'x64' && 'win64' || 'win64a' }} + + - name: Build + shell: cmd + env: + CURL_PATH: ${{ steps.get_libcurl.outputs.curl_path }} + run: | + call "C:\Program Files\Microsoft Visual Studio\2022\Enterprise\VC\Auxiliary\Build\vcvarsall.bat" ${{ matrix.arch == 'x64' && 'x64' || 'amd64_arm64' }} + cmake -S . -B build -G "Ninja Multi-Config" ^ + -D CMAKE_TOOLCHAIN_FILE=cmake/${{ matrix.arch }}-windows-llvm.cmake ^ + -DGGML_NATIVE=OFF ^ + -DGGML_BACKEND_DL=ON ^ + -DGGML_CPU_ALL_VARIANTS=${{ matrix.arch == 'x64' && 'ON' || 'OFF' }} ^ + -DGGML_OPENMP=ON ^ + -DCURL_LIBRARY="%CURL_PATH%/lib/libcurl.dll.a" -DCURL_INCLUDE_DIR="%CURL_PATH%/include" ^ + -DLLAMA_BUILD_TOOLS=ON ^ + -DLLAMA_BUILD_EXAMPLES=OFF ^ + -DLLAMA_BUILD_TESTS=OFF ^ + -DLLAMA_BUILD_SERVER=OFF + cmake --build build --config Release --target llama-bench + + - name: Pack artifacts + id: pack_artifacts + env: + CURL_PATH: ${{ steps.get_libcurl.outputs.curl_path }} + run: | + Copy-Item $env:CURL_PATH\bin\libcurl-${{ matrix.arch }}.dll .\build\bin\Release\ + Copy-Item "C:\Program Files\Microsoft Visual Studio\2022\Enterprise\VC\Redist\MSVC\14.44.35112\debug_nonredist\${{ matrix.arch }}\Microsoft.VC143.OpenMP.LLVM\libomp140.${{ matrix.arch == 'x64' && 'x86_64' || 'aarch64' }}.dll" .\build\bin\Release\ + 7z a llama-bench-win-cpu-${{ matrix.arch }}.zip .\build\bin\Release\* + + - name: Upload artifacts + uses: actions/upload-artifact@v4 + with: + path: llama-bench-win-cpu-${{ matrix.arch }}.zip + name: llama-bench-win-cpu-${{ matrix.arch }}.zip + windows: runs-on: windows-2025 @@ -192,12 +258,12 @@ jobs: id: cmake_build run: | cmake -S . 
-B build ${{ matrix.defines }} -DGGML_NATIVE=OFF -DGGML_CPU=OFF -DGGML_BACKEND_DL=ON -DLLAMA_CURL=OFF -DLLAMA_BUILD_TOOLS=ON -DLLAMA_BUILD_EXAMPLES=OFF -DLLAMA_BUILD_TESTS=OFF -DLLAMA_BUILD_SERVER=OFF - cmake --build build --config Release --target llama-bench + cmake --build build --config Release --target ${{ matrix.target }} - name: Pack artifacts id: pack_artifacts run: | - 7z a llama-bench-win-${{ matrix.backend }}-${{ matrix.arch }}.zip .\build\bin\Release\* + 7z a llama-bench-win-${{ matrix.backend }}-${{ matrix.arch }}.zip .\build\bin\Release\${{ matrix.target }}.dll - name: Upload artifacts uses: actions/upload-artifact@v4 @@ -242,19 +308,30 @@ jobs: cmake -S . -B build -G "Ninja Multi-Config" ^ -DGGML_BACKEND_DL=ON ^ -DGGML_NATIVE=OFF ^ + -DGGML_CPU=OFF ^ -DGGML_CUDA=ON ^ - -DLLAMA_CURL=OFF ^ - -DLLAMA_BUILD_TOOLS=ON ^ - -DLLAMA_BUILD_EXAMPLES=OFF ^ - -DLLAMA_BUILD_TESTS=OFF ^ - -DLLAMA_BUILD_SERVER=OFF + -DLLAMA_CURL=OFF set /A NINJA_JOBS=%NUMBER_OF_PROCESSORS%-1 - cmake --build build --config Release -j %NINJA_JOBS% --target llama-bench + cmake --build build --config Release -j %NINJA_JOBS% --target ggml-cuda - name: Pack artifacts id: pack_artifacts run: | - 7z a llama-bench-win-cuda-${{ matrix.cuda }}-x64.zip .\build\bin\Release\* + 7z a llama-bench-win-cuda-${{ matrix.cuda }}-x64.zip .\build\bin\Release\ggml-cuda.dll + + - name: Copy and pack Cuda runtime + run: | + echo "Cuda install location: ${{ env.CUDA_PATH }}" + $dst='.\build\bin\cudart\' + robocopy "${{env.CUDA_PATH}}\bin" $dst cudart64_*.dll cublas64_*.dll cublasLt64_*.dll + robocopy "${{env.CUDA_PATH}}\lib" $dst cudart64_*.dll cublas64_*.dll cublasLt64_*.dll + 7z a cudart-llama-bench-win-cuda-${{ matrix.cuda }}-x64.zip $dst\* + + - name: Upload Cuda runtime + uses: actions/upload-artifact@v4 + with: + path: cudart-llama-bench-win-cuda-${{ matrix.cuda }}-x64.zip + name: cudart-llama-bench-win-cuda-${{ matrix.cuda }}-x64.zip - name: Upload artifacts uses: actions/upload-artifact@v4 @@ -275,6 
+352,7 @@ jobs: needs: - android-arm64 - macOS-arm64 + - windows-cpu - windows - windows-cuda @@ -319,11 +397,49 @@ jobs: id: move_artifacts run: | mkdir -p release + + echo "Adding CPU backend files to existing zips..." + for arch in x64 arm64; do + cpu_zip="artifact/llama-bench-win-cpu-${arch}.zip" + if [ ! -f "$cpu_zip" ]; then + echo "CPU zip not found for $arch, skipping..." + continue + fi + temp_dir=$(mktemp -d) + echo "Extracting CPU backend for $arch..." + unzip "$cpu_zip" -d "$temp_dir" + + echo "Adding CPU files to $arch zips..." + for target_zip in artifact/llama-bench-win-*-${arch}.zip; do + if [[ "$target_zip" == "$cpu_zip" ]]; then + continue + fi + echo "Adding CPU backend to $(basename "$target_zip")" + realpath_target_zip=$(realpath "$target_zip") + (cd "$temp_dir" && zip -r "$realpath_target_zip" .) + done + + rm -rf "$temp_dir" + done + + echo "Renaming and moving zips to release..." + for zip_file in artifact/llama-bench-win-*.zip; do + base_name=$(basename "$zip_file" .zip) + zip_name="llama-bench-${{ steps.tag.outputs.name }}-${base_name#llama-bench-}.zip" + echo "Moving $zip_file to release/$zip_name" + mv "$zip_file" "release/$zip_name" + done + + echo "Moving other artifacts..." 
mv -v artifact/*.zip release/ || true - # Rename all artifacts to include tag + # Rename remaining artifacts to include tag cd release for f in llama-bench-*.zip; do + # Skip already renamed files + if [[ "$f" == *"${{ steps.tag.outputs.name }}"* ]]; then + continue + fi if [ -f "$f" ]; then newname="llama-bench-${{ steps.tag.outputs.name }}-${f#llama-bench-}" mv "$f" "$newname" From b3eeeb97b641ee3837827c7f043a187be5296c0b Mon Sep 17 00:00:00 2001 From: Yuri Khrustalev Date: Mon, 8 Dec 2025 18:36:59 -0500 Subject: [PATCH 10/11] Add ubuntu (#12) --- .github/workflows/extra_benchmark_tools.yml | 192 +++++++++++++++++++- 1 file changed, 191 insertions(+), 1 deletion(-) diff --git a/.github/workflows/extra_benchmark_tools.yml b/.github/workflows/extra_benchmark_tools.yml index b71b2dfac59..2b2c469a363 100644 --- a/.github/workflows/extra_benchmark_tools.yml +++ b/.github/workflows/extra_benchmark_tools.yml @@ -339,6 +339,180 @@ jobs: path: llama-bench-win-cuda-${{ matrix.cuda }}-x64.zip name: llama-bench-win-cuda-${{ matrix.cuda }}-x64.zip + ubuntu-cpu: + strategy: + matrix: + include: + - build: 'x64' + os: ubuntu-22.04 + - build: 'arm64' + os: ubuntu-22.04-arm + # Non-public runners - commented out: + # - build: 's390x' + # os: ubuntu-24.04-s390x + # - build: 'ppc64le' + # os: ubuntu-24.04-ppc64le + + runs-on: ${{ matrix.os }} + + steps: + - name: Clone + id: checkout + uses: actions/checkout@v4 + + - name: ccache + uses: ggml-org/ccache-action@v1.2.16 + with: + key: ubuntu-cpu-bench-${{ matrix.build }} + evict-old-files: 1d + + - name: Build + id: cmake_build + run: | + cmake -B build \ + -DCMAKE_BUILD_TYPE=Release \ + -DGGML_NATIVE=OFF \ + -DGGML_BACKEND_DL=ON \ + -DLLAMA_CURL=OFF \ + -DGGML_OPENMP=ON \ + -DLLAMA_BUILD_TOOLS=ON \ + -DLLAMA_BUILD_EXAMPLES=OFF \ + -DLLAMA_BUILD_TESTS=OFF \ + -DLLAMA_BUILD_SERVER=OFF + cmake --build build --config Release --target llama-bench -j $(nproc) + + - name: Pack artifacts + id: pack_artifacts + run: | + cd build/bin + tar 
-czvf ../../llama-bench-ubuntu-cpu-${{ matrix.build }}.tar.gz ./* + + - name: Upload artifacts + uses: actions/upload-artifact@v4 + with: + path: llama-bench-ubuntu-cpu-${{ matrix.build }}.tar.gz + name: llama-bench-ubuntu-cpu-${{ matrix.build }}.tar.gz + if-no-files-found: error + + ubuntu-vulkan: + runs-on: ubuntu-24.04 + + steps: + - name: Clone + id: checkout + uses: actions/checkout@v4 + + - name: ccache + uses: ggml-org/ccache-action@v1.2.16 + with: + key: ubuntu-24-vulkan-bench + evict-old-files: 1d + + - name: Dependencies + id: depends + run: | + sudo add-apt-repository -y ppa:kisak/kisak-mesa + sudo apt-get update -y + sudo apt-get install -y build-essential mesa-vulkan-drivers libxcb-xinput0 libxcb-xinerama0 libxcb-cursor-dev + + - name: Get latest Vulkan SDK version + id: vulkan_sdk_version + run: | + echo "VULKAN_SDK_VERSION=$(curl https://vulkan.lunarg.com/sdk/latest/linux.txt)" >> "$GITHUB_ENV" + + - name: Use Vulkan SDK Cache + uses: actions/cache@v4 + id: cache-sdk + with: + path: ./vulkan_sdk + key: vulkan-sdk-${{ env.VULKAN_SDK_VERSION }}-${{ runner.os }} + + - name: Setup Vulkan SDK + if: steps.cache-sdk.outputs.cache-hit != 'true' + uses: ./.github/actions/linux-setup-vulkan + with: + path: ./vulkan_sdk + version: ${{ env.VULKAN_SDK_VERSION }} + + - name: Build + id: cmake_build + run: | + source ./vulkan_sdk/setup-env.sh + cmake -B build \ + -DCMAKE_BUILD_TYPE=Release \ + -DGGML_NATIVE=OFF \ + -DGGML_CPU=OFF \ + -DGGML_BACKEND_DL=ON \ + -DGGML_VULKAN=ON \ + -DLLAMA_CURL=OFF + cmake --build build --config Release --target ggml-vulkan -j $(nproc) + + - name: Pack artifacts + id: pack_artifacts + run: | + cd build/bin + tar -czvf ../../llama-bench-ubuntu-vulkan-x64.tar.gz ./* + + - name: Upload artifacts + uses: actions/upload-artifact@v4 + with: + path: llama-bench-ubuntu-vulkan-x64.tar.gz + name: llama-bench-ubuntu-vulkan-x64.tar.gz + if-no-files-found: error + + ubuntu-cuda: + runs-on: ubuntu-latest + container: 
nvidia/cuda:12.6.2-devel-ubuntu24.04 + + strategy: + matrix: + cuda: ['12.6'] + + steps: + - name: Clone + id: checkout + uses: actions/checkout@v4 + + - name: Install dependencies + env: + DEBIAN_FRONTEND: noninteractive + run: | + apt update + apt install -y cmake build-essential ninja-build libgomp1 git + + - name: ccache + uses: ggml-org/ccache-action@v1.2.16 + with: + key: ubuntu-cuda-bench-${{ matrix.cuda }} + evict-old-files: 1d + + - name: Build + id: cmake_build + run: | + cmake -S . -B build -G Ninja \ + -DCMAKE_BUILD_TYPE=Release \ + -DCMAKE_CUDA_ARCHITECTURES=89-real \ + -DCMAKE_EXE_LINKER_FLAGS=-Wl,--allow-shlib-undefined \ + -DGGML_NATIVE=OFF \ + -DGGML_CPU=OFF \ + -DGGML_BACKEND_DL=ON \ + -DGGML_CUDA=ON \ + -DLLAMA_CURL=OFF + cmake --build build --config Release --target ggml-cuda + + - name: Pack artifacts + id: pack_artifacts + run: | + cd build/bin + tar -czvf ../../llama-bench-ubuntu-cuda-${{ matrix.cuda }}-x64.tar.gz ./* + + - name: Upload artifacts + uses: actions/upload-artifact@v4 + with: + path: llama-bench-ubuntu-cuda-${{ matrix.cuda }}-x64.tar.gz + name: llama-bench-ubuntu-cuda-${{ matrix.cuda }}-x64.tar.gz + if-no-files-found: error + release: if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/benchmarks' ) || github.event.inputs.create_release == 'true' }} @@ -355,6 +529,9 @@ jobs: - windows-cpu - windows - windows-cuda + - ubuntu-cpu + - ubuntu-vulkan + - ubuntu-cuda steps: - name: Clone @@ -432,6 +609,7 @@ jobs: echo "Moving other artifacts..." 
mv -v artifact/*.zip release/ || true + mv -v artifact/*.tar.gz release/ || true # Rename remaining artifacts to include tag cd release @@ -446,6 +624,17 @@ jobs: fi done + for f in llama-bench-*.tar.gz; do + # Skip already renamed files + if [[ "$f" == *"${{ steps.tag.outputs.name }}"* ]]; then + continue + fi + if [ -f "$f" ]; then + newname="llama-bench-${{ steps.tag.outputs.name }}-${f#llama-bench-}" + mv "$f" "$newname" + fi + done + - name: Create release id: create_release uses: ggml-org/action-create-release@v1 @@ -464,7 +653,8 @@ jobs: const fs = require('fs'); const release_id = '${{ steps.create_release.outputs.id }}'; for (let file of await fs.readdirSync('./release')) { - if (path.extname(file) === '.zip') { + const ext = path.extname(file); + if (ext === '.zip' || ext === '.gz') { console.log('uploadReleaseAsset', file); await github.repos.uploadReleaseAsset({ owner: context.repo.owner, From 3e01ecd85b665213109a4499e74fb6421b4c5a61 Mon Sep 17 00:00:00 2001 From: Yuri Khrustalev Date: Wed, 14 Jan 2026 16:54:01 -0500 Subject: [PATCH 11/11] curl off (#13) --- .github/workflows/extra_benchmark_tools.yml | 13 +------------ 1 file changed, 1 insertion(+), 12 deletions(-) diff --git a/.github/workflows/extra_benchmark_tools.yml b/.github/workflows/extra_benchmark_tools.yml index 2b2c469a363..f5de24f5371 100644 --- a/.github/workflows/extra_benchmark_tools.yml +++ b/.github/workflows/extra_benchmark_tools.yml @@ -149,16 +149,8 @@ jobs: run: | choco install ninja - - name: libCURL - id: get_libcurl - uses: ./.github/actions/windows-setup-curl - with: - architecture: ${{ matrix.arch == 'x64' && 'win64' || 'win64a' }} - - name: Build shell: cmd - env: - CURL_PATH: ${{ steps.get_libcurl.outputs.curl_path }} run: | call "C:\Program Files\Microsoft Visual Studio\2022\Enterprise\VC\Auxiliary\Build\vcvarsall.bat" ${{ matrix.arch == 'x64' && 'x64' || 'amd64_arm64' }} cmake -S . 
-B build -G "Ninja Multi-Config" ^ @@ -167,7 +159,7 @@ jobs: -DGGML_BACKEND_DL=ON ^ -DGGML_CPU_ALL_VARIANTS=${{ matrix.arch == 'x64' && 'ON' || 'OFF' }} ^ -DGGML_OPENMP=ON ^ - -DCURL_LIBRARY="%CURL_PATH%/lib/libcurl.dll.a" -DCURL_INCLUDE_DIR="%CURL_PATH%/include" ^ + -DLLAMA_CURL=OFF ^ -DLLAMA_BUILD_TOOLS=ON ^ -DLLAMA_BUILD_EXAMPLES=OFF ^ -DLLAMA_BUILD_TESTS=OFF ^ @@ -176,10 +168,7 @@ jobs: - name: Pack artifacts id: pack_artifacts - env: - CURL_PATH: ${{ steps.get_libcurl.outputs.curl_path }} run: | - Copy-Item $env:CURL_PATH\bin\libcurl-${{ matrix.arch }}.dll .\build\bin\Release\ Copy-Item "C:\Program Files\Microsoft Visual Studio\2022\Enterprise\VC\Redist\MSVC\14.44.35112\debug_nonredist\${{ matrix.arch }}\Microsoft.VC143.OpenMP.LLVM\libomp140.${{ matrix.arch == 'x64' && 'x86_64' || 'aarch64' }}.dll" .\build\bin\Release\ 7z a llama-bench-win-cpu-${{ matrix.arch }}.zip .\build\bin\Release\*