From 8166cdafb4246755a02dd97fcdf5d8b7e22d0eb3 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Wed, 25 Feb 2026 09:43:03 +0000 Subject: [PATCH 1/3] Initial plan From 4cb71b4d1af7fda579c49f45a0b03a3b13571e12 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Wed, 25 Feb 2026 09:44:32 +0000 Subject: [PATCH 2/3] Modernize llama-build-cuda.yaml workflow with clean bash syntax Co-authored-by: chamalgomes <106123134+chamalgomes@users.noreply.github.com> --- .github/workflows/llama-build-cuda.yaml | 122 ++++++++++-------------- 1 file changed, 48 insertions(+), 74 deletions(-) diff --git a/.github/workflows/llama-build-cuda.yaml b/.github/workflows/llama-build-cuda.yaml index 7bd8e3a..7acc7c1 100644 --- a/.github/workflows/llama-build-cuda.yaml +++ b/.github/workflows/llama-build-cuda.yaml @@ -6,110 +6,84 @@ permissions: contents: write jobs: - define_matrix: - name: Define Build Matrix - runs-on: ubuntu-latest - outputs: - matrix: ${{ steps.set-matrix.outputs.matrix }} - defaults: - run: - shell: pwsh - - steps: - - name: Define Job Output - id: set-matrix - run: | - $matrix = @{ - 'os' = @('ubuntu-24.04') - 'pyver' = @("3.13", "3.14") - 'cuda' = @("13.1.1") - 'releasetag' = @("basic") - } - - $matrixOut = ConvertTo-Json $matrix -Compress - Write-Output ('matrix=' + $matrixOut) >> $env:GITHUB_OUTPUT - build_wheels: - name: Build Wheel ${{ matrix.os }} ${{ matrix.pyver }} ${{ matrix.cuda }} ${{ matrix.releasetag == 'wheels' && 'AVX2' || matrix.releasetag }} - needs: define_matrix - runs-on: ${{ matrix.os }} + name: Build Wheel CUDA ${{ matrix.cuda_config.ver }} Py ${{ matrix.pyver }} + runs-on: ubuntu-latest + container: + image: nvidia/cuda:${{ matrix.cuda_config.ver }}-devel-ubuntu24.04 permissions: id-token: write contents: write attestations: write strategy: - matrix: ${{ fromJSON(needs.define_matrix.outputs.matrix) }} - defaults: - run: - shell: pwsh - env: - CUDAVER: ${{ matrix.cuda }} - AVXVER: ${{ matrix.releasetag }} + fail-fast: false + matrix: + cuda_config: + - ver: 13.1.1 + short: cu131 + arch: 75;80; + pyver: ["3.13", "3.14"] steps: - - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5 + - name: Checkout repository + uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5 with: - submodules: "recursive" - - - name: Verify GLIBC version - run: ldd --version + submodules: recursive - - name: Install the latest version of uv + - name: Setup uv and Python uses: astral-sh/setup-uv@5a7eac68fb9809dea845d802897dc5c723910fa3 # v7 with: version: "latest" - activate-environment: true - - - name: Setup Mamba - uses: conda-incubator/setup-miniconda@835234971496cad1653abb28a638a281cf32541f # v3.2.0 - with: - activate-environment: "build" + enable-cache: true python-version: ${{ matrix.pyver }} - miniforge-version: latest - add-pip-as-python-dependency: true - auto-activate-base: false - - name: Install Dependencies - env: - MAMBA_DOWNLOAD_FAILFAST: "0" - MAMBA_NO_LOW_SPEED_LIMIT: "1" + - name: Verify GLIBC version + run: ldd --version + + - name: Verify installation run: | - $cudaVersion = $env:CUDAVER - mamba install -y "cuda-toolkit=$cudaVersion" -c nvidia + python --version + uv --version + cmake --version + nvcc --version + nvidia-smi || true - name: Build Wheel working-directory: vendor/llama-cpp-python + env: + VERBOSE: 1 + CMAKE_ARGS: >- + -DGGML_CUDA=on + -DLLAVA_BUILD=off + -DCMAKE_CUDA_ARCHITECTURES=${{ matrix.cuda_config.arch }} + -DGGML_CUDA_FORCE_MMQ=OFF + -DGGML_AVX2=off + -DGGML_FMA=off + -DGGML_F16C=off + -DLLAMA_BUILD_EXAMPLES=OFF + -DLLAMA_BUILD_TESTS=OFF + -DLLAMA_BUILD_SERVER=OFF + -DCMAKE_EXE_LINKER_FLAGS="-L/usr/local/cuda/lib64/stubs -lcuda" run: | - $cudaVersion = $env:CUDAVER.Remove($env:CUDAVER.LastIndexOf('.')).Replace('.','') - $env:CUDA_PATH = $env:CONDA_PREFIX - $env:CUDA_HOME = $env:CONDA_PREFIX - $env:CUDA_TOOLKIT_ROOT_DIR = $env:CONDA_PREFIX - - if ($IsLinux) { - $env:LD_LIBRARY_PATH = $env:CONDA_PREFIX + '/lib:' + $env:LD_LIBRARY_PATH - } - $env:VERBOSE = '1' - $env:CMAKE_ARGS = '-DGGML_CUDA=on -DLLAVA_BUILD=off -DCMAKE_CUDA_ARCHITECTURES=75;80;' - $env:CMAKE_ARGS = "-DGGML_CUDA_FORCE_MMQ=OFF $env:CMAKE_ARGS" - $env:CMAKE_ARGS = $env:CMAKE_ARGS + ' -DGGML_AVX2=off -DGGML_FMA=off -DGGML_F16C=off' - # Rename the release version with +cu{cuda_ver} build tag - (Get-Content llama_cpp/__init__.py) -replace '__version__ = "([^"]*)"', ('__version__ = "' + '$1' + '+cu' + $cudaVersion + '"') | Set-Content llama_cpp/__init__.py + sed -i 's/__version__ = "\([^"]*\)"/__version__ = "\1+${{ matrix.cuda_config.short }}"/' llama_cpp/__init__.py - #Build wheel + # Build wheel using uv uv -v build --no-create-gitignore --python cpython@${{ matrix.pyver }} --wheel - - # write the build tag to the output - Write-Output "CUDA_VERSION=$cudaVersion" >> $env:GITHUB_ENV - - uses: softprops/action-gh-release@5be0e66d93ac7ed76da52eca8bb058f665c3a5fe # v2 + - name: List built wheels + run: ls -lh vendor/llama-cpp-python/dist/*.whl + + - name: Upload Release Assets + uses: softprops/action-gh-release@5be0e66d93ac7ed76da52eca8bb058f665c3a5fe # v2 with: files: vendor/llama-cpp-python/dist/*.whl tag_name: ${{ github.ref_name }} env: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - - uses: actions/attest-build-provenance@977bb373ede98d70efdf65b84cb5f73e068dcc2a # v3 + - name: Attest Build Provenance + uses: actions/attest-build-provenance@977bb373ede98d70efdf65b84cb5f73e068dcc2a # v3 with: subject-path: 'vendor/llama-cpp-python/dist/*.whl' From 3bff1425c3e927dff95cd4483885cb987e29de75 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Wed, 25 Feb 2026 22:43:32 +0000 Subject: [PATCH 3/3] Fix container dependencies, trailing semicolon in arch, and inline uv install Co-authored-by: chamalgomes <106123134+chamalgomes@users.noreply.github.com> --- .github/workflows/llama-build-cuda.yaml | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/.github/workflows/llama-build-cuda.yaml b/.github/workflows/llama-build-cuda.yaml index 7acc7c1..a3d4e31 100644 --- a/.github/workflows/llama-build-cuda.yaml +++ b/.github/workflows/llama-build-cuda.yaml @@ -21,21 +21,27 @@ jobs: cuda_config: - ver: 13.1.1 short: cu131 - arch: 75;80; + arch: 75;80 pyver: ["3.13", "3.14"] steps: + - name: Install system dependencies + run: | + apt-get update + apt-get install -y software-properties-common git curl build-essential cmake libssl-dev + - name: Checkout repository uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5 with: submodules: recursive - - name: Setup uv and Python - uses: astral-sh/setup-uv@5a7eac68fb9809dea845d802897dc5c723910fa3 # v7 - with: - version: "latest" - enable-cache: true - python-version: ${{ matrix.pyver }} + - name: Install uv + run: | + curl -LsSf https://astral.sh/uv/install.sh | sh + echo "$HOME/.local/bin" >> $GITHUB_PATH + + - name: Install Python + run: uv python install ${{ matrix.pyver }} - name: Verify GLIBC version run: ldd --version