chamalgomes · Copilot · Feb 25, 2026 · Feb 25, 2026 · Feb 25, 2026 · Copilot
diff --git a/.github/workflows/llama-build-cuda.yaml b/.github/workflows/llama-build-cuda.yaml
@@ -6,110 +6,90 @@ permissions:
   contents: write
 
 jobs:
-  define_matrix:
-    name: Define Build Matrix
-    runs-on: ubuntu-latest
-    outputs:
-      matrix: ${{ steps.set-matrix.outputs.matrix }}
-    defaults:
-      run:
-        shell: pwsh
-
-    steps:
-      - name: Define Job Output
-        id: set-matrix
-        run: |
-          $matrix = @{
-              'os' = @('ubuntu-24.04')
-              'pyver' = @("3.13", "3.14")
-              'cuda' = @("13.1.1")
-              'releasetag' = @("basic")
-          }
-
-          $matrixOut = ConvertTo-Json $matrix -Compress
-          Write-Output ('matrix=' + $matrixOut) >> $env:GITHUB_OUTPUT
-
   build_wheels:
-    name: Build Wheel ${{ matrix.os }} ${{ matrix.pyver }} ${{ matrix.cuda }} ${{ matrix.releasetag == 'wheels' && 'AVX2' || matrix.releasetag }}
-    needs: define_matrix
-    runs-on: ${{ matrix.os }}
+    name: "Build Wheel CUDA ${{ matrix.cuda_config.ver }} Py ${{ matrix.pyver }}"
+    runs-on: ubuntu-latest  # host runner; the job's steps execute inside the container below
+    container:  # CUDA devel image whose tag is derived from the matrix CUDA version
+      image: "nvidia/cuda:${{ matrix.cuda_config.ver }}-devel-ubuntu24.04"
     permissions:
       id-token: write
       contents: write
       attestations: write
     strategy:
-      matrix: ${{ fromJSON(needs.define_matrix.outputs.matrix) }}
-    defaults:
-      run:
-        shell: pwsh
-    env:
-      CUDAVER: ${{ matrix.cuda }}
-      AVXVER: ${{ matrix.releasetag }}
+      fail-fast: false  # let the remaining matrix entries finish when one fails
+      matrix:
+        cuda_config:  # one entry per CUDA toolkit to build against
+          - ver: "13.1.1"  # used in the job name and the container image tag
+            short: "cu131"  # appended to the wheel version as a +<short> local tag
+            arch: "75;80"  # forwarded to -DCMAKE_CUDA_ARCHITECTURES
+        pyver: ["3.13", "3.14"]  # quoted so the versions stay strings
 
     steps:
-      - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5
+      - name: Install system dependencies  # curl and cmake are invoked by later steps; git is presumably needed for the submodule checkout
+        run: |
+          apt-get update  # refresh the package lists before installing
+          DEBIAN_FRONTEND=noninteractive apt-get install -y software-properties-common git curl build-essential cmake libssl-dev  # noninteractive: never block on a debconf prompt
+
+      - name: Checkout repository  # action is pinned to a full commit SHA; the trailing comment records the release tag
+        uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5
         with:
-          submodules: "recursive"
+          submodules: recursive
 
-      - name: Verify GLIBC version
-        run: ldd --version
+      - name: Install uv  # NOTE(review): the installer URL carries no version, unlike the SHA-pinned actions in this job — consider pinning
+        run: |
+          curl -LsSf https://astral.sh/uv/install.sh | sh
+          echo "$HOME/.local/bin" >> "$GITHUB_PATH"  # expose the install dir to later steps; quoted so the redirect target cannot word-split
 
-      - name: Install the latest version of uv
-        uses: astral-sh/setup-uv@5a7eac68fb9809dea845d802897dc5c723910fa3 # v7
-        with:
-          version: "latest"
-          activate-environment: true
+      - name: Install Python  # interpreter for this matrix entry; the Build Wheel step requests the same version via --python
+        run: uv python install ${{ matrix.pyver }}
 
-      - name: Setup Mamba
-        uses: conda-incubator/setup-miniconda@835234971496cad1653abb28a638a281cf32541f # v3.2.0
-        with:
-          activate-environment: "build"
-          python-version: ${{ matrix.pyver }}
-          miniforge-version: latest
-          add-pip-as-python-dependency: true
-          auto-activate-base: false
+      - name: Verify GLIBC version  # diagnostic only: records the build environment's C library version in the job log
+        run: ldd --version
 
-      - name: Install Dependencies
-        env:
-          MAMBA_DOWNLOAD_FAILFAST: "0"
-          MAMBA_NO_LOW_SPEED_LIMIT: "1"
+      - name: Verify installation
         run: |
-          $cudaVersion = $env:CUDAVER
-          mamba install -y "cuda-toolkit=$cudaVersion" -c nvidia
+          "$(uv python find ${{ matrix.pyver }})" --version  # resolve the interpreter via uv: `uv python install` does not provide a bare `python` command by default
+          uv --version
+          cmake --version
+          nvcc --version
+          nvidia-smi || true  # informational; a missing GPU/driver must not fail the step
 
       - name: Build Wheel
         working-directory: vendor/llama-cpp-python
+        env:  # step-scoped variables for the run script below; CMAKE_ARGS is a single folded line of -D options, so no comments can go inside it
+          VERBOSE: 1  # presumably enables verbose build output — confirm against the build backend
+          CMAKE_ARGS: >-
+            -DGGML_CUDA=on
+            -DLLAVA_BUILD=off
+            -DCMAKE_CUDA_ARCHITECTURES=${{ matrix.cuda_config.arch }}
+            -DGGML_CUDA_FORCE_MMQ=OFF
+            -DGGML_AVX2=off
+            -DGGML_FMA=off
+            -DGGML_F16C=off
+            -DLLAMA_BUILD_EXAMPLES=OFF
+            -DLLAMA_BUILD_TESTS=OFF
+            -DLLAMA_BUILD_SERVER=OFF
+            -DCMAKE_EXE_LINKER_FLAGS="-L/usr/local/cuda/lib64/stubs -lcuda"
         run: |
-          $cudaVersion = $env:CUDAVER.Remove($env:CUDAVER.LastIndexOf('.')).Replace('.','')
-          $env:CUDA_PATH = $env:CONDA_PREFIX
-          $env:CUDA_HOME = $env:CONDA_PREFIX
-          $env:CUDA_TOOLKIT_ROOT_DIR = $env:CONDA_PREFIX
-
-          if ($IsLinux) {
-            $env:LD_LIBRARY_PATH = $env:CONDA_PREFIX + '/lib:' + $env:LD_LIBRARY_PATH
-          } 
-          $env:VERBOSE = '1'
-          $env:CMAKE_ARGS = '-DGGML_CUDA=on -DLLAVA_BUILD=off -DCMAKE_CUDA_ARCHITECTURES=75;80;'
-          $env:CMAKE_ARGS = "-DGGML_CUDA_FORCE_MMQ=OFF $env:CMAKE_ARGS"
-          $env:CMAKE_ARGS = $env:CMAKE_ARGS + ' -DGGML_AVX2=off -DGGML_FMA=off -DGGML_F16C=off'
-
           # Rename the release version with +cu{cuda_ver} build tag
-          (Get-Content llama_cpp/__init__.py) -replace '__version__ = "([^"]*)"', ('__version__ = "' + '$1' + '+cu' + $cudaVersion + '"') | Set-Content llama_cpp/__init__.py
+          sed -i 's/__version__ = "\([^"]*\)"/__version__ = "\1+${{ matrix.cuda_config.short }}"/' llama_cpp/__init__.py; grep -qF '+${{ matrix.cuda_config.short }}"' llama_cpp/__init__.py  # sed exits 0 even when nothing matched, so fail the step if the tag is absent
 
-          #Build wheel
+          # Build wheel using uv
           uv -v build --no-create-gitignore --python cpython@${{ matrix.pyver }} --wheel
-
-          # write the build tag to the output
-          Write-Output "CUDA_VERSION=$cudaVersion" >> $env:GITHUB_ENV
 
-      - uses: softprops/action-gh-release@5be0e66d93ac7ed76da52eca8bb058f665c3a5fe # v2
+      - name: List built wheels  # logs the artifacts; ls exits non-zero when the glob matches nothing, so a build that produced no wheel stops here
+        run: ls -lh vendor/llama-cpp-python/dist/*.whl
+
+      - name: Upload Release Assets  # publishes dist/*.whl to the release tagged with the current ref name (see `with:` below)
+        uses: softprops/action-gh-release@5be0e66d93ac7ed76da52eca8bb058f665c3a5fe # v2
         with:
           files: vendor/llama-cpp-python/dist/*.whl
           tag_name: ${{ github.ref_name }}
         env:
           GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
 
-      - uses: actions/attest-build-provenance@977bb373ede98d70efdf65b84cb5f73e068dcc2a # v3
+      - name: Attest Build Provenance  # covers the built wheels via subject-path below; presumably why the job requests id-token/attestations write — confirm
+        uses: actions/attest-build-provenance@977bb373ede98d70efdf65b84cb5f73e068dcc2a # v3
         with:
           subject-path: 'vendor/llama-cpp-python/dist/*.whl'