# Build Wheels & Release #15
# This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
# Learn more about bidirectional Unicode characters
# Manually dispatched workflow: builds the wheels and, when asked to, publishes
# them to a GitHub release.
name: "Build Wheels & Release"

on:
  workflow_dispatch:
    inputs:
      # The upload step only runs when this is exactly the string '1'.
      release:
        description: "Release? 1 = yes, 0 = no"
        default: "0"
        required: true
        type: string

# Write access to repository contents (the release upload step needs it).
permissions:
  contents: write
# One job per matrix row: CUDA wheels for Linux and Windows across several
# Python versions, plus one no-CUDA row that builds the sdist. When the
# `release` input is '1', the wheels found in ./dist are attached to the
# release tagged v<package version>.
jobs:
  build_wheels:
    name: ${{ matrix.os }} P${{ matrix.pyver }} C${{ matrix.cuda }} R${{ matrix.rocm }} T${{ matrix.torch }}
    runs-on: ${{ matrix.os }}
    defaults:
      run:
        # Every `run:` step is PowerShell unless the step sets its own `shell:`.
        shell: pwsh
    strategy:
      matrix:
        include:
          # Ubuntu 22.04 CUDA
          # Python 3.10
          - { artname: 'wheel', os: ubuntu-22.04, pyver: '3.10', cuda: '12.8.1', rocm: '', torch: '2.7.0', cudaarch: '8.0 8.6 8.9 9.0 10.0 12.0+PTX' }
          # Python 3.11
          - { artname: 'wheel', os: ubuntu-22.04, pyver: '3.11', cuda: '12.8.1', rocm: '', torch: '2.7.0', cudaarch: '8.0 8.6 8.9 9.0 10.0 12.0+PTX' }
          # Python 3.12
          - { artname: 'wheel', os: ubuntu-22.04, pyver: '3.12', cuda: '12.8.1', rocm: '', torch: '2.7.0', cudaarch: '8.0 8.6 8.9 9.0 10.0 12.0+PTX' }
          # Python 3.13
          - { artname: 'wheel', os: ubuntu-22.04, pyver: '3.13', cuda: '12.8.1', rocm: '', torch: '2.7.0', cudaarch: '8.0 8.6 8.9 9.0 10.0 12.0+PTX' }
          # Windows 2022 CUDA
          # Python 3.10
          - { artname: 'wheel', os: windows-2022, pyver: '3.10', cuda: '12.8.1', rocm: '', torch: '2.7.0', cudaarch: '8.0 8.6 8.9 9.0 10.0 12.0+PTX' }
          # Python 3.11
          - { artname: 'wheel', os: windows-2022, pyver: '3.11', cuda: '12.8.1', rocm: '', torch: '2.7.0', cudaarch: '8.0 8.6 8.9 9.0 10.0 12.0+PTX' }
          # Python 3.12
          - { artname: 'wheel', os: windows-2022, pyver: '3.12', cuda: '12.8.1', rocm: '', torch: '2.7.0', cudaarch: '8.0 8.6 8.9 9.0 10.0 12.0+PTX' }
          # Python 3.13
          - { artname: 'wheel', os: windows-2022, pyver: '3.13', cuda: '12.8.1', rocm: '', torch: '2.7.0', cudaarch: '8.0 8.6 8.9 9.0 10.0 12.0+PTX' }
          # sdist
          - { artname: 'sdist', os: ubuntu-22.04, pyver: '3.11', cuda: '', rocm: '', torch: '2.7.0', cudaarch: '' }
      # Let the remaining matrix rows finish even if one of them fails.
      fail-fast: false
    steps:
      # Free disk space
      - name: Free Disk Space
        # NOTE(review): the action reference was corrupted to "[email protected]" in
        # the copy this was restored from. The action name follows from the
        # owner and the inputs below; the version tag is a best guess — confirm.
        uses: jlumbroso/free-disk-space@v1.3.1
        if: runner.os == 'Linux'
        with:
          tool-cache: true
          android: true
          dotnet: true
          haskell: true
          large-packages: false
          swap-storage: true

      # Check out the repository
      - uses: actions/checkout@v4

      # Get version string from package
      - name: Get version string
        id: package_version
        run: |
          $versionString = Get-Content $(Join-Path 'exllamav3' 'version.py') -raw
          if ($versionString -match '__version__ = "(\d+\.(?:\d+\.?(?:dev\d+)?)*)"')
          {
            Write-Output $('::notice file=build-wheels-release.yml,line=200,title=Package Version::Detected package version is: {0}' -f $Matches[1])
            Write-Output "PACKAGE_VERSION=$($Matches[1])" >> "$env:GITHUB_OUTPUT"
          }
          else
          {
            # 'None' is the sentinel the upload step checks to skip publishing.
            Write-Output '::error file=build-wheels-release.yml,line=203::Could not parse version from exllamav3/version.py! You must upload wheels manually!'
            Write-Output "PACKAGE_VERSION=None" >> "$env:GITHUB_OUTPUT"
          }

      # Pin VS build tools to 17.9 for wider compat
      # NOTE(review): 117.9.7.0 looks like the Chocolatey package version that
      # corresponds to VS 17.9.7 — confirm.
      - name: Install VS2022 BuildTools 17.9.7
        run: choco install -y visualstudio2022buildtools --version=117.9.7.0 --params "--add Microsoft.VisualStudio.Component.VC.Tools.x86.x64 --installChannelUri https://aka.ms/vs/17/release/180911598_-255012421/channel"
        if: runner.os == 'Windows'

      # Install uv for easier python setup
      - name: Install the latest version of uv and set the python version
        uses: astral-sh/setup-uv@v5
        with:
          python-version: ${{ matrix.pyver }}

      # Assemble a CUDA 12.8 toolkit on Windows from NVIDIA's redistributable
      # archives, then expose it through PATH and CUDA_PATH*.
      - name: Install Windows CUDA 12.8
        if: runner.os == 'Windows' && contains(matrix.cuda, '12.8')
        run: |
          $cudaRoot = 'C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.8'
          $redistUrl = 'https://developer.download.nvidia.com/compute/cuda/redist'
          # Redist component -> archive version, in download/copy order.
          $components = [ordered]@{
            cuda_cudart               = '12.8.57'
            cuda_nvcc                 = '12.8.61'
            cuda_nvrtc                = '12.8.61'
            libcublas                 = '12.8.3.14'
            cuda_nvtx                 = '12.8.55'
            cuda_profiler_api         = '12.8.55'
            visual_studio_integration = '12.8.55'
            cuda_nvprof               = '12.8.57'
            cuda_cccl                 = '12.8.55'
            libcusparse               = '12.5.7.53'
            libcusolver               = '11.7.2.55'
            libcurand                 = '10.3.9.55'
            libcufft                  = '11.3.3.41'
          }
          mkdir -p $cudaRoot
          choco install unzip -y
          # --- Download every archive into the working directory
          foreach ($component in $components.GetEnumerator()) {
            $archive = "$($component.Key)-windows-x86_64-$($component.Value)-archive"
            curl -fL -O "$redistUrl/$($component.Key)/windows-x86_64/$archive.zip"
          }
          # --- Each zip unpacks into its own <archive> folder under the toolkit root
          unzip '*.zip' -d $cudaRoot
          # --- Flatten the per-archive folders into the toolkit root
          foreach ($component in $components.GetEnumerator()) {
            $archive = "$($component.Key)-windows-x86_64-$($component.Value)-archive"
            xcopy "$cudaRoot\$archive\*" $cudaRoot /E /I /H /Y
          }
          # --- Make the toolkit visible to the following steps
          echo "$cudaRoot\bin" | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append
          echo "$cudaRoot\libnvvp" | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append
          echo "CUDA_PATH=$cudaRoot" | Out-File -FilePath $env:GITHUB_ENV -Append -Encoding utf8
          echo "CUDA_PATH_V12_8=$cudaRoot" | Out-File -FilePath $env:GITHUB_ENV -Append -Encoding utf8

      # Attempt to pin nvidia-persistenced to exactly 575.57.08-0ubuntu1 until dependency issue is fixed in CUDA
      # https://forums.developer.nvidia.com/t/nvidia-driver-570-installation-is-broken-again/335219
      - name: Enable NVIDIA CUDA APT repo
        if: runner.os == 'Linux'
        shell: bash
        run: |
          set -euo pipefail
          # e.g. "22.04" -> "2204" -> "ubuntu2204"
          UBUNTU=$(lsb_release -rs | tr -d '.')
          DIST="ubuntu${UBUNTU}"
          ARCH="x86_64"
          sudo mkdir -p /usr/share/keyrings
          curl -fsSL \
            https://developer.download.nvidia.com/compute/cuda/repos/${DIST}/${ARCH}/cuda-archive-keyring.gpg |
            sudo tee /usr/share/keyrings/cuda-archive-keyring.gpg >/dev/null
          echo "deb [signed-by=/usr/share/keyrings/cuda-archive-keyring.gpg] https://developer.download.nvidia.com/compute/cuda/repos/${DIST}/${ARCH}/ /" |
            sudo tee /etc/apt/sources.list.d/cuda.list
          sudo apt-get update -y

      # Install the pinned package versions and hold them so later installs
      # leave them alone. Runs under the job's default shell (pwsh).
      - name: Pin nvidia-persistenced 575.57.08-0ubuntu1
        if: runner.os != 'Windows' && matrix.cuda != ''
        run: |
          sudo apt-get update
          sudo apt-get install -y --allow-downgrades nvidia-persistenced=575.57.08-0ubuntu1 libnvidia-cfg1=575.57.08-0ubuntu1
          sudo apt-mark hold nvidia-persistenced libnvidia-cfg1

      # TODO: Find specific sub-packages
      - name: Install Linux CUDA ${{ matrix.cuda }}
        # NOTE(review): the action reference was corrupted to "[email protected]" in
        # the copy this was restored from. The version tag is a best guess and
        # must be one that knows CUDA ${{ matrix.cuda }} — confirm.
        uses: Jimver/cuda-toolkit@v0.2.24
        id: cuda-toolkit-Linux
        with:
          cuda: "${{ matrix.cuda }}"
          linux-local-args: '["--toolkit"]'
          method: "network"
        if: runner.os != 'Windows' && matrix.cuda != ''

      # Install the matching CUDA build of torch plus the Python build tooling,
      # and publish the short CUDA tag (e.g. 12.8.1 -> 128) as a step output.
      - name: Install CUDA build Dependencies
        if: matrix.cuda != ''
        id: cuda_deps
        run: |
          # core.longpaths is a Git-for-Windows setting; skip it elsewhere.
          if ($IsWindows) {
            git config --system core.longpaths true
          }
          $cudaVersion = '${{ matrix.cuda }}'
          # Drop the patch component, then the dot: '12.8.1' -> '12.8' -> '128'
          $cudaVersionPytorch = $cudaVersion.Remove($cudaVersion.LastIndexOf('.')).Replace('.','')
          Write-Output "CUDA_VERSION_PYTORCH=$cudaVersionPytorch" >> "$env:GITHUB_OUTPUT"
          $pytorchIndexUrl = "https://download.pytorch.org/whl/cu$cudaVersionPytorch"
          uv pip install torch==${{ matrix.torch }} --index-url $pytorchIndexUrl
          uv pip install --upgrade build setuptools==69.5.1 wheel packaging ninja safetensors tokenizers numpy

      - name: Build for CUDA
        if: matrix.cuda != ''
        run: |
          # --- Spawn the VS shell
          if ($IsWindows) {
            Import-Module 'C:\Program Files (x86)\Microsoft Visual Studio\2022\BuildTools\Common7\Tools\Microsoft.VisualStudio.DevShell.dll'
            Enter-VsDevShell -VsInstallPath 'C:\Program Files (x86)\Microsoft Visual Studio\2022\BuildTools' -DevCmdArguments '-arch=x64 -host_arch=x64'
            $env:DISTUTILS_USE_SDK=1
          }
          # --- Build wheel
          # Local version suffix, e.g. +cu128-torch2.7.0
          $BUILDTAG = "+cu${{ steps.cuda_deps.outputs.CUDA_VERSION_PYTORCH }}-torch${{ matrix.torch }}"
          $env:BUILD_TARGET = "cuda"
          $env:TORCH_CUDA_ARCH_LIST = '${{ matrix.cudaarch }}'
          python -m build -n --wheel -C--build-option=egg_info "-C--build-option=--tag-build=$BUILDTAG"

      # Build sdist
      - name: Build sdist
        if: matrix.cuda == '' && matrix.rocm == ''
        run: |
          # --- Install dependencies
          uv pip install torch==${{ matrix.torch }} --index-url https://download.pytorch.org/whl/cpu
          uv pip install --upgrade build setuptools==69.5.1 wheel packaging ninja safetensors tokenizers numpy
          # --- Build sdist, and a wheel from it (no --sdist/--wheel flag given)
          # NOTE(review): EXLLAMA_NOCOMPILE presumably makes setup.py skip the
          # native extension build — confirm against setup.py.
          $env:EXLLAMA_NOCOMPILE=1
          python -m build -n

      # Upload files
      - name: Upload files to GitHub release
        if: steps.package_version.outputs.PACKAGE_VERSION != 'None' && inputs.release == '1'
        # NOTE(review): the action reference was corrupted to "[email protected]" in
        # the copy this was restored from. The action name follows from the
        # owner and the inputs below; the version tag is a best guess — confirm.
        uses: svenstaro/upload-release-action@2.6.1
        with:
          # NOTE(review): only wheels match this glob, so the sdist tarball
          # built by the sdist row is never attached — confirm that is intended.
          file: ./dist/*.whl
          tag: ${{ format('v{0}', steps.package_version.outputs.PACKAGE_VERSION) }}
          file_glob: true
          overwrite: true
          release_name: ${{ steps.package_version.outputs.PACKAGE_VERSION }}