poolside-linux-binary-manywheel #18
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Basically a partial copy of ./generated-linux-binary-manywheel-nightly.yml.
# Because the original is autogenerated, this file has to be synced manually from time to time,
# but keeping a separate copy avoids conflicts.
name: poolside-linux-binary-manywheel

on:
  # only manual triggers for now
  workflow_dispatch:
    inputs:
      publish:
        description: Upload to CodeArtifact
        type: choice
        required: true
        # Quoted so the default and options stay strings instead of YAML
        # booleans; the publish steps compare this input against 'true'.
        default: "true"
        options:
          - "true"
          - "false"
# Workflow-wide environment. Number- and boolean-looking values are quoted so
# the YAML loader passes them through as the literal strings written here.
env:
  # Needed for conda builds
  ANACONDA_USER: pytorch
  BINARY_ENV_FILE: /tmp/env
  BUILD_ENVIRONMENT: linux-binary-manywheel
  GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
  PYTORCH_FINAL_PACKAGE_DIR: /artifacts
  PYTORCH_ROOT: /pytorch
  BUILDER_ROOT: /builder
  SHA1: ${{ github.event.pull_request.head.sha || github.sha }}
  SKIP_ALL_TESTS: "1"
  # All vars below are from the auto-generated ./generated-linux-binary-manywheel-nightly.yml
  PACKAGE_TYPE: manywheel
  # TODO: This is a legacy variable that we eventually want to get rid of in
  #       favor of GPU_ARCH_VERSION
  DESIRED_CUDA: cu126
  GPU_ARCH_VERSION: "12.6"
  GPU_ARCH_TYPE: cuda
  # Note: we might need to fix a specific version of this image or build one ourselves
  DOCKER_IMAGE: pytorch/manylinux-builder:cuda12.6-main
  USE_SPLIT_BUILD: "False"
  # Folded scalar: the line breaks below collapse to single spaces, giving one
  # long ' | '-separated requirement string.
  PYTORCH_EXTRA_INSTALL_REQUIREMENTS: >-
    nvidia-cuda-nvrtc-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' |
    nvidia-cuda-runtime-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' |
    nvidia-cuda-cupti-cu12==12.6.80; platform_system == 'Linux' and platform_machine == 'x86_64' |
    nvidia-cudnn-cu12==9.5.1.17; platform_system == 'Linux' and platform_machine == 'x86_64' |
    nvidia-cublas-cu12==12.6.4.1; platform_system == 'Linux' and platform_machine == 'x86_64' |
    nvidia-cufft-cu12==11.3.0.4; platform_system == 'Linux' and platform_machine == 'x86_64' |
    nvidia-curand-cu12==10.3.7.77; platform_system == 'Linux' and platform_machine == 'x86_64' |
    nvidia-cusolver-cu12==11.7.1.2; platform_system == 'Linux' and platform_machine == 'x86_64' |
    nvidia-cusparse-cu12==12.5.4.2; platform_system == 'Linux' and platform_machine == 'x86_64' |
    nvidia-cusparselt-cu12==0.6.3; platform_system == 'Linux' and platform_machine == 'x86_64' |
    nvidia-nccl-cu12==2.21.5; platform_system == 'Linux' and platform_machine == 'x86_64' |
    nvidia-nvtx-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' |
    nvidia-nvjitlink-cu12==12.6.85; platform_system == 'Linux' and platform_machine == 'x86_64'
  MAX_JOBS: "32"
  TORCH_CUDA_ARCH_LIST: "8.6;9.0+PTX"
  # For publish:
  CODEARTIFACT_DOMAIN: poolside
  CODEARTIFACT_REPOSITORY: poolside-packages-python
# All runs share one fixed group, so for now only a single build is allowed at
# a time; a newly started run cancels the one already in progress.
concurrency:
  group: poolside-nightly-pytorch-build
  cancel-in-progress: true
jobs:
  # Builds the manywheel inside the DOCKER_IMAGE container for each Python
  # version in the matrix, uploads the result as a GitHub artifact and, when the
  # `publish` input is 'true', pushes it to CodeArtifact with twine.
  build:
    if: ${{ github.repository_owner == 'poolsideai' }}
    runs-on: "ubuntu-22.04-64-pytorchci"
    permissions:  # required for AWS Credentials
      id-token: write
      contents: read
    strategy:
      matrix:
        desired_python: ["3.10", "3.12"]
        # Attach the split major/minor parts used to compose BUILD_NAME.
        include:
          - desired_python: "3.10"
            desired_python_major: "3"
            desired_python_minor: "10"
          - desired_python: "3.12"
            desired_python_major: "3"
            desired_python_minor: "12"
    env:
      BUILD_NAME: manywheel-py${{ matrix.desired_python_major }}_${{ matrix.desired_python_minor }}-cuda12_6
      DESIRED_PYTHON: ${{ matrix.desired_python }}
    timeout-minutes: 210
    steps:
      - name: Make the env permanent during this workflow (but not the secrets)
        shell: bash
        run: |
          # Append the non-secret variables to the file named by GITHUB_ENV so
          # later steps see them.
          {
            echo "PYTORCH_ROOT=${{ env.PYTORCH_ROOT }}"
            echo "BUILDER_ROOT=${{ env.BUILDER_ROOT }}"
            echo "PACKAGE_TYPE=${{ env.PACKAGE_TYPE }}"
            echo "DESIRED_CUDA=${{ env.DESIRED_CUDA }}"
            echo "GPU_ARCH_VERSION=${{ env.GPU_ARCH_VERSION }}"
            echo "GPU_ARCH_TYPE=${{ env.GPU_ARCH_TYPE }}"
            echo "DOCKER_IMAGE=${{ env.DOCKER_IMAGE }}"
            echo "SKIP_ALL_TESTS=${{ env.SKIP_ALL_TESTS }}"
            echo "DESIRED_PYTHON=${{ env.DESIRED_PYTHON }}"
            echo "PYTORCH_EXTRA_INSTALL_REQUIREMENTS=${{ env.PYTORCH_EXTRA_INSTALL_REQUIREMENTS }}"
            echo "ANACONDA_USER=${{ env.ANACONDA_USER }}"
            echo "BINARY_ENV_FILE=${{ env.BINARY_ENV_FILE }}"
            echo "BUILD_ENVIRONMENT=${{ env.BUILD_ENVIRONMENT }}"
            echo "BUILD_NAME=${{ env.BUILD_NAME }}"
            echo "PR_NUMBER=${{ env.PR_NUMBER }}"
            echo "PYTORCH_FINAL_PACKAGE_DIR=${{ env.PYTORCH_FINAL_PACKAGE_DIR }}"
            echo "SHA1=${{ env.SHA1 }}"
            echo "USE_SPLIT_BUILD=${{ env.USE_SPLIT_BUILD }}"
            echo "MAX_JOBS=${{ env.MAX_JOBS }}"
            echo "TORCH_CUDA_ARCH_LIST=${{ env.TORCH_CUDA_ARCH_LIST }}"
          } >> "${GITHUB_ENV}"
      - name: Checkout PyTorch
        uses: pytorch/pytorch/.github/actions/checkout-pytorch@main
      - name: Checkout PyTorch to pytorch dir
        uses: malfet/checkout@silent-checkout
        with:
          ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
          submodules: recursive
          path: pytorch
          quiet-checkout: true
      - name: Clean PyTorch checkout
        run: |
          # Remove any artifacts from the previous checkouts
          git clean -fxd
        working-directory: pytorch
      - name: Checkout pytorch/builder to builder dir
        uses: malfet/checkout@silent-checkout
        with:
          ref: main
          submodules: recursive
          repository: pytorch/builder
          path: builder
          quiet-checkout: true
      - name: Clean pytorch/builder checkout
        run: |
          # Remove any artifacts from the previous checkouts
          git clean -fxd
        working-directory: builder
      - name: Build PyTorch binary
        run: |
          set -x
          mkdir -p "${RUNNER_TEMP}/artifacts"
          # Start a detached container with the checkouts and the artifact dir
          # mounted; `docker run --detach` prints the container id.
          container_name=$(docker run \
            -e BINARY_ENV_FILE \
            -e BUILD_ENVIRONMENT \
            -e DESIRED_CUDA \
            -e DESIRED_DEVTOOLSET \
            -e DESIRED_PYTHON \
            -e GITHUB_ACTIONS \
            -e GPU_ARCH_TYPE \
            -e GPU_ARCH_VERSION \
            -e LIBTORCH_VARIANT \
            -e PACKAGE_TYPE \
            -e PYTORCH_FINAL_PACKAGE_DIR \
            -e PYTORCH_ROOT \
            -e BUILDER_ROOT \
            -e SKIP_ALL_TESTS \
            -e PYTORCH_EXTRA_INSTALL_REQUIREMENTS \
            -e USE_SPLIT_BUILD \
            -e MAX_JOBS \
            -e TORCH_CUDA_ARCH_LIST \
            --tty \
            --detach \
            -v "${GITHUB_WORKSPACE}/pytorch:/pytorch" \
            -v "${GITHUB_WORKSPACE}/builder:/builder" \
            -v "${RUNNER_TEMP}/artifacts:/artifacts" \
            -w / \
            "${DOCKER_IMAGE}"
          )
          docker exec -t -w "${PYTORCH_ROOT}" "${container_name}" bash -c "bash .circleci/scripts/binary_populate_env.sh"
          if [[ ${BUILD_ENVIRONMENT} == *"aarch64"* ]]; then
            docker exec -t "${container_name}" bash -c "source ${BINARY_ENV_FILE} && bash /pytorch/.ci/aarch64_linux/aarch64_ci_build.sh"
          else
            docker exec -t "${container_name}" bash -c "source ${BINARY_ENV_FILE} && bash /pytorch/.ci/${{ env.PACKAGE_TYPE }}/build.sh"
          fi
          # Hand ownership of the build outputs back to the runner user.
          docker exec -t "${container_name}" chown -R "$(id -u):$(id -g)" /artifacts
      - name: Cleanup docker
        if: always()
        shell: bash
        run: |
          # stop the container for clean worker stop
          # ignore expansion of "docker ps -q" since it could be empty
          # shellcheck disable=SC2046
          docker stop $(docker ps -q) || true
      # upload to github artifacts (as we might not publish)
      - name: Upload wheel to GitHub artifacts
        uses: actions/upload-artifact@v4.4.0
        with:
          name: ${{ env.BUILD_NAME }}
          if-no-files-found: error
          path: ${{ runner.temp }}/artifacts/*
      - name: Install publish dependencies
        if: github.event.inputs.publish == 'true'
        run: |
          python -m pip install --upgrade pip
          pip install twine
      - name: Configure AWS credentials for publish
        if: github.event.inputs.publish == 'true'
        uses: aws-actions/configure-aws-credentials@v4
        with:
          role-to-assume: arn:aws:iam::${{ secrets.AWS_ACCOUNT_ID }}:role/gh-action-publish-artifacts-role
          aws-region: us-east-1
      - name: Publish to CodeArtifact
        if: github.event.inputs.publish == 'true'
        shell: bash
        run: |
          set -euo pipefail
          # Assign first and export afterwards: `export VAR=$(cmd)` would hide a
          # failing aws call behind the exit status of `export`.
          TWINE_PASSWORD=$(aws codeartifact get-authorization-token --domain ${{ env.CODEARTIFACT_DOMAIN }} --domain-owner ${{ secrets.AWS_ACCOUNT_ID }} --query authorizationToken --output text)
          TWINE_REPOSITORY_URL=$(aws codeartifact get-repository-endpoint --domain ${{ env.CODEARTIFACT_DOMAIN }} --domain-owner ${{ secrets.AWS_ACCOUNT_ID }} --repository ${{ env.CODEARTIFACT_REPOSITORY }} --region us-east-1 --format pypi --query repositoryEndpoint --output text)
          export TWINE_USERNAME=aws
          export TWINE_PASSWORD TWINE_REPOSITORY_URL
          twine upload --verbose "${{ runner.temp }}"/artifacts/*