# basically a partial copy of ./generated-linux-binary-manywheel-nightly.yml
# since the original is autogenerated, we would need to sync this manually from
# time to time, but this way we avoid merge conflicts
name: poolside-linux-binary-manywheel
on:
  # only manual triggers for now
  workflow_dispatch:
    inputs:
      timeout-minutes:
        required: false
        default: 210
        type: number
        description: timeout for the job
env:
  # Needed for conda builds
  ANACONDA_USER: pytorch
  AWS_DEFAULT_REGION: us-east-1
  BINARY_ENV_FILE: /tmp/env
  BUILD_ENVIRONMENT: linux-binary-manywheel
  GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
  PR_NUMBER: ${{ github.event.pull_request.number }}
  PYTORCH_FINAL_PACKAGE_DIR: /artifacts
  PYTORCH_ROOT: /pytorch
  SHA1: ${{ github.event.pull_request.head.sha || github.sha }}
  SKIP_ALL_TESTS: 1
  # All vars below are from the auto-generated ./generated-linux-binary-manywheel-nightly.yml.
  # When syncing, skip PYTORCH_ROOT and BUILD_ENVIRONMENT: they are already defined
  # above, and duplicate keys make the workflow file invalid.
  PACKAGE_TYPE: manywheel
  # TODO: This is a legacy variable that we eventually want to get rid of in
  # favor of GPU_ARCH_VERSION
  DESIRED_CUDA: cu118
  GPU_ARCH_VERSION: 11.8
  GPU_ARCH_TYPE: cuda
  # Note: we might need to pin a specific version of this image or build one ourselves
  DOCKER_IMAGE: pytorch/manylinux-builder:cuda11.8-main
  USE_SPLIT_BUILD: False
  DESIRED_PYTHON: "3.10"
  BUILD_NAME: manywheel-py3_10-cuda11_8
  PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu11==11.8.89; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu11==11.8.89; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu11==11.8.87; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu11==9.1.0.70; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu11==11.11.3.6; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu11==10.9.0.58; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu11==10.3.0.86; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu11==11.4.1.48; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu11==11.7.5.86; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu11==2.21.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu11==11.8.86; platform_system == 'Linux' and platform_machine == 'x86_64'
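  # The '|'-separated entries above are individual PEP 508 requirements, each
  # gated on Linux/x86_64 environment markers; they are presumably split on '|'
  # by the build scripts and injected as install requirements of the built wheel.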
concurrency:
  # only one build at a time: a newly triggered run cancels any run already in
  # progress in this group
  group: poolside-nightly-pytorch-build
  cancel-in-progress: true
jobs:
  manywheel-py3_10-cuda11_8-build:
    if: ${{ github.repository_owner == 'poolsideai' }}
    runs-on: "ubuntu-22.04-64-pytorchci"
    timeout-minutes: ${{ inputs.timeout-minutes }}
    steps:
      - name: Make the env permanent during this workflow (but not the secrets)
        shell: bash
        run: |
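          # Lines appended to $GITHUB_ENV become environment variables for all
          # later steps in this job, which is what makes these values stick for
          # the rest of the workflow.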
          {
            echo "PYTORCH_ROOT=${{ env.PYTORCH_ROOT }}"
            echo "PACKAGE_TYPE=${{ env.PACKAGE_TYPE }}"
            echo "DESIRED_CUDA=${{ env.DESIRED_CUDA }}"
            echo "GPU_ARCH_VERSION=${{ env.GPU_ARCH_VERSION }}"
            echo "GPU_ARCH_TYPE=${{ env.GPU_ARCH_TYPE }}"
            echo "DOCKER_IMAGE=${{ env.DOCKER_IMAGE }}"
            echo "SKIP_ALL_TESTS=${{ env.SKIP_ALL_TESTS }}"
            echo "DESIRED_PYTHON=${{ env.DESIRED_PYTHON }}"
            echo "PYTORCH_EXTRA_INSTALL_REQUIREMENTS=${{ env.PYTORCH_EXTRA_INSTALL_REQUIREMENTS }}"
            echo "ANACONDA_USER=${{ env.ANACONDA_USER }}"
            echo "BINARY_ENV_FILE=${{ env.BINARY_ENV_FILE }}"
            echo "BUILD_ENVIRONMENT=${{ env.BUILD_ENVIRONMENT }}"
            echo "BUILD_NAME=${{ env.BUILD_NAME }}"
            echo "PR_NUMBER=${{ env.PR_NUMBER }}"
            echo "PYTORCH_FINAL_PACKAGE_DIR=${{ env.PYTORCH_FINAL_PACKAGE_DIR }}"
            echo "SHA1=${{ env.SHA1 }}"
            echo "USE_SPLIT_BUILD=${{ env.USE_SPLIT_BUILD }}"
          } >> "${GITHUB_ENV}"
      - name: Checkout PyTorch
        uses: pytorch/pytorch/.github/actions/checkout-pytorch@main
      - name: Checkout PyTorch to pytorch dir
        uses: malfet/checkout@silent-checkout
        with:
          ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
          submodules: recursive
          path: pytorch
          quiet-checkout: true
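      # the recursive clone under ./pytorch is what gets mounted into the build
      # container as /pytorch in the build step below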
      - name: Clean PyTorch checkout
        run: |
          # Remove any artifacts from the previous checkouts
          git clean -fxd
        working-directory: pytorch
      - name: Build PyTorch binary
        run: |
          set -x
          chown -R root ${GITHUB_WORKSPACE}/pytorch
          mkdir -p ${RUNNER_TEMP}/artifacts
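          # Start a long-lived build container (detached, with a TTY) so the
          # docker exec calls below can run inside it.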
          container_name=$(docker run \
            -e BINARY_ENV_FILE \
            -e BUILD_ENVIRONMENT \
            -e DESIRED_CUDA \
            -e DESIRED_DEVTOOLSET \
            -e DESIRED_PYTHON \
            -e GITHUB_ACTIONS \
            -e GPU_ARCH_TYPE \
            -e GPU_ARCH_VERSION \
            -e LIBTORCH_VARIANT \
            -e PACKAGE_TYPE \
            -e PYTORCH_FINAL_PACKAGE_DIR \
            -e PYTORCH_ROOT \
            -e SKIP_ALL_TESTS \
            -e PYTORCH_EXTRA_INSTALL_REQUIREMENTS \
            -e USE_SPLIT_BUILD \
            --tty \
            --detach \
            -v "${GITHUB_WORKSPACE}/pytorch:/pytorch" \
            -v "${RUNNER_TEMP}/artifacts:/artifacts" \
            -w / \
            "${DOCKER_IMAGE}"
          )
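          # binary_populate_env.sh derives the remaining build configuration and
          # writes it to ${BINARY_ENV_FILE} (/tmp/env), which the build commands
          # below source before running.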
          docker exec -t -w "${PYTORCH_ROOT}" "${container_name}" bash -c "bash .circleci/scripts/binary_populate_env.sh"
          if [[ ${BUILD_ENVIRONMENT} == *"aarch64"* ]]; then
            docker exec -t "${container_name}" bash -c "source ${BINARY_ENV_FILE} && bash /pytorch/.ci/aarch64_linux/aarch64_ci_build.sh"
          else
            docker exec -t "${container_name}" bash -c "source ${BINARY_ENV_FILE} && bash /pytorch/.ci/${{ env.PACKAGE_TYPE }}/build.sh"
          fi
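          # Hand the artifacts (created as root inside the container) back to the
          # runner user so the upload step can read them.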
          docker exec -t "${container_name}" chown -R "$(id -u):$(id -g)" /artifacts
      # the pinned version of upload-artifact was garbled in the source; v4 assumed
      - uses: actions/upload-artifact@v4
        with:
          name: ${{ env.BUILD_NAME }}
          if-no-files-found: error
          path: ${{ runner.temp }}/artifacts/*
      - name: Cleanup docker
        if: always()
        shell: bash
        run: |
          # stop the container for clean worker stop
          # ignore expansion of "docker ps -q" since it could be empty
          # shellcheck disable=SC2046
          docker stop $(docker ps -q) || true