# Basically a partial copy of ./generated-linux-binary-manywheel-nightly.yml.
# Since the original is autogenerated, we have to sync this copy manually from
# time to time, but in exchange we avoid merge conflicts.
name: poolside-linux-binary-manywheel
on:
  # only manual triggers for now
  workflow_dispatch:
    inputs:
      publish:
        description: Upload to CodeArtifact
        type: choice
        required: true
        default: true
        options:
          - true
          - false
env:
  # version for uploading to CodeArtifact;
  # the current UTC date is appended to it as a suffix at build time
  PYTORCH_BUILD_VERSION_PREFIX: "2.6.0.dev20241210+cu126.poolside"
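  # e.g. a build on 2024-12-11 yields the full version
  # "2.6.0.dev20241210+cu126.poolside.20241211" (see PYTORCH_BUILD_VERSION below)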
  # Needed for conda builds
  ANACONDA_USER: pytorch
  BINARY_ENV_FILE: /tmp/env
  BUILD_ENVIRONMENT: linux-binary-manywheel
  GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
  PYTORCH_FINAL_PACKAGE_DIR: /artifacts
  PYTORCH_ROOT: /pytorch
  BUILDER_ROOT: /builder
  SHA1: ${{ github.event.pull_request.head.sha || github.sha }}
  SKIP_ALL_TESTS: 1
  # All vars below are from the auto-generated ./generated-linux-binary-manywheel-nightly.yml
  PACKAGE_TYPE: manywheel
  # TODO: This is a legacy variable that we eventually want to get rid of in
  # favor of GPU_ARCH_VERSION
  DESIRED_CUDA: cu126
  GPU_ARCH_VERSION: 12.6
  GPU_ARCH_TYPE: cuda
  # Note: we might need to pin a specific version of this image or build one ourselves
  DOCKER_IMAGE: pytorch/manylinux-builder:cuda12.6-main
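  # a `|`-separated list of PEP 508 requirement strings; the build scripts split
  # it on `|` and record each entry as a runtime dependency of the wheel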
  PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.6.80; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.5.1.17; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.6.4.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.3.0.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.7.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.1.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.4.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.6.3; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.21.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.6.85; platform_system == 'Linux' and platform_machine == 'x86_64'
  MAX_JOBS: 32
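  # 8.6 = Ampere (e.g. A40 / RTX 30xx), 9.0 = Hopper (H100); +PTX additionally
  # embeds PTX so newer architectures can JIT-compile the kernels at load time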
  TORCH_CUDA_ARCH_LIST: "8.6;9.0+PTX"
  # To publish:
  CODEARTIFACT_DOMAIN: poolside
  CODEARTIFACT_REPOSITORY: poolside-packages-python
concurrency:
  # for now we only allow one build at a time
  group: poolside-nightly-pytorch-build
  cancel-in-progress: true
jobs:
  build:
    if: ${{ github.repository_owner == 'poolsideai' }}
    runs-on: "ubuntu-22.04-64-pytorchci"
    permissions: # required for AWS credentials
      id-token: write
      contents: read
    strategy:
      matrix:
        desired_python: ["3.10", "3.12"]
        include:
          - desired_python: "3.10"
            desired_python_major: "3"
            desired_python_minor: "10"
          - desired_python: "3.12"
            desired_python_major: "3"
            desired_python_minor: "12"
    env:
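      # expands to e.g. "manywheel-py3_10-cuda12_6" for the 3.10 matrix entry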
      BUILD_NAME: manywheel-py${{ matrix.desired_python_major }}_${{ matrix.desired_python_minor }}-cuda12_6
      DESIRED_PYTHON: ${{ matrix.desired_python }}
    timeout-minutes: 210
    steps:
      - name: Make the env permanent for all later steps in this job (but not the secrets)
        shell: bash
        run: |
          {
            echo "PYTORCH_ROOT=${{ env.PYTORCH_ROOT }}"
            echo "BUILDER_ROOT=${{ env.BUILDER_ROOT }}"
            echo "PACKAGE_TYPE=${{ env.PACKAGE_TYPE }}"
            echo "DESIRED_CUDA=${{ env.DESIRED_CUDA }}"
            echo "GPU_ARCH_VERSION=${{ env.GPU_ARCH_VERSION }}"
            echo "GPU_ARCH_TYPE=${{ env.GPU_ARCH_TYPE }}"
            echo "DOCKER_IMAGE=${{ env.DOCKER_IMAGE }}"
            echo "SKIP_ALL_TESTS=${{ env.SKIP_ALL_TESTS }}"
            echo "DESIRED_PYTHON=${{ env.DESIRED_PYTHON }}"
            echo "PYTORCH_EXTRA_INSTALL_REQUIREMENTS=${{ env.PYTORCH_EXTRA_INSTALL_REQUIREMENTS }}"
            echo "ANACONDA_USER=${{ env.ANACONDA_USER }}"
            echo "BINARY_ENV_FILE=${{ env.BINARY_ENV_FILE }}"
            echo "BUILD_ENVIRONMENT=${{ env.BUILD_ENVIRONMENT }}"
            echo "BUILD_NAME=${{ env.BUILD_NAME }}"
            echo "PYTORCH_FINAL_PACKAGE_DIR=${{ env.PYTORCH_FINAL_PACKAGE_DIR }}"
            echo "SHA1=${{ env.SHA1 }}"
            echo "MAX_JOBS=${{ env.MAX_JOBS }}"
            echo "TORCH_CUDA_ARCH_LIST=${{ env.TORCH_CUDA_ARCH_LIST }}"
            DATE=$(date -u +%Y%m%d)
            echo "PYTORCH_BUILD_VERSION=${{ env.PYTORCH_BUILD_VERSION_PREFIX }}.$DATE"
          } >> "$GITHUB_ENV"
      - name: Checkout PyTorch
        uses: pytorch/pytorch/.github/actions/checkout-pytorch@main
      - name: Checkout PyTorch to pytorch dir
        uses: malfet/checkout@silent-checkout
        with:
          ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
          submodules: recursive
          path: pytorch
          quiet-checkout: true
      - name: Clean PyTorch checkout
        run: |
          # Remove any artifacts from the previous checkouts
          git clean -fxd
        working-directory: pytorch
      - name: Checkout pytorch/builder to builder dir
        uses: malfet/checkout@silent-checkout
        with:
          ref: main
          submodules: recursive
          repository: pytorch/builder
          path: builder
          quiet-checkout: true
      - name: Clean pytorch/builder checkout
        run: |
          # Remove any artifacts from the previous checkouts
          git clean -fxd
        working-directory: builder
      - name: Build PyTorch binary
        id: package
        run: |
          set -x
          echo "Building torch==$PYTORCH_BUILD_VERSION"
          mkdir -p "${RUNNER_TEMP}/artifacts"
          container_name=$(docker run \
            -e BINARY_ENV_FILE \
            -e BUILD_ENVIRONMENT \
            -e DESIRED_CUDA \
            -e DESIRED_DEVTOOLSET \
            -e DESIRED_PYTHON \
            -e GITHUB_ACTIONS \
            -e GPU_ARCH_TYPE \
            -e GPU_ARCH_VERSION \
            -e LIBTORCH_VARIANT \
            -e PACKAGE_TYPE \
            -e PYTORCH_FINAL_PACKAGE_DIR \
            -e PYTORCH_ROOT \
            -e BUILDER_ROOT \
            -e SKIP_ALL_TESTS \
            -e PYTORCH_EXTRA_INSTALL_REQUIREMENTS \
            -e USE_SPLIT_BUILD \
            -e MAX_JOBS \
            -e TORCH_CUDA_ARCH_LIST \
            -e PYTORCH_BUILD_VERSION \
            --tty \
            --detach \
            -v "${GITHUB_WORKSPACE}/pytorch:/pytorch" \
            -v "${GITHUB_WORKSPACE}/builder:/builder" \
            -v "${RUNNER_TEMP}/artifacts:/artifacts" \
            -w / \
            "${DOCKER_IMAGE}"
          )
          docker exec -t -w "${PYTORCH_ROOT}" "${container_name}" bash -c "bash .circleci/scripts/binary_populate_env.sh"
          if [[ ${BUILD_ENVIRONMENT} == *"aarch64"* ]]; then
            docker exec -t "${container_name}" bash -c "source ${BINARY_ENV_FILE} && bash /pytorch/.ci/aarch64_linux/aarch64_ci_build.sh"
          else
            docker exec -t "${container_name}" bash -c "source ${BINARY_ENV_FILE} && bash /pytorch/.ci/${{ env.PACKAGE_TYPE }}/build.sh"
          fi
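          # files created inside the container are owned by root, so hand the
          # artifacts back to the runner user before they are uploaded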
          docker exec -t "${container_name}" chown -R "$(id -u):$(id -g)" /artifacts
          # the legacy ##[set-output] workflow command is deprecated; write to $GITHUB_OUTPUT instead
          echo "version=$(echo ${RUNNER_TEMP}/artifacts/*.whl | cut -d- -f2)" >> "$GITHUB_OUTPUT"
      - name: Cleanup docker
        if: always()
        shell: bash
        run: |
          # stop the container for a clean worker stop
          # ignore expansion of "docker ps -q" since it could be empty
          # shellcheck disable=SC2046
          docker stop $(docker ps -q) || true
      # upload to github artifacts (as we might not publish)
      - uses: actions/upload-artifact@v4
        if: github.event.inputs.publish == 'false'
        with:
          name: ${{ env.BUILD_NAME }}
          if-no-files-found: error
          path:
            ${{ runner.temp }}/artifacts/*
      - name: Install publish dependencies
        if: github.event.inputs.publish == 'true'
        run: |
          set -x
          python -m pip install --upgrade pip
          python -m pip install twine
          sudo npm install -g badgen-cli
      - name: Configure AWS credentials for publishing
        if: github.event.inputs.publish == 'true'
        uses: aws-actions/configure-aws-credentials@v4
        with:
          role-to-assume: arn:aws:iam::${{ secrets.AWS_ACCOUNT_ID }}:role/gh-action-publish-artifacts-role
          aws-region: us-east-1
      - name: Upload version badge
        # the badge is global, so build it only once, from the 3.10 matrix entry
        if: github.event.inputs.publish == 'true' && matrix.desired_python == '3.10'
        run: |
          set -x
          badgen --subject version --status "${{ steps.package.outputs.version }}" --color blue > version.svg
          aws s3 cp --region us-east-2 --cache-control no-cache --acl public-read version.svg s3://pytorch-version/version.svg
      - name: Publish to CodeArtifact
        if: github.event.inputs.publish == 'true'
        run: |
          # enable tracing only after the exports below so the CodeArtifact
          # token is not echoed to the build log
          export TWINE_USERNAME=aws
          export TWINE_PASSWORD=$(aws codeartifact get-authorization-token --domain ${{ env.CODEARTIFACT_DOMAIN }} --domain-owner ${{ secrets.AWS_ACCOUNT_ID }} --query authorizationToken --output text)
          export TWINE_REPOSITORY_URL=$(aws codeartifact get-repository-endpoint --domain ${{ env.CODEARTIFACT_DOMAIN }} --domain-owner ${{ secrets.AWS_ACCOUNT_ID }} --repository ${{ env.CODEARTIFACT_REPOSITORY }} --region us-east-1 --format pypi --query repositoryEndpoint --output text)
          set -x
          twine upload --verbose ${{ runner.temp }}/artifacts/*
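
# To install the published wheels from CodeArtifact (a sketch; the pip index
# URL is configured by `aws codeartifact login`, and the date suffix below is
# illustrative):
#   aws codeartifact login --tool pip --domain poolside --repository poolside-packages-python
#   pip install "torch==2.6.0.dev20241210+cu126.poolside.20241211"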