# File: poolside-nightly-build.yaml (repository forked from pytorch/pytorch)
# Basically a partial copy of ./generated-linux-binary-manywheel-nightly.yml.
# The original is autogenerated, so this copy has to be synced with it manually
# from time to time, but keeping a separate file avoids merge conflicts.
name: poolside-linux-binary-manywheel
on:
  # only manual triggers for now
  workflow_dispatch:
    inputs:
      publish:
        description: Upload to CodeArtifact
        type: choice
        required: true
        # Choice values are strings: the job steps compare this input against
        # the strings 'true' / 'false', so keep the values quoted instead of
        # relying on YAML boolean-to-string coercion.
        default: "true"
        options:
          - "true"
          - "false"
# Workflow-wide environment. Every value is consumed as a string, so anything
# that looks like a number is quoted to avoid implicit YAML typing.
env:
  # version for uploading to CodeArtifact
  # current date will be added to it later as a suffix
  PYTORCH_BUILD_VERSION_PREFIX: "2.6.0.dev20241210+cu126.poolside"
  # Needed for conda builds
  ANACONDA_USER: pytorch
  BINARY_ENV_FILE: /tmp/env
  BUILD_ENVIRONMENT: linux-binary-manywheel
  GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
  PYTORCH_FINAL_PACKAGE_DIR: /artifacts
  PYTORCH_ROOT: /pytorch
  BUILDER_ROOT: /builder
  SHA1: ${{ github.event.pull_request.head.sha || github.sha }}
  SKIP_ALL_TESTS: "1"
  # All vars below are from the auto-generated ./generated-linux-binary-manywheel-nightly.yml
  PACKAGE_TYPE: manywheel
  # TODO: This is a legacy variable that we eventually want to get rid of in
  # favor of GPU_ARCH_VERSION
  DESIRED_CUDA: cu126
  GPU_ARCH_VERSION: "12.6"
  GPU_ARCH_TYPE: cuda
  # Note: we might need to fix a specific version of this image or build one ourselves
  DOCKER_IMAGE: pytorch/manylinux-builder:cuda12.6-main
  # Folded scalar: the lines below are joined with single spaces, producing
  # one line of requirements separated by " | ".
  PYTORCH_EXTRA_INSTALL_REQUIREMENTS: >-
    nvidia-cuda-nvrtc-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' |
    nvidia-cuda-runtime-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' |
    nvidia-cuda-cupti-cu12==12.6.80; platform_system == 'Linux' and platform_machine == 'x86_64' |
    nvidia-cudnn-cu12==9.5.1.17; platform_system == 'Linux' and platform_machine == 'x86_64' |
    nvidia-cublas-cu12==12.6.4.1; platform_system == 'Linux' and platform_machine == 'x86_64' |
    nvidia-cufft-cu12==11.3.0.4; platform_system == 'Linux' and platform_machine == 'x86_64' |
    nvidia-curand-cu12==10.3.7.77; platform_system == 'Linux' and platform_machine == 'x86_64' |
    nvidia-cusolver-cu12==11.7.1.2; platform_system == 'Linux' and platform_machine == 'x86_64' |
    nvidia-cusparse-cu12==12.5.4.2; platform_system == 'Linux' and platform_machine == 'x86_64' |
    nvidia-cusparselt-cu12==0.6.3; platform_system == 'Linux' and platform_machine == 'x86_64' |
    nvidia-nccl-cu12==2.21.5; platform_system == 'Linux' and platform_machine == 'x86_64' |
    nvidia-nvtx-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' |
    nvidia-nvjitlink-cu12==12.6.85; platform_system == 'Linux' and platform_machine == 'x86_64'
  MAX_JOBS: "32"
  TORCH_CUDA_ARCH_LIST: "8.6;9.0+PTX"
  # To publish:
  CODEARTIFACT_DOMAIN: poolside
  CODEARTIFACT_REPOSITORY: poolside-packages-python
# All runs share one concurrency group, so only a single build is active at a
# time for now; starting a new run cancels the one already in progress.
concurrency:
  group: poolside-nightly-pytorch-build
  cancel-in-progress: true
jobs:
  # Builds the CUDA 12.6 manywheel inside the builder docker image for every
  # Python version in the matrix. Depending on the `publish` input the wheel is
  # either uploaded to CodeArtifact or kept as a GitHub Actions artifact.
  build:
    if: ${{ github.repository_owner == 'poolsideai' }}
    runs-on: "ubuntu-22.04-64-pytorchci"
    permissions:  # required for AWS Credentials
      id-token: write
      contents: read
    strategy:
      fail-fast: false
      matrix:
        desired_python: ["3.10", "3.12"]
        include:
          - desired_python: "3.10"
            desired_python_major: "3"
            desired_python_minor: "10"
          - desired_python: "3.12"
            desired_python_major: "3"
            desired_python_minor: "12"
    env:
      BUILD_NAME: manywheel-py${{ matrix.desired_python_major }}_${{ matrix.desired_python_minor }}-cuda12_6
      DESIRED_PYTHON: ${{ matrix.desired_python }}
    timeout-minutes: 210
    steps:
      - name: Make the env permanent during this workflow (but not the secrets)
        shell: bash
        run: |
          {
            echo "PYTORCH_ROOT=${{ env.PYTORCH_ROOT }}"
            echo "BUILDER_ROOT=${{ env.BUILDER_ROOT }}"
            echo "PACKAGE_TYPE=${{ env.PACKAGE_TYPE }}"
            echo "DESIRED_CUDA=${{ env.DESIRED_CUDA }}"
            echo "GPU_ARCH_VERSION=${{ env.GPU_ARCH_VERSION }}"
            echo "GPU_ARCH_TYPE=${{ env.GPU_ARCH_TYPE }}"
            echo "DOCKER_IMAGE=${{ env.DOCKER_IMAGE }}"
            echo "SKIP_ALL_TESTS=${{ env.SKIP_ALL_TESTS }}"
            echo "DESIRED_PYTHON=${{ env.DESIRED_PYTHON }}"
            echo "PYTORCH_EXTRA_INSTALL_REQUIREMENTS=${{ env.PYTORCH_EXTRA_INSTALL_REQUIREMENTS }}"
            echo "ANACONDA_USER=${{ env.ANACONDA_USER }}"
            echo "BINARY_ENV_FILE=${{ env.BINARY_ENV_FILE }}"
            echo "BUILD_ENVIRONMENT=${{ env.BUILD_ENVIRONMENT }}"
            echo "BUILD_NAME=${{ env.BUILD_NAME }}"
            echo "PYTORCH_FINAL_PACKAGE_DIR=${{ env.PYTORCH_FINAL_PACKAGE_DIR }}"
            echo "SHA1=${{ env.SHA1 }}"
            echo "MAX_JOBS=${{ env.MAX_JOBS }}"
            echo "TORCH_CUDA_ARCH_LIST=${{ env.TORCH_CUDA_ARCH_LIST }}"
            # The final version is the configured prefix plus the current UTC date.
            DATE=$(date -u +%Y%m%d)
            echo "PYTORCH_BUILD_VERSION=${{ env.PYTORCH_BUILD_VERSION_PREFIX }}.$DATE"
          } >> "$GITHUB_ENV"
      - name: Checkout PyTorch
        uses: pytorch/pytorch/.github/actions/checkout-pytorch@main
      - name: Checkout PyTorch to pytorch dir
        uses: malfet/checkout@silent-checkout
        with:
          ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
          submodules: recursive
          path: pytorch
          quiet-checkout: true
      - name: Clean PyTorch checkout
        run: |
          # Remove any artifacts from the previous checkouts
          git clean -fxd
        working-directory: pytorch
      - name: Checkout pytorch/builder to builder dir
        uses: malfet/checkout@silent-checkout
        with:
          ref: main
          submodules: recursive
          repository: pytorch/builder
          path: builder
          quiet-checkout: true
      - name: Clean pytorch/builder checkout
        run: |
          # Remove any artifacts from the previous checkouts
          git clean -fxd
        working-directory: builder
      - name: Build PyTorch binary
        id: package
        run: |
          set -x
          echo "Building torch==$PYTORCH_BUILD_VERSION"
          mkdir -p "${RUNNER_TEMP}/artifacts"
          # Start a detached builder container with both checkouts and the
          # artifacts directory mounted; `docker run --detach` prints the
          # container id, which the later `docker exec` calls use.
          container_name=$(docker run \
            -e BINARY_ENV_FILE \
            -e BUILD_ENVIRONMENT \
            -e DESIRED_CUDA \
            -e DESIRED_DEVTOOLSET \
            -e DESIRED_PYTHON \
            -e GITHUB_ACTIONS \
            -e GPU_ARCH_TYPE \
            -e GPU_ARCH_VERSION \
            -e LIBTORCH_VARIANT \
            -e PACKAGE_TYPE \
            -e PYTORCH_FINAL_PACKAGE_DIR \
            -e PYTORCH_ROOT \
            -e BUILDER_ROOT \
            -e SKIP_ALL_TESTS \
            -e PYTORCH_EXTRA_INSTALL_REQUIREMENTS \
            -e USE_SPLIT_BUILD \
            -e MAX_JOBS \
            -e TORCH_CUDA_ARCH_LIST \
            -e PYTORCH_BUILD_VERSION \
            --tty \
            --detach \
            -v "${GITHUB_WORKSPACE}/pytorch:/pytorch" \
            -v "${GITHUB_WORKSPACE}/builder:/builder" \
            -v "${RUNNER_TEMP}/artifacts:/artifacts" \
            -w / \
            "${DOCKER_IMAGE}"
          )
          docker exec -t -w "${PYTORCH_ROOT}" "${container_name}" bash -c "bash .circleci/scripts/binary_populate_env.sh"
          if [[ ${BUILD_ENVIRONMENT} == *"aarch64"* ]]; then
            docker exec -t "${container_name}" bash -c "source ${BINARY_ENV_FILE} && bash /pytorch/.ci/aarch64_linux/aarch64_ci_build.sh"
          else
            docker exec -t "${container_name}" bash -c "source ${BINARY_ENV_FILE} && bash /pytorch/.ci/${{ env.PACKAGE_TYPE }}/build.sh"
          fi
          # Hand ownership of the build output to the runner's uid/gid
          # ($(id ...) is expanded on the host, not in the container).
          docker exec -t "${container_name}" chown -R "$(id -u):$(id -g)" /artifacts
          # Expose the wheel version as a step output. The version is the second
          # dash-separated field of the wheel file name; take the basename first
          # so that dashes in the runner temp path cannot shift the field, and
          # write to GITHUB_OUTPUT instead of the deprecated set-output command.
          wheel_file=$(find "${RUNNER_TEMP}/artifacts" -maxdepth 1 -name '*.whl' -print -quit)
          echo "version=$(basename "${wheel_file}" | cut -d- -f2)" >> "${GITHUB_OUTPUT}"
      - name: Cleanup docker
        if: always()
        shell: bash
        run: |
          # stop the container for clean worker stop
          # ignore expansion of "docker ps -q" since it could be empty
          # shellcheck disable=SC2046
          docker stop $(docker ps -q) || true
      # upload to github artifacts (as we might not publish)
      # NOTE(review): the action reference was mangled in this copy of the file
      # ("actions/[email protected]"); v4.4.0 is the reconstructed tag - confirm.
      - uses: actions/upload-artifact@v4.4.0
        if: github.event.inputs.publish == 'false'
        with:
          name: ${{ env.BUILD_NAME }}
          if-no-files-found: error
          path: ${{ runner.temp }}/artifacts/*
      - name: Install publish dependencies
        if: github.event.inputs.publish == 'true'
        run: |
          set -x
          python -m pip install --upgrade pip
          python -m pip install twine
          sudo npm install -g badgen-cli
      - name: Configure AWS credentials for publishing
        if: github.event.inputs.publish == 'true'
        uses: aws-actions/configure-aws-credentials@v4
        with:
          role-to-assume: arn:aws:iam::${{ secrets.AWS_ACCOUNT_ID }}:role/gh-action-publish-artifacts-role
          aws-region: us-east-1
      # Only one matrix entry (Python 3.10) uploads the badge.
      - name: Upload version badge
        if: github.event.inputs.publish == 'true' && matrix.desired_python == '3.10'
        env:
          # Passed through the environment and quoted below, so an empty or
          # unusual version string cannot change how the command line parses.
          PACKAGE_VERSION: ${{ steps.package.outputs.version }}
        run: |
          set -x
          badgen --subject version --status "${PACKAGE_VERSION}" --color blue > version.svg
          aws s3 cp --region us-east-2 --cache-control no-cache --acl public-read version.svg s3://pytorch-version/version.svg
      - name: Publish to CodeArtifact
        if: github.event.inputs.publish == 'true'
        run: |
          # Credentials are resolved before `set -x` so the token is not traced.
          # Assignment and export are separate so a failing `aws` call aborts the
          # step instead of being masked by the exit status of `export`.
          TWINE_USERNAME=aws
          TWINE_PASSWORD=$(aws codeartifact get-authorization-token \
            --domain ${{ env.CODEARTIFACT_DOMAIN }} \
            --domain-owner ${{ secrets.AWS_ACCOUNT_ID }} \
            --query authorizationToken \
            --output text)
          TWINE_REPOSITORY_URL=$(aws codeartifact get-repository-endpoint \
            --domain ${{ env.CODEARTIFACT_DOMAIN }} \
            --domain-owner ${{ secrets.AWS_ACCOUNT_ID }} \
            --repository ${{ env.CODEARTIFACT_REPOSITORY }} \
            --region us-east-1 \
            --format pypi \
            --query repositoryEndpoint \
            --output text)
          export TWINE_USERNAME TWINE_PASSWORD TWINE_REPOSITORY_URL
          set -x
          twine upload --verbose "${RUNNER_TEMP}/artifacts/"*