-
Notifications
You must be signed in to change notification settings - Fork 2.8k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add GitHub Actions workflows for running continuous tests with Pytest
Changes: - Adds `wheel_tests.yml`, which will be used to run continuous jobs that build artifacts and run CPU/CUDA tests. Jobs will run via workflow calls to `build_artifacts.yml`/`pytest_cpu.yml`/`pytest_gpu.yml`. - Adds testing of CUDA tests on H100 GPUs - Makes the script executable - Changes the name of GPU scripts and workflows to CUDA to make it clearer what is being tested PiperOrigin-RevId: 702497163
- Loading branch information
1 parent
043c260
commit da64a88
Showing
9 changed files
with
439 additions
and
3 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,148 @@ | ||
# CI - Build JAX Artifacts
# This workflow builds JAX wheels (jax, jaxlib, jax-cuda-plugin, and jax-cuda-pjrt) and optionally
# uploads them to a Google Cloud Storage (GCS) bucket. It can be triggered manually via
# workflow_dispatch or called by other workflows via workflow_call.
name: CI - Build JAX Artifacts

on:
  # Manual trigger: every input is a fixed-choice dropdown.
  workflow_dispatch:
    inputs:
      runner:
        description: "Which runner should the workflow run on?"
        type: choice
        required: true
        default: "linux-x86-n2-16"
        options:
          - "linux-x86-n2-16"
          - "linux-arm64-c4a-64"
          - "windows-x86-n2-16"
      artifact:
        description: "Which JAX artifact to build?"
        type: choice
        required: true
        default: "jaxlib"
        options:
          - "jax"
          - "jaxlib"
          - "jax-cuda-plugin"
          - "jax-cuda-pjrt"
      python:
        description: "Which python version should the artifact be built for?"
        type: choice
        required: false
        default: "3.12"
        # Versions stay quoted so that "3.10" is not read as the float 3.1.
        options:
          - "3.10"
          - "3.11"
          - "3.12"
          - "3.13"
      clone_main_xla:
        description: "Should latest XLA be used?"
        type: choice
        required: false
        default: "0"
        options:
          - "1"
          - "0"
      # Only the manual trigger declares this input; workflow_call has no equivalent.
      halt-for-connection:
        description: 'Should this workflow run wait for a remote connection?'
        type: choice
        required: false
        default: 'no'
        options:
          - 'yes'
          - 'no'

  # Reusable-workflow trigger: the same build inputs as free-form strings, plus the
  # upload controls, which are declared for this trigger only.
  workflow_call:
    inputs:
      runner:
        description: "Which runner should the workflow run on?"
        type: string
        required: true
        default: "linux-x86-n2-16"
      artifact:
        description: "Which JAX artifact to build?"
        type: string
        required: true
        default: "jaxlib"
      python:
        description: "Which python version should the artifact be built for?"
        type: string
        required: false
        default: "3.12"
      clone_main_xla:
        description: "Should latest XLA be used?"
        type: string
        required: false
        default: "0"
      upload_artifacts:
        description: "Should the artifacts be uploaded to a GCS bucket?"
        required: true
        default: true
        type: boolean
      upload_url_prefix:
        description: "GCS location prefix to where the artifacts should be uploaded"
        required: true
        default: 'gs://general-ml-ci-transient/jax-github-actions/jax/${{ github.workflow }}/${{ github.run_number }}'
        type: string

permissions:
  contents: read
|
||
jobs:
  # Builds the requested wheel on the requested runner and, when asked to, copies the
  # resulting dist/*.whl files to GCS under <prefix>/<platform>/python<version>/.
  build-artifacts:
    defaults:
      run:
        # Explicitly set the shell to bash to override Windows's default (cmd)
        shell: bash

    runs-on: ${{ inputs.runner }}

    # Picks the ml-build image matching the Linux runner architecture; the Windows branch
    # of the expression resolves to null.
    container: ${{ (contains(inputs.runner, 'linux-x86') && 'us-central1-docker.pkg.dev/tensorflow-sigs/tensorflow/ml-build:latest') ||
                   (contains(inputs.runner, 'linux-arm64') && 'us-central1-docker.pkg.dev/tensorflow-sigs/tensorflow/ml-build-arm64:latest') ||
                   (contains(inputs.runner, 'windows-x86') && null) }}

    env:
      JAXCI_HERMETIC_PYTHON_VERSION: "${{ inputs.python }}"
      JAXCI_CLONE_MAIN_XLA: "${{ inputs.clone_main_xla }}"

    name: "Build ${{ inputs.artifact }} (${{ inputs.runner }}, Python ${{ inputs.python }}, clone main XLA=${{ inputs.clone_main_xla }})"

    steps:
      - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683  # v4.2.2

      - name: Enable RBE if building on Linux x86 or Windows x86
        if: contains(inputs.runner, 'linux-x86') || contains(inputs.runner, 'windows-x86')
        run: echo "JAXCI_BUILD_ARTIFACT_WITH_RBE=1" >> $GITHUB_ENV

      # Halt for testing
      - name: Wait For Connection
        uses: google-ml-infra/actions/ci_connection@main
        with:
          halt-dispatch-input: ${{ inputs.halt-for-connection }}

      - name: Build ${{ inputs.artifact }}
        run: ./ci/build_artifacts.sh "${{ inputs.artifact }}"

      # Exports PLATFORM as "<lowercased uname -s>_<uname -m>" for the upload steps below.
      - name: Set PLATFORM env var for use in artifact upload URL
        run: |
          os=$(uname -s | awk '{print tolower($0)}')
          arch=$(uname -m)
          # Adjust name for Windows
          if [[ $os =~ "msys_nt" ]]; then
            os="windows"
          fi
          echo "PLATFORM=${os}_${arch}" >> $GITHUB_ENV

      - name: Upload artifacts to a GCS bucket (non-Windows runs)
        if: >-
          ${{ inputs.upload_artifacts && !contains(inputs.runner, 'windows-x86') }}
        run: >-
          gsutil -m cp -r $(pwd)/dist/*.whl
          "${{ inputs.upload_url_prefix }}"/$PLATFORM/python${JAXCI_HERMETIC_PYTHON_VERSION}/

      # Set shell to cmd to avoid path errors when using gcloud commands on Windows.
      # cmd does not expand POSIX-style $VAR / ${VAR}, so the environment variables are
      # referenced with %VAR% here; otherwise the destination path would contain the
      # literal text "$PLATFORM".
      - name: Upload artifacts to a GCS bucket (Windows runs)
        if: >-
          ${{ inputs.upload_artifacts && contains(inputs.runner, 'windows-x86') }}
        shell: cmd
        run: >-
          gsutil -m cp -r dist/*.whl
          "${{ inputs.upload_url_prefix }}"/%PLATFORM%/python%JAXCI_HERMETIC_PYTHON_VERSION%/
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,95 @@ | ||
# CI - Pytest CPU
#
# This workflow runs the CPU tests with Pytest. It can only be triggered by other workflows via
# `workflow_call`. It is used by the `wheel_tests.yml` workflow to run the Pytest CPU tests as a
# continuous job.
#
# It consists of the following job:
# run-tests:
#    - Downloads the jaxlib wheel from a GCS bucket. This wheel is usually built by the
#      build-jaxlib-artifact job in the `wheel_tests.yml` workflow.
#    - Executes the `run_pytest_cpu.sh` script, which performs the following actions:
#      - Installs the downloaded jaxlib wheel.
#      - Runs the CPU tests with Pytest.
name: CI - Pytest CPU

on:
  workflow_call:
    inputs:
      runner:
        description: "Which runner should the workflow run on?"
        type: string
        required: true
        default: "linux-x86-n2-16"
      python:
        description: "Which python version to test?"
        type: string
        required: true
        default: "3.12"
      enable-x64:
        description: "Should x64 mode be enabled?"
        type: string
        required: true
        default: "0"
      # The wheel is fetched from <prefix>/<platform>/python<version>/ under this location.
      download_url_prefix:
        description: "GCS location prefix from where the artifacts should be downloaded"
        required: true
        default: 'gs://general-ml-ci-transient/jax-github-actions/jax/${{ github.workflow }}/${{ github.run_number }}/${{ github.run_attempt }}'
        type: string
      halt-for-connection:
        description: 'Should this workflow run wait for a remote connection?'
        type: boolean
        required: false
        default: false
|
||
jobs:
  # Downloads the jaxlib wheel for this platform/Python version from GCS into dist/,
  # installs the Python requirements, then runs ci/run_pytest_cpu.sh.
  run-tests:
    defaults:
      run:
        # Explicitly set the shell to bash to override Windows's default (cmd)
        shell: bash
    runs-on: ${{ inputs.runner }}
    # Picks the ml-build image matching the Linux runner architecture; the Windows branch
    # of the expression resolves to null.
    container: ${{ (contains(inputs.runner, 'linux-x86') && 'us-central1-docker.pkg.dev/tensorflow-sigs/tensorflow/ml-build:latest') ||
                   (contains(inputs.runner, 'linux-arm64') && 'us-central1-docker.pkg.dev/tensorflow-sigs/tensorflow/ml-build-arm64:latest') ||
                   (contains(inputs.runner, 'windows-x86') && null) }}

    name: "Pytest CPU (${{ inputs.runner }}, Python ${{ inputs.python }}, x64=${{ inputs.enable-x64 }})"

    env:
      JAXCI_HERMETIC_PYTHON_VERSION: "${{ inputs.python }}"
      JAXCI_PYTHON: "python${{ inputs.python }}"
      JAXCI_ENABLE_X64: "${{ inputs.enable-x64 }}"

    steps:
      - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683  # v4.2.2

      # Halt for testing
      - name: Wait For Connection
        uses: google-ml-infra/actions/ci_connection@main
        with:
          halt-dispatch-input: ${{ inputs.halt-for-connection }}

      # Exports PLATFORM as "<lowercased uname -s>_<uname -m>" for the download steps below.
      - name: Set Platform env var for use in artifact download URL
        run: |
          os=$(uname -s | awk '{print tolower($0)}')
          arch=$(uname -m)
          # Adjust name for Windows
          if [[ $os =~ "msys_nt" ]]; then
            os="windows"
          fi
          echo "PLATFORM=${os}_${arch}" >> $GITHUB_ENV

      # This job defines no matrix, so the runner name must be read from `inputs`;
      # a `matrix.runner` reference would be empty and the Windows branch would never match.
      - name: Download jaxlib wheel from GCS (non-Windows runs)
        if: ${{ !contains(inputs.runner, 'windows-x86') }}
        run: >-
          mkdir -p $(pwd)/dist &&
          gsutil -m cp -r "${{ inputs.download_url_prefix }}"/$PLATFORM/python${JAXCI_HERMETIC_PYTHON_VERSION}/jaxlib*.whl $(pwd)/dist/

      # This step runs under cmd, which does not expand POSIX-style $VAR / ${VAR};
      # environment variables are therefore referenced as %VAR%.
      - name: Download jaxlib wheel from GCS (Windows runs)
        if: ${{ contains(inputs.runner, 'windows-x86') }}
        shell: cmd
        run: >-
          mkdir dist &&
          gsutil -m cp -r "${{ inputs.download_url_prefix }}"/%PLATFORM%/python%JAXCI_HERMETIC_PYTHON_VERSION%/jaxlib*.whl dist/

      - name: Install Python dependencies
        run: $JAXCI_PYTHON -m pip install -r build/requirements.in

      - name: Run Pytest CPU tests
        run: ./ci/run_pytest_cpu.sh
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,80 @@ | ||
# CI - Pytest CUDA
#
# This workflow runs the CUDA tests with Pytest. It can only be triggered by other workflows via
# `workflow_call`. It is used by the `wheel_tests.yml` workflow to run the Pytest CUDA tests as a
# continuous job.
#
# It consists of the following job:
# run-tests:
#    - Downloads the jaxlib and CUDA artifacts from a GCS bucket. These wheels are usually built by
#      the artifact build jobs in the `wheel_tests.yml` workflow.
#    - Executes the `run_pytest_cuda.sh` script, which performs the following actions:
#      - Installs the downloaded jaxlib wheel.
#      - Runs the CUDA tests with Pytest.
name: CI - Pytest CUDA

on:
  workflow_call:
    inputs:
      runner:
        description: "Which runner should the workflow run on?"
        type: string
        required: true
        default: "linux-x86-n2-16"
      python:
        description: "Which python version to test?"
        type: string
        required: true
        default: "3.12"
      # Kept as a quoted string so the version is matched textually, not as a float.
      cuda:
        description: "Which CUDA version to test?"
        type: string
        required: true
        default: "12.3"
      enable-x64:
        description: "Should x64 mode be enabled?"
        type: string
        required: true
        default: "0"
      # Wheels are fetched from <prefix>/<platform>/python<version>/ under this location.
      download_url_prefix:
        description: "GCS location prefix from where the artifacts should be downloaded"
        required: true
        default: 'gs://general-ml-ci-transient/jax-github-actions/jax/${{ github.workflow }}/${{ github.run_number }}/${{ github.run_attempt }}'
        type: string
      halt-for-connection:
        description: 'Should this workflow run wait for a remote connection?'
        type: boolean
        required: false
        default: false
|
||
jobs:
  # Downloads every wheel for this platform/Python version from GCS into dist/,
  # installs the Python requirements, then runs ci/run_pytest_cuda.sh.
  run-tests:
    runs-on: ${{ inputs.runner }}
    # TODO: Update to the generic ML ecosystem test containers when they are ready.
    # Only CUDA 12.3 and 12.1 have an image mapped here.
    # NOTE(review): any other `cuda` value leaves the expression without an image - confirm
    # callers only pass these two versions.
    container: ${{ (contains(inputs.cuda, '12.3') && 'us-central1-docker.pkg.dev/tensorflow-sigs/tensorflow/nosla-cuda12.3-cudnn9.1-ubuntu20.04-manylinux2014-multipython:latest') ||
                   (contains(inputs.cuda, '12.1') && 'us-central1-docker.pkg.dev/tensorflow-sigs/tensorflow/nosla-cuda12.1-cudnn9.1-ubuntu20.04-manylinux2014-multipython:latest') }}
    name: "Pytest CUDA (${{ inputs.runner }}, CUDA ${{ inputs.cuda }}, Python ${{ inputs.python }}, x64=${{ inputs.enable-x64 }})"

    env:
      JAXCI_HERMETIC_PYTHON_VERSION: "${{ inputs.python }}"
      JAXCI_PYTHON: "python${{ inputs.python }}"
      JAXCI_ENABLE_X64: "${{ inputs.enable-x64 }}"

    steps:
      - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683  # v4.2.2

      # Halt for testing
      - name: Wait For Connection
        uses: google-ml-infra/actions/ci_connection@main
        with:
          halt-dispatch-input: ${{ inputs.halt-for-connection }}

      # Exports PLATFORM as "<lowercased uname -s>_<uname -m>" for the download step below.
      - name: Set Platform env var for use in artifact download URL
        run: |
          os=$(uname -s | awk '{print tolower($0)}')
          arch=$(uname -m)
          echo "PLATFORM=${os}_${arch}" >> $GITHUB_ENV

      - name: Download artifacts from GCS
        run: >-
          mkdir -p $(pwd)/dist &&
          gsutil -m cp -r "${{ inputs.download_url_prefix }}"/$PLATFORM/python${JAXCI_HERMETIC_PYTHON_VERSION}/*.whl $(pwd)/dist/

      - name: Install Python dependencies
        run: $JAXCI_PYTHON -m pip install -r build/requirements.in

      - name: Run Pytest CUDA tests
        run: ./ci/run_pytest_cuda.sh
Oops, something went wrong.