From 311b2571375f300622a1b463b88d0ec5a1b84122 Mon Sep 17 00:00:00 2001
From: Etienne Perot
Date: Fri, 1 Nov 2024 19:02:11 -0700
Subject: [PATCH] Add CUDA tests to release pipeline.

These run in compatibility-checking mode, meaning that they run tests which are
expected to fail and ensure that these tests still fail. This takes even longer
than when not checking this, so these run only as part of the release pipeline.

PiperOrigin-RevId: 692355383
---
 .buildkite/release.yaml | 25 +++++++++++++++++++------
 1 file changed, 19 insertions(+), 6 deletions(-)

diff --git a/.buildkite/release.yaml b/.buildkite/release.yaml
index 53f4a6276a..a355d5b14d 100644
--- a/.buildkite/release.yaml
+++ b/.buildkite/release.yaml
@@ -1,15 +1,16 @@
 agents:
   queue: release
 _templates:
+  retry_settings: &retry_settings
+    automatic:
+      - exit_status: -1
+        limit: 10
+      - exit_status: "*"
+        limit: 2
   common: &common
     timeout_in_minutes: 180
     retry:
-      automatic:
-        - exit_status: -1
-          limit: 10
-        - exit_status: "*"
-          limit: 2
-
+      <<: *retry_settings
 notify:
   - email: "gvisor-eng+buildkite@google.com"
     if: build.state == "failed"
@@ -75,6 +76,18 @@ steps:
       - make gpu-all-tests
     agents:
       queue: gpu
+  - label: ":fish: CUDA tests"
+    # This is its own test rather than being part of the GPU tests,
+    # because it takes around 30 minutes to run.
+    parallelism: 32
+    timeout_in_minutes: 120
+    retry:
+      <<: *retry_settings
+    commands:
+      - make sudo TARGETS=//tools/gpu:main ARGS="install --latest" || cat /var/log/nvidia-installer.log
+      - make cuda-tests ARGS="--cuda_verify_compatibility=true"
+    agents:
+      queue: gpu
   - <<: *common
     label: ":screwdriver: All GPU Drivers Test"
     parallelism: 8