From 2edcee37d9fbe56b8385c42c171c689da02dbc41 Mon Sep 17 00:00:00 2001 From: Ayush Ranjan Date: Tue, 7 Jan 2025 23:24:50 -0800 Subject: [PATCH] Specify all capabilities in TestGPUCheckpointRestore. This test was broken by 5e6589e0b7b1 ("Update CUDA test compatibility to keep up with added gVisor support.") which requires all images/gpu/cuda-tests/run_sample.go users to specify "all" driver capabilities. PiperOrigin-RevId: 713170590 --- pkg/test/dockerutil/gpu.go | 11 +++++------ test/gpu/cuda_test.go | 4 ++-- test/gpu/ffmpeg_test.go | 4 ++-- test/gpu/smoke_test.go | 2 +- test/gpu/sr_test.go | 4 +++- 5 files changed, 13 insertions(+), 12 deletions(-) diff --git a/pkg/test/dockerutil/gpu.go b/pkg/test/dockerutil/gpu.go index 9c46bb2045..910b030f1c 100644 --- a/pkg/test/dockerutil/gpu.go +++ b/pkg/test/dockerutil/gpu.go @@ -41,13 +41,12 @@ const ( ) const ( - // AllGPUCapabilities is the environment variable that enables all NVIDIA + // AllGPUCapabilitiesEnv is the environment variable that enables all NVIDIA // GPU capabilities within a container. - AllGPUCapabilities = "NVIDIA_DRIVER_CAPABILITIES=all" + AllGPUCapabilitiesEnv = "NVIDIA_DRIVER_CAPABILITIES=all" - // DefaultGPUCapabilities is the environment variable that enables default - // NVIDIA GPU capabilities within a container. - DefaultGPUCapabilities = "NVIDIA_DRIVER_CAPABILITIES=compute,utility" + // DefaultGPUCapabilities are the driver capabilities enabled by default. + DefaultGPUCapabilities = "compute,utility" ) // GPURunOpts returns Docker run options with GPU support enabled. @@ -79,7 +78,7 @@ func GPURunOpts(sniffGPUOpts SniffGPUOpts) (RunOpts, error) { ReadOnly: true, }) } - gpuEnv := []string{sniffGPUOpts.GPUCapabilities()} + gpuEnv := []string{"NVIDIA_DRIVER_CAPABILITIES=" + sniffGPUOpts.GPUCapabilities()} if !*setCOSGPU { return RunOpts{ diff --git a/test/gpu/cuda_test.go b/test/gpu/cuda_test.go index 0e8709bced..1944459594 100644 --- a/test/gpu/cuda_test.go +++ b/test/gpu/cuda_test.go @@ -433,7 +433,7 @@ func (*FullyCompatible) IsExpectedFailure(ctx context.Context, env *TestEnvironm // getContainerOpts returns the container run options to run CUDA tests. func getContainerOpts() (dockerutil.RunOpts, error) { opts, err := dockerutil.GPURunOpts(dockerutil.SniffGPUOpts{ - Capabilities: dockerutil.AllGPUCapabilities, + Capabilities: "all", }) if err != nil { return dockerutil.RunOpts{}, fmt.Errorf("failed to get GPU run options: %w", err) @@ -885,7 +885,7 @@ func TestCUDA(t *testing.T) { t.Errorf( " $ docker run --runtime=%s --gpus=all -e %s --rm %s /run_sample %s", dockerutil.Runtime(), - dockerutil.AllGPUCapabilities, + dockerutil.AllGPUCapabilitiesEnv, runOpts.Image, failedTests[0], ) diff --git a/test/gpu/ffmpeg_test.go b/test/gpu/ffmpeg_test.go index 9c819791fe..a6aaa32b5c 100644 --- a/test/gpu/ffmpeg_test.go +++ b/test/gpu/ffmpeg_test.go @@ -34,7 +34,7 @@ func TestFffmpegEncodeGPU(t *testing.T) { container := dockerutil.MakeContainer(ctx, t) defer container.CleanUp(ctx) opts, err := dockerutil.GPURunOpts(dockerutil.SniffGPUOpts{ - Capabilities: "NVIDIA_DRIVER_CAPABILITIES=video", + Capabilities: "video", AllowIncompatibleIoctl: true, // TODO(gvisor.dev/issue/9452): Remove once supported in gVisor. }) if err != nil { @@ -60,7 +60,7 @@ func TestFffmpegDecodeGPU(t *testing.T) { container := dockerutil.MakeContainer(ctx, t) defer container.CleanUp(ctx) opts, err := dockerutil.GPURunOpts(dockerutil.SniffGPUOpts{ - Capabilities: "NVIDIA_DRIVER_CAPABILITIES=video", + Capabilities: "video", AllowIncompatibleIoctl: true, // TODO(gvisor.dev/issue/9452): Remove once supported in gVisor. }) if err != nil { diff --git a/test/gpu/smoke_test.go b/test/gpu/smoke_test.go index ffa7a66f9c..40ea8be3d7 100644 --- a/test/gpu/smoke_test.go +++ b/test/gpu/smoke_test.go @@ -28,7 +28,7 @@ func TestGPUHello(t *testing.T) { defer c.CleanUp(ctx) opts, err := dockerutil.GPURunOpts(dockerutil.SniffGPUOpts{ - Capabilities: dockerutil.AllGPUCapabilities, + Capabilities: "all", }) if err != nil { t.Fatalf("failed to get GPU run options: %v", err) diff --git a/test/gpu/sr_test.go b/test/gpu/sr_test.go index 9fd9b5db61..b08498a611 100644 --- a/test/gpu/sr_test.go +++ b/test/gpu/sr_test.go @@ -35,7 +35,9 @@ func TestGPUCheckpointRestore(t *testing.T) { c := dockerutil.MakeContainer(ctx, t) defer c.CleanUp(ctx) - opts, err := dockerutil.GPURunOpts(dockerutil.SniffGPUOpts{}) + opts, err := dockerutil.GPURunOpts(dockerutil.SniffGPUOpts{ + Capabilities: "all", + }) if err != nil { t.Fatalf("failed to get GPU run options: %v", err) }