From 2e05b3db2665edb461cbadd984749e00d37c4e6f Mon Sep 17 00:00:00 2001
From: Chen Sun
Date: Thu, 21 Nov 2024 09:15:11 -0800
Subject: [PATCH] chore(components): Use virtual env in GCPC Dockerfile and update incompatible dependencies

Signed-off-by: Chen Sun
PiperOrigin-RevId: 698806473
---
Review notes (free-form text placed after the three-dash line, before the diff):
* The venv is put on PATH as the absolute /root/venv/bin (WORKDIR /root plus
  `python3 -m venv venv`), so the venv's python3/pip3 are found regardless of
  the working directory the container is started with.
* Context and removed lines are kept verbatim, including their existing
  typos, because they must match the file being patched.
* The post-image blob id on the `index` line is the one from the original
  patch; regenerate the patch after applying if an exact id is needed.

 components/google-cloud/Dockerfile | 25 +++++++++++++------------
 1 file changed, 13 insertions(+), 12 deletions(-)

diff --git a/components/google-cloud/Dockerfile b/components/google-cloud/Dockerfile
index c63557c3dec..595241ca81e 100644
--- a/components/google-cloud/Dockerfile
+++ b/components/google-cloud/Dockerfile
@@ -14,12 +14,20 @@
 # Base image to use for this docker
 FROM marketplace.gcr.io/google/ubuntu2404:latest
 
-RUN apt update && apt -y install python3 python3-pip git
+RUN apt update && apt -y install git python3 \
+    python3-pip \
+    python3-venv
 
 WORKDIR /root
 
-# Upgrade pip to latest
-RUN pip3 install --upgrade pip
+# Create a virtual environment
+RUN python3 -m venv venv
+
+# Activate the virtual environment (absolute path, so it resolves from any working directory)
+ENV PATH="/root/venv/bin:$PATH"
+
+# Python 3.12 removed distutils
+RUN pip3 install -U pip setuptools
 
 # Required by gcp_launcher
 # Using google-cloud-aiplatform>=1.21.0 to avoid dataset creatation timeout
@@ -28,17 +36,10 @@ RUN pip3 install -U google-cloud-storage
 RUN pip3 install -U google-api-python-client
 
 # Required by dataflow_launcher
-# Pin to `2.50.0` for compatibility with `google-cloud-aiplatform`, which
-# depends on `shapely<3.0.0dev`.
-# Prefer an exact pin, since GCPC's apache_beam version must match the
-# version the in custom Dataflow worker images for the Dataflow job to succeed.
-# Inexact pins risk that the apache_beam in GCPC drifts away from a
-# user-specified version in the image.
-# From docs: """When running your pipeline, launch the pipeline using the Apache Beam SDK with the same version and language version as the SDK on your custom container image. This step avoids unexpected errors from incompatible dependencies or SDKs.""" https://cloud.google.com/dataflow/docs/guides/using-custom-containers#before_you_begin_2
-RUN pip3 install -U "apache_beam[gcp]==2.50.0"
+RUN pip3 install -U apache_beam
 
 # Required for sklearn/train_test_split_jsonl
-RUN pip3 install -U "fsspec>=0.7.4" "gcsfs>=0.6.0" "pandas<=1.3.5" "scikit-learn<=1.0.2"
+RUN pip3 install -U "fsspec>=0.7.4" "gcsfs>=0.6.0" "pandas<=1.3.5" "scikit-learn"
 
 # Required by experimental.notebooks.NotebooksExecutorOp
 RUN pip3 install -U google-cloud-notebooks