From 3869e11e014bd989f02dc5bfc13c060cc10b4b72 Mon Sep 17 00:00:00 2001 From: Maroun Touma Date: Tue, 17 Dec 2024 21:43:23 -0500 Subject: [PATCH 1/6] trigger a test of all transforms Signed-off-by: Maroun Touma --- transforms/.make.cicd.targets | 1 + 1 file changed, 1 insertion(+) diff --git a/transforms/.make.cicd.targets b/transforms/.make.cicd.targets index 23475f57f..95f310a4e 100644 --- a/transforms/.make.cicd.targets +++ b/transforms/.make.cicd.targets @@ -17,6 +17,7 @@ TRANSFORM_RAY_SRC?="-m dpk_$(TRANSFORM_NAME).ray.transform" TRANSFORM_SPARK_SRC?="-m dpk_$(TRANSFORM_NAME).spark.transform" + venv:: .defaults.create-venv source venv/bin/activate && $(PIP) install -e $(REPOROOT)/data-processing-lib[ray,spark] source venv/bin/activate && $(PIP) install -e $(REPOROOT)/data-connector-lib From 07904bbe7ba412c7493d93997febe6fbc8b1147a Mon Sep 17 00:00:00 2001 From: Maroun Touma Date: Tue, 17 Dec 2024 21:46:19 -0500 Subject: [PATCH 2/6] trigger a full suite test Signed-off-by: Maroun Touma --- .make.defaults | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.make.defaults b/.make.defaults index 80df91c8e..4674326d8 100644 --- a/.make.defaults +++ b/.make.defaults @@ -231,7 +231,7 @@ __check_defined = \ --build-arg GIT_COMMIT=$(shell git log -1 --format=%h) . 
$(DOCKER) tag $(DOCKER_IMAGE_NAME):$(DOCKER_IMAGE_VERSION) $(DOCKER_REGISTRY_ENDPOINT)/$(DOCKER_IMAGE_NAME):$(DOCKER_IMAGE_VERSION) - + # Copy a source tree in LIB_PATH, including src, pyproject.toml to LIB_NAME # Generally used to copy source from within the repo into a local directory for use by a Dockerfile .PHONY: .defaults.copy-lib From eb5f9894163f8bbcf88b0a98aeefbabaf279b231 Mon Sep 17 00:00:00 2001 From: Maroun Touma Date: Wed, 18 Dec 2024 07:07:02 -0500 Subject: [PATCH 3/6] remove remote image from .make.defaults Signed-off-by: Maroun Touma --- .make.defaults | 3 +-- kfp/kfp_ray_components/Makefile | 3 +++ transforms/.make.cicd.targets | 3 ++- transforms/.make.transforms | 4 ++++ 4 files changed, 10 insertions(+), 3 deletions(-) diff --git a/.make.defaults b/.make.defaults index 4674326d8..8bb53e487 100644 --- a/.make.defaults +++ b/.make.defaults @@ -47,7 +47,6 @@ DOCKER_REGISTRY_USER?=$(DPK_DOCKER_REGISTRY_USER) DOCKER_REGISTRY_KEY?=$(DPK_DOCKER_REGISTRY_KEY) DOCKER_REGISTRY_ENDPOINT?=$(DOCKER_HOSTNAME)/$(DOCKER_NAMESPACE) DOCKER_LOCAL_IMAGE=$(DOCKER_IMAGE_NAME):$(DOCKER_IMAGE_VERSION) -DOCKER_REMOTE_IMAGE=$(DOCKER_REGISTRY_ENDPOINT)/$(DOCKER_IMAGE_NAME):$(DOCKER_IMAGE_VERSION) DOCKER_SPARK_BASE_IMAGE_NAME=data-prep-kit-spark-$(SPARK_VERSION) DOCKER_SPARK_BASE_IMAGE=$(DOCKER_SPARK_BASE_IMAGE_NAME):$(DOCKER_IMAGE_VERSION) RAY_BASE_IMAGE?=docker.io/rayproject/ray:${RAY}-py310 @@ -231,7 +230,7 @@ __check_defined = \ --build-arg GIT_COMMIT=$(shell git log -1 --format=%h) . 
$(DOCKER) tag $(DOCKER_IMAGE_NAME):$(DOCKER_IMAGE_VERSION) $(DOCKER_REGISTRY_ENDPOINT)/$(DOCKER_IMAGE_NAME):$(DOCKER_IMAGE_VERSION) - + # Copy a source tree in LIB_PATH, including src, pyproject.toml to LIB_NAME # Generally used to copy source from within the repo into a local directory for use by a Dockerfile .PHONY: .defaults.copy-lib diff --git a/kfp/kfp_ray_components/Makefile b/kfp/kfp_ray_components/Makefile index 17488bc2e..cac6047ce 100644 --- a/kfp/kfp_ray_components/Makefile +++ b/kfp/kfp_ray_components/Makefile @@ -16,6 +16,9 @@ else DOCKER_IMAGE_NAME=kfp-data-processing endif +## removed from .make.defaults and set here before loading in kind +DOCKER_REMOTE_IMAGE=$(DOCKER_REGISTRY_ENDPOINT)/$(DOCKER_IMAGE_NAME):$(DOCKER_IMAGE_VERSION) + .PHONY: .lib-src-image .lib-src-image:: .default.build-lib-wheel @$(eval LIB_WHEEL_FILE := $(shell find data-processing-dist/*.whl)) diff --git a/transforms/.make.cicd.targets b/transforms/.make.cicd.targets index 95f310a4e..9d2780849 100644 --- a/transforms/.make.cicd.targets +++ b/transforms/.make.cicd.targets @@ -17,7 +17,6 @@ TRANSFORM_RAY_SRC?="-m dpk_$(TRANSFORM_NAME).ray.transform" TRANSFORM_SPARK_SRC?="-m dpk_$(TRANSFORM_NAME).spark.transform" - venv:: .defaults.create-venv source venv/bin/activate && $(PIP) install -e $(REPOROOT)/data-processing-lib[ray,spark] source venv/bin/activate && $(PIP) install -e $(REPOROOT)/data-connector-lib @@ -67,6 +66,7 @@ test-image:: .default.build-lib-wheel $(MAKE) DOCKER_FILE=Dockerfile.python \ TRANSFORM_RUNTIME_SRC_FILE=$(TRANSFORM_PYTHON_SRC) \ DOCKER_IMAGE_NAME=$(TRANSFORM_NAME)-python \ + DOCKER_REMOTE_IMAGE=$(DOCKER_REGISTRY_ENDPOINT)/$(DOCKER_IMAGE_NAME):$(DOCKER_IMAGE_VERSION) \ test-image-sequence ; \ fi ;\ fi @@ -76,6 +76,7 @@ test-image:: .default.build-lib-wheel TRANSFORM_RUNTIME_SRC_FILE=$(TRANSFORM_RAY_SRC) \ DOCKER_IMAGE_NAME=$(TRANSFORM_NAME)-ray \ BASE_IMAGE=$(RAY_BASE_IMAGE) \ + 
DOCKER_REMOTE_IMAGE=$(DOCKER_REGISTRY_ENDPOINT)/$(DOCKER_IMAGE_NAME):$(DOCKER_IMAGE_VERSION) \ test-image-sequence ; \ fi ;\ fi diff --git a/transforms/.make.transforms b/transforms/.make.transforms index 247ea2ae4..860d08a46 100644 --- a/transforms/.make.transforms +++ b/transforms/.make.transforms @@ -35,6 +35,10 @@ DOCKER_IMAGE_NAME?=$(TRANSFORM_NAME)-$(TRANSFORM_RUNTIME) TRANSFORM_RUNTIME_SRC_FILE?=$(TRANSFORM_NAME)_transform_$(TRANSFORM_RUNTIME).py TRANSFORM_TEST_FILE?=test/test_$(TRANSFORM_NAME).py +## Used when publishing or building the image by legacy transforms. +## Will be removed when all transfdorms get refactored +DOCKER_REMOTE_IMAGE=$(DOCKER_REGISTRY_ENDPOINT)/$(DOCKER_IMAGE_NAME):$(DOCKER_IMAGE_VERSION) + # The following taken from https://stackoverflow.com/a/65243296/45375 # Lists all targets and optional help text found in the target. # :: rule means we first run the help from the main make.defaults. From 171242de45b2712c1477136697757410876e7a05 Mon Sep 17 00:00:00 2001 From: Maroun Touma Date: Thu, 19 Dec 2024 06:19:06 -0500 Subject: [PATCH 4/6] fix typo Signed-off-by: Maroun Touma --- transforms/.make.transforms | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/transforms/.make.transforms b/transforms/.make.transforms index 860d08a46..8d4589a65 100644 --- a/transforms/.make.transforms +++ b/transforms/.make.transforms @@ -36,7 +36,7 @@ TRANSFORM_RUNTIME_SRC_FILE?=$(TRANSFORM_NAME)_transform_$(TRANSFORM_RUNTIME).py TRANSFORM_TEST_FILE?=test/test_$(TRANSFORM_NAME).py ## Used when publishing or building the image by legacy transforms. 
-## Will be removed when all transfdorms get refactored +## Will be removed when all transforms get refactored DOCKER_REMOTE_IMAGE=$(DOCKER_REGISTRY_ENDPOINT)/$(DOCKER_IMAGE_NAME):$(DOCKER_IMAGE_VERSION) # The following taken from https://stackoverflow.com/a/65243296/45375 From 31fb868102ca670346c238850336e2bb38d4d99f Mon Sep 17 00:00:00 2001 From: Maroun Touma Date: Thu, 19 Dec 2024 06:27:15 -0500 Subject: [PATCH 5/6] remove REMOTE_IMAGE from workflow-test Signed-off-by: Maroun Touma --- transforms/language/doc_chunk/kfp_ray/Makefile | 3 +-- .../language/doc_quality/kfp_ray/Makefile | 1 - .../language/html2parquet/kfp_ray/Makefile | 18 +----------------- transforms/language/lang_id/kfp_ray/Makefile | 1 - .../language/pdf2parquet/kfp_ray/Makefile | 3 +-- .../language/text_encoder/kfp_ray/Makefile | 3 +-- transforms/universal/doc_id/kfp_ray/Makefile | 16 ---------------- transforms/universal/ededup/kfp_ray/Makefile | 1 - transforms/universal/hap/kfp_ray/Makefile | 1 - 9 files changed, 4 insertions(+), 43 deletions(-) diff --git a/transforms/language/doc_chunk/kfp_ray/Makefile b/transforms/language/doc_chunk/kfp_ray/Makefile index fcc12450d..7244ce142 100644 --- a/transforms/language/doc_chunk/kfp_ray/Makefile +++ b/transforms/language/doc_chunk/kfp_ray/Makefile @@ -33,8 +33,7 @@ workflow-test: workflow-build TRANSFORM_RUNTIME=$(TRANSFORM_RUNTIME) \ TRANSFORM_NAME=$(TRANSFORM_NAME) \ BUILD_SPECIFIC_RUNTIME=$(BUILD_SPECIFIC_RUNTIME) \ - DOCKER_REMOTE_IMAGE=$(DOCKER_REGISTRY_ENDPOINT)/$(DOCKER_IMAGE_NAME):$(DOCKER_IMAGE_VERSION) \ - PIPELINE_FILE=doc_chunk_wf.yaml .workflows.test-pipeline + PIPELINE_FILE=$(TRANSFORM_NAME)_wf.yaml .workflows.test-pipeline .PHONY: workflow-upload workflow-upload: workflow-build diff --git a/transforms/language/doc_quality/kfp_ray/Makefile b/transforms/language/doc_quality/kfp_ray/Makefile index 858db1b0a..0c831e1c7 100644 --- a/transforms/language/doc_quality/kfp_ray/Makefile +++ b/transforms/language/doc_quality/kfp_ray/Makefile @@ 
-33,7 +33,6 @@ workflow-test: workflow-build TRANSFORM_RUNTIME=$(TRANSFORM_RUNTIME) \ TRANSFORM_NAME=$(TRANSFORM_NAME) \ BUILD_SPECIFIC_RUNTIME=$(BUILD_SPECIFIC_RUNTIME) \ - DOCKER_REMOTE_IMAGE=$(DOCKER_REGISTRY_ENDPOINT)/$(DOCKER_IMAGE_NAME):$(DOCKER_IMAGE_VERSION) \ PIPELINE_FILE=$(TRANSFORM_NAME)_wf.yaml .workflows.test-pipeline .PHONY: workflow-upload diff --git a/transforms/language/html2parquet/kfp_ray/Makefile b/transforms/language/html2parquet/kfp_ray/Makefile index 882f9a0ca..c2fb8dbdd 100644 --- a/transforms/language/html2parquet/kfp_ray/Makefile +++ b/transforms/language/html2parquet/kfp_ray/Makefile @@ -27,21 +27,6 @@ clean: @# Help: Clean up the virtual environment. rm -rf ${REPOROOT}/transforms/venv -venv:: - -build:: - -test:: - -test-src:: - -test-image:: - -publish:: - -image:: - -load-image:: .PHONY: workflow-build workflow-build: workflow-venv @@ -53,8 +38,7 @@ workflow-test: workflow-build TRANSFORM_RUNTIME=$(TRANSFORM_RUNTIME) \ TRANSFORM_NAME=$(TRANSFORM_NAME) \ BUILD_SPECIFIC_RUNTIME=$(BUILD_SPECIFIC_RUNTIME) \ - DOCKER_REMOTE_IMAGE=$(DOCKER_REGISTRY_ENDPOINT)/$(DOCKER_IMAGE_NAME):$(DOCKER_IMAGE_VERSION) \ - PIPELINE_FILE=html2parquet_wf.yaml .workflows.test-pipeline + PIPELINE_FILE=$(TRANSFORM_NAME)_wf.yaml .workflows.test-pipeline .PHONY: workflow-upload workflow-upload: diff --git a/transforms/language/lang_id/kfp_ray/Makefile b/transforms/language/lang_id/kfp_ray/Makefile index 7c3da2456..7244ce142 100644 --- a/transforms/language/lang_id/kfp_ray/Makefile +++ b/transforms/language/lang_id/kfp_ray/Makefile @@ -33,7 +33,6 @@ workflow-test: workflow-build TRANSFORM_RUNTIME=$(TRANSFORM_RUNTIME) \ TRANSFORM_NAME=$(TRANSFORM_NAME) \ BUILD_SPECIFIC_RUNTIME=$(BUILD_SPECIFIC_RUNTIME) \ - DOCKER_REMOTE_IMAGE=$(DOCKER_REGISTRY_ENDPOINT)/$(DOCKER_IMAGE_NAME):$(DOCKER_IMAGE_VERSION) \ PIPELINE_FILE=$(TRANSFORM_NAME)_wf.yaml .workflows.test-pipeline .PHONY: workflow-upload diff --git a/transforms/language/pdf2parquet/kfp_ray/Makefile 
b/transforms/language/pdf2parquet/kfp_ray/Makefile index bf55c9b9c..0c831e1c7 100644 --- a/transforms/language/pdf2parquet/kfp_ray/Makefile +++ b/transforms/language/pdf2parquet/kfp_ray/Makefile @@ -33,8 +33,7 @@ workflow-test: workflow-build TRANSFORM_RUNTIME=$(TRANSFORM_RUNTIME) \ TRANSFORM_NAME=$(TRANSFORM_NAME) \ BUILD_SPECIFIC_RUNTIME=$(BUILD_SPECIFIC_RUNTIME) \ - DOCKER_REMOTE_IMAGE=$(DOCKER_REGISTRY_ENDPOINT)/$(DOCKER_IMAGE_NAME):$(DOCKER_IMAGE_VERSION) \ - PIPELINE_FILE=pdf2parquet_wf.yaml .workflows.test-pipeline + PIPELINE_FILE=$(TRANSFORM_NAME)_wf.yaml .workflows.test-pipeline .PHONY: workflow-upload workflow-upload: workflow-build diff --git a/transforms/language/text_encoder/kfp_ray/Makefile b/transforms/language/text_encoder/kfp_ray/Makefile index 049e3e59b..7244ce142 100644 --- a/transforms/language/text_encoder/kfp_ray/Makefile +++ b/transforms/language/text_encoder/kfp_ray/Makefile @@ -33,8 +33,7 @@ workflow-test: workflow-build TRANSFORM_RUNTIME=$(TRANSFORM_RUNTIME) \ TRANSFORM_NAME=$(TRANSFORM_NAME) \ BUILD_SPECIFIC_RUNTIME=$(BUILD_SPECIFIC_RUNTIME) \ - DOCKER_REMOTE_IMAGE=$(DOCKER_REGISTRY_ENDPOINT)/$(DOCKER_IMAGE_NAME):$(DOCKER_IMAGE_VERSION) \ - PIPELINE_FILE=text_encoder_wf.yaml .workflows.test-pipeline + PIPELINE_FILE=$(TRANSFORM_NAME)_wf.yaml .workflows.test-pipeline .PHONY: workflow-upload workflow-upload: workflow-build diff --git a/transforms/universal/doc_id/kfp_ray/Makefile b/transforms/universal/doc_id/kfp_ray/Makefile index be5a2144f..dc693bc6e 100644 --- a/transforms/universal/doc_id/kfp_ray/Makefile +++ b/transforms/universal/doc_id/kfp_ray/Makefile @@ -27,21 +27,6 @@ clean: @# Help: Clean up the virtual environment. 
rm -rf ${REPOROOT}/transforms/venv -venv:: - -build:: - -test:: - -test-src:: - -test-image:: - -publish:: - -image:: - -load-image:: .PHONY: workflow-build workflow-build: workflow-venv @@ -53,7 +38,6 @@ workflow-test: workflow-build TRANSFORM_RUNTIME=$(TRANSFORM_RUNTIME) \ TRANSFORM_NAME=$(TRANSFORM_NAME) \ BUILD_SPECIFIC_RUNTIME=$(BUILD_SPECIFIC_RUNTIME) \ - DOCKER_REMOTE_IMAGE=$(DOCKER_REGISTRY_ENDPOINT)/$(DOCKER_IMAGE_NAME):$(DOCKER_IMAGE_VERSION) \ PIPELINE_FILE=$(TRANSFORM_NAME)_wf.yaml .workflows.test-pipeline .PHONY: workflow-upload diff --git a/transforms/universal/ededup/kfp_ray/Makefile b/transforms/universal/ededup/kfp_ray/Makefile index 858db1b0a..0c831e1c7 100644 --- a/transforms/universal/ededup/kfp_ray/Makefile +++ b/transforms/universal/ededup/kfp_ray/Makefile @@ -33,7 +33,6 @@ workflow-test: workflow-build TRANSFORM_RUNTIME=$(TRANSFORM_RUNTIME) \ TRANSFORM_NAME=$(TRANSFORM_NAME) \ BUILD_SPECIFIC_RUNTIME=$(BUILD_SPECIFIC_RUNTIME) \ - DOCKER_REMOTE_IMAGE=$(DOCKER_REGISTRY_ENDPOINT)/$(DOCKER_IMAGE_NAME):$(DOCKER_IMAGE_VERSION) \ PIPELINE_FILE=$(TRANSFORM_NAME)_wf.yaml .workflows.test-pipeline .PHONY: workflow-upload diff --git a/transforms/universal/hap/kfp_ray/Makefile b/transforms/universal/hap/kfp_ray/Makefile index 7c3da2456..7244ce142 100644 --- a/transforms/universal/hap/kfp_ray/Makefile +++ b/transforms/universal/hap/kfp_ray/Makefile @@ -33,7 +33,6 @@ workflow-test: workflow-build TRANSFORM_RUNTIME=$(TRANSFORM_RUNTIME) \ TRANSFORM_NAME=$(TRANSFORM_NAME) \ BUILD_SPECIFIC_RUNTIME=$(BUILD_SPECIFIC_RUNTIME) \ - DOCKER_REMOTE_IMAGE=$(DOCKER_REGISTRY_ENDPOINT)/$(DOCKER_IMAGE_NAME):$(DOCKER_IMAGE_VERSION) \ PIPELINE_FILE=$(TRANSFORM_NAME)_wf.yaml .workflows.test-pipeline .PHONY: workflow-upload From 25911e7bea3570b7ce0502f8b700c4af321b48e4 Mon Sep 17 00:00:00 2001 From: Maroun Touma Date: Fri, 20 Dec 2024 08:47:51 -0500 Subject: [PATCH 6/6] merge with dev Signed-off-by: Maroun Touma --- transforms/.make.cicd.targets | 1 + 
transforms/universal/fdedup/kfp_ray/Makefile | 1 - 2 files changed, 1 insertion(+), 1 deletion(-) diff --git a/transforms/.make.cicd.targets b/transforms/.make.cicd.targets index 9d2780849..d01a9530c 100644 --- a/transforms/.make.cicd.targets +++ b/transforms/.make.cicd.targets @@ -86,6 +86,7 @@ test-image:: .default.build-lib-wheel TRANSFORM_RUNTIME_SRC_FILE=$(TRANSFORM_SPARK_SRC) \ DOCKER_IMAGE_NAME=$(TRANSFORM_NAME)-spark \ BASE_IMAGE=$(SPARK_BASE_IMAGE) \ + DOCKER_REMOTE_IMAGE=$(DOCKER_REGISTRY_ENDPOINT)/$(DOCKER_IMAGE_NAME):$(DOCKER_IMAGE_VERSION) \ test-image-sequence ; \ fi ;\ fi diff --git a/transforms/universal/fdedup/kfp_ray/Makefile b/transforms/universal/fdedup/kfp_ray/Makefile index 5c1ae0778..cd88d5430 100644 --- a/transforms/universal/fdedup/kfp_ray/Makefile +++ b/transforms/universal/fdedup/kfp_ray/Makefile @@ -37,7 +37,6 @@ workflow-test: workflow-build TRANSFORM_RUNTIME=$(TRANSFORM_RUNTIME) \ TRANSFORM_NAME=$(TRANSFORM_NAME) \ BUILD_SPECIFIC_RUNTIME=$(BUILD_SPECIFIC_RUNTIME) \ - DOCKER_REMOTE_IMAGE=$(DOCKER_REGISTRY_ENDPOINT)/$(DOCKER_IMAGE_NAME):$(DOCKER_IMAGE_VERSION) \ PIPELINE_FILE=$(TRANSFORM_NAME)_wf.yaml .workflows.test-pipeline .PHONY: workflow-upload