diff --git a/.env b/.env index bebe06b..d4af096 100644 --- a/.env +++ b/.env @@ -12,6 +12,14 @@ DB_PORT=5432 ADMINER_TAG=4.7.9-standalone ADMINER_PORT=8081 +# Prefect +PREFECT_TAG=2.13.2-python3.10 +PREFECT_PORT=4200 +PREFECT_UI_URL=http://127.0.0.1:4200 +PREFECT_SERVER_API_URL=http://127.0.0.1:4200/api +PREFECT_API_DATABASE_CONNECTION_URL=postgresql+asyncpg://${DB_USER}:${DB_PASSWORD}@postgres:5432/${DB_NAME} +PREFECT_API_URL=http://prefect-server:4200/api + # MLFlow MLFLOW_PORT=5000 MLFLOW_BACKEND_STORE_URI=postgresql://${DB_USER}:${DB_PASSWORD}@postgres:5432/${DB_NAME} diff --git a/README.md b/README.md index b978341..4f59dfe 100644 --- a/README.md +++ b/README.md @@ -1,10 +1,10 @@ # Prefect Surrogate Models -[![Validate Pipeline](https://github.com/JBris/prefect-surrogate-models/actions/workflows/validation.yml/badge.svg)](https://github.com/JBris/prefect-surrogate-models/actions/workflows/validation.yml) [![Generate Documentation](https://github.com/JBris/prefect-surrogate-models/actions/workflows/docs.yml/badge.svg)](https://github.com/JBris/prefect-surrogate-models/actions/workflows/docs.yml) [![pages-build-deployment](https://github.com/JBris/prefect-surrogate-models/actions/workflows/pages/pages-build-deployment/badge.svg)](https://github.com/JBris/prefect-surrogate-models/actions/workflows/pages/pages-build-deployment) +[![Validate Pipeline](https://github.com/JBris/prefect-surrogate-models/actions/workflows/validation.yaml/badge.svg?branch=main)](https://github.com/JBris/prefect-surrogate-models/actions/workflows/validation.yaml) [![Generate Documentation](https://github.com/JBris/prefect-surrogate-models/actions/workflows/docs.yaml/badge.svg)](https://github.com/JBris/prefect-surrogate-models/actions/workflows/docs.yaml) 
[![pages-build-deployment](https://github.com/JBris/prefect-surrogate-models/actions/workflows/pages/pages-build-deployment/badge.svg?branch=gh-pages)](https://github.com/JBris/prefect-surrogate-models/actions/workflows/pages/pages-build-deployment) Website: [Prefect Surrogate Models](https://jbris.github.io/prefect-surrogate-models/) -*Demonstrating the use of Prefect to orchestrate the creation of machine learning surrogate models applied to mechanistic crop models.* +*Demonstrating the use of Prefect to orchestrate the creation of machine learning surrogate models as applied to mechanistic crop models.* # Table of contents diff --git a/docker-compose.yml b/docker-compose.yml index a35180f..386fdbd 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -2,24 +2,66 @@ version: "3.9" services: - # mlflow: - # image: $GITHUB_CONTAINER_REPO - # container_name: ${PROJECT_NAME}-mlflow - # hostname: mlflow - # restart: unless-stopped - # stop_grace_period: 10s - # env_file: .env - # environment: - # MLFLOW_BACKEND_STORE_URI: $MLFLOW_BACKEND_STORE_URI - # MLFLOW_S3_ENDPOINT_URL: $MLFLOW_S3_ENDPOINT_URL - # AWS_ACCESS_KEY_ID: $AWS_ACCESS_KEY_ID - # AWS_SECRET_ACCESS_KEY: $AWS_SECRET_ACCESS_KEY - # ports: - # - ${MLFLOW_PORT}:5000 - # command: > - # mlflow server --serve-artifacts --host 0.0.0.0 --port 5000 - # --backend-store-uri "${MLFLOW_BACKEND_STORE_URI}" --default-artifact-root s3://mlflow/ + prefect-cli: + image: $GITHUB_CONTAINER_REPO + container_name: ${PROJECT_NAME}-prefect-cli + env_file: .env + working_dir: /flows + environment: + PREFECT_API_URL: $PREFECT_API_URL + volumes: + - ./flows:/flows + entrypoint: "python" + + mlflow: + image: $GITHUB_CONTAINER_REPO + container_name: ${PROJECT_NAME}-mlflow + hostname: mlflow + restart: unless-stopped + stop_grace_period: 10s + env_file: .env + environment: + MLFLOW_BACKEND_STORE_URI: $MLFLOW_BACKEND_STORE_URI + MLFLOW_S3_ENDPOINT_URL: $MLFLOW_S3_ENDPOINT_URL + AWS_ACCESS_KEY_ID: $AWS_ACCESS_KEY_ID + 
AWS_SECRET_ACCESS_KEY: $AWS_SECRET_ACCESS_KEY + ports: + - ${MLFLOW_PORT}:5000 + command: > + mlflow server --serve-artifacts --host 0.0.0.0 --port 5000 + --backend-store-uri "${MLFLOW_BACKEND_STORE_URI}" --default-artifact-root s3://mlflow/ + + prefect-server: + image: prefecthq/prefect:${PREFECT_TAG} + container_name: ${PROJECT_NAME}-prefect-server + hostname: prefect-server + restart: unless-stopped + stop_grace_period: 10s + env_file: .env + environment: + PREFECT_UI_URL: $PREFECT_UI_URL + PREFECT_API_URL: $PREFECT_SERVER_API_URL + PREFECT_SERVER_API_HOST: 0.0.0.0 + PREFECT_API_DATABASE_CONNECTION_URL: $PREFECT_API_DATABASE_CONNECTION_URL + depends_on: + - postgres + ports: + - ${PREFECT_PORT}:4200 + volumes: + - prefect-data:/root/.prefect + entrypoint: ["/opt/prefect/entrypoint.sh", "prefect", "server", "start"] + prefect-agent: + image: prefecthq/prefect:${PREFECT_TAG} + container_name: ${PROJECT_NAME}-prefect-agent + hostname: prefect-agent + restart: unless-stopped + stop_grace_period: 10s + env_file: .env + environment: + PREFECT_API_URL: $PREFECT_API_URL + entrypoint: ["/opt/prefect/entrypoint.sh", "prefect", "agent", "start", "-q", "queue"] + minio: image: minio/minio:${MINIO_TAG} container_name: ${PROJECT_NAME}-minio @@ -56,6 +98,7 @@ services: /bin/sh -c ' sleep 5; /usr/bin/mc config host add s3 http://minio:9000 ${MINIO_ACCESS_KEY} ${MINIO_SECRET_KEY} --api S3v4; [[ ! -z "`/usr/bin/mc ls s3 | grep challenge`" ]] || /usr/bin/mc mb s3/mlflow; /usr/bin/mc policy download s3/mlflow; + [[ ! -z "`/usr/bin/mc ls s3 | grep prefect-flows`" ]] || /usr/bin/mc mb s3/prefect-flows; /usr/bin/mc policy download s3/prefect-flows; [[ ! 
-z "`/usr/bin/mc ls s3 | grep challenge`" ]] || /usr/bin/mc mb s3/data; /usr/bin/mc policy download s3/data; exit 0; ' postgres: @@ -104,6 +147,7 @@ networks: driver: bridge volumes: + prefect-data: {} postgres-data: {} minio-data: {} portainer-data: {} diff --git a/docs/source/flows/index.rst b/docs/source/flows/index.rst new file mode 100644 index 0000000..ad29a6a --- /dev/null +++ b/docs/source/flows/index.rst @@ -0,0 +1,6 @@ +Prefect Surrogate Models Flows +================================================= + +.. toctree:: + :maxdepth: 2 + :caption: Contents: diff --git a/docs/source/index.rst b/docs/source/index.rst index ac62bb5..8a17076 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -10,7 +10,7 @@ Welcome to Prefect Surrogate Models's documentation! :maxdepth: 2 :caption: Contents: - + flows/index.rst Indices and tables ================== diff --git a/flows/.prefectignore b/flows/.prefectignore new file mode 100644 index 0000000..9e0903b --- /dev/null +++ b/flows/.prefectignore @@ -0,0 +1,161 @@ +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. 
+*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.py,cover +.hypothesis/ +.pytest_cache/ +cover/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 +db.sqlite3-journal + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +.pybuilder/ +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# IPython +profile_default/ +ipython_config.py + +# pyenv +# For a library or package, you might want to ignore these files since the code is +# intended to run in multiple environments; otherwise, check them in: +# .python-version + +# pipenv +# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. +# However, in case of collaboration, if having platform-specific dependencies or dependencies +# having no cross-platform support, pipenv may install dependencies that don't work, or not +# install all needed dependencies. +#Pipfile.lock + +# poetry +# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. +# This is especially recommended for binary packages to ensure reproducibility, and is more +# commonly ignored for libraries. +# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control +#poetry.lock + +# pdm +# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. +#pdm.lock +# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it +# in version control. +# https://pdm.fming.dev/#use-with-ide +.pdm.toml + +# PEP 582; used by e.g. 
github.com/David-OConnor/pyflow and github.com/pdm-project/pdm +__pypackages__/ + +# Celery stuff +celerybeat-schedule +celerybeat.pid + +# SageMath parsed files +*.sage.py + +# Environments +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Pyre type checker +.pyre/ + +# pytype static type analyzer +.pytype/ + +# Cython debug symbols +cython_debug/ + +# PyCharm +# JetBrains specific template is maintained in a separate JetBrains.gitignore that can +# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore +# and can be added to the global gitignore or merged into this file. For a more nuclear +# option (not recommended) you can uncomment the following to ignore the entire idea folder. +#.idea/ + +*.db \ No newline at end of file diff --git a/flows/test.py b/flows/test.py new file mode 100644 index 0000000..24c02d7 --- /dev/null +++ b/flows/test.py @@ -0,0 +1,20 @@ +#!/usr/bin/env python + +from prefect import flow, task + +@task +def hello_world(name): + print(f"hello world: {name}") + +@task +def bye_world(name): + print(f"bye world: {name}") + +@flow(name="test flow") +def test_flow(names=("john", "smith")): + for name in names: + hello_world(name) + bye_world(name) + +if __name__ == "__main__": + test_flow() \ No newline at end of file diff --git a/scripts/prefect.sh b/scripts/prefect.sh new file mode 100644 index 0000000..a269ae8 --- /dev/null +++ b/scripts/prefect.sh @@ -0,0 +1,5 @@ +#!/usr/bin/env bash + +. 
.env + +docker compose run --rm prefect-cli "$@" diff --git a/services/python/requirements.txt b/services/python/requirements.txt index b34067d..e9b16f2 100644 --- a/services/python/requirements.txt +++ b/services/python/requirements.txt @@ -42,11 +42,13 @@ coolname==2.2.0 croniter==1.4.1 cryptography==41.0.3 cycler==0.11.0 +dask==2023.9.2 databricks-cli==0.17.7 dateparser==1.1.8 decorator==5.1.1 dictdiffer==0.9.0 diskcache==5.6.3 +distributed==2023.9.2 distro==1.8.0 docker==6.1.3 docutils==0.20.1 @@ -111,6 +113,7 @@ kombu==5.3.2 kubernetes==28.1.0 linear-operator==0.5.2 lit==16.0.6 +locket==1.0.0 Mako==1.2.4 Markdown==3.4.4 markdown-it-py==3.0.0 @@ -120,6 +123,7 @@ mccabe==0.7.0 mdurl==0.1.2 mlflow==2.4.1 mpmath==1.3.0 +msgpack==1.0.6 multidict==6.0.4 mypy-extensions==1.0.0 networkx==3.1 @@ -142,6 +146,7 @@ optuna==3.2.0 orjson==3.9.6 packaging==23.1 pandas==2.0.2 +partd==1.4.1 pathspec==0.11.2 PCSE==5.5.5 pendulum==2.1.2 @@ -198,6 +203,7 @@ six==1.16.0 smmap==5.0.0 sniffio==1.3.0 snowballstemmer==2.2.0 +sortedcontainers==2.4.0 Sphinx==7.2.5 sphinx-argparse==0.4.0 sphinxcontrib-applehelp==1.0.7 @@ -212,12 +218,15 @@ sqltrie==0.7.0 starlette==0.27.0 sympy==1.12 tabulate==0.9.0 +tblib==2.0.0 text-unidecode==1.3 threadpoolctl==3.2.0 toml==0.10.2 tomli==2.0.1 tomlkit==0.12.1 +toolz==0.12.0 torch==2.0.1 +tornado==6.3.3 tqdm==4.65.0 traitlets-pcse==5.0.0.dev0 triton==2.0.0 @@ -237,6 +246,7 @@ Werkzeug==2.3.7 wheel==0.38.4 yarl==1.9.2 zc.lockfile==3.0.post1 +zict==3.0.0 zipp==3.16.2 zope.event==5.0 zope.interface==6.0