From 38dc5c91e2171abda3793b677183a3f72313990f Mon Sep 17 00:00:00 2001
From: Chun Cai
Date: Tue, 7 Jan 2025 14:52:10 +0800
Subject: [PATCH] fix: lower `num_workers` to 4 (#4535)

For multi-task training in PyTorch, each data source has its own
dataloader. If the number of workers per dataloader is large, many
worker processes (number of tasks * num_workers) are spawned and
stress the CPU.

## Summary by CodeRabbit

- **Performance Optimization**
  - Lowered the default maximum number of dataloader workers from 8 to 4
  - Reduced the parallel data-loading resources used by default

- **Documentation**
  - Updated the documentation to reflect the change in the default value of `NUM_WORKERS` from 8 to 4

---------

Signed-off-by: Chun Cai
---
 deepmd/pt/utils/env.py | 2 +-
 doc/env.md             | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/deepmd/pt/utils/env.py b/deepmd/pt/utils/env.py
index 9803f8d04d..0e1322a640 100644
--- a/deepmd/pt/utils/env.py
+++ b/deepmd/pt/utils/env.py
@@ -21,7 +21,7 @@
     ncpus = len(os.sched_getaffinity(0))
 except AttributeError:
     ncpus = os.cpu_count()
-NUM_WORKERS = int(os.environ.get("NUM_WORKERS", min(8, ncpus)))
+NUM_WORKERS = int(os.environ.get("NUM_WORKERS", min(4, ncpus)))
 # Make sure DDP uses correct device if applicable
 LOCAL_RANK = os.environ.get("LOCAL_RANK")
 LOCAL_RANK = int(0 if LOCAL_RANK is None else LOCAL_RANK)

diff --git a/doc/env.md b/doc/env.md
index 3cf42b724a..4ca7101236 100644
--- a/doc/env.md
+++ b/doc/env.md
@@ -72,7 +72,7 @@ Default backend.

 :::{envvar} NUM_WORKERS

-**Default**: 8 or the number of cores (whichever is smaller)
+**Default**: 4 or the number of cores (whichever is smaller)

 {{ pytorch_icon }} Number of subprocesses to use for data loading in the PyTorch backend. See [PyTorch documentation](https://pytorch.org/docs/stable/data.html) for details.
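
The following minimal sketch (not part of the patch) illustrates the scaling the commit message describes: with one PyTorch `DataLoader` per task, the total number of data-loading worker processes grows as number of tasks times `NUM_WORKERS`. The task count and dummy datasets are hypothetical, chosen only to show the effect of the default.

```python
# Illustrative sketch only; the task count and datasets are hypothetical.
import os

import torch
from torch.utils.data import DataLoader, TensorDataset

# Mirrors the patched default: at most 4 workers per dataloader.
NUM_WORKERS = int(os.environ.get("NUM_WORKERS", min(4, os.cpu_count() or 1)))
NUM_TASKS = 8  # hypothetical number of data sources in a multi-task run

if __name__ == "__main__":
    # One DataLoader per task, mirroring the multi-task setup described above.
    loaders = [
        DataLoader(
            TensorDataset(torch.zeros(16, 3)),
            batch_size=4,
            num_workers=NUM_WORKERS,
        )
        for _ in range(NUM_TASKS)
    ]
    # Iterating a loader spawns its worker subprocesses, so the total worker
    # count is roughly NUM_TASKS * NUM_WORKERS (8 * 4 = 32 with the new
    # default, versus 8 * 8 = 64 with the old one).
    for loader in loaders:
        next(iter(loader))
```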