From 38c09db936286ea32665c80602cbd08907c81c03 Mon Sep 17 00:00:00 2001
From: Kayla Meyer <129152803+meyerkm@users.noreply.github.com>
Date: Wed, 4 Dec 2024 08:47:06 +0100
Subject: [PATCH] Add in Snakemake Log-Files (#147)

* remove double logging

* add in logging redirect

* add in logging redirect
- remove logging from train rule (already performed with parallel)
- remove params.prefix bug from log paths

* bugfix logging redirect for associate.py
- add in logging handlers.clear()
- define logging level

* output additional fields in model_config.yaml, to be used for pretrained_models setup

* adding in logging directive to cv pipeline

* add in log to final regenie pipeline

* fixup! Format Python code with psf/black pull_request

---------

Co-authored-by: PMBio <PMBio@users.noreply.github.com>
---
 deeprvat/cv_utils.py                          |  2 +-
 deeprvat/data/dense_gt.py                     |  2 +-
 deeprvat/data/rare.py                         |  2 +-
 deeprvat/deeprvat/associate.py                | 23 ++++++-----
 .../common_variant_condition_utils.py         |  2 +-
 deeprvat/deeprvat/config.py                   |  2 +-
 deeprvat/deeprvat/evaluate.py                 |  2 +-
 deeprvat/deeprvat/models.py                   |  2 +-
 deeprvat/deeprvat/train.py                    | 29 +++++++++----
 deeprvat/metrics.py                           |  2 +-
 deeprvat/seed_gene_discovery/evaluate.py      |  2 +-
 .../seed_gene_discovery.py                    |  2 +-
 deeprvat/utils.py                             |  2 +-
 .../association_dataset.snakefile             | 12 +++++-
 .../association_testing/burdens.snakefile     | 27 +++++++++---
 .../regress_eval.snakefile                    | 26 +++++++++---
 .../regress_eval_regenie.snakefile            | 41 ++++++++++++++++---
 ...regress_eval_regenie_conditional.snakefile | 21 ++++++++++
 ...ting_control_for_common_variants.snakefile |  5 +++
 ...ting_precomputed_burdens_regenie.snakefile |  1 +
 .../association_testing_pretrained.snakefile  |  5 +++
 ...ation_testing_pretrained_regenie.snakefile |  5 +++
 pipelines/cv_training/cv_burdens.snakefile    | 21 ++++++++++
 pipelines/cv_training/cv_training.snakefile   | 13 ++++--
 .../cv_training_association_testing.snakefile |  6 ++-
 pipelines/run_training.snakefile              |  5 +++
 pipelines/training/config.snakefile           |  6 ++-
 pipelines/training/train.snakefile            |  8 +++-
 pipelines/training/training_dataset.snakefile | 12 +++++-
 .../training_association_testing.snakefile    |  5 +++
 ...ning_association_testing_regenie.snakefile |  5 +++
 31 files changed, 240 insertions(+), 58 deletions(-)

diff --git a/deeprvat/cv_utils.py b/deeprvat/cv_utils.py
index 533ca3fb..9122ce01 100644
--- a/deeprvat/cv_utils.py
+++ b/deeprvat/cv_utils.py
@@ -16,7 +16,7 @@
 
 logging.basicConfig(
     format="[%(asctime)s] %(levelname)s:%(name)s: %(message)s",
-    level="INFO",
+    level=logging.INFO,
     stream=sys.stdout,
 )
 logger = logging.getLogger(__name__)
diff --git a/deeprvat/data/dense_gt.py b/deeprvat/data/dense_gt.py
index a8aa69a8..8b9684ea 100644
--- a/deeprvat/data/dense_gt.py
+++ b/deeprvat/data/dense_gt.py
@@ -22,7 +22,7 @@
 
 logging.basicConfig(
     format="[%(asctime)s] %(levelname)s:%(name)s: %(message)s",
-    level="INFO",
+    level=logging.INFO,
     stream=sys.stdout,
 )
 logger = logging.getLogger(__name__)
diff --git a/deeprvat/data/rare.py b/deeprvat/data/rare.py
index 04d36ef3..907908b2 100644
--- a/deeprvat/data/rare.py
+++ b/deeprvat/data/rare.py
@@ -12,7 +12,7 @@
 
 logging.basicConfig(
     format="[%(asctime)s] %(levelname)s:%(name)s: %(message)s",
-    level="INFO",
+    level=logging.INFO,
     stream=sys.stdout,
 )
 logger = logging.getLogger(__name__)
diff --git a/deeprvat/deeprvat/associate.py b/deeprvat/deeprvat/associate.py
index e6fc380e..1a341278 100644
--- a/deeprvat/deeprvat/associate.py
+++ b/deeprvat/deeprvat/associate.py
@@ -25,10 +25,10 @@
 from tqdm import tqdm, trange
 import zarr
 import re
-
 import deeprvat.deeprvat.models as deeprvat_models
 from deeprvat.data import DenseGTDataset
 
+logging.root.handlers.clear()  # Remove all handlers associated with the root logger object
 logging.basicConfig(
     format="[%(asctime)s] %(levelname)s:%(name)s: %(message)s",
     level=logging.INFO,
@@ -48,6 +48,11 @@
 AGG_FCT = {"mean": np.mean, "max": np.max}
 
 
+@click.group()
+def cli():
+    pass
+
+
 def get_burden(
     batch: Dict,
     agg_models: Dict[str, List[nn.Module]],
@@ -99,11 +104,6 @@ def separate_parallel_results(results: List) -> Tuple[List, ...]:
     return tuple(map(list, zip(*results)))
 
 
-@click.group()
-def cli():
-    pass
-
-
 def make_dataset_(
     config: Dict,
     debug: bool = False,
@@ -306,7 +306,6 @@ def make_regenie_input_(
     gene_metadata_file: Path,
     gtf: Path,
 ):
-    logger.setLevel(logging.INFO)
 
     ## Check options
     if not skip_burdens and burdens_genes_samples is None:
@@ -420,7 +419,7 @@ def make_regenie_input_(
         if average_repeats:
             logger.info("Averaging burdens across all repeats")
             burdens = np.zeros((n_samples, n_genes))
-            for repeat in trange(burdens_zarr.shape[2]):
+            for repeat in trange(burdens_zarr.shape[2], file=sys.stdout):
                 burdens += burdens_zarr[:n_samples, :, repeat]
             burdens = burdens / burdens_zarr.shape[2]
         else:
@@ -448,7 +447,7 @@ def make_regenie_input_(
             n_samples,
             samples=list(sample_ids.astype(str)),
         ) as f:
-            for i in trange(n_genes):
+            for i in trange(n_genes, file=sys.stdout):
                 varid = f"pseudovariant_gene_{ensgids[i]}"
                 this_burdens = burdens[:, i]  # Rescale scores to be in range (0, 2)
                 genotypes = np.stack(
@@ -746,7 +745,7 @@ def load_models(
     }
 
     if len(checkpoint_files[first_repeat]) > 1:
-        logging.info(
+        logger.info(
             f"  Averaging results from {len(checkpoint_files[first_repeat])} models for each repeat"
         )
 
@@ -1064,7 +1063,9 @@ def combine_burden_chunks_(
     end_id = 0
 
     for i, chunk in tqdm(
-        enumerate(range(0, n_chunks)), desc=f"Merging {n_chunks} chunks"
+        enumerate(range(0, n_chunks)),
+        desc=f"Merging {n_chunks} chunks",
+        file=sys.stdout,
     ):
         chunk_dir = burdens_chunks_dir / f"chunk_{chunk}"
 
diff --git a/deeprvat/deeprvat/common_variant_condition_utils.py b/deeprvat/deeprvat/common_variant_condition_utils.py
index 0005745a..566f30a6 100644
--- a/deeprvat/deeprvat/common_variant_condition_utils.py
+++ b/deeprvat/deeprvat/common_variant_condition_utils.py
@@ -20,7 +20,7 @@
 
 logging.basicConfig(
     format="[%(asctime)s] %(levelname)s:%(name)s: %(message)s",
-    level="INFO",
+    level=logging.INFO,
     stream=sys.stdout,
 )
 logger = logging.getLogger(__name__)
diff --git a/deeprvat/deeprvat/config.py b/deeprvat/deeprvat/config.py
index a2abfb9f..5e903437 100644
--- a/deeprvat/deeprvat/config.py
+++ b/deeprvat/deeprvat/config.py
@@ -16,7 +16,7 @@
 
 logging.basicConfig(
     format="[%(asctime)s] %(levelname)s:%(name)s: %(message)s",
-    level="INFO",
+    level=logging.INFO,
     stream=sys.stdout,
 )
 logger = logging.getLogger(__name__)
diff --git a/deeprvat/deeprvat/evaluate.py b/deeprvat/deeprvat/evaluate.py
index 58130f4d..72a44bd4 100644
--- a/deeprvat/deeprvat/evaluate.py
+++ b/deeprvat/deeprvat/evaluate.py
@@ -14,7 +14,7 @@
 
 logging.basicConfig(
     format="[%(asctime)s] %(levelname)s:%(name)s: %(message)s",
-    level="INFO",
+    level=logging.INFO,
     stream=sys.stdout,
 )
 logger = logging.getLogger(__name__)
diff --git a/deeprvat/deeprvat/models.py b/deeprvat/deeprvat/models.py
index 6fc189d4..7c5993aa 100644
--- a/deeprvat/deeprvat/models.py
+++ b/deeprvat/deeprvat/models.py
@@ -18,7 +18,7 @@
 
 logging.basicConfig(
     format="[%(asctime)s] %(levelname)s:%(name)s: %(message)s",
-    level="INFO",
+    level=logging.INFO,
     stream=sys.stdout,
 )
 logger = logging.getLogger(__name__)
diff --git a/deeprvat/deeprvat/train.py b/deeprvat/deeprvat/train.py
index e2245ff7..29d64ebd 100644
--- a/deeprvat/deeprvat/train.py
+++ b/deeprvat/deeprvat/train.py
@@ -9,7 +9,7 @@
 from pprint import pformat, pprint
 from tempfile import TemporaryDirectory
 from typing import Dict, Optional, Tuple, Union
-
+import re
 import click
 import math
 import numpy as np
@@ -37,10 +37,9 @@
 from torch.utils.data import DataLoader, Dataset, Subset
 from tqdm import tqdm
 
-
 logging.basicConfig(
     format="[%(asctime)s] %(levelname)s:%(name)s: %(message)s",
-    level="INFO",
+    level=logging.INFO,
     stream=sys.stdout,
 )
 logger = logging.getLogger(__name__)
@@ -872,20 +871,20 @@ def run_bagging(
                 trainer.fit(model, dm)
             except RuntimeError as e:
                 # if batch_size is choosen to big, it will be reduced until it fits the GPU
-                logging.error(f"Caught RuntimeError: {e}")
+                logger.error(f"Caught RuntimeError: {e}")
                 if str(e).find("CUDA out of memory") != -1:
                     if dm.hparams.batch_size > 4:
-                        logging.error(
+                        logger.error(
                             "Retrying training with half the original batch size"
                         )
                         gc.collect()
                         torch.cuda.empty_cache()
                         dm.hparams.batch_size = dm.hparams.batch_size // 2
                     else:
-                        logging.error("Batch size is already <= 4, giving up")
+                        logger.error("Batch size is already <= 4, giving up")
                         raise RuntimeError("Could not find small enough batch size")
                 else:
-                    logging.error(f"Caught unknown error: {e}")
+                    logger.error(f"Caught unknown error: {e}")
                     raise e
             else:
                 break
@@ -1167,7 +1166,21 @@ def best_training_run(
         config = yaml.safe_load(f)
 
     with open(config_file_out, "w") as f:
-        yaml.dump({"model": config["model"]}, f)
+        yaml.dump(
+            {
+                "model": config["model"],
+                "rare_variant_annotations": config["training_data"]["dataset_config"][
+                    "rare_embedding"
+                ]["config"]["annotations"],
+                "training_data_thresholds": {
+                    k: str(re.sub(f"^{k} ", "", v))
+                    for k, v in config["training_data"]["dataset_config"][
+                        "rare_embedding"
+                    ]["config"]["thresholds"].items()
+                },
+            },
+            f,
+        )
 
     n_bags = config["training"]["n_bags"] if not debug else 3
     for k in range(n_bags):
diff --git a/deeprvat/metrics.py b/deeprvat/metrics.py
index f7b74a01..ee4bed36 100644
--- a/deeprvat/metrics.py
+++ b/deeprvat/metrics.py
@@ -8,7 +8,7 @@
 
 logging.basicConfig(
     format="[%(asctime)s] %(levelname)s:%(name)s: %(message)s",
-    level="INFO",
+    level=logging.INFO,
     stream=sys.stdout,
 )
 logger = logging.getLogger(__name__)
diff --git a/deeprvat/seed_gene_discovery/evaluate.py b/deeprvat/seed_gene_discovery/evaluate.py
index e06eea6d..05ebb087 100644
--- a/deeprvat/seed_gene_discovery/evaluate.py
+++ b/deeprvat/seed_gene_discovery/evaluate.py
@@ -15,7 +15,7 @@
 
 logging.basicConfig(
     format="[%(asctime)s] %(levelname)s:%(name)s: %(message)s",
-    level="INFO",
+    level=logging.INFO,
     stream=sys.stdout,
 )
 logger = logging.getLogger(__name__)
diff --git a/deeprvat/seed_gene_discovery/seed_gene_discovery.py b/deeprvat/seed_gene_discovery/seed_gene_discovery.py
index 786d17ea..208152cc 100644
--- a/deeprvat/seed_gene_discovery/seed_gene_discovery.py
+++ b/deeprvat/seed_gene_discovery/seed_gene_discovery.py
@@ -24,7 +24,7 @@
 
 logging.basicConfig(
     format="[%(asctime)s] %(levelname)s:%(name)s: %(message)s",
-    level="INFO",
+    level=logging.INFO,
     stream=sys.stdout,
 )
 logger = logging.getLogger(__name__)
diff --git a/deeprvat/utils.py b/deeprvat/utils.py
index a28b4a5c..e515a7ec 100644
--- a/deeprvat/utils.py
+++ b/deeprvat/utils.py
@@ -17,7 +17,7 @@
 
 logging.basicConfig(
     format="[%(asctime)s] %(levelname)s:%(name)s: %(message)s",
-    level="INFO",
+    level=logging.INFO,
     stream=sys.stdout,
 )
 logger = logging.getLogger(__name__)
diff --git a/pipelines/association_testing/association_dataset.snakefile b/pipelines/association_testing/association_dataset.snakefile
index 12f27503..fc751a7d 100644
--- a/pipelines/association_testing/association_dataset.snakefile
+++ b/pipelines/association_testing/association_dataset.snakefile
@@ -16,12 +16,16 @@ rule association_dataset:
     resources:
         mem_mb = lambda wildcards, attempt: 32000 * (attempt + 1),
     priority: 30
+    log:
+        stdout="logs/association_dataset/{phenotype}.stdout", 
+        stderr="logs/association_dataset/{phenotype}.stderr"
     shell:
         'deeprvat_associate make-dataset '
         + debug +
         "--skip-genotypes "
         '{input.data_config} '
-        '{output}'
+        '{output} '
+        + logging_redirct
 
 
 rule association_dataset_burdens:
@@ -33,8 +37,12 @@ rule association_dataset_burdens:
     resources:
         mem_mb = lambda wildcards, attempt: 32000 * (attempt + 1)
     priority: 30
+    log:
+        stdout=f"logs/association_dataset_burdens/{phenotypes[0]}.stdout", 
+        stderr=f"logs/association_dataset_burdens/{phenotypes[0]}.stderr"
     shell:
         'deeprvat_associate make-dataset '
         + debug +
         '{input.data_config} '
-        '{output}'
+        '{output} '
+        + logging_redirct
diff --git a/pipelines/association_testing/burdens.snakefile b/pipelines/association_testing/burdens.snakefile
index f3bdd304..0e213df8 100644
--- a/pipelines/association_testing/burdens.snakefile
+++ b/pipelines/association_testing/burdens.snakefile
@@ -16,12 +16,16 @@ rule combine_burdens:
     threads: 1
     resources:
         mem_mb = lambda wildcards, attempt: 4098 + (attempt - 1) * 4098,
+    log:
+        stdout="logs/combine_burdens/combine_burdens.stdout", 
+        stderr="logs/combine_burdens/combine_burdens.stderr"
     shell:
         ' '.join([
             'deeprvat_associate combine-burden-chunks',
             '{params.prefix}/burdens/chunks/',
             ' --n-chunks ' + str(n_burden_chunks),
-            '{params.prefix}/burdens',
+            '{params.prefix}/burdens ',
+            logging_redirct
         ])
         
 rule all_xy:
@@ -42,6 +46,9 @@ rule compute_xy:
     threads: 8
     resources:
         mem_mb = lambda wildcards, attempt: 20480 + (attempt - 1) * 4098,
+    log:
+        stdout="logs/compute_xy/{phenotype}.stdout", 
+        stderr="logs/compute_xy/{phenotype}.stderr"
     shell:
         ' && '.join([
             ('deeprvat_associate compute-xy '
@@ -49,7 +56,8 @@ rule compute_xy:
              '{input.data_config} '
              "{output.samples} "
              "{output.x} "
-             "{output.y}")
+             "{output.y} "
+             + logging_redirct)
         ])
 
 
@@ -73,6 +81,9 @@ rule compute_burdens:
     resources:
         mem_mb = 32000,
         gpus = 1
+    log:
+        stdout="logs/compute_burdens/compute_burdens_{chunk}.stdout", 
+        stderr="logs/compute_burdens/compute_burdens_{chunk}.stderr"
     shell:
         ' '.join([
             'deeprvat_associate compute-burdens '
@@ -83,7 +94,8 @@ rule compute_burdens:
             '{input.data_config} '
             '{input.model_config} '
             '{input.checkpoints} '
-            '{params.prefix}/burdens'],
+            '{params.prefix}/burdens '
+            + logging_redirct ],
         )
 
 
@@ -98,11 +110,16 @@ rule reverse_models:
     threads: 4
     resources:
         mem_mb = 20480,
+    log:
+        stdout="logs/reverse_models/reverse_models.stdout", 
+        stderr="logs/reverse_models/reverse_models.stderr"
     shell:
         " && ".join([
             ("deeprvat_associate reverse-models "
              "{input.model_config} "
              "{input.data_config} "
-             "{input.checkpoints}"),
-             "touch {output}"
+             "{input.checkpoints} "
+             + logging_redirct),
+             "touch {output} "
+             
         ])
diff --git a/pipelines/association_testing/regress_eval.snakefile b/pipelines/association_testing/regress_eval.snakefile
index 4c219d1e..006c987b 100644
--- a/pipelines/association_testing/regress_eval.snakefile
+++ b/pipelines/association_testing/regress_eval.snakefile
@@ -22,6 +22,9 @@ rule evaluate:
     params:
         n_combis = 1,
         use_baseline_results = '--use-baseline-results' if 'baseline_results' in config else ''
+    log:
+        stdout="logs/evaluate/{phenotype}.stdout", 
+        stderr="logs/evaluate/{phenotype}.stderr"
     shell:
         'deeprvat_evaluate '
         + debug +
@@ -29,7 +32,8 @@ rule evaluate:
         '--phenotype {wildcards.phenotype} '
         '{input.associations} '
         '{input.data_config} '
-        '{wildcards.phenotype}/deeprvat/eval'
+        '{wildcards.phenotype}/deeprvat/eval '
+        + logging_redirct
 
 
 rule combine_regression_chunks:
@@ -40,11 +44,15 @@ rule combine_regression_chunks:
     threads: 1
     resources:
         mem_mb = lambda wildcards, attempt: 12000 + (attempt - 1) * 4098,
+    log:
+        stdout="logs/combine_regression_chunks/{phenotype}.stdout", 
+        stderr="logs/combine_regression_chunks/{phenotype}.stderr"
     shell:
         'deeprvat_associate combine-regression-results '
         '--model-name repeat_0 ' 
         '{input} '
-        '{output}'
+        '{output} '
+        + logging_redirct
 
 
 rule regress:
@@ -71,6 +79,9 @@ rule regress:
         xy_dir = "{phenotype}/deeprvat/xy",
         # burden_dir = 'burdens',
         out_dir = '{phenotype}/deeprvat/average_regression_results'
+    log:
+        stdout="logs/regress/{phenotype}_regress_{chunk}.stdout", 
+        stderr="logs/regress/{phenotype}_regress_{chunk}.stderr"
     shell:
         'deeprvat_associate regress '
         + debug +
@@ -82,7 +93,8 @@ rule regress:
         '{input.data_config} '
         "{params.xy_dir} "
         "{params.burden_file} "
-        '{params.out_dir}'
+        '{params.out_dir} '
+        + logging_redirct
 
 
 rule average_burdens:
@@ -100,14 +112,18 @@ rule average_burdens:
     resources:
         mem_mb = lambda wildcards, attempt: 4098 + (attempt - 1) * 4098,
     priority: 10,
+    log:
+        stdout="logs/average_burdens/average_burdens_{chunk}.stdout", 
+        stderr="logs/average_burdens/average_burdens_{chunk}.stderr"
     shell:
         ' && '.join([
-            ('deeprvat_associate  average-burdens '
+            ('deeprvat_associate average-burdens '
              '--n-chunks ' + str(n_avg_chunks) + ' '
              '--chunk {wildcards.chunk} '
              '{params.repeats} '
              '--agg-fct mean  '  #TODO remove this
              '{params.burdens_in} '
-             '{params.burdens_out}'),
+             '{params.burdens_out} '
+             + logging_redirct),
             'touch {output}'
         ])
diff --git a/pipelines/association_testing/regress_eval_regenie.snakefile b/pipelines/association_testing/regress_eval_regenie.snakefile
index 03b80e3f..9713ff48 100644
--- a/pipelines/association_testing/regress_eval_regenie.snakefile
+++ b/pipelines/association_testing/regress_eval_regenie.snakefile
@@ -15,6 +15,9 @@ rule evaluate:
         mem_mb = 16000,
     params:
         use_baseline_results = '--use-baseline-results' if 'baseline_results' in config else ''
+    log:
+        stdout="logs/evaluate/{phenotype}.stdout", 
+        stderr="logs/evaluate/{phenotype}.stderr"
     shell:
         'deeprvat_evaluate '
         + debug +
@@ -22,7 +25,8 @@ rule evaluate:
         '--phenotype {wildcards.phenotype} '
         '{input.associations} '
         '{input.data_config} '
-        '{wildcards.phenotype}/deeprvat/eval'
+        '{wildcards.phenotype}/deeprvat/eval '
+        + logging_redirct
 
 rule all_regenie:
     input:
@@ -45,10 +49,14 @@ rule convert_regenie_output:
     threads: 1
     resources:
         mem_mb = 2048
+    log:
+        stdout="logs/convert_regenie_output/convert_regenie_output.stdout",
+        stderr="logs/convert_regenie_output/convert_regenie_output.stderr"
     shell:
         "deeprvat_associate convert-regenie-output "
         "{params.pheno_options} "
-        "{params.gene_file}"
+        "{params.gene_file} "
+        + logging_redirct
 
 rule regenie_step2:
     input:
@@ -68,6 +76,9 @@ rule regenie_step2:
     threads: 16
     resources:
         mem_mb = 16384
+    log:
+        stdout="logs/regenie_step2/regenie_step2.stdout",
+        stderr="logs/regenie_step2/regenie_step2.stderr",
     shell:
         "regenie "
         "--step 2 "
@@ -80,7 +91,8 @@ rule regenie_step2:
         f"--bsize {regenie_step2_bsize} "
         "--threads 16 "
         + " ".join(regenie_config_step2.get("options", [])) + " " +
-        "--out regenie_output/step2/deeprvat"
+        "--out regenie_output/step2/deeprvat "
+        + logging_redirct
 
 rule regenie_step1:
     input:
@@ -96,6 +108,9 @@ rule regenie_step1:
     threads: 24
     resources:
         mem_mb = 16000
+    log:
+        stdout="logs/regenie_step1/regenie_step1.stdout", 
+        stderr="logs/regenie_step1/regenie_step1.stderr"
     shell:
         "mkdir -p regenie_step1_tmp && "
         "regenie "
@@ -110,8 +125,10 @@ rule regenie_step1:
         "--lowmem "
         "--lowmem-prefix regenie_step1_tmp/deeprvat "
         + " ".join(regenie_config_step1.get("options", [])) + " " +
-        "--out regenie_output/step1/deeprvat ; "
-        "rm -rf regenie_step1_tmp"
+        "--out regenie_output/step1/deeprvat "
+        + logging_redirct + " ; "
+        "rm -rf regenie_step1_tmp "
+        
 
 
 # rule regenie_step1_runl1:
@@ -228,6 +245,9 @@ rule make_regenie_burdens:
     threads: 8
     resources:
         mem_mb = 64000
+    log:
+        stdout="logs/make_regenie_burdens/make_regenie_burdens.stdout", 
+        stderr="logs/make_regenie_burdens/make_regenie_burdens.stderr"
     shell:
         "deeprvat_associate make-regenie-input "
         + debug +
@@ -242,6 +262,7 @@ rule make_regenie_burdens:
         "--burdens-genes-samples {params.burdens} {params.genes} {params.samples} "
         "{input.gene_file} "
         "{input.gtf_file} "
+        + logging_redirct
 
 rule make_regenie_metadata:
     input:
@@ -262,6 +283,9 @@ rule make_regenie_metadata:
     threads: 1
     resources:
         mem_mb = 16000
+    log:
+        stdout="logs/make_regenie_metadata/make_regenie_metadata.stdout",
+        stderr="logs/make_regenie_metadata/make_regenie_metadata.stderr",
     shell:
         "deeprvat_associate make-regenie-input "
         + debug +
@@ -273,6 +297,7 @@ rule make_regenie_metadata:
         "--phenotype-file {output.phenotype_file} "
         "{input.gene_file} "
         "{input.gtf_file} "
+        + logging_redirct
 
 
 rule average_burdens:
@@ -290,6 +315,9 @@ rule average_burdens:
     resources:
         mem_mb = lambda wildcards, attempt: 4098 + (attempt - 1) * 4098,
     priority: 10,
+    log:
+        stdout="logs/average_burdens/average_burdens_{chunk}.stdout", 
+        stderr="logs/average_burdens/average_burdens_{chunk}.stderr"
     shell:
         ' && '.join([
             ('deeprvat_associate  average-burdens '
@@ -298,6 +326,7 @@ rule average_burdens:
              '{params.repeats} '
              '--agg-fct mean  '  #TODO remove this
              '{params.burdens_in} '
-             '{params.burdens_out}'),
+             '{params.burdens_out} '
+             + logging_redirct),
             'touch {output}'
         ])
diff --git a/pipelines/association_testing/regress_eval_regenie_conditional.snakefile b/pipelines/association_testing/regress_eval_regenie_conditional.snakefile
index 369b7fa8..3dce454b 100644
--- a/pipelines/association_testing/regress_eval_regenie_conditional.snakefile
+++ b/pipelines/association_testing/regress_eval_regenie_conditional.snakefile
@@ -59,6 +59,9 @@ rule convert_regenie_output:
     threads: 1
     resources:
         mem_mb = 2048
+    log:
+        stdout="logs/convert_regenie_output/convert_regenie_output.stdout",
+        stderr="logs/convert_regenie_output/convert_regenie_output.stderr"
     shell:
         "deeprvat_associate convert-regenie-output "
         "--phenotype {wildcards.phenotype} {input} {output} "
@@ -81,6 +84,9 @@ rule regenie_step2:
     threads: 16
     resources:
         mem_mb = lambda wildcards, attempt: 32768 * attempt
+    log:
+        stdout="logs/regenie_step2/regenie_step2.stdout",
+        stderr="logs/regenie_step2/regenie_step2.stderr",
     shell:
         "regenie "
         "--step 2 "
@@ -110,6 +116,9 @@ rule regenie_step1:
     threads: 24
     resources:
         mem_mb = 16000
+    log:
+        stdout="logs/regenie_step1/regenie_step1.stdout", 
+        stderr="logs/regenie_step1/regenie_step1.stderr"
     shell:
         "mkdir -p regenie_step1_tmp && "
         "regenie "
@@ -244,6 +253,9 @@ rule make_regenie_burdens:
     threads: 8
     resources:
         mem_mb = 64000
+    log:
+        stdout="logs/make_regenie_burdens/make_regenie_burdens.stdout", 
+        stderr="logs/make_regenie_burdens/make_regenie_burdens.stderr"
     shell:
         "deeprvat_associate make-regenie-input "
         + debug +
@@ -273,6 +285,9 @@ rule make_regenie_step2_metadata:
     threads: 1
     resources:
         mem_mb = 16000
+    log:
+        stdout="logs/make_regenie_step2_metadata/make_regenie_step2_metadata.stdout",
+        stderr="logs/make_regenie_step2_metadata/make_regenie_step2_metadata.stderr",
     shell:
         "deeprvat_associate make-regenie-input "
         + debug +
@@ -305,6 +320,9 @@ rule make_regenie_step1_metadata:
     threads: 1
     resources:
         mem_mb = 16000
+    log:
+        stdout="logs/make_regenie_step1_metadata/make_regenie_step1_metadata.stdout",
+        stderr="logs/make_regenie_step1_metadata/make_regenie_step1_metadata.stderr",
     shell:
         "deeprvat_associate make-regenie-input "
         + debug +
@@ -334,6 +352,9 @@ rule average_burdens:
     resources:
         mem_mb = lambda wildcards, attempt: 4098 + (attempt - 1) * 4098,
     priority: 10,
+    log:
+        stdout="logs/average_burdens/average_burdens_{chunk}.stdout", 
+        stderr="logs/average_burdens/average_burdens_{chunk}.stderr"
     shell:
         ' && '.join([
             ('deeprvat_associate  average-burdens '
diff --git a/pipelines/association_testing_control_for_common_variants.snakefile b/pipelines/association_testing_control_for_common_variants.snakefile
index 07fbdee5..85ea5b97 100644
--- a/pipelines/association_testing_control_for_common_variants.snakefile
+++ b/pipelines/association_testing_control_for_common_variants.snakefile
@@ -1,10 +1,15 @@
 from pathlib import Path
 from deeprvat.deeprvat.config import create_main_config
+import logging
 
 create_main_config("deeprvat_input_config.yaml")
 
+#remove duplicate logging handlers from loaded deeprvat.config module
+logging.root.handlers.clear()
+
 configfile: 'deeprvat_config.yaml'
 
+logging_redirct = "1> {log.stdout} 2> {log.stderr}" #for Linux-based systems
 debug_flag = config.get('debug', False)
 phenotypes = config['phenotypes']
 phenotypes = list(phenotypes.keys()) if type(phenotypes) == dict else phenotypes
diff --git a/pipelines/association_testing_precomputed_burdens_regenie.snakefile b/pipelines/association_testing_precomputed_burdens_regenie.snakefile
index b6778b05..913c4e32 100644
--- a/pipelines/association_testing_precomputed_burdens_regenie.snakefile
+++ b/pipelines/association_testing_precomputed_burdens_regenie.snakefile
@@ -2,6 +2,7 @@ from pathlib import Path
 
 configfile: 'config.yaml'
 
+logging_redirct = "1> {log.stdout} 2> {log.stderr}" #for Linux-based systems
 debug_flag = config.get('debug', False)
 phenotypes = config['phenotypes']
 phenotypes = list(phenotypes.keys()) if type(phenotypes) == dict else phenotypes
diff --git a/pipelines/association_testing_pretrained.snakefile b/pipelines/association_testing_pretrained.snakefile
index 2bd12b15..fe898fcf 100644
--- a/pipelines/association_testing_pretrained.snakefile
+++ b/pipelines/association_testing_pretrained.snakefile
@@ -1,10 +1,15 @@
 from pathlib import Path
 from deeprvat.deeprvat.config import create_main_config
+import logging
 
 create_main_config("deeprvat_input_pretrained_models_config.yaml")
 
+#remove duplicate logging handlers from loaded deeprvat.config module
+logging.root.handlers.clear()
+
 configfile: 'deeprvat_config.yaml'
 
+logging_redirct = "1> {log.stdout} 2> {log.stderr}" #for Linux-based systems
 debug_flag = config.get('debug', False)
 phenotypes = config['phenotypes']
 phenotypes = list(phenotypes.keys()) if type(phenotypes) == dict else phenotypes
diff --git a/pipelines/association_testing_pretrained_regenie.snakefile b/pipelines/association_testing_pretrained_regenie.snakefile
index c5c7076f..6394e247 100644
--- a/pipelines/association_testing_pretrained_regenie.snakefile
+++ b/pipelines/association_testing_pretrained_regenie.snakefile
@@ -1,10 +1,15 @@
 from pathlib import Path
 from deeprvat.deeprvat.config import create_main_config
+import logging
 
 create_main_config("deeprvat_input_pretrained_models_config.yaml")
 
+#remove duplicate logging handlers from loaded deeprvat.config module
+logging.root.handlers.clear()
+
 configfile: 'deeprvat_config.yaml'
 
+logging_redirct = "1> {log.stdout} 2> {log.stderr}" #for Linux-based systems
 debug_flag = config.get('debug', False)
 phenotypes = config['phenotypes']
 phenotypes = list(phenotypes.keys()) if type(phenotypes) == dict else phenotypes
diff --git a/pipelines/cv_training/cv_burdens.snakefile b/pipelines/cv_training/cv_burdens.snakefile
index 0cce2d35..6a5572f1 100644
--- a/pipelines/cv_training/cv_burdens.snakefile
+++ b/pipelines/cv_training/cv_burdens.snakefile
@@ -26,6 +26,9 @@ rule make_deeprvat_test_config:
         data_config="cv_split{cv_split}/deeprvat/{phenotype}/deeprvat/config.yaml",
     output:
         data_config_test="cv_split{cv_split}/deeprvat/{phenotype}/deeprvat/config_test.yaml",
+    log: 
+        stdout="logs/make_deeprvat_test_config/cv_split{cv_split}_{phenotype}.stdout", 
+        stderr="logs/make_deeprvat_test_config/cv_split{cv_split}_{phenotype}.stderr"
     shell:
         " && ".join(
         [
@@ -47,6 +50,9 @@ use rule association_dataset from deeprvat_workflow as deeprvat_association_data
     output:
         temp("cv_split{cv_split}/deeprvat/{phenotype}/deeprvat/association_dataset.pkl"),
     threads: 4
+    log:
+        stdout="logs/association_dataset/cv_split{cv_split}_{phenotype}.stdout", 
+        stderr="logs/association_dataset/cv_split{cv_split}_{phenotype}.stderr"
 
 use rule association_dataset_burdens from deeprvat_workflow as deeprvat_association_dataset_burdens with:
     input:
@@ -54,6 +60,9 @@ use rule association_dataset_burdens from deeprvat_workflow as deeprvat_associat
     output:
         temp("cv_split{cv_split}/deeprvat/burdens/association_dataset.pkl"),
     threads: 4
+    log:
+        stdout=f"logs/association_dataset_burdens/cv_split{{cv_split}}_{burden_phenotype}.stdout", 
+        stderr=f"logs/association_dataset_burdens/cv_split{{cv_split}}_{burden_phenotype}.stderr"
 
 
 rule combine_test_burdens:
@@ -94,6 +103,9 @@ rule combine_test_burdens:
         ),
     resources:
         mem_mb=lambda wildcards, attempt: 32000 + attempt * 4098 * 2,
+    log:
+        stdout="logs/combine_test_burdens/{phenotype}.stdout", 
+        stderr="logs/combine_test_burdens/{phenotype}.stderr"
     shell:
         " && ".join(
             [
@@ -118,11 +130,17 @@ rule combine_test_burdens:
 use rule combine_burdens from deeprvat_workflow as deeprvat_combine_burdens with:
     params:
         prefix="cv_split{cv_split}/deeprvat",
+    log:
+        stdout="logs/combine_burdens/cv_split{cv_split}.stdout", 
+        stderr="logs/combine_burdens/cv_split{cv_split}.stderr"
 
 
 use rule compute_burdens from deeprvat_workflow as deeprvat_compute_burdens with:
     params:
         prefix="cv_split{cv_split}/deeprvat",
+    log:
+        stdout="logs/compute_burdens/cv_split{cv_split}_burdens_{chunk}.stdout", 
+        stderr="logs/compute_burdens/cv_split{cv_split}_burdens_{chunk}.stderr"
 
 
 use rule compute_xy from deeprvat_workflow as deeprvat_compute_xy with:
@@ -133,6 +151,9 @@ use rule compute_xy from deeprvat_workflow as deeprvat_compute_xy with:
         samples = directory('cv_split{cv_split}/deeprvat/{phenotype}/deeprvat/xy/sample_ids.zarr'),
         x = directory('cv_split{cv_split}/deeprvat/{phenotype}/deeprvat/xy/x.zarr'),
         y = directory('cv_split{cv_split}/deeprvat/{phenotype}/deeprvat/xy/y.zarr'),
+    log:
+        stdout="logs/compute_xy/cv_split{cv_split}_{phenotype}.stdout", 
+        stderr="logs/compute_xy/cv_split{cv_split}_{phenotype}.stderr"
 
 
 use rule reverse_models from deeprvat_workflow as deeprvat_reverse_models
diff --git a/pipelines/cv_training/cv_training.snakefile b/pipelines/cv_training/cv_training.snakefile
index aef4419b..35679b99 100644
--- a/pipelines/cv_training/cv_training.snakefile
+++ b/pipelines/cv_training/cv_training.snakefile
@@ -18,6 +18,9 @@ rule spread_config:
     threads: 1
     resources:
         mem_mb = 1024,
+    log: 
+        stdout="logs/spread_config/cv_split{cv_split}.stdout", 
+        stderr="logs/spread_config/cv_split{cv_split}.stderr"
     shell:
         ' && '.join([
             conda_check,
@@ -48,7 +51,9 @@ use rule link_config from deeprvat_workflow as deeprvat_link_config
 use rule best_training_run from deeprvat_workflow as deeprvat_best_training_run with:
     params:
         prefix = 'cv_split{cv_split}/deeprvat'
-
+    log:
+        stdout="logs/best_training_run/cv_split{cv_split}_repeat_{repeat}.stdout", 
+        stderr="logs/best_training_run/cv_split{cv_split}_repeat_{repeat}.stderr"
 
 use rule train from deeprvat_workflow as deeprvat_train with:
     priority: 1000
@@ -85,9 +90,9 @@ use rule config from deeprvat_workflow as deeprvat_config with:
         baseline_out = lambda wildcards: f'--baseline-results-out cv_split{wildcards.cv_split}/deeprvat/{wildcards.phenotype}/deeprvat/baseline_results.parquet' if wildcards.phenotype in training_phenotypes else ' ',
         seed_genes_out = lambda wildcards: f'--seed-genes-out cv_split{wildcards.cv_split}/deeprvat/{wildcards.phenotype}/deeprvat/seed_genes.parquet' if wildcards.phenotype in training_phenotypes else ' ',
         association_only = lambda wildcards: f'--association-only' if wildcards.phenotype not in training_phenotypes else ' '
+    log: 
+        stdout="logs/config/cv_split{cv_split}_{phenotype}.stdout", 
+        stderr="logs/config/cv_split{cv_split}_{phenotype}.stderr"
 
 use rule create_main_config from deeprvat_workflow as deeprvat_create_main_config
 
-
-
-
diff --git a/pipelines/cv_training/cv_training_association_testing.snakefile b/pipelines/cv_training/cv_training_association_testing.snakefile
index 9f507b5b..be390cea 100644
--- a/pipelines/cv_training/cv_training_association_testing.snakefile
+++ b/pipelines/cv_training/cv_training_association_testing.snakefile
@@ -1,13 +1,17 @@
 from pathlib import Path
 from deeprvat.deeprvat.config import create_main_config
+import logging
 
 create_main_config("deeprvat_input_config.yaml")
 
-configfile: "deeprvat_config.yaml"
+# Clear the root logger's handlers left behind by importing deeprvat.config, so log messages are not emitted twice
+logging.root.handlers.clear()
 
+configfile: "deeprvat_config.yaml"
 
 conda_check = 'conda info | grep "active environment"'
 
+logging_redirct = "1> {log.stdout} 2> {log.stderr}"  # shell redirection of stdout/stderr to the rule's log files (for Linux-based systems)
 debug_flag = config.get("debug", False)
 phenotypes = config["phenotypes"]
 phenotypes = list(phenotypes.keys()) if type(phenotypes) == dict else phenotypes
diff --git a/pipelines/run_training.snakefile b/pipelines/run_training.snakefile
index f507853d..25a52931 100644
--- a/pipelines/run_training.snakefile
+++ b/pipelines/run_training.snakefile
@@ -1,10 +1,15 @@
 from pathlib import Path
 from deeprvat.deeprvat.config import create_main_config
+import logging
 
 create_main_config("deeprvat_input_training_config.yaml")
 
+# Clear the root logger's handlers left behind by importing deeprvat.config, so log messages are not emitted twice
+logging.root.handlers.clear()
+
 configfile: 'deeprvat_config.yaml'
 
+logging_redirct = "1> {log.stdout} 2> {log.stderr}"  # shell redirection of stdout/stderr to the rule's log files (for Linux-based systems)
 debug_flag = config.get('debug', False)
 debug = '--debug ' if debug_flag else ''
 deterministic_flag = config.get('deterministic', False)
diff --git a/pipelines/training/config.snakefile b/pipelines/training/config.snakefile
index dc93dc8a..674385bf 100644
--- a/pipelines/training/config.snakefile
+++ b/pipelines/training/config.snakefile
@@ -35,6 +35,9 @@ rule config:
         association_only=lambda wildcards: f"--association-only"
         if wildcards.phenotype not in training_phenotypes
         else " ",
+    log: 
+        stdout="logs/config/config_{phenotype}.stdout", 
+        stderr="logs/config/config_{phenotype}.stderr"
     shell:
         (
             "deeprvat_config update-config "
@@ -44,5 +47,6 @@ rule config:
             "{params.baseline_out} "
             "{params.seed_genes_out} "
             "{input.data_config} "
-            "{output.data_config}"
+            "{output.data_config} "
+            + logging_redirct
         )
diff --git a/pipelines/training/train.snakefile b/pipelines/training/train.snakefile
index a9261067..1fa80b66 100644
--- a/pipelines/training/train.snakefile
+++ b/pipelines/training/train.snakefile
@@ -5,7 +5,7 @@ rule link_config:
         model_path / 'model_config.yaml'
     threads: 1
     shell:
-        "ln -rfs {input} {output}"
+        "ln -rfs {input} {output} "
         # "ln -s repeat_0/model_config.yaml {output}"
 
 rule best_training_run:
@@ -23,6 +23,9 @@ rule best_training_run:
     threads: 1
     resources:
         mem_mb = 2048,
+    log:
+        stdout="logs/best_training_run/repeat_{repeat}.stdout", 
+        stderr="logs/best_training_run/repeat_{repeat}.stderr"
     shell:
         (
             'deeprvat_train best-training-run '
@@ -30,7 +33,8 @@ rule best_training_run:
             '{params.prefix}/{model_path}/repeat_{wildcards.repeat} '
             '{params.prefix}/{model_path}/repeat_{wildcards.repeat}/best '
             '{params.prefix}/{model_path}/repeat_{wildcards.repeat}/hyperparameter_optimization.db '
-            '{output.model_config}'
+            '{output.model_config} '
+            + logging_redirct
         )
 
 rule train:
diff --git a/pipelines/training/training_dataset.snakefile b/pipelines/training/training_dataset.snakefile
index a8cbc563..a8965d29 100644
--- a/pipelines/training/training_dataset.snakefile
+++ b/pipelines/training/training_dataset.snakefile
@@ -11,6 +11,9 @@ rule training_dataset:
         mem_mb=lambda wildcards, attempt: 32000 + 12000 * attempt,
         load=16000,
     priority: 5000
+    log:
+        stdout="logs/training_dataset/{phenotype}.stdout", 
+        stderr="logs/training_dataset/{phenotype}.stderr"
     shell:
         (
             "deeprvat_train make-dataset "
@@ -22,7 +25,8 @@ rule training_dataset:
             "{input.data_config} "
             "{output.input_tensor} "
             "{output.covariates} "
-            "{output.y}"
+            "{output.y} "
+            + logging_redirct
         )
 
 
@@ -35,11 +39,15 @@ rule training_dataset_pickle:
     resources:
         mem_mb=40000,  # lambda wildcards, attempt: 38000 + 12000 * attempt
         load=16000,
+    log:
+        stdout="logs/training_dataset_pickle/{phenotype}.stdout", 
+        stderr="logs/training_dataset_pickle/{phenotype}.stderr"
     shell:
         (
             "deeprvat_train make-dataset "
             "--pickle-only "
             "--training-dataset-file {output} "
             "{input} "
-            "dummy dummy dummy"
+            "dummy dummy dummy "
+            + logging_redirct
         )
\ No newline at end of file
diff --git a/pipelines/training_association_testing.snakefile b/pipelines/training_association_testing.snakefile
index c90e9fc4..9a38796e 100644
--- a/pipelines/training_association_testing.snakefile
+++ b/pipelines/training_association_testing.snakefile
@@ -1,10 +1,15 @@
 from pathlib import Path
 from deeprvat.deeprvat.config import create_main_config
+import logging
 
 create_main_config("deeprvat_input_config.yaml")
 
+# Clear the root logger's handlers left behind by importing deeprvat.config, so log messages are not emitted twice
+logging.root.handlers.clear()
+
 configfile: 'deeprvat_config.yaml'
 
+logging_redirct = "1> {log.stdout} 2> {log.stderr}"  # shell redirection of stdout/stderr to the rule's log files (for Linux-based systems)
 debug_flag = config.get('debug', False)
 debug = '--debug ' if debug_flag else ''
 deterministic_flag = config.get('deterministic', False)
diff --git a/pipelines/training_association_testing_regenie.snakefile b/pipelines/training_association_testing_regenie.snakefile
index 3c5d4bdb..dad504fe 100644
--- a/pipelines/training_association_testing_regenie.snakefile
+++ b/pipelines/training_association_testing_regenie.snakefile
@@ -1,10 +1,15 @@
 from pathlib import Path
 from deeprvat.deeprvat.config import create_main_config
+import logging
 
 create_main_config("deeprvat_input_config.yaml")
 
+# Clear the root logger's handlers left behind by importing deeprvat.config, so log messages are not emitted twice
+logging.root.handlers.clear()
+
 configfile: 'deeprvat_config.yaml'
 
+logging_redirct = "1> {log.stdout} 2> {log.stderr}"  # shell redirection of stdout/stderr to the rule's log files (for Linux-based systems)
 debug_flag = config.get('debug', False)
 deterministic_flag = config.get('deterministic', False) # TODO SHOULD THIS BE HERE?
 deterministic = '--deterministic ' if deterministic_flag else ''