From 38c09db936286ea32665c80602cbd08907c81c03 Mon Sep 17 00:00:00 2001 From: Kayla Meyer <129152803+meyerkm@users.noreply.github.com> Date: Wed, 4 Dec 2024 08:47:06 +0100 Subject: [PATCH] Add in Snakemake Log-Files (#147) * remove double logging * add in logging redirect * add in logging redirect - remove logging from train rule (already performed with parallel) - remove params.prefix bug from log paths * bugfix logging redirect for associate.py - add in logging handlers.clear() - define logging level * output additional fields in model_config.yaml, to be used for pretrained_models setup * adding in logging directive to cv pipeline * add in log to final regenie pipeline * fixup! Format Python code with psf/black pull_request --------- Co-authored-by: PMBio --- deeprvat/cv_utils.py | 2 +- deeprvat/data/dense_gt.py | 2 +- deeprvat/data/rare.py | 2 +- deeprvat/deeprvat/associate.py | 23 ++++++----- .../common_variant_condition_utils.py | 2 +- deeprvat/deeprvat/config.py | 2 +- deeprvat/deeprvat/evaluate.py | 2 +- deeprvat/deeprvat/models.py | 2 +- deeprvat/deeprvat/train.py | 29 +++++++++---- deeprvat/metrics.py | 2 +- deeprvat/seed_gene_discovery/evaluate.py | 2 +- .../seed_gene_discovery.py | 2 +- deeprvat/utils.py | 2 +- .../association_dataset.snakefile | 12 +++++- .../association_testing/burdens.snakefile | 27 +++++++++--- .../regress_eval.snakefile | 26 +++++++++--- .../regress_eval_regenie.snakefile | 41 ++++++++++++++++--- ...regress_eval_regenie_conditional.snakefile | 21 ++++++++++ ...ting_control_for_common_variants.snakefile | 5 +++ ...ting_precomputed_burdens_regenie.snakefile | 1 + .../association_testing_pretrained.snakefile | 5 +++ ...ation_testing_pretrained_regenie.snakefile | 5 +++ pipelines/cv_training/cv_burdens.snakefile | 21 ++++++++++ pipelines/cv_training/cv_training.snakefile | 13 ++++-- .../cv_training_association_testing.snakefile | 6 ++- pipelines/run_training.snakefile | 5 +++ pipelines/training/config.snakefile | 6 ++- 
pipelines/training/train.snakefile | 8 +++- pipelines/training/training_dataset.snakefile | 12 +++++- .../training_association_testing.snakefile | 5 +++ ...ning_association_testing_regenie.snakefile | 5 +++ 31 files changed, 240 insertions(+), 58 deletions(-) diff --git a/deeprvat/cv_utils.py b/deeprvat/cv_utils.py index 533ca3fb..9122ce01 100644 --- a/deeprvat/cv_utils.py +++ b/deeprvat/cv_utils.py @@ -16,7 +16,7 @@ logging.basicConfig( format="[%(asctime)s] %(levelname)s:%(name)s: %(message)s", - level="INFO", + level=logging.INFO, stream=sys.stdout, ) logger = logging.getLogger(__name__) diff --git a/deeprvat/data/dense_gt.py b/deeprvat/data/dense_gt.py index a8aa69a8..8b9684ea 100644 --- a/deeprvat/data/dense_gt.py +++ b/deeprvat/data/dense_gt.py @@ -22,7 +22,7 @@ logging.basicConfig( format="[%(asctime)s] %(levelname)s:%(name)s: %(message)s", - level="INFO", + level=logging.INFO, stream=sys.stdout, ) logger = logging.getLogger(__name__) diff --git a/deeprvat/data/rare.py b/deeprvat/data/rare.py index 04d36ef3..907908b2 100644 --- a/deeprvat/data/rare.py +++ b/deeprvat/data/rare.py @@ -12,7 +12,7 @@ logging.basicConfig( format="[%(asctime)s] %(levelname)s:%(name)s: %(message)s", - level="INFO", + level=logging.INFO, stream=sys.stdout, ) logger = logging.getLogger(__name__) diff --git a/deeprvat/deeprvat/associate.py b/deeprvat/deeprvat/associate.py index e6fc380e..1a341278 100644 --- a/deeprvat/deeprvat/associate.py +++ b/deeprvat/deeprvat/associate.py @@ -25,10 +25,10 @@ from tqdm import tqdm, trange import zarr import re - import deeprvat.deeprvat.models as deeprvat_models from deeprvat.data import DenseGTDataset +logging.root.handlers.clear() # Remove all handlers associated with the root logger object logging.basicConfig( format="[%(asctime)s] %(levelname)s:%(name)s: %(message)s", level=logging.INFO, @@ -48,6 +48,11 @@ AGG_FCT = {"mean": np.mean, "max": np.max} +@click.group() +def cli(): + pass + + def get_burden( batch: Dict, agg_models: Dict[str, 
List[nn.Module]], @@ -99,11 +104,6 @@ def separate_parallel_results(results: List) -> Tuple[List, ...]: return tuple(map(list, zip(*results))) -@click.group() -def cli(): - pass - - def make_dataset_( config: Dict, debug: bool = False, @@ -306,7 +306,6 @@ def make_regenie_input_( gene_metadata_file: Path, gtf: Path, ): - logger.setLevel(logging.INFO) ## Check options if not skip_burdens and burdens_genes_samples is None: @@ -420,7 +419,7 @@ def make_regenie_input_( if average_repeats: logger.info("Averaging burdens across all repeats") burdens = np.zeros((n_samples, n_genes)) - for repeat in trange(burdens_zarr.shape[2]): + for repeat in trange(burdens_zarr.shape[2], file=sys.stdout): burdens += burdens_zarr[:n_samples, :, repeat] burdens = burdens / burdens_zarr.shape[2] else: @@ -448,7 +447,7 @@ def make_regenie_input_( n_samples, samples=list(sample_ids.astype(str)), ) as f: - for i in trange(n_genes): + for i in trange(n_genes, file=sys.stdout): varid = f"pseudovariant_gene_{ensgids[i]}" this_burdens = burdens[:, i] # Rescale scores to be in range (0, 2) genotypes = np.stack( @@ -746,7 +745,7 @@ def load_models( } if len(checkpoint_files[first_repeat]) > 1: - logging.info( + logger.info( f" Averaging results from {len(checkpoint_files[first_repeat])} models for each repeat" ) @@ -1064,7 +1063,9 @@ def combine_burden_chunks_( end_id = 0 for i, chunk in tqdm( - enumerate(range(0, n_chunks)), desc=f"Merging {n_chunks} chunks" + enumerate(range(0, n_chunks)), + desc=f"Merging {n_chunks} chunks", + file=sys.stdout, ): chunk_dir = burdens_chunks_dir / f"chunk_{chunk}" diff --git a/deeprvat/deeprvat/common_variant_condition_utils.py b/deeprvat/deeprvat/common_variant_condition_utils.py index 0005745a..566f30a6 100644 --- a/deeprvat/deeprvat/common_variant_condition_utils.py +++ b/deeprvat/deeprvat/common_variant_condition_utils.py @@ -20,7 +20,7 @@ logging.basicConfig( format="[%(asctime)s] %(levelname)s:%(name)s: %(message)s", - level="INFO", + level=logging.INFO, 
stream=sys.stdout, ) logger = logging.getLogger(__name__) diff --git a/deeprvat/deeprvat/config.py b/deeprvat/deeprvat/config.py index a2abfb9f..5e903437 100644 --- a/deeprvat/deeprvat/config.py +++ b/deeprvat/deeprvat/config.py @@ -16,7 +16,7 @@ logging.basicConfig( format="[%(asctime)s] %(levelname)s:%(name)s: %(message)s", - level="INFO", + level=logging.INFO, stream=sys.stdout, ) logger = logging.getLogger(__name__) diff --git a/deeprvat/deeprvat/evaluate.py b/deeprvat/deeprvat/evaluate.py index 58130f4d..72a44bd4 100644 --- a/deeprvat/deeprvat/evaluate.py +++ b/deeprvat/deeprvat/evaluate.py @@ -14,7 +14,7 @@ logging.basicConfig( format="[%(asctime)s] %(levelname)s:%(name)s: %(message)s", - level="INFO", + level=logging.INFO, stream=sys.stdout, ) logger = logging.getLogger(__name__) diff --git a/deeprvat/deeprvat/models.py b/deeprvat/deeprvat/models.py index 6fc189d4..7c5993aa 100644 --- a/deeprvat/deeprvat/models.py +++ b/deeprvat/deeprvat/models.py @@ -18,7 +18,7 @@ logging.basicConfig( format="[%(asctime)s] %(levelname)s:%(name)s: %(message)s", - level="INFO", + level=logging.INFO, stream=sys.stdout, ) logger = logging.getLogger(__name__) diff --git a/deeprvat/deeprvat/train.py b/deeprvat/deeprvat/train.py index e2245ff7..29d64ebd 100644 --- a/deeprvat/deeprvat/train.py +++ b/deeprvat/deeprvat/train.py @@ -9,7 +9,7 @@ from pprint import pformat, pprint from tempfile import TemporaryDirectory from typing import Dict, Optional, Tuple, Union - +import re import click import math import numpy as np @@ -37,10 +37,9 @@ from torch.utils.data import DataLoader, Dataset, Subset from tqdm import tqdm - logging.basicConfig( format="[%(asctime)s] %(levelname)s:%(name)s: %(message)s", - level="INFO", + level=logging.INFO, stream=sys.stdout, ) logger = logging.getLogger(__name__) @@ -872,20 +871,20 @@ def run_bagging( trainer.fit(model, dm) except RuntimeError as e: # if batch_size is choosen to big, it will be reduced until it fits the GPU - logging.error(f"Caught 
RuntimeError: {e}") + logger.error(f"Caught RuntimeError: {e}") if str(e).find("CUDA out of memory") != -1: if dm.hparams.batch_size > 4: - logging.error( + logger.error( "Retrying training with half the original batch size" ) gc.collect() torch.cuda.empty_cache() dm.hparams.batch_size = dm.hparams.batch_size // 2 else: - logging.error("Batch size is already <= 4, giving up") + logger.error("Batch size is already <= 4, giving up") raise RuntimeError("Could not find small enough batch size") else: - logging.error(f"Caught unknown error: {e}") + logger.error(f"Caught unknown error: {e}") raise e else: break @@ -1167,7 +1166,21 @@ def best_training_run( config = yaml.safe_load(f) with open(config_file_out, "w") as f: - yaml.dump({"model": config["model"]}, f) + yaml.dump( + { + "model": config["model"], + "rare_variant_annotations": config["training_data"]["dataset_config"][ + "rare_embedding" + ]["config"]["annotations"], + "training_data_thresholds": { + k: str(re.sub(f"^{k} ", "", v)) + for k, v in config["training_data"]["dataset_config"][ + "rare_embedding" + ]["config"]["thresholds"].items() + }, + }, + f, + ) n_bags = config["training"]["n_bags"] if not debug else 3 for k in range(n_bags): diff --git a/deeprvat/metrics.py b/deeprvat/metrics.py index f7b74a01..ee4bed36 100644 --- a/deeprvat/metrics.py +++ b/deeprvat/metrics.py @@ -8,7 +8,7 @@ logging.basicConfig( format="[%(asctime)s] %(levelname)s:%(name)s: %(message)s", - level="INFO", + level=logging.INFO, stream=sys.stdout, ) logger = logging.getLogger(__name__) diff --git a/deeprvat/seed_gene_discovery/evaluate.py b/deeprvat/seed_gene_discovery/evaluate.py index e06eea6d..05ebb087 100644 --- a/deeprvat/seed_gene_discovery/evaluate.py +++ b/deeprvat/seed_gene_discovery/evaluate.py @@ -15,7 +15,7 @@ logging.basicConfig( format="[%(asctime)s] %(levelname)s:%(name)s: %(message)s", - level="INFO", + level=logging.INFO, stream=sys.stdout, ) logger = logging.getLogger(__name__) diff --git 
a/deeprvat/seed_gene_discovery/seed_gene_discovery.py b/deeprvat/seed_gene_discovery/seed_gene_discovery.py index 786d17ea..208152cc 100644 --- a/deeprvat/seed_gene_discovery/seed_gene_discovery.py +++ b/deeprvat/seed_gene_discovery/seed_gene_discovery.py @@ -24,7 +24,7 @@ logging.basicConfig( format="[%(asctime)s] %(levelname)s:%(name)s: %(message)s", - level="INFO", + level=logging.INFO, stream=sys.stdout, ) logger = logging.getLogger(__name__) diff --git a/deeprvat/utils.py b/deeprvat/utils.py index a28b4a5c..e515a7ec 100644 --- a/deeprvat/utils.py +++ b/deeprvat/utils.py @@ -17,7 +17,7 @@ logging.basicConfig( format="[%(asctime)s] %(levelname)s:%(name)s: %(message)s", - level="INFO", + level=logging.INFO, stream=sys.stdout, ) logger = logging.getLogger(__name__) diff --git a/pipelines/association_testing/association_dataset.snakefile b/pipelines/association_testing/association_dataset.snakefile index 12f27503..fc751a7d 100644 --- a/pipelines/association_testing/association_dataset.snakefile +++ b/pipelines/association_testing/association_dataset.snakefile @@ -16,12 +16,16 @@ rule association_dataset: resources: mem_mb = lambda wildcards, attempt: 32000 * (attempt + 1), priority: 30 + log: + stdout="logs/association_dataset/{phenotype}.stdout", + stderr="logs/association_dataset/{phenotype}.stderr" shell: 'deeprvat_associate make-dataset ' + debug + "--skip-genotypes " '{input.data_config} ' - '{output}' + '{output} ' + + logging_redirct rule association_dataset_burdens: @@ -33,8 +37,12 @@ rule association_dataset_burdens: resources: mem_mb = lambda wildcards, attempt: 32000 * (attempt + 1) priority: 30 + log: + stdout=f"logs/association_dataset_burdens/{phenotypes[0]}.stdout", + stderr=f"logs/association_dataset_burdens/{phenotypes[0]}.stderr" shell: 'deeprvat_associate make-dataset ' + debug + '{input.data_config} ' - '{output}' + '{output} ' + + logging_redirct diff --git a/pipelines/association_testing/burdens.snakefile 
b/pipelines/association_testing/burdens.snakefile index f3bdd304..0e213df8 100644 --- a/pipelines/association_testing/burdens.snakefile +++ b/pipelines/association_testing/burdens.snakefile @@ -16,12 +16,16 @@ rule combine_burdens: threads: 1 resources: mem_mb = lambda wildcards, attempt: 4098 + (attempt - 1) * 4098, + log: + stdout="logs/combine_burdens/combine_burdens.stdout", + stderr="logs/combine_burdens/combine_burdens.stderr" shell: ' '.join([ 'deeprvat_associate combine-burden-chunks', '{params.prefix}/burdens/chunks/', ' --n-chunks ' + str(n_burden_chunks), - '{params.prefix}/burdens', + '{params.prefix}/burdens ', + logging_redirct ]) rule all_xy: @@ -42,6 +46,9 @@ rule compute_xy: threads: 8 resources: mem_mb = lambda wildcards, attempt: 20480 + (attempt - 1) * 4098, + log: + stdout="logs/compute_xy/{phenotype}.stdout", + stderr="logs/compute_xy/{phenotype}.stderr" shell: ' && '.join([ ('deeprvat_associate compute-xy ' @@ -49,7 +56,8 @@ rule compute_xy: '{input.data_config} ' "{output.samples} " "{output.x} " - "{output.y}") + "{output.y} " + + logging_redirct) ]) @@ -73,6 +81,9 @@ rule compute_burdens: resources: mem_mb = 32000, gpus = 1 + log: + stdout="logs/compute_burdens/compute_burdens_{chunk}.stdout", + stderr="logs/compute_burdens/compute_burdens_{chunk}.stderr" shell: ' '.join([ 'deeprvat_associate compute-burdens ' @@ -83,7 +94,8 @@ rule compute_burdens: '{input.data_config} ' '{input.model_config} ' '{input.checkpoints} ' - '{params.prefix}/burdens'], + '{params.prefix}/burdens ' + + logging_redirct ], ) @@ -98,11 +110,16 @@ rule reverse_models: threads: 4 resources: mem_mb = 20480, + log: + stdout="logs/reverse_models/reverse_models.stdout", + stderr="logs/reverse_models/reverse_models.stderr" shell: " && ".join([ ("deeprvat_associate reverse-models " "{input.model_config} " "{input.data_config} " - "{input.checkpoints}"), - "touch {output}" + "{input.checkpoints} " + + logging_redirct), + "touch {output} " + ]) diff --git 
a/pipelines/association_testing/regress_eval.snakefile b/pipelines/association_testing/regress_eval.snakefile index 4c219d1e..006c987b 100644 --- a/pipelines/association_testing/regress_eval.snakefile +++ b/pipelines/association_testing/regress_eval.snakefile @@ -22,6 +22,9 @@ rule evaluate: params: n_combis = 1, use_baseline_results = '--use-baseline-results' if 'baseline_results' in config else '' + log: + stdout="logs/evaluate/{phenotype}.stdout", + stderr="logs/evaluate/{phenotype}.stderr" shell: 'deeprvat_evaluate ' + debug + @@ -29,7 +32,8 @@ rule evaluate: '--phenotype {wildcards.phenotype} ' '{input.associations} ' '{input.data_config} ' - '{wildcards.phenotype}/deeprvat/eval' + '{wildcards.phenotype}/deeprvat/eval ' + + logging_redirct rule combine_regression_chunks: @@ -40,11 +44,15 @@ rule combine_regression_chunks: threads: 1 resources: mem_mb = lambda wildcards, attempt: 12000 + (attempt - 1) * 4098, + log: + stdout="logs/combine_regression_chunks/{phenotype}.stdout", + stderr="logs/combine_regression_chunks/{phenotype}.stderr" shell: 'deeprvat_associate combine-regression-results ' '--model-name repeat_0 ' '{input} ' - '{output}' + '{output} ' + + logging_redirct rule regress: @@ -71,6 +79,9 @@ rule regress: xy_dir = "{phenotype}/deeprvat/xy", # burden_dir = 'burdens', out_dir = '{phenotype}/deeprvat/average_regression_results' + log: + stdout="logs/regress/{phenotype}_regress_{chunk}.stdout", + stderr="logs/regress/{phenotype}_regress_{chunk}.stderr" shell: 'deeprvat_associate regress ' + debug + @@ -82,7 +93,8 @@ rule regress: '{input.data_config} ' "{params.xy_dir} " "{params.burden_file} " - '{params.out_dir}' + '{params.out_dir} ' + + logging_redirct rule average_burdens: @@ -100,14 +112,18 @@ rule average_burdens: resources: mem_mb = lambda wildcards, attempt: 4098 + (attempt - 1) * 4098, priority: 10, + log: + stdout="logs/average_burdens/average_burdens_{chunk}.stdout", + stderr="logs/average_burdens/average_burdens_{chunk}.stderr" shell: ' && 
'.join([ - ('deeprvat_associate average-burdens ' + ('deeprvat_associate average-burdens ' '--n-chunks ' + str(n_avg_chunks) + ' ' '--chunk {wildcards.chunk} ' '{params.repeats} ' '--agg-fct mean ' #TODO remove this '{params.burdens_in} ' - '{params.burdens_out}'), + '{params.burdens_out} ' + + logging_redirct), 'touch {output}' ]) diff --git a/pipelines/association_testing/regress_eval_regenie.snakefile b/pipelines/association_testing/regress_eval_regenie.snakefile index 03b80e3f..9713ff48 100644 --- a/pipelines/association_testing/regress_eval_regenie.snakefile +++ b/pipelines/association_testing/regress_eval_regenie.snakefile @@ -15,6 +15,9 @@ rule evaluate: mem_mb = 16000, params: use_baseline_results = '--use-baseline-results' if 'baseline_results' in config else '' + log: + stdout="logs/evaluate/{phenotype}.stdout", + stderr="logs/evaluate/{phenotype}.stderr" shell: 'deeprvat_evaluate ' + debug + @@ -22,7 +25,8 @@ rule evaluate: '--phenotype {wildcards.phenotype} ' '{input.associations} ' '{input.data_config} ' - '{wildcards.phenotype}/deeprvat/eval' + '{wildcards.phenotype}/deeprvat/eval ' + + logging_redirct rule all_regenie: input: @@ -45,10 +49,14 @@ rule convert_regenie_output: threads: 1 resources: mem_mb = 2048 + log: + stdout="logs/convert_regenie_output/convert_regenie_output.stdout", + stderr="logs/convert_regenie_output/convert_regenie_output.stderr" shell: "deeprvat_associate convert-regenie-output " "{params.pheno_options} " - "{params.gene_file}" + "{params.gene_file} " + + logging_redirct rule regenie_step2: input: @@ -68,6 +76,9 @@ rule regenie_step2: threads: 16 resources: mem_mb = 16384 + log: + stdout="logs/regenie_step2/regenie_step2.stdout", + stderr="logs/regenie_step2/regenie_step2.stderr", shell: "regenie " "--step 2 " @@ -80,7 +91,8 @@ rule regenie_step2: f"--bsize {regenie_step2_bsize} " "--threads 16 " + " ".join(regenie_config_step2.get("options", [])) + " " + - "--out regenie_output/step2/deeprvat" + "--out 
regenie_output/step2/deeprvat " + + logging_redirct rule regenie_step1: input: @@ -96,6 +108,9 @@ rule regenie_step1: threads: 24 resources: mem_mb = 16000 + log: + stdout="logs/regenie_step1/regenie_step1.stdout", + stderr="logs/regenie_step1/regenie_step1.stderr" shell: "mkdir -p regenie_step1_tmp && " "regenie " @@ -110,8 +125,10 @@ rule regenie_step1: "--lowmem " "--lowmem-prefix regenie_step1_tmp/deeprvat " + " ".join(regenie_config_step1.get("options", [])) + " " + - "--out regenie_output/step1/deeprvat ; " - "rm -rf regenie_step1_tmp" + "--out regenie_output/step1/deeprvat " + + logging_redirct + " ; " + "rm -rf regenie_step1_tmp " + # rule regenie_step1_runl1: @@ -228,6 +245,9 @@ rule make_regenie_burdens: threads: 8 resources: mem_mb = 64000 + log: + stdout="logs/make_regenie_burdens/make_regenie_burdens.stdout", + stderr="logs/make_regenie_burdens/make_regenie_burdens.stderr" shell: "deeprvat_associate make-regenie-input " + debug + @@ -242,6 +262,7 @@ rule make_regenie_burdens: "--burdens-genes-samples {params.burdens} {params.genes} {params.samples} " "{input.gene_file} " "{input.gtf_file} " + + logging_redirct rule make_regenie_metadata: input: @@ -262,6 +283,9 @@ rule make_regenie_metadata: threads: 1 resources: mem_mb = 16000 + log: + stdout="logs/make_regenie_metadata/make_regenie_metadata.stdout", + stderr="logs/make_regenie_metadata/make_regenie_metadata.stderr", shell: "deeprvat_associate make-regenie-input " + debug + @@ -273,6 +297,7 @@ rule make_regenie_metadata: "--phenotype-file {output.phenotype_file} " "{input.gene_file} " "{input.gtf_file} " + + logging_redirct rule average_burdens: @@ -290,6 +315,9 @@ rule average_burdens: resources: mem_mb = lambda wildcards, attempt: 4098 + (attempt - 1) * 4098, priority: 10, + log: + stdout="logs/average_burdens/average_burdens_{chunk}.stdout", + stderr="logs/average_burdens/average_burdens_{chunk}.stderr" shell: ' && '.join([ ('deeprvat_associate average-burdens ' @@ -298,6 +326,7 @@ rule 
average_burdens: '{params.repeats} ' '--agg-fct mean ' #TODO remove this '{params.burdens_in} ' - '{params.burdens_out}'), + '{params.burdens_out} ' + + logging_redirct), 'touch {output}' ]) diff --git a/pipelines/association_testing/regress_eval_regenie_conditional.snakefile b/pipelines/association_testing/regress_eval_regenie_conditional.snakefile index 369b7fa8..3dce454b 100644 --- a/pipelines/association_testing/regress_eval_regenie_conditional.snakefile +++ b/pipelines/association_testing/regress_eval_regenie_conditional.snakefile @@ -59,6 +59,9 @@ rule convert_regenie_output: threads: 1 resources: mem_mb = 2048 + log: + stdout="logs/convert_regenie_output/convert_regenie_output.stdout", + stderr="logs/convert_regenie_output/convert_regenie_output.stderr" shell: "deeprvat_associate convert-regenie-output " "--phenotype {wildcards.phenotype} {input} {output} " @@ -81,6 +84,9 @@ rule regenie_step2: threads: 16 resources: mem_mb = lambda wildcards, attempt: 32768 * attempt + log: + stdout="logs/regenie_step2/regenie_step2.stdout", + stderr="logs/regenie_step2/regenie_step2.stderr", shell: "regenie " "--step 2 " @@ -110,6 +116,9 @@ rule regenie_step1: threads: 24 resources: mem_mb = 16000 + log: + stdout="logs/regenie_step1/regenie_step1.stdout", + stderr="logs/regenie_step1/regenie_step1.stderr" shell: "mkdir -p regenie_step1_tmp && " "regenie " @@ -244,6 +253,9 @@ rule make_regenie_burdens: threads: 8 resources: mem_mb = 64000 + log: + stdout="logs/make_regenie_burdens/make_regenie_burdens.stdout", + stderr="logs/make_regenie_burdens/make_regenie_burdens.stderr" shell: "deeprvat_associate make-regenie-input " + debug + @@ -273,6 +285,9 @@ rule make_regenie_step2_metadata: threads: 1 resources: mem_mb = 16000 + log: + stdout="logs/make_regenie_step2_metadata/make_regenie_step2_metadata.stdout", + stderr="logs/make_regenie_step2_metadata/make_regenie_step2_metadata.stderr", shell: "deeprvat_associate make-regenie-input " + debug + @@ -305,6 +320,9 @@ rule 
make_regenie_step1_metadata: threads: 1 resources: mem_mb = 16000 + log: + stdout="logs/make_regenie_step1_metadata/make_regenie_step1_metadata.stdout", + stderr="logs/make_regenie_step1_metadata/make_regenie_step1_metadata.stderr", shell: "deeprvat_associate make-regenie-input " + debug + @@ -334,6 +352,9 @@ rule average_burdens: resources: mem_mb = lambda wildcards, attempt: 4098 + (attempt - 1) * 4098, priority: 10, + log: + stdout="logs/average_burdens/average_burdens_{chunk}.stdout", + stderr="logs/average_burdens/average_burdens_{chunk}.stderr" shell: ' && '.join([ ('deeprvat_associate average-burdens ' diff --git a/pipelines/association_testing_control_for_common_variants.snakefile b/pipelines/association_testing_control_for_common_variants.snakefile index 07fbdee5..85ea5b97 100644 --- a/pipelines/association_testing_control_for_common_variants.snakefile +++ b/pipelines/association_testing_control_for_common_variants.snakefile @@ -1,10 +1,15 @@ from pathlib import Path from deeprvat.deeprvat.config import create_main_config +import logging create_main_config("deeprvat_input_config.yaml") +#remove duplicate logging handlers from loaded deeprvat.config module +logging.root.handlers.clear() + configfile: 'deeprvat_config.yaml' +logging_redirct = "1> {log.stdout} 2> {log.stderr}" #for Linux-based systems debug_flag = config.get('debug', False) phenotypes = config['phenotypes'] phenotypes = list(phenotypes.keys()) if type(phenotypes) == dict else phenotypes diff --git a/pipelines/association_testing_precomputed_burdens_regenie.snakefile b/pipelines/association_testing_precomputed_burdens_regenie.snakefile index b6778b05..913c4e32 100644 --- a/pipelines/association_testing_precomputed_burdens_regenie.snakefile +++ b/pipelines/association_testing_precomputed_burdens_regenie.snakefile @@ -2,6 +2,7 @@ from pathlib import Path configfile: 'config.yaml' +logging_redirct = "1> {log.stdout} 2> {log.stderr}" #for Linux-based systems debug_flag = config.get('debug', False) 
phenotypes = config['phenotypes'] phenotypes = list(phenotypes.keys()) if type(phenotypes) == dict else phenotypes diff --git a/pipelines/association_testing_pretrained.snakefile b/pipelines/association_testing_pretrained.snakefile index 2bd12b15..fe898fcf 100644 --- a/pipelines/association_testing_pretrained.snakefile +++ b/pipelines/association_testing_pretrained.snakefile @@ -1,10 +1,15 @@ from pathlib import Path from deeprvat.deeprvat.config import create_main_config +import logging create_main_config("deeprvat_input_pretrained_models_config.yaml") +#remove duplicate logging handlers from loaded deeprvat.config module +logging.root.handlers.clear() + configfile: 'deeprvat_config.yaml' +logging_redirct = "1> {log.stdout} 2> {log.stderr}" #for Linux-based systems debug_flag = config.get('debug', False) phenotypes = config['phenotypes'] phenotypes = list(phenotypes.keys()) if type(phenotypes) == dict else phenotypes diff --git a/pipelines/association_testing_pretrained_regenie.snakefile b/pipelines/association_testing_pretrained_regenie.snakefile index c5c7076f..6394e247 100644 --- a/pipelines/association_testing_pretrained_regenie.snakefile +++ b/pipelines/association_testing_pretrained_regenie.snakefile @@ -1,10 +1,15 @@ from pathlib import Path from deeprvat.deeprvat.config import create_main_config +import logging create_main_config("deeprvat_input_pretrained_models_config.yaml") +#remove duplicate logging handlers from loaded deeprvat.config module +logging.root.handlers.clear() + configfile: 'deeprvat_config.yaml' +logging_redirct = "1> {log.stdout} 2> {log.stderr}" #for Linux-based systems debug_flag = config.get('debug', False) phenotypes = config['phenotypes'] phenotypes = list(phenotypes.keys()) if type(phenotypes) == dict else phenotypes diff --git a/pipelines/cv_training/cv_burdens.snakefile b/pipelines/cv_training/cv_burdens.snakefile index 0cce2d35..6a5572f1 100644 --- a/pipelines/cv_training/cv_burdens.snakefile +++ 
b/pipelines/cv_training/cv_burdens.snakefile @@ -26,6 +26,9 @@ rule make_deeprvat_test_config: data_config="cv_split{cv_split}/deeprvat/{phenotype}/deeprvat/config.yaml", output: data_config_test="cv_split{cv_split}/deeprvat/{phenotype}/deeprvat/config_test.yaml", + log: + stdout="logs/make_deeprvat_test_config/cv_split{cv_split}_{phenotype}.stdout", + stderr="logs/make_deeprvat_test_config/cv_split{cv_split}_{phenotype}.stderr" shell: " && ".join( [ @@ -47,6 +50,9 @@ use rule association_dataset from deeprvat_workflow as deeprvat_association_data output: temp("cv_split{cv_split}/deeprvat/{phenotype}/deeprvat/association_dataset.pkl"), threads: 4 + log: + stdout="logs/association_dataset/cv_split{cv_split}_{phenotype}.stdout", + stderr="logs/association_dataset/cv_split{cv_split}_{phenotype}.stderr" use rule association_dataset_burdens from deeprvat_workflow as deeprvat_association_dataset_burdens with: input: @@ -54,6 +60,9 @@ use rule association_dataset_burdens from deeprvat_workflow as deeprvat_associat output: temp("cv_split{cv_split}/deeprvat/burdens/association_dataset.pkl"), threads: 4 + log: + stdout=f"logs/association_dataset_burdens/cv_split{{cv_split}}_{burden_phenotype}.stdout", + stderr=f"logs/association_dataset_burdens/cv_split{{cv_split}}_{burden_phenotype}.stderr" rule combine_test_burdens: @@ -94,6 +103,9 @@ rule combine_test_burdens: ), resources: mem_mb=lambda wildcards, attempt: 32000 + attempt * 4098 * 2, + log: + stdout="logs/combine_test_burdens/{phenotype}.stdout", + stderr="logs/combine_test_burdens/{phenotype}.stderr" shell: " && ".join( [ @@ -118,11 +130,17 @@ rule combine_test_burdens: use rule combine_burdens from deeprvat_workflow as deeprvat_combine_burdens with: params: prefix="cv_split{cv_split}/deeprvat", + log: + stdout="logs/combine_burdens/cv_split{cv_split}.stdout", + stderr="logs/combine_burdens/cv_split{cv_split}.stderr" use rule compute_burdens from deeprvat_workflow as deeprvat_compute_burdens with: params: 
prefix="cv_split{cv_split}/deeprvat", + log: + stdout="logs/compute_burdens/cv_split{cv_split}_burdens_{chunk}.stdout", + stderr="logs/compute_burdens/cv_split{cv_split}_burdens_{chunk}.stderr" use rule compute_xy from deeprvat_workflow as deeprvat_compute_xy with: @@ -133,6 +151,9 @@ use rule compute_xy from deeprvat_workflow as deeprvat_compute_xy with: samples = directory('cv_split{cv_split}/deeprvat/{phenotype}/deeprvat/xy/sample_ids.zarr'), x = directory('cv_split{cv_split}/deeprvat/{phenotype}/deeprvat/xy/x.zarr'), y = directory('cv_split{cv_split}/deeprvat/{phenotype}/deeprvat/xy/y.zarr'), + log: + stdout="logs/compute_xy/cv_split{cv_split}_{phenotype}.stdout", + stderr="logs/compute_xy/cv_split{cv_split}_{phenotype}.stderr" use rule reverse_models from deeprvat_workflow as deeprvat_reverse_models diff --git a/pipelines/cv_training/cv_training.snakefile b/pipelines/cv_training/cv_training.snakefile index aef4419b..35679b99 100644 --- a/pipelines/cv_training/cv_training.snakefile +++ b/pipelines/cv_training/cv_training.snakefile @@ -18,6 +18,9 @@ rule spread_config: threads: 1 resources: mem_mb = 1024, + log: + stdout="logs/spread_config/cv_split{cv_split}.stdout", + stderr="logs/spread_config/cv_split{cv_split}.stderr" shell: ' && '.join([ conda_check, @@ -48,7 +51,9 @@ use rule link_config from deeprvat_workflow as deeprvat_link_config use rule best_training_run from deeprvat_workflow as deeprvat_best_training_run with: params: prefix = 'cv_split{cv_split}/deeprvat' - + log: + stdout="logs/best_training_run/cv_split{cv_split}_repeat_{repeat}.stdout", + stderr="logs/best_training_run/cv_split{cv_split}_repeat_{repeat}.stderr" use rule train from deeprvat_workflow as deeprvat_train with: priority: 1000 @@ -85,9 +90,9 @@ use rule config from deeprvat_workflow as deeprvat_config with: baseline_out = lambda wildcards: f'--baseline-results-out cv_split{wildcards.cv_split}/deeprvat/{wildcards.phenotype}/deeprvat/baseline_results.parquet' if wildcards.phenotype in 
training_phenotypes else ' ', seed_genes_out = lambda wildcards: f'--seed-genes-out cv_split{wildcards.cv_split}/deeprvat/{wildcards.phenotype}/deeprvat/seed_genes.parquet' if wildcards.phenotype in training_phenotypes else ' ', association_only = lambda wildcards: f'--association-only' if wildcards.phenotype not in training_phenotypes else ' ' + log: + stdout="logs/config/cv_split{cv_split}_{phenotype}.stdout", + stderr="logs/config/cv_split{cv_split}_{phenotype}.stderr" use rule create_main_config from deeprvat_workflow as deeprvat_create_main_config - - - diff --git a/pipelines/cv_training/cv_training_association_testing.snakefile b/pipelines/cv_training/cv_training_association_testing.snakefile index 9f507b5b..be390cea 100644 --- a/pipelines/cv_training/cv_training_association_testing.snakefile +++ b/pipelines/cv_training/cv_training_association_testing.snakefile @@ -1,13 +1,17 @@ from pathlib import Path from deeprvat.deeprvat.config import create_main_config +import logging create_main_config("deeprvat_input_config.yaml") -configfile: "deeprvat_config.yaml" +#remove duplicate logging handlers from loaded deeprvat.config module +logging.root.handlers.clear() +configfile: "deeprvat_config.yaml" conda_check = 'conda info | grep "active environment"' +logging_redirct = "1> {log.stdout} 2> {log.stderr}" #for Linux-based systems debug_flag = config.get("debug", False) phenotypes = config["phenotypes"] phenotypes = list(phenotypes.keys()) if type(phenotypes) == dict else phenotypes diff --git a/pipelines/run_training.snakefile b/pipelines/run_training.snakefile index f507853d..25a52931 100644 --- a/pipelines/run_training.snakefile +++ b/pipelines/run_training.snakefile @@ -1,10 +1,15 @@ from pathlib import Path from deeprvat.deeprvat.config import create_main_config +import logging create_main_config("deeprvat_input_training_config.yaml") +#remove duplicate logging handlers from loaded deeprvat.config module +logging.root.handlers.clear() + configfile: 
'deeprvat_config.yaml' +logging_redirct = "1> {log.stdout} 2> {log.stderr}" #for Linux-based systems debug_flag = config.get('debug', False) debug = '--debug ' if debug_flag else '' deterministic_flag = config.get('deterministic', False) diff --git a/pipelines/training/config.snakefile b/pipelines/training/config.snakefile index dc93dc8a..674385bf 100644 --- a/pipelines/training/config.snakefile +++ b/pipelines/training/config.snakefile @@ -35,6 +35,9 @@ rule config: association_only=lambda wildcards: f"--association-only" if wildcards.phenotype not in training_phenotypes else " ", + log: + stdout="logs/config/config_{phenotype}.stdout", + stderr="logs/config/config_{phenotype}.stderr" shell: ( "deeprvat_config update-config " @@ -44,5 +47,6 @@ rule config: "{params.baseline_out} " "{params.seed_genes_out} " "{input.data_config} " - "{output.data_config}" + "{output.data_config} " + + logging_redirct ) diff --git a/pipelines/training/train.snakefile b/pipelines/training/train.snakefile index a9261067..1fa80b66 100644 --- a/pipelines/training/train.snakefile +++ b/pipelines/training/train.snakefile @@ -5,7 +5,7 @@ rule link_config: model_path / 'model_config.yaml' threads: 1 shell: - "ln -rfs {input} {output}" + "ln -rfs {input} {output} " # "ln -s repeat_0/model_config.yaml {output}" rule best_training_run: @@ -23,6 +23,9 @@ rule best_training_run: threads: 1 resources: mem_mb = 2048, + log: + stdout="logs/best_training_run/repeat_{repeat}.stdout", + stderr="logs/best_training_run/repeat_{repeat}.stderr" shell: ( 'deeprvat_train best-training-run ' @@ -30,7 +33,8 @@ rule best_training_run: '{params.prefix}/{model_path}/repeat_{wildcards.repeat} ' '{params.prefix}/{model_path}/repeat_{wildcards.repeat}/best ' '{params.prefix}/{model_path}/repeat_{wildcards.repeat}/hyperparameter_optimization.db ' - '{output.model_config}' + '{output.model_config} ' + + logging_redirct ) rule train: diff --git a/pipelines/training/training_dataset.snakefile 
b/pipelines/training/training_dataset.snakefile index a8cbc563..a8965d29 100644 --- a/pipelines/training/training_dataset.snakefile +++ b/pipelines/training/training_dataset.snakefile @@ -11,6 +11,9 @@ rule training_dataset: mem_mb=lambda wildcards, attempt: 32000 + 12000 * attempt, load=16000, priority: 5000 + log: + stdout="logs/training_dataset/{phenotype}.stdout", + stderr="logs/training_dataset/{phenotype}.stderr" shell: ( "deeprvat_train make-dataset " @@ -22,7 +25,8 @@ rule training_dataset: "{input.data_config} " "{output.input_tensor} " "{output.covariates} " - "{output.y}" + "{output.y} " + + logging_redirct ) @@ -35,11 +39,15 @@ rule training_dataset_pickle: resources: mem_mb=40000, # lambda wildcards, attempt: 38000 + 12000 * attempt load=16000, + log: + stdout="logs/training_dataset_pickle/{phenotype}.stdout", + stderr="logs/training_dataset_pickle/{phenotype}.stderr" shell: ( "deeprvat_train make-dataset " "--pickle-only " "--training-dataset-file {output} " "{input} " - "dummy dummy dummy" + "dummy dummy dummy " + + logging_redirct ) \ No newline at end of file diff --git a/pipelines/training_association_testing.snakefile b/pipelines/training_association_testing.snakefile index c90e9fc4..9a38796e 100644 --- a/pipelines/training_association_testing.snakefile +++ b/pipelines/training_association_testing.snakefile @@ -1,10 +1,15 @@ from pathlib import Path from deeprvat.deeprvat.config import create_main_config +import logging create_main_config("deeprvat_input_config.yaml") +#remove duplicate logging handlers from loaded deeprvat.config module +logging.root.handlers.clear() + configfile: 'deeprvat_config.yaml' +logging_redirct = "1> {log.stdout} 2> {log.stderr}" #for Linux-based systems debug_flag = config.get('debug', False) debug = '--debug ' if debug_flag else '' deterministic_flag = config.get('deterministic', False) diff --git a/pipelines/training_association_testing_regenie.snakefile b/pipelines/training_association_testing_regenie.snakefile 
index 3c5d4bdb..dad504fe 100644 --- a/pipelines/training_association_testing_regenie.snakefile +++ b/pipelines/training_association_testing_regenie.snakefile @@ -1,10 +1,15 @@ from pathlib import Path from deeprvat.deeprvat.config import create_main_config +import logging create_main_config("deeprvat_input_config.yaml") +#remove duplicate logging handlers from loaded deeprvat.config module +logging.root.handlers.clear() + configfile: 'deeprvat_config.yaml' +logging_redirct = "1> {log.stdout} 2> {log.stderr}" #for Linux-based systems debug_flag = config.get('debug', False) deterministic_flag = config.get('deterministic', False) # TODO SHOULD THIS BE HERE? deterministic = '--deterministic ' if deterministic_flag else ''