diff --git a/workflows/download.smk b/workflows/download.smk index 4795b2fa..727e7ccb 100644 --- a/workflows/download.smk +++ b/workflows/download.smk @@ -1,11 +1,13 @@ import os from shared.functions import get_git_directory + # workflow specific setting configfile: "example_configs/download_config.yaml" # listed all the available datasets here configfile: "path_configs/datasets.yaml" + print("Run Download Workflow") # Attach the specific github directory here @@ -14,6 +16,7 @@ GIT_DIR = get_git_directory(config) # Leave only datasets datasets = config.pop("datasets") + # Get all the dataset folder def get_all_input(wildcards): all_folder = [] @@ -21,8 +24,10 @@ def get_all_input(wildcards): all_folder.append(config["results_dir"] + "/" + dataset) return all_folder + ############## starting snakemake pipelines ################## + # Defining all output wanted from this snakemake rule all: input: diff --git a/workflows/example_configs/methods_config.yaml b/workflows/example_configs/methods_config.yaml index aa785a32..62f8ccb8 100644 --- a/workflows/example_configs/methods_config.yaml +++ b/workflows/example_configs/methods_config.yaml @@ -8,19 +8,19 @@ seed: 2023 # Methods to run for the pipeline, modify based on your need use_methods: - "bass" - - "BayesSpace" - - "DRSC" - - "GraphST" - - "SEDR" - - "SOTIP" - - "SpiceMix" - - "maple" - - "meringue" - - "precast" - - "SC.MEB" - - "spaGCN" - - "stardust" - - "DeepST" +# - "BayesSpace" +# - "DRSC" +# - "GraphST" +# - "SEDR" +# - "SOTIP" +# - "SpiceMix" +# - "maple" +# - "meringue" +# - "precast" +# - "SC_MEB" +# - "spaGCN" +# - "stardust" +# - "DeepST" # - "STAGATE" # res not n_clust # - "scanpy" # - "SpaceFlow" diff --git a/workflows/methods.smk b/workflows/methods.smk index 8b08146c..215782db 100644 --- a/workflows/methods.smk +++ b/workflows/methods.smk @@ -3,6 +3,7 @@ import json from shared.functions import get_git_directory, get_ncluster, get_sample_dirs + # script specific setting configfile: "example_configs/methods_config.yaml" # All methods available @@ -14,6 +15,7 @@ SEED = config["seed"] methods = config.pop("methods") + # Find the technology of the datasets from their experiments.json def get_technology(path): import json @@ -50,6 +52,7 @@ def create_input_all(wildcards): files += create_input(method) return files + rule all: input: create_input_all, @@ -61,13 +64,13 @@ def get_sample_image(wildcards): opt = json.load(file) if opt["image"]: - files = ["H_E.tiff", "H_E.png"] - for file in files: - image = config["data_dir"] + "/" + wildcards.sample + "/" + file - if os.path.isfile(image): - return "--image " + image + files = ["H_E.tiff", "H_E.png"] + for file in files: + image = config["data_dir"] + "/" + wildcards.sample + "/" + file + if os.path.isfile(image): + return "--image " + image else: - return "" + return "" def get_config_file(wildcards): @@ -84,6 +87,7 @@ def get_config_file(wildcards): ########################################################## # requirements + # Find if the method has an additional shell scripts for installation def get_requirements(wildcards): if methods[wildcards.method].get("env_additional") is not None: @@ -91,10 +95,12 @@ def get_requirements(wildcards): else: return [] + # if additional scripts are found, go through this process before generating the results rule installation_requirements: params: - install_script=lambda wildcards: GIT_DIR + methods[wildcards.method]["env_additional"], + install_script=lambda wildcards: GIT_DIR + + methods[wildcards.method]["env_additional"], output: temp("{method}_requirements.info"), conda: @@ -108,11 +114,13 @@ rule installation_requirements: ########################################################## # methods + # Get optargs options based on optargs files def get_optargs(wildcards): with open(GIT_DIR + methods[wildcards.method]["optargs"], "r") as file: opt = json.load(file) - return(opt) + return opt + # Get matrix in the input session def get_matrix_input(wildcards): @@ -122,29 +130,36 @@ def get_matrix_input(wildcards): # Find preprocessing steps match opt["matrix"]: case "counts": - matrix_input=config["data_dir"] + f"/{wildcards.sample}/counts.mtx" + matrix_input = config["data_dir"] + f"/{wildcards.sample}/counts.mtx" case "transform": - matrix_input=config["data_dir"] + f"/{wildcards.sample}/log1p/counts.mtx" + matrix_input = config["data_dir"] + f"/{wildcards.sample}/log1p/counts.mtx" case "dimensionality_reduction": - matrix_input=config["data_dir"] + f"/{wildcards.sample}/log1p/hvg/pca_20/dimensionality_reduction.tsv" + matrix_input = ( + config["data_dir"] + + f"/{wildcards.sample}/log1p/hvg/pca_20/dimensionality_reduction.tsv" + ) if matrix_input == []: - raise(ValueError("no valid matrix option! Check your optargs.json file!")) + raise (ValueError("no valid matrix option! Check your optargs.json file!")) return matrix_input + # Get features def get_feature_input(wildcards): opt = get_optargs(wildcards) # feature input option if opt["integrated_feature_selection"]: - feature_input=config["data_dir"] + f"/{wildcards.sample}/log1p/hvg/features.tsv" + feature_input = ( + config["data_dir"] + f"/{wildcards.sample}/log1p/hvg/features.tsv" + ) else: - feature_input=config["data_dir"] + f"/{wildcards.sample}/features.tsv" + feature_input = config["data_dir"] + f"/{wildcards.sample}/features.tsv" return feature_input + # Get neighbors def get_neighbor_input(wildcards): opt = get_optargs(wildcards) @@ -152,10 +167,14 @@ def get_neighbor_input(wildcards): neighbor_input = [] # feature input option if opt["neighbors"]: - neighbor_input=config["data_dir"] + f"/{wildcards.sample}/delaunay_triangulation/spatial_connectivities.mtx" + neighbor_input = ( + config["data_dir"] + + f"/{wildcards.sample}/delaunay_triangulation/spatial_connectivities.mtx" + ) return neighbor_input + rule method_with_config: input: coordinates=config["data_dir"] + "/{sample}/coordinates.tsv", @@ -168,16 +187,20 @@ rule method_with_config: dir=directory(config["data_dir"] + "/{sample}/{method}/{config_file_name}"), file=config["data_dir"] + "/{sample}/{method}/{config_file_name}/domains.tsv", params: - matrix=lambda wildcards: "-m " if get_optargs(wildcards)["matrix"]!="dimensionality_reduction" else "--dim_red ", + matrix=lambda wildcards: ( + "-m " + if get_optargs(wildcards)["matrix"] != "dimensionality_reduction" + else "--dim_red " + ), neighbors=lambda wildcards: "-n " if get_optargs(wildcards)["neighbors"] else "", n_clusters=lambda wildcards: get_ncluster( config["data_dir"] + "/samples.tsv", wildcards.sample - ), + ), technology=TECHNOLOGY, seed=SEED, configfile=get_config_file, image=get_sample_image, - script=lambda wildcards: GIT_DIR + methods[wildcards.method]["script"] + script=lambda wildcards: GIT_DIR + methods[wildcards.method]["script"], conda: lambda wildcards: GIT_DIR + methods[wildcards.method]["env"] wildcard_constraints: @@ -198,6 +221,7 @@ rule method_with_config: --config {params.configfile} """ + rule method_without_config: input: coordinates=config["data_dir"] + "/{sample}/coordinates.tsv", @@ -210,14 +234,17 @@ rule method_without_config: dir=directory(config["data_dir"] + "/{sample}/{method}"), file=config["data_dir"] + "/{sample}/{method}/domains.tsv", params: - matrix=lambda wildcards: "-m " if get_optargs(wildcards)["matrix"]!="dimensionality_reduction" else "--dim_red ", + matrix=lambda wildcards: ( + "-m " + if get_optargs(wildcards)["matrix"] != "dimensionality_reduction" + else "--dim_red " + ), neighbors=lambda wildcards: "-n " if get_optargs(wildcards)["neighbors"] else "", n_clusters=lambda wildcards: get_ncluster( config["data_dir"] + "/samples.tsv", wildcards.sample - ), + ), technology=TECHNOLOGY, seed=SEED, - configfile=get_config_file, image=get_sample_image, script=lambda wildcards: GIT_DIR + methods[wildcards.method]["script"], conda: @@ -237,4 +264,4 @@ rule method_without_config: --n_clusters {params.n_clusters} \ --technology {params.technology} \ --seed {params.seed} - """ \ No newline at end of file + """ diff --git a/workflows/metrics.smk b/workflows/metrics.smk index 5379c1f8..19f846a1 100644 --- a/workflows/metrics.smk +++ b/workflows/metrics.smk @@ -3,12 +3,14 @@ import json from shared.functions import check_files_in_folder, get_git_directory, get_sample_dirs + # this specific pipeline setting configfile: "example_configs/metrics_config.yaml" # All methods and metrics available configfile: "path_configs/metrics.yaml" configfile: "path_configs/methods.yaml" + GIT_DIR = get_git_directory(config) # Get all the methods and metrics that's being used @@ -16,10 +18,8 @@ metrics = config["metrics"] methods = list(config["methods"].keys()) -def generate_metrics_results( - data_dir, metrics_name, methods, file_ext -): - # getting metrics optargs.json file +def generate_metrics_results(data_dir, metrics_name, methods, file_ext): + # getting metrics optargs.json file with open(GIT_DIR + metrics[metrics_name]["optargs"], "r") as file: opt = json.load(file) @@ -28,7 +28,7 @@ def generate_metrics_results( for sample_dir in get_sample_dirs(data_dir): # Check if ground truth is needed if opt["groundtruth"] and "labels.tsv" not in os.listdir(sample_dir): - continue + continue # Check all method results for method in methods: @@ -41,8 +41,10 @@ def generate_metrics_results( ) # method config directory for dir_to_check in dirs_to_check: - # Check if embedding is needed - if opt["embedding"] and "embedding.tsv" not in os.listdir(os.path.join(method_dir, dir_to_check)): + # Check if embedding is needed + if opt["embedding"] and "embedding.tsv" not in os.listdir( + os.path.join(method_dir, dir_to_check) + ): continue # Check if results exist @@ -51,7 +53,11 @@ def generate_metrics_results( ): # Metric config directory - config_files = config["config_files"][metrics_name].keys() if opt["config_file"] else [""] + config_files = ( + config["config_files"][metrics_name].keys() + if opt["config_file"] + else [""] + ) # Generating final metric results path for config_file_name in config_files: @@ -71,8 +77,10 @@ def generate_all_input(wildcards): all_input = [] for metric in config["use_metrics"]: all_input += generate_metrics_results( - data_dir=config["data_dir"], metrics_name=metric, - methods=methods, file_ext="txt" + data_dir=config["data_dir"], + metrics_name=metric, + methods=methods, + file_ext="txt", ) return all_input @@ -86,12 +94,13 @@ def get_metric(wildcards): # Trim metric_config if it has config path to it metric = wildcards.metric_config if "config" in metric: - metric = metric[:metric.find("/")] + metric = metric[: metric.find("/")] + + return metric - return(metric) def get_sample_labels(wildcards): - # getting metrics optargs.json file + # getting metrics optargs.json file metric = get_metric(wildcards) with open(GIT_DIR + metrics[metric]["optargs"], "r") as file: opt = json.load(file) @@ -123,6 +132,7 @@ def get_method_embedding(wildcards): else: return "" + def get_metric_config(wildcards): # getting metrics optargs.json file metric = get_metric(wildcards) @@ -130,13 +140,21 @@ def get_metric_config(wildcards): opt = json.load(file) if opt["config_file"]: - config_key = wildcards.metric_config[wildcards.metric_config.find("/")+1: ] - if len(config)==0: + config_key = wildcards.metric_config[wildcards.metric_config.find("/") + 1 :] + if len(config) == 0: stop("Wrong optargs or no config folder found") - return "-c " + GIT_DIR + "metric/" + metric + "/" + config["config_files"][metric][config_key] + return ( + "-c " + + GIT_DIR + + "metric/" + + metric + + "/" + + config["config_files"][metric][config_key] + ) else: return "" + def get_sample_coordinate(wildcards): # getting metrics optargs.json file metric = get_metric(wildcards) @@ -145,17 +163,23 @@ def get_sample_coordinate(wildcards): if "physical_coordinate" in opt.keys(): if opt["physical_coordinate"]: - return "--coordinates " + config["data_dir"] + f"/{wildcards.sample}/coordinates.tsv" + return ( + "--coordinates " + + config["data_dir"] + + f"/{wildcards.sample}/coordinates.tsv" + ) else: return "" else: return "" + rule metric: input: domains=config["data_dir"] + "/{sample}/{method_config}/domains.tsv", output: - file=config["data_dir"] + "/{sample}/{method_config}/{metric_config}/results.txt", + file=config["data_dir"] + + "/{sample}/{method_config}/{metric_config}/results.txt", wildcard_constraints: sample="[a-zA-Z0-9_-]+", method_config="[a-zA-Z0-9_-]+(\/config_[a-zA-Z0-9_-]+)?", @@ -166,8 +190,8 @@ rule metric: sample_labels=get_sample_labels, embeddings=get_method_embedding, config=get_metric_config, - script=lambda wildcards:GIT_DIR + metrics[get_metric(wildcards)]["script"], - physical_coordinate=get_sample_coordinate + script=lambda wildcards: GIT_DIR + metrics[get_metric(wildcards)]["script"], + physical_coordinate=get_sample_coordinate, shell: """ {params.script} \