formatted snakemake files
Jieran-S committed Feb 23, 2024
1 parent 1da98d2 commit 350b8ba
Showing 4 changed files with 111 additions and 55 deletions.
5 changes: 5 additions & 0 deletions workflows/download.smk
@@ -1,11 +1,13 @@
import os
from shared.functions import get_git_directory

+
# workflow specific setting
configfile: "example_configs/download_config.yaml"
# listed all the available datasets here
configfile: "path_configs/datasets.yaml"

+
print("Run Download Workflow")

# Attach the specific github directory here
@@ -14,15 +16,18 @@ GIT_DIR = get_git_directory(config)
# Leave only datasets
datasets = config.pop("datasets")

+
# Get all the dataset folder
def get_all_input(wildcards):
all_folder = []
for dataset in config["use_datasets"]:
all_folder.append(config["results_dir"] + "/" + dataset)
return all_folder

+
############## starting snakemake pipelines ##################

+
# Defining all output wanted from this snakemake
rule all:
input:
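The get_all_input helper above is Snakemake's standard input-function pattern: rule all requests one results folder per dataset listed under use_datasets, and Snakemake then schedules whatever rules produce those folders. A minimal plain-Python sketch of the same logic; the config values below are hypothetical stand-ins for what download_config.yaml and datasets.yaml would provide:

# Plain-Python sketch of the input-function pattern in download.smk.
# The config values are hypothetical; the real workflow merges them from
# example_configs/download_config.yaml and path_configs/datasets.yaml.
config = {
    "results_dir": "results",                    # hypothetical value
    "use_datasets": ["dataset_a", "dataset_b"],  # hypothetical values
}

def get_all_input(wildcards=None):
    # One output folder per requested dataset, as in rule all above
    all_folder = []
    for dataset in config["use_datasets"]:
        all_folder.append(config["results_dir"] + "/" + dataset)
    return all_folder

print(get_all_input())  # ['results/dataset_a', 'results/dataset_b']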
26 changes: 13 additions & 13 deletions workflows/example_configs/methods_config.yaml
@@ -8,19 +8,19 @@ seed: 2023
# Methods to run for the pipeline, modify based on your need
use_methods:
- "bass"
- "BayesSpace"
- "DRSC"
- "GraphST"
- "SEDR"
- "SOTIP"
- "SpiceMix"
- "maple"
- "meringue"
- "precast"
- "SC.MEB"
- "spaGCN"
- "stardust"
- "DeepST"
# - "BayesSpace"
# - "DRSC"
# - "GraphST"
# - "SEDR"
# - "SOTIP"
# - "SpiceMix"
# - "maple"
# - "meringue"
# - "precast"
# - "SC_MEB"
# - "spaGCN"
# - "stardust"
# - "DeepST"
# - "STAGATE" # res not n_clust
# - "scanpy"
# - "SpaceFlow"
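Because the YAML parser never sees commented-out entries, only "bass" remains active under use_methods after this change. A small sketch, not part of the repo, of how such a config is typically consumed with PyYAML:

# Sketch: reading methods_config.yaml and iterating the active methods.
# Assumes PyYAML is installed; the path matches the configfile directive.
import yaml

with open("example_configs/methods_config.yaml") as fh:
    cfg = yaml.safe_load(fh)

seed = cfg.get("seed", 2023)
for method in cfg["use_methods"]:
    # after this commit, only "bass" is listed
    print(f"would run {method} with seed {seed}")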
71 changes: 49 additions & 22 deletions workflows/methods.smk
@@ -3,6 +3,7 @@ import json

from shared.functions import get_git_directory, get_ncluster, get_sample_dirs

+
# script specific setting
configfile: "example_configs/methods_config.yaml"
# All methods available
@@ -14,6 +15,7 @@ SEED = config["seed"]

methods = config.pop("methods")

+
# Find the technology of the datasets from their experiments.json
def get_technology(path):
import json
@@ -50,6 +52,7 @@ def create_input_all(wildcards):
files += create_input(method)
return files

+
rule all:
input:
create_input_all,
@@ -61,13 +64,13 @@ def get_sample_image(wildcards):
opt = json.load(file)

if opt["image"]:
files = ["H_E.tiff", "H_E.png"]
for file in files:
image = config["data_dir"] + "/" + wildcards.sample + "/" + file
if os.path.isfile(image):
return "--image " + image
files = ["H_E.tiff", "H_E.png"]
for file in files:
image = config["data_dir"] + "/" + wildcards.sample + "/" + file
if os.path.isfile(image):
return "--image " + image
else:
return ""
return ""


def get_config_file(wildcards):
@@ -84,17 +87,20 @@ def get_config_file(wildcards):
##########################################################
# requirements

+
# Find if the method has an additional shell scripts for installation
def get_requirements(wildcards):
if methods[wildcards.method].get("env_additional") is not None:
return f"{wildcards.method}_requirements.info"
else:
return []

+
# if additional scripts are found, go through this process before generating the results
rule installation_requirements:
params:
-        install_script=lambda wildcards: GIT_DIR + methods[wildcards.method]["env_additional"],
+        install_script=lambda wildcards: GIT_DIR
+        + methods[wildcards.method]["env_additional"],
output:
temp("{method}_requirements.info"),
conda:
@@ -108,11 +114,13 @@ rule installation_requirements:
##########################################################
# methods

+
# Get optargs options based on optargs files
def get_optargs(wildcards):
with open(GIT_DIR + methods[wildcards.method]["optargs"], "r") as file:
opt = json.load(file)
-    return(opt)
+    return opt

+
# Get matrix in the input session
def get_matrix_input(wildcards):
@@ -122,40 +130,51 @@ def get_matrix_input(wildcards):
    # Find preprocessing steps
    match opt["matrix"]:
        case "counts":
-            matrix_input=config["data_dir"] + f"/{wildcards.sample}/counts.mtx"
+            matrix_input = config["data_dir"] + f"/{wildcards.sample}/counts.mtx"
        case "transform":
-            matrix_input=config["data_dir"] + f"/{wildcards.sample}/log1p/counts.mtx"
+            matrix_input = config["data_dir"] + f"/{wildcards.sample}/log1p/counts.mtx"
        case "dimensionality_reduction":
-            matrix_input=config["data_dir"] + f"/{wildcards.sample}/log1p/hvg/pca_20/dimensionality_reduction.tsv"
+            matrix_input = (
+                config["data_dir"]
+                + f"/{wildcards.sample}/log1p/hvg/pca_20/dimensionality_reduction.tsv"
+            )

    if matrix_input == []:
-        raise(ValueError("no valid matrix option! Check your optargs.json file!"))
+        raise (ValueError("no valid matrix option! Check your optargs.json file!"))

    return matrix_input

+
# Get features
def get_feature_input(wildcards):
    opt = get_optargs(wildcards)

    # feature input option
    if opt["integrated_feature_selection"]:
-        feature_input=config["data_dir"] + f"/{wildcards.sample}/log1p/hvg/features.tsv"
+        feature_input = (
+            config["data_dir"] + f"/{wildcards.sample}/log1p/hvg/features.tsv"
+        )
    else:
-        feature_input=config["data_dir"] + f"/{wildcards.sample}/features.tsv"
+        feature_input = config["data_dir"] + f"/{wildcards.sample}/features.tsv"

    return feature_input

+
# Get neighbors
def get_neighbor_input(wildcards):
    opt = get_optargs(wildcards)

    neighbor_input = []
    # feature input option
    if opt["neighbors"]:
-        neighbor_input=config["data_dir"] + f"/{wildcards.sample}/delaunay_triangulation/spatial_connectivities.mtx"
+        neighbor_input = (
+            config["data_dir"]
+            + f"/{wildcards.sample}/delaunay_triangulation/spatial_connectivities.mtx"
+        )

    return neighbor_input

+
rule method_with_config:
input:
coordinates=config["data_dir"] + "/{sample}/coordinates.tsv",
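The three helpers above all branch on a per-method optargs.json. Based only on the keys used in this file (matrix, integrated_feature_selection, neighbors, plus image in get_sample_image), a hypothetical optargs file and a standalone sketch of the match-based dispatch (requires Python >= 3.10) might look like:

# Hypothetical optargs.json content (keys inferred from the helpers above):
#   {"matrix": "dimensionality_reduction",
#    "integrated_feature_selection": true,
#    "neighbors": false,
#    "image": false}

def pick_matrix_path(data_dir: str, sample: str, opt: dict) -> str:
    # Same dispatch as get_matrix_input, without Snakemake wildcards
    match opt["matrix"]:
        case "counts":
            return f"{data_dir}/{sample}/counts.mtx"
        case "transform":
            return f"{data_dir}/{sample}/log1p/counts.mtx"
        case "dimensionality_reduction":
            return f"{data_dir}/{sample}/log1p/hvg/pca_20/dimensionality_reduction.tsv"
        case _:
            raise ValueError("no valid matrix option! Check your optargs.json file!")

print(pick_matrix_path("data", "sample1", {"matrix": "counts"}))  # data/sample1/counts.mtx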
Expand All @@ -168,16 +187,20 @@ rule method_with_config:
dir=directory(config["data_dir"] + "/{sample}/{method}/{config_file_name}"),
file=config["data_dir"] + "/{sample}/{method}/{config_file_name}/domains.tsv",
params:
-        matrix=lambda wildcards: "-m " if get_optargs(wildcards)["matrix"]!="dimensionality_reduction" else "--dim_red ",
+        matrix=lambda wildcards: (
+            "-m "
+            if get_optargs(wildcards)["matrix"] != "dimensionality_reduction"
+            else "--dim_red "
+        ),
        neighbors=lambda wildcards: "-n " if get_optargs(wildcards)["neighbors"] else "",
        n_clusters=lambda wildcards: get_ncluster(
            config["data_dir"] + "/samples.tsv", wildcards.sample
-            ),
+        ),
technology=TECHNOLOGY,
seed=SEED,
configfile=get_config_file,
image=get_sample_image,
-        script=lambda wildcards: GIT_DIR + methods[wildcards.method]["script"]
+        script=lambda wildcards: GIT_DIR + methods[wildcards.method]["script"],
conda:
lambda wildcards: GIT_DIR + methods[wildcards.method]["env"]
wildcard_constraints:
Expand All @@ -198,6 +221,7 @@ rule method_with_config:
--config {params.configfile}
"""

+
rule method_without_config:
input:
coordinates=config["data_dir"] + "/{sample}/coordinates.tsv",
@@ -210,14 +234,17 @@ rule method_without_config:
dir=directory(config["data_dir"] + "/{sample}/{method}"),
file=config["data_dir"] + "/{sample}/{method}/domains.tsv",
params:
-        matrix=lambda wildcards: "-m " if get_optargs(wildcards)["matrix"]!="dimensionality_reduction" else "--dim_red ",
+        matrix=lambda wildcards: (
+            "-m "
+            if get_optargs(wildcards)["matrix"] != "dimensionality_reduction"
+            else "--dim_red "
+        ),
        neighbors=lambda wildcards: "-n " if get_optargs(wildcards)["neighbors"] else "",
        n_clusters=lambda wildcards: get_ncluster(
            config["data_dir"] + "/samples.tsv", wildcards.sample
-            ),
+        ),
technology=TECHNOLOGY,
seed=SEED,
configfile=get_config_file,
image=get_sample_image,
script=lambda wildcards: GIT_DIR + methods[wildcards.method]["script"],
conda:
@@ -237,4 +264,4 @@ rule method_without_config:
--n_clusters {params.n_clusters} \
--technology {params.technology} \
--seed {params.seed}
"""
"""
