From 216f309f0b933e2c6626d5b275d3089ff32e28c2 Mon Sep 17 00:00:00 2001 From: shdam Date: Thu, 14 Dec 2023 09:50:28 +0000 Subject: [PATCH 1/5] implemented --- method/BANKSY/banksy.r | 199 +++++++++++++++++++++++++++++ method/BANKSY/banksy.yml | 12 ++ method/BANKSY/banksy_env.sh | 13 ++ method/BANKSY/config/config_1.json | 1 + 4 files changed, 225 insertions(+) create mode 100755 method/BANKSY/banksy.r create mode 100644 method/BANKSY/banksy.yml create mode 100644 method/BANKSY/banksy_env.sh create mode 100644 method/BANKSY/config/config_1.json diff --git a/method/BANKSY/banksy.r b/method/BANKSY/banksy.r new file mode 100755 index 00000000..73f08da4 --- /dev/null +++ b/method/BANKSY/banksy.r @@ -0,0 +1,199 @@ +#!/usr/bin/env Rscript + +# Author_and_contribution: Niklas Mueller-Boetticher; created template +# Author_and_contribution: Søren Helweg Dam; implemented method + +suppressPackageStartupMessages({ + library(optparse) + library(jsonlite) + library(SpatialExperiment) + library(Banksy) +}) + +option_list <- list( + make_option( + c("-c", "--coordinates"), + type = "character", default = NULL, + help = "Path to coordinates (as tsv)." + ), + make_option( + c("-m", "--matrix"), + type = "character", default = NA, + help = "Path to (transformed) counts (as mtx)." + ), + make_option( + c("-f", "--features"), + type = "character", default = NULL, + help = "Path to features (as tsv)." + ), + make_option( + c("-o", "--observations"), + type = "character", default = NULL, + help = "Path to observations (as tsv)." + ), + make_option( + c("-n", "--neighbors"), + type = "character", default = NA, + help = "Path to neighbor definitions. Square matrix (not necessarily symmetric) where each row contains the neighbors of this observation (as mtx)." + ), + make_option( + c("-d", "--out_dir"), + type = "character", default = NULL, + help = "Output directory." + ), + make_option( + c("--dim_red"), + type = "character", default = NA, + help = "Reduced dimensionality representation (e.g. PCA)." + ), + make_option( + c("--image"), + type = "character", default = NA, + help = "Path to H&E staining." + ), + make_option( + c("--n_clusters"), + type = "integer", default = NULL, + help = "Number of clusters to return." + ), + make_option( + c("--technology"), + type = "character", default = NULL, + help = "The technology of the dataset (Visium, ST, imaging-based)." + ), + make_option( + c("--seed"), + type = "integer", default = NULL, + help = "Seed to use for random operations." + ), + make_option( + c("--config"), + type = "character", default = NA, + help = "Optional config file (json) used to pass additional parameters." + ) +) + +description <- "BANKSY cluster cluster cells in a feature space" + +opt_parser <- OptionParser( + usage = description, + option_list = option_list +) +opt <- parse_args(opt_parser) + +out_dir <- opt$out_dir + +# Output files +label_file <- file.path(out_dir, "domains.tsv") +embedding_file <- file.path(out_dir, "embedding.tsv") +# if additional output files are required write it also to out_dir + +# Use these filepaths as input ... +coord_file <- opt$coordinates +feature_file <- opt$features +observation_file <- opt$observations + +if (!is.na(opt$neighbors)) { + neighbors_file <- opt$neighbors +} +if (!is.na(opt$matrix)) { + matrix_file <- opt$matrix +} +if (!is.na(opt$dim_red)) { + dimred_file <- opt$dim_red +} +if (!is.na(opt$image)) { + image_file <- opt$image +} +if (!is.na(opt$config)) { + config_file <- opt$config + config <- fromJSON(config_file) +} + +technology <- opt$technology +n_clusters <- opt$n_clusters + +# You can get SpatialExperiment directly +get_SpatialExperiment <- function( + feature_file, + observation_file, + coord_file, + matrix_file = NA, + reducedDim_file = NA, + assay_name = "counts", + reducedDim_name = "reducedDim") { + rowData <- read.delim(feature_file, stringsAsFactors = FALSE, row.names = 1) + colData <- read.delim(observation_file, stringsAsFactors = FALSE, row.names = 1) + + coordinates <- read.delim(coord_file, sep = "\t", row.names = 1) + coordinates <- as.matrix(coordinates[rownames(colData), ]) + coordinates[,c(1:2)] <- as.numeric(coordinates[,c(1:2)]) + + spe <- SpatialExperiment::SpatialExperiment( + rowData = rowData, colData = colData, spatialCoords = coordinates + ) + + if (!is.na(matrix_file)) { + assay(spe, assay_name, withDimnames = FALSE) <- as(Matrix::t(Matrix::readMM(matrix_file)), "CsparseMatrix") + assay(spe, "logcounts", withDimnames = FALSE) <- as(Matrix::t(Matrix::readMM(matrix_file)), "CsparseMatrix") + } + + # Filter features and samples + if ("selected" %in% colnames(rowData(spe))) { + spe <- spe[as.logical(rowData(spe)$selected), ] + } + if ("selected" %in% colnames(colData(spe))) { + spe <- spe[, as.logical(colData(spe)$selected)] + } + + if (!is.na(reducedDim_file)) { + dimRed <- read.delim(reducedDim_file, stringsAsFactors = FALSE, row.names = 1) + reducedDim(spe, reducedDim_name) <- as.matrix(dimRed[colnames(spe), ]) + } + return(spe) +} + + +# Seed +seed <- opt$seed +set.seed(seed) + +# You can use the data as SpatialExperiment +spe <- get_SpatialExperiment(feature_file = feature_file, observation_file = observation_file, + coord_file = coord_file,matrix_file = matrix_file) + + +## Your code goes here +lambda <- config$lambda +k_geom <- config$k_geom +npcs <- config$npcs +resolution <- config$resolution +method <- config$method +#lambda <- 0.8 +#k_geom <- 30 +#npcs <- 50 +#resolution <- 0.8 +#method <- "leiden" +assay_name <- "counts" +spe <- Banksy::computeBanksy(spe, assay_name = assay_name, k_geom = k_geom) +set.seed(seed) +spe <- Banksy::runBanksyPCA(spe, lambda = lambda, npcs = npcs) +spe <- Banksy::clusterBanksy(spe, lambda = lambda, npcs = npcs, resolution = resolution, seed = seed, method = method) + + +# The data.frames with observations may contain a column "selected" which you need to use to +# subset and also use to subset coordinates, neighbors, (transformed) count matrix +#cnames <- colnames(colData(spe)) +label_df <- data.frame("label" = colData(spe)[, clusterNames(spe)], row.names=colnames(spe)) # data.frame with row.names (cell-id/barcode) and 1 column (label) +embedding_df <- as.data.frame(reducedDims(spe)[[1]]) # optional, data.frame with row.names (cell-id/barcode) and n columns + + +## Write output +dir.create(out_dir, showWarnings = FALSE, recursive = TRUE) + +colnames(label_df) <- c("label") +write.table(label_df, file = label_file, sep = "\t", col.names = NA, quote = FALSE) + +if (exists("embedding_df")) { + write.table(embedding_df, file = embedding_file, sep = "\t", col.names = NA, quote = FALSE) +} diff --git a/method/BANKSY/banksy.yml b/method/BANKSY/banksy.yml new file mode 100644 index 00000000..88d5aef2 --- /dev/null +++ b/method/BANKSY/banksy.yml @@ -0,0 +1,12 @@ +name: banksy_env +channels: + - conda-forge + - bioconda +dependencies: + - r-base=4.3.2 + - r-optparse=1.7.3 + - r-remotes=2.4.2 + - r-jsonlite=1.8.8 + - r-biocmanager + #- bioconductor-spatialexperiment=1.12.0 + - leidenAlg=0.10.1 \ No newline at end of file diff --git a/method/BANKSY/banksy_env.sh b/method/BANKSY/banksy_env.sh new file mode 100644 index 00000000..742f9085 --- /dev/null +++ b/method/BANKSY/banksy_env.sh @@ -0,0 +1,13 @@ +#!/bin/bash + +# Create the BANKSY conda environment named banksy_env +conda env create -f banksy.yml + +# Activate the environment +conda activate banksy_env + +# Install the required R packages +Rscript -e "remotes::install_github('prabhakarlab/Banksy', dependencies = TRUE, ref = 'b1a2c8bb2af06346f303637b9bba18faa1a1fe32')" + + + diff --git a/method/BANKSY/config/config_1.json b/method/BANKSY/config/config_1.json new file mode 100644 index 00000000..b5213706 --- /dev/null +++ b/method/BANKSY/config/config_1.json @@ -0,0 +1 @@ +{"method": "leiden", "lambda": 0.8, "k_geom": 50, "npcs": 50, "resolution": 0.8} \ No newline at end of file From 249af142e0715fd661c93539fe47e484fbae51f4 Mon Sep 17 00:00:00 2001 From: shdam Date: Thu, 14 Dec 2023 09:52:09 +0000 Subject: [PATCH 2/5] minor adjustment to env --- method/BANKSY/banksy.r | 5 ----- method/BANKSY/banksy.yml | 2 +- 2 files changed, 1 insertion(+), 6 deletions(-) diff --git a/method/BANKSY/banksy.r b/method/BANKSY/banksy.r index 73f08da4..aa4ec304 100755 --- a/method/BANKSY/banksy.r +++ b/method/BANKSY/banksy.r @@ -169,11 +169,6 @@ k_geom <- config$k_geom npcs <- config$npcs resolution <- config$resolution method <- config$method -#lambda <- 0.8 -#k_geom <- 30 -#npcs <- 50 -#resolution <- 0.8 -#method <- "leiden" assay_name <- "counts" spe <- Banksy::computeBanksy(spe, assay_name = assay_name, k_geom = k_geom) set.seed(seed) diff --git a/method/BANKSY/banksy.yml b/method/BANKSY/banksy.yml index 88d5aef2..2d1e9615 100644 --- a/method/BANKSY/banksy.yml +++ b/method/BANKSY/banksy.yml @@ -8,5 +8,5 @@ dependencies: - r-remotes=2.4.2 - r-jsonlite=1.8.8 - r-biocmanager - #- bioconductor-spatialexperiment=1.12.0 + - bioconductor-spatialexperiment=1.12.0 - leidenAlg=0.10.1 \ No newline at end of file From 86f82c6556142d2046bca31bd243c3e27bb6d0df Mon Sep 17 00:00:00 2001 From: shdam Date: Thu, 14 Dec 2023 10:08:20 +0000 Subject: [PATCH 3/5] minor adjustments to how banksy is run --- method/BANKSY/banksy.r | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/method/BANKSY/banksy.r b/method/BANKSY/banksy.r index aa4ec304..94b75359 100755 --- a/method/BANKSY/banksy.r +++ b/method/BANKSY/banksy.r @@ -73,7 +73,7 @@ option_list <- list( ) ) -description <- "BANKSY cluster cluster cells in a feature space" +description <- "BANKSY cluster cells in a feature space" opt_parser <- OptionParser( usage = description, @@ -169,11 +169,12 @@ k_geom <- config$k_geom npcs <- config$npcs resolution <- config$resolution method <- config$method -assay_name <- "counts" +assay_name <- "logcounts" spe <- Banksy::computeBanksy(spe, assay_name = assay_name, k_geom = k_geom) set.seed(seed) spe <- Banksy::runBanksyPCA(spe, lambda = lambda, npcs = npcs) -spe <- Banksy::clusterBanksy(spe, lambda = lambda, npcs = npcs, resolution = resolution, seed = seed, method = method) +spe <- Banksy::clusterBanksy(spe, lambda = lambda, npcs = npcs, resolution = resolution, seed = seed, method = method, + dimred = reducedDimNames(spe)) # The data.frames with observations may contain a column "selected" which you need to use to From 8e0198d689752b73fb1f0828efc712f018cb0e19 Mon Sep 17 00:00:00 2001 From: shdam Date: Thu, 14 Dec 2023 10:16:35 +0000 Subject: [PATCH 4/5] add embedding --- method/BANKSY/banksy.r | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/method/BANKSY/banksy.r b/method/BANKSY/banksy.r index 94b75359..4e3c6a62 100755 --- a/method/BANKSY/banksy.r +++ b/method/BANKSY/banksy.r @@ -156,7 +156,6 @@ get_SpatialExperiment <- function( # Seed seed <- opt$seed -set.seed(seed) # You can use the data as SpatialExperiment spe <- get_SpatialExperiment(feature_file = feature_file, observation_file = observation_file, @@ -170,18 +169,18 @@ npcs <- config$npcs resolution <- config$resolution method <- config$method assay_name <- "logcounts" +set.seed(seed) spe <- Banksy::computeBanksy(spe, assay_name = assay_name, k_geom = k_geom) set.seed(seed) spe <- Banksy::runBanksyPCA(spe, lambda = lambda, npcs = npcs) -spe <- Banksy::clusterBanksy(spe, lambda = lambda, npcs = npcs, resolution = resolution, seed = seed, method = method, - dimred = reducedDimNames(spe)) +spe <- Banksy::clusterBanksy(spe, lambda = lambda, use_pcs = TRUE, npcs = npcs, resolution = resolution, seed = seed, method = method) # The data.frames with observations may contain a column "selected" which you need to use to # subset and also use to subset coordinates, neighbors, (transformed) count matrix #cnames <- colnames(colData(spe)) label_df <- data.frame("label" = colData(spe)[, clusterNames(spe)], row.names=colnames(spe)) # data.frame with row.names (cell-id/barcode) and 1 column (label) -embedding_df <- as.data.frame(reducedDims(spe)[[1]]) # optional, data.frame with row.names (cell-id/barcode) and n columns +embedding_df <- as.data.frame(t(assay(spe, "H1"))) # optional, data.frame with row.names (cell-id/barcode) and n columns ## Write output From 95f83d79bb0782b3a59b36d4510c552aad55e506 Mon Sep 17 00:00:00 2001 From: shdam Date: Thu, 14 Dec 2023 10:35:55 +0000 Subject: [PATCH 5/5] removed a set.seed and added release tag --- method/BANKSY/banksy.r | 1 - method/BANKSY/banksy_env.sh | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/method/BANKSY/banksy.r b/method/BANKSY/banksy.r index 4e3c6a62..27824ec6 100755 --- a/method/BANKSY/banksy.r +++ b/method/BANKSY/banksy.r @@ -171,7 +171,6 @@ method <- config$method assay_name <- "logcounts" set.seed(seed) spe <- Banksy::computeBanksy(spe, assay_name = assay_name, k_geom = k_geom) -set.seed(seed) spe <- Banksy::runBanksyPCA(spe, lambda = lambda, npcs = npcs) spe <- Banksy::clusterBanksy(spe, lambda = lambda, use_pcs = TRUE, npcs = npcs, resolution = resolution, seed = seed, method = method) diff --git a/method/BANKSY/banksy_env.sh b/method/BANKSY/banksy_env.sh index 742f9085..58d1389c 100644 --- a/method/BANKSY/banksy_env.sh +++ b/method/BANKSY/banksy_env.sh @@ -7,7 +7,7 @@ conda env create -f banksy.yml conda activate banksy_env # Install the required R packages -Rscript -e "remotes::install_github('prabhakarlab/Banksy', dependencies = TRUE, ref = 'b1a2c8bb2af06346f303637b9bba18faa1a1fe32')" +Rscript -e "remotes::install_github('prabhakarlab/Banksy@v0.1.5', dependencies = TRUE)"