From 216f309f0b933e2c6626d5b275d3089ff32e28c2 Mon Sep 17 00:00:00 2001
From: shdam <sohdam@dtu.dk>
Date: Thu, 14 Dec 2023 09:50:28 +0000
Subject: [PATCH 1/5] implemented

---
 method/BANKSY/banksy.r             | 199 +++++++++++++++++++++++++++++
 method/BANKSY/banksy.yml           |  12 ++
 method/BANKSY/banksy_env.sh        |  13 ++
 method/BANKSY/config/config_1.json |   1 +
 4 files changed, 225 insertions(+)
 create mode 100755 method/BANKSY/banksy.r
 create mode 100644 method/BANKSY/banksy.yml
 create mode 100644 method/BANKSY/banksy_env.sh
 create mode 100644 method/BANKSY/config/config_1.json

diff --git a/method/BANKSY/banksy.r b/method/BANKSY/banksy.r
new file mode 100755
index 00000000..73f08da4
--- /dev/null
+++ b/method/BANKSY/banksy.r
@@ -0,0 +1,199 @@
+#!/usr/bin/env Rscript
+
+# Author_and_contribution: Niklas Mueller-Boetticher; created template
+# Author_and_contribution: Søren Helweg Dam; implemented method
+
+suppressPackageStartupMessages({
+    library(optparse)
+    library(jsonlite)
+    library(SpatialExperiment)
+    library(Banksy)
+})
+
+option_list <- list(
+  make_option(
+    c("-c", "--coordinates"),
+    type = "character", default = NULL,
+    help = "Path to coordinates (as tsv)."
+  ),
+  make_option(
+    c("-m", "--matrix"),
+    type = "character", default = NA,
+    help = "Path to (transformed) counts (as mtx)."
+  ),
+  make_option(
+    c("-f", "--features"),
+    type = "character", default = NULL,
+    help = "Path to features (as tsv)."
+  ),
+  make_option(
+    c("-o", "--observations"),
+    type = "character", default = NULL,
+    help = "Path to observations (as tsv)."
+  ),
+  make_option(
+    c("-n", "--neighbors"),
+    type = "character", default = NA,
+    help = "Path to neighbor definitions. Square matrix (not necessarily symmetric) where each row contains the neighbors of this observation (as mtx)."
+  ),
+  make_option(
+    c("-d", "--out_dir"),
+    type = "character", default = NULL,
+    help = "Output directory."
+  ),
+  make_option(
+    c("--dim_red"),
+    type = "character", default = NA,
+    help = "Reduced dimensionality representation (e.g. PCA)."
+  ),
+  make_option(
+    c("--image"),
+    type = "character", default = NA,
+    help = "Path to H&E staining."
+  ),
+  make_option(
+    c("--n_clusters"),
+    type = "integer", default = NULL,
+    help = "Number of clusters to return."
+  ),
+  make_option(
+    c("--technology"),
+    type = "character", default = NULL,
+    help = "The technology of the dataset (Visium, ST, imaging-based)."
+  ),
+  make_option(
+    c("--seed"),
+    type = "integer", default = NULL,
+    help = "Seed to use for random operations."
+  ),
+  make_option(
+    c("--config"),
+    type = "character", default = NA,
+    help = "Optional config file (json) used to pass additional parameters."
+  )
+)
+
+description <- "BANKSY cluster cluster cells in a feature space"
+
+opt_parser <- OptionParser(
+  usage = description,
+  option_list = option_list
+)
+opt <- parse_args(opt_parser)
+
+out_dir <- opt$out_dir
+
+# Output files
+label_file <- file.path(out_dir, "domains.tsv")
+embedding_file <- file.path(out_dir, "embedding.tsv")
+# If additional output files are required, write them to out_dir as well.
+
+# Use these filepaths as input ...
+coord_file <- opt$coordinates
+feature_file <- opt$features
+observation_file <- opt$observations
+
+# Optional inputs: keep every path defined (NA when the flag was not given)
+# so that later code can test them with is.na() instead of exists().
+neighbors_file <- opt$neighbors
+matrix_file <- opt$matrix
+dimred_file <- opt$dim_red
+image_file <- opt$image
+config_file <- opt$config
+# BANKSY cannot run without the expression matrix or its hyper-parameters.
+# Fail here with a clear message rather than with "object not found" later.
+if (is.na(matrix_file)) {
+  stop("BANKSY requires --matrix (path to the counts as mtx).", call. = FALSE)
+}
+if (is.na(config_file)) {
+  stop("BANKSY requires --config (json with method, lambda, k_geom, npcs, resolution).", call. = FALSE)
+}
+config <- fromJSON(config_file)
+
+technology <- opt$technology
+n_clusters <- opt$n_clusters
+
+# Build a SpatialExperiment from the tsv/mtx input files.
+#   feature_file, observation_file, coord_file: tsv files; the first column holds the row names.
+#   matrix_file: optional mtx file; it is transposed and stored as `assay_name` and "logcounts".
+#   reducedDim_file: optional tsv file, stored as the reduced dimension `reducedDim_name`.
+# Features/observations are subset by a "selected" column when present. Returns the object.
+get_SpatialExperiment <- function(
+    feature_file, observation_file, coord_file,
+    matrix_file = NA, reducedDim_file = NA,
+    assay_name = "counts", reducedDim_name = "reducedDim") {
+  rowData <- read.delim(feature_file, stringsAsFactors = FALSE, row.names = 1)
+  colData <- read.delim(observation_file, stringsAsFactors = FALSE, row.names = 1)
+  # Reorder the coordinates to follow the observations before building the matrix.
+  coordinates <- read.delim(coord_file, sep = "\t", row.names = 1)
+  coordinates <- as.matrix(coordinates[rownames(colData), ])
+  coordinates[,c(1:2)] <- as.numeric(coordinates[,c(1:2)])
+
+  spe <- SpatialExperiment::SpatialExperiment(
+    rowData = rowData, colData = colData, spatialCoords = coordinates
+  )
+  if (!is.na(matrix_file)) {
+    # Read and transpose the matrix once; both assays receive the same values.
+    expr <- as(Matrix::t(Matrix::readMM(matrix_file)), "CsparseMatrix")
+    assay(spe, assay_name, withDimnames = FALSE) <- expr
+    assay(spe, "logcounts", withDimnames = FALSE) <- expr
+  }
+  # Filter features and samples
+  if ("selected" %in% colnames(rowData(spe))) {
+    spe <- spe[as.logical(rowData(spe)$selected), ]
+  }
+  if ("selected" %in% colnames(colData(spe))) {
+    spe <- spe[, as.logical(colData(spe)$selected)]
+  }
+
+  if (!is.na(reducedDim_file)) {
+    dimRed <- read.delim(reducedDim_file, stringsAsFactors = FALSE, row.names = 1)
+    reducedDim(spe, reducedDim_name) <- as.matrix(dimRed[colnames(spe), ])
+  }
+  spe
+}
+
+
+# Seed
+seed <- opt$seed
+set.seed(seed)
+
+# You can use the data as SpatialExperiment
+spe <- get_SpatialExperiment(feature_file = feature_file, observation_file = observation_file,
+                                    coord_file = coord_file,matrix_file = matrix_file)
+
+
+## Your code goes here
+lambda <- config$lambda
+k_geom <- config$k_geom
+npcs <- config$npcs
+resolution <- config$resolution
+method <- config$method
+#lambda <- 0.8
+#k_geom <- 30
+#npcs <- 50
+#resolution <- 0.8
+#method <- "leiden"
+assay_name <- "counts"
+spe <- Banksy::computeBanksy(spe, assay_name = assay_name, k_geom = k_geom)
+set.seed(seed)
+spe <- Banksy::runBanksyPCA(spe, lambda = lambda, npcs = npcs)
+spe <- Banksy::clusterBanksy(spe, lambda = lambda, npcs = npcs, resolution = resolution, seed = seed, method = method)
+
+
+# The data.frames with observations may contain a column "selected" which you need to use to
+# subset and also use to subset coordinates, neighbors, (transformed) count matrix
+#cnames <- colnames(colData(spe))
+label_df <- data.frame("label" = colData(spe)[, clusterNames(spe)], row.names=colnames(spe))  # data.frame with row.names (cell-id/barcode) and 1 column (label)
+embedding_df <- as.data.frame(reducedDims(spe)[[1]])  # optional, data.frame with row.names (cell-id/barcode) and n columns
+
+
+## Write output
+dir.create(out_dir, showWarnings = FALSE, recursive = TRUE)
+
+colnames(label_df) <- c("label")
+write.table(label_df, file = label_file, sep = "\t", col.names = NA, quote = FALSE)
+
+if (exists("embedding_df")) {
+  write.table(embedding_df, file = embedding_file, sep = "\t", col.names = NA, quote = FALSE)
+}
diff --git a/method/BANKSY/banksy.yml b/method/BANKSY/banksy.yml
new file mode 100644
index 00000000..88d5aef2
--- /dev/null
+++ b/method/BANKSY/banksy.yml
@@ -0,0 +1,12 @@
+name: banksy_env
+channels:
+  - conda-forge
+  - bioconda
+dependencies:
+  - r-base=4.3.2
+  - r-optparse=1.7.3
+  - r-remotes=2.4.2
+  - r-jsonlite=1.8.8
+  - r-biocmanager
+  #- bioconductor-spatialexperiment=1.12.0
+  - leidenAlg=0.10.1
\ No newline at end of file
diff --git a/method/BANKSY/banksy_env.sh b/method/BANKSY/banksy_env.sh
new file mode 100644
index 00000000..742f9085
--- /dev/null
+++ b/method/BANKSY/banksy_env.sh
@@ -0,0 +1,13 @@
+#!/bin/bash
+
+# Create the BANKSY conda environment named banksy_env
+conda env create -f banksy.yml
+# `conda activate` is a shell function that non-interactive scripts lack; load it, then activate
+eval "$(conda shell.bash hook)"
+conda activate banksy_env
+
+# Install the required R packages
+Rscript -e "remotes::install_github('prabhakarlab/Banksy', dependencies = TRUE, ref = 'b1a2c8bb2af06346f303637b9bba18faa1a1fe32')"
+
+
+
diff --git a/method/BANKSY/config/config_1.json b/method/BANKSY/config/config_1.json
new file mode 100644
index 00000000..b5213706
--- /dev/null
+++ b/method/BANKSY/config/config_1.json
@@ -0,0 +1 @@
+{"method": "leiden", "lambda": 0.8, "k_geom": 50, "npcs": 50, "resolution": 0.8}
\ No newline at end of file

From 249af142e0715fd661c93539fe47e484fbae51f4 Mon Sep 17 00:00:00 2001
From: shdam <sohdam@dtu.dk>
Date: Thu, 14 Dec 2023 09:52:09 +0000
Subject: [PATCH 2/5] minor adjustment to env

---
 method/BANKSY/banksy.r   | 5 -----
 method/BANKSY/banksy.yml | 2 +-
 2 files changed, 1 insertion(+), 6 deletions(-)

diff --git a/method/BANKSY/banksy.r b/method/BANKSY/banksy.r
index 73f08da4..aa4ec304 100755
--- a/method/BANKSY/banksy.r
+++ b/method/BANKSY/banksy.r
@@ -169,11 +169,6 @@ k_geom <- config$k_geom
 npcs <- config$npcs
 resolution <- config$resolution
 method <- config$method
-#lambda <- 0.8
-#k_geom <- 30
-#npcs <- 50
-#resolution <- 0.8
-#method <- "leiden"
 assay_name <- "counts"
 spe <- Banksy::computeBanksy(spe, assay_name = assay_name, k_geom = k_geom)
 set.seed(seed)
diff --git a/method/BANKSY/banksy.yml b/method/BANKSY/banksy.yml
index 88d5aef2..2d1e9615 100644
--- a/method/BANKSY/banksy.yml
+++ b/method/BANKSY/banksy.yml
@@ -8,5 +8,5 @@ dependencies:
   - r-remotes=2.4.2
   - r-jsonlite=1.8.8
   - r-biocmanager
-  #- bioconductor-spatialexperiment=1.12.0
+  - bioconductor-spatialexperiment=1.12.0
   - leidenAlg=0.10.1
\ No newline at end of file

From 86f82c6556142d2046bca31bd243c3e27bb6d0df Mon Sep 17 00:00:00 2001
From: shdam <sohdam@dtu.dk>
Date: Thu, 14 Dec 2023 10:08:20 +0000
Subject: [PATCH 3/5] minor adjustments to how banksy is run

---
 method/BANKSY/banksy.r | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/method/BANKSY/banksy.r b/method/BANKSY/banksy.r
index aa4ec304..94b75359 100755
--- a/method/BANKSY/banksy.r
+++ b/method/BANKSY/banksy.r
@@ -73,7 +73,7 @@ option_list <- list(
   )
 )
 
-description <- "BANKSY cluster cluster cells in a feature space"
+description <- "BANKSY cluster cells in a feature space"
 
 opt_parser <- OptionParser(
   usage = description,
@@ -169,11 +169,12 @@ k_geom <- config$k_geom
 npcs <- config$npcs
 resolution <- config$resolution
 method <- config$method
-assay_name <- "counts"
+assay_name <- "logcounts"
 spe <- Banksy::computeBanksy(spe, assay_name = assay_name, k_geom = k_geom)
 set.seed(seed)
 spe <- Banksy::runBanksyPCA(spe, lambda = lambda, npcs = npcs)
-spe <- Banksy::clusterBanksy(spe, lambda = lambda, npcs = npcs, resolution = resolution, seed = seed, method = method)
+spe <- Banksy::clusterBanksy(spe, lambda = lambda, npcs = npcs, resolution = resolution, seed = seed, method = method,
+                            dimred = reducedDimNames(spe))
 
 
 # The data.frames with observations may contain a column "selected" which you need to use to

From 8e0198d689752b73fb1f0828efc712f018cb0e19 Mon Sep 17 00:00:00 2001
From: shdam <sohdam@dtu.dk>
Date: Thu, 14 Dec 2023 10:16:35 +0000
Subject: [PATCH 4/5] add embedding

---
 method/BANKSY/banksy.r | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/method/BANKSY/banksy.r b/method/BANKSY/banksy.r
index 94b75359..4e3c6a62 100755
--- a/method/BANKSY/banksy.r
+++ b/method/BANKSY/banksy.r
@@ -156,7 +156,6 @@ get_SpatialExperiment <- function(
 
 # Seed
 seed <- opt$seed
-set.seed(seed)
 
 # You can use the data as SpatialExperiment
 spe <- get_SpatialExperiment(feature_file = feature_file, observation_file = observation_file,
@@ -170,18 +169,18 @@ npcs <- config$npcs
 resolution <- config$resolution
 method <- config$method
 assay_name <- "logcounts"
+set.seed(seed)
 spe <- Banksy::computeBanksy(spe, assay_name = assay_name, k_geom = k_geom)
 set.seed(seed)
 spe <- Banksy::runBanksyPCA(spe, lambda = lambda, npcs = npcs)
-spe <- Banksy::clusterBanksy(spe, lambda = lambda, npcs = npcs, resolution = resolution, seed = seed, method = method,
-                            dimred = reducedDimNames(spe))
+spe <- Banksy::clusterBanksy(spe, lambda = lambda, use_pcs = TRUE, npcs = npcs, resolution = resolution, seed = seed, method = method)
 
 
 # The data.frames with observations may contain a column "selected" which you need to use to
 # subset and also use to subset coordinates, neighbors, (transformed) count matrix
 #cnames <- colnames(colData(spe))
 label_df <- data.frame("label" = colData(spe)[, clusterNames(spe)], row.names=colnames(spe))  # data.frame with row.names (cell-id/barcode) and 1 column (label)
-embedding_df <- as.data.frame(reducedDims(spe)[[1]])  # optional, data.frame with row.names (cell-id/barcode) and n columns
+embedding_df <- as.data.frame(t(assay(spe, "H1")))  # optional, data.frame with row.names (cell-id/barcode) and n columns
 
 
 ## Write output

From 95f83d79bb0782b3a59b36d4510c552aad55e506 Mon Sep 17 00:00:00 2001
From: shdam <sohdam@dtu.dk>
Date: Thu, 14 Dec 2023 10:35:55 +0000
Subject: [PATCH 5/5] removed a set.seed and added release tag

---
 method/BANKSY/banksy.r      | 1 -
 method/BANKSY/banksy_env.sh | 2 +-
 2 files changed, 1 insertion(+), 2 deletions(-)

diff --git a/method/BANKSY/banksy.r b/method/BANKSY/banksy.r
index 4e3c6a62..27824ec6 100755
--- a/method/BANKSY/banksy.r
+++ b/method/BANKSY/banksy.r
@@ -171,7 +171,6 @@ method <- config$method
 assay_name <- "logcounts"
 set.seed(seed)
 spe <- Banksy::computeBanksy(spe, assay_name = assay_name, k_geom = k_geom)
-set.seed(seed)
 spe <- Banksy::runBanksyPCA(spe, lambda = lambda, npcs = npcs)
 spe <- Banksy::clusterBanksy(spe, lambda = lambda, use_pcs = TRUE, npcs = npcs, resolution = resolution, seed = seed, method = method)
 
diff --git a/method/BANKSY/banksy_env.sh b/method/BANKSY/banksy_env.sh
index 742f9085..58d1389c 100644
--- a/method/BANKSY/banksy_env.sh
+++ b/method/BANKSY/banksy_env.sh
@@ -7,7 +7,7 @@ conda env create -f banksy.yml
 conda activate banksy_env
 
 # Install the required R packages
-Rscript -e "remotes::install_github('prabhakarlab/Banksy', dependencies = TRUE, ref = 'b1a2c8bb2af06346f303637b9bba18faa1a1fe32')"
+Rscript -e "remotes::install_github('prabhakarlab/Banksy@v0.1.5', dependencies = TRUE)"