Merge pull request #131 from SpatialHackathon/method_banksy_shdam

Method BANKSY
SpatialHackathon · Dec 14, 2023 · 78c6d23 · 78c6d23
2 parents 1145687 + 95f83d7
commit 78c6d23
Show file tree

Hide file tree

Showing 4 changed files with 219 additions and 0 deletions.
diff --git a/method/BANKSY/banksy.r b/method/BANKSY/banksy.r
@@ -0,0 +1,193 @@
+#!/usr/bin/env Rscript
+
+# Author_and_contribution: Niklas Mueller-Boetticher; created template
+# Author_and_contribution: Søren Helweg Dam; implemented method
+
+suppressPackageStartupMessages({
+    library(optparse)
+    library(jsonlite)
+    library(SpatialExperiment)
+    library(Banksy)
+})
+
+# Command-line options of this script. The optional input paths (matrix,
+# neighbors, dim_red, image, config) default to NA; every other option
+# defaults to NULL.
+option_list <- list(
+  # Dataset inputs
+  make_option(c("-c", "--coordinates"), type = "character", default = NULL,
+              help = "Path to coordinates (as tsv)."),
+  make_option(c("-m", "--matrix"), type = "character", default = NA,
+              help = "Path to (transformed) counts (as mtx)."),
+  make_option(c("-f", "--features"), type = "character", default = NULL,
+              help = "Path to features (as tsv)."),
+  make_option(c("-o", "--observations"), type = "character", default = NULL,
+              help = "Path to observations (as tsv)."),
+  make_option(c("-n", "--neighbors"), type = "character", default = NA,
+              help = "Path to neighbor definitions. Square matrix (not necessarily symmetric) where each row contains the neighbors of this observation (as mtx)."),
+  # Output location
+  make_option(c("-d", "--out_dir"), type = "character", default = NULL,
+              help = "Output directory."),
+  # Further optional inputs (long form only)
+  make_option("--dim_red", type = "character", default = NA,
+              help = "Reduced dimensionality representation (e.g. PCA)."),
+  make_option("--image", type = "character", default = NA,
+              help = "Path to H&E staining."),
+  # Run parameters
+  make_option("--n_clusters", type = "integer", default = NULL,
+              help = "Number of clusters to return."),
+  make_option("--technology", type = "character", default = NULL,
+              help = "The technology of the dataset (Visium, ST, imaging-based)."),
+  make_option("--seed", type = "integer", default = NULL,
+              help = "Seed to use for random operations."),
+  make_option("--config", type = "character", default = NA,
+              help = "Optional config file (json) used to pass additional parameters.")
+)
+
+# One-line summary of the tool, shown in the --help output.
+description <- "BANKSY cluster cells in a feature space"
+
+# Hand the summary to `description` and leave `usage` at its default synopsis:
+# `usage` is the synopsis line of the help text, so passing the summary there
+# replaced the synopsis instead of being printed as a description.
+opt_parser <- OptionParser(
+  description = description,
+  option_list = option_list
+)
+# Parsed command-line arguments as a named list (one element per option).
+opt <- parse_args(opt_parser)
+
+# Fail early, with a readable message, when a required option is missing.
+# These options default to NULL, which otherwise only surfaces later as an
+# obscure error (e.g. file.path(NULL, ...) yields character(0)).
+required_opts <- c("coordinates", "features", "observations", "out_dir")
+missing_opts <- required_opts[
+  vapply(required_opts, function(key) is.null(opt[[key]]), logical(1))
+]
+if (length(missing_opts) > 0) {
+  stop(
+    "Missing required option(s): ",
+    paste0("--", missing_opts, collapse = ", "),
+    call. = FALSE
+  )
+}
+
+out_dir <- opt$out_dir
+
+# Output files
+label_file <- file.path(out_dir, "domains.tsv")
+embedding_file <- file.path(out_dir, "embedding.tsv")
+# Any additional output files should also be written to out_dir.
+
+# Required input paths
+coord_file <- opt$coordinates
+feature_file <- opt$features
+observation_file <- opt$observations
+
+# Optional input paths. Their options default to NA, so each variable is always
+# defined and is NA when the option was not given. (They used to be assigned
+# only when the option was present, so a later reference such as `matrix_file`
+# failed with "object not found" when --matrix was omitted.)
+neighbors_file <- opt$neighbors
+matrix_file <- opt$matrix
+dimred_file <- opt$dim_red
+image_file <- opt$image
+config_file <- opt$config
+
+# Additional parameters from the optional JSON config; an empty list when no
+# config file was given, so that `config$<name>` evaluates to NULL instead of
+# failing on an undefined object.
+config <- if (is.na(config_file)) list() else fromJSON(config_file)
+
+technology <- opt$technology
+n_clusters <- opt$n_clusters
+
+# Build a SpatialExperiment from the tabular input files.
+#
+# Arguments:
+#   feature_file      tsv describing the features; first column = row names.
+#   observation_file  tsv describing the observations; first column = row names.
+#   coord_file        tsv with the spatial coordinates; first column = row
+#                     names. Rows are matched to the observations by name.
+#   matrix_file       optional MatrixMarket file (NA to skip). It is transposed
+#                     after reading, i.e. expected as observations x features.
+#   reducedDim_file   optional tsv with a reduced-dimension representation
+#                     (NA to skip); first column = row names.
+#   assay_name        name of the assay that receives the matrix.
+#   reducedDim_name   name under which the reduced dimensions are stored.
+#
+# Returns the SpatialExperiment, restricted to the rows/columns flagged in a
+# "selected" column of the feature/observation table when such a column exists.
+get_SpatialExperiment <- function(
+    feature_file,
+    observation_file,
+    coord_file,
+    matrix_file = NA,
+    reducedDim_file = NA,
+    assay_name = "counts",
+    reducedDim_name = "reducedDim") {
+  rowData <- read.delim(feature_file, stringsAsFactors = FALSE, row.names = 1)
+  colData <- read.delim(observation_file, stringsAsFactors = FALSE, row.names = 1)
+
+  # Bring the coordinates into the order of the observation table.
+  coordinates <- read.delim(coord_file, sep = "\t", row.names = 1)
+  coordinates <- as.matrix(coordinates[rownames(colData), ])
+  coordinates[, c(1:2)] <- as.numeric(coordinates[, c(1:2)])
+
+  spe <- SpatialExperiment::SpatialExperiment(
+    rowData = rowData, colData = colData, spatialCoords = coordinates
+  )
+
+  if (!is.na(matrix_file)) {
+    # Read and convert the matrix once (it used to be read from disk twice);
+    # the same values are stored under `assay_name` and under "logcounts".
+    mat <- as(Matrix::t(Matrix::readMM(matrix_file)), "CsparseMatrix")
+    assay(spe, assay_name, withDimnames = FALSE) <- mat
+    assay(spe, "logcounts", withDimnames = FALSE) <- mat
+  }
+
+  # Filter features and samples
+  if ("selected" %in% colnames(rowData(spe))) {
+    spe <- spe[as.logical(rowData(spe)$selected), ]
+  }
+  if ("selected" %in% colnames(colData(spe))) {
+    spe <- spe[, as.logical(colData(spe)$selected)]
+  }
+
+  if (!is.na(reducedDim_file)) {
+    dimRed <- read.delim(reducedDim_file, stringsAsFactors = FALSE, row.names = 1)
+    # drop = FALSE: a single-column table must stay a table so that the row
+    # names survive the conversion to a matrix.
+    reducedDim(spe, reducedDim_name) <- as.matrix(dimRed[colnames(spe), , drop = FALSE])
+  }
+  spe
+}
+
+
+# Seed
+seed <- opt$seed
+
+# BANKSY is computed from the "logcounts" assay, which only exists when the
+# matrix was loaded, so --matrix is required here even though the option
+# itself is optional.
+if (is.na(opt$matrix)) {
+  stop(
+    "BANKSY requires the (transformed) count matrix; pass it with --matrix.",
+    call. = FALSE
+  )
+}
+
+# Load the data as a SpatialExperiment
+spe <- get_SpatialExperiment(
+  feature_file = feature_file,
+  observation_file = observation_file,
+  coord_file = coord_file,
+  matrix_file = opt$matrix
+)
+
+
+## BANKSY
+# The method parameters come from the JSON config. Check them up front so that
+# a missing file or key gives a clear message instead of an
+# "object 'config' not found" or NULL-argument error further down.
+required_params <- c("lambda", "k_geom", "npcs", "resolution")
+if (is.na(opt$config)) {
+  stop(
+    "BANKSY requires a config file (--config) providing: ",
+    paste(required_params, collapse = ", "),
+    call. = FALSE
+  )
+}
+missing_params <- setdiff(required_params, names(config))
+if (length(missing_params) > 0) {
+  stop(
+    "Missing parameter(s) in config file: ",
+    paste(missing_params, collapse = ", "),
+    call. = FALSE
+  )
+}
+
+lambda <- config$lambda
+k_geom <- config$k_geom
+npcs <- config$npcs
+resolution <- config$resolution
+method <- config$method
+assay_name <- "logcounts"
+set.seed(seed)
+spe <- Banksy::computeBanksy(spe, assay_name = assay_name, k_geom = k_geom)
+spe <- Banksy::runBanksyPCA(spe, lambda = lambda, npcs = npcs)
+# NOTE(review): confirm that clusterBanksy() has a `method` argument in the
+# installed Banksy version; if the algorithm is selected through a differently
+# named argument, the value from the config is not applied.
+spe <- Banksy::clusterBanksy(spe, lambda = lambda, use_pcs = TRUE, npcs = npcs, resolution = resolution, seed = seed, method = method)
+
+
+# Collect the results, keyed by observation id (cell-id/barcode).
+# Labels: one column "label", taken from the colData column named by
+# clusterNames(spe).
+label_df <- data.frame(
+  label = colData(spe)[, clusterNames(spe)],
+  row.names = colnames(spe)
+)
+# Embedding (optional output): the "H1" assay, transposed so that the
+# observations become the rows.
+embedding_df <- assay(spe, "H1") |>
+  t() |>
+  as.data.frame()
+
+
+## Write output
+# Create the output directory (including parents); no warning if it exists.
+dir.create(out_dir, recursive = TRUE, showWarnings = FALSE)
+
+# Labels: tab-separated and unquoted; col.names = NA writes an empty header
+# cell above the row names.
+names(label_df) <- "label"
+write.table(label_df, label_file, sep = "\t", quote = FALSE, col.names = NA)
+
+# The embedding is optional: write it only when it has been created.
+if (exists("embedding_df")) {
+  write.table(embedding_df, embedding_file, sep = "\t", quote = FALSE, col.names = NA)
+}
diff --git a/method/BANKSY/banksy.yml b/method/BANKSY/banksy.yml
@@ -0,0 +1,12 @@
+name: banksy_env  # environment name; banksy_env.sh activates it by this name
+channels:
+  - conda-forge
+  - bioconda
+dependencies:
+  - r-base=4.3.2
+  - r-optparse=1.7.3
+  - r-remotes=2.4.2  # banksy_env.sh installs Banksy via remotes::install_github
+  - r-jsonlite=1.8.8
+  - r-biocmanager  # not version-pinned, unlike the other dependencies
+  - bioconductor-spatialexperiment=1.12.0
+  - leidenAlg=0.10.1
diff --git a/method/BANKSY/banksy_env.sh b/method/BANKSY/banksy_env.sh
@@ -0,0 +1,13 @@
+#!/bin/bash
+
+# Create the BANKSY conda environment named banksy_env
+conda env create -f banksy.yml
+
+# Activate the environment
+conda activate banksy_env
+
+# Install the required R packages
+Rscript -e "remotes::install_github('prabhakarlab/[email protected]', dependencies = TRUE)"
+
+
+
diff --git a/method/BANKSY/config/config_1.json b/method/BANKSY/config/config_1.json
@@ -0,0 +1 @@
+{"method": "leiden", "lambda": 0.8, "k_geom": 50, "npcs": 50, "resolution": 0.8}
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1 @@
		{"method": "leiden", "lambda": 0.8, "k_geom": 50, "npcs": 50, "resolution": 0.8}