Skip to content

Commit

Permalink
Merge pull request #131 from SpatialHackathon/method_banksy_shdam
Browse files Browse the repository at this point in the history
Method BANKSY
  • Loading branch information
niklasmueboe authored Dec 14, 2023
2 parents 1145687 + 95f83d7 commit 78c6d23
Show file tree
Hide file tree
Showing 4 changed files with 219 additions and 0 deletions.
193 changes: 193 additions & 0 deletions method/BANKSY/banksy.r
Original file line number Diff line number Diff line change
@@ -0,0 +1,193 @@
#!/usr/bin/env Rscript

# Author_and_contribution: Niklas Mueller-Boetticher; created template
# Author_and_contribution: Søren Helweg Dam; implemented method

suppressPackageStartupMessages({
library(optparse)
library(jsonlite)
library(SpatialExperiment)
library(Banksy)
})

# Command line interface ----
# Flags whose default is NA are optional and are tested with is.na() further
# down; flags whose default is NULL are simply absent from `opt` when omitted.
option_list <- list(
  make_option(
    c("-c", "--coordinates"),
    type = "character",
    default = NULL,
    help = "Path to coordinates (as tsv)."
  ),
  make_option(
    c("-m", "--matrix"),
    type = "character",
    default = NA,
    help = "Path to (transformed) counts (as mtx)."
  ),
  make_option(
    c("-f", "--features"),
    type = "character",
    default = NULL,
    help = "Path to features (as tsv)."
  ),
  make_option(
    c("-o", "--observations"),
    type = "character",
    default = NULL,
    help = "Path to observations (as tsv)."
  ),
  make_option(
    c("-n", "--neighbors"),
    type = "character",
    default = NA,
    help = "Path to neighbor definitions. Square matrix (not necessarily symmetric) where each row contains the neighbors of this observation (as mtx)."
  ),
  make_option(
    c("-d", "--out_dir"),
    type = "character",
    default = NULL,
    help = "Output directory."
  ),
  make_option(
    "--dim_red",
    type = "character",
    default = NA,
    help = "Reduced dimensionality representation (e.g. PCA)."
  ),
  make_option(
    "--image",
    type = "character",
    default = NA,
    help = "Path to H&E staining."
  ),
  make_option(
    "--n_clusters",
    type = "integer",
    default = NULL,
    help = "Number of clusters to return."
  ),
  make_option(
    "--technology",
    type = "character",
    default = NULL,
    help = "The technology of the dataset (Visium, ST, imaging-based)."
  ),
  make_option(
    "--seed",
    type = "integer",
    default = NULL,
    help = "Seed to use for random operations."
  ),
  make_option(
    "--config",
    type = "character",
    default = NA,
    help = "Optional config file (json) used to pass additional parameters."
  )
)

# The one-line method description is shown as the usage text of the parser.
description <- "BANKSY cluster cells in a feature space"

opt_parser <- OptionParser(usage = description, option_list = option_list)
opt <- parse_args(opt_parser)

out_dir <- opt$out_dir

# Output files
label_file <- file.path(out_dir, "domains.tsv")
embedding_file <- file.path(out_dir, "embedding.tsv")
# if additional output files are required write it also to out_dir

# Use these filepaths as input ...
coord_file <- opt$coordinates
feature_file <- opt$features
observation_file <- opt$observations

# Optional inputs: these options default to NA, so the variables are always
# defined (NA when the flag was omitted). Previously they were only created
# inside `if (!is.na(...))`, so omitting e.g. --matrix or --config made later
# code fail with "object '...' not found" instead of a meaningful error.
neighbors_file <- opt$neighbors
matrix_file <- opt$matrix
dimred_file <- opt$dim_red
image_file <- opt$image
config_file <- opt$config

# Additional method parameters; an empty list when no config file was given.
config <- if (is.na(config_file)) list() else fromJSON(config_file)

technology <- opt$technology
n_clusters <- opt$n_clusters

# You can get SpatialExperiment directly
#
# Build a SpatialExperiment from the tabular / Matrix Market input files.
#
# Arguments:
#   feature_file      tsv with one row per feature; first column = row names.
#   observation_file  tsv with one row per observation; first column = row names.
#   coord_file        tsv with the spatial coordinates; first column = row
#                     names, at least two coordinate columns.
#   matrix_file       optional mtx file; it is transposed after reading and
#                     stored as an assay (NA = no assay is attached).
#   reducedDim_file   optional tsv with a reduced-dimension representation.
#   assay_name        assay name under which the matrix is stored.
#   reducedDim_name   name under which the reduced dimensions are stored.
#
# Returns the SpatialExperiment, subset to the features / observations whose
# "selected" column (if present) is truthy.
get_SpatialExperiment <- function(
    feature_file,
    observation_file,
    coord_file,
    matrix_file = NA,
    reducedDim_file = NA,
    assay_name = "counts",
    reducedDim_name = "reducedDim") {
  row_data <- read.delim(feature_file, stringsAsFactors = FALSE, row.names = 1)
  col_data <- read.delim(observation_file, stringsAsFactors = FALSE, row.names = 1)

  # Order the coordinates like the observations. drop = FALSE keeps the
  # data.frame shape regardless of the number of columns.
  coords <- read.delim(coord_file, sep = "\t", row.names = 1)
  coords <- coords[rownames(col_data), , drop = FALSE]
  # Coerce the first two columns to numeric *before* building the matrix.
  # Doing it afterwards is a no-op for a character matrix, because assigning
  # numbers into a character matrix coerces them straight back to character.
  coords[1:2] <- lapply(coords[1:2], as.numeric)
  coordinates <- as.matrix(coords)

  spe <- SpatialExperiment::SpatialExperiment(
    rowData = row_data, colData = col_data, spatialCoords = coordinates
  )

  if (!is.na(matrix_file)) {
    # Read and convert the matrix once and reuse it for both assays. The input
    # is described as "(transformed) counts", so the same values are exposed
    # under `assay_name` and under "logcounts".
    expr <- as(Matrix::t(Matrix::readMM(matrix_file)), "CsparseMatrix")
    assay(spe, assay_name, withDimnames = FALSE) <- expr
    assay(spe, "logcounts", withDimnames = FALSE) <- expr
  }

  # Filter features and samples
  if ("selected" %in% colnames(rowData(spe))) {
    spe <- spe[as.logical(rowData(spe)$selected), ]
  }
  if ("selected" %in% colnames(colData(spe))) {
    spe <- spe[, as.logical(colData(spe)$selected)]
  }

  if (!is.na(reducedDim_file)) {
    dim_red <- read.delim(reducedDim_file, stringsAsFactors = FALSE, row.names = 1)
    # Align the rows with the (already filtered) observations.
    reducedDim(spe, reducedDim_name) <- as.matrix(dim_red[colnames(spe), ])
  }
  spe
}


# Seed
seed <- opt$seed

# BANKSY cannot run without the expression matrix and without its parameters.
# Check the parsed options directly so that a missing flag yields a clear
# message instead of an "object not found" error further down.
if (is.na(opt$matrix)) {
  stop("BANKSY requires the (transformed) count matrix: pass --matrix.", call. = FALSE)
}
if (is.na(opt$config)) {
  stop(
    "BANKSY requires --config (json) providing lambda, k_geom, npcs, ",
    "resolution and method.",
    call. = FALSE
  )
}

# You can use the data as SpatialExperiment
spe <- get_SpatialExperiment(
  feature_file = feature_file,
  observation_file = observation_file,
  coord_file = coord_file,
  matrix_file = matrix_file
)


## Your code goes here
# Method parameters are taken from the json config.
lambda <- config$lambda
k_geom <- config$k_geom
npcs <- config$npcs
resolution <- config$resolution
method <- config$method
assay_name <- "logcounts"
# NOTE(review): `n_clusters` is parsed from the command line but is not used
# below; the clustering call is driven by `resolution` only -- confirm this
# is intended.
set.seed(seed)
spe <- Banksy::computeBanksy(spe, assay_name = assay_name, k_geom = k_geom)
spe <- Banksy::runBanksyPCA(spe, lambda = lambda, npcs = npcs)
spe <- Banksy::clusterBanksy(
  spe,
  lambda = lambda,
  use_pcs = TRUE,
  npcs = npcs,
  resolution = resolution,
  seed = seed,
  method = method
)


# Collect the results. The observation table may carry a "selected" column;
# that subsetting has to be applied to coordinates, neighbors and the
# (transformed) count matrix as well.

# Labels: data.frame with row.names (cell-id/barcode) and 1 column (label)
cluster_col <- clusterNames(spe)
label_df <- data.frame(
  "label" = colData(spe)[, cluster_col],
  row.names = colnames(spe)
)

# Embedding (optional): data.frame with row.names (cell-id/barcode) and
# n columns, taken from the transposed "H1" assay.
# NOTE(review): "H1" is presumably an assay added by the Banksy calls above --
# confirm it is the representation intended as embedding.
embedding_df <- as.data.frame(t(assay(spe, "H1")))


## Write output
dir.create(out_dir, showWarnings = FALSE, recursive = TRUE)

colnames(label_df) <- "label"
write.table(
  label_df,
  file = label_file,
  sep = "\t",
  col.names = NA,
  quote = FALSE
)

if (exists("embedding_df")) {
  write.table(
    embedding_df,
    file = embedding_file,
    sep = "\t",
    col.names = NA,
    quote = FALSE
  )
}
12 changes: 12 additions & 0 deletions method/BANKSY/banksy.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
# Conda environment for the BANKSY method script (banksy.r).
# The Banksy R package itself is not listed here; banksy_env.sh installs it
# from GitHub after the environment has been created.
name: banksy_env
channels:
- conda-forge
- bioconda
dependencies:
- r-base=4.3.2
# Command line parsing and json config reading in banksy.r
- r-optparse=1.7.3
# Used by banksy_env.sh (remotes::install_github) to install Banksy
- r-remotes=2.4.2
- r-jsonlite=1.8.8
- r-biocmanager
- bioconductor-spatialexperiment=1.12.0
# NOTE(review): presumably backs the "leiden" clustering method selected in
# config/config_1.json -- confirm package name and version
- leidenAlg=0.10.1
13 changes: 13 additions & 0 deletions method/BANKSY/banksy_env.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
#!/bin/bash

# Create the BANKSY conda environment named banksy_env
conda env create -f banksy.yml

# Activate the environment
# `conda activate` is a shell function that only exists once conda's shell
# hook has been loaded. A script started via its shebang is a non-interactive
# shell that has not loaded it, so load the hook explicitly first.
eval "$(conda shell.bash hook)"
conda activate banksy_env

# Install the required R packages
# NOTE(review): the ref after "prabhakarlab/" looks garbled by e-mail
# obfuscation in this copy of the file; restore the intended
# "Banksy@<branch-or-tag>" before running.
Rscript -e "remotes::install_github('prabhakarlab/[email protected]', dependencies = TRUE)"



1 change: 1 addition & 0 deletions method/BANKSY/config/config_1.json
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{"method": "leiden", "lambda": 0.8, "k_geom": 50, "npcs": 50, "resolution": 0.8}

0 comments on commit 78c6d23

Please sign in to comment.