Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Method BANKSY #131

Merged
merged 5 commits into from
Dec 14, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
193 changes: 193 additions & 0 deletions method/BANKSY/banksy.r
Original file line number Diff line number Diff line change
@@ -0,0 +1,193 @@
#!/usr/bin/env Rscript

# Author_and_contribution: Niklas Mueller-Boetticher; created template
# Author_and_contribution: Søren Helweg Dam; implemented method

suppressPackageStartupMessages({
library(optparse)
library(jsonlite)
library(SpatialExperiment)
library(Banksy)
})

# Command-line options understood by this script.
# Options are built through two small local constructors so that each entry
# only states what differs: its flags, its help text and, where it is not
# NULL, its default.
option_list <- local({
  # Character-valued option.
  chr_opt <- function(flags, help, default = NULL) {
    make_option(flags, type = "character", default = default, help = help)
  }
  # Integer-valued option (default NULL).
  int_opt <- function(flags, help) {
    make_option(flags, type = "integer", default = NULL, help = help)
  }

  list(
    chr_opt(
      c("-c", "--coordinates"),
      "Path to coordinates (as tsv)."
    ),
    chr_opt(
      c("-m", "--matrix"),
      "Path to (transformed) counts (as mtx).",
      default = NA
    ),
    chr_opt(
      c("-f", "--features"),
      "Path to features (as tsv)."
    ),
    chr_opt(
      c("-o", "--observations"),
      "Path to observations (as tsv)."
    ),
    chr_opt(
      c("-n", "--neighbors"),
      "Path to neighbor definitions. Square matrix (not necessarily symmetric) where each row contains the neighbors of this observation (as mtx).",
      default = NA
    ),
    chr_opt(
      c("-d", "--out_dir"),
      "Output directory."
    ),
    chr_opt(
      "--dim_red",
      "Reduced dimensionality representation (e.g. PCA).",
      default = NA
    ),
    chr_opt(
      "--image",
      "Path to H&E staining.",
      default = NA
    ),
    int_opt(
      "--n_clusters",
      "Number of clusters to return."
    ),
    chr_opt(
      "--technology",
      "The technology of the dataset (Visium, ST, imaging-based)."
    ),
    int_opt(
      "--seed",
      "Seed to use for random operations."
    ),
    chr_opt(
      "--config",
      "Optional config file (json) used to pass additional parameters.",
      default = NA
    )
  )
})

description <- "BANKSY cluster cells in a feature space"

# Build the parser and parse the command line into `opt`.
# The text above describes the tool, so it is passed as `description`;
# `usage` keeps optparse's standard synopsis so that --help prints a real
# usage line followed by the description.
opt_parser <- OptionParser(
  usage = "usage: %prog [options]",
  description = description,
  option_list = option_list
)
opt <- parse_args(opt_parser)

# Options that default to NULL are absent from `opt` when not supplied.
# Check them up front so the script stops with a clear message instead of
# failing later inside read.delim() or dir.create().
required_opts <- c("coordinates", "features", "observations", "out_dir")
is_missing <- vapply(
  required_opts,
  function(name) is.null(opt[[name]]),
  logical(1)
)
if (any(is_missing)) {
  stop(
    "Missing required option(s): ",
    paste0("--", required_opts[is_missing], collapse = ", "),
    call. = FALSE
  )
}

out_dir <- opt$out_dir

# Output files
label_file <- file.path(out_dir, "domains.tsv")
embedding_file <- file.path(out_dir, "embedding.tsv")
# if additional output files are required write it also to out_dir

# Use these filepaths as input ...
coord_file <- opt$coordinates
feature_file <- opt$features
observation_file <- opt$observations

# Optional inputs. Each variable is always defined and is NA when the option
# was not given, so later code can test it with is.na() instead of hitting
# an "object not found" error.
neighbors_file <- opt$neighbors
matrix_file <- opt$matrix
dimred_file <- opt$dim_red
image_file <- opt$image
config_file <- opt$config

# Additional parameters from the json config; an empty list when no config
# file was passed.
config <- if (is.na(config_file)) list() else fromJSON(config_file)

technology <- opt$technology
n_clusters <- opt$n_clusters

# You can get SpatialExperiment directly

#' Assemble a SpatialExperiment from the tabular input files.
#'
#' @param feature_file Path to the feature table (tab-separated, first column
#'   used as row names).
#' @param observation_file Path to the observation table (tab-separated,
#'   first column used as row names).
#' @param coord_file Path to the coordinate table (tab-separated, first
#'   column used as row names). Rows are reordered to match the observations.
#' @param matrix_file Optional path to a Matrix Market file. It is transposed
#'   before being stored. NA skips the assays.
#' @param reducedDim_file Optional path to a reduced-dimension table
#'   (tab-separated, first column used as row names). NA skips it.
#' @param assay_name Name of the assay that receives the matrix. The same
#'   matrix is additionally stored under "logcounts".
#' @param reducedDim_name Name under which the reduced dimensions are stored.
#' @return A SpatialExperiment, subset to the features and observations whose
#'   "selected" column (when such a column exists) is TRUE.
get_SpatialExperiment <- function(
    feature_file,
    observation_file,
    coord_file,
    matrix_file = NA,
    reducedDim_file = NA,
    assay_name = "counts",
    reducedDim_name = "reducedDim") {
  features <- read.delim(feature_file, stringsAsFactors = FALSE, row.names = 1)
  observations <- read.delim(
    observation_file,
    stringsAsFactors = FALSE, row.names = 1
  )

  coordinates <- read.delim(coord_file, sep = "\t", row.names = 1)
  # drop = FALSE keeps a data.frame even if the table has a single column.
  coordinates <- as.matrix(
    coordinates[rownames(observations), , drop = FALSE]
  )
  # Convert the matrix in place. Assigning as.numeric() values back into a
  # character matrix would coerce them to character again, so the storage
  # mode is changed instead.
  storage.mode(coordinates) <- "double"

  spe <- SpatialExperiment::SpatialExperiment(
    rowData = features, colData = observations, spatialCoords = coordinates
  )

  if (!is.na(matrix_file)) {
    # Read and convert the file once; both assays share the same matrix.
    expr <- as(Matrix::t(Matrix::readMM(matrix_file)), "CsparseMatrix")
    assay(spe, assay_name, withDimnames = FALSE) <- expr
    assay(spe, "logcounts", withDimnames = FALSE) <- expr
  }

  # Filter features and samples
  if ("selected" %in% colnames(rowData(spe))) {
    spe <- spe[as.logical(rowData(spe)$selected), ]
  }
  if ("selected" %in% colnames(colData(spe))) {
    spe <- spe[, as.logical(colData(spe)$selected)]
  }

  if (!is.na(reducedDim_file)) {
    dim_red <- read.delim(
      reducedDim_file,
      stringsAsFactors = FALSE, row.names = 1
    )
    # Align rows with the (possibly filtered) observations; drop = FALSE
    # keeps a single-column table two-dimensional.
    reducedDim(spe, reducedDim_name) <- as.matrix(
      dim_red[colnames(spe), , drop = FALSE]
    )
  }
  spe
}


# Seed
seed <- opt$seed

# BANKSY needs the expression matrix and its tuning parameters, although the
# command-line interface declares --matrix and --config as optional. Stop
# early with a clear message rather than with an "object not found" error.
if (is.na(opt$matrix)) {
  stop(
    "BANKSY requires --matrix (path to the transformed counts).",
    call. = FALSE
  )
}
if (is.na(opt$config)) {
  stop(
    "BANKSY requires --config (json with lambda, k_geom, npcs, resolution, method).",
    call. = FALSE
  )
}
required_params <- c("lambda", "k_geom", "npcs", "resolution", "method")
missing_params <- setdiff(required_params, names(config))
if (length(missing_params) > 0) {
  stop(
    "Config file is missing parameter(s): ",
    paste(missing_params, collapse = ", "),
    call. = FALSE
  )
}

# You can use the data as SpatialExperiment
spe <- get_SpatialExperiment(
  feature_file = feature_file,
  observation_file = observation_file,
  coord_file = coord_file,
  matrix_file = opt$matrix
)


## Your code goes here
# NOTE(review): n_clusters and technology are parsed from the command line
# but are not used by this script.
lambda <- config$lambda
k_geom <- config$k_geom
npcs <- config$npcs
resolution <- config$resolution
method <- config$method
assay_name <- "logcounts"
set.seed(seed)
spe <- Banksy::computeBanksy(spe, assay_name = assay_name, k_geom = k_geom)
spe <- Banksy::runBanksyPCA(spe, lambda = lambda, npcs = npcs)
# NOTE(review): confirm that the installed Banksy version accepts the
# clustering algorithm through an argument called `method`.
spe <- Banksy::clusterBanksy(
  spe,
  lambda = lambda, use_pcs = TRUE, npcs = npcs,
  resolution = resolution, seed = seed, method = method
)


# The data.frames with observations may contain a column "selected" which you need to use to
# subset and also use to subset coordinates, neighbors, (transformed) count matrix

# data.frame with row.names (cell-id/barcode) and 1 column (label)
label_df <- data.frame(
  "label" = colData(spe)[, clusterNames(spe)],
  row.names = colnames(spe)
)
# optional, data.frame with row.names (cell-id/barcode) and n columns
# NOTE(review): confirm that computeBanksy() as called above creates an
# assay named "H1".
embedding_df <- as.data.frame(t(assay(spe, "H1")))


## Write output
dir.create(out_dir, showWarnings = FALSE, recursive = TRUE)

colnames(label_df) <- c("label")
write.table(
  label_df,
  file = label_file, sep = "\t", col.names = NA, quote = FALSE
)

# embedding_df is always created above, so it is written unconditionally.
write.table(
  embedding_df,
  file = embedding_file, sep = "\t", col.names = NA, quote = FALSE
)
12 changes: 12 additions & 0 deletions method/BANKSY/banksy.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
# Conda environment for the BANKSY method script (banksy.r).
# Banksy itself is not listed here; banksy_env.sh installs it from GitHub
# with remotes after the environment has been created.
name: banksy_env
channels:
- conda-forge
- bioconda
dependencies:
- r-base=4.3.2
- r-optparse=1.7.3
# remotes is what banksy_env.sh uses to install Banksy.
- r-remotes=2.4.2
- r-jsonlite=1.8.8
# Unpinned, unlike the other R packages.
- r-biocmanager
- bioconductor-spatialexperiment=1.12.0
# NOTE(review): this entry lacks the "r-" / "bioconductor-" prefix the other
# R packages carry; confirm it resolves to the intended package.
- leidenAlg=0.10.1
13 changes: 13 additions & 0 deletions method/BANKSY/banksy_env.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
#!/bin/bash

# Create the BANKSY conda environment named banksy_env.
# The environment file is looked up next to this script, so the script can
# be started from any working directory.
script_dir="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
conda env create -f "${script_dir}/banksy.yml"

# `conda activate` is a shell function that only exists once conda's shell
# hook has been loaded; a non-interactive script does not load it by itself.
eval "$(conda shell.bash hook)"

# Activate the environment
conda activate banksy_env

# Install the required R packages
# NOTE(review): the ref after "prabhakarlab/" looks mangled by e-mail
# obfuscation; confirm the intended "<repo>@<ref>" before running.
Rscript -e "remotes::install_github('prabhakarlab/[email protected]', dependencies = TRUE)"



1 change: 1 addition & 0 deletions method/BANKSY/config/config_1.json
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{"method": "leiden", "lambda": 0.8, "k_geom": 50, "npcs": 50, "resolution": 0.8}