Skip to content

Commit

Permalink
Merge pull request #195 from SpatialHackathon/data_Locus_coeruleus
Browse files Browse the repository at this point in the history
Adding data for issue #38
  • Loading branch information
Jieran-S authored Feb 14, 2024
2 parents baab711 + 9f43a03 commit 2e81727
Show file tree
Hide file tree
Showing 2 changed files with 138 additions and 0 deletions.
130 changes: 130 additions & 0 deletions data/locus_coeruleus/locus_coeruleus.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,130 @@
#!/usr/bin/env Rscript

# Author_and_contribution: Niklas Mueller-Boetticher; created template
# Author_and_contribution: Florian Heyl (@heylf); created code

# H_E.json and H_E.tiff not public. Request for access is still unanswered.

suppressPackageStartupMessages(library(optparse))
suppressPackageStartupMessages(library(SpatialExperiment))
suppressPackageStartupMessages(library(SingleCellExperiment))
suppressPackageStartupMessages(library(WeberDivechaLCdata))
suppressPackageStartupMessages(library(Matrix))

# Command-line interface: a single option naming the directory that receives
# all generated files.
option_list <- list(
  make_option(
    c("-o", "--out_dir"),
    type = "character", default = NULL,
    help = "Output directory to write files to."
  )
)

description <- "Load data (Visium) for Locus_coeruleus from Lukas M. Weber at al. (2022);
The gene expression landscape of the human locus coeruleus revealed
by single-nucleus and spatially-resolved transcriptomics"

opt_parser <- OptionParser(
  usage = description,
  option_list = option_list
)
opt <- parse_args(opt_parser)

# `[[` avoids the partial name matching that `$` performs on lists.
out_dir <- opt[["out_dir"]]

# Without this check a missing --out_dir (NULL) would make every later
# paste0(out_dir, "/", ...) collapse to a path directly under the filesystem
# root, so fail early with the usage text instead.
if (is.null(out_dir) || !nzchar(out_dir)) {
  print_help(opt_parser)
  stop("Missing required option --out_dir.", call. = FALSE)
}

# Load data
spe <- WeberDivechaLCdata_Visium()

# Spatial coordinates are read from the object's internal column data.
# Their row names are stored in a separate vector so that they can be
# re-attached to the per-sample subsets further down.
spatial_coords <- spe@int_colData$spatialCoords
coords_rownames <- rownames(spatial_coords)
coords <- as.data.frame(spatial_coords)
names(coords) <- c("x", "y")

# Count matrix as stored in the object's "counts" assay slot.
counts <- spe@assays@data$counts

# Derive the leading identifier from an underscore-separated name.
#
# x: a single character string.
#
# The string is split on "_". When the split yields exactly four pieces the
# first three are kept, otherwise the first four; the kept pieces are joined
# with "_" again and returned as one string.
counts_func <- function(x) {
  parts <- unlist(strsplit(x, split = "_"))
  n_keep <- if (length(parts) == 4) 3 else 4
  paste(parts[seq_len(n_keep)], collapse = "_")
}
# Identifier derived from each column name of the count matrix.
# vapply() guarantees a plain character vector with one entry per column,
# whatever the input looks like (unlist(lapply()) does not).
counts_lc <- vapply(
  colnames(counts), counts_func, character(1),
  USE.NAMES = FALSE
)

LC_samples <- unique(spe@colData$sample_id)

# Create one output directory per sample below out_dir.
for (lc_sample in LC_samples) {
  lc_sample_dir <- file.path(out_dir, lc_sample)
  if (!dir.exists(lc_sample_dir)) {
    dir.create(lc_sample_dir, recursive = TRUE)
  }
}

print("Create output ...")

# The feature table does not depend on the sample, so build it once and
# write the same table into every sample directory.
features <- as.data.frame(spe@rowRanges)
rownames(features) <- spe@rowRanges$gene_id

# Per-sample metadata collected for samples.tsv (one entry per sample).
n_samples <- length(LC_samples)
patient_list <- character(n_samples)
sample_list <- character(n_samples)
directory_list <- character(n_samples)

# Write coordinates.tsv, observations.tsv, features.tsv, counts.mtx and labels.tsv
for (i in seq_along(LC_samples)) {
  lc <- as.character(LC_samples[i])
  print(lc)
  sample_dir <- file.path(out_dir, lc)

  # A single index selects this sample's spots in every per-spot object, so
  # all written files are guaranteed to describe the same rows.
  spot_idx <- which(spe@colData$sample_id == lc)

  # The identifiers parsed from the count matrix column names must select
  # exactly the same columns; otherwise the outputs would be misaligned.
  if (!all(counts_lc[spot_idx] == lc) ||
    sum(counts_lc == lc) != length(spot_idx)) {
    stop(
      "Column names of the count matrix disagree with sample_id for ", lc,
      call. = FALSE
    )
  }

  # Write coordinates.tsv
  coords_subset <- coords[spot_idx, , drop = FALSE]
  rownames(coords_subset) <- coords_rownames[spot_idx]
  write.table(coords_subset,
    file = file.path(sample_dir, "coordinates.tsv"), col.names = NA,
    sep = "\t", quote = FALSE, row.names = TRUE
  )

  # Count matrix has rows = genes/features and cols = cells/observations.
  # drop = FALSE keeps a matrix even when a sample has a single spot.
  # Transpose to have rows = cells/observations.
  counts_subset <- t(counts[, spot_idx, drop = FALSE])
  writeMM(counts_subset, file = file.path(sample_dir, "counts.mtx"))

  # Write observations.tsv; row names are reduced to the part after the last
  # underscore.
  observations_subset <- spe@colData[spot_idx, , drop = FALSE]
  barcodes <- vapply(
    strsplit(rownames(observations_subset), "_"),
    function(parts) tail(parts, 1),
    character(1)
  )
  rownames(observations_subset) <- barcodes
  write.table(observations_subset,
    file = file.path(sample_dir, "observations.tsv"), col.names = NA,
    sep = "\t", quote = FALSE
  )

  # Write labels.tsv: annot_region TRUE -> "LC", FALSE -> "non_LC"
  # (missing values stay NA).
  in_lc <- spe@colData$annot_region[spot_idx]
  labels_df <- data.frame(label = ifelse(in_lc, "LC", "non_LC"))
  rownames(labels_df) <- barcodes
  write.table(labels_df,
    file = file.path(sample_dir, "labels.tsv"), col.names = NA,
    sep = "\t", quote = FALSE
  )

  # Fill metadata
  donor <- unique(as.character(spe@colData$donor_id[spot_idx]))
  if (length(donor) != 1L) {
    stop(
      "Sample ", lc, " maps to ", length(donor),
      " donors; expected exactly one.",
      call. = FALSE
    )
  }
  patient_list[i] <- donor
  sample_list[i] <- lc
  directory_list[i] <- sample_dir

  # Write features.tsv
  write.table(features,
    file = file.path(sample_dir, "features.tsv"), col.names = NA,
    sep = "\t", quote = FALSE
  )
}

## Metadata files
# samples.tsv: one row per sample, assembled from the vectors filled while
# the per-sample files were written.
n_rows <- length(patient_list)
samples_df <- data.frame(
  patient = patient_list,
  sample = sample_list,
  position = rep_len(NA, n_rows), # meaning of "position" is unclear here
  replicate = rep_len(NA, n_rows), # replicates are not identifiable by name
  directory = directory_list,
  n_clusters = rep_len(2, n_rows),
  stringsAsFactors = FALSE
)
row.names(samples_df) <- NULL
write.table(
  samples_df,
  file = file.path(out_dir, "samples.tsv"),
  sep = "\t", col.names = NA, quote = FALSE
)

# experiment.json: a single-key JSON object naming the technology.
technology <- "Visium"
experiment_json <- paste0('{"technology": "', technology, '"}')
writeLines(experiment_json, con = file.path(out_dir, "experiment.json"))

print("...finished")
8 changes: 8 additions & 0 deletions data/locus_coeruleus/locus_coeruleus.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# Conda environment specification stored next to locus_coeruleus.R
# (presumably the environment the script is meant to run in).
channels:
- conda-forge
- bioconda
dependencies:
# Pinned R interpreter version.
- r-base=4.3.1
# Bioconductor data package; the script calls WeberDivechaLCdata_Visium().
- bioconductor-weberdivechalcdata=1.4.0
# Command-line parsing (make_option / OptionParser / parse_args).
- r-optparse=1.7.3
# Matrix package; the script uses writeMM() to write counts.mtx.
- r-matrix=1.6.1
# NOTE(review): the script also attaches SpatialExperiment and
# SingleCellExperiment, which are not listed explicitly; presumably they are
# pulled in as dependencies of the data package -- TODO confirm.

0 comments on commit 2e81727

Please sign in to comment.