From 3cfd7a54d5bdc250ea7ee74be0f3f8ee2b40a7f0 Mon Sep 17 00:00:00 2001 From: f641l Date: Fri, 9 Feb 2024 18:31:01 +0100 Subject: [PATCH 1/6] Adding data for issue #38 --- data/locus_coeruleus/locus_coeruleus.R | 128 +++++++++++++++++++++++ data/locus_coeruleus/locus_coeruleus.yml | 8 ++ 2 files changed, 136 insertions(+) create mode 100644 data/locus_coeruleus/locus_coeruleus.R create mode 100644 data/locus_coeruleus/locus_coeruleus.yml diff --git a/data/locus_coeruleus/locus_coeruleus.R b/data/locus_coeruleus/locus_coeruleus.R new file mode 100644 index 00000000..a96cad54 --- /dev/null +++ b/data/locus_coeruleus/locus_coeruleus.R @@ -0,0 +1,128 @@ +#!/usr/bin/env Rscript + +# Author_and_contribution: Niklas Mueller-Boetticher; created template +# Author_and_contribution: Florian Heyl (@heylf); created code + +# H_E.json and H_E.tiff not public. Request for access is still unanswered. + +suppressPackageStartupMessages(library(optparse)) +suppressPackageStartupMessages(library(SpatialExperiment)) +suppressPackageStartupMessages(library(SingleCellExperiment)) +suppressPackageStartupMessages(library(WeberDivechaLCdata)) +suppressPackageStartupMessages(library(Matrix)) + +option_list <- list( + make_option( + c("-o", "--out_dir"), + type = "character", default = NULL, + help = "Output directory to write files to." + ) +) + +description <- "Load data (Visium) for Locus_coeruleus from Lukas M. Weber at al. (2022); +The gene expression landscape of the human locus coeruleus revealed +by single-nucleus and spatially-resolved transcriptomics" + +opt_parser <- OptionParser( + usage = description, + option_list = option_list +) +opt <- parse_args(opt_parser) + +out_dir <- opt$out_dir + +args = commandArgs(trailingOnly=TRUE) + +# Load data +spe <- WeberDivechaLCdata_Visium() + +coords <- as.data.frame(spe@int_colData$spatialCoords) +colnames(coords) <- c('x', 'y') + +counts <- spe@assays@data$counts +counts_lc <- unlist(lapply(colnames(counts), function(x){paste(unlist(strsplit(x, "_"))[1:3], collapse="_")})) +observations_lc <- unlist(lapply(colnames(counts), function(x){paste(unlist(strsplit(x, "_"))[4], collapse="_")})) + +LC_samples <- unique(spe@colData$sample_id) + +for ( dir in LC_samples ){ + dir <- paste0(out_dir, "/", dir) + if ( dir.exists(dir) == FALSE ){ + dir.create(dir, recursive=TRUE) + } +} + +print("Create output ...") + +patient_list <- c() +sample_list <- c() +directory_list <- c() + +# Write coordinates.tsv, observations.tsv, features.tsv, counts.mtx and labels.tsv +for (lc in LC_samples){ + print(lc) + dir <- paste0(out_dir, "/", lc) + # Please check this again beause I had to flip the coordinates to get the pictures as seen in: + # https://libd.shinyapps.io/locus-c_Visium/. See ggplot below. + coords_subset <- coords[which(spe@colData$sample_id == lc),] + write.table(coords_subset, file = paste0(dir, "/coordinates.tsv"), col.names = NA, + sep = "\t", quote = FALSE, row.names = TRUE) + + # Create the scatter plot + # plot_df <- coords[which(spe@colData$sample_id == lc),] + # plot_df$label <- labels + # plot_df + # ggplot(plot_df, aes(x = x, y = y, color=label)) + + # geom_point() + + # coord_fixed() + # Keep aspect ratio equal + # scale_y_reverse() # Flip the y-axis + + # Count matrix has rows = genes/features and cols = cells/observations + # Has the header %%MatrixMarket matrix coordinate integer general + counts_subset <- counts[,which(counts_lc == lc)] + if (lc == "Br6522_LC_1_round1" || lc == "Br6522_LC_2_round1"){ + split_lc <- unlist(strsplit(as.character(LC_samples[1]), "_"))[1:3] + correct_lc <- paste(split_lc, collapse = "_") + counts_subset <- counts[,which(counts_lc == correct_lc)] + } + t(counts_subset) + writeMM(counts_subset, file = paste0(dir, "/counts.mtx")) + + # Write observations.tsv + observations_subset <- observations_lc[which(counts_lc == lc)] + write.table(observations_subset, file = paste0(dir, "/observations.tsv"), col.names = NA, sep = "\t", quote = FALSE) + + labels <- spe@colData$annot_region[which(spe@colData$sample_id == lc)] + labels[which(labels == TRUE)] <- "LC" + labels[which(labels == FALSE)] <- "non_LC" + write.table(labels, file = paste0(dir, "/labels.tsv"), col.names = NA, sep = "\t", quote = FALSE) + + # Fill metadata + patient_list <- c(patient_list, as.character(unique(spe@colData[which(spe@colData$sample_id == lc),]$donor_id))) + sample_list <- c(sample_list, lc) + directory_list <- c(directory_list, dir) + + # Write features.tsv + features <- rownames(spe@assays@data$counts) + write.table(features, file = paste0(dir,"/features.tsv"), col.names = NA, sep = "\t", quote = FALSE) +} + +## Metadata files +samples_df <- data.frame( + patient = patient_list, + sample = sample_list, + position = rep(NA, length(patient_list)), # Not sure what position means + replicate = rep(NA, length(patient_list)), # If they have replicated then it is really badly named + directory = directory_list, + n_clusters = rep(2, length(patient_list)), + stringsAsFactors = FALSE +) +row.names(samples_df) <- NULL +write.table(samples_df, file = file.path(out_dir, "samples.tsv"), sep = "\t", col.names = NA, quote = FALSE) + +technology = "Visium" +json <- file(file.path(out_dir, "experiment.json")) +writeLines(c(paste0('{"technology": "', technology, '"}')), json) +close(json) + +print("...finished") diff --git a/data/locus_coeruleus/locus_coeruleus.yml b/data/locus_coeruleus/locus_coeruleus.yml new file mode 100644 index 00000000..f78bf92c --- /dev/null +++ b/data/locus_coeruleus/locus_coeruleus.yml @@ -0,0 +1,8 @@ +channels: + - conda-forge + - bioconda +dependencies: + - r-base=4.3.1 + - bioconductor-weberdivechalcdata=1.2.0 + - r-optparse=1.7.3 + - r-matrix=1.6.1 \ No newline at end of file From ef8f6d12eb4649ee6ad9a7ee7c65edb7dbcb3a7d Mon Sep 17 00:00:00 2001 From: f641l Date: Mon, 12 Feb 2024 10:41:04 +0100 Subject: [PATCH 2/6] Fixes to data generation for locus_coeruleus --- data/locus_coeruleus/locus_coeruleus.R | 31 ++++++++++++++------------ 1 file changed, 17 insertions(+), 14 deletions(-) diff --git a/data/locus_coeruleus/locus_coeruleus.R b/data/locus_coeruleus/locus_coeruleus.R index a96cad54..f34ede26 100644 --- a/data/locus_coeruleus/locus_coeruleus.R +++ b/data/locus_coeruleus/locus_coeruleus.R @@ -60,32 +60,25 @@ directory_list <- c() # Write coordinates.tsv, observations.tsv, features.tsv, counts.mtx and labels.tsv for (lc in LC_samples){ + print(lc) dir <- paste0(out_dir, "/", lc) - # Please check this again beause I had to flip the coordinates to get the pictures as seen in: + # TODO Please check this again beause I had to flip the coordinates to get the pictures as seen in: # https://libd.shinyapps.io/locus-c_Visium/. See ggplot below. coords_subset <- coords[which(spe@colData$sample_id == lc),] write.table(coords_subset, file = paste0(dir, "/coordinates.tsv"), col.names = NA, sep = "\t", quote = FALSE, row.names = TRUE) - - # Create the scatter plot - # plot_df <- coords[which(spe@colData$sample_id == lc),] - # plot_df$label <- labels - # plot_df - # ggplot(plot_df, aes(x = x, y = y, color=label)) + - # geom_point() + - # coord_fixed() + # Keep aspect ratio equal - # scale_y_reverse() # Flip the y-axis - + # Count matrix has rows = genes/features and cols = cells/observations - # Has the header %%MatrixMarket matrix coordinate integer general + # TODO Has the header %%MatrixMarket matrix coordinate integer general counts_subset <- counts[,which(counts_lc == lc)] if (lc == "Br6522_LC_1_round1" || lc == "Br6522_LC_2_round1"){ - split_lc <- unlist(strsplit(as.character(LC_samples[1]), "_"))[1:3] + split_lc <- unlist(strsplit(as.character(lc), "_"))[1:3] correct_lc <- paste(split_lc, collapse = "_") counts_subset <- counts[,which(counts_lc == correct_lc)] } - t(counts_subset) + # Transpose to have rows = cells/observations + counts_subset <- t(counts_subset) writeMM(counts_subset, file = paste0(dir, "/counts.mtx")) # Write observations.tsv @@ -97,6 +90,16 @@ for (lc in LC_samples){ labels[which(labels == FALSE)] <- "non_LC" write.table(labels, file = paste0(dir, "/labels.tsv"), col.names = NA, sep = "\t", quote = FALSE) + # TODO #Create the scatter plot + # library(ggplot2) + # plot_df <- coords[which(spe@colData$sample_id == lc),] + # plot_df$label <- labels + # plot_df + # ggplot(plot_df, aes(x = x, y = y, color=label)) + + # geom_point() + + # coord_fixed() + # Keep aspect ratio equal + # scale_y_reverse() # Flip the y-axis + # Fill metadata patient_list <- c(patient_list, as.character(unique(spe@colData[which(spe@colData$sample_id == lc),]$donor_id))) sample_list <- c(sample_list, lc) From c14d5e23b9840e17dd19b17b71a910322ab3a7dd Mon Sep 17 00:00:00 2001 From: f641l Date: Tue, 13 Feb 2024 13:37:44 +0100 Subject: [PATCH 3/6] Bugfix with inconsitent names --- data/locus_coeruleus/locus_coeruleus.R | 35 ++++++++++++++++++-------- 1 file changed, 24 insertions(+), 11 deletions(-) diff --git a/data/locus_coeruleus/locus_coeruleus.R b/data/locus_coeruleus/locus_coeruleus.R index f34ede26..402d7e4b 100644 --- a/data/locus_coeruleus/locus_coeruleus.R +++ b/data/locus_coeruleus/locus_coeruleus.R @@ -40,11 +40,30 @@ coords <- as.data.frame(spe@int_colData$spatialCoords) colnames(coords) <- c('x', 'y') counts <- spe@assays@data$counts -counts_lc <- unlist(lapply(colnames(counts), function(x){paste(unlist(strsplit(x, "_"))[1:3], collapse="_")})) -observations_lc <- unlist(lapply(colnames(counts), function(x){paste(unlist(strsplit(x, "_"))[4], collapse="_")})) + +counts_func <- function(x){ + fields <- unlist(strsplit(x, "_")) + if ( length(fields) == 4 ){ + return(paste(fields[1:3], collapse = "_")) + } else { + return(paste(fields[1:4], collapse = "_")) + } +} +counts_lc <- unlist(lapply(colnames(counts), counts_func)) + +obs_func <- function(x){ + fields <- unlist(strsplit(x, "_")) + if ( length(fields) == 4 ){ + return(fields[4]) + } else{ + return(fields[5]) + } +} +observations_lc <- unlist(lapply(colnames(counts), obs_func)) LC_samples <- unique(spe@colData$sample_id) +out_dir <- 'out_dir' for ( dir in LC_samples ){ dir <- paste0(out_dir, "/", dir) if ( dir.exists(dir) == FALSE ){ @@ -60,28 +79,22 @@ directory_list <- c() # Write coordinates.tsv, observations.tsv, features.tsv, counts.mtx and labels.tsv for (lc in LC_samples){ - + print(lc) dir <- paste0(out_dir, "/", lc) - # TODO Please check this again beause I had to flip the coordinates to get the pictures as seen in: + # TODO Please check this again beause I had to flip the coordinates to get the pictures as seen in: # https://libd.shinyapps.io/locus-c_Visium/. See ggplot below. coords_subset <- coords[which(spe@colData$sample_id == lc),] write.table(coords_subset, file = paste0(dir, "/coordinates.tsv"), col.names = NA, sep = "\t", quote = FALSE, row.names = TRUE) # Count matrix has rows = genes/features and cols = cells/observations - # TODO Has the header %%MatrixMarket matrix coordinate integer general counts_subset <- counts[,which(counts_lc == lc)] - if (lc == "Br6522_LC_1_round1" || lc == "Br6522_LC_2_round1"){ - split_lc <- unlist(strsplit(as.character(lc), "_"))[1:3] - correct_lc <- paste(split_lc, collapse = "_") - counts_subset <- counts[,which(counts_lc == correct_lc)] - } + # Transpose to have rows = cells/observations counts_subset <- t(counts_subset) writeMM(counts_subset, file = paste0(dir, "/counts.mtx")) - # Write observations.tsv observations_subset <- observations_lc[which(counts_lc == lc)] write.table(observations_subset, file = paste0(dir, "/observations.tsv"), col.names = NA, sep = "\t", quote = FALSE) From d8dcd4a75c9219790d95458f6725086c31c22852 Mon Sep 17 00:00:00 2001 From: f641l Date: Tue, 13 Feb 2024 13:38:01 +0100 Subject: [PATCH 4/6] changing version to fix conda recipe issue --- data/locus_coeruleus/locus_coeruleus.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/data/locus_coeruleus/locus_coeruleus.yml b/data/locus_coeruleus/locus_coeruleus.yml index f78bf92c..1ebbed6d 100644 --- a/data/locus_coeruleus/locus_coeruleus.yml +++ b/data/locus_coeruleus/locus_coeruleus.yml @@ -3,6 +3,6 @@ channels: - bioconda dependencies: - r-base=4.3.1 - - bioconductor-weberdivechalcdata=1.2.0 + - bioconductor-weberdivechalcdata=1.4.0 - r-optparse=1.7.3 - r-matrix=1.6.1 \ No newline at end of file From 4171e6901d0d1bba7a775cbdc7304c04767a4722 Mon Sep 17 00:00:00 2001 From: f641l Date: Tue, 13 Feb 2024 17:18:28 +0100 Subject: [PATCH 5/6] Built in review --- data/locus_coeruleus/locus_coeruleus.R | 37 +++++++++----------------- 1 file changed, 12 insertions(+), 25 deletions(-) diff --git a/data/locus_coeruleus/locus_coeruleus.R b/data/locus_coeruleus/locus_coeruleus.R index 402d7e4b..701f5e13 100644 --- a/data/locus_coeruleus/locus_coeruleus.R +++ b/data/locus_coeruleus/locus_coeruleus.R @@ -38,6 +38,7 @@ spe <- WeberDivechaLCdata_Visium() coords <- as.data.frame(spe@int_colData$spatialCoords) colnames(coords) <- c('x', 'y') +coords_rownames <- rownames(spe@int_colData$spatialCoords) counts <- spe@assays@data$counts @@ -51,16 +52,6 @@ counts_func <- function(x){ } counts_lc <- unlist(lapply(colnames(counts), counts_func)) -obs_func <- function(x){ - fields <- unlist(strsplit(x, "_")) - if ( length(fields) == 4 ){ - return(fields[4]) - } else{ - return(fields[5]) - } -} -observations_lc <- unlist(lapply(colnames(counts), obs_func)) - LC_samples <- unique(spe@colData$sample_id) out_dir <- 'out_dir' @@ -82,9 +73,10 @@ for (lc in LC_samples){ print(lc) dir <- paste0(out_dir, "/", lc) - # TODO Please check this again beause I had to flip the coordinates to get the pictures as seen in: - # https://libd.shinyapps.io/locus-c_Visium/. See ggplot below. + + # Write coordinates.tsv coords_subset <- coords[which(spe@colData$sample_id == lc),] + rownames(coords_subset) <- coords_rownames[which(spe@colData$sample_id == lc)] write.table(coords_subset, file = paste0(dir, "/coordinates.tsv"), col.names = NA, sep = "\t", quote = FALSE, row.names = TRUE) @@ -95,23 +87,17 @@ for (lc in LC_samples){ counts_subset <- t(counts_subset) writeMM(counts_subset, file = paste0(dir, "/counts.mtx")) - observations_subset <- observations_lc[which(counts_lc == lc)] + observations_subset <- spe@colData[which(counts_lc == lc),] + rownames(observations_subset) <- lapply(rownames(observations_subset), function(x){tail(unlist(strsplit(x,"_")),1)}) write.table(observations_subset, file = paste0(dir, "/observations.tsv"), col.names = NA, sep = "\t", quote = FALSE) labels <- spe@colData$annot_region[which(spe@colData$sample_id == lc)] labels[which(labels == TRUE)] <- "LC" labels[which(labels == FALSE)] <- "non_LC" - write.table(labels, file = paste0(dir, "/labels.tsv"), col.names = NA, sep = "\t", quote = FALSE) - - # TODO #Create the scatter plot - # library(ggplot2) - # plot_df <- coords[which(spe@colData$sample_id == lc),] - # plot_df$label <- labels - # plot_df - # ggplot(plot_df, aes(x = x, y = y, color=label)) + - # geom_point() + - # coord_fixed() + # Keep aspect ratio equal - # scale_y_reverse() # Flip the y-axis + + labels_df <- data.frame(label = labels) + rownames(labels_df) <- rownames(observations_subset) + write.table(labels_df, file = paste0(dir, "/labels.tsv"), col.names = NA, sep = "\t", quote = FALSE) # Fill metadata patient_list <- c(patient_list, as.character(unique(spe@colData[which(spe@colData$sample_id == lc),]$donor_id))) @@ -119,7 +105,8 @@ for (lc in LC_samples){ directory_list <- c(directory_list, dir) # Write features.tsv - features <- rownames(spe@assays@data$counts) + features <- as.data.frame(spe@rowRanges) + rownames(features) <- spe@rowRanges$gene_id write.table(features, file = paste0(dir,"/features.tsv"), col.names = NA, sep = "\t", quote = FALSE) } From 9f43a0345e46ed5e5a367020e12b330a1e71e5d6 Mon Sep 17 00:00:00 2001 From: f641l Date: Wed, 14 Feb 2024 12:59:18 +0100 Subject: [PATCH 6/6] Remove unused variable and fix directory existence check --- data/locus_coeruleus/locus_coeruleus.R | 1 - 1 file changed, 1 deletion(-) diff --git a/data/locus_coeruleus/locus_coeruleus.R b/data/locus_coeruleus/locus_coeruleus.R index 701f5e13..712ce8b6 100644 --- a/data/locus_coeruleus/locus_coeruleus.R +++ b/data/locus_coeruleus/locus_coeruleus.R @@ -54,7 +54,6 @@ counts_lc <- unlist(lapply(colnames(counts), counts_func)) LC_samples <- unique(spe@colData$sample_id) -out_dir <- 'out_dir' for ( dir in LC_samples ){ dir <- paste0(out_dir, "/", dir) if ( dir.exists(dir) == FALSE ){