diff --git a/metric/LISI/LISI.r b/metric/LISI/LISI.r
new file mode 100644
index 00000000..65b98791
--- /dev/null
+++ b/metric/LISI/LISI.r
@@ -0,0 +1,113 @@
+#!/usr/bin/env Rscript
+
+# Author_and_contribution: Niklas Mueller-Boetticher; created template
+# Author_and_contribution: Kirti Biharie; implemented LISI score
+
+# Computes the mean LISI score of the ground-truth labels over an embedding
+# (via lisi::compute_lisi) and writes the value to the output file.
+
+suppressPackageStartupMessages(library(optparse))
+
+option_list <- list(
+  make_option(
+    c("-l", "--labels"),
+    type = "character", default = NULL,
+    help = "Labels from domain clustering."
+  ),
+  make_option(
+    c("-g", "--ground_truth"),
+    type = "character", default = NA,
+    help = "Groundtruth labels."
+  ),
+  make_option(
+    c("-e", "--embedding"),
+    type = "character", default = NA,
+    help = "Embedding of points in latent space. Potential usage for metrics without groundtruth."
+  ),
+  # format should be json
+  make_option(
+    c("-c", "--config"),
+    type = "character", default = NA,
+    help = "Optional config file (json) used to pass additional parameters."
+  ),
+  make_option(
+    c("-o", "--out_file"),
+    type = "character", default = NULL,
+    help = "Output file."
+  )
+)
+
+description <- "Calculate LISI Score"
+
+opt_parser <- OptionParser(
+  usage = description,
+  option_list = option_list
+)
+opt <- parse_args(opt_parser)
+
+# Fail fast with a clear message when a required argument is missing.
+# --labels belongs to the template's option set; this script does not
+# read it.
+if (is.na(opt$ground_truth)) {
+  stop("Groundtruth labels needed to calculate the LISI Score")
+}
+
+if (is.na(opt$embedding)) {
+  stop("Embeddings needed to calculate the LISI Score")
+}
+
+if (is.na(opt$config)) {
+  stop("Config file not provided")
+}
+
+if (is.null(opt$out_file)) {
+  stop("Output file not provided")
+}
+
+# Use these filepaths as input
+groundtruth_file <- opt$ground_truth
+embedding_file <- opt$embedding
+config_file <- opt$config
+out_file <- opt$out_file
+
+library(lisi)
+library(rjson)
+
+# Tab-separated tables; the first column supplies the row names used to
+# match observations between the two files.
+ground_truth <- read.delim(groundtruth_file, sep = "\t", row.names = 1)
+embeddings <- read.delim(embedding_file, sep = "\t", row.names = 1)
+config <- fromJSON(file = config_file)
+
+# The label column is referenced by name below, so check that it exists.
+label_column <- "label"
+if (!label_column %in% colnames(ground_truth)) {
+  stop("Groundtruth file has no '", label_column, "' column")
+}
+
+# Read the perplexity by exact name and validate it before it reaches
+# compute_lisi.
+perplexity <- if (is.list(config)) config[["perplexity"]] else NULL
+if (!is.numeric(perplexity) || length(perplexity) != 1L || is.na(perplexity)) {
+  stop("Config must define a single numeric 'perplexity'")
+}
+
+# Score only the observations present in both tables, in a shared order.
+common_index <- intersect(rownames(ground_truth), rownames(embeddings))
+if (length(common_index) == 0) {
+  stop("Groundtruth and embedding share no observations")
+}
+ground_truth <- ground_truth[common_index, , drop = FALSE]
+embeddings <- embeddings[common_index, , drop = FALSE]
+
+lisi_scores <- compute_lisi(
+  embeddings, ground_truth, label_column,
+  perplexity = perplexity
+)
+metric <- mean(lisi_scores[, label_column])
+
+## Write output
+# Create the target directory first; writeLines() given a path opens and
+# closes the file itself.
+dir.create(dirname(out_file), showWarnings = FALSE, recursive = TRUE)
+writeLines(format(metric, digits = 6, scientific = TRUE), out_file)
diff --git a/metric/LISI/LISI.yml b/metric/LISI/LISI.yml
new file mode 100644
index 00000000..709c2c18
--- /dev/null
+++ b/metric/LISI/LISI.yml
@@ -0,0 +1,8 @@
+channels:
+  - conda-forge
+  - bioconda
+dependencies:
+  - r-base=4.3.1
+  - r-optparse=1.7.3
+  - r-lisi=1.0
+  - r-rjson=0.2.21
\ No newline at end of file
diff --git a/metric/LISI/config/config_1.json b/metric/LISI/config/config_1.json
new file mode 100644
index 00000000..db62fa73
--- /dev/null
+++ b/metric/LISI/config/config_1.json
@@ -0,0 +1 @@
+{"perplexity": 15}
\ No newline at end of file