Merge pull request #99 from SpatialHackathon/metric_completeness_kbiharie

Metric Completeness Score
SpatialHackathon · Dec 12, 2023 · 72f30cd · 72f30cd
2 parents 656c756 + 145426d
commit 72f30cd
Show file tree

Hide file tree

Showing 2 changed files with 69 additions and 0 deletions.
diff --git a/metric/Completeness/Completeness.py b/metric/Completeness/Completeness.py
@@ -0,0 +1,63 @@
+#!/usr/bin/env python
+
+# Author_and_contribution: Niklas Mueller-Boetticher; created template
+# Author_and_contribution: Kirti Biharie; implemented completeness score
+
+import argparse
+
+parser = argparse.ArgumentParser(description="Calculate Completeness Score (scikit-learn)")
+
+parser.add_argument(
+    "-l", "--labels", help="Labels from domain clustering.", required=True
+)
+parser.add_argument("-g", "--ground_truth", help="Groundtruth labels.", required=False)
+parser.add_argument(
+    "-e",
+    "--embedding",
+    help="Embedding of points in latent space. Potential usage for metrics without groundtruth.",
+    required=False,
+)
+parser.add_argument(
+    "-c",
+    "--config",
+    help="Optional config file (json) used to pass additional parameters.",
+    required=False,
+)  # format should be json
+parser.add_argument("-o", "--out_file", help="Output file.", required=True)
+
+args = parser.parse_args()
+
+# Use these filepaths as input
+label_file = args.labels
+
+if args.ground_truth is not None:
+    groundtruth_file = args.ground_truth
+if args.embedding is not None:
+    embedding_file = args.embedding
+if args.config is not None:
+    config_file = args.config
+
+
+## Your code goes here
+if args.ground_truth is None:
+    raise Exception("Groundtruth labels needed to calculate the Completeness Score")
+
+import pandas as pd
+import sklearn.metrics
+
+ground_truth = pd.read_table(groundtruth_file, index_col=0)["label"].astype("category").cat.codes
+labels = pd.read_table(label_file, index_col=0)["label"].astype("category").cat.codes
+
+common_index = labels.index.intersection(ground_truth.index)
+ground_truth = ground_truth.loc[common_index]
+labels = labels.loc[common_index]
+
+metric = sklearn.metrics.completeness_score(ground_truth, labels)
+
+## Write output
+from pathlib import Path
+
+Path(args.out_file).parent.mkdir(parents=True, exist_ok=True)
+
+with open(args.out_file, "w") as file:
+    file.write(f"{metric:.5e}\n")
diff --git a/metric/Completeness/Completeness.yml b/metric/Completeness/Completeness.yml
@@ -0,0 +1,6 @@
+channels:  # presumably the conda environment for Completeness.py in this directory
+  - conda-forge
+dependencies:
+  - python=3.12.0
+  - pandas=2.1.1  # imported by the script to read the label tables
+  - scikit-learn=1.3.2  # provides sklearn.metrics.completeness_score