Skip to content

Commit

Permalink
Merge pull request #99 from SpatialHackathon/metric_completeness_kbih…
Browse files Browse the repository at this point in the history
…arie

Metric Completeness Score
  • Loading branch information
niklasmueboe authored Dec 12, 2023
2 parents 656c756 + 145426d commit 72f30cd
Show file tree
Hide file tree
Showing 2 changed files with 69 additions and 0 deletions.
63 changes: 63 additions & 0 deletions metric/Completeness/Completeness.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
#!/usr/bin/env python

# Author_and_contribution: Niklas Mueller-Boetticher; created template
# Author_and_contribution: Kirti Biharie; implemented completeness score

"""Calculate the Completeness Score (scikit-learn) for domain-clustering labels.

Reads a predicted-label table and a ground-truth label table (TSV, first
column is the observation index, labels in a ``label`` column), restricts
both to their shared observations, and writes the completeness score to the
output file in scientific notation.
"""

import argparse
from pathlib import Path

import pandas as pd
import sklearn.metrics

parser = argparse.ArgumentParser(description="Calculate Completeness Score (scikit-learn)")

parser.add_argument(
    "-l", "--labels", help="Labels from domain clustering.", required=True
)
parser.add_argument("-g", "--ground_truth", help="Groundtruth labels.", required=False)
parser.add_argument(
    "-e",
    "--embedding",
    help="Embedding of points in latent space. Potential usage for metrics without groundtruth.",
    required=False,
)
parser.add_argument(
    "-c",
    "--config",
    help="Optional config file (json) used to pass additional parameters.",
    required=False,
)  # format should be json
parser.add_argument("-o", "--out_file", help="Output file.", required=True)

args = parser.parse_args()

# Use these filepaths as input
label_file = args.labels

if args.ground_truth is not None:
    groundtruth_file = args.ground_truth
if args.embedding is not None:
    embedding_file = args.embedding
if args.config is not None:
    config_file = args.config


## Your code goes here
# Completeness is a supervised metric; it is undefined without ground truth.
if args.ground_truth is None:
    raise ValueError("Groundtruth labels needed to calculate the Completeness Score")


def _read_label_codes(path):
    """Read a label table and return the ``label`` column as integer category codes."""
    return pd.read_table(path, index_col=0)["label"].astype("category").cat.codes


ground_truth = _read_label_codes(groundtruth_file)
labels = _read_label_codes(label_file)

# Score only the observations present in both tables, aligned by index.
common_index = labels.index.intersection(ground_truth.index)
ground_truth = ground_truth.loc[common_index]
labels = labels.loc[common_index]

metric = sklearn.metrics.completeness_score(ground_truth, labels)

## Write output
out_path = Path(args.out_file)
out_path.parent.mkdir(parents=True, exist_ok=True)
out_path.write_text(f"{metric:.5e}\n")
6 changes: 6 additions & 0 deletions metric/Completeness/Completeness.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
# Conda environment for the Completeness metric (Completeness.py):
# pandas reads the label tables, scikit-learn provides completeness_score.
channels:
  - conda-forge
dependencies:
  - python=3.12.0
  - pandas=2.1.1
  - scikit-learn=1.3.2

0 comments on commit 72f30cd

Please sign in to comment.