Skip to content

Commit

Permalink
feat: first steps towards VAF stratification
Browse files Browse the repository at this point in the history
  • Loading branch information
famosab committed Apr 12, 2024
1 parent 77ee639 commit 4240db7
Showing 1 changed file with 35 additions and 11 deletions.
46 changes: 35 additions & 11 deletions workflow/scripts/calc-precision-recall.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,33 +7,56 @@
from abc import ABC, abstractmethod
from enum import Enum
import pandas as pd
import numpy as np
import pysam

from common.classification import CompareExactGenotype, CompareExistence, Class


class Classifications:
def __init__(self, comparator):
self.tp_query = 0
self.tp_truth = 0
self.fn = 0
self.fp = 0
self.comparator = comparator
def __init__(self, comparator, vaf_fields):
if vaf_fields[0] is None or vaf_fields[1] is None:
self.tp_query = 0
self.tp_truth = 0
self.fn = 0
self.fp = 0
self.comparator = comparator
else:
self.tp_query = np.array([]) # 0-100%
self.tp_truth = 0
self.fn = 0
self.fp = 0

def increment_counter(self, counter, vaf):
if vaf is None:
counter += 1
else:
# 10 equally sized bins
bin = int(vaf*10)
counter[bin] += 1

def register(self, record):
for c in self.comparator.classify(record):
# TODO: depending on case, fetch VAF from truth or query record (FP: from query record, field configurable by callset (e.g. FORMAT/AF, INFO/AF, ...)
# for truth record, field configurable by benchmark preset (same syntax as above)
# increment counters for bins, bins given to constructor as list of tuples or some numpy equivalent.
# Default: None. If no VAF field given for either truth or callset, don't bin at all.
# Default: None. If no VAF field given for either truth or callset, don't bin at all.
if c.cls is Class.TP_truth:
self.tp_truth += 1
vaf = truth.samples[record.name]["AF"] # still needs to be implemented
self.increment_counter(self.tp_truth, vaf)
# self.tp_truth += 1
elif c.cls is Class.TP_query:
self.tp_query += 1
vaf = truth.samples[record.name]["AF"] # still needs to be implemented
self.increment_counter(self.tp_query, vaf)
# self.tp_query += 1
elif c.cls is Class.FN:
self.fn += 1
vaf = truth.samples[record.name]["AF"] # still needs to be implemented
self.increment_counter(self.fn, vaf)
# self.fn += 1
elif c.cls is Class.FP:
self.fp += 1
vaf = record.samples[0]["AF"] # only works for FORMAT field
self.increment_counter(self.fp, vaf)
# self.fp += 1
else:
assert False, "unexpected case"

Expand Down Expand Up @@ -93,6 +116,7 @@ def collect_results(vartype):
]
]

snakemake.params.vaf_fields

assert snakemake.wildcards.vartype in ["snvs", "indels"]
vartype = "SNV" if snakemake.wildcards.vartype == "snvs" else "INDEL"
Expand Down

0 comments on commit 4240db7

Please sign in to comment.