Skip to content

Commit

Permalink
Merge branch 'dev' into 'master'
Browse files Browse the repository at this point in the history
Merge for release 0.5.4

See merge request tron/easyquant!26
  • Loading branch information
patricksorn committed Jul 1, 2024
2 parents cb04b14 + d8bb1d3 commit 96da140
Show file tree
Hide file tree
Showing 6 changed files with 62 additions and 13 deletions.
15 changes: 14 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -212,4 +212,17 @@ Hint: This is just an example to illustrate the design of the table. Results may
- **within_interval** reads mapping fully onto the interval
- **coverage_perc** percentual coverage of the interval by aligned reads
- **coverage_mean** average coverage per base for the interval (fold coverage)
- **coverage_median** median coverage per base for the interval
- **coverage_median** median coverage per base for the interval


### Things to consider

When choosing the aligner you have to take into account that there are several differences among them:

- STAR:
- end-to-end alignment with no soft-clipping
- very slow for small reference sequences
- several available alignment parameters to optimize your results, which can be used while starting the pipeline
- bowtie2:
- end-to-end alignment might lead to insertions where the context sequence starts/ends
- faster than STAR for short reference sequences (index creation parameters are calculated automatically)
4 changes: 2 additions & 2 deletions environment.yml
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,8 @@ channels:
- conda-forge
- defaults
dependencies:
- python=3.7
- bowtie2=2.4.4
- python=3.8
- bowtie2=2.5.3
- bwa=0.7.17
- samtools=1.9
- pysam=0.21.0
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"

[project]
name = "bp_quant"
version = "0.5.3"
version = "0.5.4"
authors = [
{name = "TRON - Translational Oncology at the University Medical Center of the Johannes Gutenberg University Mainz", email = "[email protected]"},
]
Expand Down
45 changes: 38 additions & 7 deletions src/bp_quant/requantify.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,15 @@
logger = logging.getLogger(__name__)


def get_aligner(bam_file):
"""
Uses pysam to detect the aligner used to create the input BAM file
"""
header_dict = pysam.AlignmentFile(bam_file, "rb").header.to_dict()
aligner = header_dict["PG"][0]["ID"].lower()
return aligner


def perc_true(lst):
n = len(lst)
num_true = sum(1 for val in lst if val > 0)
Expand Down Expand Up @@ -117,6 +126,7 @@ class Quantification(object):
def __init__(self, seq_table_file, bam_file, output_path, bp_dist, allow_mismatches, interval_mode):
self.seq_table_file = seq_table_file
self.bam_file = bam_file
self.aligner = get_aligner(bam_file)
self.output_path = os.path.abspath(output_path)
self.quant_file = os.path.join(output_path, "quantification.tsv")
self.reads_file = os.path.join(output_path, "read_info.tsv.gz")
Expand Down Expand Up @@ -186,9 +196,7 @@ def parse_alignment(self):
missing_refs = {}
r1 = None
r2 = None

for read in bam.fetch():

if read.flag > 511:
continue
# Handle missing reference sequences which occur in SAM/BAM
Expand All @@ -197,10 +205,31 @@ def parse_alignment(self):
if read.reference_name not in missing_refs:
missing_refs[read.reference_name] = 0
missing_refs[read.reference_name] += 1
if not r1:
r1 = read
elif r1 and not r2:
r2 = read
# Bowtie reports alignments in the following order,
# therefore the following if statement prevents
# mismatching issues for read pairs where
# there are secondary alignments:
# R1
# R1_secondary_1
# R1_secondary_2
# R1_secondary_3
# R2
# ...
# Secondary alignments will be excluded from the matching process
# and unmapped mates will be simulated as they are not reported
# within the bowtie2 alignment
if read.is_secondary and self.aligner == "bowtie2":
r1_sec = read
r2_sec = pysam.AlignedSegment(bam.header)
r2_sec.reference_name = r1_sec.reference_name
r2_sec.query_name = r1_sec.query_name
r2_sec.is_unmapped = True
self.quantify(r1_sec, r2_sec)
else:
if not r1:
r1 = read
elif r1 and not r2:
r2 = read


if r1 and r2:
Expand All @@ -210,11 +239,13 @@ def parse_alignment(self):
# and therefore needs more relaxed clause
if not (r1.is_unmapped and r2.is_unmapped or
r1.query_name != r2.query_name):

if (r1.reference_name == r2.reference_name or
r1.is_unmapped and not r2.is_unmapped or
not r1.is_unmapped and r2.is_unmapped):
self.quantify(r1, r2)
elif r1.query_name != r2.query_name:
print("MISMATCHING QUERY NAME!", r1, r2)
break
r1 = None
r2 = None
logger.info("Quantification done.")
Expand Down
2 changes: 1 addition & 1 deletion src/bp_quant/version.py
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
version_info = (0, 5, 3)
version_info = (0, 5, 4)
version = '.'.join(str(c) for c in version_info)
7 changes: 6 additions & 1 deletion tests/tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,11 +5,16 @@

SEQ_TABLE_FILE = os.path.join("example_data", "CLDN18_Context_seq.csv")

from bp_quant.requantify import perc_true, get_seq_to_pos, classify_read
from bp_quant.requantify import get_aligner, perc_true, get_seq_to_pos, classify_read


class TestRequantify(unittest.TestCase):

def test_get_aligner(self):
bam_file = os.path.join("example_data", "example_rna-seq.bam")
self.assertEqual(get_aligner(bam_file), "star")


def test_perc_true(self):
data = [4, 3, 0, 5, 9, 0, 1, 0, 0, 0]
self.assertEqual(perc_true(data), 0.5)
Expand Down

0 comments on commit 96da140

Please sign in to comment.