-
Notifications
You must be signed in to change notification settings - Fork 0
/
reporting_lineages.smk
53 lines (49 loc) · 2.44 KB
/
reporting_lineages.smk
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
def report_lineages_divergence_input(w):
loci = ["IGH"]
for attrs in ANTIBODY_LINEAGES.values():
if attrs["Lineage"] == w.antibody_lineage:
loci.append(attrs["LightLocus"])
subject = attrs["Subject"]
break
else:
raise ValueError(f"Can't find lineage {w.antibody_lineage}")
targets = {
"members": expand(
"analysis/reporting/sonar/{{antibody_lineage}}.{chain_type}/igphyml_collected.csv",
chain_type = [{"IGH": "gamma", "IGK": "kappa", "IGL": "lambda"}[loc] for loc in loci]),
"mabs": "analysis/reporting/by-lineage/{antibody_lineage}.mabs.csv",
"refs": expand("analysis/germline/{subject}.{locus}/{segment}.fasta",
subject=subject, locus=loci, segment=["V", "D", "J"])}
return targets
def report_lineages_divergence_param_refs(w, input):
# to condense V/D/J to just parent dirs
return sorted(list({Path(path).parent for path in input.refs}))
rule report_lineages_divergence_plot:
output: "analysis/reporting/by-lineage/{antibody_lineage}.divergence.pdf"
input: "analysis/reporting/by-lineage/{antibody_lineage}.divergence.csv"
shell: "germ_div.py -Q {input} -o {output}"
rule report_lineages_divergence:
output: "analysis/reporting/by-lineage/{antibody_lineage}.divergence.csv"
input: unpack(report_lineages_divergence_input)
params:
refs=report_lineages_divergence_param_refs
shell: "germ_div.py -S rhesus -r {params.refs} -Q {input.members} isol={input.mabs} -G Bulk isol=Isolate -o {output}"
rule report_lineages_mabs:
output: temp("analysis/reporting/by-lineage/{antibody_lineage}.mabs.csv")
run:
with open(output[0], "w") as f_out:
writer = DictWriter(
f_out,
fieldnames=["timepoint", "chain", "sequence_id", "sequence"],
lineterminator="\n")
writer.writeheader()
for attrs in ANTIBODY_ISOLATES.values():
if attrs["Lineage"] == wildcards.antibody_lineage:
for chain in ["heavy", "light"]:
seq = attrs[chain.capitalize() + "Seq"]
if seq:
writer.writerow({
"timepoint": attrs["Timepoint"],
"chain": chain,
"sequence_id": attrs["Isolate"] + f"_{chain}",
"sequence": seq})