-
Notifications
You must be signed in to change notification settings - Fork 2
/
run_pipeline_mapping.py
executable file
·92 lines (74 loc) · 3.49 KB
/
run_pipeline_mapping.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
import mapping
import pre_processing
import gatk_pre_processing
import qc_trim
from utils import helpers
import os
def callmapping(var_maptype, var_sampletype, working_directory, library, threads, var_gatk_tools, issplitchr, trim,
                middle_files="Yes"):
    """Run the QC, mapping and pre-processing steps for one sample directory.

    The process-wide current working directory is changed to
    ``working_directory`` and is not restored afterwards.

    Args:
        var_maptype: Map type forwarded to every pipeline step.
        var_sampletype: Sample type forwarded to every pipeline step and to
            ``helpers.get_info``.
        working_directory: Sample directory. A single trailing slash or
            backslash is dropped before use.
        library: Library id, forwarded as ``library_matching_id``.
        threads: Thread count, forwarded as ``thrds``.
        var_gatk_tools: ``"Yes"`` to run ``GatkPreProcessing.run_gatks4`` on
            the pre-processing output; any other value skips it.
        issplitchr: ``"No"`` passes the whole pre-processing result to a
            single ``run_gatks4`` call; any other value calls ``run_gatks4``
            once per item of that result.
        trim: ``"Yes"`` runs ``qc_trim.QC(...).run_qc()`` unless a ``QC``
            folder already exists in the working directory. For any other
            value, an existing ``QC`` folder makes the mapping step receive
            ``trim="Yes"`` anyway.
        middle_files: ``"Yes"`` keeps intermediate files; any other value
            calls ``helpers.delete_files_from_folder`` after the mapping and
            pre-processing stages.

    Returns:
        Always ``True``.

    Raises:
        ValueError: If ``working_directory`` is empty.
    """
    if not working_directory:
        raise ValueError("working_directory must be a non-empty path")

    keep_middle_files = middle_files == "Yes"

    wd = working_directory
    # Drop one trailing separator, but never reduce a bare root ("/") to "".
    if len(wd) > 1 and wd.endswith(("/", "\\")):
        wd = wd[:-1]

    # NOTE(review): get_fastq() takes no arguments, so it presumably scans the
    # current directory - the chdir has to happen first.
    os.chdir(wd)
    fastq_list = helpers.get_fastq()
    info_dict = helpers.get_info(var_sampletype, fastq_list)

    # Trimming runs at most once per sample: an existing QC folder means it was
    # already done, so the mapping step is told to use trimmed input even when
    # the caller did not request trimming for this run.
    tr = trim
    qc_dir_exists = os.path.exists(os.path.join(wd, "QC"))
    if tr == "Yes":
        if not qc_dir_exists:
            qc = qc_trim.QC(wd, var_sampletype, threads, fastq_list, info_dict, var_maptype)
            qc.run_qc()
    elif qc_dir_exists:
        tr = "Yes"

    # Keyword arguments shared by every pipeline step constructor.
    step_kwargs = {
        "working_directory": wd,
        "map_type": var_maptype,
        "sample_type": var_sampletype,
        "library_matching_id": library,
        "thrds": threads,
    }

    mapping_step = mapping.Mapping(trim=tr, **step_kwargs)
    mapping_files = mapping_step.mapping()
    if not keep_middle_files:
        helpers.delete_files_from_folder(wd, var_maptype, "Mapping", mapping_files)
    print("---------------------------")
    print(mapping_files)

    pre_processing_step = pre_processing.PreProcessing(issplitchr=issplitchr, **step_kwargs)
    print("---------------------------")
    print(fastq_list)
    print(info_dict)

    # Every branch below starts from the same pre-processing result.
    mark_duplicate_file = pre_processing_step.pre_process(info_dict, mapping_files)

    if var_gatk_tools != "Yes":
        if not keep_middle_files:
            helpers.delete_files_from_folder(wd, var_maptype, "PreProcess", mark_duplicate_file)
        return True

    if issplitchr != "No":
        # NOTE(review): no cleanup happens in this branch even when
        # middle_files is not "Yes" - confirm whether that is intentional.
        gatk_file_list = []
        for split_file in mark_duplicate_file:
            gatk_pre_processing_step = gatk_pre_processing.GatkPreProcessing(**step_kwargs)
            return_files = gatk_pre_processing_step.run_gatks4(split_file)
            print(return_files)
            gatk_file_list.append(return_files)
        print(gatk_file_list)
        return True

    gatk_pre_processing_step = gatk_pre_processing.GatkPreProcessing(**step_kwargs)
    gatk_files = gatk_pre_processing_step.run_gatks4(mark_duplicate_file)
    if not keep_middle_files:
        helpers.delete_files_from_folder(wd, var_maptype, "PreProcess", gatk_files)
    return True
if __name__ == "__main__":
    # Example invocation with hard-coded arguments for one sample directory.
    example_run = {
        "var_maptype": "Bwa",
        "var_sampletype": "Tumor",
        "working_directory": "/media/bioinformaticslab/369ca485-b3f2-4f04-bbfb-8657aad7669e/yunusemrecebeci/samples/Sample_NOB74",
        "library": "1",
        "threads": "4",
        "var_gatk_tools": "Yes",
        "issplitchr": "No",
        "trim": "Yes",
        "middle_files": "No",
    }
    callmapping(**example_run)