forked from mdaya/topmed_imputation
-
Notifications
You must be signed in to change notification settings - Fork 1
/
fix_strands.sh
62 lines (53 loc) · 2 KB
/
fix_strands.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
#!/bin/bash
#Set arguments
if [ "$#" -eq "0" ]
then
echo "Usage: ${0##*/} <pre_qc_dir> <post_qc_dir> <code_dir> <chr>"
echo "Script uses output from TOPMed pre-imputation QC to fix strand"
echo "flips. If "chr" input = "all", then the script will create one"
echo "VCF file per chr. Otherwise, must be a single chr number '1',"
echo "'2', etc. Crossover is from hg19 to hg38, this script should"
echo "follow create_initial_input (with or without crossover)."
exit
fi
pre_qc_dir=$1
post_qc_dir=$2
code_dir=$3
chr=$4
#Get list of SNPs to flip
Rscript --vanilla ${code_dir}/get_strand_flip_snp_names.R $pre_qc_dir $post_qc_dir
#Create vcf files for uploading to imputation server for QC
#Note that the encoding for chromosome is e.g. chr22, not 22
process_chr() {
chr=$1
# Flip strands for strand flip & strand flip and allele switch
plink --bfile ${pre_qc_dir}/pre_qc \
--flip ${post_qc_dir}/tmp_flip.txt \
--chr $chr --make-bed --keep-allele-order \
--out ${post_qc_dir}/tmp_chr${chr}_flip
# Fix allele switches after slip
plink --bfile ${post_qc_dir}/tmp_chr${chr}_flip \
--a2-allele ${post_qc_dir}/tmp_a2-allele.txt \
--chr $chr --make-bed --keep-allele-order \
--out ${post_qc_dir}/tmp_chr${chr}_flip_switch
# Fix allele switches only
plink --bfile ${post_qc_dir}/tmp_chr${chr}_flip_switch \
--a2-allele ${post_qc_dir}/tmp_a2-allele_switch_only.txt \
--chr $chr --recode vcf --keep-allele-order \
--out ${post_qc_dir}/tmp_chr${chr}_flip_switch_both
vcf-sort ${post_qc_dir}/tmp_chr${chr}_flip_switch_both.vcf | \
sed -E 's/^([[:digit:]]+)/chr\1/' | \
bgzip -c > ${post_qc_dir}/chr${chr}_post_qc.vcf.gz
}
# If chr = "all", then create one VCF file per chr, otherwise
# chr must equal one chr number, so only make that VCF file
if [ "$chr" == "all" ]
then
for ((chr=1; chr<=22; chr++)); do
process_chr $chr
done
else
process_chr $chr
fi
#Cleanup
rm ${post_qc_dir}/tmp_*