Skip to content

Commit

Permalink
Merge pull request #309 from pbashyal-nmdp/308_fix_map_drb345_to_drbx
Browse files Browse the repository at this point in the history
Fix DRBX mapping for `map_drb345_to_drbx`
  • Loading branch information
mmaiers-nmdp authored Mar 4, 2024
2 parents 2fbe8fa + a2797c5 commit 7082a6f
Show file tree
Hide file tree
Showing 3 changed files with 62 additions and 27 deletions.
57 changes: 43 additions & 14 deletions extras/reduce_conf.json
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,12 @@
"r_drb1_typ2",
"r_dpb1_typ1",
"r_dpb1_typ2",
"r_drb3_typ1",
"r_drb3_typ2",
"r_drb4_typ1",
"r_drb4_typ2",
"r_drb5_typ1",
"r_drb5_typ2",
"d_a_typ1",
"d_a_typ2",
"d_b_typ1",
Expand All @@ -23,7 +29,13 @@
"d_drb1_typ1",
"d_drb1_typ2",
"d_dpb1_typ1",
"d_dpb1_typ2"
"d_dpb1_typ2",
"d_drb3_typ1",
"d_drb3_typ2",
"d_drb4_typ1",
"d_drb4_typ2",
"d_drb5_typ1",
"d_drb5_typ2"
],
"locus_column_mapping": {
"recipient": {
Expand All @@ -39,13 +51,25 @@
"r_c_typ1",
"r_c_typ2"
],
"dqb1": [
"r_dpb1_typ1",
"r_dpb1_typ2"
],
"drb1": [
"r_drb1_typ1",
"r_drb1_typ2"
],
"dqb1": [
"r_dpb1_typ1",
"r_dpb1_typ2"
"drb3": [
"r_drb3_typ1",
"r_drb3_typ2"
],
"drb4": [
"r_drb4_typ1",
"r_drb4_typ2"
],
"drb5": [
"r_drb5_typ1",
"r_drb5_typ2"
]
},
"donor": {
Expand All @@ -61,20 +85,30 @@
"d_c_typ1",
"d_c_typ2"
],
"dqb1": [
"d_dpb1_typ1",
"d_dpb1_typ2"
],
"drb1": [
"d_drb1_typ1",
"d_drb1_typ2"
],
"dqb1": [
"d_dpb1_typ1",
"d_dpb1_typ2"
"drb3": [
"d_drb3_typ1",
"d_drb3_typ2"
],
"drb4": [
"d_drb4_typ1",
"d_drb4_typ2"
],
"drb5": [
"d_drb5_typ1",
"d_drb5_typ2"
]
}
},

"redux_type": "lgx",
"redux_cache_size": 1000,

"reduce_serology": false,
"reduce_v2": true,
"convert_v2_to_v3": false,
Expand All @@ -84,17 +118,12 @@
"reduce_XX": false,
"reduce_MAC": true,
"map_drb345_to_drbx": false,

"locus_in_allele_name": true,
"keep_locus_in_allele_name": true,

"new_column_for_redux": true,
"reduced_column_prefix": "reduced_",

"generate_glstring": true,

"output_file_format": "csv",
"apply_compression": "gzip",

"verbose_log": true
}
8 changes: 4 additions & 4 deletions extras/sample.csv
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
rid,did,r_a_typ1,r_a_typ2,r_b_typ1,r_b_typ2,r_c_typ1,r_c_typ2,r_drb1_typ1,r_drb1_typ2,r_dpb1_typ1,r_dpb1_typ2,d_a_typ1,d_a_typ2,d_b_typ1,d_b_typ2,d_c_typ1,d_c_typ2,d_drb1_typ1,d_drb1_typ2,d_dpb1_typ1,d_dpb1_typ2
2110,123,A*01:AB,A*29:79,B*18:67,B*51:275,C*05:01:19,C*02:85:02,DRB1*03:03,DRB1*14:144,DPB1*193:01:01,DPB1*582:01:01,A*01:AB,A*29:79,B*18:67,B*51:275,C*05:01:19,C*02:85:02,DRB1*03:03,DRB1*14:144,DPB1*193:01:01,DPB1*582:01:01
2111,456,A*01:01:42,A*30:12:02,B*44:02:32,B*35:42,C*03:148,C*04:322,DRB1*13:01:16,DRB1*15:80N,DPB1*914:01:01,DPB1*278:01:01,A*01:01:42,A*30:12:02,B*44:02:32,B*35:42,C*03:148,C*04:322,DRB1*13:01:16,DRB1*15:80N,DPB1*914:01:01,DPB1*278:01:01
2113,789,A*02:247,A*03:227,B*15:570,B*07:02:01:17,C*16:01:10,C*06:102,DRB1*13:156,DRB1*14:167:01,DPB1*405:01:01,DPB1*479:01:01,A*02:247,A*03:227,B*15:570,B*07:02:01:17,C*16:01:10,C*06:102,DRB1*13:156,DRB1*14:167:01,DPB1*405:01:01,DPB1*479:01:01
rid,did,r_a_typ1,r_a_typ2,r_b_typ1,r_b_typ2,r_c_typ1,r_c_typ2,r_drb1_typ1,r_drb1_typ2,r_dpb1_typ1,r_dpb1_typ2,d_a_typ1,d_a_typ2,d_b_typ1,d_b_typ2,d_c_typ1,d_c_typ2,d_drb1_typ1,d_drb1_typ2,d_dpb1_typ1,d_dpb1_typ2,r_drb3_typ1,r_drb3_typ2,r_drb4_typ1,r_drb4_typ2,r_drb5_typ1,r_drb5_typ2,d_drb3_typ1,d_drb3_typ2,d_drb4_typ1,d_drb4_typ2,d_drb5_typ1,d_drb5_typ2
2110,123,A*01:AB,A*29:79,B*18:67,B*51:275,C*05:01:19,C*02:85:02,DRB1*03:03,DRB1*14:144,DPB1*193:01:01,DPB1*582:01:01,A*01:AB,A*29:79,B*18:67,B*51:275,C*05:01:19,C*02:85:02,DRB1*03:03,DRB1*14:144,DPB1*193:01:01,DPB1*582:01:01,DRB3*02:189,DRB3*03:09,NNNN,NNNN,NNNN,NNNN,NNNN,NNNN,NNNN,NNNN,DRB5*01:93,DRB5*02:02:01
2111,456,A*01:01:42,A*30:12:02,B*44:02:32,B*35:42,C*03:148,C*04:322,DRB1*13:01:16,DRB1*15:80N,DPB1*914:01:01,DPB1*278:01:01,A*01:01:42,A*30:12:02,B*44:02:32,B*35:42,C*03:148,C*04:322,DRB1*13:01:16,DRB1*15:80N,DPB1*914:01:01,DPB1*278:01:01,NNNN,NNNN,DRB4*01:53,DRB4*01:31,NNNN,NNNN,NNNN,NNNN,NNNN,NNNN,DRB5*01:102,DRB5*01:103
2113,789,A*02:247,A*03:227,B*15:570,B*07:02:01:17,C*16:01:10,C*06:102,DRB1*13:156,DRB1*14:167:01,DPB1*405:01:01,DPB1*479:01:01,A*02:247,A*03:227,B*15:570,B*07:02:01:17,C*16:01:10,C*06:102,DRB1*13:156,DRB1*14:167:01,DPB1*405:01:01,DPB1*479:01:01,NNNN,NNNN,DRB4*01:79,DRB4*01:119,NNNN,NNNN,DRB3*02:189,DRB3*03:09,NNNN,NNNN,NNNN,NNNN
24 changes: 15 additions & 9 deletions scripts/pyard-reduce-csv
Original file line number Diff line number Diff line change
Expand Up @@ -209,15 +209,21 @@ def reduce_locus_columns(df, ard_config, locus_column_mapping, verbose):
# New columns DRBX_1 and DRBX_2 are created
if ard_config.get("map_drb345_to_drbx"):
drbx_loci = ["DRB3", "DRB4", "DRB5"]
drbx_columns = [
col_name for col_name in df.columns if col_name.split("_")[1] in drbx_loci
]
if len(drbx_columns) == len(drbx_loci) * 2: # For Type1/Type2
locus_in_allele_name = ard_config["keep_locus_in_allele_name"]
df_drbx = df[drbx_columns].apply(
create_drbx, axis=1, args=(locus_in_allele_name,)
)
df["DRBX_1"], df["DRBX_2"] = zip(*df_drbx)
for subject in ard_config["locus_column_mapping"].keys():
subject_loci = ard_config["locus_column_mapping"][subject]
subject_drbs = []
for locus in ard_config["locus_column_mapping"][subject].keys():
if locus.upper() in drbx_loci:
subject_drbs.extend(subject_loci[locus])

# If all the DRBs are there
# ['DRB3_1', 'DRB3_2', 'DRB4_1', 'DRB4_2', 'DRB5_1', 'DRB5_2']
if len(subject_drbs) == 6:
locus_in_allele_name = ard_config["keep_locus_in_allele_name"]
df_drbx = df[subject_drbs].apply(
create_drbx, axis=1, args=(locus_in_allele_name,)
)
df[f"{subject}_DRBX_1"], df[f"{subject}_DRBX_2"] = zip(*df_drbx)

if ard_config.get("generate_glstring"):
for subject in locus_column_mapping:
Expand Down

0 comments on commit 7082a6f

Please sign in to comment.