Skip to content

Commit

Permalink
Merge pull request #138 from Sage-Bionetworks/update-table
Browse files Browse the repository at this point in the history
[GEN-1424] Make sure 'None' gets read in as a string
  • Loading branch information
thomasyu888 authored Jul 16, 2024
2 parents 2ee2d4a + 33f13d8 commit 2015d77
Show file tree
Hide file tree
Showing 2 changed files with 9 additions and 5 deletions.
11 changes: 7 additions & 4 deletions scripts/table_updates/update_data_table.py
Original file line number Diff line number Diff line change
Expand Up @@ -289,15 +289,18 @@ def main():
with open(project_config) as config_file:
cohort_info = json.load(config_file)
logger.info("Read cohort information successful.")
config_file.close()

# get master table
# These are the internal tables with non-redacted data
table_id, condition = list(TABLE_INFO[table_type])
master_table = download_synapse_table(syn, table_id, condition)
# This contains the external tables with redacted data
TABLE_INFO["redacted"] = ('syn21446696',"table_type='data' and double_curated is false")

# download data files
# TODO: find the cohort that has new data

# download data files
# TODO: find the cohort that has new data
# This is a mapping to all the intake data. e.g: ProstateBPCIntake_data
# found here: https://www.synapse.org/Synapse:syn23286928
cohort_info_selected = cohort_info[table_type]
cohort_data_list = []
for cohort in cohort_info_selected:
Expand Down
3 changes: 2 additions & 1 deletion scripts/table_updates/utilities.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,8 @@ def get_data(syn, label_data_id, cohort):
Returns:
Dataframe: label data
"""
label_data = pandas.read_csv(syn.get(label_data_id).path,low_memory=False)
na_values = [" ", "#N/A", "#N/A N/A", "#NA", "-1.#IND", "-1.#QNAN", "-NaN", "-nan", "1.#IND", "1.#QNAN", "<NA>", "N/A", "NA", "NULL", "NaN", "n/a", "nan", "null"]
label_data = pandas.read_csv(syn.get(label_data_id).path, low_memory=False, na_values=na_values, keep_default_na=False)
label_data['cohort'] = cohort
return(label_data)

Expand Down

0 comments on commit 2015d77

Please sign in to comment.