Skip to content

Commit

Permalink
Updated tests, add CI hook
Browse files Browse the repository at this point in the history
  • Loading branch information
dogversioning committed Jul 30, 2024
1 parent e044069 commit 9205f62
Show file tree
Hide file tree
Showing 9 changed files with 123 additions and 103 deletions.
Empty file added .github/workflows/ci.yaml
Empty file.
76 changes: 38 additions & 38 deletions cumulus_library_opioid/manifest.toml
Original file line number Diff line number Diff line change
Expand Up @@ -7,44 +7,44 @@ file_names = [
"vocab/additional_rules_builder.py",
]

[sql_config]
file_names = [
"define_dx.sql",
"define_dx_sepsis.sql",
"define_dx_sud.sql",
"define_lab.sql",
"define_rx.sql",
"define_rx_buprenorphine.sql",
"define_rx_naloxone.sql",
"define_rx_opioid.sql",
"table_study_period.sql",
"table_dx.sql",
"table_dx_sepsis.sql",
"table_lab.sql",
"table_rx.sql",
"version.sql"
]
# [sql_config]
# file_names = [
# "define_dx.sql",
# "define_dx_sepsis.sql",
# "define_dx_sud.sql",
# "define_lab.sql",
# "define_rx.sql",
# "define_rx_buprenorphine.sql",
# "define_rx_naloxone.sql",
# "define_rx_opioid.sql",
# "table_study_period.sql",
# "table_dx.sql",
# "table_dx_sepsis.sql",
# "table_lab.sql",
# "table_rx.sql",
# "version.sql"
# ]

[counts_builder_config]
file_names = [
"counts.py"
]
# [counts_builder_config]
# file_names = [
# "counts.py"
# ]


[export_config]
export_list = [
"opioid__count_study_period_week",
"opioid__count_study_period_month",
"opioid__count_dx_sepsis_week",
"opioid__count_dx_sepsis_month",
"opioid__count_dx_month",
"opioid__count_dx_week",
"opioid__count_lab_month",
"opioid__count_lab_week",
"opioid__count_medicationrequest",
"opioid__count_rx",
"opioid__count_rx_opioid",
"opioid__count_rx_buprenorphine",
"opioid__count_rx_naloxone",
"opioid__meta_version",
]
# [export_config]
# export_list = [
# "opioid__count_study_period_week",
# "opioid__count_study_period_month",
# "opioid__count_dx_sepsis_week",
# "opioid__count_dx_sepsis_month",
# "opioid__count_dx_month",
# "opioid__count_dx_week",
# "opioid__count_lab_month",
# "opioid__count_lab_week",
# "opioid__count_medicationrequest",
# "opioid__count_rx",
# "opioid__count_rx_opioid",
# "opioid__count_rx_buprenorphine",
# "opioid__count_rx_naloxone",
# "opioid__meta_version",
# ]
1 change: 0 additions & 1 deletion cumulus_library_opioid/vocab/additional_rules_builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,6 @@ def prepare_queries(
'r.rui',
'r.rel',
'r.rela',
'e.rela',
'r.str1',
'r.str2',
'r.keyword',
Expand Down
10 changes: 4 additions & 6 deletions cumulus_library_opioid/vocab/rxnorm_vsac_builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,12 +34,11 @@ def get_create_view_filter_by(
):
a_schema = a_schema or 'rxnorm.'
a_join_col = a_join_col or 'a.rxcui'
b_join_col = b_join_col or 'b.rxcui'
b_table = b_table or f'opioid__{steward}_vsac',
b_join_col = b_join_col or 'b.code'
b_table = b_table or f'opioid__{steward}_vsac'
join_clauses = join_clauses or [f"{a_join_col} = {b_join_col}"]
view_name = view_name or (
f'{manifest.get_study_prefix()}__{steward}_{a_table}'
)
view_name = view_name or f'{manifest.get_study_prefix()}__{steward}_{a_table}'


return base_templates.get_create_view_from_tables(
view_name=view_name,
Expand Down Expand Up @@ -111,4 +110,3 @@ def get_create_view_filter_by(
b_join_col='b.rxcui1',
)
)

123 changes: 71 additions & 52 deletions cumulus_library_opioid/vocab/static_builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,56 +28,57 @@ class StaticBuilder(base_table_builder.BaseTableBuilder):
display_text = "Building static data tables..."
base_path = pathlib.Path(__file__).resolve().parent

tables = [ # noqa: RUF012
TableConfig(
file_path=base_path / "./common/keywords/keywords.tsv",
delimiter="\t",
table_name="keywords",
headers=["STR"],
dtypes={"STR": "str"},
parquet_types=["STRING"],
filtered_path=base_path / "./common/keywords/keywords.filtered.tsv",
),
TableConfig(
file_path=base_path / "./all_rxcui_str.RXNCONSO_curated.tsv",
delimiter="\t",
table_name="all_rxnconso_keywords",
headers=["RXCUI","STR","TTY","SAB","CODE","keyword","keyword_len"],
dtypes={"RXCUI":"str","STR":"str","TTY":"str","SAB":"str","CODE":"str","keyword":"str","keyword_len":"str"},
parquet_types=["STRING","STRING","STRING","STRING","STRING","STRING","STRING"],
),
TableConfig(
file_path=base_path / "./common/expand_rules/expand_rules.tsv",
delimiter="\t",
table_name="search_rules",
headers=[
"TTY1",
"RELA",
"TTY2",
"rule",
],
dtypes={"TTY1": "str", "RELA": "str", "TTY2": "str", "rule": "str"},
parquet_types=["STRING", "STRING", "STRING", "STRING", "BOOLEAN"],
ignore_header=True,
map_cols=[
{
"from": "rule",
"to": "include",
"map_dict": {"yes": True, "no": False},
}
],
),
# TODO: We should eventually replace this with a source derived from
# UMLS directly at some point
TableConfig(
file_path=base_path / "./common/umls/umls_tty.tsv",
delimiter="\t",
table_name="umls_tty",
headers=["TTY","TTY_STR"],
dtypes={"TTY": "str","TTY_STR": "str",},
parquet_types=["STRING", "STRING"],
),
]
def get_table_configs(self):
return [
TableConfig(
file_path=self.base_path / "./common/keywords/keywords.tsv",
delimiter="\t",
table_name="keywords",
headers=["STR"],
dtypes={"STR": "str"},
parquet_types=["STRING"],
filtered_path=self.base_path / "./common/keywords/keywords.filtered.tsv",
),
TableConfig(
file_path=self.base_path / "./all_rxcui_str.RXNCONSO_curated.tsv",
delimiter="\t",
table_name="all_rxnconso_keywords",
headers=["RXCUI","STR","TTY","SAB","CODE","keyword","keyword_len"],
dtypes={"RXCUI":"str","STR":"str","TTY":"str","SAB":"str","CODE":"str","keyword":"str","keyword_len":"str"},
parquet_types=["STRING","STRING","STRING","STRING","STRING","STRING","STRING"],
),
TableConfig(
file_path=self.base_path / "./common/expand_rules/expand_rules.tsv",
delimiter="\t",
table_name="search_rules",
headers=[
"TTY1",
"RELA",
"TTY2",
"rule",
],
dtypes={"TTY1": "str", "RELA": "str", "TTY2": "str", "rule": "str"},
parquet_types=["STRING", "STRING", "STRING", "STRING", "BOOLEAN"],
ignore_header=True,
map_cols=[
{
"from": "rule",
"to": "include",
"map_dict": {"yes": True, "no": False},
}
],
),
# TODO: We should eventually replace this with a source derived from
# UMLS directly at some point
TableConfig(
file_path=self.base_path / "./common/umls/umls_tty.tsv",
delimiter="\t",
table_name="umls_tty",
headers=["TTY","TTY_STR"],
dtypes={"TTY": "str","TTY_STR": "str",},
parquet_types=["STRING", "STRING"],
),
]

def filter_duplicated_meds(
self, path: pathlib.Path, delimiter: str, filtered_path: pathlib.Path
Expand Down Expand Up @@ -127,6 +128,7 @@ def prepare_queries(
**kwargs,
):
# fetch and add vsac tables
self.tables = self.get_table_configs()
vsac_stewards = vsac.get_vsac_stewards(config)
for steward in vsac_stewards:
vsac.download_oid_data(steward, config=config, path=self.base_path /'data')
Expand All @@ -148,6 +150,10 @@ def prepare_queries(

for table in self.tables:
# Determine what we're using as a source file
if table.table_name == 'search_rules':
print(table)
print('base_path: ',self.base_path)
print()
if table.filtered_path:
self.filter_duplicated_meds(
table.file_path, table.delimiter, table.filtered_path
Expand All @@ -156,9 +162,15 @@ def prepare_queries(
else:
path = self.base_path / table.file_path
parquet_path = path.with_suffix(".parquet")

# Read the file, using lots of the TableConfig params, and generate
# a parquet file
if table.table_name == 'search_rules':

tmp_df = pandas.read_csv(
path,
delimiter=table.delimiter
)
print(tmp_df.head())
df = pandas.read_csv(
path,
delimiter=table.delimiter,
Expand All @@ -174,6 +186,10 @@ def prepare_queries(
df[mapping["from"]].str.lower().map(mapping["map_dict"])
)
table.headers.append(mapping["to"])
if table.table_name == 'search_rules':
print(parquet_path)
print(df.head)

df.to_parquet(parquet_path)

# Upload to S3 and create a table that reads from it
Expand All @@ -195,4 +211,7 @@ def prepare_queries(
)
)
progress.advance(task)

if table.table_name == 'search_rules':
print(table)
print('base_path: ',self.base_path)
print()
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ version = "1.0"
requires-python = ">= 3.10"
# If you need python libraries, add them here
dependencies = [
"cumulus-library >= 2.3.0",
"cumulus-library >= 3.0.0",
"sqlfluff >=3",
"xlrd",
"openpyxl",
Expand Down
8 changes: 6 additions & 2 deletions tests/test_additional_rules_builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,11 +53,11 @@ def test_additional_rules(mock_api, mock_db_config_rxnorm):
'count':28,
'first':(
1819, '1431077', 'BN', 'BN', 43028489, 'RN', 'reformulated_to',
'reformulated_to', 'Buprenorphine', 'Zubsolv', 'zubsolv'
'Buprenorphine', 'Zubsolv', 'zubsolv'
),
'last': (
1819, '904871', 'BN', 'BN', 3764389, 'RN', 'reformulated_to',
'reformulated_to', 'Buprenorphine', 'Butrans', 'butrans'
'Buprenorphine', 'Butrans', 'butrans'
),
},
{
Expand Down Expand Up @@ -95,6 +95,10 @@ def test_additional_rules(mock_api, mock_db_config_rxnorm):
f"Select * from {table_conf['name']} order by "
f"{','.join([str(x+1) for x in range(table_conf['columns'])])}"
).fetchall()
print(table_conf['name'])
print(res[0])
print(res[-1])
print(len(res))
assert len(res) == table_conf['count']
assert res[0] == table_conf['first']
if table_conf['count'] > 1:
Expand Down
4 changes: 2 additions & 2 deletions tests/test_rxnorm_vsac_builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
clear=True,
)
@mock.patch("cumulus_library.apis.umls.UmlsApi")
def test_rxnorm_vsac_builder(mock_api, mock_db_config_rxnorm, tmp_path):
def test_rxnorm_vsac_builder(mock_api, mock_db_config_rxnorm):
with open(pathlib.Path(__file__).parent / "test_data/vsac_resp.json") as f:
resp = json.load(f)
mock_api.return_value.get_vsac_valuesets.return_value = resp
Expand All @@ -30,7 +30,7 @@ def test_rxnorm_vsac_builder(mock_api, mock_db_config_rxnorm, tmp_path):
assert len(res) == 1800
assert res[0] == (
1819, 'Product containing buprenorphine (medicinal product)', 'FN',
'SNOMEDCT_US', 1818, 'RN', 'tradename_of', 4716626
'SNOMEDCT_US', 1818, 'RN', 'reformulated_to', 4716626
)
assert res[-1] == (
1819, 'Buprenorphine', 'IN', 'GS', 1655031, 'RO', 'has_ingredient', 86130850
Expand Down
2 changes: 1 addition & 1 deletion tests/test_static_builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@ def test_static_tables(
shutil.copy(test_path / "filtered.csv", tmp_path / "filtered.csv")
builder = static_builder.StaticBuilder()
filtered = tmp_path / filtered if filtered else None
builder.tables = [
builder.get_table_configs = lambda: [
static_builder.TableConfig(
file_path=tmp_path / "static_table.csv",
delimiter=",",
Expand Down

0 comments on commit 9205f62

Please sign in to comment.