Update tests, add CI hook

smart-on-fhir · Jul 30, 2024 · 9205f62
1 parent e044069
commit 9205f62
Show file tree

Hide file tree

Showing 9 changed files with 123 additions and 103 deletions.
diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml
diff --git a/cumulus_library_opioid/manifest.toml b/cumulus_library_opioid/manifest.toml
@@ -7,44 +7,44 @@ file_names = [
     "vocab/additional_rules_builder.py",
 ]
 
-[sql_config]
-file_names = [
-    "define_dx.sql",
-    "define_dx_sepsis.sql",
-    "define_dx_sud.sql",
-    "define_lab.sql",
-    "define_rx.sql",
-    "define_rx_buprenorphine.sql",
-    "define_rx_naloxone.sql",
-    "define_rx_opioid.sql",
-    "table_study_period.sql",
-    "table_dx.sql",
-    "table_dx_sepsis.sql",
-    "table_lab.sql",
-    "table_rx.sql",
-    "version.sql"
-]
+# [sql_config]
+# file_names = [
+#     "define_dx.sql",
+#     "define_dx_sepsis.sql",
+#     "define_dx_sud.sql",
+#     "define_lab.sql",
+#     "define_rx.sql",
+#     "define_rx_buprenorphine.sql",
+#     "define_rx_naloxone.sql",
+#     "define_rx_opioid.sql",
+#     "table_study_period.sql",
+#     "table_dx.sql",
+#     "table_dx_sepsis.sql",
+#     "table_lab.sql",
+#     "table_rx.sql",
+#     "version.sql"
+# ]
 
-[counts_builder_config]
-file_names = [
-    "counts.py"
-]
+# [counts_builder_config]
+# file_names = [
+#     "counts.py"
+# ]
 
 
-[export_config]
-export_list = [
-    "opioid__count_study_period_week",
-    "opioid__count_study_period_month",
-    "opioid__count_dx_sepsis_week",
-    "opioid__count_dx_sepsis_month",
-    "opioid__count_dx_month",
-    "opioid__count_dx_week",
-    "opioid__count_lab_month",
-    "opioid__count_lab_week",
-    "opioid__count_medicationrequest",
-    "opioid__count_rx",
-    "opioid__count_rx_opioid",
-    "opioid__count_rx_buprenorphine",
-    "opioid__count_rx_naloxone",
-    "opioid__meta_version",
-]
+# [export_config]
+# export_list = [
+#     "opioid__count_study_period_week",
+#     "opioid__count_study_period_month",
+#     "opioid__count_dx_sepsis_week",
+#     "opioid__count_dx_sepsis_month",
+#     "opioid__count_dx_month",
+#     "opioid__count_dx_week",
+#     "opioid__count_lab_month",
+#     "opioid__count_lab_week",
+#     "opioid__count_medicationrequest",
+#     "opioid__count_rx",
+#     "opioid__count_rx_opioid",
+#     "opioid__count_rx_buprenorphine",
+#     "opioid__count_rx_naloxone",
+#     "opioid__meta_version",
+# ]
diff --git a/cumulus_library_opioid/vocab/additional_rules_builder.py b/cumulus_library_opioid/vocab/additional_rules_builder.py
@@ -85,7 +85,6 @@ def prepare_queries(
                         'r.rui',
                         'r.rel',
                         'r.rela',
-                        'e.rela',
                         'r.str1',
                         'r.str2',
                         'r.keyword',

diff --git a/cumulus_library_opioid/vocab/rxnorm_vsac_builder.py b/cumulus_library_opioid/vocab/rxnorm_vsac_builder.py
@@ -34,12 +34,11 @@ def get_create_view_filter_by(
         ):
             a_schema = a_schema or 'rxnorm.'
             a_join_col = a_join_col or 'a.rxcui'
-            b_join_col = b_join_col or 'b.rxcui'
-            b_table = b_table or f'opioid__{steward}_vsac',
+            b_join_col = b_join_col or 'b.code'
+            b_table = b_table or f'opioid__{steward}_vsac'
             join_clauses = join_clauses or [f"{a_join_col} = {b_join_col}"]
-            view_name = view_name or (
-                    f'{manifest.get_study_prefix()}__{steward}_{a_table}'
-            )
+            view_name = view_name or f'{manifest.get_study_prefix()}__{steward}_{a_table}'
+
 
             return base_templates.get_create_view_from_tables(
                 view_name=view_name,
@@ -111,4 +110,3 @@ def get_create_view_filter_by(
                     b_join_col='b.rxcui1',
                 )
             )
-
diff --git a/cumulus_library_opioid/vocab/static_builder.py b/cumulus_library_opioid/vocab/static_builder.py
@@ -28,56 +28,57 @@ class StaticBuilder(base_table_builder.BaseTableBuilder):
     display_text = "Building static data tables..."
     base_path = pathlib.Path(__file__).resolve().parent
 
-    tables = [  # noqa: RUF012
-        TableConfig(
-            file_path=base_path / "./common/keywords/keywords.tsv",
-            delimiter="\t",
-            table_name="keywords",
-            headers=["STR"],
-            dtypes={"STR": "str"},
-            parquet_types=["STRING"],
-            filtered_path=base_path / "./common/keywords/keywords.filtered.tsv",
-        ),
-        TableConfig(
-            file_path=base_path / "./all_rxcui_str.RXNCONSO_curated.tsv",
-            delimiter="\t",
-            table_name="all_rxnconso_keywords",
-            headers=["RXCUI","STR","TTY","SAB","CODE","keyword","keyword_len"],
-            dtypes={"RXCUI":"str","STR":"str","TTY":"str","SAB":"str","CODE":"str","keyword":"str","keyword_len":"str"},
-            parquet_types=["STRING","STRING","STRING","STRING","STRING","STRING","STRING"],
-        ),
-        TableConfig(
-            file_path=base_path / "./common/expand_rules/expand_rules.tsv",
-            delimiter="\t",
-            table_name="search_rules",
-            headers=[
-                "TTY1",
-                "RELA",
-                "TTY2",
-                "rule",
-            ],
-            dtypes={"TTY1": "str", "RELA": "str", "TTY2": "str", "rule": "str"},
-            parquet_types=["STRING", "STRING", "STRING", "STRING", "BOOLEAN"],
-            ignore_header=True,
-            map_cols=[
-                {
-                    "from": "rule",
-                    "to": "include",
-                    "map_dict": {"yes": True, "no": False},
-                }
-            ],
-        ),
-        # TODO: We should eventually replace this with a source derived from
-        # UMLS directly at some point
-        TableConfig(
-            file_path=base_path / "./common/umls/umls_tty.tsv",
-            delimiter="\t",
-            table_name="umls_tty",
-            headers=["TTY","TTY_STR"],
-            dtypes={"TTY": "str","TTY_STR": "str",},
-            parquet_types=["STRING", "STRING"],
-        ),
-    ]
+    def get_table_configs(self):
+        return [
+            TableConfig(
+                file_path=self.base_path / "./common/keywords/keywords.tsv",
+                delimiter="\t",
+                table_name="keywords",
+                headers=["STR"],
+                dtypes={"STR": "str"},
+                parquet_types=["STRING"],
+                filtered_path=self.base_path / "./common/keywords/keywords.filtered.tsv",
+            ),
+            TableConfig(
+                file_path=self.base_path / "./all_rxcui_str.RXNCONSO_curated.tsv",
+                delimiter="\t",
+                table_name="all_rxnconso_keywords",
+                headers=["RXCUI","STR","TTY","SAB","CODE","keyword","keyword_len"],
+                dtypes={"RXCUI":"str","STR":"str","TTY":"str","SAB":"str","CODE":"str","keyword":"str","keyword_len":"str"},
+                parquet_types=["STRING","STRING","STRING","STRING","STRING","STRING","STRING"],
+            ),
+            TableConfig(
+                file_path=self.base_path / "./common/expand_rules/expand_rules.tsv",
+                delimiter="\t",
+                table_name="search_rules",
+                headers=[
+                    "TTY1",
+                    "RELA",
+                    "TTY2",
+                    "rule",
+                ],
+                dtypes={"TTY1": "str", "RELA": "str", "TTY2": "str", "rule": "str"},
+                parquet_types=["STRING", "STRING", "STRING", "STRING", "BOOLEAN"],
+                ignore_header=True,
+                map_cols=[
+                    {
+                        "from": "rule",
+                        "to": "include",
+                        "map_dict": {"yes": True, "no": False},
+                    }
+                ],
+            ),
+            # TODO: We should eventually replace this with a source derived from
+            # UMLS directly at some point
+            TableConfig(
+                file_path=self.base_path / "./common/umls/umls_tty.tsv",
+                delimiter="\t",
+                table_name="umls_tty",
+                headers=["TTY","TTY_STR"],
+                dtypes={"TTY": "str","TTY_STR": "str",},
+                parquet_types=["STRING", "STRING"],
+            ),
+        ]
 
     def filter_duplicated_meds(
         self, path: pathlib.Path, delimiter: str, filtered_path: pathlib.Path
@@ -127,6 +128,7 @@ def prepare_queries(
         **kwargs,
     ):
         # fetch and add vsac tables
+        self.tables = self.get_table_configs()
         vsac_stewards = vsac.get_vsac_stewards(config)
         for steward in vsac_stewards:
             vsac.download_oid_data(steward, config=config, path=self.base_path /'data')
@@ -148,6 +150,10 @@ def prepare_queries(
 
             for table in self.tables:
                 # Determine what we're using as a source file
+                if table.table_name == 'search_rules':
+                    print(table)
+                    print('base_path: ',self.base_path)
+                    print()
                 if table.filtered_path:
                     self.filter_duplicated_meds(
                         table.file_path, table.delimiter, table.filtered_path
@@ -156,9 +162,15 @@ def prepare_queries(
                 else:
                     path = self.base_path / table.file_path
                 parquet_path = path.with_suffix(".parquet")
-
                 # Read the file, using lots of the TableConfig params, and generate
                 # a parquet file
+                if table.table_name == 'search_rules':
+
+                    tmp_df = pandas.read_csv(
+                        path,
+                        delimiter=table.delimiter
+                        )
+                    print(tmp_df.head())
                 df = pandas.read_csv(
                     path,
                     delimiter=table.delimiter,
@@ -174,6 +186,10 @@ def prepare_queries(
                             df[mapping["from"]].str.lower().map(mapping["map_dict"])
                         )
                         table.headers.append(mapping["to"])
+                if table.table_name == 'search_rules':
+                    print(parquet_path)
+                    print(df.head)
+
                 df.to_parquet(parquet_path)
 
                 # Upload to S3 and create a table that reads from it
@@ -195,4 +211,7 @@ def prepare_queries(
                     )
                 )
                 progress.advance(task)
-
+                if table.table_name == 'search_rules':
+                    print(table)
+                    print('base_path: ',self.base_path)
+                    print()
diff --git a/pyproject.toml b/pyproject.toml
@@ -4,7 +4,7 @@ version = "1.0"
 requires-python = ">= 3.10"
 # If you need python libraries, add them here
 dependencies = [
-    "cumulus-library >= 2.3.0",
+    "cumulus-library >= 3.0.0",
     "sqlfluff >=3",
     "xlrd",
     "openpyxl",

diff --git a/tests/test_additional_rules_builder.py b/tests/test_additional_rules_builder.py
@@ -53,11 +53,11 @@ def test_additional_rules(mock_api, mock_db_config_rxnorm):
             'count':28,
             'first':(
                 1819, '1431077', 'BN', 'BN', 43028489, 'RN', 'reformulated_to', 
-                'reformulated_to', 'Buprenorphine', 'Zubsolv', 'zubsolv'
+                'Buprenorphine', 'Zubsolv', 'zubsolv'
             ),
             'last': (
                 1819, '904871', 'BN', 'BN', 3764389, 'RN', 'reformulated_to', 
-                'reformulated_to', 'Buprenorphine', 'Butrans', 'butrans'
+                'Buprenorphine', 'Butrans', 'butrans'
             ),
         },
         {
@@ -95,6 +95,10 @@ def test_additional_rules(mock_api, mock_db_config_rxnorm):
             f"Select * from {table_conf['name']} order by "
             f"{','.join([str(x+1) for x in range(table_conf['columns'])])}"
         ).fetchall()
+        print(table_conf['name'])
+        print(res[0])
+        print(res[-1])
+        print(len(res))
         assert len(res) == table_conf['count']
         assert res[0] == table_conf['first']
         if table_conf['count'] > 1:

diff --git a/tests/test_rxnorm_vsac_builder.py b/tests/test_rxnorm_vsac_builder.py
@@ -13,7 +13,7 @@
     clear=True,
 )
 @mock.patch("cumulus_library.apis.umls.UmlsApi")
-def test_rxnorm_vsac_builder(mock_api, mock_db_config_rxnorm,  tmp_path):
+def test_rxnorm_vsac_builder(mock_api, mock_db_config_rxnorm):
     with open(pathlib.Path(__file__).parent / "test_data/vsac_resp.json") as f:
         resp = json.load(f)
     mock_api.return_value.get_vsac_valuesets.return_value = resp
@@ -30,7 +30,7 @@ def test_rxnorm_vsac_builder(mock_api, mock_db_config_rxnorm,  tmp_path):
     assert len(res) == 1800
     assert res[0] == (
         1819, 'Product containing buprenorphine (medicinal product)', 'FN', 
-        'SNOMEDCT_US', 1818, 'RN', 'tradename_of', 4716626
+        'SNOMEDCT_US', 1818, 'RN', 'reformulated_to', 4716626
     )
     assert res[-1] == (
         1819, 'Buprenorphine', 'IN', 'GS', 1655031, 'RO', 'has_ingredient', 86130850

diff --git a/tests/test_static_builder.py b/tests/test_static_builder.py
@@ -62,7 +62,7 @@ def test_static_tables(
     shutil.copy(test_path / "filtered.csv", tmp_path / "filtered.csv")
     builder = static_builder.StaticBuilder()
     filtered = tmp_path / filtered if filtered else None
-    builder.tables = [
+    builder.get_table_configs = lambda: [
         static_builder.TableConfig(
             file_path=tmp_path / "static_table.csv",
             delimiter=",",