fix eager dtype conversion of value column (#1353)

* assume less about a column named "value"
* improve comment
* ignore xref warning
* fix twine check
* simplify a bit
* better comment
* clearer comments
* add test

---------

Co-authored-by: Scott Huberty <[email protected]>
mne-tools · Dec 31, 2024 · 3492fa0 · 3492fa0
1 parent 46f284b
commit 3492fa0
Show file tree

Hide file tree

Showing 6 changed files with 44 additions and 19 deletions.
diff --git a/doc/conf.py b/doc/conf.py
@@ -114,6 +114,11 @@
 # This patterns also effect to html_static_path and html_extra_path
 exclude_patterns = ["auto_examples/index.rst", "_build", "Thumbs.db", ".DS_Store"]
 
+nitpick_ignore_regex = [
+    # needs https://github.com/sphinx-doc/sphinx/issues/13178
+    ("py:class", r".*pathlib\._local\.Path"),
+]
+
 # HTML options (e.g., theme)
 html_show_sourcelink = False
 html_copy_source = False

diff --git a/examples/convert_eeg_to_bids.py b/examples/convert_eeg_to_bids.py
@@ -59,7 +59,7 @@
 # to the "eyes closed" task.
 subject = 1
 run = 2
-eegbci.load_data(subject=subject, runs=run, update_path=True)
+eegbci.load_data(subjects=subject, runs=run, update_path=True)
 
 # %%
 # Let's see whether the data has been downloaded using a quick visualization
@@ -94,7 +94,7 @@
 # It prevents the data from being loaded and modified when converting to BIDS.
 
 # Load the data from "2 minutes eyes closed rest"
-edf_path = eegbci.load_data(subject=subject, runs=run)[0]
+edf_path = eegbci.load_data(subjects=subject, runs=run)[0]
 raw = mne.io.read_raw_edf(edf_path, preload=False)
 raw.info["line_freq"] = 50  # specify power line frequency as required by BIDS
 

diff --git a/examples/convert_group_studies.py b/examples/convert_group_studies.py
@@ -50,7 +50,7 @@
 run_map = dict(zip(runs, range(1, 4)))
 
 for subject_id in subject_ids:
-    eegbci.load_data(subject=subject_id, runs=runs, update_path=True)
+    eegbci.load_data(subjects=subject_id, runs=runs, update_path=True)
 
 # get path to MNE directory with the downloaded example data
 mne_data_dir = mne.get_config("MNE_DATASETS_EEGBCI_PATH")
@@ -81,7 +81,7 @@
 bids_list = list()
 for subject_id in subject_ids:
     for run in runs:
-        raw_fname = eegbci.load_data(subject=subject_id, runs=run)[0]
+        raw_fname = eegbci.load_data(subjects=subject_id, runs=run)[0]
         raw = mne.io.read_raw_edf(raw_fname)
         raw.info["line_freq"] = 50  # specify power line frequency
         raw_list.append(raw)

diff --git a/mne_bids/read.py b/mne_bids/read.py
@@ -531,7 +531,8 @@ def _handle_events_reading(events_fname, raw):
     logger.info(f"Reading events from {events_fname}.")
     events_dict = _from_tsv(events_fname)
 
-    # drop events where onset is n/a
+    # drop events where onset is n/a; we can't annotate them and thus don't need entries
+    # for them in event_id either
     events_dict = _drop(events_dict, "n/a", "onset")
 
     # Get event descriptions. Use `trial_type` column if available.
@@ -547,9 +548,11 @@ def _handle_events_reading(events_fname, raw):
     # If we lack proper event descriptions, perhaps we have at least an event value?
     elif "value" in events_dict:
         trial_type_col_name = "value"
-    # Worst case: all events will become `n/a` and all values will be `1`
+    # Worst case: all events become `n/a` and all values become `1`
     else:
         trial_type_col_name = None
+        descrs = np.full(len(events_dict["onset"]), "n/a")
+        event_id = {descrs[0]: 1}
 
     if trial_type_col_name is not None:
         # Drop events unrelated to a trial type
@@ -569,26 +572,33 @@ def _handle_events_reading(events_fname, raw):
                         "Creating hierarchical event names."
                     )
                     for ii in idx:
-                        value = values[ii]
-                        value = "na" if value == "n/a" else value
+                        # strip `/` from `n/a` before incorporating into trial type name
+                        value = values[ii] if values[ii] != "n/a" else "na"
                         new_name = f"{trial_type}/{value}"
                         logger.info(f"    Renaming event: {trial_type} -> {new_name}")
                         trial_types[ii] = new_name
-            # drop rows where `value` is `n/a` & convert remaining `value` to int (only
-            # when making our `event_id` dict; `value = n/a` doesn't prevent annotation)
+            # make a copy with rows dropped where `value` is `n/a` (only for making our
+            # `event_id` dict; `value = n/a` doesn't prevent making annotations).
             culled = _drop(events_dict, "n/a", "value")
-            event_id = dict(
-                zip(culled[trial_type_col_name], np.asarray(culled["value"], dtype=int))
-            )
+            # Often (but not always!) the `value` column was written by MNE-BIDS and
+            # represents integer event IDs (as would be found in MNE-Python events
+            # arrays / event_id dicts). But in case not, let's be defensive:
+            culled_vals = culled["value"]
+            try:
+                culled_vals = np.asarray(culled_vals, dtype=float)
+            except ValueError:  # contained strings or complex numbers
+                pass
+            else:
+                try:
+                    culled_vals = culled_vals.astype(int)
+                except ValueError:  # numeric, but has some non-integer values
+                    pass
+            event_id = dict(zip(culled[trial_type_col_name], culled_vals))
         else:
             event_id = dict(zip(trial_types, np.arange(len(trial_types))))
         descrs = np.asarray(trial_types, dtype=str)
 
-    # Worst case: all events become `n/a` and all values become `1`
-    else:
-        descrs = np.full(len(events_dict["onset"]), "n/a")
-        event_id = {descrs[0]: 1}
-    # Deal with "n/a" strings before converting to float
+    # convert onsets & durations to floats ("n/a" onsets were already dropped)
     ons = np.asarray(events_dict["onset"], dtype=float)
     durs = np.array(
         [0 if du == "n/a" else du for du in events_dict["duration"]], dtype=float

diff --git a/mne_bids/tests/test_read.py b/mne_bids/tests/test_read.py
@@ -579,6 +579,16 @@ def test_handle_events_reading(tmp_path):
     ev_arr, ev_dict = mne.events_from_annotations(raw)
     assert event_id == ev_dict == {"n/a": 1}  # fallback behavior
 
+    # Test with only a (non-numeric) `value` column
+    events = {"onset": [10, 15], "duration": [1, 1], "value": ["A", "B"]}
+    events_fname = tmp_path / "bids6" / "sub-01_task-test_events.tsv"
+    events_fname.parent.mkdir()
+    _to_tsv(events, events_fname)
+    raw, event_id = _handle_events_reading(events_fname, raw)
+    # don't pass event_id to mne.events_from_annotations; its values are strings
+    assert event_id == {"A": "A", "B": "B"}
+    assert raw.annotations.description.tolist() == ["A", "B"]
+
 
 @pytest.mark.filterwarnings(warning_str["channel_unit_changed"])
 @testing.requires_testing_data

diff --git a/pyproject.toml b/pyproject.toml
@@ -1,6 +1,6 @@
 [build-system]
 build-backend = "hatchling.build"
-requires = ["hatch-vcs", "hatchling"]
+requires = ["hatch-vcs", "hatchling==1.26.3"]
 
 [project]
 authors = [{name = "The MNE-BIDS developers"}]