From 3492fa01157d921f77b93ea31a4db192c47d3bb0 Mon Sep 17 00:00:00 2001 From: Daniel McCloy Date: Tue, 31 Dec 2024 12:45:42 -0600 Subject: [PATCH] fix eager dtype conversion of value column (#1353) * assume less about a column named "value" * improve comment * ignore xref warning * fix twine check * simplify a bit * better comment * clearer comments * add test --------- Co-authored-by: Scott Huberty --- doc/conf.py | 5 ++++ examples/convert_eeg_to_bids.py | 4 ++-- examples/convert_group_studies.py | 4 ++-- mne_bids/read.py | 38 +++++++++++++++++++------------ mne_bids/tests/test_read.py | 10 ++++++++ pyproject.toml | 2 +- 6 files changed, 44 insertions(+), 19 deletions(-) diff --git a/doc/conf.py b/doc/conf.py index e61a693d7..d4cdaa000 100644 --- a/doc/conf.py +++ b/doc/conf.py @@ -114,6 +114,11 @@ # This patterns also effect to html_static_path and html_extra_path exclude_patterns = ["auto_examples/index.rst", "_build", "Thumbs.db", ".DS_Store"] +nitpick_ignore_regex = [ + # needs https://github.com/sphinx-doc/sphinx/issues/13178 + ("py:class", r".*pathlib\._local\.Path"), +] + # HTML options (e.g., theme) html_show_sourcelink = False html_copy_source = False diff --git a/examples/convert_eeg_to_bids.py b/examples/convert_eeg_to_bids.py index dbf0acdd7..33e2cce40 100644 --- a/examples/convert_eeg_to_bids.py +++ b/examples/convert_eeg_to_bids.py @@ -59,7 +59,7 @@ # to the "eyes closed" task. subject = 1 run = 2 -eegbci.load_data(subject=subject, runs=run, update_path=True) +eegbci.load_data(subjects=subject, runs=run, update_path=True) # %% # Let's see whether the data has been downloaded using a quick visualization @@ -94,7 +94,7 @@ # It prevents the data from being loaded and modified when converting to BIDS. 
# Load the data from "2 minutes eyes closed rest" -edf_path = eegbci.load_data(subject=subject, runs=run)[0] +edf_path = eegbci.load_data(subjects=subject, runs=run)[0] raw = mne.io.read_raw_edf(edf_path, preload=False) raw.info["line_freq"] = 50 # specify power line frequency as required by BIDS diff --git a/examples/convert_group_studies.py b/examples/convert_group_studies.py index 24dc4cee8..7567c95aa 100644 --- a/examples/convert_group_studies.py +++ b/examples/convert_group_studies.py @@ -50,7 +50,7 @@ run_map = dict(zip(runs, range(1, 4))) for subject_id in subject_ids: - eegbci.load_data(subject=subject_id, runs=runs, update_path=True) + eegbci.load_data(subjects=subject_id, runs=runs, update_path=True) # get path to MNE directory with the downloaded example data mne_data_dir = mne.get_config("MNE_DATASETS_EEGBCI_PATH") @@ -81,7 +81,7 @@ bids_list = list() for subject_id in subject_ids: for run in runs: - raw_fname = eegbci.load_data(subject=subject_id, runs=run)[0] + raw_fname = eegbci.load_data(subjects=subject_id, runs=run)[0] raw = mne.io.read_raw_edf(raw_fname) raw.info["line_freq"] = 50 # specify power line frequency raw_list.append(raw) diff --git a/mne_bids/read.py b/mne_bids/read.py index 8b1a6ead4..149ca3517 100644 --- a/mne_bids/read.py +++ b/mne_bids/read.py @@ -531,7 +531,8 @@ def _handle_events_reading(events_fname, raw): logger.info(f"Reading events from {events_fname}.") events_dict = _from_tsv(events_fname) - # drop events where onset is n/a + # drop events where onset is n/a; we can't annotate them and thus don't need entries + # for them in event_id either events_dict = _drop(events_dict, "n/a", "onset") # Get event descriptions. Use `trial_type` column if available. @@ -547,9 +548,11 @@ def _handle_events_reading(events_fname, raw): # If we lack proper event descriptions, perhaps we have at least an event value? 
elif "value" in events_dict: trial_type_col_name = "value" - # Worst case: all events will become `n/a` and all values will be `1` + # Worst case: all events become `n/a` and all values become `1` else: trial_type_col_name = None + descrs = np.full(len(events_dict["onset"]), "n/a") + event_id = {descrs[0]: 1} if trial_type_col_name is not None: # Drop events unrelated to a trial type @@ -569,26 +572,33 @@ def _handle_events_reading(events_fname, raw): "Creating hierarchical event names." ) for ii in idx: - value = values[ii] - value = "na" if value == "n/a" else value + # strip `/` from `n/a` before incorporating into trial type name + value = values[ii] if values[ii] != "n/a" else "na" new_name = f"{trial_type}/{value}" logger.info(f" Renaming event: {trial_type} -> {new_name}") trial_types[ii] = new_name - # drop rows where `value` is `n/a` & convert remaining `value` to int (only - # when making our `event_id` dict; `value = n/a` doesn't prevent annotation) + # make a copy with rows dropped where `value` is `n/a` (only for making our + # `event_id` dict; `value = n/a` doesn't prevent making annotations). culled = _drop(events_dict, "n/a", "value") - event_id = dict( - zip(culled[trial_type_col_name], np.asarray(culled["value"], dtype=int)) - ) + # Often (but not always!) the `value` column was written by MNE-BIDS and + # represents integer event IDs (as would be found in MNE-Python events + # arrays / event_id dicts). 
But in case not, let's be defensive: + culled_vals = culled["value"] + try: + culled_vals = np.asarray(culled_vals, dtype=float) + except ValueError: # contained strings or complex numbers + pass + else: + try: + culled_vals = culled_vals.astype(int) + except ValueError: # numeric, but has some non-integer values + pass + event_id = dict(zip(culled[trial_type_col_name], culled_vals)) else: event_id = dict(zip(trial_types, np.arange(len(trial_types)))) descrs = np.asarray(trial_types, dtype=str) - # Worst case: all events become `n/a` and all values become `1` - else: - descrs = np.full(len(events_dict["onset"]), "n/a") - event_id = {descrs[0]: 1} - # Deal with "n/a" strings before converting to float + # convert onsets & durations to floats ("n/a" onsets were already dropped) ons = np.asarray(events_dict["onset"], dtype=float) durs = np.array( [0 if du == "n/a" else du for du in events_dict["duration"]], dtype=float diff --git a/mne_bids/tests/test_read.py b/mne_bids/tests/test_read.py index ef5b00777..70d56dbd2 100644 --- a/mne_bids/tests/test_read.py +++ b/mne_bids/tests/test_read.py @@ -579,6 +579,16 @@ def test_handle_events_reading(tmp_path): ev_arr, ev_dict = mne.events_from_annotations(raw) assert event_id == ev_dict == {"n/a": 1} # fallback behavior + # Test with only a (non-numeric) `value` column + events = {"onset": [10, 15], "duration": [1, 1], "value": ["A", "B"]} + events_fname = tmp_path / "bids6" / "sub-01_task-test_events.tsv" + events_fname.parent.mkdir() + _to_tsv(events, events_fname) + raw, event_id = _handle_events_reading(events_fname, raw) + # don't pass event_id to mne.events_from_annotations; its values are strings + assert event_id == {"A": "A", "B": "B"} + assert raw.annotations.description.tolist() == ["A", "B"] + @pytest.mark.filterwarnings(warning_str["channel_unit_changed"]) @testing.requires_testing_data diff --git a/pyproject.toml b/pyproject.toml index b30bf0be3..a44165931 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 
+1,6 @@ [build-system] build-backend = "hatchling.build" -requires = ["hatch-vcs", "hatchling"] +requires = ["hatch-vcs", "hatchling==1.26.3"] [project] authors = [{name = "The MNE-BIDS developers"}]