Skip to content

Commit

Permalink
fix eager dtype conversion of value column (#1353)
Browse files Browse the repository at this point in the history
* assume less about a column named "value"

* improve comment

* ignore xref warning

* fix twine check

* simplify a bit

* better comment

* clearer comments

* add test

---------

Co-authored-by: Scott Huberty <[email protected]>
  • Loading branch information
drammock and Scott Huberty authored Dec 31, 2024
1 parent 46f284b commit 3492fa0
Show file tree
Hide file tree
Showing 6 changed files with 44 additions and 19 deletions.
5 changes: 5 additions & 0 deletions doc/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -114,6 +114,11 @@
# These patterns also affect html_static_path and html_extra_path
exclude_patterns = ["auto_examples/index.rst", "_build", "Thumbs.db", ".DS_Store"]

nitpick_ignore_regex = [
# ignore cross-reference warnings for pathlib's private `_local.Path` class;
# needs https://github.com/sphinx-doc/sphinx/issues/13178
("py:class", r".*pathlib\._local\.Path"),
]

# HTML options (e.g., theme)
html_show_sourcelink = False
html_copy_source = False
Expand Down
4 changes: 2 additions & 2 deletions examples/convert_eeg_to_bids.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@
# to the "eyes closed" task.
subject = 1
run = 2
eegbci.load_data(subject=subject, runs=run, update_path=True)
eegbci.load_data(subjects=subject, runs=run, update_path=True)

# %%
# Let's see whether the data has been downloaded using a quick visualization
Expand Down Expand Up @@ -94,7 +94,7 @@
# It prevents the data from being loaded and modified when converting to BIDS.

# Load the data from "2 minutes eyes closed rest"
edf_path = eegbci.load_data(subject=subject, runs=run)[0]
edf_path = eegbci.load_data(subjects=subject, runs=run)[0]
raw = mne.io.read_raw_edf(edf_path, preload=False)
raw.info["line_freq"] = 50 # specify power line frequency as required by BIDS

Expand Down
4 changes: 2 additions & 2 deletions examples/convert_group_studies.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@
run_map = dict(zip(runs, range(1, 4)))

for subject_id in subject_ids:
eegbci.load_data(subject=subject_id, runs=runs, update_path=True)
eegbci.load_data(subjects=subject_id, runs=runs, update_path=True)

# get path to MNE directory with the downloaded example data
mne_data_dir = mne.get_config("MNE_DATASETS_EEGBCI_PATH")
Expand Down Expand Up @@ -81,7 +81,7 @@
bids_list = list()
for subject_id in subject_ids:
for run in runs:
raw_fname = eegbci.load_data(subject=subject_id, runs=run)[0]
raw_fname = eegbci.load_data(subjects=subject_id, runs=run)[0]
raw = mne.io.read_raw_edf(raw_fname)
raw.info["line_freq"] = 50 # specify power line frequency
raw_list.append(raw)
Expand Down
38 changes: 24 additions & 14 deletions mne_bids/read.py
Original file line number Diff line number Diff line change
Expand Up @@ -531,7 +531,8 @@ def _handle_events_reading(events_fname, raw):
logger.info(f"Reading events from {events_fname}.")
events_dict = _from_tsv(events_fname)

# drop events where onset is n/a
# drop events where onset is n/a; we can't annotate them and thus don't need entries
# for them in event_id either
events_dict = _drop(events_dict, "n/a", "onset")

# Get event descriptions. Use `trial_type` column if available.
Expand All @@ -547,9 +548,11 @@ def _handle_events_reading(events_fname, raw):
# If we lack proper event descriptions, perhaps we have at least an event value?
elif "value" in events_dict:
trial_type_col_name = "value"
# Worst case: all events will become `n/a` and all values will be `1`
# Worst case: all events become `n/a` and all values become `1`
else:
trial_type_col_name = None
descrs = np.full(len(events_dict["onset"]), "n/a")
event_id = {descrs[0]: 1}

if trial_type_col_name is not None:
# Drop events unrelated to a trial type
Expand All @@ -569,26 +572,33 @@ def _handle_events_reading(events_fname, raw):
"Creating hierarchical event names."
)
for ii in idx:
value = values[ii]
value = "na" if value == "n/a" else value
# strip `/` from `n/a` before incorporating into trial type name
value = values[ii] if values[ii] != "n/a" else "na"
new_name = f"{trial_type}/{value}"
logger.info(f" Renaming event: {trial_type} -> {new_name}")
trial_types[ii] = new_name
# drop rows where `value` is `n/a` & convert remaining `value` to int (only
# when making our `event_id` dict; `value = n/a` doesn't prevent annotation)
# make a copy with rows dropped where `value` is `n/a` (only for making our
# `event_id` dict; `value = n/a` doesn't prevent making annotations).
culled = _drop(events_dict, "n/a", "value")
event_id = dict(
zip(culled[trial_type_col_name], np.asarray(culled["value"], dtype=int))
)
# Often (but not always!) the `value` column was written by MNE-BIDS and
# represents integer event IDs (as would be found in MNE-Python events
# arrays / event_id dicts). But in case not, let's be defensive:
culled_vals = culled["value"]
try:
culled_vals = np.asarray(culled_vals, dtype=float)
except ValueError: # contained strings or complex numbers
pass
else:
try:
culled_vals = culled_vals.astype(int)
except ValueError: # numeric, but has some non-integer values
pass
event_id = dict(zip(culled[trial_type_col_name], culled_vals))
else:
event_id = dict(zip(trial_types, np.arange(len(trial_types))))
descrs = np.asarray(trial_types, dtype=str)

# Worst case: all events become `n/a` and all values become `1`
else:
descrs = np.full(len(events_dict["onset"]), "n/a")
event_id = {descrs[0]: 1}
# Deal with "n/a" strings before converting to float
# convert onsets & durations to floats ("n/a" onsets were already dropped)
ons = np.asarray(events_dict["onset"], dtype=float)
durs = np.array(
[0 if du == "n/a" else du for du in events_dict["duration"]], dtype=float
Expand Down
10 changes: 10 additions & 0 deletions mne_bids/tests/test_read.py
Original file line number Diff line number Diff line change
Expand Up @@ -579,6 +579,16 @@ def test_handle_events_reading(tmp_path):
ev_arr, ev_dict = mne.events_from_annotations(raw)
assert event_id == ev_dict == {"n/a": 1} # fallback behavior

# Test with only a (non-numeric) `value` column
events = {"onset": [10, 15], "duration": [1, 1], "value": ["A", "B"]}
events_fname = tmp_path / "bids6" / "sub-01_task-test_events.tsv"
events_fname.parent.mkdir()
_to_tsv(events, events_fname)
raw, event_id = _handle_events_reading(events_fname, raw)
# don't pass event_id to mne.events_from_annotations; its values are strings
assert event_id == {"A": "A", "B": "B"}
assert raw.annotations.description.tolist() == ["A", "B"]


@pytest.mark.filterwarnings(warning_str["channel_unit_changed"])
@testing.requires_testing_data
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[build-system]
build-backend = "hatchling.build"
requires = ["hatch-vcs", "hatchling"]
requires = ["hatch-vcs", "hatchling==1.26.3"]

[project]
authors = [{name = "The MNE-BIDS developers"}]
Expand Down

0 comments on commit 3492fa0

Please sign in to comment.