From 3f59b0e0ee835549d068ad4ad85936ddf0ed04cb Mon Sep 17 00:00:00 2001 From: Scott Huberty <52462026+scott-huberty@users.noreply.github.com> Date: Wed, 1 Jan 2025 08:05:54 -0800 Subject: [PATCH] FIX: Support UTF-8 encoding for JSON files (#1357) * WIP: add ensure_ascii flage to _write_json * Revert "WIP: add ensure_ascii flage to _write_json" This reverts commit 4c4767913a1523efa1095a20110982e092c99b37. * Dont Force ASCII encoding in _write_json * TST: Add a test TIL: That json.loads will always convert unicode. So to test that unicode was properly encoded while writing to disk, I had to just read the text on disk without the json module * DOC: update changelog * Commit Dan's suggestion Instead of closing and re-opening the file, rewind the "playhead" to the start of the open file, then use fid.read() as usual Co-authored-by: Daniel McCloy --------- Co-authored-by: Daniel McCloy Co-authored-by: Stefan Appelhoff --- doc/whats_new.rst | 2 ++ mne_bids/tests/test_write.py | 11 +++++++++-- mne_bids/utils.py | 2 +- 3 files changed, 12 insertions(+), 3 deletions(-) diff --git a/doc/whats_new.rst b/doc/whats_new.rst index b685cd3cd..456af0299 100644 --- a/doc/whats_new.rst +++ b/doc/whats_new.rst @@ -23,6 +23,7 @@ The following authors had contributed before. Thank you for sticking around! 
* `Stefan Appelhoff`_ * `Daniel McCloy`_ +* `Scott Huberty`_ Detailed list of changes ~~~~~~~~~~~~~~~~~~~~~~~~ @@ -47,6 +48,7 @@ Detailed list of changes ^^^^^^^^^^^^ - :func:`mne_bids.read_raw_bids` can optionally return an ``event_id`` dictionary suitable for use with :func:`mne.events_from_annotations`, and if a ``values`` column is present in ``events.tsv`` it will be used as the source of the integer event ID codes, by `Daniel McCloy`_ (:gh:`1349`) +- :func:`mne_bids.make_dataset_description` now correctly encodes the dataset description as UTF-8 on disk, by `Scott Huberty`_ (:gh:`1357`) ⚕️ Code health ^^^^^^^^^^^^^^ diff --git a/mne_bids/tests/test_write.py b/mne_bids/tests/test_write.py index a22252dd7..623c869d3 100644 --- a/mne_bids/tests/test_write.py +++ b/mne_bids/tests/test_write.py @@ -376,7 +376,7 @@ def test_make_dataset_description(tmp_path, monkeypatch): make_dataset_description( path=tmp_path, name="tst2", - authors="MNE B., MNE P.", + authors="MNE B., MNE P., MNE Ł.", funding="GSOC2019, GSOC2021", references_and_links="https://doi.org/10.21105/joss.01896", dataset_type="derivative", @@ -386,7 +386,14 @@ def test_make_dataset_description(tmp_path, monkeypatch): with open(op.join(tmp_path, "dataset_description.json"), encoding="utf-8") as fid: dataset_description_json = json.load(fid) - assert dataset_description_json["Authors"] == ["MNE B.", "MNE P."] + assert dataset_description_json["Authors"] == ["MNE B.", "MNE P.", "MNE Ł."] + # If the text on disk is unicode, json.load will convert it. So let's test that + # the text was encoded correctly on disk. + fid.seek(0) + # don't use json.load here, as it will convert unicode to str + dataset_description_string = fid.read() + # Check that U+0141 was correctly encoded as Ł on disk + assert "MNE Ł." 
in dataset_description_string # Check we raise warnings and errors where appropriate with pytest.raises( diff --git a/mne_bids/utils.py b/mne_bids/utils.py index 49abf6efd..da0f7066e 100644 --- a/mne_bids/utils.py +++ b/mne_bids/utils.py @@ -233,7 +233,7 @@ def _write_json(fname, dictionary, overwrite=False): f'"{fname}" already exists. Please set overwrite to True.' ) - json_output = json.dumps(dictionary, indent=4) + json_output = json.dumps(dictionary, indent=4, ensure_ascii=False) with open(fname, "w", encoding="utf-8") as fid: fid.write(json_output) fid.write("\n")