From 532fba7f9b3ac0e482d8542a84e87dee190876a2 Mon Sep 17 00:00:00 2001 From: Yonathan Randolph Date: Mon, 27 Sep 2021 19:01:15 -0700 Subject: [PATCH] Add test that lone surrogates are not detected or fixed MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Upgrade simplejson to 3.3.0 since prior to that, decoding json with lone surrogates would raise “JSONDecodeError: Unpaired high surrogate” --- remoteobjects/json.py | 13 +++++-------- setup.py | 2 +- tests/test_http.py | 35 +++++++++++++++++++++++++++++++++++ 3 files changed, 41 insertions(+), 9 deletions(-) diff --git a/remoteobjects/json.py b/remoteobjects/json.py index f1d2568..111cb45 100644 --- a/remoteobjects/json.py +++ b/remoteobjects/json.py @@ -33,14 +33,11 @@ # simplejson >=3.12 from simplejson.errors import errmsg except ImportError: - try: - # simplejson >=3.1.0, <3.12, before this commit: - # https://github.com/simplejson/simplejson/commit/0d36c5cd16055d55e6eceaf252f072a9339e0746 - from simplejson.scanner import errmsg - except ImportError: - # simplejson >=1.1,<3.1.0, before this commit: - # https://github.com/simplejson/simplejson/commit/104b40fcf6aa39d9ba7b240c3c528d1f85e86ef2 - from simplejson.decoder import errmsg + # simplejson >=3.1.0, <3.12, since this commit: + # https://github.com/simplejson/simplejson/commit/104b40fcf6aa39d9ba7b240c3c528d1f85e86ef2 + # and before this commit + # https://github.com/simplejson/simplejson/commit/0d36c5cd16055d55e6eceaf252f072a9339e0746 + from simplejson.scanner import errmsg from simplejson.scanner import py_make_scanner from six import unichr, text_type import sys diff --git a/setup.py b/setup.py index 0bad657..c9660d0 100644 --- a/setup.py +++ b/setup.py @@ -63,7 +63,7 @@ provides=['remoteobjects'], python_requires='>=2.7, !=3.0.*, !=3.1.*, !=3.2.*', install_requires=[ - 'simplejson>=2.0.0', + 'simplejson>=3.3.0', 'httplib2>=0.5.0', 'python-dateutil>=2.1', 'six~=1.16.0', diff --git a/tests/test_http.py 
b/tests/test_http.py index cc6317d..13e0e81 100644 --- a/tests/test_http.py +++ b/tests/test_http.py @@ -30,6 +30,7 @@ import unittest from remoteobjects import fields, http +from six import PY2 from tests import test_dataobject from tests import utils @@ -81,6 +82,40 @@ class BasicMost(self.cls): # Bad characters are replaced with the unicode Replacement Character 0xFFFD. self.assertEqual(b.value, u"image by \ufffdrew Example") h.request.assert_called_once_with(**request) + h.reset_mock() + + # since simplejson 3.3.0, lone surrogates are passed through + # https://github.com/simplejson/simplejson/commit/35816bfe2d0ddeb5ddcc68239683cbb35b7e3ff2 + content = """{"name": "lone surrogate \\ud800", "value": "\\udc00 lone surrogate"}""" + h = utils.mock_http(request, content) + b = BasicMost.get('http://example.com/ohhai', http=h) + # Lone surrogates are passed through as lone surrogates in the python unicode value + self.assertEqual(b.name, u"lone surrogate \ud800") + self.assertEqual(b.value, u"\udc00 lone surrogate") + h.request.assert_called_once_with(**request) + + content = u"""{"name": "100 \u20AC", "value": "13000 \u00A5"}""".encode('utf-8') + h = utils.mock_http(request, content) + b = BasicMost.get('http://example.com/ohhai', http=h) + # JSON containing non-ascii UTF-8 should be decoded to unicode strings + self.assertEqual(b.name, u"100 \u20AC") + self.assertEqual(b.value, u"13000 \u00A5") + h.request.assert_called_once_with(**request) + + content = b"""{"name": "lone surrogate \xed\xa0\x80", "value": "\xed\xb0\x80 lone surrogate"}""" + h = utils.mock_http(request, content) + b = BasicMost.get('http://example.com/ohhai', http=h) + # Lone surrogates are passed through as lone surrogates in the python unicode value + if PY2: + # in python2, our JSONDecoder does not detect naked lone surrogates + self.assertEqual(b.name, u"lone surrogate \ud800") + self.assertEqual(b.value, u"\udc00 lone surrogate") + else: + # in python3, bytes.decode replaces lone surrogates 
with replacement char + self.assertEqual(b.name, u"lone surrogate \ufffd\ufffd\ufffd") + self.assertEqual(b.value, u"\ufffd\ufffd\ufffd lone surrogate") + + h.request.assert_called_once_with(**request) def test_post(self):