Skip to content

bpo-25457: Allow json.encode() to handle mixed keys when sort_keys=True #8011

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 9 commits into
base: main
Choose a base branch
from
61 changes: 37 additions & 24 deletions Lib/json/encoder.py
Original file line number Diff line number Diff line change
Expand Up @@ -331,6 +331,37 @@ def _iterencode_list(lst, _current_indent_level):
del markers[markerid]

def _iterencode_dict(dct, _current_indent_level):
def _coerce_key(key):
if isinstance(key, str):
return key
# JavaScript is weakly typed for these, so it makes sense to
# also allow them. Many encoders seem to do something like this.
if isinstance(key, float):
# see comment for int/float in _make_iterencode
return _floatstr(key)
if key is True:
return 'true'
if key is False:
return 'false'
if key is None:
return 'null'
if isinstance(key, int):
# see comment for int/float in _make_iterencode
return _intstr(key)

if _skipkeys:
return None
else:
raise TypeError(f'keys must be str, int, float, bool or None, '
f'not {key.__class__.__name__}')

def _coerce_items(items):
for (k,v) in items:
k = _coerce_key(k) # Coerce or throw
if k is None:
continue
yield (k,v)

if not dct:
yield '{}'
return
Expand All @@ -349,32 +380,14 @@ def _iterencode_dict(dct, _current_indent_level):
newline_indent = None
item_separator = _item_separator
first = True

# Coerce keys to strings
items = _coerce_items(dct.items())

if _sort_keys:
items = sorted(dct.items())
else:
items = dct.items()
items = sorted(items)

for key, value in items:
if isinstance(key, str):
pass
# JavaScript is weakly typed for these, so it makes sense to
# also allow them. Many encoders seem to do something like this.
elif isinstance(key, float):
# see comment for int/float in _make_iterencode
key = _floatstr(key)
elif key is True:
key = 'true'
elif key is False:
key = 'false'
elif key is None:
key = 'null'
elif isinstance(key, int):
# see comment for int/float in _make_iterencode
key = _intstr(key)
elif _skipkeys:
continue
else:
raise TypeError(f'keys must be str, int, float, bool or None, '
f'not {key.__class__.__name__}')
if first:
first = False
else:
Expand Down
2 changes: 1 addition & 1 deletion Lib/test/test_json/test_dump.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ def test_encode_truefalse(self):
'{"false": true, "true": false}')
self.assertEqual(self.dumps(
{2: 3.0, 4.0: 5, False: 1, 6: True}, sort_keys=True),
'{"false": 1, "2": 3.0, "4.0": 5, "6": true}')
'{"2": 3.0, "4.0": 5, "6": true, "false": 1}')

# Issue 16228: Crash on encoding resized list
def test_encode_mutated(self):
Expand Down
4 changes: 0 additions & 4 deletions Lib/test/test_json/test_speedups.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,3 @@ def test(name):
self.assertRaises(ZeroDivisionError, test, 'check_circular')
self.assertRaises(ZeroDivisionError, test, 'allow_nan')
self.assertRaises(ZeroDivisionError, test, 'sort_keys')

def test_unsortable_keys(self):
with self.assertRaises(TypeError):
self.json.encoder.JSONEncoder(sort_keys=True).encode({'a': 1, 1: 'a'})
43 changes: 35 additions & 8 deletions Modules/_json.c
Original file line number Diff line number Diff line change
Expand Up @@ -1527,6 +1527,7 @@ encoder_listencode_dict(PyEncoderObject *s, _PyAccu *acc,
PyObject *ident = NULL;
PyObject *it = NULL;
PyObject *items;
PyObject *coerced_items;
PyObject *item = NULL;
Py_ssize_t idx;

Expand Down Expand Up @@ -1572,17 +1573,15 @@ encoder_listencode_dict(PyEncoderObject *s, _PyAccu *acc,
items = PyMapping_Items(dct);
if (items == NULL)
goto bail;
if (s->sort_keys && PyList_Sort(items) < 0) {
Py_DECREF(items);
goto bail;
}

coerced_items = PyList_New(0);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This seems to introduce a few possible refleaks for coerced_items, effectively in each goto bail location where coerced_items isn't NULL, prior to the Py_DECREF(coerced_items). IMO, the easiest solution would be adding a Py_XDECREF(coerced_items) to bail since there are multiple paths where this could potentially occur.

There should probably also be the following error condition, in case the list can't be instantiated:

Suggested change
coerced_items = PyList_New(0);
coerced_items = PyList_New(0);
if (coerced_items == NULL) {
goto bail;
}

What do you think @serhiy-storchaka? My experience with the C-API is fairly limited, so I'm not entirely certain about the above.

it = PyObject_GetIter(items);
Py_DECREF(items);
if (it == NULL)
goto bail;
idx = 0;

while ((item = PyIter_Next(it)) != NULL) {
PyObject *encoded, *key, *value;
PyObject *key, *value, *coerced_item;
if (!PyTuple_Check(item) || PyTuple_GET_SIZE(item) != 2) {
PyErr_SetString(PyExc_ValueError, "items must return 2-tuples");
goto bail;
Expand All @@ -1598,8 +1597,8 @@ encoder_listencode_dict(PyEncoderObject *s, _PyAccu *acc,
goto bail;
}
else if (key == Py_True || key == Py_False || key == Py_None) {
/* This must come before the PyLong_Check because
True and False are also 1 and 0.*/
/* This must come before the PyLong_Check because
True and False are also 1 and 0.*/
kstr = _encoded_const(key);
if (kstr == NULL)
goto bail;
Expand All @@ -1621,6 +1620,34 @@ encoder_listencode_dict(PyEncoderObject *s, _PyAccu *acc,
goto bail;
}

value = PyTuple_GET_ITEM(item, 1);
coerced_item = PyTuple_Pack(2, kstr, value);
if (coerced_item == NULL) {
goto bail;
}
/* Append instead of set because skipkeys=True may
"shrink" the number of items */
if (-1 == PyList_Append(coerced_items, coerced_item))
goto bail;
}

if (s->sort_keys && PyList_Sort(coerced_items) < 0) {
Py_DECREF(coerced_items);
goto bail;
}
it = PyObject_GetIter(coerced_items);
Py_DECREF(coerced_items);
if (it == NULL)
goto bail;
idx = 0;
while ((item = PyIter_Next(it)) != NULL) {
PyObject *encoded, *value;
if (!PyTuple_Check(item) || PyTuple_GET_SIZE(item) != 2) {
PyErr_SetString(PyExc_ValueError, "items must return 2-tuples");
goto bail;
}
kstr = PyTuple_GET_ITEM(item, 0);

if (idx) {
if (_PyAccu_Accumulate(acc, s->item_separator))
goto bail;
Expand Down