From 76028798d0ddef4e6d88c0313db691592bcdd58a Mon Sep 17 00:00:00 2001 From: Pablo Galindo Date: Tue, 17 Apr 2018 01:25:13 +0100 Subject: [PATCH 01/10] Improve list() pre-sizing for inputs with known lengths The list() constructor isn't taking full advantage of known input lengths or length hints. This commit makes the constructor pre-size and not over-allocate when the input size is known or can be reasonably estimated. A new test is added to test_list.py and a test needed to be modified in test_descr.py as it assumed that there is only one call to __length_hint__() in the list constructor. --- Lib/test/test_descr.py | 2 +- Lib/test/test_list.py | 9 +++++ .../2018-04-17-01-24-51.bpo-33234.l9IDtp.rst | 2 ++ Objects/listobject.c | 33 +++++++++++++++++++ 4 files changed, 45 insertions(+), 1 deletion(-) create mode 100644 Misc/NEWS.d/next/Core and Builtins/2018-04-17-01-24-51.bpo-33234.l9IDtp.rst diff --git a/Lib/test/test_descr.py b/Lib/test/test_descr.py index 0e7728ebf2d7a2..4ed985e275f48e 100644 --- a/Lib/test/test_descr.py +++ b/Lib/test/test_descr.py @@ -2028,7 +2028,7 @@ class X(Checker): setattr(X, attr, obj) setattr(X, name, SpecialDescr(meth_impl)) runner(X()) - self.assertEqual(record, [1], name) + self.assertGreaterEqual(record.count(1), 1, name) class X(Checker): pass diff --git a/Lib/test/test_list.py b/Lib/test/test_list.py index def4badbf5578e..c5002b12732c98 100644 --- a/Lib/test/test_list.py +++ b/Lib/test/test_list.py @@ -1,5 +1,6 @@ import sys from test import list_tests +from test.support import cpython_only import pickle import unittest @@ -157,5 +158,13 @@ class L(list): pass with self.assertRaises(TypeError): (3,) + L([1,2]) + @cpython_only + def test_preallocation(self): + iterable = [0] * 10 + iter_size = sys.getsizeof(iterable) + + self.assertEqual(iter_size, sys.getsizeof(list([0] * 10))) + self.assertEqual(iter_size, sys.getsizeof(list(range(10)))) + if __name__ == "__main__": unittest.main() diff --git a/Misc/NEWS.d/next/Core and Builtins/2018-04-17-01-24-51.bpo-33234.l9IDtp.rst b/Misc/NEWS.d/next/Core and Builtins/2018-04-17-01-24-51.bpo-33234.l9IDtp.rst new file mode 100644 index 00000000000000..e926472ca71186 --- /dev/null +++ b/Misc/NEWS.d/next/Core and Builtins/2018-04-17-01-24-51.bpo-33234.l9IDtp.rst @@ -0,0 +1,2 @@ +The list constructor will pre-size and not over-allocate when the input size +is known or can be reasonably estimated. diff --git a/Objects/listobject.c b/Objects/listobject.c index c8ffeff09368de..2475581ace31e5 100644 --- a/Objects/listobject.c +++ b/Objects/listobject.c @@ -76,6 +76,32 @@ list_resize(PyListObject *self, Py_ssize_t newsize) return 0; } +static int +list_preallocate_exact(PyListObject *self, Py_ssize_t size) +{ + PyObject **items; + size_t allocated, num_allocated_bytes; + + allocated = (size_t)size; + if (allocated > (size_t)PY_SSIZE_T_MAX / sizeof(PyObject *)) { + PyErr_NoMemory(); + return -1; + } + + if (size == 0) { + allocated = 0; + } + num_allocated_bytes = allocated * sizeof(PyObject *); + items = (PyObject **)PyMem_Malloc(num_allocated_bytes); + if (items == NULL) { + PyErr_NoMemory(); + return -1; + } + self->ob_item = items; + self->allocated = allocated; + return 0; +} + /* Debug statistic to compare allocations with reuse through the free list */ #undef SHOW_ALLOC_COUNT #ifdef SHOW_ALLOC_COUNT @@ -2649,6 +2675,13 @@ list___init___impl(PyListObject *self, PyObject *iterable) (void)_list_clear(self); } if (iterable != NULL) { + Py_ssize_t iter_len = PyObject_LengthHint(iterable, 0); + if (iter_len == -1){ + PyErr_Clear(); + } + if (iter_len > 0 && list_preallocate_exact(self, iter_len)) { + return -1; + } PyObject *rv = list_extend(self, iterable); if (rv == NULL) return -1; From 39a87576b5a3075ec5e0a512065901b436bf8e54 Mon Sep 17 00:00:00 2001 From: Pablo Galindo Date: Sat, 13 Oct 2018 17:48:28 +0100 Subject: [PATCH 02/10] Preallocate only when __len__ is available to avoid loss of performance for small iterables --- Objects/listobject.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Objects/listobject.c b/Objects/listobject.c index 2475581ace31e5..2e8d1cdf9ba29a 100644 --- a/Objects/listobject.c +++ b/Objects/listobject.c @@ -2675,7 +2675,7 @@ list___init___impl(PyListObject *self, PyObject *iterable) (void)_list_clear(self); } if (iterable != NULL) { - Py_ssize_t iter_len = PyObject_LengthHint(iterable, 0); + Py_ssize_t iter_len = PyObject_Length(iterable); if (iter_len == -1){ PyErr_Clear(); } From ab450b3d5a453f36a4988d83b391a79bfbfa5a61 Mon Sep 17 00:00:00 2001 From: Pablo Galindo Date: Mon, 15 Oct 2018 22:13:11 +0100 Subject: [PATCH 03/10] Fix NEWS entry and correct style --- .../2018-04-17-01-24-51.bpo-33234.l9IDtp.rst | 4 ++-- Objects/listobject.c | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/Misc/NEWS.d/next/Core and Builtins/2018-04-17-01-24-51.bpo-33234.l9IDtp.rst b/Misc/NEWS.d/next/Core and Builtins/2018-04-17-01-24-51.bpo-33234.l9IDtp.rst index e926472ca71186..2f9cd62e8e0161 100644 --- a/Misc/NEWS.d/next/Core and Builtins/2018-04-17-01-24-51.bpo-33234.l9IDtp.rst +++ b/Misc/NEWS.d/next/Core and Builtins/2018-04-17-01-24-51.bpo-33234.l9IDtp.rst @@ -1,2 +1,2 @@ -The list constructor will pre-size and not over-allocate when the input size -is known or can be reasonably estimated. +The list constructor will pre-size and not over-allocate when +the input lenght is known. diff --git a/Objects/listobject.c b/Objects/listobject.c index 2e8d1cdf9ba29a..5d34a92ceba771 100644 --- a/Objects/listobject.c +++ b/Objects/listobject.c @@ -2676,7 +2676,7 @@ list___init___impl(PyListObject *self, PyObject *iterable) } if (iterable != NULL) { Py_ssize_t iter_len = PyObject_Length(iterable); - if (iter_len == -1){ + if (iter_len == -1) { PyErr_Clear(); } if (iter_len > 0 && list_preallocate_exact(self, iter_len)) { From 717ca6732b8a15b24542648bdba56812b5ef6867 Mon Sep 17 00:00:00 2001 From: Pablo Galindo Salgado Date: Tue, 23 Oct 2018 13:58:19 +0100 Subject: [PATCH 04/10] Use _PyObject_HasLen to check if the object has lenght to avoid overhead --- Objects/listobject.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/Objects/listobject.c b/Objects/listobject.c index 5d34a92ceba771..1b5b54d765d197 100644 --- a/Objects/listobject.c +++ b/Objects/listobject.c @@ -2675,12 +2675,14 @@ list___init___impl(PyListObject *self, PyObject *iterable) (void)_list_clear(self); } if (iterable != NULL) { - Py_ssize_t iter_len = PyObject_Length(iterable); - if (iter_len == -1) { - PyErr_Clear(); - } - if (iter_len > 0 && list_preallocate_exact(self, iter_len)) { - return -1; + if(_PyObject_HasLen(iterable)){ + Py_ssize_t iter_len = PyObject_Length(iterable); + if (iter_len == -1) { + PyErr_Clear(); + } + if (iter_len > 0 && list_preallocate_exact(self, iter_len)) { + return -1; + } } PyObject *rv = list_extend(self, iterable); if (rv == NULL) From b67844271da9a4cb3046820be74bf6e0c7b46927 Mon Sep 17 00:00:00 2001 From: Pablo Galindo Salgado Date: Tue, 23 Oct 2018 20:01:10 +0100 Subject: [PATCH 05/10] Simplify implementation and don't capture all expections on error --- Objects/listobject.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/Objects/listobject.c b/Objects/listobject.c index 1b5b54d765d197..b74941b1e5df91 100644 --- a/Objects/listobject.c +++ b/Objects/listobject.c @@ -80,7 +80,7 @@ static int list_preallocate_exact(PyListObject *self, Py_ssize_t size) { PyObject **items; - size_t allocated, num_allocated_bytes; + size_t allocated; allocated = (size_t)size; if (allocated > (size_t)PY_SSIZE_T_MAX / sizeof(PyObject *)) { @@ -91,8 +91,7 @@ list_preallocate_exact(PyListObject *self, Py_ssize_t size) if (size == 0) { allocated = 0; } - num_allocated_bytes = allocated * sizeof(PyObject *); - items = (PyObject **)PyMem_Malloc(num_allocated_bytes); + items = (PyObject **)PyMem_New(PyObject*, allocated); if (items == NULL) { PyErr_NoMemory(); return -1; @@ -2675,10 +2674,14 @@ list___init___impl(PyListObject *self, PyObject *iterable) (void)_list_clear(self); } if (iterable != NULL) { - if(_PyObject_HasLen(iterable)){ - Py_ssize_t iter_len = PyObject_Length(iterable); + if (_PyObject_HasLen(iterable) && self->ob_item == NULL) { + Py_ssize_t iter_len = PyObject_Size(iterable); if (iter_len == -1) { - PyErr_Clear(); + if (PyErr_ExceptionMatches(PyExc_Exception)) { + PyErr_Clear(); + } else { + return -1; + } } if (iter_len > 0 && list_preallocate_exact(self, iter_len)) { return -1; From b40bd623c5536dd90a4cc8ebbc72fc5ff0889ca3 Mon Sep 17 00:00:00 2001 From: Pablo Galindo Date: Thu, 25 Oct 2018 22:36:50 +0100 Subject: [PATCH 06/10] Add assert and fix formatting --- Objects/listobject.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/Objects/listobject.c b/Objects/listobject.c index b74941b1e5df91..ed3444a3403629 100644 --- a/Objects/listobject.c +++ b/Objects/listobject.c @@ -79,6 +79,8 @@ list_resize(PyListObject *self, Py_ssize_t newsize) static int list_preallocate_exact(PyListObject *self, Py_ssize_t size) { + assert(self->ob_item == NULL); + PyObject **items; size_t allocated; @@ -2679,7 +2681,8 @@ list___init___impl(PyListObject *self, PyObject *iterable) if (iter_len == -1) { if (PyErr_ExceptionMatches(PyExc_Exception)) { PyErr_Clear(); - } else { + } + else { return -1; } } From 38374862963005541cb0af9b7a9beb2c7a98b467 Mon Sep 17 00:00:00 2001 From: Pablo Galindo Date: Fri, 26 Oct 2018 00:26:04 +0100 Subject: [PATCH 07/10] Remove uneeded test modification --- Lib/test/test_descr.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Lib/test/test_descr.py b/Lib/test/test_descr.py index 4ed985e275f48e..0e7728ebf2d7a2 100644 --- a/Lib/test/test_descr.py +++ b/Lib/test/test_descr.py @@ -2028,7 +2028,7 @@ class X(Checker): setattr(X, attr, obj) setattr(X, name, SpecialDescr(meth_impl)) runner(X()) - self.assertGreaterEqual(record.count(1), 1, name) + self.assertEqual(record, [1], name) class X(Checker): pass From b6ff7ce618bc958a4be4cf29aa98a622cf4cebea Mon Sep 17 00:00:00 2001 From: Pablo Galindo Date: Fri, 26 Oct 2018 10:34:28 +0100 Subject: [PATCH 08/10] Don't preallocate if list is modified when calling PyObject_Size --- Objects/listobject.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/Objects/listobject.c b/Objects/listobject.c index ed3444a3403629..f07311bd086306 100644 --- a/Objects/listobject.c +++ b/Objects/listobject.c @@ -2676,7 +2676,7 @@ list___init___impl(PyListObject *self, PyObject *iterable) (void)_list_clear(self); } if (iterable != NULL) { - if (_PyObject_HasLen(iterable) && self->ob_item == NULL) { + if (_PyObject_HasLen(iterable)) { Py_ssize_t iter_len = PyObject_Size(iterable); if (iter_len == -1) { if (PyErr_ExceptionMatches(PyExc_Exception)) { @@ -2686,7 +2686,8 @@ list___init___impl(PyListObject *self, PyObject *iterable) return -1; } } - if (iter_len > 0 && list_preallocate_exact(self, iter_len)) { + if (iter_len > 0 && self->ob_item == NULL + && list_preallocate_exact(self, iter_len)) { return -1; } } From e7f8c464258acf2e82278909fb5e1a792aa65dbf Mon Sep 17 00:00:00 2001 From: Pablo Galindo Date: Fri, 26 Oct 2018 13:06:34 +0100 Subject: [PATCH 09/10] Improve readability in the error handling branch --- Objects/listobject.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/Objects/listobject.c b/Objects/listobject.c index f07311bd086306..5062511b3ef5d5 100644 --- a/Objects/listobject.c +++ b/Objects/listobject.c @@ -2679,12 +2679,10 @@ list___init___impl(PyListObject *self, PyObject *iterable) if (_PyObject_HasLen(iterable)) { Py_ssize_t iter_len = PyObject_Size(iterable); if (iter_len == -1) { - if (PyErr_ExceptionMatches(PyExc_Exception)) { - PyErr_Clear(); - } - else { + if (!PyErr_ExceptionMatches(PyExc_Exception)) { return -1; } + PyErr_Clear(); } if (iter_len > 0 && self->ob_item == NULL && list_preallocate_exact(self, iter_len)) { From 0a6c8ba7d2bc269767e0bdbec283b36f59817d22 Mon Sep 17 00:00:00 2001 From: Pablo Galindo Salgado Date: Sun, 28 Oct 2018 15:00:04 +0000 Subject: [PATCH 10/10] Capture only TypeError as suggested by Serhiy --- Objects/listobject.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Objects/listobject.c b/Objects/listobject.c index 5062511b3ef5d5..a625fda4c247be 100644 --- a/Objects/listobject.c +++ b/Objects/listobject.c @@ -2679,7 +2679,7 @@ list___init___impl(PyListObject *self, PyObject *iterable) if (_PyObject_HasLen(iterable)) { Py_ssize_t iter_len = PyObject_Size(iterable); if (iter_len == -1) { - if (!PyErr_ExceptionMatches(PyExc_Exception)) { + if (!PyErr_ExceptionMatches(PyExc_TypeError)) { return -1; } PyErr_Clear();