From 44c79e106ac3fd92e97516f8b65e6f39cc79dcf5 Mon Sep 17 00:00:00 2001 From: Eddie Elizondo Date: Sat, 19 Feb 2022 17:07:19 -0800 Subject: [PATCH 1/3] bpo-40255: Implement Immortal Instances - Optimization 1 --- Include/boolobject.h | 4 +- Include/internal/pycore_object.h | 2 +- Include/moduleobject.h | 12 +++-- Include/object.h | 47 ++++++++++++++++++- Lib/ctypes/test/test_python_api.py | 3 +- Lib/test/test_builtin.py | 25 +++++++++- Lib/test/test_regrtest.py | 2 +- Lib/test/test_sys.py | 3 +- .../2021-12-18-08-57-24.bpo-40255.XDDrSO.rst | 2 + Objects/longobject.c | 4 +- Objects/object.c | 7 ++- 11 files changed, 92 insertions(+), 19 deletions(-) create mode 100644 Misc/NEWS.d/next/Core and Builtins/2021-12-18-08-57-24.bpo-40255.XDDrSO.rst diff --git a/Include/boolobject.h b/Include/boolobject.h index cda6f89a99e9a2..76339fa6faf07c 100644 --- a/Include/boolobject.h +++ b/Include/boolobject.h @@ -31,8 +31,8 @@ PyAPI_FUNC(int) Py_IsFalse(PyObject *x); #define Py_IsFalse(x) Py_Is((x), Py_False) /* Macros for returning Py_True or Py_False, respectively */ -#define Py_RETURN_TRUE return Py_NewRef(Py_True) -#define Py_RETURN_FALSE return Py_NewRef(Py_False) +#define Py_RETURN_TRUE return Py_True +#define Py_RETURN_FALSE return Py_False /* Function to return a bool from a C long */ PyAPI_FUNC(PyObject *) PyBool_FromLong(long); diff --git a/Include/internal/pycore_object.h b/Include/internal/pycore_object.h index 65abc1884c3bbd..86fbb243570889 100644 --- a/Include/internal/pycore_object.h +++ b/Include/internal/pycore_object.h @@ -17,7 +17,7 @@ extern "C" { #define _PyObject_IMMORTAL_INIT(type) \ { \ - .ob_refcnt = 999999999, \ + .ob_refcnt = _Py_IMMORTAL_REFCNT, \ .ob_type = type, \ } #define _PyVarObject_IMMORTAL_INIT(type, size) \ diff --git a/Include/moduleobject.h b/Include/moduleobject.h index 49b116ca1c3587..a31f11b4253d7b 100644 --- a/Include/moduleobject.h +++ b/Include/moduleobject.h @@ -48,11 +48,13 @@ typedef struct PyModuleDef_Base { PyObject* m_copy; } PyModuleDef_Base; -#define PyModuleDef_HEAD_INIT { \ - PyObject_HEAD_INIT(NULL) \ - NULL, /* m_init */ \ - 0, /* m_index */ \ - NULL, /* m_copy */ \ +// TODO(eduardo-elizondo): This is only used to simplify the review of GH-19474 +// Rather than changing this API, we'll introduce PyModuleDef_HEAD_IMMORTAL_INIT +#define PyModuleDef_HEAD_INIT { \ + PyObject_HEAD_IMMORTAL_INIT(NULL) \ + NULL, /* m_init */ \ + 0, /* m_index */ \ + NULL, /* m_copy */ \ } struct PyModuleDef_Slot; diff --git a/Include/object.h b/Include/object.h index 3566c736a535c4..d57efefc8fd58f 100644 --- a/Include/object.h +++ b/Include/object.h @@ -81,12 +81,34 @@ typedef struct _typeobject PyTypeObject; /* PyObject_HEAD defines the initial segment of every PyObject. */ #define PyObject_HEAD PyObject ob_base; +/* +Immortalization: + +This marks the reference count bit that will be used to define immortality. +The GC bit-shifts refcounts left by two, and after that shift it still needs +to be larger than zero, so it's placed after the first three high bits. + +For backwards compatibility the actual reference count of an immortal instance +is set to higher than just the immortal bit. This will ensure that the immortal +bit will remain active, even with extensions compiled without the updated checks +in Py_INCREF and Py_DECREF. This can be safely changed to a smaller value if +additional bits are needed in the reference count field. +*/ +#define _Py_IMMORTAL_BIT_OFFSET (8 * sizeof(Py_ssize_t) - 4) +#define _Py_IMMORTAL_BIT (1LL << _Py_IMMORTAL_BIT_OFFSET) +#define _Py_IMMORTAL_REFCNT (_Py_IMMORTAL_BIT + (_Py_IMMORTAL_BIT / 2)) + #define PyObject_HEAD_INIT(type) \ { _PyObject_EXTRA_INIT \ 1, type }, +#define PyObject_HEAD_IMMORTAL_INIT(type) \ + { _PyObject_EXTRA_INIT _Py_IMMORTAL_REFCNT, type }, + +// TODO(eduardo-elizondo): This is only used to simplify the review of GH-19474 +// Rather than changing this API, we'll introduce PyVarObject_HEAD_IMMORTAL_INIT #define PyVarObject_HEAD_INIT(type, size) \ - { PyObject_HEAD_INIT(type) size }, + { PyObject_HEAD_IMMORTAL_INIT(type) size }, /* PyObject_VAR_HEAD defines the initial segment of all variable-size * container objects. These end with a declaration of an array with 1 @@ -146,6 +168,18 @@ static inline Py_ssize_t Py_SIZE(const PyVarObject *ob) { #define Py_SIZE(ob) Py_SIZE(_PyVarObject_CAST_CONST(ob)) +static inline int _Py_IsImmortal(PyObject *op) +{ + return (op->ob_refcnt & _Py_IMMORTAL_BIT) != 0; +} + +static inline void _Py_SetImmortal(PyObject *op) +{ + if (op) { + op->ob_refcnt = _Py_IMMORTAL_REFCNT; + } +} + static inline int Py_IS_TYPE(const PyObject *ob, const PyTypeObject *type) { // bpo-44378: Don't use Py_TYPE() since Py_TYPE() requires a non-const // object. @@ -155,6 +189,9 @@ static inline int Py_IS_TYPE(const PyObject *ob, const PyTypeObject *type) { static inline void Py_SET_REFCNT(PyObject *ob, Py_ssize_t refcnt) { + if (_Py_IsImmortal(ob)) { + return; + } ob->ob_refcnt = refcnt; } #define Py_SET_REFCNT(ob, refcnt) Py_SET_REFCNT(_PyObject_CAST(ob), refcnt) @@ -483,6 +520,9 @@ static inline void Py_INCREF(PyObject *op) #else // Non-limited C API and limited C API for Python 3.9 and older access // directly PyObject.ob_refcnt. + if (_Py_IsImmortal(op)) { + return; + } #ifdef Py_REF_DEBUG _Py_RefTotal++; #endif @@ -503,6 +543,9 @@ static inline void Py_DECREF( #else // Non-limited C API and limited C API for Python 3.9 and older access // directly PyObject.ob_refcnt. + if (_Py_IsImmortal(op)) { + return; + } #ifdef Py_REF_DEBUG _Py_RefTotal--; #endif @@ -627,7 +670,7 @@ PyAPI_FUNC(int) Py_IsNone(PyObject *x); #define Py_IsNone(x) Py_Is((x), Py_None) /* Macro for returning Py_None from a function */ -#define Py_RETURN_NONE return Py_NewRef(Py_None) +#define Py_RETURN_NONE return Py_None /* Py_NotImplemented is a singleton used to signal that an operation is diff --git a/Lib/ctypes/test/test_python_api.py b/Lib/ctypes/test/test_python_api.py index 49571f97bbe152..de8989e2c3300f 100644 --- a/Lib/ctypes/test/test_python_api.py +++ b/Lib/ctypes/test/test_python_api.py @@ -46,7 +46,8 @@ def test_PyLong_Long(self): pythonapi.PyLong_AsLong.restype = c_long res = pythonapi.PyLong_AsLong(42) - self.assertEqual(grc(res), ref42 + 1) + # Small int refcnts don't change + self.assertEqual(grc(res), ref42) del res self.assertEqual(grc(42), ref42) diff --git a/Lib/test/test_builtin.py b/Lib/test/test_builtin.py index a601a524d6eb72..8cd418be94ce62 100644 --- a/Lib/test/test_builtin.py +++ b/Lib/test/test_builtin.py @@ -27,7 +27,7 @@ from types import AsyncGeneratorType, FunctionType from operator import neg from test import support -from test.support import (swap_attr, maybe_get_event_loop_policy) +from test.support import (cpython_only, swap_attr, maybe_get_event_loop_policy) from test.support.os_helper import (EnvironmentVarGuard, TESTFN, unlink) from test.support.script_helper import assert_python_ok from test.support.warnings_helper import check_warnings @@ -2214,6 +2214,29 @@ def __del__(self): self.assertEqual(["before", "after"], out.decode().splitlines()) +@cpython_only +class ImmortalTests(unittest.TestCase): + def test_immortal(self): + none_refcount = sys.getrefcount(None) + true_refcount = sys.getrefcount(True) + false_refcount = sys.getrefcount(False) + smallint_refcount = sys.getrefcount(100) + + # Assert that all of these immortal instances have large ref counts + self.assertGreater(none_refcount, 1e8) + self.assertGreater(true_refcount, 1e8) + self.assertGreater(false_refcount, 1e8) + self.assertGreater(smallint_refcount, 1e8) + + # Confirm that the refcount doesn't change even with a new ref to them + l = [None, True, False, 100] + self.assertEqual(sys.getrefcount(None), none_refcount) + self.assertEqual(sys.getrefcount(True), true_refcount) + self.assertEqual(sys.getrefcount(False), false_refcount) + self.assertEqual(sys.getrefcount(100), smallint_refcount) + + + class TestType(unittest.TestCase): def test_new_type(self): A = type('A', (), {}) diff --git a/Lib/test/test_regrtest.py b/Lib/test/test_regrtest.py index babc8a690877a2..abc7b63c9e92ea 100644 --- a/Lib/test/test_regrtest.py +++ b/Lib/test/test_regrtest.py @@ -915,7 +915,7 @@ class RefLeakTest(unittest.TestCase): def test_leak(self): GLOBAL_LIST.append(object()) """) - self.check_leak(code, 'references') + self.check_leak(code, 'memory blocks') @unittest.skipUnless(Py_DEBUG, 'need a debug build') def test_huntrleaks_fd_leak(self): diff --git a/Lib/test/test_sys.py b/Lib/test/test_sys.py index f828d1b15d2868..a128441e83a4ea 100644 --- a/Lib/test/test_sys.py +++ b/Lib/test/test_sys.py @@ -385,7 +385,8 @@ def test_refcount(self): self.assertRaises(TypeError, sys.getrefcount) c = sys.getrefcount(None) n = None - self.assertEqual(sys.getrefcount(None), c+1) + # Singleton refcnts don't change + self.assertEqual(sys.getrefcount(None), c) del n self.assertEqual(sys.getrefcount(None), c) if hasattr(sys, "gettotalrefcount"): diff --git a/Misc/NEWS.d/next/Core and Builtins/2021-12-18-08-57-24.bpo-40255.XDDrSO.rst b/Misc/NEWS.d/next/Core and Builtins/2021-12-18-08-57-24.bpo-40255.XDDrSO.rst new file mode 100644 index 00000000000000..43983d000ee4b0 --- /dev/null +++ b/Misc/NEWS.d/next/Core and Builtins/2021-12-18-08-57-24.bpo-40255.XDDrSO.rst @@ -0,0 +1,2 @@ +This introduces Immortal Instances which allows objects to bypass reference +counting and remain alive throughout the execution of the runtime diff --git a/Objects/longobject.c b/Objects/longobject.c index 3438906d842758..ab6ad2d8dfd768 100644 --- a/Objects/longobject.c +++ b/Objects/longobject.c @@ -47,9 +47,7 @@ static PyObject * get_small_int(sdigit ival) { assert(IS_SMALL_INT(ival)); - PyObject *v = (PyObject *)&_PyLong_SMALL_INTS[_PY_NSMALLNEGINTS + ival]; - Py_INCREF(v); - return v; + return (PyObject *)&_PyLong_SMALL_INTS[_PY_NSMALLNEGINTS + ival]; } static PyLongObject * diff --git a/Objects/object.c b/Objects/object.c index 3044c862fb9dac..c2a5c2ef091cfe 100644 --- a/Objects/object.c +++ b/Objects/object.c @@ -1705,7 +1705,8 @@ PyTypeObject _PyNone_Type = { PyObject _Py_NoneStruct = { _PyObject_EXTRA_INIT - 1, &_PyNone_Type + _Py_IMMORTAL_REFCNT, + &_PyNone_Type }; /* NotImplemented is an object that can be used to signal that an @@ -1994,7 +1995,9 @@ _Py_NewReference(PyObject *op) #ifdef Py_REF_DEBUG _Py_RefTotal++; #endif - Py_SET_REFCNT(op, 1); + /* Do not use Py_SET_REFCNT to skip the Immortal Instance check. This + * API guarantees that an instance will always be set to a refcnt of 1 */ + op->ob_refcnt = 1; #ifdef Py_TRACE_REFS _Py_AddToAllObjects(op, 1); #endif From 586e44f19ce6aebd07f1436b7f36b1f9e6a54190 Mon Sep 17 00:00:00 2001 From: Eddie Elizondo Date: Sat, 19 Feb 2022 21:43:17 -0800 Subject: [PATCH 2/3] Make interned strings immortal --- Objects/unicodeobject.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 908ad514925999..22c1d2276f2857 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -15616,7 +15616,7 @@ PyUnicode_InternInPlace(PyObject **p) /* The two references in interned dict (key and value) are not counted by refcnt. unicode_dealloc() and _PyUnicode_ClearInterned() take care of this. */ - Py_SET_REFCNT(s, Py_REFCNT(s) - 2); + _Py_SetImmortal(s); _PyUnicode_STATE(s).interned = SSTATE_INTERNED_MORTAL; #else // PyDict expects that interned strings have their hash From 92186f79a81b53bc28fd90ca5709816dc1ce52bd Mon Sep 17 00:00:00 2001 From: Eddie Elizondo Date: Sun, 20 Feb 2022 22:48:50 -0800 Subject: [PATCH 3/3] Simplify interned strings --- Objects/unicodeobject.c | 74 ++--------------------------------------- Programs/_testembed.c | 2 +- 2 files changed, 3 insertions(+), 73 deletions(-) diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 22c1d2276f2857..df1de0eb1ce016 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -1941,26 +1941,6 @@ unicode_dealloc(PyObject *unicode) case SSTATE_NOT_INTERNED: break; - case SSTATE_INTERNED_MORTAL: - { -#ifdef INTERNED_STRINGS - /* Revive the dead object temporarily. PyDict_DelItem() removes two - references (key and value) which were ignored by - PyUnicode_InternInPlace(). Use refcnt=3 rather than refcnt=2 - to prevent calling unicode_dealloc() again. Adjust refcnt after - PyDict_DelItem(). */ - assert(Py_REFCNT(unicode) == 0); - Py_SET_REFCNT(unicode, 3); - if (PyDict_DelItem(interned, unicode) != 0) { - _PyErr_WriteUnraisableMsg("deletion of interned string failed", - NULL); - } - assert(Py_REFCNT(unicode) == 1); - Py_SET_REFCNT(unicode, 0); -#endif - break; - } - case SSTATE_INTERNED_IMMORTAL: _PyObject_ASSERT_FAILED_MSG(unicode, "Immortal interned string died"); break; @@ -15608,16 +15588,12 @@ PyUnicode_InternInPlace(PyObject **p) } if (t != s) { - Py_INCREF(t); Py_SETREF(*p, t); return; } - /* The two references in interned dict (key and value) are not counted by - refcnt. unicode_dealloc() and _PyUnicode_ClearInterned() take care of - this. */ _Py_SetImmortal(s); - _PyUnicode_STATE(s).interned = SSTATE_INTERNED_MORTAL; + _PyUnicode_STATE(*p).interned = SSTATE_INTERNED_IMMORTAL; #else // PyDict expects that interned strings have their hash // (PyASCIIObject.hash) already computed. @@ -15638,10 +15614,6 @@ PyUnicode_InternImmortal(PyObject **p) } PyUnicode_InternInPlace(p); - if (PyUnicode_CHECK_INTERNED(*p) != SSTATE_INTERNED_IMMORTAL) { - _PyUnicode_STATE(*p).interned = SSTATE_INTERNED_IMMORTAL; - Py_INCREF(*p); - } } PyObject * @@ -15668,49 +15640,7 @@ _PyUnicode_ClearInterned(PyInterpreterState *interp) } assert(PyDict_CheckExact(interned)); - /* Interned unicode strings are not forcibly deallocated; rather, we give - them their stolen references back, and then clear and DECREF the - interned dict. */ - -#ifdef INTERNED_STATS - fprintf(stderr, "releasing %zd interned strings\n", - PyDict_GET_SIZE(interned)); - - Py_ssize_t immortal_size = 0, mortal_size = 0; -#endif - Py_ssize_t pos = 0; - PyObject *s, *ignored_value; - while (PyDict_Next(interned, &pos, &s, &ignored_value)) { - assert(PyUnicode_IS_READY(s)); - - switch (PyUnicode_CHECK_INTERNED(s)) { - case SSTATE_INTERNED_IMMORTAL: - Py_SET_REFCNT(s, Py_REFCNT(s) + 1); -#ifdef INTERNED_STATS - immortal_size += PyUnicode_GET_LENGTH(s); -#endif - break; - case SSTATE_INTERNED_MORTAL: - // Restore the two references (key and value) ignored - // by PyUnicode_InternInPlace(). - Py_SET_REFCNT(s, Py_REFCNT(s) + 2); -#ifdef INTERNED_STATS - mortal_size += PyUnicode_GET_LENGTH(s); -#endif - break; - case SSTATE_NOT_INTERNED: - /* fall through */ - default: - Py_UNREACHABLE(); - } - _PyUnicode_STATE(s).interned = SSTATE_NOT_INTERNED; - } -#ifdef INTERNED_STATS - fprintf(stderr, - "total size of all interned strings: %zd/%zd mortal/immortal\n", - mortal_size, immortal_size); -#endif - + /* Interned unicode strings are not forcibly deallocated */ PyDict_Clear(interned); Py_CLEAR(interned); } diff --git a/Programs/_testembed.c b/Programs/_testembed.c index 3830dc3f8b6ec7..1273020d1bc393 100644 --- a/Programs/_testembed.c +++ b/Programs/_testembed.c @@ -1829,7 +1829,7 @@ static int test_unicode_id_init(void) str1 = _PyUnicode_FromId(&PyId_test_unicode_id_init); assert(str1 != NULL); - assert(Py_REFCNT(str1) == 1); + assert(_Py_IsImmortal(str1)); str2 = PyUnicode_FromString("test_unicode_id_init"); assert(str2 != NULL);