From 5206f3de699e869a5fe796c58bb855595637c48b Mon Sep 17 00:00:00 2001 From: Eric Snow Date: Tue, 30 May 2023 11:04:46 -0600 Subject: [PATCH 1/7] Call _PyEval_ReleaseLock() with NULL when finalizing the current thread. --- Include/internal/pycore_ceval.h | 2 +- Python/ceval_gil.c | 17 +++++++++++++---- Python/pylifecycle.c | 2 +- Python/pystate.c | 9 ++++++--- 4 files changed, 21 insertions(+), 9 deletions(-) diff --git a/Include/internal/pycore_ceval.h b/Include/internal/pycore_ceval.h index 3c8b368bd2af4e..ca2703781de4b0 100644 --- a/Include/internal/pycore_ceval.h +++ b/Include/internal/pycore_ceval.h @@ -100,7 +100,7 @@ extern PyStatus _PyEval_InitGIL(PyThreadState *tstate, int own_gil); extern void _PyEval_FiniGIL(PyInterpreterState *interp); extern void _PyEval_AcquireLock(PyThreadState *tstate); -extern void _PyEval_ReleaseLock(PyThreadState *tstate); +extern void _PyEval_ReleaseLock(PyInterpreterState *, PyThreadState *); extern PyThreadState * _PyThreadState_SwapNoGIL(PyThreadState *); extern void _PyEval_DeactivateOpCache(void); diff --git a/Python/ceval_gil.c b/Python/ceval_gil.c index b9bdb74fcedf32..40c7229b29793a 100644 --- a/Python/ceval_gil.c +++ b/Python/ceval_gil.c @@ -278,6 +278,10 @@ static void recreate_gil(struct _gil_runtime_state *gil) static void drop_gil(struct _ceval_state *ceval, PyThreadState *tstate) { + /* We shouldn't be using a thread state that isn't viable any more. */ + // XXX It may be more correct to check tstate->_status.finalizing. + assert(tstate == NULL || !tstate->_status.cleared); + struct _gil_runtime_state *gil = ceval->gil; if (!_Py_atomic_load_relaxed(&gil->locked)) { Py_FatalError("drop_gil: GIL is not locked"); @@ -298,7 +302,7 @@ drop_gil(struct _ceval_state *ceval, PyThreadState *tstate) MUTEX_UNLOCK(gil->mutex); #ifdef FORCE_SWITCHING - if (_Py_atomic_load_relaxed(&ceval->gil_drop_request) && tstate != NULL) { + if (tstate != NULL && _Py_atomic_load_relaxed(&ceval->gil_drop_request)) { MUTEX_LOCK(gil->switch_mutex); /* Not switched yet => wait */ if (((PyThreadState*)_Py_atomic_load_relaxed(&gil->last_holder)) == tstate) @@ -350,6 +354,9 @@ take_gil(PyThreadState *tstate) int err = errno; assert(tstate != NULL); + /* We shouldn't be using a thread state that isn't viable any more. */ + // XXX It may be more correct to check tstate->_status.finalizing. + assert(!tstate->_status.cleared); if (tstate_must_exit(tstate)) { /* bpo-39877: If Py_Finalize() has been called and tstate is not the @@ -625,10 +632,12 @@ _PyEval_AcquireLock(PyThreadState *tstate) } void -_PyEval_ReleaseLock(PyThreadState *tstate) +_PyEval_ReleaseLock(PyInterpreterState *interp, PyThreadState *tstate) { - _Py_EnsureTstateNotNULL(tstate); - struct _ceval_state *ceval = &tstate->interp->ceval; + /* If tstate is NULL then we do not expect the current thread + to acquire the GIL ever again. */ + assert(tstate == NULL || tstate->interp == interp); + struct _ceval_state *ceval = &interp->ceval; drop_gil(ceval, tstate); } diff --git a/Python/pylifecycle.c b/Python/pylifecycle.c index 06c43459624c67..8b846443ce8f6a 100644 --- a/Python/pylifecycle.c +++ b/Python/pylifecycle.c @@ -2035,7 +2035,7 @@ new_interpreter(PyThreadState **tstate_p, const PyInterpreterConfig *config) const PyConfig *src_config; if (save_tstate != NULL) { // XXX Might new_interpreter() have been called without the GIL held? - _PyEval_ReleaseLock(save_tstate); + _PyEval_ReleaseLock(save_tstate->interp, save_tstate); src_config = _PyInterpreterState_GetConfig(save_tstate->interp); } else diff --git a/Python/pystate.c b/Python/pystate.c index 25e655a2027918..2dd8152ab701e2 100644 --- a/Python/pystate.c +++ b/Python/pystate.c @@ -930,6 +930,7 @@ _PyInterpreterState_Clear(PyThreadState *tstate) } +static inline void tstate_deactivate(PyThreadState *tstate); static void zapthreads(PyInterpreterState *interp); void @@ -943,7 +944,9 @@ PyInterpreterState_Delete(PyInterpreterState *interp) PyThreadState *tcur = current_fast_get(runtime); if (tcur != NULL && interp == tcur->interp) { /* Unset current thread. After this, many C API calls become crashy. */ - _PyThreadState_Swap(runtime, NULL); + current_fast_clear(runtime); + tstate_deactivate(tcur); + _PyEval_ReleaseLock(interp, NULL); } zapthreads(interp); @@ -1567,7 +1570,7 @@ _PyThreadState_DeleteCurrent(PyThreadState *tstate) _Py_EnsureTstateNotNULL(tstate); tstate_delete_common(tstate); current_fast_clear(tstate->interp->runtime); - _PyEval_ReleaseLock(tstate); + _PyEval_ReleaseLock(tstate->interp, NULL); free_threadstate(tstate); } @@ -1907,7 +1910,7 @@ _PyThreadState_Swap(_PyRuntimeState *runtime, PyThreadState *newts) { PyThreadState *oldts = current_fast_get(runtime); if (oldts != NULL) { - _PyEval_ReleaseLock(oldts); + _PyEval_ReleaseLock(oldts->interp, oldts); } _swap_thread_states(runtime, oldts, newts); if (newts != NULL) { From db6d7ac54435dae0b56bbdefcc8f81a9ebd028dc Mon Sep 17 00:00:00 2001 From: Eric Snow Date: Tue, 30 May 2023 14:06:54 -0600 Subject: [PATCH 2/7] Temporarily adjust tstate->_status.cleared during interpreter_clear(). --- Python/pystate.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/Python/pystate.c b/Python/pystate.c index 2dd8152ab701e2..7b4cdf356e378d 100644 --- a/Python/pystate.c +++ b/Python/pystate.c @@ -822,6 +822,12 @@ interpreter_clear(PyInterpreterState *interp, PyThreadState *tstate) p = p->next; HEAD_UNLOCK(runtime); } + if (tstate->interp == interp) { + /* We fix tstate->_status below we we for sure aren't using it + (e.g. no longer need the GIL). */ + // XXX Eliminate the need to do this. + tstate->_status.cleared = 0; + } /* It is possible that any of the objects below have a finalizer that runs Python code or otherwise relies on a thread state @@ -886,6 +892,12 @@ interpreter_clear(PyInterpreterState *interp, PyThreadState *tstate) Py_CLEAR(interp->builtins); Py_CLEAR(interp->interpreter_trampoline); + if (tstate->interp == interp) { + /* We are now safe to fix tstate->_status.cleared. */ + // XXX Do this (much) earlier? + tstate->_status.cleared = 1; + } + for (int i=0; i < DICT_MAX_WATCHERS; i++) { interp->dict_state.watchers[i] = NULL; } From a374f48ea809f5c279cdd3e154dbf9307417c202 Mon Sep 17 00:00:00 2001 From: Eric Snow Date: Tue, 30 May 2023 14:55:06 -0600 Subject: [PATCH 3/7] Disable the broken asserts. --- Python/ceval_gil.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Python/ceval_gil.c b/Python/ceval_gil.c index 40c7229b29793a..4d13861955d911 100644 --- a/Python/ceval_gil.c +++ b/Python/ceval_gil.c @@ -280,7 +280,7 @@ drop_gil(struct _ceval_state *ceval, PyThreadState *tstate) { /* We shouldn't be using a thread state that isn't viable any more. */ // XXX It may be more correct to check tstate->_status.finalizing. - assert(tstate == NULL || !tstate->_status.cleared); + // XXX assert(tstate == NULL || !tstate->_status.cleared); struct _gil_runtime_state *gil = ceval->gil; if (!_Py_atomic_load_relaxed(&gil->locked)) { @@ -356,7 +356,7 @@ take_gil(PyThreadState *tstate) assert(tstate != NULL); /* We shouldn't be using a thread state that isn't viable any more. */ // XXX It may be more correct to check tstate->_status.finalizing. - assert(!tstate->_status.cleared); + // XXX assert(!tstate->_status.cleared); if (tstate_must_exit(tstate)) { /* bpo-39877: If Py_Finalize() has been called and tstate is not the From 432db0c478ee446308de27cbd547b8d5fc2b81ee Mon Sep 17 00:00:00 2001 From: Eric Snow Date: Tue, 30 May 2023 18:04:07 -0600 Subject: [PATCH 4/7] Add a note about the race in drop_gil(). --- Python/ceval_gil.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/Python/ceval_gil.c b/Python/ceval_gil.c index 4d13861955d911..76d99ce57a8aba 100644 --- a/Python/ceval_gil.c +++ b/Python/ceval_gil.c @@ -302,6 +302,14 @@ drop_gil(struct _ceval_state *ceval, PyThreadState *tstate) MUTEX_UNLOCK(gil->mutex); #ifdef FORCE_SWITCHING + /* We check tstate first in case we might be releasing the GIL for + the last time in this thread. In that case there's a possible + race with tstate->interp getting deleted after gil->mutex is + unlocked and before the following code runs, leading to a crash. + We can use (tstate == NULL) to indicate the thread is done with + the GIL, and that's the only time we might delete the + interpreter, so checking tstate first prevents the crash. + See https://github.com/python/cpython/issues/104341. */ if (tstate != NULL && _Py_atomic_load_relaxed(&ceval->gil_drop_request)) { MUTEX_LOCK(gil->switch_mutex); /* Not switched yet => wait */ From 1280e0dede5e05a4e04ad59a3d10acd86ee82e67 Mon Sep 17 00:00:00 2001 From: Eric Snow Date: Tue, 30 May 2023 18:04:58 -0600 Subject: [PATCH 5/7] Fix a typo. --- Python/pystate.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Python/pystate.c b/Python/pystate.c index 7b4cdf356e378d..39fe5473ed46b3 100644 --- a/Python/pystate.c +++ b/Python/pystate.c @@ -823,7 +823,7 @@ interpreter_clear(PyInterpreterState *interp, PyThreadState *tstate) HEAD_UNLOCK(runtime); } if (tstate->interp == interp) { - /* We fix tstate->_status below we we for sure aren't using it + /* We fix tstate->_status below when we for sure aren't using it (e.g. no longer need the GIL). */ // XXX Eliminate the need to do this. tstate->_status.cleared = 0; From cc1f2fe86a9e22b636fb249b12f8fcde2f8e2016 Mon Sep 17 00:00:00 2001 From: Eric Snow Date: Wed, 31 May 2023 17:26:26 -0600 Subject: [PATCH 6/7] Clarify a comment. --- Python/ceval_gil.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/Python/ceval_gil.c b/Python/ceval_gil.c index 76d99ce57a8aba..c70ce7ac9be312 100644 --- a/Python/ceval_gil.c +++ b/Python/ceval_gil.c @@ -278,7 +278,12 @@ static void recreate_gil(struct _gil_runtime_state *gil) static void drop_gil(struct _ceval_state *ceval, PyThreadState *tstate) { - /* We shouldn't be using a thread state that isn't viable any more. */ + /* If tstate is NULL, the caller is indicateing that we're releasing + the GIL for the last time in this thread. This is particularly + relevant when the current thread state is finalizing or its + interpreter is finalizing.(either may be in an inconsistent + state). In that case the current thread will definitely + never try to acquire the GIL again. */ // XXX It may be more correct to check tstate->_status.finalizing. // XXX assert(tstate == NULL || !tstate->_status.cleared); From b64b299cfbaa5c445848c9bd611a3338fc3c8be8 Mon Sep 17 00:00:00 2001 From: "Gregory P. Smith" Date: Thu, 1 Jun 2023 14:42:43 -0700 Subject: [PATCH 7/7] minor comment typo fix. --- Python/ceval_gil.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Python/ceval_gil.c b/Python/ceval_gil.c index c70ce7ac9be312..764278ac130dfa 100644 --- a/Python/ceval_gil.c +++ b/Python/ceval_gil.c @@ -278,10 +278,10 @@ static void recreate_gil(struct _gil_runtime_state *gil) static void drop_gil(struct _ceval_state *ceval, PyThreadState *tstate) { - /* If tstate is NULL, the caller is indicateing that we're releasing + /* If tstate is NULL, the caller is indicating that we're releasing the GIL for the last time in this thread. This is particularly relevant when the current thread state is finalizing or its - interpreter is finalizing.(either may be in an inconsistent + interpreter is finalizing (either may be in an inconsistent state). In that case the current thread will definitely never try to acquire the GIL again. */ // XXX It may be more correct to check tstate->_status.finalizing.