From 5422b5397b28e0dad2113bb8f630bb17950ee572 Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Mon, 4 Apr 2022 11:51:13 +0100 Subject: [PATCH 1/3] Add undocumented, unstable FrameStack API for use by greenlets and similar libraries. --- Include/cpython/pystate.h | 17 +++++-- Include/internal/pycore_frame.h | 8 ++-- Python/pystate.c | 82 ++++++++++++++++++++------------- 3 files changed, 67 insertions(+), 40 deletions(-) diff --git a/Include/cpython/pystate.h b/Include/cpython/pystate.h index 1af21a2c947d99..9af3ea61fc3ca8 100644 --- a/Include/cpython/pystate.h +++ b/Include/cpython/pystate.h @@ -79,6 +79,13 @@ typedef struct _stack_chunk { PyObject * data[1]; /* Variable sized */ } _PyStackChunk; +typedef struct _frame_stack { + _PyStackChunk *current_chunk; + PyObject **top; + PyObject **limit; + int chunk_size; +} _PyFrameStack; + struct _ts { /* See Python/ceval.c for comments explaining most fields */ @@ -178,10 +185,7 @@ struct _ts { uint64_t id; PyTraceInfo trace_info; - - _PyStackChunk *datastack_chunk; - PyObject **datastack_top; - PyObject **datastack_limit; + _PyFrameStack frame_stack; /* XXX signal handlers should also be here */ /* The following fields are here to avoid allocation during init. 
@@ -364,3 +368,8 @@ typedef int (*crossinterpdatafunc)(PyObject *, _PyCrossInterpreterData *); PyAPI_FUNC(int) _PyCrossInterpreterData_RegisterClass(PyTypeObject *, crossinterpdatafunc); PyAPI_FUNC(crossinterpdatafunc) _PyCrossInterpreterData_Lookup(PyObject *); + + +PyAPI_FUNC(void) _PyFrameStack_Init(_PyFrameStack *fs, int chunk_size); +PyAPI_FUNC(void) _PyFrameStack_Swap(_PyFrameStack *fs); +PyAPI_FUNC(void) _PyFrameStack_Free(_PyFrameStack *fs); diff --git a/Include/internal/pycore_frame.h b/Include/internal/pycore_frame.h index 211831a6e497f5..b48bfc21336644 100644 --- a/Include/internal/pycore_frame.h +++ b/Include/internal/pycore_frame.h @@ -168,12 +168,12 @@ _PyThreadState_BumpFramePointerSlow(PyThreadState *tstate, size_t size); static inline _PyInterpreterFrame * _PyThreadState_BumpFramePointer(PyThreadState *tstate, size_t size) { - PyObject **base = tstate->datastack_top; + PyObject **base = tstate->frame_stack.top; if (base) { PyObject **top = base + size; - assert(tstate->datastack_limit); - if (top < tstate->datastack_limit) { - tstate->datastack_top = top; + assert(tstate->frame_stack.limit); + if (top < tstate->frame_stack.limit) { + tstate->frame_stack.top = top; return (_PyInterpreterFrame *)base; } } diff --git a/Python/pystate.c b/Python/pystate.c index 3e28a6ab69a989..d042a69dd53c8f 100644 --- a/Python/pystate.c +++ b/Python/pystate.c @@ -783,9 +783,7 @@ init_threadstate(PyThreadState *tstate, tstate->exc_info = &tstate->exc_state; tstate->cframe = &tstate->root_cframe; - tstate->datastack_chunk = NULL; - tstate->datastack_top = NULL; - tstate->datastack_limit = NULL; + _PyFrameStack_Init(&tstate->frame_stack, DATA_STACK_CHUNK_SIZE); tstate->_initialized = 1; } @@ -1073,13 +1071,7 @@ tstate_delete_common(PyThreadState *tstate, { PyThread_tss_set(&gilstate->autoTSSkey, NULL); } - _PyStackChunk *chunk = tstate->datastack_chunk; - tstate->datastack_chunk = NULL; - while (chunk != NULL) { - _PyStackChunk *prev = chunk->previous; - 
_PyObject_VirtualFree(chunk, chunk->size); - chunk = prev; - } + _PyFrameStack_Free(&tstate->frame_stack); } static void @@ -2159,27 +2151,27 @@ _Py_GetConfig(void) #define MINIMUM_OVERHEAD 1000 static PyObject ** -push_chunk(PyThreadState *tstate, int size) +push_chunk(_PyFrameStack *frame_stack, int size) { - int allocate_size = DATA_STACK_CHUNK_SIZE; + int allocate_size = frame_stack->chunk_size; while (allocate_size < (int)sizeof(PyObject*)*(size + MINIMUM_OVERHEAD)) { allocate_size *= 2; } - _PyStackChunk *new = allocate_chunk(allocate_size, tstate->datastack_chunk); + _PyStackChunk *new = allocate_chunk(allocate_size, frame_stack->current_chunk); if (new == NULL) { return NULL; } - if (tstate->datastack_chunk) { - tstate->datastack_chunk->top = tstate->datastack_top - - &tstate->datastack_chunk->data[0]; + if (frame_stack->current_chunk) { + frame_stack->current_chunk->top = frame_stack->top - + &frame_stack->current_chunk->data[0]; } - tstate->datastack_chunk = new; - tstate->datastack_limit = (PyObject **)(((char *)new) + allocate_size); + frame_stack->current_chunk = new; + frame_stack->limit = (PyObject **)(((char *)new) + allocate_size); // When new is the "root" chunk (i.e. 
new->previous == NULL), we can keep // _PyThreadState_PopFrame from freeing it later by "skipping" over the // first element: PyObject **res = &new->data[new->previous == NULL]; - tstate->datastack_top = res + size; + frame_stack->top = res + size; return res; } @@ -2187,13 +2179,13 @@ _PyInterpreterFrame * _PyThreadState_BumpFramePointerSlow(PyThreadState *tstate, size_t size) { assert(size < INT_MAX/sizeof(PyObject *)); - PyObject **base = tstate->datastack_top; + PyObject **base = tstate->frame_stack.top; PyObject **top = base + size; - if (top >= tstate->datastack_limit) { - base = push_chunk(tstate, (int)size); + if (top >= tstate->frame_stack.limit) { + base = push_chunk(&tstate->frame_stack, (int)size); } else { - tstate->datastack_top = top; + tstate->frame_stack.top = top; } return (_PyInterpreterFrame *)base; } @@ -2201,25 +2193,51 @@ _PyThreadState_BumpFramePointerSlow(PyThreadState *tstate, size_t size) void _PyThreadState_PopFrame(PyThreadState *tstate, _PyInterpreterFrame * frame) { - assert(tstate->datastack_chunk); + assert(tstate->frame_stack.current_chunk); PyObject **base = (PyObject **)frame; - if (base == &tstate->datastack_chunk->data[0]) { - _PyStackChunk *chunk = tstate->datastack_chunk; + if (base == &tstate->frame_stack.current_chunk->data[0]) { + _PyStackChunk *chunk = tstate->frame_stack.current_chunk; _PyStackChunk *previous = chunk->previous; // push_chunk ensures that the root chunk is never popped: assert(previous); - tstate->datastack_top = &previous->data[previous->top]; - tstate->datastack_chunk = previous; + tstate->frame_stack.top = &previous->data[previous->top]; + tstate->frame_stack.current_chunk = previous; _PyObject_VirtualFree(chunk, chunk->size); - tstate->datastack_limit = (PyObject **)(((char *)previous) + previous->size); + tstate->frame_stack.limit = (PyObject **)(((char *)previous) + previous->size); } else { - assert(tstate->datastack_top); - assert(tstate->datastack_top >= base); - tstate->datastack_top = base; + 
assert(tstate->frame_stack.top); + assert(tstate->frame_stack.top >= base); + tstate->frame_stack.top = base; } } +void _PyFrameStack_Init(_PyFrameStack *fs, int chunk_size) +{ + fs->chunk_size = chunk_size; + fs->current_chunk = NULL; + fs->top = NULL; + fs->limit = NULL; +} + +void _PyFrameStack_Swap(_PyFrameStack *fs) +{ + PyThreadState *tstate = _PyThreadState_GET(); + _PyFrameStack temp = *fs; + *fs = tstate->frame_stack; + tstate->frame_stack = temp; +} + +void _PyFrameStack_Free(_PyFrameStack *fs) +{ + _PyStackChunk *chunk = fs->current_chunk; + fs->current_chunk = NULL; + while (chunk != NULL) { + _PyStackChunk *previous = chunk->previous; + _PyObject_VirtualFree(chunk, chunk->size); + chunk = previous; + } +} #ifdef __cplusplus } From f6a46a634ff0aad024c7a4386fe7338b53d31eaa Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Mon, 4 Apr 2022 13:07:53 +0100 Subject: [PATCH 2/3] Add minimal documentation. --- Include/cpython/pystate.h | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/Include/cpython/pystate.h b/Include/cpython/pystate.h index 9af3ea61fc3ca8..205feb16d94101 100644 --- a/Include/cpython/pystate.h +++ b/Include/cpython/pystate.h @@ -369,7 +369,15 @@ typedef int (*crossinterpdatafunc)(PyObject *, _PyCrossInterpreterData *); PyAPI_FUNC(int) _PyCrossInterpreterData_RegisterClass(PyTypeObject *, crossinterpdatafunc); PyAPI_FUNC(crossinterpdatafunc) _PyCrossInterpreterData_Lookup(PyObject *); +/* UNSTABLE API for stackful coroutines. + * It is the responsibility of the caller to manage the memory for the _PyFrameStack struct. + * The memory for the actual frame stack will be managed by the VM. + * All functions need the GIL to be held. + */ +/* Initialize fs with given chunk size */ PyAPI_FUNC(void) _PyFrameStack_Init(_PyFrameStack *fs, int chunk_size); +/* Swap the frame stack of the current thread with fs */ PyAPI_FUNC(void) _PyFrameStack_Swap(_PyFrameStack *fs); +/* Free any allocated memory chunks from fs. 
Does not free fs itself */ PyAPI_FUNC(void) _PyFrameStack_Free(_PyFrameStack *fs); From 1a8268360afc7a3fab7f9eddd4d3277d62e954ca Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Wed, 7 Sep 2022 16:37:09 +0100 Subject: [PATCH 3/3] Change top/limit pair to free/limit pair to allow fast checks and NULL pointer. --- Include/cpython/pystate.h | 10 ++-- Include/internal/pycore_frame.h | 14 ++--- Python/ceval.c | 2 +- Python/pystate.c | 94 ++++++++++++++++++++++++--------- 4 files changed, 81 insertions(+), 39 deletions(-) diff --git a/Include/cpython/pystate.h b/Include/cpython/pystate.h index 3876863dd57dcb..2ad9fd1c2b35df 100644 --- a/Include/cpython/pystate.h +++ b/Include/cpython/pystate.h @@ -74,15 +74,15 @@ typedef struct _err_stackitem { typedef struct _stack_chunk { struct _stack_chunk *previous; - size_t size; - size_t top; + int size_in_bytes; + int free; PyObject * data[1]; /* Variable sized */ } _PyStackChunk; typedef struct _frame_stack { _PyStackChunk *current_chunk; - PyObject **top; PyObject **limit; + int free; int chunk_size; } _PyFrameStack; @@ -382,5 +382,5 @@ PyAPI_FUNC(crossinterpdatafunc) _PyCrossInterpreterData_Lookup(PyObject *); PyAPI_FUNC(void) _PyFrameStack_Init(_PyFrameStack *fs, int chunk_size); /* Swap the frame stack of the current thread with fs */ PyAPI_FUNC(void) _PyFrameStack_Swap(_PyFrameStack *fs); -/* Free any allocated memory chunks from fs. Does not free fs itself */ -PyAPI_FUNC(void) _PyFrameStack_Free(_PyFrameStack *fs); +/* Free any allocated memory chunks for fs. 
*/ +PyAPI_FUNC(void) _PyFrameStack_Clear(_PyFrameStack *fs); diff --git a/Include/internal/pycore_frame.h b/Include/internal/pycore_frame.h index 12365321db219f..7b80a3936644eb 100644 --- a/Include/internal/pycore_frame.h +++ b/Include/internal/pycore_frame.h @@ -194,12 +194,11 @@ static inline bool _PyThreadState_HasStackSpace(PyThreadState *tstate, int size) { assert( - (tstate->frame_stack.top == NULL && tstate->frame_stack.limit == NULL) + (tstate->frame_stack.limit == NULL && tstate->frame_stack.free <= 0) || - (tstate->frame_stack.top != NULL && tstate->frame_stack.limit != NULL) + (tstate->frame_stack.limit != NULL && tstate->frame_stack.free >= 0) ); - return tstate->frame_stack.top != NULL && - size < tstate->frame_stack.limit - tstate->frame_stack.top; + return size < tstate->frame_stack.free; } extern _PyInterpreterFrame * @@ -215,9 +214,10 @@ _PyFrame_PushUnchecked(PyThreadState *tstate, PyFunctionObject *func) { CALL_STAT_INC(frames_pushed); PyCodeObject *code = (PyCodeObject *)func->func_code; - _PyInterpreterFrame *new_frame = (_PyInterpreterFrame *)tstate->frame_stack.top; - tstate->frame_stack.top += code->co_framesize; - assert(tstate->frame_stack.top < tstate->frame_stack.limit); + PyObject **top = tstate->frame_stack.limit - tstate->frame_stack.free; + _PyInterpreterFrame *new_frame = (_PyInterpreterFrame *)top; + tstate->frame_stack.free -= code->co_framesize; + assert(top < tstate->frame_stack.limit); _PyFrame_InitializeSpecials(new_frame, func, NULL, code); return new_frame; } diff --git a/Python/ceval.c b/Python/ceval.c index f76ffba029fb30..5c497da503349c 100644 --- a/Python/ceval.c +++ b/Python/ceval.c @@ -5756,7 +5756,7 @@ _PyEvalFrameClearAndPop(PyThreadState *tstate, _PyInterpreterFrame * frame) // Make sure that this is, indeed, the top frame. 
We can't check this in // _PyThreadState_PopFrame, since f_code is already cleared at that point: assert((PyObject **)frame + frame->f_code->co_framesize == - tstate->frame_stack.top); + tstate->frame_stack.limit - tstate->frame_stack.free); tstate->recursion_remaining--; assert(frame->frame_obj == NULL || frame->frame_obj->f_frame == frame); assert(frame->owner == FRAME_OWNED_BY_THREAD); diff --git a/Python/pystate.c b/Python/pystate.c index 2d445531e9e7d0..23f88582965480 100644 --- a/Python/pystate.c +++ b/Python/pystate.c @@ -736,8 +736,8 @@ allocate_chunk(int size_in_bytes, _PyStackChunk* previous) return NULL; } res->previous = previous; - res->size = size_in_bytes; - res->top = 0; + res->size_in_bytes = size_in_bytes; + res->free = 0; return res; } @@ -1091,7 +1091,7 @@ tstate_delete_common(PyThreadState *tstate, { PyThread_tss_set(&gilstate->autoTSSkey, NULL); } - _PyFrameStack_Free(&tstate->frame_stack); + _PyFrameStack_Clear(&tstate->frame_stack); } static void @@ -2158,9 +2158,43 @@ _Py_GetConfig(void) #define MINIMUM_OVERHEAD 1000 +static PyObject ** +stack_chunk_limit(_PyStackChunk *chunk) +{ + return (PyObject **)(((char *)chunk) + chunk->size_in_bytes); +} + +static bool +framechunk_is_consistent(_PyStackChunk *chunk) +{ + bool ok = chunk->size_in_bytes > MINIMUM_OVERHEAD; + int size = stack_chunk_limit(chunk) - &chunk->data[0]; + ok &= chunk->free >= 0; + ok &= chunk->free < size; + return ok; +} + +static bool +framestack_is_consistent(_PyFrameStack *fs) +{ + bool ok = fs->free >= 0; + ok &= fs->free < fs->chunk_size; + if (fs->current_chunk) { + ok &= (char *)fs->limit == ((char *)fs->current_chunk) + fs->current_chunk->size_in_bytes; + } + _PyStackChunk *chunk = fs->current_chunk; + while (chunk) { + ok &= framechunk_is_consistent(chunk); + chunk = chunk->previous; + } + return ok; +} + + static PyObject ** push_chunk(_PyFrameStack *frame_stack, int size) { + assert(framestack_is_consistent(frame_stack)); int allocate_size = 
frame_stack->chunk_size; while (allocate_size < (int)sizeof(PyObject*)*(size + MINIMUM_OVERHEAD)) { allocate_size *= 2; @@ -2170,16 +2204,17 @@ push_chunk(_PyFrameStack *frame_stack, int size) return NULL; } if (frame_stack->current_chunk) { - frame_stack->current_chunk->top = frame_stack->top - - &frame_stack->current_chunk->data[0]; + frame_stack->current_chunk->free = frame_stack->free; } frame_stack->current_chunk = new; - frame_stack->limit = (PyObject **)(((char *)new) + allocate_size); + frame_stack->limit = stack_chunk_limit(new); // When new is the "root" chunk (i.e. new->previous == NULL), we can keep // _PyThreadState_PopFrame from freeing it later by "skipping" over the // first element: PyObject **res = &new->data[new->previous == NULL]; - frame_stack->top = res + size; + PyObject **top = res + size; + frame_stack->free = frame_stack->limit - top; + assert(framestack_is_consistent(frame_stack)); return res; } @@ -2188,40 +2223,46 @@ _PyThreadState_PushFrame(PyThreadState *tstate, size_t size) { assert(size < INT_MAX/sizeof(PyObject *)); if (_PyThreadState_HasStackSpace(tstate, (int)size)) { - _PyInterpreterFrame *res = (_PyInterpreterFrame *)tstate->frame_stack.top; - tstate->frame_stack.top += size; + _PyInterpreterFrame *res = (_PyInterpreterFrame *)(tstate->frame_stack.limit - tstate->frame_stack.free); + tstate->frame_stack.free -= size; return res; } return (_PyInterpreterFrame *)push_chunk(&tstate->frame_stack, (int)size); } +void +pop_chunk(PyThreadState *tstate) +{ + _PyStackChunk *chunk = tstate->frame_stack.current_chunk; + _PyStackChunk *previous = chunk->previous; + // push_chunk ensures that the root chunk is never popped: + assert(previous); + tstate->frame_stack.free = previous->free; + tstate->frame_stack.current_chunk = previous; + tstate->frame_stack.limit = (PyObject **)(((char *)previous) + previous->size_in_bytes); + _PyObject_VirtualFree(chunk, chunk->size_in_bytes); +} + void _PyThreadState_PopFrame(PyThreadState *tstate, 
_PyInterpreterFrame * frame) { + assert(framestack_is_consistent(&tstate->frame_stack)); assert(tstate->frame_stack.current_chunk); PyObject **base = (PyObject **)frame; - if (base == &tstate->frame_stack.current_chunk->data[0]) { - _PyStackChunk *chunk = tstate->frame_stack.current_chunk; - _PyStackChunk *previous = chunk->previous; - // push_chunk ensures that the root chunk is never popped: - assert(previous); - tstate->frame_stack.top = &previous->data[previous->top]; - tstate->frame_stack.current_chunk = previous; - _PyObject_VirtualFree(chunk, chunk->size); - tstate->frame_stack.limit = (PyObject **)(((char *)previous) + previous->size); - } - else { - assert(tstate->frame_stack.top); - assert(tstate->frame_stack.top >= base); - tstate->frame_stack.top = base; + if (base != &tstate->frame_stack.current_chunk->data[0]) { + assert(tstate->frame_stack.limit); + tstate->frame_stack.free = tstate->frame_stack.limit - base; + assert(tstate->frame_stack.free > 0); + return; } + pop_chunk(tstate); } void _PyFrameStack_Init(_PyFrameStack *fs, int chunk_size) { fs->chunk_size = chunk_size; fs->current_chunk = NULL; - fs->top = NULL; + fs->free = 0; fs->limit = NULL; } @@ -2233,13 +2274,14 @@ void _PyFrameStack_Swap(_PyFrameStack *fs) tstate->frame_stack = temp; } -void _PyFrameStack_Free(_PyFrameStack *fs) +void _PyFrameStack_Clear(_PyFrameStack *fs) { _PyStackChunk *chunk = fs->current_chunk; fs->current_chunk = NULL; + fs->free = 0; while (chunk != NULL) { _PyStackChunk *previous = chunk->previous; - _PyObject_VirtualFree(chunk, chunk->size); + _PyObject_VirtualFree(chunk, chunk->size_in_bytes); chunk = previous; } }