From 54601ba0e87889beb6ca83264061450e8aeb212c Mon Sep 17 00:00:00 2001 From: animalize Date: Sat, 2 Mar 2019 11:43:24 +0800 Subject: [PATCH] re module, allocate SRE_REPEAT in a memory pool Fix memory leak when a match is terminated by a signal or memory allocation failure. --- .../2019-03-03-17-39-55.bpo-23689.cmqYbo.rst | 2 + Modules/_sre.c | 95 ++++++++++++++++++- Modules/sre.h | 6 ++ Modules/sre_lib.h | 4 +- 4 files changed, 104 insertions(+), 3 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2019-03-03-17-39-55.bpo-23689.cmqYbo.rst diff --git a/Misc/NEWS.d/next/Library/2019-03-03-17-39-55.bpo-23689.cmqYbo.rst b/Misc/NEWS.d/next/Library/2019-03-03-17-39-55.bpo-23689.cmqYbo.rst new file mode 100644 index 00000000000000..30f083fa9176f3 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2019-03-03-17-39-55.bpo-23689.cmqYbo.rst @@ -0,0 +1,2 @@ +re module: fix memory leak when a match is terminated by a signal or memory +allocation failure. Patch by Ma Lin. diff --git a/Modules/_sre.c b/Modules/_sre.c index 5cea7562f2807a..4d36a2abd9ff5f 100644 --- a/Modules/_sre.c +++ b/Modules/_sre.c @@ -223,6 +223,89 @@ data_stack_grow(SRE_STATE* state, Py_ssize_t size) return 0; } + +/* Memory pool functions for SRE_REPEAT; they avoid a memory + leak when the SRE(match) function terminates abruptly. + state->pool_used_repeats is a doubly linked list, so that we + can remove a SRE_REPEAT node from it. + state->pool_unused_repeats is a singly linked list; nodes are + put and taken at the head. 
*/ +/* Get a node: reuse the head of the unused pool if there is one, else allocate; link it at the head of the used pool. Returns NULL (no exception set) if allocation fails. */ +static SRE_REPEAT* +mempool_repeat_malloc(SRE_STATE *state) +{ + SRE_REPEAT *repeat, *temp; + + if (state->pool_unused_repeats) { + /* unused pool has a node: reuse it */ + repeat = state->pool_unused_repeats; + + /* remove from unused pool */ + state->pool_unused_repeats = repeat->mem_next; + } else { + repeat = PyObject_MALLOC(sizeof(SRE_REPEAT)); + if (!repeat) + return NULL; + } + + /* add to used pool (push at the head of the doubly linked list) */ + temp = state->pool_used_repeats; + if (temp) + temp->mem_prev = repeat; + repeat->mem_prev = NULL; + repeat->mem_next = temp; + state->pool_used_repeats = repeat; + + return repeat; +} +/* Unlink repeat from the used pool and push it on the unused pool for reuse; the memory itself is not released here. */ +static void +mempool_repeat_free(SRE_STATE *state, SRE_REPEAT *repeat) +{ + SRE_REPEAT *prev, *next; + + /* remove from used pool */ + prev = repeat->mem_prev; + next = repeat->mem_next; + + if (prev) { + prev->mem_next = next; + } else { + state->pool_used_repeats = next; + } + if (next) + next->mem_prev = prev; + + /* add to unused pool (only mem_next is maintained there) */ + repeat->mem_next = state->pool_unused_repeats; + state->pool_unused_repeats = repeat; +} +/* PyObject_FREE every node in both pools and reset both list heads to NULL. */ +static void +mempool_repeat_clear(SRE_STATE *state) +{ + SRE_REPEAT *next, *temp; + + /* clear used pool */ + next = state->pool_used_repeats; + while (next) { + temp = next; + next = temp->mem_next; + PyObject_FREE(temp); + } + state->pool_used_repeats = NULL; + + /* clear unused pool */ + next = state->pool_unused_repeats; + while (next) { + temp = next; + next = temp->mem_next; + PyObject_FREE(temp); + } + state->pool_unused_repeats = NULL; +} + + /* generate 8-bit version */ #define SRE_CHAR Py_UCS1 @@ -348,7 +431,12 @@ state_reset(SRE_STATE* state) state->repeat = NULL; - data_stack_dealloc(state); + /* reuse stack if stack_size <= 16 KiB, + to avoid frequent memory alloc/free. 
*/ + if (state->data_stack_size <= 16*1024) + state->data_stack_base = 0; + else + data_stack_dealloc(state); } static void* @@ -442,6 +530,10 @@ state_init(SRE_STATE* state, PatternObject* pattern, PyObject* string, state->match_all = 0; state->must_advance = 0; + state->repeat = NULL; + state->pool_used_repeats = NULL; + state->pool_unused_repeats = NULL; + state->beginning = ptr; state->start = (void*) ((char*) ptr + start * state->charsize); @@ -470,6 +562,7 @@ state_fini(SRE_STATE* state) data_stack_dealloc(state); PyMem_Del(state->mark); state->mark = NULL; + mempool_repeat_clear(state); } /* calculate offset from start of string */ diff --git a/Modules/sre.h b/Modules/sre.h index a7284881457c3b..7621d1dd6bed9f 100644 --- a/Modules/sre.h +++ b/Modules/sre.h @@ -57,6 +57,9 @@ typedef struct SRE_REPEAT_T { SRE_CODE* pattern; /* points to REPEAT operator arguments */ void* last_ptr; /* helper to check for infinite loops */ struct SRE_REPEAT_T *prev; /* points to previous repeat context */ + /* for memory pool */ + struct SRE_REPEAT_T *mem_prev; + struct SRE_REPEAT_T *mem_next; } SRE_REPEAT; typedef struct { @@ -83,6 +86,9 @@ typedef struct { size_t data_stack_base; /* current repeat context */ SRE_REPEAT *repeat; + /* memory pool for SRE_REPEAT object */ + SRE_REPEAT *pool_used_repeats; + SRE_REPEAT *pool_unused_repeats; } SRE_STATE; typedef struct { diff --git a/Modules/sre_lib.h b/Modules/sre_lib.h index 437ab43f434a62..82bddfddd52cad 100644 --- a/Modules/sre_lib.h +++ b/Modules/sre_lib.h @@ -988,7 +988,7 @@ SRE(match)(SRE_STATE* state, SRE_CODE* pattern, int toplevel) ctx->pattern[1], ctx->pattern[2])); /* install new repeat context */ - ctx->u.rep = (SRE_REPEAT*) PyObject_MALLOC(sizeof(*ctx->u.rep)); + ctx->u.rep = mempool_repeat_malloc(state); if (!ctx->u.rep) { PyErr_NoMemory(); RETURN_FAILURE; @@ -1002,7 +1002,7 @@ SRE(match)(SRE_STATE* state, SRE_CODE* pattern, int toplevel) state->ptr = ctx->ptr; DO_JUMP(JUMP_REPEAT, jump_repeat, 
ctx->pattern+ctx->pattern[0]); state->repeat = ctx->u.rep->prev; - PyObject_FREE(ctx->u.rep); + mempool_repeat_free(state, ctx->u.rep); if (ret) { RETURN_ON_ERROR(ret);