From 2e39aa4c83519130853e1d80f42b3e483416e250 Mon Sep 17 00:00:00 2001 From: Yannick Jadoul Date: Sun, 27 Dec 2020 19:22:46 +0100 Subject: [PATCH 1/9] Plug leaking function_record objects when exceptions are thrown --- include/pybind11/pybind11.h | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/include/pybind11/pybind11.h b/include/pybind11/pybind11.h index 82003a9092..1c7345eada 100644 --- a/include/pybind11/pybind11.h +++ b/include/pybind11/pybind11.h @@ -115,8 +115,8 @@ class cpp_function : public function { protected: /// Space optimization: don't inline this frequently instantiated fragment - PYBIND11_NOINLINE detail::function_record *make_function_record() { - return new detail::function_record(); + PYBIND11_NOINLINE std::unique_ptr make_function_record() { + return std::unique_ptr(new detail::function_record()); } /// Special internal constructor for functors, lambda functions, etc. @@ -126,7 +126,8 @@ class cpp_function : public function { struct capture { remove_reference_t f; }; /* Store the function including any extra state it might have (e.g. a lambda capture object) */ - auto rec = make_function_record(); + auto unique_rec = make_function_record(); + auto rec = unique_rec.get(); /* Store the capture object directly in the function record if there is enough space */ if (sizeof(capture) <= sizeof(rec->data)) { @@ -207,7 +208,7 @@ class cpp_function : public function { PYBIND11_DESCR_CONSTEXPR auto types = decltype(signature)::types(); /* Register the function with Python from generic (non-templated) code */ - initialize_generic(rec, signature.text, types.data(), sizeof...(Args)); + initialize_generic(std::move(unique_rec), signature.text, types.data(), sizeof...(Args)); if (cast_in::has_args) rec->has_args = true; if (cast_in::has_kwargs) rec->has_kwargs = true; @@ -224,8 +225,9 @@ class cpp_function : public function { } /// Register a function call with Python (generic non-templated code goes here) - void initialize_generic(detail::function_record *rec, const char *text, + void initialize_generic(std::unique_ptr &&unique_rec, const char *text, const std::type_info *const *types, size_t args) { + auto rec = unique_rec.get(); /* Create copies of all referenced C-style strings */ rec->name = strdup(rec->name ? rec->name : ""); @@ -356,7 +358,7 @@ class cpp_function : public function { rec->def->ml_meth = reinterpret_cast(reinterpret_cast(*dispatcher)); rec->def->ml_flags = METH_VARARGS | METH_KEYWORDS; - capsule rec_capsule(rec, [](void *ptr) { + capsule rec_capsule(unique_rec.release(), [](void *ptr) { destruct((detail::function_record *) ptr); }); @@ -393,13 +395,13 @@ class cpp_function : public function { chain_start = rec; rec->next = chain; auto rec_capsule = reinterpret_borrow(((PyCFunctionObject *) m_ptr)->m_self); - rec_capsule.set_pointer(rec); + rec_capsule.set_pointer(unique_rec.release()); } else { // Or end of chain (normal behavior) chain_start = chain; while (chain->next) chain = chain->next; - chain->next = rec; + chain->next = unique_rec.release(); } } From 39cf3b859eba41844d4df55616b3f94c23bbfb83 Mon Sep 17 00:00:00 2001 From: Yannick Jadoul Date: Sun, 27 Dec 2020 21:17:56 +0100 Subject: [PATCH 2/9] Plug leak of strdup'ed strings in function_record --- include/pybind11/pybind11.h | 39 +++++++++++++++++++++++++++++-------- 1 file changed, 31 insertions(+), 8 deletions(-) diff --git a/include/pybind11/pybind11.h b/include/pybind11/pybind11.h index 1c7345eada..b17f5d4707 100644 --- a/include/pybind11/pybind11.h +++ b/include/pybind11/pybind11.h @@ -224,21 +224,41 @@ class cpp_function : public function { } } + class strdup_guard { + public: + ~strdup_guard() { + for (auto s : strings) + std::free(s); + } + char *operator()(const char *s) { + auto t = strdup(s); + strings.emplace_back(t); + return t; + } + void release() { + strings.clear(); + } + private: + std::vector strings; + }; + /// Register a function call with Python (generic non-templated code goes here) void initialize_generic(std::unique_ptr &&unique_rec, const char *text, const std::type_info *const *types, size_t args) { auto rec = unique_rec.get(); + strdup_guard guarded_strdup; + /* Create copies of all referenced C-style strings */ - rec->name = strdup(rec->name ? rec->name : ""); - if (rec->doc) rec->doc = strdup(rec->doc); + rec->name = guarded_strdup(rec->name ? rec->name : ""); + if (rec->doc) rec->doc = guarded_strdup(rec->doc); for (auto &a: rec->args) { if (a.name) - a.name = strdup(a.name); + a.name = guarded_strdup(a.name); if (a.descr) - a.descr = strdup(a.descr); + a.descr = guarded_strdup(a.descr); else if (a.value) - a.descr = strdup(repr(a.value).cast().c_str()); + a.descr = guarded_strdup(repr(a.value).cast().c_str()); } rec->is_constructor = !strcmp(rec->name, "__init__") || !strcmp(rec->name, "__setstate__"); @@ -321,13 +341,13 @@ class cpp_function : public function { #if PY_MAJOR_VERSION < 3 if (strcmp(rec->name, "__next__") == 0) { std::free(rec->name); - rec->name = strdup("next"); + rec->name = guarded_strdup("next"); } else if (strcmp(rec->name, "__bool__") == 0) { std::free(rec->name); - rec->name = strdup("__nonzero__"); + rec->name = guarded_strdup("__nonzero__"); } #endif - rec->signature = strdup(signature.c_str()); + rec->signature = guarded_strdup(signature.c_str()); rec->args.shrink_to_fit(); rec->nargs = (std::uint16_t) args; @@ -361,6 +381,7 @@ class cpp_function : public function { capsule rec_capsule(unique_rec.release(), [](void *ptr) { destruct((detail::function_record *) ptr); }); + guarded_strdup.release(); object scope_module; if (rec->scope) { @@ -396,12 +417,14 @@ class cpp_function : public function { rec->next = chain; auto rec_capsule = reinterpret_borrow(((PyCFunctionObject *) m_ptr)->m_self); rec_capsule.set_pointer(unique_rec.release()); + guarded_strdup.release(); } else { // Or end of chain (normal behavior) chain_start = chain; while (chain->next) chain = chain->next; chain->next = unique_rec.release(); + guarded_strdup.release(); } } From 69b6cb5191ab9c6987ccea916f8882f8780496b9 Mon Sep 17 00:00:00 2001 From: Yannick Jadoul Date: Mon, 28 Dec 2020 23:33:16 +0100 Subject: [PATCH 3/9] Some extra comments about the function_record ownership dance --- include/pybind11/pybind11.h | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/include/pybind11/pybind11.h b/include/pybind11/pybind11.h index b17f5d4707..1c857bf99c 100644 --- a/include/pybind11/pybind11.h +++ b/include/pybind11/pybind11.h @@ -126,6 +126,7 @@ class cpp_function : public function { struct capture { remove_reference_t f; }; /* Store the function including any extra state it might have (e.g. a lambda capture object) */ + // The unique_ptr makes sure nothing is leaked in case of an exception. auto unique_rec = make_function_record(); auto rec = unique_rec.get(); @@ -208,6 +209,7 @@ class cpp_function : public function { PYBIND11_DESCR_CONSTEXPR auto types = decltype(signature)::types(); /* Register the function with Python from generic (non-templated) code */ + // Pass on the ownership over the `unique_rec` to `initialize_generic`. `rec` stays valid. initialize_generic(std::move(unique_rec), signature.text, types.data(), sizeof...(Args)); if (cast_in::has_args) rec->has_args = true; @@ -224,6 +226,8 @@ class cpp_function : public function { } } + // Utility class that keeps track of all duplicated strings, and cleans them up in its destructor, + // unless they are released. Basically a RAII-solution to deal with exceptions along the way. class strdup_guard { public: ~strdup_guard() { @@ -247,6 +251,11 @@ class cpp_function : public function { const std::type_info *const *types, size_t args) { auto rec = unique_rec.get(); + // Keep track of strdup'ed strings, and clean them up as long as the function's capsule + // has not taken ownership yet (when `unique_rec.release()` is called). + // Note: This cannot easily be fixed by a `unique_ptr` with custom deleter, because the strings + // are only referenced before strdup'ing. So only *after* the following block could `destruct` + // safely be called, but even then, `repr` could still throw in the middle of copying all strings. strdup_guard guarded_strdup; /* Create copies of all referenced C-style strings */ From 780ee72aa2bd2f1cf244fae1061da0c215102e66 Mon Sep 17 00:00:00 2001 From: Yannick Jadoul Date: Mon, 4 Jan 2021 13:50:06 +0100 Subject: [PATCH 4/9] Clean up the function_record better, in case of exceptions --- include/pybind11/pybind11.h | 30 ++++++++++++++++++++---------- 1 file changed, 20 insertions(+), 10 deletions(-) diff --git a/include/pybind11/pybind11.h b/include/pybind11/pybind11.h index 1c857bf99c..9ad9d72d29 100644 --- a/include/pybind11/pybind11.h +++ b/include/pybind11/pybind11.h @@ -114,9 +114,13 @@ class cpp_function : public function { object name() const { return attr("__name__"); } protected: + using unique_function_record = std::unique_ptr; + /// Space optimization: don't inline this frequently instantiated fragment - PYBIND11_NOINLINE std::unique_ptr make_function_record() { - return std::unique_ptr(new detail::function_record()); + PYBIND11_NOINLINE unique_function_record make_function_record() { + // `destruct(function_record)`: `initialize_generic` copies strings and + // takes care of cleaning up in case of exceptions. + return unique_function_record(new detail::function_record(), destruct); } /// Special internal constructor for functors, lambda functions, etc. @@ -247,7 +251,7 @@ class cpp_function : public function { }; /// Register a function call with Python (generic non-templated code goes here) - void initialize_generic(std::unique_ptr &&unique_rec, const char *text, + void initialize_generic(unique_function_record &&unique_rec, const char *text, const std::type_info *const *types, size_t args) { auto rec = unique_rec.get(); @@ -486,6 +490,7 @@ class cpp_function : public function { } /// When a cpp_function is GCed, release any memory allocated by pybind11 + template static void destruct(detail::function_record *rec) { // If on Python 3.9, check the interpreter "MICRO" (patch) version. // If this is running on 3.9.0, we have to work around a bug. @@ -497,14 +502,19 @@ class cpp_function : public function { detail::function_record *next = rec->next; if (rec->free_data) rec->free_data(rec); - std::free((char *) rec->name); - std::free((char *) rec->doc); - std::free((char *) rec->signature); - for (auto &arg: rec->args) { - std::free(const_cast(arg.name)); - std::free(const_cast(arg.descr)); - arg.value.dec_ref(); + // During initialization, these strings might not have been copied yet, + // so they cannot be freed. Once the function has been created, they can. + if (DeleteStrings) { + std::free((char *) rec->name); + std::free((char *) rec->doc); + std::free((char *) rec->signature); + for (auto &arg: rec->args) { + std::free(const_cast(arg.name)); + std::free(const_cast(arg.descr)); + } } + for (auto &arg: rec->args) + arg.value.dec_ref(); if (rec->def) { std::free(const_cast(rec->def->ml_doc)); // Python 3.9.0 decref's these in the wrong order; rec->def From 10aad45c95deca82ecc389b24a6414ce7493cba9 Mon Sep 17 00:00:00 2001 From: Yannick Jadoul Date: Mon, 4 Jan 2021 14:38:55 +0100 Subject: [PATCH 5/9] Demonstrate some extra function_record leaks --- tests/test_constants_and_functions.cpp | 15 +++++++++++++++ tests/test_constants_and_functions.py | 11 +++++++++++ 2 files changed, 26 insertions(+) diff --git a/tests/test_constants_and_functions.cpp b/tests/test_constants_and_functions.cpp index f607795593..cd3c8b7933 100644 --- a/tests/test_constants_and_functions.cpp +++ b/tests/test_constants_and_functions.cpp @@ -124,4 +124,19 @@ TEST_SUBMODULE(constants_and_functions, m) { m.def("f2", f2); m.def("f3", f3); m.def("f4", f4); + + // test_function_record_leaks + struct LargeCapture { + // This should always be enough to trigger the alternative branch + // where `sizeof(capture) > sizeof(rec->data)` + uint64_t zeros[10] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9}; + }; + m.def("register_large_capture_with_invalid_arguments", [](py::module_ m) { + LargeCapture capture; // VS 2015's MSVC is acting up if we create the array here + m.def("should_raise", [capture](int) { return capture.zeros[9] + 33; }, py::kw_only(), py::arg()); + }); + m.def("register_with_raising_repr", [](py::module_ m, py::object default_value) { + m.def("should_raise", [](int, int, py::object) { return 42; }, "some docstring", + py::arg_v("x", 42), py::arg_v("y", 42, ""), py::arg_v("z", default_value)); + }); } diff --git a/tests/test_constants_and_functions.py b/tests/test_constants_and_functions.py index b980ccf1cc..ff13bd0f26 100644 --- a/tests/test_constants_and_functions.py +++ b/tests/test_constants_and_functions.py @@ -40,3 +40,14 @@ def test_exception_specifiers(): assert m.f2(53) == 55 assert m.f3(86) == 89 assert m.f4(140) == 144 + + +def test_function_record_leaks(): + class RaisingRepr: + def __repr__(self): + raise RuntimeError("Surprise!") + + with pytest.raises(RuntimeError): + m.register_large_capture_with_invalid_arguments(m) + with pytest.raises(RuntimeError): + m.register_with_raising_repr(m, RaisingRepr()) From dade4c9db19d292c3462880ae28aaeafe13ff3f8 Mon Sep 17 00:00:00 2001 From: Yannick Jadoul Date: Mon, 4 Jan 2021 19:54:49 +0100 Subject: [PATCH 6/9] Change DeleteStrings template argument to free_strings runtime argument in destruct(function_record *) --- include/pybind11/pybind11.h | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/include/pybind11/pybind11.h b/include/pybind11/pybind11.h index 9ad9d72d29..19cb2f9f36 100644 --- a/include/pybind11/pybind11.h +++ b/include/pybind11/pybind11.h @@ -118,9 +118,10 @@ class cpp_function : public function { /// Space optimization: don't inline this frequently instantiated fragment PYBIND11_NOINLINE unique_function_record make_function_record() { - // `destruct(function_record)`: `initialize_generic` copies strings and - // takes care of cleaning up in case of exceptions. - return unique_function_record(new detail::function_record(), destruct); + // `destruct(function_record, false)`: `initialize_generic` copies strings and + // takes care of cleaning up in case of exceptions. So set `free_srings` to `false`. + auto destruct_no_free_strings = [](detail::function_record * rec) { destruct(rec, false); }; + return unique_function_record(new detail::function_record(), destruct_no_free_strings); } /// Special internal constructor for functors, lambda functions, etc. @@ -490,8 +491,7 @@ class cpp_function : public function { } /// When a cpp_function is GCed, release any memory allocated by pybind11 - template - static void destruct(detail::function_record *rec) { + static void destruct(detail::function_record *rec, bool free_strings = true) { // If on Python 3.9, check the interpreter "MICRO" (patch) version. // If this is running on 3.9.0, we have to work around a bug. #if !defined(PYPY_VERSION) && PY_MAJOR_VERSION == 3 && PY_MINOR_VERSION == 9 @@ -504,7 +504,8 @@ class cpp_function : public function { rec->free_data(rec); // During initialization, these strings might not have been copied yet, // so they cannot be freed. Once the function has been created, they can. - if (DeleteStrings) { + // Check `make_function_record` for more details. + if (free_strings) { std::free((char *) rec->name); std::free((char *) rec->doc); std::free((char *) rec->signature); From 5d5367aa3baf0f2d30c83f96ea8c3168c6d0170a Mon Sep 17 00:00:00 2001 From: Yannick Jadoul Date: Mon, 4 Jan 2021 22:08:14 +0100 Subject: [PATCH 7/9] Zero-state unique_function_record deleter object --- include/pybind11/pybind11.h | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/include/pybind11/pybind11.h b/include/pybind11/pybind11.h index 19cb2f9f36..de49312581 100644 --- a/include/pybind11/pybind11.h +++ b/include/pybind11/pybind11.h @@ -114,14 +114,16 @@ class cpp_function : public function { object name() const { return attr("__name__"); } protected: - using unique_function_record = std::unique_ptr; + struct InitializingFunctionRecordDeleter { + // `destruct(function_record, false)`: `initialize_generic` copies strings and + // takes care of cleaning up in case of exceptions. So pass `false` to `free_strings`. + void operator()(detail::function_record * rec) { destruct(rec, false); } + }; + using unique_function_record = std::unique_ptr; /// Space optimization: don't inline this frequently instantiated fragment PYBIND11_NOINLINE unique_function_record make_function_record() { - // `destruct(function_record, false)`: `initialize_generic` copies strings and - // takes care of cleaning up in case of exceptions. So set `free_srings` to `false`. - auto destruct_no_free_strings = [](detail::function_record * rec) { destruct(rec, false); }; - return unique_function_record(new detail::function_record(), destruct_no_free_strings); + return unique_function_record(new detail::function_record()); } /// Special internal constructor for functors, lambda functions, etc. From 6578453c774651dad7c328beba6fcb1aca934f88 Mon Sep 17 00:00:00 2001 From: Yannick Jadoul Date: Wed, 13 Jan 2021 15:15:40 +0100 Subject: [PATCH 8/9] Clarify rvalue reference to unique_ptr parameter in initialize_generic --- include/pybind11/pybind11.h | 3 +++ 1 file changed, 3 insertions(+) diff --git a/include/pybind11/pybind11.h b/include/pybind11/pybind11.h index de49312581..21ae7d8bc1 100644 --- a/include/pybind11/pybind11.h +++ b/include/pybind11/pybind11.h @@ -256,6 +256,9 @@ class cpp_function : public function { /// Register a function call with Python (generic non-templated code goes here) void initialize_generic(unique_function_record &&unique_rec, const char *text, const std::type_info *const *types, size_t args) { + // Do NOT receive `unique_rec` by value. If this function fails to move out the unique_ptr, + // we do not want this to destuct the pointer. `initialize` (the caller) still relies on the + // pointee being alive after this call. Only move out if a `capsule` is going to keep it alive. auto rec = unique_rec.get(); // Keep track of strdup'ed strings, and clean them up as long as the function's capsule From 1c20521292c3005de50e97b66c5213dc63bd9c7a Mon Sep 17 00:00:00 2001 From: Yannick Jadoul Date: Wed, 13 Jan 2021 16:27:37 +0100 Subject: [PATCH 9/9] Use push_back with const char * instead of emplace_back --- include/pybind11/pybind11.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/pybind11/pybind11.h b/include/pybind11/pybind11.h index 21ae7d8bc1..ccbca43a69 100644 --- a/include/pybind11/pybind11.h +++ b/include/pybind11/pybind11.h @@ -243,7 +243,7 @@ class cpp_function : public function { } char *operator()(const char *s) { auto t = strdup(s); - strings.emplace_back(t); + strings.push_back(t); return t; } void release() {