From 5f14311334cfeefc5efda0e3ea9ae10a0773b756 Mon Sep 17 00:00:00 2001 From: Nikita Sobolev Date: Fri, 7 Oct 2022 19:53:42 +0300 Subject: [PATCH 1/2] gh-94808: Cover `%p` in `PyUnicode_FromFormat` (GH-96677) Co-authored-by: Jelle Zijlstra (cherry picked from commit 72c166add89a0cd992d66f75ce94eee5eb675a99) Co-authored-by: Nikita Sobolev --- Lib/test/test_unicode.py | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/Lib/test/test_unicode.py b/Lib/test/test_unicode.py index 90bd75f550dff6..38214308451cc2 100644 --- a/Lib/test/test_unicode.py +++ b/Lib/test/test_unicode.py @@ -2809,6 +2809,25 @@ def check_format(expected, format, *args): check_format('repr=abc', b'repr=%V', 'abc', b'xyz') + # test %p + # We cannot test the exact result, + # because it returns a hex representation of a C pointer, + # which is going to be different each time. But, we can test the format. + p_format_regex = r'^0x[a-zA-Z0-9]{8,}$' + p_format1 = PyUnicode_FromFormat(b'%p', 'abc') + self.assertIsInstance(p_format1, str) + self.assertRegex(p_format1, p_format_regex) + + p_format2 = PyUnicode_FromFormat(b'%p %p', '123456', b'xyz') + self.assertIsInstance(p_format2, str) + self.assertRegex(p_format2, + r'0x[a-zA-Z0-9]{8,} 0x[a-zA-Z0-9]{8,}') + + # Extra args are ignored: + p_format3 = PyUnicode_FromFormat(b'%p', '123456', None, b'xyz') + self.assertIsInstance(p_format3, str) + self.assertRegex(p_format3, p_format_regex) + # Test string decode from parameter of %s using utf-8. # b'\xe4\xba\xba\xe6\xb0\x91' is utf-8 encoded byte sequence of # '\u4eba\u6c11' From fdea9098d8d530b77520bbbc1656b930e126d7d6 Mon Sep 17 00:00:00 2001 From: Jelle Zijlstra Date: Fri, 7 Oct 2022 11:05:16 -0700 Subject: [PATCH 2/2] Apply suggestions from code review --- Lib/test/test_unicode.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Lib/test/test_unicode.py b/Lib/test/test_unicode.py index 38214308451cc2..9b0e4b230506a8 100644 --- a/Lib/test/test_unicode.py +++ b/Lib/test/test_unicode.py @@ -2813,7 +2813,7 @@ def check_format(expected, format, *args): # We cannot test the exact result, # because it returns a hex representation of a C pointer, # which is going to be different each time. But, we can test the format. - p_format_regex = r'^0x[a-zA-Z0-9]{8,}$' + p_format_regex = r'^0x[a-zA-Z0-9]{3,}$' p_format1 = PyUnicode_FromFormat(b'%p', 'abc') self.assertIsInstance(p_format1, str) self.assertRegex(p_format1, p_format_regex) @@ -2821,7 +2821,7 @@ def check_format(expected, format, *args): p_format2 = PyUnicode_FromFormat(b'%p %p', '123456', b'xyz') self.assertIsInstance(p_format2, str) self.assertRegex(p_format2, - r'0x[a-zA-Z0-9]{8,} 0x[a-zA-Z0-9]{8,}') + r'0x[a-zA-Z0-9]{3,} 0x[a-zA-Z0-9]{3,}') # Extra args are ignored: p_format3 = PyUnicode_FromFormat(b'%p', '123456', None, b'xyz')