diff --git a/Doc/library/pyexpat.rst b/Doc/library/pyexpat.rst index c0e9999f4b1270..1efdb36a4b379b 100644 --- a/Doc/library/pyexpat.rst +++ b/Doc/library/pyexpat.rst @@ -316,6 +316,15 @@ just past the last parse event (regardless of whether there was an associated callback). +.. attribute:: xmlparser.CurrentByteCount + + Number of bytes in the current event. This is ``0`` for the end-tag event + of an *empty-element* tag and for events inside a reference to an internal + entity. + + .. versionadded:: 3.14 + + .. attribute:: xmlparser.CurrentByteIndex Current byte index in the parser input. diff --git a/Lib/test/test_pyexpat.py b/Lib/test/test_pyexpat.py index 1d56ccd71cf962..d1935aaa764a19 100644 --- a/Lib/test/test_pyexpat.py +++ b/Lib/test/test_pyexpat.py @@ -506,6 +506,7 @@ def EndElementHandler(self, name): def check_pos(self, event): pos = (event, self.parser.CurrentByteIndex, + self.parser.CurrentByteCount, self.parser.CurrentLineNumber, self.parser.CurrentColumnNumber) self.assertTrue(self.upto < len(self.expected_list), @@ -520,8 +521,8 @@ def test(self): self.parser.StartElementHandler = self.StartElementHandler self.parser.EndElementHandler = self.EndElementHandler self.upto = 0 - self.expected_list = [('s', 0, 1, 0), ('s', 5, 2, 1), ('s', 11, 3, 2), - ('e', 15, 3, 6), ('e', 17, 4, 1), ('e', 22, 5, 0)] + self.expected_list = [('s', 0, 3, 1, 0), ('s', 5, 3, 2, 1), ('s', 11, 4, 3, 2), + ('e', 15, 0, 3, 6), ('e', 17, 4, 4, 1), ('e', 22, 4, 5, 0)] xml = b'<a>\n <b>\n  <c/>\n </b>\n</a>' self.parser.Parse(xml, True) diff --git a/Misc/ACKS b/Misc/ACKS index b031eb7c11f73f..f02dd1b5f3b6a7 100644 --- a/Misc/ACKS +++ b/Misc/ACKS @@ -882,6 +882,7 @@ Muhammad Jehanzeb Drew Jenkins Flemming Kjær Jensen Philip H. 
Jensen +Jesper Jensen Philip Jenvey MunSic Jeong Chris Jerdonek diff --git a/Misc/NEWS.d/next/Library/2024-09-11-16-40-12.gh-issue-123963.TzzIY8.rst b/Misc/NEWS.d/next/Library/2024-09-11-16-40-12.gh-issue-123963.TzzIY8.rst new file mode 100644 index 00000000000000..2b7057d72e025c --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-09-11-16-40-12.gh-issue-123963.TzzIY8.rst @@ -0,0 +1,3 @@ +Expose the :attr:`xmlparser.CurrentByteCount` field for :mod:`Expat XML +<xml.parsers.expat>` parsers. +Patch by Jesper Jensen. diff --git a/Modules/pyexpat.c b/Modules/pyexpat.c index 9733bc34f7c80a..c4ba8a92b8d92c 100644 --- a/Modules/pyexpat.c +++ b/Modules/pyexpat.c @@ -1349,6 +1349,7 @@ INT_GETTER(ErrorByteIndex) INT_GETTER(CurrentLineNumber) INT_GETTER(CurrentColumnNumber) INT_GETTER(CurrentByteIndex) +INT_GETTER(CurrentByteCount) #undef INT_GETTER @@ -1529,6 +1530,7 @@ static PyGetSetDef xmlparse_getsetlist[] = { XMLPARSE_GETTER_DEF(CurrentLineNumber) XMLPARSE_GETTER_DEF(CurrentColumnNumber) XMLPARSE_GETTER_DEF(CurrentByteIndex) + XMLPARSE_GETTER_DEF(CurrentByteCount) XMLPARSE_GETTER_SETTER_DEF(buffer_size) XMLPARSE_GETTER_SETTER_DEF(buffer_text) XMLPARSE_GETTER_DEF(buffer_used)