From 5d5a1751590cfe5052cd3fda876647f9fb1a1d6f Mon Sep 17 00:00:00 2001 From: Georges Toth Date: Fri, 4 Sep 2020 18:15:19 +0200 Subject: [PATCH 1/3] bpo-30681: Support invalid date format or value in email Date header Re-submitting an older PR which was abandoned but still relevant. --- Doc/library/email.errors.rst | 3 +++ Doc/library/email.utils.rst | 5 ++++- Lib/email/errors.py | 3 +++ Lib/email/headerregistry.py | 9 ++++++++- Lib/test/test_email/test_headerregistry.py | 16 ++++++++++++++++ Lib/test/test_email/test_inversion.py | 8 ++++++++ Lib/test/test_email/test_utils.py | 13 +++++++++++++ .../2020-09-04-17-33-04.bpo-30681.LR4fnY.rst | 2 ++ 8 files changed, 57 insertions(+), 2 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2020-09-04-17-33-04.bpo-30681.LR4fnY.rst diff --git a/Doc/library/email.errors.rst b/Doc/library/email.errors.rst index f4b9f52509689e..7a77640571cb1e 100644 --- a/Doc/library/email.errors.rst +++ b/Doc/library/email.errors.rst @@ -112,3 +112,6 @@ All defect classes are subclassed from :class:`email.errors.MessageDefect`. * :class:`InvalidBase64LengthDefect` -- When decoding a block of base64 encoded bytes, the number of non-padding base64 characters was invalid (1 more than a multiple of 4). The encoded block was kept as-is. + +* :class:`InvalidDateDefect` -- When decoding an invalid or unparsable date field. + The original value is kept as-is. \ No newline at end of file diff --git a/Doc/library/email.utils.rst b/Doc/library/email.utils.rst index 4d0e920eb0ad29..c166a5498b1f4e 100644 --- a/Doc/library/email.utils.rst +++ b/Doc/library/email.utils.rst @@ -124,7 +124,10 @@ of the new API. .. function:: parsedate_to_datetime(date) The inverse of :func:`format_datetime`. Performs the same function as - :func:`parsedate`, but on success returns a :mod:`~datetime.datetime`. If + :func:`parsedate`, but on success returns a :mod:`~datetime.datetime`; + otherwise ``None`` may be returned if parsing fails, or a ``ValueError`` + raised if *date* contains an invalid value such as an hour greater than + 23 or a timezone offset not between -24 and 24 hours. If the input date has a timezone of ``-0000``, the ``datetime`` will be a naive ``datetime``, and if the date is conforming to the RFCs it will represent a time in UTC but with no indication of the actual source timezone of the diff --git a/Lib/email/errors.py b/Lib/email/errors.py index d28a6800104bab..1d258c34fc9d4a 100644 --- a/Lib/email/errors.py +++ b/Lib/email/errors.py @@ -108,3 +108,6 @@ class NonASCIILocalPartDefect(HeaderDefect): """local_part contains non-ASCII characters""" # This defect only occurs during unicode parsing, not when # parsing messages decoded from binary. + +class InvalidDateDefect(HeaderDefect): + """Header has unparseable or invalid date""" diff --git a/Lib/email/headerregistry.py b/Lib/email/headerregistry.py index 5d84fc0d82d0b0..92b37fc3a0dc01 100644 --- a/Lib/email/headerregistry.py +++ b/Lib/email/headerregistry.py @@ -302,7 +302,14 @@ def parse(cls, value, kwds): kwds['parse_tree'] = parser.TokenList() return if isinstance(value, str): - value = utils.parsedate_to_datetime(value) + kwds['decoded'] = value + try: + value = utils.parsedate_to_datetime(value) + except (ValueError, TypeError): + kwds['defects'].append(errors.InvalidDateDefect('Invalid date value or format')) + kwds['datetime'] = None + kwds['parse_tree'] = parser.TokenList() + return kwds['datetime'] = value kwds['decoded'] = utils.format_datetime(kwds['datetime']) kwds['parse_tree'] = cls.value_parser(kwds['decoded']) diff --git a/Lib/test/test_email/test_headerregistry.py b/Lib/test/test_email/test_headerregistry.py index 68bbc9561c4aff..59fcd932e0ec4a 100644 --- a/Lib/test/test_email/test_headerregistry.py +++ b/Lib/test/test_email/test_headerregistry.py @@ -204,6 +204,22 @@ def test_no_value_is_defect(self): self.assertEqual(len(h.defects), 1) self.assertIsInstance(h.defects[0], errors.HeaderMissingRequiredValue) + def test_invalid_date_format(self): + s = 'Not a date header' + h = self.make_header('date', s) + self.assertEqual(h, s) + self.assertIsNone(h.datetime) + self.assertEqual(len(h.defects), 1) + self.assertIsInstance(h.defects[0], errors.InvalidDateDefect) + + def test_invalid_date_value(self): + s = 'Tue, 06 Jun 2017 27:39:33 +0600' + h = self.make_header('date', s) + self.assertEqual(h, s) + self.assertIsNone(h.datetime) + self.assertEqual(len(h.defects), 1) + self.assertIsInstance(h.defects[0], errors.InvalidDateDefect) + def test_datetime_read_only(self): h = self.make_header('date', self.datestring) with self.assertRaises(AttributeError): diff --git a/Lib/test/test_email/test_inversion.py b/Lib/test/test_email/test_inversion.py index 8e8d67641b8943..7bd7f2a72067ad 100644 --- a/Lib/test/test_email/test_inversion.py +++ b/Lib/test/test_email/test_inversion.py @@ -46,6 +46,14 @@ def msg_as_input(self, msg): foo """),), + 'header_with_invalid_date': (dedent(b"""\ + Date: Tue, 06 Jun 2017 27:39:33 +0600 + From: abc@xyz.com + Subject: timezones + + How do they work even? + """),), + } payload_params = { diff --git a/Lib/test/test_email/test_utils.py b/Lib/test/test_email/test_utils.py index 4e3c3f3a195fc4..d2c0efb2a60ed4 100644 --- a/Lib/test/test_email/test_utils.py +++ b/Lib/test/test_email/test_utils.py @@ -48,6 +48,19 @@ def test_parsedate_to_datetime_naive(self): utils.parsedate_to_datetime(self.datestring + ' -0000'), self.naive_dt) + def test_parsedate_to_datetime_with_invalid_raises_typeerror(self): + invalid_dates = ['', '0', 'A Complete Waste of Time'] + for dtstr in invalid_dates: + with self.subTest(dtstr=dtstr): + self.assertRaises(TypeError, utils.parsedate_to_datetime, dtstr) + + def test_parsedate_to_datetime_with_invalid_raises_valueerror(self): + invalid_dates = ['Tue, 06 Jun 2017 27:39:33 +0600', + 'Tue, 06 Jun 2017 07:39:33 +2600', + 'Tue, 06 Jun 2017 27:39:33'] + for dtstr in invalid_dates: + with self.subTest(dtstr=dtstr): + self.assertRaises(ValueError, utils.parsedate_to_datetime, dtstr) class LocaltimeTests(unittest.TestCase): diff --git a/Misc/NEWS.d/next/Library/2020-09-04-17-33-04.bpo-30681.LR4fnY.rst b/Misc/NEWS.d/next/Library/2020-09-04-17-33-04.bpo-30681.LR4fnY.rst new file mode 100644 index 00000000000000..83830e343da66f --- /dev/null +++ b/Misc/NEWS.d/next/Library/2020-09-04-17-33-04.bpo-30681.LR4fnY.rst @@ -0,0 +1,2 @@ +Handle exceptions caused by unparseable date headers when using email +"default" policy. Patch by Tim Bell, Georges Toth From bc99204e7504c5380e2eb8be23bb110977b4c9b0 Mon Sep 17 00:00:00 2001 From: Georges Toth Date: Fri, 23 Oct 2020 10:48:45 +0200 Subject: [PATCH 2/3] Fix documentation for function *parsedate_to_datetime*; mention that it raises a TypeError on invalid input instead of it returning None --- Doc/library/email.utils.rst | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Doc/library/email.utils.rst b/Doc/library/email.utils.rst index c166a5498b1f4e..a1f74b38baf4e2 100644 --- a/Doc/library/email.utils.rst +++ b/Doc/library/email.utils.rst @@ -125,9 +125,9 @@ of the new API. The inverse of :func:`format_datetime`. Performs the same function as :func:`parsedate`, but on success returns a :mod:`~datetime.datetime`; - otherwise ``None`` may be returned if parsing fails, or a ``ValueError`` - raised if *date* contains an invalid value such as an hour greater than - 23 or a timezone offset not between -24 and 24 hours. If + otherwise ``TypeError`` is raised if parsing fails because of an invalid input, + or a ``ValueError`` raised if *date* contains an invalid value such as an hour + greater than 23 or a timezone offset not between -24 and 24 hours. If the input date has a timezone of ``-0000``, the ``datetime`` will be a naive ``datetime``, and if the date is conforming to the RFCs it will represent a time in UTC but with no indication of the actual source timezone of the From 764b1dc18fbcec95cd3faeb7ac93b8bde22ca52b Mon Sep 17 00:00:00 2001 From: Georges Toth Date: Fri, 23 Oct 2020 22:54:58 +0200 Subject: [PATCH 3/3] Implement comments from code review - _parsedate_tz should explicitely return None - parsedate_to_datetime should check for None as return value by _parsedate_tz and in that case raise ValueError - Fix documentation and tests accordingly --- Doc/library/email.utils.rst | 7 +++---- Lib/email/_parseaddr.py | 2 +- Lib/email/headerregistry.py | 2 +- Lib/email/utils.py | 5 ++++- Lib/test/test_email/test_utils.py | 11 ++++------- 5 files changed, 13 insertions(+), 14 deletions(-) diff --git a/Doc/library/email.utils.rst b/Doc/library/email.utils.rst index a1f74b38baf4e2..0e266b6a45782a 100644 --- a/Doc/library/email.utils.rst +++ b/Doc/library/email.utils.rst @@ -125,10 +125,9 @@ of the new API. The inverse of :func:`format_datetime`. Performs the same function as :func:`parsedate`, but on success returns a :mod:`~datetime.datetime`; - otherwise ``TypeError`` is raised if parsing fails because of an invalid input, - or a ``ValueError`` raised if *date* contains an invalid value such as an hour - greater than 23 or a timezone offset not between -24 and 24 hours. If - the input date has a timezone of ``-0000``, the ``datetime`` will be a naive + otherwise ``ValueError`` is raised if *date* contains an invalid value such + as an hour greater than 23 or a timezone offset not between -24 and 24 hours. + If the input date has a timezone of ``-0000``, the ``datetime`` will be a naive ``datetime``, and if the date is conforming to the RFCs it will represent a time in UTC but with no indication of the actual source timezone of the message the date comes from. If the input date has any other valid timezone diff --git a/Lib/email/_parseaddr.py b/Lib/email/_parseaddr.py index 41ff6f8c000d57..4d27f87974b20d 100644 --- a/Lib/email/_parseaddr.py +++ b/Lib/email/_parseaddr.py @@ -65,7 +65,7 @@ def _parsedate_tz(data): """ if not data: - return + return None data = data.split() # The FWS after the comma after the day-of-week is optional, so search and # adjust for this. diff --git a/Lib/email/headerregistry.py b/Lib/email/headerregistry.py index 92b37fc3a0dc01..d8613ebf24e613 100644 --- a/Lib/email/headerregistry.py +++ b/Lib/email/headerregistry.py @@ -305,7 +305,7 @@ def parse(cls, value, kwds): kwds['decoded'] = value try: value = utils.parsedate_to_datetime(value) - except (ValueError, TypeError): + except ValueError: kwds['defects'].append(errors.InvalidDateDefect('Invalid date value or format')) kwds['datetime'] = None kwds['parse_tree'] = parser.TokenList() diff --git a/Lib/email/utils.py b/Lib/email/utils.py index 1a7719dbc4898f..a8e46a761bf922 100644 --- a/Lib/email/utils.py +++ b/Lib/email/utils.py @@ -195,7 +195,10 @@ def make_msgid(idstring=None, domain=None): def parsedate_to_datetime(data): - *dtuple, tz = _parsedate_tz(data) + parsed_date_tz = _parsedate_tz(data) + if parsed_date_tz is None: + raise ValueError('Invalid date value or format "%s"' % str(data)) + *dtuple, tz = parsed_date_tz if tz is None: return datetime.datetime(*dtuple[:6]) return datetime.datetime(*dtuple[:6], diff --git a/Lib/test/test_email/test_utils.py b/Lib/test/test_email/test_utils.py index d2c0efb2a60ed4..e3d3eaebc93693 100644 --- a/Lib/test/test_email/test_utils.py +++ b/Lib/test/test_email/test_utils.py @@ -48,14 +48,11 @@ def test_parsedate_to_datetime_naive(self): utils.parsedate_to_datetime(self.datestring + ' -0000'), self.naive_dt) - def test_parsedate_to_datetime_with_invalid_raises_typeerror(self): - invalid_dates = ['', '0', 'A Complete Waste of Time'] - for dtstr in invalid_dates: - with self.subTest(dtstr=dtstr): - self.assertRaises(TypeError, utils.parsedate_to_datetime, dtstr) - def test_parsedate_to_datetime_with_invalid_raises_valueerror(self): - invalid_dates = ['Tue, 06 Jun 2017 27:39:33 +0600', + invalid_dates = ['', + '0', + 'A Complete Waste of Time' + 'Tue, 06 Jun 2017 27:39:33 +0600', 'Tue, 06 Jun 2017 07:39:33 +2600', 'Tue, 06 Jun 2017 27:39:33'] for dtstr in invalid_dates: