Skip to content

Commit 6d3b511

Browse files
committed
Stop incorrectly RFC 2047 encoding non-ASCII email addresses
Email generators had been incorrectly flattening non-ASCII email addresses to RFC 2047 encoded-word format, leaving them undeliverable. (RFC 2047 prohibits use of encoded-word in an addr-spec.) This change raises a ValueError when attempting to flatten an EmailMessage with a non-ASCII addr-spec and a policy with utf8=False. (Exception: If the non-ASCII address originated from parsing a message, it will be flattened as originally parsed, without error.) Non-ASCII email addresses are supported when using a policy with utf8=True (such as email.policy.SMTPUTF8) under RFCs 6531 and 6532. Non-ASCII email address domains (but not localparts) can also be used with non-SMTPUTF8 policies by encoding the domain as an IDNA A-label. (The email package does not perform this encoding, because it cannot know whether the caller wants IDNA 2003, IDNA 2008, or some other variant such as UTS #46.)
1 parent 46f5a4f commit 6d3b511

File tree

3 files changed

+75
-4
lines changed

3 files changed

+75
-4
lines changed

Doc/library/email.policy.rst

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -406,11 +406,17 @@ added matters. To illustrate::
406406
.. attribute:: utf8
407407

408408
If ``False``, follow :rfc:`5322`, supporting non-ASCII characters in
409-
headers by encoding them as "encoded words". If ``True``, follow
410-
:rfc:`6532` and use ``utf-8`` encoding for headers. Messages
409+
headers by encoding them as :rfc:`2047` "encoded words". If ``True``,
410+
follow :rfc:`6532` and use ``utf-8`` encoding for headers. Messages
411411
formatted in this way may be passed to SMTP servers that support
412412
the ``SMTPUTF8`` extension (:rfc:`6531`).
413413

414+
.. versionchanged:: 3.13
415+
If ``False``, the generator will raise a ``ValueError`` if any email
416+
address contains non-ASCII characters. To send to a non-ASCII domain
417+
with ``utf8=False``, encode the domain using the third-party
418+
:pypi:`idna` module or :mod:`encodings.idna`. No RFC allows a non-ASCII
419+
username ("localpart") in an email address with ``utf8=False``.
414420

415421
.. attribute:: refold_source
416422

Lib/email/_header_value_parser.py

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2829,6 +2829,17 @@ def _refold_parse_tree(parse_tree, *, policy):
28292829
_fold_mime_parameters(part, lines, maxlen, encoding)
28302830
continue
28312831

2832+
if want_encoding and part.token_type == 'addr-spec':
2833+
# RFC2047 forbids encoded-word in any part of an addr-spec.
2834+
if charset == 'unknown-8bit':
2835+
# Non-ASCII addr-spec came from parsed message; leave unchanged.
2836+
want_encoding = False
2837+
else:
2838+
raise ValueError(
2839+
"Non-ASCII address requires policy with utf8=True:"
2840+
" '{}'".format(part)
2841+
)
2842+
28322843
if want_encoding and not wrap_as_ew_blocked:
28332844
if not part.as_ew_allowed:
28342845
want_encoding = False

Lib/test/test_email/test_generator.py

Lines changed: 56 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
import io
2+
import re
23
import textwrap
34
import unittest
45
from email import message_from_string, message_from_bytes
@@ -288,6 +289,28 @@ def test_keep_long_encoded_newlines(self):
288289
g.flatten(msg)
289290
self.assertEqual(s.getvalue(), self.typ(expected))
290291

292+
def test_non_ascii_addr_spec_raises(self):
293+
# RFC2047 encoded-word is not permitted in any part of an addr-spec.
294+
# (See also test_non_ascii_addr_spec_preserved below.)
295+
g = self.genclass(self.ioclass(), policy=self.policy.clone(utf8=False))
296+
cases = [
297+
'wők@example.com',
298+
'wok@exàmple.com',
299+
'wők@exàmple.com',
300+
'"Name, for display" <wők@example.com>',
301+
'Näyttönimi <wők@example.com>',
302+
]
303+
for address in cases:
304+
with self.subTest(address=address):
305+
msg = EmailMessage()
306+
msg['To'] = address
307+
expected_error = re.escape(
308+
"Non-ASCII address requires policy with utf8=True:"
309+
" '{}'".format(msg['To'].addresses[0].addr_spec)
310+
)
311+
with self.assertRaisesRegex(ValueError, expected_error):
312+
g.flatten(msg)
313+
291314

292315
class TestGenerator(TestGeneratorBase, TestEmailBase):
293316

@@ -432,12 +455,12 @@ def test_cte_type_7bit_transforms_8bit_cte(self):
432455

433456
def test_smtputf8_policy(self):
434457
msg = EmailMessage()
435-
msg['From'] = "Páolo <főo@bar.com>"
458+
msg['From'] = "Páolo <főo@bàr.com>"
436459
msg['To'] = 'Dinsdale'
437460
msg['Subject'] = 'Nudge nudge, wink, wink \u1F609'
438461
msg.set_content("oh là là, know what I mean, know what I mean?")
439462
expected = textwrap.dedent("""\
440-
From: Páolo <főo@bar.com>
463+
From: Páolo <főo@bàr.com>
441464
To: Dinsdale
442465
Subject: Nudge nudge, wink, wink \u1F609
443466
Content-Type: text/plain; charset="utf-8"
@@ -472,6 +495,37 @@ def test_smtp_policy(self):
472495
g.flatten(msg)
473496
self.assertEqual(s.getvalue(), expected)
474497

498+
def test_non_ascii_addr_spec_preserved(self):
499+
# A defective non-ASCII addr-spec parsed from the original
500+
# message is left unchanged when flattening.
501+
# (See also test_non_ascii_addr_spec_raises above.)
502+
source = (
503+
'To: jörg@example.com, "But a long name still works with refold_source" <jörg@example.com>'
504+
).encode()
505+
expected = (
506+
b'To: j\xc3\xb6rg@example.com,\n'
507+
b' "But a long name still works with refold_source" <j\xc3\xb6rg@example.com>\n'
508+
b'\n'
509+
)
510+
msg = message_from_bytes(source, policy=policy.default)
511+
s = io.BytesIO()
512+
g = BytesGenerator(s, policy=policy.default)
513+
g.flatten(msg)
514+
self.assertEqual(s.getvalue(), expected)
515+
516+
def test_idna_encoding_preserved(self):
517+
# Nothing tries to decode a pre-encoded IDNA domain.
518+
msg = EmailMessage()
519+
msg["To"] = Address(
520+
username='jörg',
521+
domain='☕.example'.encode('idna').decode() # IDNA 2003
522+
)
523+
expected = 'To: jörg@xn--53h.example,\n\n'.encode()
524+
s = io.BytesIO()
525+
g = BytesGenerator(s, policy=policy.default.clone(utf8=True))
526+
g.flatten(msg)
527+
self.assertEqual(s.getvalue(), expected)
528+
475529

476530
if __name__ == '__main__':
477531
unittest.main()

0 commit comments

Comments
 (0)