gh-127794: Validate email header names according to RFC 5322 (#127820)

`email.message.Message` objects now validate header names specified via `__setitem__`
or `add_header` according to RFC 5322, §2.2 [1].

In particular, callers should expect a ValueError to be raised for invalid header names.

[1]: https://datatracker.ietf.org/doc/html/rfc5322#section-2.2

---------

Co-authored-by: Bénédikt Tran <10796600+picnixz@users.noreply.github.com>
Co-authored-by: R. David Murray <rdmurray@bitdance.com>
This commit is contained in:
Srinivas Reddy Thatiparthy (తాటిపర్తి శ్రీనివాస్ రెడ్డి) 2025-03-30 17:59:29 +05:30 committed by GitHub
parent 55150a79ca
commit c432d0147b
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
5 changed files with 71 additions and 1 deletions

View File

@ -4,6 +4,7 @@
""" """
import abc import abc
import re
from email import header from email import header
from email import charset as _charset from email import charset as _charset
from email.utils import _has_surrogates from email.utils import _has_surrogates
@ -14,6 +15,14 @@
'compat32', 'compat32',
] ]
# Validation regex for header field names per RFC 5322, section 2.2: one or
# more printable US-ASCII characters (codes 33-126) other than the colon.
# Equivalent to re.compile(r"[!-9;-~]+\Z").  The anchor is \Z rather than $
# because $ also matches just before a trailing newline, which would let a
# name such as "Subject\n" pass validation.
valid_header_name_re = re.compile(r"[\041-\071\073-\176]+\Z")


def validate_header_name(name):
    """Validate a header field name according to RFC 5322.

    Raise ValueError if *name* is empty or contains any character other
    than printable US-ASCII excluding the colon.  Return None otherwise.
    """
    if not valid_header_name_re.match(name):
        raise ValueError(
            f"Header field name contains invalid characters: {name!r}")
class _PolicyBase: class _PolicyBase:
@ -314,6 +323,7 @@ def header_store_parse(self, name, value):
"""+ """+
The name and value are returned unmodified. The name and value are returned unmodified.
""" """
validate_header_name(name)
return (name, value) return (name, value)
def header_fetch_parse(self, name, value): def header_fetch_parse(self, name, value):

View File

@ -4,7 +4,13 @@
import re import re
import sys import sys
from email._policybase import Policy, Compat32, compat32, _extend_docstrings from email._policybase import (
Compat32,
Policy,
_extend_docstrings,
compat32,
validate_header_name
)
from email.utils import _has_surrogates from email.utils import _has_surrogates
from email.headerregistry import HeaderRegistry as HeaderRegistry from email.headerregistry import HeaderRegistry as HeaderRegistry
from email.contentmanager import raw_data_manager from email.contentmanager import raw_data_manager
@ -138,6 +144,7 @@ def header_store_parse(self, name, value):
CR or LF characters. CR or LF characters.
""" """
validate_header_name(name)
if hasattr(value, 'name') and value.name.lower() == name.lower(): if hasattr(value, 'name') and value.name.lower() == name.lower():
return (name, value) return (name, value)
if isinstance(value, str) and len(value.splitlines())>1: if isinstance(value, str) and len(value.splitlines())>1:

View File

@ -728,6 +728,31 @@ def test_nonascii_add_header_with_tspecial(self):
"attachment; filename*=utf-8''Fu%C3%9Fballer%20%5Bfilename%5D.ppt", "attachment; filename*=utf-8''Fu%C3%9Fballer%20%5Bfilename%5D.ppt",
msg['Content-Disposition']) msg['Content-Disposition'])
def test_invalid_header_names(self):
    # Each entry pairs a malformed field name with the value stored under
    # it; the value is a short label for that entry.
    bad_headers = (
        ('Invalid Header', 'contains space'),
        ('Tab\tHeader', 'contains tab'),
        ('Colon:Header', 'contains colon'),
        ('', 'Empty name'),
        (' LeadingSpace', 'starts with space'),
        ('TrailingSpace ', 'ends with space'),
        ('Header\x7F', 'Non-ASCII character'),
        ('Header\x80', 'Extended ASCII'),
    )
    policies = (email.policy.default, email.policy.compat32)
    setters = (Message.__setitem__, Message.add_header)
    # Check every policy / setter / header-name combination.
    for pol in policies:
        for store in setters:
            for field_name, field_value in bad_headers:
                self.do_test_invalid_header_names(
                    pol, store, field_name, field_value)
def do_test_invalid_header_names(self, policy, setter, name, value):
    # Storing `value` under the invalid field `name` via `setter` must raise
    # a ValueError whose message mentions "invalid", "header" and "name"
    # (case-insensitively, in any order) and includes repr(name).
    expected = r'(?i)(?=.*invalid)(?=.*header)(?=.*name)'
    with self.subTest(policy=policy, setter=setter, name=name, value=value):
        msg = Message(policy=policy)
        with self.assertRaisesRegex(ValueError, expected) as caught:
            setter(msg, name, value)
        self.assertIn(repr(name), str(caught.exception))
def test_binary_quopri_payload(self): def test_binary_quopri_payload(self):
for charset in ('latin-1', 'ascii'): for charset in ('latin-1', 'ascii'):
msg = Message() msg = Message()

View File

@ -1004,6 +1004,30 @@ def test_folding_with_long_nospace_http_policy_1(self):
parsed_msg = message_from_bytes(m.as_bytes(), policy=policy.default) parsed_msg = message_from_bytes(m.as_bytes(), policy=policy.default)
self.assertEqual(parsed_msg['Message-ID'], m['Message-ID']) self.assertEqual(parsed_msg['Message-ID'], m['Message-ID'])
def test_invalid_header_names(self):
    # Each entry pairs a malformed field name with the value stored under
    # it; the value is a short label for that entry.
    bad_headers = (
        ('Invalid Header', 'contains space'),
        ('Tab\tHeader', 'contains tab'),
        ('Colon:Header', 'contains colon'),
        ('', 'Empty name'),
        (' LeadingSpace', 'starts with space'),
        ('TrailingSpace ', 'ends with space'),
        ('Header\x7F', 'Non-ASCII character'),
        ('Header\x80', 'Extended ASCII'),
    )
    policies = (policy.default, policy.compat32)
    setters = (EmailMessage.__setitem__, EmailMessage.add_header)
    # Check every policy / setter / header-name combination.
    for pol in policies:
        for store in setters:
            for field_name, field_value in bad_headers:
                self.do_test_invalid_header_names(
                    pol, store, field_name, field_value)
def do_test_invalid_header_names(self, policy, setter, name, value):
    # Storing `value` under the invalid field `name` via `setter` must raise
    # a ValueError whose message mentions "invalid", "header" and "name"
    # (case-insensitively, in any order) and includes repr(name).
    expected = r'(?i)(?=.*invalid)(?=.*header)(?=.*name)'
    with self.subTest(policy=policy, setter=setter, name=name, value=value):
        msg = EmailMessage(policy=policy)
        with self.assertRaisesRegex(ValueError, expected) as caught:
            setter(msg, name, value)
        self.assertIn(repr(name), str(caught.exception))
def test_get_body_malformed(self): def test_get_body_malformed(self):
"""test for bpo-42892""" """test for bpo-42892"""
msg = textwrap.dedent("""\ msg = textwrap.dedent("""\

View File

@ -0,0 +1,4 @@
When headers are added to :class:`email.message.Message` objects, either through
:meth:`email.message.Message.__setitem__` or :meth:`email.message.Message.add_header`,
the field name is now validated according to :rfc:`RFC 5322, Section 2.2 <5322#section-2.2>`
and a :exc:`ValueError` is raised if the field name is empty or contains any invalid characters.