mirror of https://github.com/python/cpython.git
[3.13] gh-133890: Handle UnicodeEncodeError in tarfile (GH-134147) (GH-134196)
UnicodeEncodeError is now handled the same way as OSError during
TarFile member extraction.
(cherry picked from commit 9983c7d441)
This commit is contained in:
parent
ea9c9623f0
commit
06a8c0613e
|
@ -2376,7 +2376,7 @@ def _get_extract_tarinfo(self, member, filter_function, path):
|
|||
unfiltered = tarinfo
|
||||
try:
|
||||
tarinfo = filter_function(tarinfo, path)
|
||||
except (OSError, FilterError) as e:
|
||||
except (OSError, UnicodeEncodeError, FilterError) as e:
|
||||
self._handle_fatal_error(e)
|
||||
except ExtractError as e:
|
||||
self._handle_nonfatal_error(e)
|
||||
|
@ -2397,7 +2397,7 @@ def _extract_one(self, tarinfo, path, set_attrs, numeric_owner):
|
|||
self._extract_member(tarinfo, os.path.join(path, tarinfo.name),
|
||||
set_attrs=set_attrs,
|
||||
numeric_owner=numeric_owner)
|
||||
except OSError as e:
|
||||
except (OSError, UnicodeEncodeError) as e:
|
||||
self._handle_fatal_error(e)
|
||||
except ExtractError as e:
|
||||
self._handle_nonfatal_error(e)
|
||||
|
|
|
@ -3457,11 +3457,12 @@ class ArchiveMaker:
|
|||
with t.open() as tar:
|
||||
... # `tar` is now a TarFile with 'filename' in it!
|
||||
"""
|
||||
def __init__(self):
|
||||
def __init__(self, **kwargs):
|
||||
self.bio = io.BytesIO()
|
||||
self.tar_kwargs = dict(kwargs)
|
||||
|
||||
def __enter__(self):
|
||||
self.tar_w = tarfile.TarFile(mode='w', fileobj=self.bio)
|
||||
self.tar_w = tarfile.TarFile(mode='w', fileobj=self.bio, **self.tar_kwargs)
|
||||
return self
|
||||
|
||||
def __exit__(self, *exc):
|
||||
|
@ -4040,7 +4041,10 @@ def test_tar_filter(self):
|
|||
# that in the test archive.)
|
||||
with tarfile.TarFile.open(tarname) as tar:
|
||||
for tarinfo in tar.getmembers():
|
||||
filtered = tarfile.tar_filter(tarinfo, '')
|
||||
try:
|
||||
filtered = tarfile.tar_filter(tarinfo, '')
|
||||
except UnicodeEncodeError:
|
||||
continue
|
||||
self.assertIs(filtered.name, tarinfo.name)
|
||||
self.assertIs(filtered.type, tarinfo.type)
|
||||
|
||||
|
@ -4051,11 +4055,48 @@ def test_data_filter(self):
|
|||
for tarinfo in tar.getmembers():
|
||||
try:
|
||||
filtered = tarfile.data_filter(tarinfo, '')
|
||||
except tarfile.FilterError:
|
||||
except (tarfile.FilterError, UnicodeEncodeError):
|
||||
continue
|
||||
self.assertIs(filtered.name, tarinfo.name)
|
||||
self.assertIs(filtered.type, tarinfo.type)
|
||||
|
||||
@unittest.skipIf(sys.platform == 'win32', 'requires native bytes paths')
|
||||
def test_filter_unencodable(self):
|
||||
# Sanity check using a valid path.
|
||||
tarinfo = tarfile.TarInfo(os_helper.TESTFN)
|
||||
filtered = tarfile.tar_filter(tarinfo, '')
|
||||
self.assertIs(filtered.name, tarinfo.name)
|
||||
filtered = tarfile.data_filter(tarinfo, '')
|
||||
self.assertIs(filtered.name, tarinfo.name)
|
||||
|
||||
tarinfo = tarfile.TarInfo('test\x00')
|
||||
self.assertRaises(ValueError, tarfile.tar_filter, tarinfo, '')
|
||||
self.assertRaises(ValueError, tarfile.data_filter, tarinfo, '')
|
||||
tarinfo = tarfile.TarInfo('\ud800')
|
||||
self.assertRaises(UnicodeEncodeError, tarfile.tar_filter, tarinfo, '')
|
||||
self.assertRaises(UnicodeEncodeError, tarfile.data_filter, tarinfo, '')
|
||||
|
||||
@unittest.skipIf(sys.platform == 'win32', 'requires native bytes paths')
|
||||
def test_extract_unencodable(self):
|
||||
# Create a member with name \xed\xa0\x80 which is UTF-8 encoded
|
||||
# lone surrogate \ud800.
|
||||
with ArchiveMaker(encoding='ascii', errors='surrogateescape') as arc:
|
||||
arc.add('\udced\udca0\udc80')
|
||||
with os_helper.temp_cwd() as tmp:
|
||||
tar = arc.open(encoding='utf-8', errors='surrogatepass',
|
||||
errorlevel=1)
|
||||
self.assertEqual(tar.getnames(), ['\ud800'])
|
||||
with self.assertRaises(UnicodeEncodeError):
|
||||
tar.extractall(filter=tarfile.tar_filter)
|
||||
self.assertEqual(os.listdir(), [])
|
||||
|
||||
tar = arc.open(encoding='utf-8', errors='surrogatepass',
|
||||
errorlevel=0, debug=1)
|
||||
with support.captured_stderr() as stderr:
|
||||
tar.extractall(filter=tarfile.tar_filter)
|
||||
self.assertEqual(os.listdir(), [])
|
||||
self.assertIn('tarfile: UnicodeEncodeError ', stderr.getvalue())
|
||||
|
||||
def test_default_filter_warns(self):
|
||||
"""Ensure the default filter warns"""
|
||||
with ArchiveMaker() as arc:
|
||||
|
|
|
@ -0,0 +1,2 @@
|
|||
The :mod:`tarfile` module now handles :exc:`UnicodeEncodeError` in the same
|
||||
way as :exc:`OSError` when it cannot extract a member.
|
Loading…
Reference in New Issue