GH-82805: Fix handling of single-dot file extensions in pathlib (#118952)

pathlib now treats "`.`" as a valid file extension (suffix). This brings
it in line with `os.path.splitext()`.

In the (private) pathlib ABCs, we add a new `ParserBase.splitext()` method
that splits a path into a `(root, ext)` pair, like `os.path.splitext()`.
This method is called by `PurePathBase.stem`, `suffix`, etc. In a future
version of pathlib, we might make these base classes public, and so users
will be able to define their own `splitext()` method to control file
extension splitting.

In `pathlib.PurePath` we add optimised `stem`, `suffix` and `suffixes`
properties that don't use `splitext()`, which avoids computing the path
base name twice.
This commit is contained in:
Barney Gale 2024-05-25 21:01:36 +01:00 committed by GitHub
parent 0c5ebe13e9
commit e418fc3a6e
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
5 changed files with 101 additions and 35 deletions

View File

@ -449,6 +449,10 @@ Pure paths provide the following methods and properties:
This is commonly called the file extension. This is commonly called the file extension.
.. versionchanged:: 3.14
A single dot ("``.``") is considered a valid suffix.
.. attribute:: PurePath.suffixes .. attribute:: PurePath.suffixes
A list of the path's suffixes, often called file extensions:: A list of the path's suffixes, often called file extensions::
@ -460,6 +464,10 @@ Pure paths provide the following methods and properties:
>>> PurePosixPath('my/library').suffixes >>> PurePosixPath('my/library').suffixes
[] []
.. versionchanged:: 3.14
A single dot ("``.``") is considered a valid suffix.
.. attribute:: PurePath.stem .. attribute:: PurePath.stem
@ -713,6 +721,11 @@ Pure paths provide the following methods and properties:
>>> p.with_suffix('') >>> p.with_suffix('')
PureWindowsPath('README') PureWindowsPath('README')
.. versionchanged:: 3.14
A single dot ("``.``") is considered a valid suffix. In previous
versions, :exc:`ValueError` is raised if a single dot is supplied.
.. method:: PurePath.with_segments(*pathsegments) .. method:: PurePath.with_segments(*pathsegments)

View File

@ -68,6 +68,12 @@ def splitdrive(self, path):
drive. Either part may be empty.""" drive. Either part may be empty."""
raise UnsupportedOperation(self._unsupported_msg('splitdrive()')) raise UnsupportedOperation(self._unsupported_msg('splitdrive()'))
def splitext(self, path):
"""Split the path into a pair (root, ext), where *ext* is empty or
begins with a period and contains at most one period,
and *root* is everything before the extension."""
raise UnsupportedOperation(self._unsupported_msg('splitext()'))
def normcase(self, path): def normcase(self, path):
"""Normalize the case of the path.""" """Normalize the case of the path."""
raise UnsupportedOperation(self._unsupported_msg('normcase()')) raise UnsupportedOperation(self._unsupported_msg('normcase()'))
@ -151,12 +157,7 @@ def suffix(self):
This includes the leading period. For example: '.txt' This includes the leading period. For example: '.txt'
""" """
name = self.name return self.parser.splitext(self.name)[1]
i = name.rfind('.')
if 0 < i < len(name) - 1:
return name[i:]
else:
return ''
@property @property
def suffixes(self): def suffixes(self):
@ -165,21 +166,18 @@ def suffixes(self):
These include the leading periods. For example: ['.tar', '.gz'] These include the leading periods. For example: ['.tar', '.gz']
""" """
name = self.name split = self.parser.splitext
if name.endswith('.'): stem, suffix = split(self.name)
return [] suffixes = []
name = name.lstrip('.') while suffix:
return ['.' + suffix for suffix in name.split('.')[1:]] suffixes.append(suffix)
stem, suffix = split(stem)
return suffixes[::-1]
@property @property
def stem(self): def stem(self):
"""The final path component, minus its last suffix.""" """The final path component, minus its last suffix."""
name = self.name return self.parser.splitext(self.name)[0]
i = name.rfind('.')
if 0 < i < len(name) - 1:
return name[:i]
else:
return name
def with_name(self, name): def with_name(self, name):
"""Return a new path with the file name changed.""" """Return a new path with the file name changed."""
@ -208,7 +206,7 @@ def with_suffix(self, suffix):
if not stem: if not stem:
# If the stem is empty, we can't make the suffix non-empty. # If the stem is empty, we can't make the suffix non-empty.
raise ValueError(f"{self!r} has an empty name") raise ValueError(f"{self!r} has an empty name")
elif suffix and not (suffix.startswith('.') and len(suffix) > 1): elif suffix and not suffix.startswith('.'):
raise ValueError(f"Invalid suffix {suffix!r}") raise ValueError(f"Invalid suffix {suffix!r}")
else: else:
return self.with_name(stem + suffix) return self.with_name(stem + suffix)

View File

@ -361,6 +361,40 @@ def with_name(self, name):
tail[-1] = name tail[-1] = name
return self._from_parsed_parts(self.drive, self.root, tail) return self._from_parsed_parts(self.drive, self.root, tail)
@property
def stem(self):
"""The final path component, minus its last suffix.
If removing the text after the last dot would leave nothing but dots
(for example '.hgrc'), the name has no suffix and is returned whole.
"""
name = self.name
i = name.rfind('.')
if i != -1:
stem = name[:i]
# Stem must contain at least one non-dot character.
if stem.lstrip('.'):
return stem
# No dot at all, or only leading dots before the last one: no suffix.
return name
@property
def suffix(self):
"""
The final component's last suffix, if any.
This includes the leading period. For example: '.txt'
A name ending in a dot has the suffix '.'; leading dots never start
a suffix.
"""
# Strip leading dots first so that a name such as '.hgrc' reports no suffix.
name = self.name.lstrip('.')
i = name.rfind('.')
if i != -1:
return name[i:]
return ''
@property
def suffixes(self):
"""
A list of the final component's suffixes, if any.
These include the leading periods. For example: ['.tar', '.gz']
"""
# Leading dots are dropped before splitting. The first piece is the part
# before any suffix and is discarded; every remaining piece becomes a
# suffix, so an empty piece (from consecutive or trailing dots) yields '.'.
return ['.' + ext for ext in self.name.lstrip('.').split('.')[1:]]
def relative_to(self, other, *, walk_up=False): def relative_to(self, other, *, walk_up=False):
"""Return the relative path to another path identified by the passed """Return the relative path to another path identified by the passed
arguments. If the operation is not possible (because this is not arguments. If the operation is not possible (because this is not

View File

@ -50,6 +50,7 @@ def test_unsupported_operation(self):
self.assertRaises(e, m.join, 'foo') self.assertRaises(e, m.join, 'foo')
self.assertRaises(e, m.split, 'foo') self.assertRaises(e, m.split, 'foo')
self.assertRaises(e, m.splitdrive, 'foo') self.assertRaises(e, m.splitdrive, 'foo')
self.assertRaises(e, m.splitext, 'foo')
self.assertRaises(e, m.normcase, 'foo') self.assertRaises(e, m.normcase, 'foo')
self.assertRaises(e, m.isabs, 'foo') self.assertRaises(e, m.isabs, 'foo')
@ -789,8 +790,12 @@ def test_suffix_common(self):
self.assertEqual(P('/a/.hg.rc').suffix, '.rc') self.assertEqual(P('/a/.hg.rc').suffix, '.rc')
self.assertEqual(P('a/b.tar.gz').suffix, '.gz') self.assertEqual(P('a/b.tar.gz').suffix, '.gz')
self.assertEqual(P('/a/b.tar.gz').suffix, '.gz') self.assertEqual(P('/a/b.tar.gz').suffix, '.gz')
self.assertEqual(P('a/Some name. Ending with a dot.').suffix, '') self.assertEqual(P('a/trailing.dot.').suffix, '.')
self.assertEqual(P('/a/Some name. Ending with a dot.').suffix, '') self.assertEqual(P('/a/trailing.dot.').suffix, '.')
self.assertEqual(P('a/..d.o.t..').suffix, '.')
self.assertEqual(P('a/inn.er..dots').suffix, '.dots')
self.assertEqual(P('photo').suffix, '')
self.assertEqual(P('photo.jpg').suffix, '.jpg')
@needs_windows @needs_windows
def test_suffix_windows(self): def test_suffix_windows(self):
@ -807,8 +812,8 @@ def test_suffix_windows(self):
self.assertEqual(P('c:/a/.hg.rc').suffix, '.rc') self.assertEqual(P('c:/a/.hg.rc').suffix, '.rc')
self.assertEqual(P('c:a/b.tar.gz').suffix, '.gz') self.assertEqual(P('c:a/b.tar.gz').suffix, '.gz')
self.assertEqual(P('c:/a/b.tar.gz').suffix, '.gz') self.assertEqual(P('c:/a/b.tar.gz').suffix, '.gz')
self.assertEqual(P('c:a/Some name. Ending with a dot.').suffix, '') self.assertEqual(P('c:a/trailing.dot.').suffix, '.')
self.assertEqual(P('c:/a/Some name. Ending with a dot.').suffix, '') self.assertEqual(P('c:/a/trailing.dot.').suffix, '.')
self.assertEqual(P('//My.py/Share.php').suffix, '') self.assertEqual(P('//My.py/Share.php').suffix, '')
self.assertEqual(P('//My.py/Share.php/a/b').suffix, '') self.assertEqual(P('//My.py/Share.php/a/b').suffix, '')
@ -828,8 +833,12 @@ def test_suffixes_common(self):
self.assertEqual(P('/a/.hg.rc').suffixes, ['.rc']) self.assertEqual(P('/a/.hg.rc').suffixes, ['.rc'])
self.assertEqual(P('a/b.tar.gz').suffixes, ['.tar', '.gz']) self.assertEqual(P('a/b.tar.gz').suffixes, ['.tar', '.gz'])
self.assertEqual(P('/a/b.tar.gz').suffixes, ['.tar', '.gz']) self.assertEqual(P('/a/b.tar.gz').suffixes, ['.tar', '.gz'])
self.assertEqual(P('a/Some name. Ending with a dot.').suffixes, []) self.assertEqual(P('a/trailing.dot.').suffixes, ['.dot', '.'])
self.assertEqual(P('/a/Some name. Ending with a dot.').suffixes, []) self.assertEqual(P('/a/trailing.dot.').suffixes, ['.dot', '.'])
self.assertEqual(P('a/..d.o.t..').suffixes, ['.o', '.t', '.', '.'])
self.assertEqual(P('a/inn.er..dots').suffixes, ['.er', '.', '.dots'])
self.assertEqual(P('photo').suffixes, [])
self.assertEqual(P('photo.jpg').suffixes, ['.jpg'])
@needs_windows @needs_windows
def test_suffixes_windows(self): def test_suffixes_windows(self):
@ -848,8 +857,8 @@ def test_suffixes_windows(self):
self.assertEqual(P('c:/a/b.tar.gz').suffixes, ['.tar', '.gz']) self.assertEqual(P('c:/a/b.tar.gz').suffixes, ['.tar', '.gz'])
self.assertEqual(P('//My.py/Share.php').suffixes, []) self.assertEqual(P('//My.py/Share.php').suffixes, [])
self.assertEqual(P('//My.py/Share.php/a/b').suffixes, []) self.assertEqual(P('//My.py/Share.php/a/b').suffixes, [])
self.assertEqual(P('c:a/Some name. Ending with a dot.').suffixes, []) self.assertEqual(P('c:a/trailing.dot.').suffixes, ['.dot', '.'])
self.assertEqual(P('c:/a/Some name. Ending with a dot.').suffixes, []) self.assertEqual(P('c:/a/trailing.dot.').suffixes, ['.dot', '.'])
def test_stem_empty(self): def test_stem_empty(self):
P = self.cls P = self.cls
@ -865,8 +874,11 @@ def test_stem_common(self):
self.assertEqual(P('a/.hgrc').stem, '.hgrc') self.assertEqual(P('a/.hgrc').stem, '.hgrc')
self.assertEqual(P('a/.hg.rc').stem, '.hg') self.assertEqual(P('a/.hg.rc').stem, '.hg')
self.assertEqual(P('a/b.tar.gz').stem, 'b.tar') self.assertEqual(P('a/b.tar.gz').stem, 'b.tar')
self.assertEqual(P('a/Some name. Ending with a dot.').stem, self.assertEqual(P('a/trailing.dot.').stem, 'trailing.dot')
'Some name. Ending with a dot.') self.assertEqual(P('a/..d.o.t..').stem, '..d.o.t.')
self.assertEqual(P('a/inn.er..dots').stem, 'inn.er.')
self.assertEqual(P('photo').stem, 'photo')
self.assertEqual(P('photo.jpg').stem, 'photo')
@needs_windows @needs_windows
def test_stem_windows(self): def test_stem_windows(self):
@ -880,8 +892,8 @@ def test_stem_windows(self):
self.assertEqual(P('c:a/.hgrc').stem, '.hgrc') self.assertEqual(P('c:a/.hgrc').stem, '.hgrc')
self.assertEqual(P('c:a/.hg.rc').stem, '.hg') self.assertEqual(P('c:a/.hg.rc').stem, '.hg')
self.assertEqual(P('c:a/b.tar.gz').stem, 'b.tar') self.assertEqual(P('c:a/b.tar.gz').stem, 'b.tar')
self.assertEqual(P('c:a/Some name. Ending with a dot.').stem, self.assertEqual(P('c:a/trailing.dot.').stem, 'trailing.dot')
'Some name. Ending with a dot.')
def test_with_name_common(self): def test_with_name_common(self):
P = self.cls P = self.cls
self.assertEqual(P('a/b').with_name('d.xml'), P('a/d.xml')) self.assertEqual(P('a/b').with_name('d.xml'), P('a/d.xml'))
@ -929,16 +941,16 @@ def test_with_stem_common(self):
self.assertEqual(P('a/b.py').with_stem('d'), P('a/d.py')) self.assertEqual(P('a/b.py').with_stem('d'), P('a/d.py'))
self.assertEqual(P('/a/b.py').with_stem('d'), P('/a/d.py')) self.assertEqual(P('/a/b.py').with_stem('d'), P('/a/d.py'))
self.assertEqual(P('/a/b.tar.gz').with_stem('d'), P('/a/d.gz')) self.assertEqual(P('/a/b.tar.gz').with_stem('d'), P('/a/d.gz'))
self.assertEqual(P('a/Dot ending.').with_stem('d'), P('a/d')) self.assertEqual(P('a/Dot ending.').with_stem('d'), P('a/d.'))
self.assertEqual(P('/a/Dot ending.').with_stem('d'), P('/a/d')) self.assertEqual(P('/a/Dot ending.').with_stem('d'), P('/a/d.'))
@needs_windows @needs_windows
def test_with_stem_windows(self): def test_with_stem_windows(self):
P = self.cls P = self.cls
self.assertEqual(P('c:a/b').with_stem('d'), P('c:a/d')) self.assertEqual(P('c:a/b').with_stem('d'), P('c:a/d'))
self.assertEqual(P('c:/a/b').with_stem('d'), P('c:/a/d')) self.assertEqual(P('c:/a/b').with_stem('d'), P('c:/a/d'))
self.assertEqual(P('c:a/Dot ending.').with_stem('d'), P('c:a/d')) self.assertEqual(P('c:a/Dot ending.').with_stem('d'), P('c:a/d.'))
self.assertEqual(P('c:/a/Dot ending.').with_stem('d'), P('c:/a/d')) self.assertEqual(P('c:/a/Dot ending.').with_stem('d'), P('c:/a/d.'))
self.assertRaises(ValueError, P('c:').with_stem, 'd') self.assertRaises(ValueError, P('c:').with_stem, 'd')
self.assertRaises(ValueError, P('c:/').with_stem, 'd') self.assertRaises(ValueError, P('c:/').with_stem, 'd')
self.assertRaises(ValueError, P('//My/Share').with_stem, 'd') self.assertRaises(ValueError, P('//My/Share').with_stem, 'd')
@ -974,6 +986,11 @@ def test_with_suffix_common(self):
# Stripping suffix. # Stripping suffix.
self.assertEqual(P('a/b.py').with_suffix(''), P('a/b')) self.assertEqual(P('a/b.py').with_suffix(''), P('a/b'))
self.assertEqual(P('/a/b').with_suffix(''), P('/a/b')) self.assertEqual(P('/a/b').with_suffix(''), P('/a/b'))
# Single dot
self.assertEqual(P('a/b').with_suffix('.'), P('a/b.'))
self.assertEqual(P('/a/b').with_suffix('.'), P('/a/b.'))
self.assertEqual(P('a/b.py').with_suffix('.'), P('a/b.'))
self.assertEqual(P('/a/b.py').with_suffix('.'), P('/a/b.'))
@needs_windows @needs_windows
def test_with_suffix_windows(self): def test_with_suffix_windows(self):
@ -1012,7 +1029,6 @@ def test_with_suffix_invalid(self):
# Invalid suffix. # Invalid suffix.
self.assertRaises(ValueError, P('a/b').with_suffix, 'gz') self.assertRaises(ValueError, P('a/b').with_suffix, 'gz')
self.assertRaises(ValueError, P('a/b').with_suffix, '/') self.assertRaises(ValueError, P('a/b').with_suffix, '/')
self.assertRaises(ValueError, P('a/b').with_suffix, '.')
self.assertRaises(ValueError, P('a/b').with_suffix, '/.gz') self.assertRaises(ValueError, P('a/b').with_suffix, '/.gz')
self.assertRaises(ValueError, P('a/b').with_suffix, 'c/d') self.assertRaises(ValueError, P('a/b').with_suffix, 'c/d')
self.assertRaises(ValueError, P('a/b').with_suffix, '.c/.d') self.assertRaises(ValueError, P('a/b').with_suffix, '.c/.d')

View File

@ -0,0 +1,5 @@
Support single-dot file extensions in :attr:`pathlib.PurePath.suffix` and
related attributes and methods. For example, the
:attr:`~pathlib.PurePath.suffixes` of ``PurePath('foo.bar.')`` are now
``['.bar', '.']`` rather than ``[]``. This brings file extension splitting
in line with :func:`os.path.splitext`.