GH-125413: Add `pathlib.Path.scandir()` method (#126060)

Add `pathlib.Path.scandir()` as a trivial wrapper of `os.scandir()`. This
will be used to implement several `PathBase` methods more efficiently,
including methods that provide `Path.copy()`.
This commit is contained in:
Barney Gale 2024-11-01 01:19:01 +00:00 committed by GitHub
parent d0abd0b826
commit 260843df1b
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
6 changed files with 114 additions and 11 deletions

View File

@ -1289,6 +1289,35 @@ Reading directories
raised. raised.
.. method:: Path.scandir()
When the path points to a directory, return an iterator of
:class:`os.DirEntry` objects corresponding to entries in the directory. The
returned iterator supports the :term:`context manager` protocol. It is
implemented using :func:`os.scandir` and gives the same guarantees.
Using :meth:`~Path.scandir` instead of :meth:`~Path.iterdir` can
significantly increase the performance of code that also needs file type or
file attribute information, because :class:`os.DirEntry` objects expose
this information if the operating system provides it when scanning a
directory.
The following example displays the names of subdirectories. The
``entry.is_dir()`` check will generally not make an additional system call::
>>> p = Path('docs')
>>> with p.scandir() as entries:
... for entry in entries:
... if entry.is_dir():
... entry.name
...
'_templates'
'_build'
'_static'
.. versionadded:: 3.14
.. method:: Path.glob(pattern, *, case_sensitive=None, recurse_symlinks=False) .. method:: Path.glob(pattern, *, case_sensitive=None, recurse_symlinks=False)
Glob the given relative *pattern* in the directory represented by this path, Glob the given relative *pattern* in the directory represented by this path,

View File

@ -380,6 +380,12 @@ pathlib
(Contributed by Barney Gale in :gh:`73991`.) (Contributed by Barney Gale in :gh:`73991`.)
* Add :meth:`pathlib.Path.scandir` to scan a directory and return an iterator
of :class:`os.DirEntry` objects. This is exactly equivalent to calling
:func:`os.scandir` on a path object.
(Contributed by Barney Gale in :gh:`125413`.)
pdb pdb
--- ---

View File

@ -639,13 +639,23 @@ def write_text(self, data, encoding=None, errors=None, newline=None):
with self.open(mode='w', encoding=encoding, errors=errors, newline=newline) as f: with self.open(mode='w', encoding=encoding, errors=errors, newline=newline) as f:
return f.write(data) return f.write(data)
def scandir(self):
    """Scan the directory and yield an os.DirEntry object per child.

    Children come back in arbitrary order; the special entries '.'
    and '..' are never part of the result.

    This implementation provides no scanning itself: it always raises
    UnsupportedOperation, with a message naming 'scandir()'.
    """
    message = self._unsupported_msg('scandir()')
    raise UnsupportedOperation(message)
def iterdir(self):
    """Iterate over the children of this directory as path objects.

    Children come back in arbitrary order; the special entries '.'
    and '..' are never part of the result.
    """
    # Gather every name while the scandir() context is still open, so the
    # scan is finished and closed before the caller sees the first child.
    child_names = []
    with self.scandir() as scan:
        for dir_entry in scan:
            child_names.append(dir_entry.name)
    # The joins themselves stay lazy: map() builds each child on demand.
    return map(self.joinpath, child_names)
def _glob_selector(self, parts, case_sensitive, recurse_symlinks): def _glob_selector(self, parts, case_sensitive, recurse_symlinks):
if case_sensitive is None: if case_sensitive is None:

View File

@ -615,6 +615,14 @@ def _filter_trailing_slash(self, paths):
path_str = path_str[:-1] path_str = path_str[:-1]
yield path_str yield path_str
def scandir(self):
    """Scan the directory and yield an os.DirEntry object per child.

    Children come back in arbitrary order; the special entries '.'
    and '..' are never part of the result.

    This is a direct call to os.scandir() on this path; whatever
    os.scandir() returns is handed back unchanged.
    """
    dir_entries = os.scandir(self)
    return dir_entries
def iterdir(self): def iterdir(self):
"""Yield path objects of the directory contents. """Yield path objects of the directory contents.

View File

@ -1,4 +1,5 @@
import collections import collections
import contextlib
import io import io
import os import os
import errno import errno
@ -1424,6 +1425,24 @@ def close(self):
'st_mode st_ino st_dev st_nlink st_uid st_gid st_size st_atime st_mtime st_ctime') 'st_mode st_ino st_dev st_nlink st_uid st_gid st_size st_atime st_mtime st_ctime')
class DummyDirEntry:
    """
    Bare-bones stand-in for os.DirEntry, as returned from DummyPath.scandir().

    It stores the entry name plus two flags fixed at construction time and
    answers is_symlink() / is_dir() from those flags alone.
    """
    __slots__ = ('name', '_is_symlink', '_is_dir')

    def __init__(self, name, is_symlink, is_dir):
        self.name, self._is_symlink, self._is_dir = name, is_symlink, is_dir

    def is_symlink(self):
        """Return the symlink flag supplied to the constructor."""
        return self._is_symlink

    def is_dir(self, *, follow_symlinks=True):
        """Report whether this entry counts as a directory.

        With follow_symlinks false, an entry flagged as a symlink is never
        reported as a directory, whatever its directory flag says.
        """
        if not self._is_dir:
            return self._is_dir
        return follow_symlinks or not self._is_symlink
class DummyPath(PathBase): class DummyPath(PathBase):
""" """
Simple implementation of PathBase that keeps files and directories in Simple implementation of PathBase that keeps files and directories in
@ -1491,14 +1510,25 @@ def open(self, mode='r', buffering=-1, encoding=None,
stream = io.TextIOWrapper(stream, encoding=encoding, errors=errors, newline=newline) stream = io.TextIOWrapper(stream, encoding=encoding, errors=errors, newline=newline)
return stream return stream
@contextlib.contextmanager
def scandir(self):
    """Context manager yielding an iterator of entries for this directory.

    The path is resolved first. A resolved path registered as a file raises
    NotADirectoryError (ENOTDIR); one that is not a registered directory
    raises FileNotFoundError (ENOENT). Otherwise an iterator over the
    `_dir_entry` of every child recorded for the directory is yielded.
    """
    resolved = self.resolve()
    key = str(resolved)
    if key in self._files:
        raise NotADirectoryError(errno.ENOTDIR, "Not a directory", key)
    if key not in self._directories:
        raise FileNotFoundError(errno.ENOENT, "File not found", key)
    # Build all entries up front; the caller only ever sees an iterator.
    entries = [resolved.joinpath(child)._dir_entry
               for child in self._directories[key]]
    yield iter(entries)
@property
def _dir_entry(self):
    """Build a DummyDirEntry describing this path.

    The path counts as a symlink when its string form is a key of
    `_symlinks`. For a symlink, the directory flag is the second item of
    the stored `_symlinks` value; otherwise it is whether the string form
    is a key of `_directories`.
    """
    key = str(self)
    linked = key in self._symlinks
    if linked:
        directory = self._symlinks[key][1]
    else:
        directory = key in self._directories
    return DummyDirEntry(self.name, linked, directory)
def mkdir(self, mode=0o777, parents=False, exist_ok=False): def mkdir(self, mode=0o777, parents=False, exist_ok=False):
path = str(self.parent.resolve() / self.name) path = str(self.parent.resolve() / self.name)
@ -1602,7 +1632,7 @@ def setUp(self):
if self.can_symlink: if self.can_symlink:
p.joinpath('linkA').symlink_to('fileA') p.joinpath('linkA').symlink_to('fileA')
p.joinpath('brokenLink').symlink_to('non-existing') p.joinpath('brokenLink').symlink_to('non-existing')
p.joinpath('linkB').symlink_to('dirB') p.joinpath('linkB').symlink_to('dirB', target_is_directory=True)
p.joinpath('dirA', 'linkC').symlink_to(parser.join('..', 'dirB')) p.joinpath('dirA', 'linkC').symlink_to(parser.join('..', 'dirB'))
p.joinpath('dirB', 'linkD').symlink_to(parser.join('..', 'dirB')) p.joinpath('dirB', 'linkD').symlink_to(parser.join('..', 'dirB'))
p.joinpath('brokenLinkLoop').symlink_to('brokenLinkLoop') p.joinpath('brokenLinkLoop').symlink_to('brokenLinkLoop')
@ -2187,6 +2217,23 @@ def test_iterdir_nodir(self):
self.assertIn(cm.exception.errno, (errno.ENOTDIR, self.assertIn(cm.exception.errno, (errno.ENOTDIR,
errno.ENOENT, errno.EINVAL)) errno.ENOENT, errno.EINVAL))
def test_scandir(self):
    """Check scandir() entries against the matching child path objects."""
    base = self.cls(self.base)

    # The base directory must produce at least one entry.
    with base.scandir() as scan:
        self.assertTrue(list(scan))

    # Every entry must agree with the equivalent child path.
    with base.scandir() as scan:
        for item in scan:
            expected = base / item.name
            self.assertIsNotNone(item)
            self.assertEqual(item.name, expected.name)
            self.assertEqual(item.is_symlink(),
                             expected.is_symlink())
            self.assertEqual(item.is_dir(follow_symlinks=False),
                             expected.is_dir(follow_symlinks=False))
            # 'brokenLinkLoop' is excluded from the symlink-following
            # comparison only; presumably following a self-referencing link
            # is not comparable between the two APIs -- TODO confirm.
            if item.name == 'brokenLinkLoop':
                continue
            self.assertEqual(item.is_dir(), expected.is_dir())
def test_glob_common(self): def test_glob_common(self):
def _check(glob, expected): def _check(glob, expected):
self.assertEqual(set(glob), { P(self.base, q) for q in expected }) self.assertEqual(set(glob), { P(self.base, q) for q in expected })
@ -3038,7 +3085,7 @@ class DummyPathWithSymlinks(DummyPath):
def readlink(self): def readlink(self):
path = str(self.parent.resolve() / self.name) path = str(self.parent.resolve() / self.name)
if path in self._symlinks: if path in self._symlinks:
return self.with_segments(self._symlinks[path]) return self.with_segments(self._symlinks[path][0])
elif path in self._files or path in self._directories: elif path in self._files or path in self._directories:
raise OSError(errno.EINVAL, "Not a symlink", path) raise OSError(errno.EINVAL, "Not a symlink", path)
else: else:
@ -3050,7 +3097,7 @@ def symlink_to(self, target, target_is_directory=False):
if path in self._symlinks: if path in self._symlinks:
raise FileExistsError(errno.EEXIST, "File exists", path) raise FileExistsError(errno.EEXIST, "File exists", path)
self._directories[parent].add(self.name) self._directories[parent].add(self.name)
self._symlinks[path] = str(target) self._symlinks[path] = str(target), target_is_directory
class DummyPathWithSymlinksTest(DummyPathTest): class DummyPathWithSymlinksTest(DummyPathTest):

View File

@ -0,0 +1,3 @@
Add :meth:`pathlib.Path.scandir` method to efficiently fetch directory
children and their file attributes. This is a trivial wrapper of
:func:`os.scandir`.