From 17f02bbe06fc43e20dc706f66232cf9f1b5116cd Mon Sep 17 00:00:00 2001 From: barneygale Date: Sat, 1 Apr 2023 02:20:11 +0100 Subject: [PATCH 01/26] GH-89727: Fix `shutil.rmtree()` recursion error on deep trees. Add a `follow_junctions` argument to `pathlib.Path.walk()`. When set to false, directory junctions are treated as files. Add an `fwalk()` method to `pathlib.Path`, analogous to `os.fwalk()`. Implement `shutil.rmtree()` using `pathlib.Path.walk()` and `fwalk()`. --- Lib/pathlib.py | 143 +++++++++++++++++++++++-------- Lib/shutil.py | 182 ++++++++++------------------------------ Lib/test/test_shutil.py | 4 +- 3 files changed, 154 insertions(+), 175 deletions(-) diff --git a/Lib/pathlib.py b/Lib/pathlib.py index a126bf2fe5570a..fa2f522ac47630 100644 --- a/Lib/pathlib.py +++ b/Lib/pathlib.py @@ -204,6 +204,89 @@ def _select_from(self, parent_path, is_dir, exists, scandir, normcase): return +# +# Walking helpers +# + + +class _WalkAction: + WALK = object() + YIELD = object() + CLOSE = object() + + +_supports_fwalk = ( + {os.stat, os.open} <= os.supports_dir_fd and + {os.stat, os.scandir} <= os.supports_fd) + + +def _walk(top_down, follow_symlinks, follow_junctions, use_fd, actions): + action, value = actions.pop() + + if action is _WalkAction.WALK: + path, dir_fd, orig_st = value + dirstats = None + dirnames = [] + filenames = [] + if use_fd: + name = path if dir_fd is None else path.name + if not follow_symlinks: + if top_down: + orig_st = os.stat(name, follow_symlinks=False, dir_fd=dir_fd) + else: + dirstats = [] + fd = os.open(name, os.O_RDONLY, dir_fd=dir_fd) + actions.append((_WalkAction.CLOSE, fd)) + if orig_st and not os.path.samestat(orig_st, os.stat(fd)): + # This can only happen if a directory is replaced with a symlink during the walk. + return + result = path, dirnames, filenames, fd + try: + scandir_it = os.scandir(fd) + except OSError as exc: + exc.filename = str(path) + raise exc + else: + fd = None + result = path, dirnames, filenames + scandir_it = path._scandir() + + with scandir_it: + for entry in scandir_it: + try: + is_dir = entry.is_dir(follow_symlinks=follow_symlinks) and ( + follow_junctions or not entry.is_junction()) + if is_dir: + if dirstats is not None: + dirstats.append(entry.stat(follow_symlinks=False)) + dirnames.append(entry.name) + else: + filenames.append(entry.name) + + except OSError: + filenames.append(entry.name) + + if top_down: + yield result + else: + actions.append((_WalkAction.YIELD, result)) + + if dirstats: + actions += [ + (_WalkAction.WALK, (path._make_child_relpath(dirname), fd, dirstat)) + for dirname, dirstat in zip(reversed(dirnames), reversed(dirstats))] + else: + actions += [ + (_WalkAction.WALK, (path._make_child_relpath(dirname), fd, None)) + for dirname in reversed(dirnames)] + + elif action is _WalkAction.YIELD: + yield value + elif action is _WalkAction.CLOSE: + os.close(value) + else: + raise AssertionError(f"unknown walk action: {action}") + # # Public API # @@ -1194,50 +1277,38 @@ def expanduser(self): return self - def walk(self, top_down=True, on_error=None, follow_symlinks=False): + def walk(self, top_down=True, on_error=None, follow_symlinks=False, follow_junctions=True): """Walk the directory tree from this directory, similar to os.walk().""" - sys.audit("pathlib.Path.walk", self, on_error, follow_symlinks) - paths = [self] - - while paths: - path = paths.pop() - if isinstance(path, tuple): - yield path - continue - - # We may not have read permission for self, in which case we can't - # get a list of the files the directory contains. os.walk() - # always suppressed the exception in that instance, rather than - # blow up for a minor reason when (say) a thousand readable - # directories are still left to visit. That logic is copied here. + sys.audit("pathlib.Path.walk", self, on_error, follow_symlinks, follow_junctions) + actions = [(_WalkAction.WALK, (self, None, None))] + while actions: try: - scandir_it = path._scandir() + yield from _walk(top_down, follow_symlinks, follow_junctions, False, actions) except OSError as error: if on_error is not None: on_error(error) continue - with scandir_it: - dirnames = [] - filenames = [] - for entry in scandir_it: + if _supports_fwalk: + def fwalk(self, top_down=True, on_error=None, follow_symlinks=False, dir_fd=None): + """Walk the directory tree from this directory, similar to os.fwalk().""" + sys.audit("pathlib.Path.fwalk", self, on_error, follow_symlinks, dir_fd) + actions = [(_WalkAction.WALK, (self, dir_fd, None))] + try: + while actions: try: - is_dir = entry.is_dir(follow_symlinks=follow_symlinks) - except OSError: - # Carried over from os.path.isdir(). - is_dir = False - - if is_dir: - dirnames.append(entry.name) - else: - filenames.append(entry.name) - - if top_down: - yield path, dirnames, filenames - else: - paths.append((path, dirnames, filenames)) - - paths += [path._make_child_relpath(d) for d in reversed(dirnames)] + yield from _walk(top_down, follow_symlinks, True, True, actions) + except OSError as error: + if on_error is not None: + on_error(error) + continue + finally: + for action, value in reversed(actions): + if action is _WalkAction.CLOSE: + try: + os.close(value) + except OSError: + pass class PosixPath(Path, PurePosixPath): diff --git a/Lib/shutil.py b/Lib/shutil.py index b0576407e02ffb..7fbeba5ed34cb2 100644 --- a/Lib/shutil.py +++ b/Lib/shutil.py @@ -10,6 +10,7 @@ import fnmatch import collections import errno +import pathlib import warnings try: @@ -562,111 +563,56 @@ def copytree(src, dst, symlinks=False, ignore=None, copy_function=copy2, ignore_dangling_symlinks=ignore_dangling_symlinks, dirs_exist_ok=dirs_exist_ok) -if hasattr(os.stat_result, 'st_file_attributes'): - def _rmtree_islink(path): - try: - st = os.lstat(path) - return (stat.S_ISLNK(st.st_mode) or - (st.st_file_attributes & stat.FILE_ATTRIBUTE_REPARSE_POINT - and st.st_reparse_tag == stat.IO_REPARSE_TAG_MOUNT_POINT)) - except OSError: - return False -else: - def _rmtree_islink(path): - return os.path.islink(path) # version vulnerable to race conditions def _rmtree_unsafe(path, onexc): - try: - with os.scandir(path) as scandir_it: - entries = list(scandir_it) - except OSError as err: - onexc(os.scandir, path, err) - entries = [] - for entry in entries: - fullname = entry.path - try: - is_dir = entry.is_dir(follow_symlinks=False) - except OSError: - is_dir = False - - if is_dir and not entry.is_junction(): + def on_walk_error(exc): + onexc(os.scandir, exc.filename, exc) + walker = path.walk( + top_down=False, + follow_symlinks=False, + follow_junctions=False, + on_error=on_walk_error) + for toppath, dirnames, filenames in walker: + for dirname in dirnames: try: - if entry.is_symlink(): - # This can only happen if someone replaces - # a directory with a symlink after the call to - # os.scandir or entry.is_dir above. - raise OSError("Cannot call rmtree on a symbolic link") - except OSError as err: - onexc(os.path.islink, fullname, err) - continue - _rmtree_unsafe(fullname, onexc) - else: + toppath._make_child_relpath(dirname).rmdir() + except OSError as exc: + onexc(os.rmdir, str(toppath / dirname), exc) + for filename in filenames: try: - os.unlink(fullname) - except OSError as err: - onexc(os.unlink, fullname, err) + toppath._make_child_relpath(filename).unlink() + except OSError as exc: + onexc(os.unlink, str(toppath / filename), exc) try: - os.rmdir(path) - except OSError as err: - onexc(os.rmdir, path, err) + path.rmdir() + except OSError as exc: + onexc(os.rmdir, str(path), exc) # Version using fd-based APIs to protect against races -def _rmtree_safe_fd(topfd, path, onexc): - try: - with os.scandir(topfd) as scandir_it: - entries = list(scandir_it) - except OSError as err: - err.filename = path - onexc(os.scandir, path, err) - return - for entry in entries: - fullname = os.path.join(path, entry.name) - try: - is_dir = entry.is_dir(follow_symlinks=False) - except OSError: - is_dir = False - else: - if is_dir: - try: - orig_st = entry.stat(follow_symlinks=False) - is_dir = stat.S_ISDIR(orig_st.st_mode) - except OSError as err: - onexc(os.lstat, fullname, err) - continue - if is_dir: +def _rmtree_safe_fd(path, onexc, dir_fd): + def on_walk_error(exc): + onexc(os.scandir, exc.filename, exc) + walker = path.fwalk( + top_down=False, + follow_symlinks=False, + on_error=on_walk_error, + dir_fd=dir_fd) + for toppath, dirnames, filenames, topfd in walker: + for dirname in dirnames: try: - dirfd = os.open(entry.name, os.O_RDONLY, dir_fd=topfd) - dirfd_closed = False - except OSError as err: - onexc(os.open, fullname, err) - else: - try: - if os.path.samestat(orig_st, os.fstat(dirfd)): - _rmtree_safe_fd(dirfd, fullname, onexc) - try: - os.close(dirfd) - dirfd_closed = True - os.rmdir(entry.name, dir_fd=topfd) - except OSError as err: - onexc(os.rmdir, fullname, err) - else: - try: - # This can only happen if someone replaces - # a directory with a symlink after the call to - # os.scandir or stat.S_ISDIR above. - raise OSError("Cannot call rmtree on a symbolic " - "link") - except OSError as err: - onexc(os.path.islink, fullname, err) - finally: - if not dirfd_closed: - os.close(dirfd) - else: + os.rmdir(dirname, dir_fd=topfd) + except OSError as exc: + onexc(os.rmdir, str(toppath / dirname), exc) + for filename in filenames: try: - os.unlink(entry.name, dir_fd=topfd) - except OSError as err: - onexc(os.unlink, fullname, err) + os.unlink(filename, dir_fd=topfd) + except OSError as exc: + onexc(os.unlink, str(toppath / filename), exc) + try: + os.rmdir(path, dir_fd=dir_fd) + except OSError as exc: + onexc(os.rmdir, str(path), exc) _use_fd_functions = ({os.open, os.stat, os.unlink, os.rmdir} <= os.supports_dir_fd and @@ -719,52 +665,14 @@ def onexc(*args): exc_info = type(exc), exc, exc.__traceback__ return onerror(func, path, exc_info) + if isinstance(path, bytes): + path = os.fsdecode(path) + path = pathlib.Path(path) if _use_fd_functions: - # While the unsafe rmtree works fine on bytes, the fd based does not. - if isinstance(path, bytes): - path = os.fsdecode(path) - # Note: To guard against symlink races, we use the standard - # lstat()/open()/fstat() trick. - try: - orig_st = os.lstat(path, dir_fd=dir_fd) - except Exception as err: - onexc(os.lstat, path, err) - return - try: - fd = os.open(path, os.O_RDONLY, dir_fd=dir_fd) - fd_closed = False - except Exception as err: - onexc(os.open, path, err) - return - try: - if os.path.samestat(orig_st, os.fstat(fd)): - _rmtree_safe_fd(fd, path, onexc) - try: - os.close(fd) - fd_closed = True - os.rmdir(path, dir_fd=dir_fd) - except OSError as err: - onexc(os.rmdir, path, err) - else: - try: - # symlinks to directories are forbidden, see bug #1669 - raise OSError("Cannot call rmtree on a symbolic link") - except OSError as err: - onexc(os.path.islink, path, err) - finally: - if not fd_closed: - os.close(fd) + _rmtree_safe_fd(path, onexc, dir_fd) else: if dir_fd is not None: raise NotImplementedError("dir_fd unavailable on this platform") - try: - if _rmtree_islink(path): - # symlinks to directories are forbidden, see bug #1669 - raise OSError("Cannot call rmtree on a symbolic link") - except OSError as err: - onexc(os.path.islink, path, err) - # can't continue even if onexc hook returns - return return _rmtree_unsafe(path, onexc) # Allow introspection of whether or not the hardening against symlink diff --git a/Lib/test/test_shutil.py b/Lib/test/test_shutil.py index 1c0589ced9ea89..dd61f85d48872b 100644 --- a/Lib/test/test_shutil.py +++ b/Lib/test/test_shutil.py @@ -211,7 +211,7 @@ def onerror(*args): with self.assertWarns(DeprecationWarning): shutil.rmtree(link, onerror=onerror) self.assertEqual(len(errors), 1) - self.assertIs(errors[0][0], os.path.islink) + self.assertIs(errors[0][0], os.rmdir) self.assertEqual(errors[0][1], link) self.assertIsInstance(errors[0][2][1], OSError) @@ -230,7 +230,7 @@ def onexc(*args): errors.append(args) shutil.rmtree(link, onexc=onexc) self.assertEqual(len(errors), 1) - self.assertIs(errors[0][0], os.path.islink) + self.assertIs(errors[0][0], os.rmdir) self.assertEqual(errors[0][1], link) self.assertIsInstance(errors[0][2], OSError) From 48dc5db406b88d0b00448903a2ceb36f279676cd Mon Sep 17 00:00:00 2001 From: barneygale Date: Sat, 1 Apr 2023 02:40:36 +0100 Subject: [PATCH 02/26] Delay urllib import --- Lib/pathlib.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/Lib/pathlib.py b/Lib/pathlib.py index fa2f522ac47630..5d8bfe0c10b3be 100644 --- a/Lib/pathlib.py +++ b/Lib/pathlib.py @@ -18,7 +18,6 @@ from errno import ENOENT, ENOTDIR, EBADF, ELOOP from operator import attrgetter from stat import S_ISDIR, S_ISLNK, S_ISREG, S_ISSOCK, S_ISBLK, S_ISCHR, S_ISFIFO -from urllib.parse import quote_from_bytes as urlquote_from_bytes __all__ = [ @@ -452,7 +451,9 @@ def as_uri(self): # It's a posix path => 'file:///etc/hosts' prefix = 'file://' path = str(self) - return prefix + urlquote_from_bytes(os.fsencode(path)) + + from urllib.parse import quote_from_bytes + return prefix + quote_from_bytes(os.fsencode(path)) @property def _parts_normcase(self): From 6e92b2728458bb7fe2154cea9872d2c640cef18f Mon Sep 17 00:00:00 2001 From: barneygale Date: Sat, 1 Apr 2023 18:05:17 +0100 Subject: [PATCH 03/26] Fix up filenames in exceptions --- Lib/pathlib.py | 37 +++++++++++++++++++------------------ Lib/shutil.py | 28 ++++++++++++++-------------- Lib/test/test_shutil.py | 6 ++---- 3 files changed, 35 insertions(+), 36 deletions(-) diff --git a/Lib/pathlib.py b/Lib/pathlib.py index 5d8bfe0c10b3be..5e13738813dc20 100644 --- a/Lib/pathlib.py +++ b/Lib/pathlib.py @@ -223,24 +223,25 @@ def _walk(top_down, follow_symlinks, follow_junctions, use_fd, actions): action, value = actions.pop() if action is _WalkAction.WALK: - path, dir_fd, orig_st = value - dirstats = None + path, dir_fd, entry = value + entries = [] if use_fd and not top_down and not follow_symlinks else None dirnames = [] filenames = [] if use_fd: name = path if dir_fd is None else path.name - if not follow_symlinks: - if top_down: - orig_st = os.stat(name, follow_symlinks=False, dir_fd=dir_fd) - else: - dirstats = [] - fd = os.open(name, os.O_RDONLY, dir_fd=dir_fd) - actions.append((_WalkAction.CLOSE, fd)) - if orig_st and not os.path.samestat(orig_st, os.stat(fd)): - # This can only happen if a directory is replaced with a symlink during the walk. - return - result = path, dirnames, filenames, fd + orig_st = None try: + if not follow_symlinks: + if entry: + orig_st = entry.stat(follow_symlinks=False) + else: + orig_st = os.stat(name, follow_symlinks=False, dir_fd=dir_fd) + fd = os.open(name, os.O_RDONLY, dir_fd=dir_fd) + actions.append((_WalkAction.CLOSE, fd)) + if orig_st and not os.path.samestat(orig_st, os.stat(fd)): + # This can only happen if a directory is replaced with a symlink during the walk. + return + result = path, dirnames, filenames, fd scandir_it = os.scandir(fd) except OSError as exc: exc.filename = str(path) @@ -256,8 +257,8 @@ def _walk(top_down, follow_symlinks, follow_junctions, use_fd, actions): is_dir = entry.is_dir(follow_symlinks=follow_symlinks) and ( follow_junctions or not entry.is_junction()) if is_dir: - if dirstats is not None: - dirstats.append(entry.stat(follow_symlinks=False)) + if entries is not None: + entries.append(entry) dirnames.append(entry.name) else: filenames.append(entry.name) @@ -270,10 +271,10 @@ def _walk(top_down, follow_symlinks, follow_junctions, use_fd, actions): else: actions.append((_WalkAction.YIELD, result)) - if dirstats: + if entries: actions += [ - (_WalkAction.WALK, (path._make_child_relpath(dirname), fd, dirstat)) - for dirname, dirstat in zip(reversed(dirnames), reversed(dirstats))] + (_WalkAction.WALK, (path._make_child_relpath(dirname), fd, entry)) + for dirname, entry in zip(reversed(dirnames), reversed(entries))] else: actions += [ (_WalkAction.WALK, (path._make_child_relpath(dirname), fd, None)) diff --git a/Lib/shutil.py b/Lib/shutil.py index 7fbeba5ed34cb2..ae8467dd08be71 100644 --- a/Lib/shutil.py +++ b/Lib/shutil.py @@ -575,15 +575,17 @@ def on_walk_error(exc): on_error=on_walk_error) for toppath, dirnames, filenames in walker: for dirname in dirnames: + dirpath = toppath._make_child_relpath(dirname) try: - toppath._make_child_relpath(dirname).rmdir() + dirpath.rmdir() except OSError as exc: - onexc(os.rmdir, str(toppath / dirname), exc) + onexc(os.rmdir, str(dirpath), exc) for filename in filenames: + filepath = toppath._make_child_relpath(filename) try: - toppath._make_child_relpath(filename).unlink() + filepath.unlink() except OSError as exc: - onexc(os.unlink, str(toppath / filename), exc) + onexc(os.unlink, str(filepath), exc) try: path.rmdir() except OSError as exc: @@ -603,21 +605,19 @@ def on_walk_error(exc): try: os.rmdir(dirname, dir_fd=topfd) except OSError as exc: - onexc(os.rmdir, str(toppath / dirname), exc) + exc.filename = str(toppath._make_child_relpath(dirname)) + onexc(os.rmdir, exc.filename, exc) for filename in filenames: try: os.unlink(filename, dir_fd=topfd) except OSError as exc: - onexc(os.unlink, str(toppath / filename), exc) + exc.filename = str(toppath._make_child_relpath(filename)) + onexc(os.unlink, exc.filename, exc) try: os.rmdir(path, dir_fd=dir_fd) except OSError as exc: - onexc(os.rmdir, str(path), exc) - -_use_fd_functions = ({os.open, os.stat, os.unlink, os.rmdir} <= - os.supports_dir_fd and - os.scandir in os.supports_fd and - os.stat in os.supports_follow_symlinks) + exc.filename = str(path) + onexc(os.rmdir, exc.filename, exc) def rmtree(path, ignore_errors=False, onerror=None, *, onexc=None, dir_fd=None): """Recursively delete a directory tree. @@ -668,7 +668,7 @@ def onexc(*args): if isinstance(path, bytes): path = os.fsdecode(path) path = pathlib.Path(path) - if _use_fd_functions: + if hasattr(path, "fwalk"): _rmtree_safe_fd(path, onexc, dir_fd) else: if dir_fd is not None: @@ -677,7 +677,7 @@ def onexc(*args): # Allow introspection of whether or not the hardening against symlink # attacks is supported on the current platform -rmtree.avoids_symlink_attacks = _use_fd_functions +rmtree.avoids_symlink_attacks = hasattr(pathlib.Path, "fwalk") def _basename(path): """A basename() variant which first strips the trailing slash, if present. diff --git a/Lib/test/test_shutil.py b/Lib/test/test_shutil.py index dd61f85d48872b..442936acf11297 100644 --- a/Lib/test/test_shutil.py +++ b/Lib/test/test_shutil.py @@ -564,7 +564,6 @@ def test_rmtree_uses_safe_fd_version_if_available(self): os.listdir in os.supports_fd and os.stat in os.supports_follow_symlinks) if _use_fd_functions: - self.assertTrue(shutil._use_fd_functions) self.assertTrue(shutil.rmtree.avoids_symlink_attacks) tmp_dir = self.mkdtemp() d = os.path.join(tmp_dir, 'a') @@ -579,10 +578,9 @@ def _raiser(*args, **kwargs): finally: shutil._rmtree_safe_fd = real_rmtree else: - self.assertFalse(shutil._use_fd_functions) self.assertFalse(shutil.rmtree.avoids_symlink_attacks) - @unittest.skipUnless(shutil._use_fd_functions, "dir_fd is not supported") + @unittest.skipUnless(shutil.rmtree.avoids_symlink_attacks, "dir_fd is not supported") def test_rmtree_with_dir_fd(self): tmp_dir = self.mkdtemp() victim = 'killme' @@ -596,7 +594,7 @@ def test_rmtree_with_dir_fd(self): shutil.rmtree(victim, dir_fd=dir_fd) self.assertFalse(os.path.exists(fullname)) - @unittest.skipIf(shutil._use_fd_functions, "dir_fd is supported") + @unittest.skipIf(shutil.rmtree.avoids_symlink_attacks, "dir_fd is supported") def test_rmtree_with_dir_fd_unsupported(self): tmp_dir = self.mkdtemp() with self.assertRaises(NotImplementedError): From c042220d5a61e21c6f9750505deb5816f3bd2549 Mon Sep 17 00:00:00 2001 From: barneygale Date: Sat, 1 Apr 2023 18:42:20 +0100 Subject: [PATCH 04/26] Restore `shutil._rmtree_islink()` --- Lib/shutil.py | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/Lib/shutil.py b/Lib/shutil.py index ae8467dd08be71..33a91122ed67d5 100644 --- a/Lib/shutil.py +++ b/Lib/shutil.py @@ -563,6 +563,18 @@ def copytree(src, dst, symlinks=False, ignore=None, copy_function=copy2, ignore_dangling_symlinks=ignore_dangling_symlinks, dirs_exist_ok=dirs_exist_ok) +if hasattr(os.stat_result, 'st_file_attributes'): + def _rmtree_islink(path): + try: + st = os.lstat(path) + return (stat.S_ISLNK(st.st_mode) or + (st.st_file_attributes & stat.FILE_ATTRIBUTE_REPARSE_POINT + and st.st_reparse_tag == stat.IO_REPARSE_TAG_MOUNT_POINT)) + except OSError: + return False +else: + def _rmtree_islink(path): + return os.path.islink(path) # version vulnerable to race conditions def _rmtree_unsafe(path, onexc): @@ -673,6 +685,14 @@ def onexc(*args): else: if dir_fd is not None: raise NotImplementedError("dir_fd unavailable on this platform") + try: + if _rmtree_islink(path): + # symlinks to directories are forbidden, see bug #1669 + raise OSError("Cannot call rmtree on a symbolic link") + except OSError as err: + onexc(os.path.islink, path, err) + # can't continue even if onexc hook returns + return return _rmtree_unsafe(path, onexc) # Allow introspection of whether or not the hardening against symlink From e268e89a2c95dcc1a011bdbcb405c3de9dfc456f Mon Sep 17 00:00:00 2001 From: barneygale Date: Sat, 1 Apr 2023 18:42:32 +0100 Subject: [PATCH 05/26] Tidy up exception handling --- Lib/shutil.py | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/Lib/shutil.py b/Lib/shutil.py index 33a91122ed67d5..abe88bf6d70791 100644 --- a/Lib/shutil.py +++ b/Lib/shutil.py @@ -587,21 +587,19 @@ def on_walk_error(exc): on_error=on_walk_error) for toppath, dirnames, filenames in walker: for dirname in dirnames: - dirpath = toppath._make_child_relpath(dirname) try: - dirpath.rmdir() + toppath._make_child_relpath(dirname).rmdir() except OSError as exc: - onexc(os.rmdir, str(dirpath), exc) + onexc(os.rmdir, exc.filename, exc) for filename in filenames: - filepath = toppath._make_child_relpath(filename) try: - filepath.unlink() + toppath._make_child_relpath(filename).unlink() except OSError as exc: - onexc(os.unlink, str(filepath), exc) + onexc(os.unlink, exc.filename, exc) try: path.rmdir() except OSError as exc: - onexc(os.rmdir, str(path), exc) + onexc(os.rmdir, exc.filename, exc) # Version using fd-based APIs to protect against races def _rmtree_safe_fd(path, onexc, dir_fd): From 3f40cb0f7b6d7315fb539019af46ee2eccd11043 Mon Sep 17 00:00:00 2001 From: barneygale Date: Sat, 1 Apr 2023 20:03:19 +0100 Subject: [PATCH 06/26] Better compatibility with shutil tests. --- Lib/pathlib.py | 5 +++-- Lib/shutil.py | 26 +++++++++++++++----------- Lib/test/test_shutil.py | 16 ++++------------ 3 files changed, 22 insertions(+), 25 deletions(-) diff --git a/Lib/pathlib.py b/Lib/pathlib.py index 5e13738813dc20..497ff54991b84c 100644 --- a/Lib/pathlib.py +++ b/Lib/pathlib.py @@ -239,8 +239,9 @@ def _walk(top_down, follow_symlinks, follow_junctions, use_fd, actions): fd = os.open(name, os.O_RDONLY, dir_fd=dir_fd) actions.append((_WalkAction.CLOSE, fd)) if orig_st and not os.path.samestat(orig_st, os.stat(fd)): - # This can only happen if a directory is replaced with a symlink during the walk. - return + exc = NotADirectoryError("Will not walk into symbolic link") + exc.func = os.path.islink + raise exc result = path, dirnames, filenames, fd scandir_it = os.scandir(fd) except OSError as exc: diff --git a/Lib/shutil.py b/Lib/shutil.py index abe88bf6d70791..83aa0df17d5e83 100644 --- a/Lib/shutil.py +++ b/Lib/shutil.py @@ -579,7 +579,7 @@ def _rmtree_islink(path): # version vulnerable to race conditions def _rmtree_unsafe(path, onexc): def on_walk_error(exc): - onexc(os.scandir, exc.filename, exc) + onexc(getattr(exc, 'func', os.scandir), exc.filename, exc) walker = path.walk( top_down=False, follow_symlinks=False, @@ -596,15 +596,17 @@ def on_walk_error(exc): toppath._make_child_relpath(filename).unlink() except OSError as exc: onexc(os.unlink, exc.filename, exc) - try: - path.rmdir() - except OSError as exc: - onexc(os.rmdir, exc.filename, exc) + if toppath == path: + try: + path.rmdir() + except OSError as exc: + onexc(os.rmdir, exc.filename, exc) + # Version using fd-based APIs to protect against races def _rmtree_safe_fd(path, onexc, dir_fd): def on_walk_error(exc): - onexc(os.scandir, exc.filename, exc) + onexc(getattr(exc, 'func', os.scandir), exc.filename, exc) walker = path.fwalk( top_down=False, follow_symlinks=False, @@ -623,11 +625,13 @@ def on_walk_error(exc): except OSError as exc: exc.filename = str(toppath._make_child_relpath(filename)) onexc(os.unlink, exc.filename, exc) - try: - os.rmdir(path, dir_fd=dir_fd) - except OSError as exc: - exc.filename = str(path) - onexc(os.rmdir, exc.filename, exc) + if toppath == path: + try: + os.rmdir(path, dir_fd=dir_fd) + except OSError as exc: + exc.filename = str(path) + onexc(os.rmdir, exc.filename, exc) + def rmtree(path, ignore_errors=False, onerror=None, *, onexc=None, dir_fd=None): """Recursively delete a directory tree. diff --git a/Lib/test/test_shutil.py b/Lib/test/test_shutil.py index 442936acf11297..5c85acd0f6ef89 100644 --- a/Lib/test/test_shutil.py +++ b/Lib/test/test_shutil.py @@ -211,7 +211,7 @@ def onerror(*args): with self.assertWarns(DeprecationWarning): shutil.rmtree(link, onerror=onerror) self.assertEqual(len(errors), 1) - self.assertIs(errors[0][0], os.rmdir) + self.assertIs(errors[0][0], os.path.islink) self.assertEqual(errors[0][1], link) self.assertIsInstance(errors[0][2][1], OSError) @@ -230,7 +230,7 @@ def onexc(*args): errors.append(args) shutil.rmtree(link, onexc=onexc) self.assertEqual(len(errors), 1) - self.assertIs(errors[0][0], os.rmdir) + self.assertIs(errors[0][0], os.path.islink) self.assertEqual(errors[0][1], link) self.assertIsInstance(errors[0][2], OSError) @@ -342,15 +342,11 @@ def onerror(*args): errors.append(args) with self.assertWarns(DeprecationWarning): shutil.rmtree(filename, onerror=onerror) - self.assertEqual(len(errors), 2) + self.assertEqual(len(errors), 1) self.assertIs(errors[0][0], os.scandir) self.assertEqual(errors[0][1], filename) self.assertIsInstance(errors[0][2][1], NotADirectoryError) self.assertEqual(errors[0][2][1].filename, filename) - self.assertIs(errors[1][0], os.rmdir) - self.assertEqual(errors[1][1], filename) - self.assertIsInstance(errors[1][2][1], NotADirectoryError) - self.assertEqual(errors[1][2][1].filename, filename) def test_rmtree_errors_onexc(self): # filename is guaranteed not to exist @@ -374,15 +370,11 @@ def test_rmtree_errors_onexc(self): def onexc(*args): errors.append(args) shutil.rmtree(filename, onexc=onexc) - self.assertEqual(len(errors), 2) + self.assertEqual(len(errors), 1) self.assertIs(errors[0][0], os.scandir) self.assertEqual(errors[0][1], filename) self.assertIsInstance(errors[0][2], NotADirectoryError) self.assertEqual(errors[0][2].filename, filename) - self.assertIs(errors[1][0], os.rmdir) - self.assertEqual(errors[1][1], filename) - self.assertIsInstance(errors[1][2], NotADirectoryError) - self.assertEqual(errors[1][2].filename, filename) @unittest.skipIf(sys.platform[:6] == 'cygwin', "This test can't be run on Cygwin (issue #1071513).") From b59bda88166b113ab5b46c776cba2e4dbe5b0f93 Mon Sep 17 00:00:00 2001 From: barneygale Date: Sat, 1 Apr 2023 21:12:53 +0100 Subject: [PATCH 07/26] Move main implementation into pathlib --- Lib/pathlib.py | 76 ++++++++++++++++++++++++++ Lib/shutil.py | 116 +++++----------------------------------- Lib/test/test_shutil.py | 6 +-- 3 files changed, 91 insertions(+), 107 deletions(-) diff --git a/Lib/pathlib.py b/Lib/pathlib.py index 497ff54991b84c..995acd4b1b23cb 100644 --- a/Lib/pathlib.py +++ b/Lib/pathlib.py @@ -245,6 +245,8 @@ def _walk(top_down, follow_symlinks, follow_junctions, use_fd, actions): result = path, dirnames, filenames, fd scandir_it = os.scandir(fd) except OSError as exc: + if not hasattr(exc, 'func'): + exc.func = os.scandir exc.filename = str(path) raise exc else: @@ -1313,6 +1315,80 @@ def fwalk(self, top_down=True, on_error=None, follow_symlinks=False, dir_fd=None except OSError: pass + def _rmtree(self, dir_fd, on_error): + walker = self.fwalk( + top_down=False, + follow_symlinks=False, + on_error=on_error, + dir_fd=dir_fd) + for toppath, dirnames, filenames, topfd in walker: + for dirname in dirnames: + try: + os.rmdir(dirname, dir_fd=topfd) + except OSError as exc: + if on_error: + exc.filename = str(toppath._make_child_relpath(dirname)) + exc.func = os.rmdir + on_error(exc) + for filename in filenames: + try: + os.unlink(filename, dir_fd=topfd) + except OSError as exc: + if on_error: + exc.filename = str(toppath._make_child_relpath(filename)) + exc.func = os.unlink + on_error(exc) + if toppath == self: + try: + os.rmdir(self, dir_fd=dir_fd) + except OSError as exc: + if on_error: + exc.filename = str(self) + exc.func = os.rmdir + on_error(exc) + _rmtree.avoids_symlink_attacks = True + + else: + def _rmtree(self, dir_fd, on_error): + if dir_fd is not None: + raise NotImplementedError("dir_fd unavailable on this platform") + try: + if self.is_symlink() or self.is_junction(): + raise OSError("Cannot call rmtree on a symbolic link") + except OSError as err: + if on_error: + on_error(err) + return + + walker = self.walk( + top_down=False, + follow_symlinks=False, + follow_junctions=False, + on_error=on_error) + for toppath, dirnames, filenames in walker: + for dirname in dirnames: + try: + toppath._make_child_relpath(dirname).rmdir() + except OSError as exc: + if on_error: + exc.func = os.rmdir + on_error(exc) + for filename in filenames: + try: + toppath._make_child_relpath(filename).unlink() + except OSError as exc: + if on_error: + exc.func = os.unlink + on_error(exc) + if toppath == self: + try: + self.rmdir() + except OSError as exc: + if on_error: + exc.func = os.rmdir + on_error(exc) + _rmtree.avoids_symlink_attacks = False + class PosixPath(Path, PurePosixPath): """Path subclass for non-Windows systems. diff --git a/Lib/shutil.py b/Lib/shutil.py index 83aa0df17d5e83..5a1fc16b51bf5a 100644 --- a/Lib/shutil.py +++ b/Lib/shutil.py @@ -563,76 +563,6 @@ def copytree(src, dst, symlinks=False, ignore=None, copy_function=copy2, ignore_dangling_symlinks=ignore_dangling_symlinks, dirs_exist_ok=dirs_exist_ok) -if hasattr(os.stat_result, 'st_file_attributes'): - def _rmtree_islink(path): - try: - st = os.lstat(path) - return (stat.S_ISLNK(st.st_mode) or - (st.st_file_attributes & stat.FILE_ATTRIBUTE_REPARSE_POINT - and st.st_reparse_tag == stat.IO_REPARSE_TAG_MOUNT_POINT)) - except OSError: - return False -else: - def _rmtree_islink(path): - return os.path.islink(path) - -# version vulnerable to race conditions -def _rmtree_unsafe(path, onexc): - def on_walk_error(exc): - onexc(getattr(exc, 'func', os.scandir), exc.filename, exc) - walker = path.walk( - top_down=False, - follow_symlinks=False, - follow_junctions=False, - on_error=on_walk_error) - for toppath, dirnames, filenames in walker: - for dirname in dirnames: - try: - toppath._make_child_relpath(dirname).rmdir() - except OSError as exc: - onexc(os.rmdir, exc.filename, exc) - for filename in filenames: - try: - toppath._make_child_relpath(filename).unlink() - except OSError as exc: - onexc(os.unlink, exc.filename, exc) - if toppath == path: - try: - path.rmdir() - except OSError as exc: - onexc(os.rmdir, exc.filename, exc) - - -# Version using fd-based APIs to protect against races -def _rmtree_safe_fd(path, onexc, dir_fd): - def on_walk_error(exc): - onexc(getattr(exc, 'func', os.scandir), exc.filename, exc) - walker = path.fwalk( - top_down=False, - follow_symlinks=False, - on_error=on_walk_error, - dir_fd=dir_fd) - for toppath, dirnames, filenames, topfd in walker: - for dirname in dirnames: - try: - os.rmdir(dirname, dir_fd=topfd) - except OSError as exc: - exc.filename = str(toppath._make_child_relpath(dirname)) - onexc(os.rmdir, exc.filename, exc) - for filename in filenames: - try: - os.unlink(filename, dir_fd=topfd) - except OSError as exc: - exc.filename = str(toppath._make_child_relpath(filename)) - onexc(os.unlink, exc.filename, exc) - if toppath == path: - try: - os.rmdir(path, dir_fd=dir_fd) - except OSError as exc: - exc.filename = str(path) - onexc(os.rmdir, exc.filename, exc) - - def rmtree(path, ignore_errors=False, onerror=None, *, onexc=None, dir_fd=None): """Recursively delete a directory tree. @@ -660,46 +590,24 @@ def rmtree(path, ignore_errors=False, onerror=None, *, onexc=None, dir_fd=None): sys.audit("shutil.rmtree", path, dir_fd) if ignore_errors: - def onexc(*args): - pass - elif onerror is None and onexc is None: - def onexc(*args): + handle_error = None + elif onexc is not None: + def handle_error(error): + return onexc(error.func, error.filename, error) + elif onerror is not None: + def handle_error(error): + exc_info = type(error), error, error.__traceback__ + return onerror(error.func, error.filename, exc_info) + else: + def handle_error(error): raise - elif onexc is None: - if onerror is None: - def onexc(*args): - raise - else: - # delegate to onerror - def onexc(*args): - func, path, exc = args - if exc is None: - exc_info = None, None, None - else: - exc_info = type(exc), exc, exc.__traceback__ - return onerror(func, path, exc_info) - if isinstance(path, bytes): path = os.fsdecode(path) - path = pathlib.Path(path) - if hasattr(path, "fwalk"): - _rmtree_safe_fd(path, onexc, dir_fd) - else: - if dir_fd is not None: - raise NotImplementedError("dir_fd unavailable on this platform") - try: - if _rmtree_islink(path): - # symlinks to directories are forbidden, see bug #1669 - raise OSError("Cannot call rmtree on a symbolic link") - except OSError as err: - onexc(os.path.islink, path, err) - # can't continue even if onexc hook returns - return - return _rmtree_unsafe(path, onexc) + pathlib.Path(path)._rmtree(dir_fd, handle_error) # Allow introspection of whether or not the hardening against symlink # attacks is supported on the current platform -rmtree.avoids_symlink_attacks = hasattr(pathlib.Path, "fwalk") +rmtree.avoids_symlink_attacks = pathlib.Path._rmtree.avoids_symlink_attacks def _basename(path): """A basename() variant which first strips the trailing slash, if present. diff --git a/Lib/test/test_shutil.py b/Lib/test/test_shutil.py index 5c85acd0f6ef89..5df021ec5083b9 100644 --- a/Lib/test/test_shutil.py +++ b/Lib/test/test_shutil.py @@ -561,14 +561,14 @@ def test_rmtree_uses_safe_fd_version_if_available(self): d = os.path.join(tmp_dir, 'a') os.mkdir(d) try: - real_rmtree = shutil._rmtree_safe_fd + real_fwalk = pathlib.Path.fwalk class Called(Exception): pass def _raiser(*args, **kwargs): raise Called - shutil._rmtree_safe_fd = _raiser + pathlib.Path.fwalk = _raiser self.assertRaises(Called, shutil.rmtree, d) finally: - shutil._rmtree_safe_fd = real_rmtree + pathlib.Path.fwalk = real_fwalk else: self.assertFalse(shutil.rmtree.avoids_symlink_attacks) From c1743769f88a9d0d87585753906c4146d83aba60 Mon Sep 17 00:00:00 2001 From: barneygale Date: Sat, 1 Apr 2023 22:01:12 +0100 Subject: [PATCH 08/26] Make `_fwalk()` private, tidy up naming. --- Lib/pathlib.py | 51 +++++++++++++++++++++++++------------------------- 1 file changed, 26 insertions(+), 25 deletions(-) diff --git a/Lib/pathlib.py b/Lib/pathlib.py index 995acd4b1b23cb..e04b90aa775ead 100644 --- a/Lib/pathlib.py +++ b/Lib/pathlib.py @@ -1295,7 +1295,7 @@ def walk(self, top_down=True, on_error=None, follow_symlinks=False, follow_junct continue if _supports_fwalk: - def fwalk(self, top_down=True, on_error=None, follow_symlinks=False, dir_fd=None): + def _fwalk(self, top_down=True, on_error=None, follow_symlinks=False, dir_fd=None): """Walk the directory tree from this directory, similar to os.fwalk().""" sys.audit("pathlib.Path.fwalk", self, on_error, follow_symlinks, dir_fd) actions = [(_WalkAction.WALK, (self, dir_fd, None))] @@ -1316,7 +1316,7 @@ def fwalk(self, top_down=True, on_error=None, follow_symlinks=False, dir_fd=None pass def _rmtree(self, dir_fd, on_error): - walker = self.fwalk( + walker = self._fwalk( top_down=False, follow_symlinks=False, on_error=on_error, @@ -1325,27 +1325,27 @@ def _rmtree(self, dir_fd, on_error): for dirname in dirnames: try: os.rmdir(dirname, dir_fd=topfd) - except OSError as exc: + except OSError as error: if on_error: - exc.filename = str(toppath._make_child_relpath(dirname)) - exc.func = os.rmdir - on_error(exc) + error.filename = str(toppath._make_child_relpath(dirname)) + error.func = os.rmdir + on_error(error) for filename in filenames: try: os.unlink(filename, dir_fd=topfd) - except OSError as exc: + except OSError as error: if on_error: - exc.filename = str(toppath._make_child_relpath(filename)) - exc.func = os.unlink - on_error(exc) + error.filename = str(toppath._make_child_relpath(filename)) + error.func = os.unlink + on_error(error) if toppath == self: try: os.rmdir(self, dir_fd=dir_fd) - except OSError as exc: + except OSError as error: if on_error: - exc.filename = str(self) - exc.func = os.rmdir - on_error(exc) + error.filename = str(self) + error.func = os.rmdir + on_error(error) _rmtree.avoids_symlink_attacks = True else: @@ -1355,9 +1355,10 @@ def _rmtree(self, dir_fd, on_error): try: if self.is_symlink() or self.is_junction(): raise OSError("Cannot call rmtree on a symbolic link") - except OSError as err: + except OSError as error: if on_error: - on_error(err) + error.func = os.path.islink + on_error(error) return walker = self.walk( @@ -1369,24 +1370,24 @@ def _rmtree(self, dir_fd, on_error): for dirname in dirnames: try: toppath._make_child_relpath(dirname).rmdir() - except OSError as exc: + except OSError as error: if on_error: - exc.func = os.rmdir - on_error(exc) + error.func = os.rmdir + on_error(error) for filename in filenames: try: toppath._make_child_relpath(filename).unlink() - except OSError as exc: + except OSError as error: if on_error: - exc.func = os.unlink - on_error(exc) + error.func = os.unlink + on_error(error) if toppath == self: try: self.rmdir() - except OSError as exc: + except OSError as error: if on_error: - exc.func = os.rmdir - on_error(exc) + error.func = os.rmdir + on_error(error) _rmtree.avoids_symlink_attacks = False From 0e8d8e613e96ef617ee305bd0c657788190257db Mon Sep 17 00:00:00 2001 From: barneygale Date: Sat, 1 Apr 2023 22:35:17 +0100 Subject: [PATCH 09/26] Fix tests --- Lib/pathlib.py | 26 +++++++++++++------------- Lib/test/test_shutil.py | 6 +++--- 2 files changed, 16 insertions(+), 16 deletions(-) diff --git a/Lib/pathlib.py b/Lib/pathlib.py index e04b90aa775ead..83b60210b625b4 100644 --- a/Lib/pathlib.py +++ b/Lib/pathlib.py @@ -227,10 +227,10 @@ def _walk(top_down, follow_symlinks, follow_junctions, use_fd, actions): entries = [] if use_fd and not top_down and not follow_symlinks else None dirnames = [] filenames = [] - if use_fd: - name = path if dir_fd is None else path.name - orig_st = None - try: + try: + if use_fd: + name = path if dir_fd is None else path.name + orig_st = None if not follow_symlinks: if entry: orig_st = entry.stat(follow_symlinks=False) @@ -244,15 +244,15 @@ def _walk(top_down, follow_symlinks, follow_junctions, use_fd, actions): raise exc result = path, dirnames, filenames, fd scandir_it = os.scandir(fd) - except OSError as exc: - if not hasattr(exc, 'func'): - exc.func = os.scandir - exc.filename = str(path) - raise exc - else: - fd = None - result = path, dirnames, filenames - scandir_it = path._scandir() + else: + fd = None + result = path, dirnames, filenames + scandir_it = path._scandir() + except OSError as exc: + if not hasattr(exc, 'func'): + exc.func = os.scandir + exc.filename = str(path) + raise exc with scandir_it: for entry in scandir_it: diff --git a/Lib/test/test_shutil.py b/Lib/test/test_shutil.py index 5df021ec5083b9..614ac2f43e7868 100644 --- a/Lib/test/test_shutil.py +++ b/Lib/test/test_shutil.py @@ -561,14 +561,14 @@ def test_rmtree_uses_safe_fd_version_if_available(self): d = os.path.join(tmp_dir, 'a') os.mkdir(d) try: - real_fwalk = pathlib.Path.fwalk + real_fwalk = pathlib.Path._fwalk class Called(Exception): pass def _raiser(*args, **kwargs): raise Called - pathlib.Path.fwalk = _raiser + pathlib.Path._fwalk = _raiser self.assertRaises(Called, shutil.rmtree, d) finally: - pathlib.Path.fwalk = real_fwalk + pathlib.Path._fwalk = real_fwalk else: self.assertFalse(shutil.rmtree.avoids_symlink_attacks) From fea58ab6d6f59f371dee6e1a9bac745ddc61946c Mon Sep 17 00:00:00 2001 From: barneygale Date: Sat, 1 Apr 2023 22:56:29 +0100 Subject: [PATCH 10/26] Fix missing filename in exception, tidy up more naming. --- Lib/pathlib.py | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/Lib/pathlib.py b/Lib/pathlib.py index 83b60210b625b4..098c9c3c2a9809 100644 --- a/Lib/pathlib.py +++ b/Lib/pathlib.py @@ -239,20 +239,20 @@ def _walk(top_down, follow_symlinks, follow_junctions, use_fd, actions): fd = os.open(name, os.O_RDONLY, dir_fd=dir_fd) actions.append((_WalkAction.CLOSE, fd)) if orig_st and not os.path.samestat(orig_st, os.stat(fd)): - exc = NotADirectoryError("Will not walk into symbolic link") - exc.func = os.path.islink - raise exc + error = NotADirectoryError("Will not walk into symbolic link") + error.func = os.path.islink + raise error result = path, dirnames, filenames, fd scandir_it = os.scandir(fd) else: fd = None result = path, dirnames, filenames scandir_it = path._scandir() - except OSError as exc: - if not hasattr(exc, 'func'): - exc.func = os.scandir - exc.filename = str(path) - raise exc + except OSError as error: + if not hasattr(error, 'func'): + error.func = os.scandir + error.filename = str(path) + raise error with scandir_it: for entry in scandir_it: @@ -1354,7 +1354,9 @@ def _rmtree(self, dir_fd, on_error): raise NotImplementedError("dir_fd unavailable on this platform") try: if self.is_symlink() or self.is_junction(): - raise OSError("Cannot call rmtree on a symbolic link") + error = OSError("Cannot call rmtree on a symbolic link") + error.filename = str(self) + raise error except OSError as error: if on_error: error.func = os.path.islink From bff0f5e2bd73991628ceb0ef7835b13398340a47 Mon Sep 17 00:00:00 2001 From: barneygale Date: Sat, 1 Apr 2023 23:40:22 +0100 Subject: [PATCH 11/26] More cleanup --- Lib/pathlib.py | 40 +++++++++++++++++++++++++--------------- Lib/shutil.py | 10 +++------- 2 files changed, 28 insertions(+), 22 deletions(-) diff --git a/Lib/pathlib.py b/Lib/pathlib.py index 098c9c3c2a9809..54d229660daf73 100644 --- a/Lib/pathlib.py +++ b/Lib/pathlib.py @@ -1074,6 +1074,15 @@ def rmdir(self): """ os.rmdir(self) + def _rmtree(self, *, on_error=None, ignore_errors=False, dir_fd=None): + """ + Recursively delete this directory tree. + """ + if on_error is None and not ignore_errors: + def on_error(error): + raise + self._rmtree_impl(on_error, dir_fd) + def lstat(self): """ Like stat(), except if the path points to a symlink, the symlink's @@ -1284,7 +1293,7 @@ def expanduser(self): def walk(self, top_down=True, on_error=None, follow_symlinks=False, follow_junctions=True): """Walk the directory tree from this directory, similar to os.walk().""" - sys.audit("pathlib.Path.walk", self, on_error, follow_symlinks, follow_junctions) + sys.audit("pathlib.Path.walk", self, on_error, follow_symlinks) actions = [(_WalkAction.WALK, (self, None, None))] while actions: try: @@ -1295,9 +1304,8 @@ def walk(self, top_down=True, on_error=None, follow_symlinks=False, follow_junct continue if _supports_fwalk: - def _fwalk(self, top_down=True, on_error=None, follow_symlinks=False, dir_fd=None): - """Walk the directory tree from this directory, similar to os.fwalk().""" - sys.audit("pathlib.Path.fwalk", self, on_error, follow_symlinks, dir_fd) + def _fwalk(self, top_down=True, *, on_error=None, follow_symlinks=False, dir_fd=None): + sys.audit("pathlib.Path._fwalk", self, on_error, follow_symlinks, dir_fd) actions = [(_WalkAction.WALK, (self, dir_fd, None))] try: while actions: @@ -1315,7 +1323,9 @@ def _fwalk(self, top_down=True, on_error=None, follow_symlinks=False, dir_fd=Non except OSError: pass - def _rmtree(self, dir_fd, on_error): + # Version using fd-based APIs to protect against races + _rmtree.avoids_symlink_attacks = True + def _rmtree_impl(self, on_error, dir_fd): walker = self._fwalk( top_down=False, follow_symlinks=False, @@ -1326,7 +1336,7 @@ def _rmtree(self, dir_fd, on_error): try: os.rmdir(dirname, dir_fd=topfd) except OSError as error: - if on_error: + if on_error is not None: error.filename = str(toppath._make_child_relpath(dirname)) error.func = os.rmdir on_error(error) @@ -1334,7 +1344,7 @@ def _rmtree(self, dir_fd, on_error): try: os.unlink(filename, dir_fd=topfd) except OSError as error: - if on_error: + if on_error is not None: error.filename = str(toppath._make_child_relpath(filename)) error.func = os.unlink on_error(error) @@ -1342,14 +1352,15 @@ def _rmtree(self, dir_fd, on_error): try: os.rmdir(self, dir_fd=dir_fd) except OSError as error: - if on_error: + if on_error is not None: error.filename = str(self) error.func = os.rmdir on_error(error) - _rmtree.avoids_symlink_attacks = True else: - def _rmtree(self, dir_fd, on_error): + # version vulnerable to race conditions + _rmtree.avoids_symlink_attacks = False + def _rmtree_impl(self, on_error, dir_fd): if dir_fd is not None: raise NotImplementedError("dir_fd unavailable on this platform") try: @@ -1358,7 +1369,7 @@ def _rmtree(self, dir_fd, on_error): error.filename = str(self) raise error except OSError as error: - if on_error: + if on_error is not None: error.func = os.path.islink on_error(error) return @@ -1373,24 +1384,23 @@ def _rmtree(self, dir_fd, on_error): try: toppath._make_child_relpath(dirname).rmdir() except OSError as error: - if on_error: + if on_error is not None: error.func = os.rmdir on_error(error) for filename in filenames: try: toppath._make_child_relpath(filename).unlink() except OSError as error: - if on_error: + if on_error is not None: error.func = os.unlink on_error(error) if toppath == self: try: self.rmdir() except OSError as error: - if on_error: + if on_error is not None: error.func = os.rmdir on_error(error) - _rmtree.avoids_symlink_attacks = False class PosixPath(Path, PurePosixPath): diff --git a/Lib/shutil.py b/Lib/shutil.py index 5a1fc16b51bf5a..4918e82e2e462e 100644 --- a/Lib/shutil.py +++ b/Lib/shutil.py @@ -589,21 +589,17 @@ def rmtree(path, ignore_errors=False, onerror=None, *, onexc=None, dir_fd=None): DeprecationWarning) sys.audit("shutil.rmtree", path, dir_fd) - if ignore_errors: - handle_error = None - elif onexc is not None: + handle_error = None + if onexc is not None: def handle_error(error): return onexc(error.func, error.filename, error) elif onerror is not None: def handle_error(error): exc_info = type(error), error, error.__traceback__ return onerror(error.func, error.filename, exc_info) - else: - def handle_error(error): - raise if isinstance(path, bytes): path = os.fsdecode(path) - pathlib.Path(path)._rmtree(dir_fd, handle_error) + pathlib.Path(path)._rmtree(ignore_errors=ignore_errors, on_error=handle_error, dir_fd=dir_fd) # Allow introspection of whether or not the hardening against symlink # attacks is supported on the current platform From 3b216840cf5e8b407a919564ab6c0ae3b30a4cc3 Mon Sep 17 00:00:00 2001 From: barneygale Date: Sun, 2 Apr 2023 00:00:51 +0100 Subject: [PATCH 12/26] Even more tidy-up --- Lib/pathlib.py | 72 +++++++++++++++++++++++--------------------------- 1 file changed, 33 insertions(+), 39 deletions(-) diff --git a/Lib/pathlib.py b/Lib/pathlib.py index 54d229660daf73..8218ca0fcba31f 100644 --- a/Lib/pathlib.py +++ b/Lib/pathlib.py @@ -203,11 +203,6 @@ def _select_from(self, parent_path, is_dir, exists, scandir, normcase): return -# -# Walking helpers -# - - class _WalkAction: WALK = object() YIELD = object() @@ -221,7 +216,6 @@ class _WalkAction: def _walk(top_down, follow_symlinks, follow_junctions, use_fd, actions): action, value = actions.pop() - if action is _WalkAction.WALK: path, dir_fd, entry = value entries = [] if use_fd and not top_down and not follow_symlinks else None @@ -229,31 +223,16 @@ def _walk(top_down, follow_symlinks, follow_junctions, use_fd, actions): filenames = [] try: if use_fd: - name = path if dir_fd is None else path.name - orig_st = None - if not follow_symlinks: - if entry: - orig_st = entry.stat(follow_symlinks=False) - else: - orig_st = os.stat(name, follow_symlinks=False, dir_fd=dir_fd) - fd = os.open(name, os.O_RDONLY, dir_fd=dir_fd) - actions.append((_WalkAction.CLOSE, fd)) - if orig_st and not os.path.samestat(orig_st, os.stat(fd)): - error = NotADirectoryError("Will not walk into symbolic link") - error.func = os.path.islink - raise error + scandir_it, fd = path._fscandir(follow_symlinks, actions, dir_fd, entry) result = path, dirnames, filenames, fd - scandir_it = os.scandir(fd) else: - fd = None + scandir_it, fd = path._scandir(), None result = path, dirnames, filenames - scandir_it = path._scandir() except OSError as error: if not hasattr(error, 'func'): error.func = os.scandir error.filename = str(path) raise error - with scandir_it: for entry in scandir_it: try: @@ -265,15 +244,12 @@ def _walk(top_down, follow_symlinks, follow_junctions, use_fd, actions): dirnames.append(entry.name) else: filenames.append(entry.name) - except OSError: filenames.append(entry.name) - if top_down: yield result else: actions.append((_WalkAction.YIELD, result)) - if entries: actions += [ (_WalkAction.WALK, (path._make_child_relpath(dirname), fd, entry)) @@ -282,7 +258,6 @@ def _walk(top_down, follow_symlinks, follow_junctions, use_fd, actions): actions += [ (_WalkAction.WALK, (path._make_child_relpath(dirname), fd, None)) for dirname in reversed(dirnames)] - elif action is _WalkAction.YIELD: yield value elif action is _WalkAction.CLOSE: @@ -1301,10 +1276,10 @@ def walk(self, top_down=True, on_error=None, follow_symlinks=False, follow_junct except OSError as error: if on_error is not None: on_error(error) - continue if _supports_fwalk: def _fwalk(self, top_down=True, *, on_error=None, follow_symlinks=False, dir_fd=None): + """Walk the directory tree from this directory, similar to os.fwalk().""" sys.audit("pathlib.Path._fwalk", self, on_error, follow_symlinks, dir_fd) actions = [(_WalkAction.WALK, (self, dir_fd, None))] try: @@ -1314,7 +1289,6 @@ def _fwalk(self, top_down=True, *, on_error=None, follow_symlinks=False, dir_fd= except OSError as error: if on_error is not None: on_error(error) - continue finally: for action, value in reversed(actions): if action is _WalkAction.CLOSE: @@ -1323,6 +1297,25 @@ def _fwalk(self, top_down=True, *, on_error=None, follow_symlinks=False, dir_fd= except OSError: pass + def _fscandir(self, follow_symlinks, actions, dir_fd, entry): + name = self if dir_fd is None else self.name + if not follow_symlinks: + # Note: To guard against symlink races, we use the standard + # lstat()/open()/fstat() trick. + if entry: + orig_st = entry.stat(follow_symlinks=False) + else: + orig_st = os.stat(name, follow_symlinks=False, dir_fd=dir_fd) + fd = os.open(name, os.O_RDONLY, dir_fd=dir_fd) + actions.append((_WalkAction.CLOSE, fd)) + if not follow_symlinks and not os.path.samestat(orig_st, os.stat(fd)): + # This can only happen if someone replaces a directory + # with a symbolic link after the call to stat() above. + error = NotADirectoryError("Cannot walk into a symbolic link") + error.func = os.path.islink + raise error + return os.scandir(fd), fd + # Version using fd-based APIs to protect against races _rmtree.avoids_symlink_attacks = True def _rmtree_impl(self, on_error, dir_fd): @@ -1331,24 +1324,24 @@ def _rmtree_impl(self, on_error, dir_fd): follow_symlinks=False, on_error=on_error, dir_fd=dir_fd) - for toppath, dirnames, filenames, topfd in walker: + for path, dirnames, filenames, fd in walker: for dirname in dirnames: try: - os.rmdir(dirname, dir_fd=topfd) + os.rmdir(dirname, dir_fd=fd) except OSError as error: if on_error is not None: - error.filename = str(toppath._make_child_relpath(dirname)) + error.filename = str(path._make_child_relpath(dirname)) error.func = os.rmdir on_error(error) for filename in filenames: try: - os.unlink(filename, dir_fd=topfd) + os.unlink(filename, dir_fd=fd) except OSError as error: if on_error is not None: - error.filename = str(toppath._make_child_relpath(filename)) + error.filename = str(path._make_child_relpath(filename)) error.func = os.unlink on_error(error) - if toppath == self: + if path == self: try: os.rmdir(self, dir_fd=dir_fd) except OSError as error: @@ -1372,6 +1365,7 @@ def _rmtree_impl(self, on_error, dir_fd): if on_error is not None: error.func = os.path.islink on_error(error) + # can't continue even if on_error hook returns return walker = self.walk( @@ -1379,22 +1373,22 @@ def _rmtree_impl(self, on_error, dir_fd): follow_symlinks=False, follow_junctions=False, on_error=on_error) - for toppath, dirnames, filenames in walker: + for path, dirnames, filenames in walker: for dirname in dirnames: try: - toppath._make_child_relpath(dirname).rmdir() + path._make_child_relpath(dirname).rmdir() except OSError as error: if on_error is not None: error.func = os.rmdir on_error(error) for filename in filenames: try: - toppath._make_child_relpath(filename).unlink() + path._make_child_relpath(filename).unlink() except OSError as error: if on_error is not None: error.func = os.unlink on_error(error) - if toppath == self: + if path == self: try: self.rmdir() except OSError as error: From 5abee55c375aa568b9e907dfe04ccbc92cfc0f0a Mon Sep 17 00:00:00 2001 From: barneygale Date: Sun, 2 Apr 2023 16:23:40 +0100 Subject: [PATCH 13/26] Reduce diff a bit --- Lib/pathlib.py | 43 ++++++++++++++++++----------------------- Lib/shutil.py | 4 +++- Lib/test/test_shutil.py | 6 ++++-- 3 files changed, 26 insertions(+), 27 deletions(-) diff --git a/Lib/pathlib.py b/Lib/pathlib.py index 8218ca0fcba31f..ab46c1348ddf7e 100644 --- a/Lib/pathlib.py +++ b/Lib/pathlib.py @@ -209,12 +209,7 @@ class _WalkAction: CLOSE = object() -_supports_fwalk = ( - {os.stat, os.open} <= os.supports_dir_fd and - {os.stat, os.scandir} <= os.supports_fd) - - -def _walk(top_down, follow_symlinks, follow_junctions, use_fd, actions): +def _walk_step(top_down, follow_symlinks, follow_junctions, use_fd, actions): action, value = actions.pop() if action is _WalkAction.WALK: path, dir_fd, entry = value @@ -223,7 +218,7 @@ def _walk(top_down, follow_symlinks, follow_junctions, use_fd, actions): filenames = [] try: if use_fd: - scandir_it, fd = path._fscandir(follow_symlinks, actions, dir_fd, entry) + scandir_it, fd = path._scandir_fwalk(follow_symlinks, actions, dir_fd, entry) result = path, dirnames, filenames, fd else: scandir_it, fd = path._scandir(), None @@ -1049,15 +1044,6 @@ def rmdir(self): """ os.rmdir(self) - def _rmtree(self, *, on_error=None, ignore_errors=False, dir_fd=None): - """ - Recursively delete this directory tree. - """ - if on_error is None and not ignore_errors: - def on_error(error): - raise - self._rmtree_impl(on_error, dir_fd) - def lstat(self): """ Like stat(), except if the path points to a symlink, the symlink's @@ -1272,12 +1258,12 @@ def walk(self, top_down=True, on_error=None, follow_symlinks=False, follow_junct actions = [(_WalkAction.WALK, (self, None, None))] while actions: try: - yield from _walk(top_down, follow_symlinks, follow_junctions, False, actions) + yield from _walk_step(top_down, follow_symlinks, follow_junctions, False, actions) except OSError as error: if on_error is not None: on_error(error) - if _supports_fwalk: + if {os.stat, os.open} <= os.supports_dir_fd and {os.stat, os.scandir} <= os.supports_fd: def _fwalk(self, top_down=True, *, on_error=None, follow_symlinks=False, dir_fd=None): """Walk the directory tree from this directory, similar to os.fwalk().""" sys.audit("pathlib.Path._fwalk", self, on_error, follow_symlinks, dir_fd) @@ -1285,7 +1271,7 @@ def _fwalk(self, top_down=True, *, on_error=None, follow_symlinks=False, dir_fd= try: while actions: try: - yield from _walk(top_down, follow_symlinks, True, True, actions) + yield from _walk_step(top_down, follow_symlinks, True, True, actions) except OSError as error: if on_error is not None: on_error(error) @@ -1297,7 +1283,7 @@ def _fwalk(self, top_down=True, *, on_error=None, follow_symlinks=False, dir_fd= except OSError: pass - def _fscandir(self, follow_symlinks, actions, dir_fd, entry): + def _scandir_fwalk(self, follow_symlinks, actions, dir_fd, entry): name = self if dir_fd is None else self.name if not follow_symlinks: # Note: To guard against symlink races, we use the standard @@ -1317,8 +1303,12 @@ def _fscandir(self, follow_symlinks, actions, dir_fd, entry): return os.scandir(fd), fd # Version using fd-based APIs to protect against races - _rmtree.avoids_symlink_attacks = True - def _rmtree_impl(self, on_error, dir_fd): + def _rmtree(self, *, on_error=None, ignore_errors=False, dir_fd=None): + """Recursively delete this directory tree.""" + if on_error is None and not ignore_errors: + def on_error(error): + raise + walker = self._fwalk( top_down=False, follow_symlinks=False, @@ -1349,13 +1339,17 @@ def _rmtree_impl(self, on_error, dir_fd): error.filename = str(self) error.func = os.rmdir on_error(error) + _rmtree.avoids_symlink_attacks = True else: # version vulnerable to race conditions - _rmtree.avoids_symlink_attacks = False - def _rmtree_impl(self, on_error, dir_fd): + def _rmtree(self, *, on_error=None, ignore_errors=False, dir_fd=None): + """Recursively delete this directory tree.""" if dir_fd is not None: raise NotImplementedError("dir_fd unavailable on this platform") + if on_error is None and not ignore_errors: + def on_error(error): + raise try: if self.is_symlink() or self.is_junction(): error = OSError("Cannot call rmtree on a symbolic link") @@ -1395,6 +1389,7 @@ def _rmtree_impl(self, on_error, dir_fd): if on_error is not None: error.func = os.rmdir on_error(error) + _rmtree.avoids_symlink_attacks = False class PosixPath(Path, PurePosixPath): diff --git a/Lib/shutil.py b/Lib/shutil.py index 4918e82e2e462e..79a267c06da690 100644 --- a/Lib/shutil.py +++ b/Lib/shutil.py @@ -563,6 +563,8 @@ def copytree(src, dst, symlinks=False, ignore=None, copy_function=copy2, ignore_dangling_symlinks=ignore_dangling_symlinks, dirs_exist_ok=dirs_exist_ok) +_use_fd_functions = pathlib.Path._rmtree.avoids_symlink_attacks + def rmtree(path, ignore_errors=False, onerror=None, *, onexc=None, dir_fd=None): """Recursively delete a directory tree. @@ -603,7 +605,7 @@ def handle_error(error): # Allow introspection of whether or not the hardening against symlink # attacks is supported on the current platform -rmtree.avoids_symlink_attacks = pathlib.Path._rmtree.avoids_symlink_attacks +rmtree.avoids_symlink_attacks = _use_fd_functions def _basename(path): """A basename() variant which first strips the trailing slash, if present. diff --git a/Lib/test/test_shutil.py b/Lib/test/test_shutil.py index 614ac2f43e7868..6f82925a71e203 100644 --- a/Lib/test/test_shutil.py +++ b/Lib/test/test_shutil.py @@ -556,6 +556,7 @@ def test_rmtree_uses_safe_fd_version_if_available(self): os.listdir in os.supports_fd and os.stat in os.supports_follow_symlinks) if _use_fd_functions: + self.assertTrue(shutil._use_fd_functions) self.assertTrue(shutil.rmtree.avoids_symlink_attacks) tmp_dir = self.mkdtemp() d = os.path.join(tmp_dir, 'a') @@ -570,9 +571,10 @@ def _raiser(*args, **kwargs): finally: pathlib.Path._fwalk = real_fwalk else: + self.assertFalse(shutil._use_fd_functions) self.assertFalse(shutil.rmtree.avoids_symlink_attacks) - @unittest.skipUnless(shutil.rmtree.avoids_symlink_attacks, "dir_fd is not supported") + @unittest.skipUnless(shutil._use_fd_functions, "dir_fd is not supported") def test_rmtree_with_dir_fd(self): tmp_dir = self.mkdtemp() victim = 'killme' @@ -586,7 +588,7 @@ def test_rmtree_with_dir_fd(self): shutil.rmtree(victim, dir_fd=dir_fd) self.assertFalse(os.path.exists(fullname)) - @unittest.skipIf(shutil.rmtree.avoids_symlink_attacks, "dir_fd is supported") + @unittest.skipIf(shutil._use_fd_functions, "dir_fd is supported") def test_rmtree_with_dir_fd_unsupported(self): tmp_dir = self.mkdtemp() with self.assertRaises(NotImplementedError): From 57173d93ceffd1e9de0e008dbb71e9d0a17c3cee Mon Sep 17 00:00:00 2001 From: barneygale Date: Sun, 2 Apr 2023 19:19:03 +0100 Subject: [PATCH 14/26] Performance improvements --- Lib/pathlib.py | 51 +++++++++++++++++++++++++------------------------- 1 file changed, 25 insertions(+), 26 deletions(-) diff --git a/Lib/pathlib.py b/Lib/pathlib.py index ab46c1348ddf7e..72cfe67e250baa 100644 --- a/Lib/pathlib.py +++ b/Lib/pathlib.py @@ -209,11 +209,10 @@ class _WalkAction: CLOSE = object() -def _walk_step(top_down, follow_symlinks, follow_junctions, use_fd, actions): +def _walk_step(top_down, follow_symlinks, follow_junctions, use_fd, entries, actions): action, value = actions.pop() if action is _WalkAction.WALK: path, dir_fd, entry = value - entries = [] if use_fd and not top_down and not follow_symlinks else None dirnames = [] filenames = [] try: @@ -231,9 +230,8 @@ def _walk_step(top_down, follow_symlinks, follow_junctions, use_fd, actions): with scandir_it: for entry in scandir_it: try: - is_dir = entry.is_dir(follow_symlinks=follow_symlinks) and ( - follow_junctions or not entry.is_junction()) - if is_dir: + if entry.is_dir(follow_symlinks=follow_symlinks) and ( + follow_junctions or not entry.is_junction()): if entries is not None: entries.append(entry) dirnames.append(entry.name) @@ -245,14 +243,13 @@ def _walk_step(top_down, follow_symlinks, follow_junctions, use_fd, actions): yield result else: actions.append((_WalkAction.YIELD, result)) - if entries: - actions += [ - (_WalkAction.WALK, (path._make_child_relpath(dirname), fd, entry)) - for dirname, entry in zip(reversed(dirnames), reversed(entries))] + if entries is not None: + for entry in reversed(entries): + actions.append((_WalkAction.WALK, (path._make_child_relpath(entry.name), fd, entry))) + entries.clear() else: - actions += [ - (_WalkAction.WALK, (path._make_child_relpath(dirname), fd, None)) - for dirname in reversed(dirnames)] + for dirname in reversed(dirnames): + actions.append((_WalkAction.WALK, (path._make_child_relpath(dirname), fd, None))) elif action is _WalkAction.YIELD: yield value elif action is _WalkAction.CLOSE: @@ -1258,7 +1255,7 @@ def walk(self, top_down=True, on_error=None, follow_symlinks=False, follow_junct actions = [(_WalkAction.WALK, (self, None, None))] while actions: try: - yield from _walk_step(top_down, follow_symlinks, follow_junctions, False, actions) + yield from _walk_step(top_down, follow_symlinks, follow_junctions, False, None, actions) except OSError as error: if on_error is not None: on_error(error) @@ -1267,11 +1264,12 @@ def walk(self, top_down=True, on_error=None, follow_symlinks=False, follow_junct def _fwalk(self, top_down=True, *, on_error=None, follow_symlinks=False, dir_fd=None): """Walk the directory tree from this directory, similar to os.fwalk().""" sys.audit("pathlib.Path._fwalk", self, on_error, follow_symlinks, dir_fd) + entries = [] if not top_down and not follow_symlinks else None actions = [(_WalkAction.WALK, (self, dir_fd, None))] try: while actions: try: - yield from _walk_step(top_down, follow_symlinks, True, True, actions) + yield from _walk_step(top_down, follow_symlinks, True, True, entries, actions) except OSError as error: if on_error is not None: on_error(error) @@ -1285,21 +1283,22 @@ def _fwalk(self, top_down=True, *, on_error=None, follow_symlinks=False, dir_fd= def _scandir_fwalk(self, follow_symlinks, actions, dir_fd, entry): name = self if dir_fd is None else self.name - if not follow_symlinks: + if follow_symlinks: + fd = os.open(name, os.O_RDONLY, dir_fd=dir_fd) + actions.append((_WalkAction.CLOSE, fd)) + else: # Note: To guard against symlink races, we use the standard # lstat()/open()/fstat() trick. - if entry: - orig_st = entry.stat(follow_symlinks=False) - else: + if entry is None: orig_st = os.stat(name, follow_symlinks=False, dir_fd=dir_fd) - fd = os.open(name, os.O_RDONLY, dir_fd=dir_fd) - actions.append((_WalkAction.CLOSE, fd)) - if not follow_symlinks and not os.path.samestat(orig_st, os.stat(fd)): - # This can only happen if someone replaces a directory - # with a symbolic link after the call to stat() above. - error = NotADirectoryError("Cannot walk into a symbolic link") - error.func = os.path.islink - raise error + else: + orig_st = entry.stat(follow_symlinks=False) + fd = os.open(name, os.O_RDONLY, dir_fd=dir_fd) + actions.append((_WalkAction.CLOSE, fd)) + if not os.path.samestat(orig_st, os.stat(fd)): + error = NotADirectoryError("Cannot walk into a symbolic link") + error.func = os.path.islink + raise error return os.scandir(fd), fd # Version using fd-based APIs to protect against races From 58070d9a664b9370c4151c1d5fcabb6f52f3fe22 Mon Sep 17 00:00:00 2001 From: barneygale Date: Sun, 2 Apr 2023 21:24:22 +0100 Subject: [PATCH 15/26] Simplify rmtree (h/t eryksun) --- Lib/pathlib.py | 43 +++++++++++++------------------------------ 1 file changed, 13 insertions(+), 30 deletions(-) diff --git a/Lib/pathlib.py b/Lib/pathlib.py index 72cfe67e250baa..229b2de4a75103 100644 --- a/Lib/pathlib.py +++ b/Lib/pathlib.py @@ -1314,14 +1314,6 @@ def on_error(error): on_error=on_error, dir_fd=dir_fd) for path, dirnames, filenames, fd in walker: - for dirname in dirnames: - try: - os.rmdir(dirname, dir_fd=fd) - except OSError as error: - if on_error is not None: - error.filename = str(path._make_child_relpath(dirname)) - error.func = os.rmdir - on_error(error) for filename in filenames: try: os.unlink(filename, dir_fd=fd) @@ -1330,14 +1322,13 @@ def on_error(error): error.filename = str(path._make_child_relpath(filename)) error.func = os.unlink on_error(error) - if path == self: - try: - os.rmdir(self, dir_fd=dir_fd) - except OSError as error: - if on_error is not None: - error.filename = str(self) - error.func = os.rmdir - on_error(error) + try: + os.rmdir(path, dir_fd=dir_fd) + except OSError as error: + if on_error is not None: + error.filename = str(path) + error.func = os.rmdir + on_error(error) _rmtree.avoids_symlink_attacks = True else: @@ -1367,13 +1358,6 @@ def on_error(error): follow_junctions=False, on_error=on_error) for path, dirnames, filenames in walker: - for dirname in dirnames: - try: - path._make_child_relpath(dirname).rmdir() - except OSError as error: - if on_error is not None: - error.func = os.rmdir - on_error(error) for filename in filenames: try: path._make_child_relpath(filename).unlink() @@ -1381,13 +1365,12 @@ def on_error(error): if on_error is not None: error.func = os.unlink on_error(error) - if path == self: - try: - self.rmdir() - except OSError as error: - if on_error is not None: - error.func = os.rmdir - on_error(error) + try: + path.rmdir() + except OSError as error: + if on_error is not None: + error.func = os.rmdir + on_error(error) _rmtree.avoids_symlink_attacks = False From d61baa13ae0f3d88c7478ea9c66d588e94fd5955 Mon Sep 17 00:00:00 2001 From: barneygale Date: Sun, 2 Apr 2023 23:59:48 +0100 Subject: [PATCH 16/26] More performance tweaks --- Lib/pathlib.py | 21 ++++++++------------- Lib/test/test_pathlib.py | 40 +++++++++++++++++++++++++--------------- 2 files changed, 33 insertions(+), 28 deletions(-) diff --git a/Lib/pathlib.py b/Lib/pathlib.py index 229b2de4a75103..57c1f9dcab0972 100644 --- a/Lib/pathlib.py +++ b/Lib/pathlib.py @@ -209,7 +209,7 @@ class _WalkAction: CLOSE = object() -def _walk_step(top_down, follow_symlinks, follow_junctions, use_fd, entries, actions): +def _walk_step(top_down, follow_symlinks, follow_junctions, use_fd, actions): action, value = actions.pop() if action is _WalkAction.WALK: path, dir_fd, entry = value @@ -227,13 +227,16 @@ def _walk_step(top_down, follow_symlinks, follow_junctions, use_fd, entries, act error.func = os.scandir error.filename = str(path) raise error + if not top_down: + actions.append((_WalkAction.YIELD, result)) with scandir_it: for entry in scandir_it: try: if entry.is_dir(follow_symlinks=follow_symlinks) and ( follow_junctions or not entry.is_junction()): - if entries is not None: - entries.append(entry) + if not top_down: + actions.append((_WalkAction.WALK, ( + path._make_child_relpath(entry.name), fd, entry))) dirnames.append(entry.name) else: filenames.append(entry.name) @@ -241,13 +244,6 @@ def _walk_step(top_down, follow_symlinks, follow_junctions, use_fd, entries, act filenames.append(entry.name) if top_down: yield result - else: - actions.append((_WalkAction.YIELD, result)) - if entries is not None: - for entry in reversed(entries): - actions.append((_WalkAction.WALK, (path._make_child_relpath(entry.name), fd, entry))) - entries.clear() - else: for dirname in reversed(dirnames): actions.append((_WalkAction.WALK, (path._make_child_relpath(dirname), fd, None))) elif action is _WalkAction.YIELD: @@ -1255,7 +1251,7 @@ def walk(self, top_down=True, on_error=None, follow_symlinks=False, follow_junct actions = [(_WalkAction.WALK, (self, None, None))] while actions: try: - yield from _walk_step(top_down, follow_symlinks, follow_junctions, False, None, actions) + yield from _walk_step(top_down, follow_symlinks, follow_junctions, False, actions) except OSError as error: if on_error is not None: on_error(error) @@ -1264,12 +1260,11 @@ def walk(self, top_down=True, on_error=None, follow_symlinks=False, follow_junct def _fwalk(self, top_down=True, *, on_error=None, follow_symlinks=False, dir_fd=None): """Walk the directory tree from this directory, similar to os.fwalk().""" sys.audit("pathlib.Path._fwalk", self, on_error, follow_symlinks, dir_fd) - entries = [] if not top_down and not follow_symlinks else None actions = [(_WalkAction.WALK, (self, dir_fd, None))] try: while actions: try: - yield from _walk_step(top_down, follow_symlinks, True, True, entries, actions) + yield from _walk_step(top_down, follow_symlinks, True, True, actions) except OSError as error: if on_error is not None: on_error(error) diff --git a/Lib/test/test_pathlib.py b/Lib/test/test_pathlib.py index 3041630da67899..db7dd9017c67f2 100644 --- a/Lib/test/test_pathlib.py +++ b/Lib/test/test_pathlib.py @@ -2708,21 +2708,31 @@ def test_walk_bottom_up(self): all = list(self.walk_path.walk( top_down=False)) self.assertEqual(len(all), 4, all) - # We can't know which order SUB1 and SUB2 will appear in. - # Not flipped: SUB11, SUB1, SUB2, TESTFN - # flipped: SUB2, SUB11, SUB1, TESTFN - flipped = all[3][1][0] != "SUB1" - all[3][1].sort() - all[2 - 2 * flipped][-1].sort() - all[2 - 2 * flipped][1].sort() - self.assertEqual(all[3], - (self.walk_path, ["SUB1", "SUB2"], ["tmp1"])) - self.assertEqual(all[flipped], - (self.sub11_path, [], [])) - self.assertEqual(all[flipped + 1], - (self.sub1_path, ["SUB11"], ["tmp2"])) - self.assertEqual(all[2 - 2 * flipped], - self.sub2_tree) + seen_testfn = seen_sub1 = seen_sub2 = seen_sub11 = False + for path, dirnames, filenames in all: + if path == self.walk_path: + self.assertTrue(seen_sub1) + self.assertTrue(seen_sub2) + self.assertEqual(sorted(dirnames), ["SUB1", "SUB2"]) + self.assertEqual(filenames, ["tmp1"]) + seen_testfn = True + elif path == self.sub1_path: + self.assertFalse(seen_testfn) + self.assertTrue(seen_sub11) + self.assertEqual(dirnames, ["SUB11"]) + self.assertEqual(filenames, ["tmp2"]) + seen_sub1 = True + elif path == self.sub11_path: + self.assertFalse(seen_testfn) + self.assertFalse(seen_sub1) + self.assertEqual(dirnames, []) + self.assertEqual(filenames, []) + seen_sub11 = True + elif path == self.sub2_path: + self.assertFalse(seen_testfn) + self.assertEqual(sorted(dirnames), sorted(self.sub2_tree[1])) + self.assertEqual(sorted(filenames), sorted(self.sub2_tree[2])) + seen_sub2 = True @os_helper.skip_unless_symlink def test_walk_follow_symlinks(self): From e983dd7f83f3e9770f1a5f0c305ce11ddf97d0fc Mon Sep 17 00:00:00 2001 From: barneygale Date: Mon, 3 Apr 2023 02:54:21 +0100 Subject: [PATCH 17/26] Make `error._func` private, as it's only intended for `shutil.rmtree()` --- Lib/pathlib.py | 16 ++++++++-------- Lib/shutil.py | 4 ++-- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/Lib/pathlib.py b/Lib/pathlib.py index 57c1f9dcab0972..e05ea032a56ced 100644 --- a/Lib/pathlib.py +++ b/Lib/pathlib.py @@ -223,8 +223,8 @@ def _walk_step(top_down, follow_symlinks, follow_junctions, use_fd, actions): scandir_it, fd = path._scandir(), None result = path, dirnames, filenames except OSError as error: - if not hasattr(error, 'func'): - error.func = os.scandir + if not hasattr(error, '_func'): + error._func = os.scandir error.filename = str(path) raise error if not top_down: @@ -1292,7 +1292,7 @@ def _scandir_fwalk(self, follow_symlinks, actions, dir_fd, entry): actions.append((_WalkAction.CLOSE, fd)) if not os.path.samestat(orig_st, os.stat(fd)): error = NotADirectoryError("Cannot walk into a symbolic link") - error.func = os.path.islink + error._func = os.path.islink raise error return os.scandir(fd), fd @@ -1315,14 +1315,14 @@ def on_error(error): except OSError as error: if on_error is not None: error.filename = str(path._make_child_relpath(filename)) - error.func = os.unlink + error._func = os.unlink on_error(error) try: os.rmdir(path, dir_fd=dir_fd) except OSError as error: if on_error is not None: error.filename = str(path) - error.func = os.rmdir + error._func = os.rmdir on_error(error) _rmtree.avoids_symlink_attacks = True @@ -1342,7 +1342,7 @@ def on_error(error): raise error except OSError as error: if on_error is not None: - error.func = os.path.islink + error._func = os.path.islink on_error(error) # can't continue even if on_error hook returns return @@ -1358,13 +1358,13 @@ def on_error(error): path._make_child_relpath(filename).unlink() except OSError as error: if on_error is not None: - error.func = os.unlink + error._func = os.unlink on_error(error) try: path.rmdir() except OSError as error: if on_error is not None: - error.func = os.rmdir + error._func = os.rmdir on_error(error) _rmtree.avoids_symlink_attacks = False diff --git a/Lib/shutil.py b/Lib/shutil.py index 79a267c06da690..bbc1abf6167579 100644 --- a/Lib/shutil.py +++ b/Lib/shutil.py @@ -594,11 +594,11 @@ def rmtree(path, ignore_errors=False, onerror=None, *, onexc=None, dir_fd=None): handle_error = None if onexc is not None: def handle_error(error): - return onexc(error.func, error.filename, error) + onexc(error._func, error.filename, error) elif onerror is not None: def handle_error(error): exc_info = type(error), error, error.__traceback__ - return onerror(error.func, error.filename, exc_info) + onerror(error._func, error.filename, exc_info) if isinstance(path, bytes): path = os.fsdecode(path) pathlib.Path(path)._rmtree(ignore_errors=ignore_errors, on_error=handle_error, dir_fd=dir_fd) From 5387d32135362734128e7948592de64b6a1e7f09 Mon Sep 17 00:00:00 2001 From: barneygale Date: Mon, 3 Apr 2023 02:54:55 +0100 Subject: [PATCH 18/26] Improve tests --- Lib/test/test_pathlib.py | 43 +++++++++++++++++++++------------------- 1 file changed, 23 insertions(+), 20 deletions(-) diff --git a/Lib/test/test_pathlib.py b/Lib/test/test_pathlib.py index db7dd9017c67f2..b0c3314bb4de0d 100644 --- a/Lib/test/test_pathlib.py +++ b/Lib/test/test_pathlib.py @@ -2668,20 +2668,20 @@ def setUp(self): del self.sub2_tree[1][:1] def test_walk_topdown(self): - all = list(self.walk_path.walk()) - - self.assertEqual(len(all), 4) - # We can't know which order SUB1 and SUB2 will appear in. - # Not flipped: TESTFN, SUB1, SUB11, SUB2 - # flipped: TESTFN, SUB2, SUB1, SUB11 - flipped = all[0][1][0] != "SUB1" - all[0][1].sort() - all[3 - 2 * flipped][-1].sort() - all[3 - 2 * flipped][1].sort() - self.assertEqual(all[0], (self.walk_path, ["SUB1", "SUB2"], ["tmp1"])) - self.assertEqual(all[1 + flipped], (self.sub1_path, ["SUB11"], ["tmp2"])) - self.assertEqual(all[2 + flipped], (self.sub11_path, [], [])) - self.assertEqual(all[3 - 2 * flipped], self.sub2_tree) + walker = self.walk_path.walk() + entry = next(walker) + entry[1].sort() # Ensure we visit SUB1 before SUB2 + self.assertEqual(entry, (self.walk_path, ["SUB1", "SUB2"], ["tmp1"])) + entry = next(walker) + self.assertEqual(entry, (self.sub1_path, ["SUB11"], ["tmp2"])) + entry = next(walker) + self.assertEqual(entry, (self.sub11_path, [], [])) + entry = next(walker) + entry[1].sort() + entry[2].sort() + self.assertEqual(entry, self.sub2_tree) + with self.assertRaises(StopIteration): + next(walker) def test_walk_prune(self, walk_path=None): if walk_path is None: @@ -2705,12 +2705,10 @@ def test_file_like_path(self): self.test_walk_prune(FakePath(self.walk_path).__fspath__()) def test_walk_bottom_up(self): - all = list(self.walk_path.walk( top_down=False)) - - self.assertEqual(len(all), 4, all) - seen_testfn = seen_sub1 = seen_sub2 = seen_sub11 = False - for path, dirnames, filenames in all: + seen_testfn = seen_sub1 = seen_sub11 = seen_sub2 = False + for path, dirnames, filenames in self.walk_path.walk(top_down=False): if path == self.walk_path: + self.assertFalse(seen_testfn) self.assertTrue(seen_sub1) self.assertTrue(seen_sub2) self.assertEqual(sorted(dirnames), ["SUB1", "SUB2"]) @@ -2718,21 +2716,26 @@ def test_walk_bottom_up(self): seen_testfn = True elif path == self.sub1_path: self.assertFalse(seen_testfn) + self.assertFalse(seen_sub1) self.assertTrue(seen_sub11) self.assertEqual(dirnames, ["SUB11"]) self.assertEqual(filenames, ["tmp2"]) seen_sub1 = True elif path == self.sub11_path: - self.assertFalse(seen_testfn) self.assertFalse(seen_sub1) + self.assertFalse(seen_sub11) self.assertEqual(dirnames, []) self.assertEqual(filenames, []) seen_sub11 = True elif path == self.sub2_path: self.assertFalse(seen_testfn) + self.assertFalse(seen_sub2) self.assertEqual(sorted(dirnames), sorted(self.sub2_tree[1])) self.assertEqual(sorted(filenames), sorted(self.sub2_tree[2])) seen_sub2 = True + else: + raise AssertionError(f"Unexpected path: {path}") + self.assertTrue(seen_testfn) @os_helper.skip_unless_symlink def test_walk_follow_symlinks(self): From 75541fbf6467debd1c41f7829b7f6bbdf4b099e1 Mon Sep 17 00:00:00 2001 From: barneygale Date: Mon, 3 Apr 2023 19:45:00 +0100 Subject: [PATCH 19/26] Improve performance --- Lib/pathlib.py | 105 ++++++++++++++++++++++++------------------------- 1 file changed, 51 insertions(+), 54 deletions(-) diff --git a/Lib/pathlib.py b/Lib/pathlib.py index e05ea032a56ced..0981470c1e833f 100644 --- a/Lib/pathlib.py +++ b/Lib/pathlib.py @@ -209,49 +209,56 @@ class _WalkAction: CLOSE = object() -def _walk_step(top_down, follow_symlinks, follow_junctions, use_fd, actions): - action, value = actions.pop() - if action is _WalkAction.WALK: - path, dir_fd, entry = value - dirnames = [] - filenames = [] +def _walk(top_down, on_error, follow_symlinks, follow_junctions, use_fd, actions): + while actions: + action, value = actions.pop() try: - if use_fd: - scandir_it, fd = path._scandir_fwalk(follow_symlinks, actions, dir_fd, entry) - result = path, dirnames, filenames, fd - else: - scandir_it, fd = path._scandir(), None - result = path, dirnames, filenames - except OSError as error: - if not hasattr(error, '_func'): - error._func = os.scandir - error.filename = str(path) - raise error - if not top_down: - actions.append((_WalkAction.YIELD, result)) - with scandir_it: - for entry in scandir_it: + if action is _WalkAction.WALK: + path, dir_fd, entry = value + dirnames = [] + filenames = [] try: - if entry.is_dir(follow_symlinks=follow_symlinks) and ( - follow_junctions or not entry.is_junction()): - if not top_down: - actions.append((_WalkAction.WALK, ( - path._make_child_relpath(entry.name), fd, entry))) - dirnames.append(entry.name) + if use_fd: + scandir_it, fd = path._scandir_fwalk( + follow_symlinks, actions, dir_fd, entry) + result = path, dirnames, filenames, fd else: - filenames.append(entry.name) - except OSError: - filenames.append(entry.name) - if top_down: - yield result - for dirname in reversed(dirnames): - actions.append((_WalkAction.WALK, (path._make_child_relpath(dirname), fd, None))) - elif action is _WalkAction.YIELD: - yield value - elif action is _WalkAction.CLOSE: - os.close(value) - else: - raise AssertionError(f"unknown walk action: {action}") + scandir_it, fd = path._scandir(), None + result = path, dirnames, filenames + except OSError as error: + if not hasattr(error, '_func'): + error._func = os.scandir + error.filename = str(path) + raise error + if not top_down: + actions.append((_WalkAction.YIELD, result)) + with scandir_it: + for entry in scandir_it: + try: + if entry.is_dir(follow_symlinks=follow_symlinks) and ( + follow_junctions or not entry.is_junction()): + if not top_down: + actions.append((_WalkAction.WALK, ( + path._make_child_relpath(entry.name), fd, entry))) + dirnames.append(entry.name) + else: + filenames.append(entry.name) + except OSError: + filenames.append(entry.name) + if top_down: + yield result + for dirname in reversed(dirnames): + actions.append((_WalkAction.WALK, ( + path._make_child_relpath(dirname), fd, None))) + elif action is _WalkAction.YIELD: + yield value + elif action is _WalkAction.CLOSE: + os.close(value) + else: + raise AssertionError(f"unknown walk action: {action}") + except OSError as error: + if on_error is not None: + on_error(error) # # Public API @@ -1249,12 +1256,7 @@ def walk(self, top_down=True, on_error=None, follow_symlinks=False, follow_junct """Walk the directory tree from this directory, similar to os.walk().""" sys.audit("pathlib.Path.walk", self, on_error, follow_symlinks) actions = [(_WalkAction.WALK, (self, None, None))] - while actions: - try: - yield from _walk_step(top_down, follow_symlinks, follow_junctions, False, actions) - except OSError as error: - if on_error is not None: - on_error(error) + return _walk(top_down, on_error, follow_symlinks, follow_junctions, False, actions) if {os.stat, os.open} <= os.supports_dir_fd and {os.stat, os.scandir} <= os.supports_fd: def _fwalk(self, top_down=True, *, on_error=None, follow_symlinks=False, dir_fd=None): @@ -1262,12 +1264,7 @@ def _fwalk(self, top_down=True, *, on_error=None, follow_symlinks=False, dir_fd= sys.audit("pathlib.Path._fwalk", self, on_error, follow_symlinks, dir_fd) actions = [(_WalkAction.WALK, (self, dir_fd, None))] try: - while actions: - try: - yield from _walk_step(top_down, follow_symlinks, True, True, actions) - except OSError as error: - if on_error is not None: - on_error(error) + return _walk(top_down, on_error, follow_symlinks, True, True, actions) finally: for action, value in reversed(actions): if action is _WalkAction.CLOSE: @@ -1308,7 +1305,7 @@ def on_error(error): follow_symlinks=False, on_error=on_error, dir_fd=dir_fd) - for path, dirnames, filenames, fd in walker: + for path, _, filenames, fd in walker: for filename in filenames: try: os.unlink(filename, dir_fd=fd) @@ -1352,7 +1349,7 @@ def on_error(error): follow_symlinks=False, follow_junctions=False, on_error=on_error) - for path, dirnames, filenames in walker: + for path, _, filenames in walker: for filename in filenames: try: path._make_child_relpath(filename).unlink() From 74cdd6c1ddd97f02d8960ef74ff95ecccda3b71f Mon Sep 17 00:00:00 2001 From: barneygale Date: Fri, 7 Apr 2023 20:19:32 +0100 Subject: [PATCH 20/26] Supply relative paths to `rmdir()` in fd-based walk. --- Lib/pathlib.py | 25 +++++++++++++++++-------- 1 file changed, 17 insertions(+), 8 deletions(-) diff --git a/Lib/pathlib.py b/Lib/pathlib.py index 2701039601332d..1b3ef78656e805 100644 --- a/Lib/pathlib.py +++ b/Lib/pathlib.py @@ -1316,7 +1316,7 @@ def on_error(error): follow_symlinks=False, on_error=on_error, dir_fd=dir_fd) - for path, _, filenames, fd in walker: + for path, dirnames, filenames, fd in walker: for filename in filenames: try: os.unlink(filename, dir_fd=fd) @@ -1325,13 +1325,22 @@ def on_error(error): error.filename = str(path._make_child_relpath(filename)) error._func = os.unlink on_error(error) - try: - os.rmdir(path, dir_fd=dir_fd) - except OSError as error: - if on_error is not None: - error.filename = str(path) - error._func = os.rmdir - on_error(error) + for dirname in dirnames: + try: + os.rmdir(dirname, dir_fd=fd) + except OSError as error: + if on_error is not None: + error.filename = str(path._make_child_relpath(dirname)) + error._func = os.rmdir + on_error(error) + if path == self: + try: + os.rmdir(self, dir_fd=dir_fd) + except OSError as error: + if on_error is not None: + error.filename = str(self) + error._func = os.rmdir + on_error(error) _rmtree.avoids_symlink_attacks = True else: From c17d07c2d9aadfcc541849c14d76fa5b55f5f19b Mon Sep 17 00:00:00 2001 From: barneygale Date: Fri, 7 Apr 2023 21:51:20 +0100 Subject: [PATCH 21/26] Handle rmdir() and unlink() not accepting dir_fd --- Lib/pathlib.py | 159 +++++++++++++++++++++++++------------------------ 1 file changed, 80 insertions(+), 79 deletions(-) diff --git a/Lib/pathlib.py b/Lib/pathlib.py index 1b3ef78656e805..7e691f859211d9 100644 --- a/Lib/pathlib.py +++ b/Lib/pathlib.py @@ -1055,6 +1055,48 @@ def rmdir(self): """ os.rmdir(self) + # version vulnerable to race conditions + def _rmtree(self, *, on_error=None, ignore_errors=False, dir_fd=None): + """Recursively delete this directory tree.""" + if dir_fd is not None: + raise NotImplementedError("dir_fd unavailable on this platform") + if on_error is None and not ignore_errors: + def on_error(error): + raise + try: + if self.is_symlink() or self.is_junction(): + error = OSError("Cannot call rmtree on a symbolic link") + error.filename = str(self) + raise error + except OSError as error: + if on_error is not None: + error._func = os.path.islink + on_error(error) + # can't continue even if on_error hook returns + return + + walker = self.walk( + top_down=False, + follow_symlinks=False, + follow_junctions=False, + on_error=on_error) + for path, _, filenames in walker: + for filename in filenames: + try: + path._make_child_relpath(filename).unlink() + except OSError as error: + if on_error is not None: + error._func = os.unlink + on_error(error) + try: + path.rmdir() + except OSError as error: + if on_error is not None: + error._func = os.rmdir + on_error(error) + + _rmtree.avoids_symlink_attacks = False + def lstat(self): """ Like stat(), except if the path points to a symlink, the symlink's @@ -1304,86 +1346,45 @@ def _scandir_fwalk(self, follow_symlinks, actions, dir_fd, entry): raise error return os.scandir(fd), fd - # Version using fd-based APIs to protect against races - def _rmtree(self, *, on_error=None, ignore_errors=False, dir_fd=None): - """Recursively delete this directory tree.""" - if on_error is None and not ignore_errors: - def on_error(error): - raise - - walker = self._fwalk( - top_down=False, - follow_symlinks=False, - on_error=on_error, - dir_fd=dir_fd) - for path, dirnames, filenames, fd in walker: - for filename in filenames: - try: - os.unlink(filename, dir_fd=fd) - except OSError as error: - if on_error is not None: - error.filename = str(path._make_child_relpath(filename)) - error._func = os.unlink - on_error(error) - for dirname in dirnames: - try: - os.rmdir(dirname, dir_fd=fd) - except OSError as error: - if on_error is not None: - error.filename = str(path._make_child_relpath(dirname)) - error._func = os.rmdir - on_error(error) - if path == self: - try: - os.rmdir(self, dir_fd=dir_fd) - except OSError as error: - if on_error is not None: - error.filename = str(self) - error._func = os.rmdir - on_error(error) - _rmtree.avoids_symlink_attacks = True - - else: - # version vulnerable to race conditions - def _rmtree(self, *, on_error=None, ignore_errors=False, dir_fd=None): - """Recursively delete this directory tree.""" - if dir_fd is not None: - raise NotImplementedError("dir_fd unavailable on this platform") - if on_error is None and not ignore_errors: - def on_error(error): - raise - try: - if self.is_symlink() or self.is_junction(): - error = OSError("Cannot call rmtree on a symbolic link") - error.filename = str(self) - raise error - except OSError as error: - if on_error is not None: - error._func = os.path.islink - on_error(error) - # can't continue even if on_error hook returns - return + if {os.unlink, os.rmdir} <= os.supports_dir_fd: + # Version using fd-based APIs to protect against races + def _rmtree(self, *, on_error=None, ignore_errors=False, dir_fd=None): + """Recursively delete this directory tree.""" + if on_error is None and not ignore_errors: + def on_error(error): + raise - walker = self.walk( - top_down=False, - follow_symlinks=False, - follow_junctions=False, - on_error=on_error) - for path, _, filenames in walker: - for filename in filenames: - try: - path._make_child_relpath(filename).unlink() - except OSError as error: - if on_error is not None: - error._func = os.unlink - on_error(error) - try: - path.rmdir() - except OSError as error: - if on_error is not None: - error._func = os.rmdir - on_error(error) - _rmtree.avoids_symlink_attacks = False + walker = self._fwalk( + top_down=False, + follow_symlinks=False, + on_error=on_error, + dir_fd=dir_fd) + for path, dirnames, filenames, fd in walker: + for filename in filenames: + try: + os.unlink(filename, dir_fd=fd) + except OSError as error: + if on_error is not None: + error.filename = str(path._make_child_relpath(filename)) + error._func = os.unlink + on_error(error) + for dirname in dirnames: + try: + os.rmdir(dirname, dir_fd=fd) + except OSError as error: + if on_error is not None: + error.filename = str(path._make_child_relpath(dirname)) + error._func = os.rmdir + on_error(error) + if path == self: + try: + os.rmdir(self, dir_fd=dir_fd) + except OSError as error: + if on_error is not None: + error.filename = str(self) + error._func = os.rmdir + on_error(error) + _rmtree.avoids_symlink_attacks = True class PosixPath(Path, PurePosixPath): From b4f120c952b2e45bb84c6ed0d3ebddec88431806 Mon Sep 17 00:00:00 2001 From: barneygale Date: Wed, 12 Apr 2023 16:02:00 +0100 Subject: [PATCH 22/26] Move rmtree() implementation back into shutil. --- Lib/pathlib.py | 82 ------------------------------------------------ Lib/shutil.py | 84 ++++++++++++++++++++++++++++++++++++++++++++------ 2 files changed, 74 insertions(+), 92 deletions(-) diff --git a/Lib/pathlib.py b/Lib/pathlib.py index 7e691f859211d9..0517db9a22a596 100644 --- a/Lib/pathlib.py +++ b/Lib/pathlib.py @@ -1055,48 +1055,6 @@ def rmdir(self): """ os.rmdir(self) - # version vulnerable to race conditions - def _rmtree(self, *, on_error=None, ignore_errors=False, dir_fd=None): - """Recursively delete this directory tree.""" - if dir_fd is not None: - raise NotImplementedError("dir_fd unavailable on this platform") - if on_error is None and not ignore_errors: - def on_error(error): - raise - try: - if self.is_symlink() or self.is_junction(): - error = OSError("Cannot call rmtree on a symbolic link") - error.filename = str(self) - raise error - except OSError as error: - if on_error is not None: - error._func = os.path.islink - on_error(error) - # can't continue even if on_error hook returns - return - - walker = self.walk( - top_down=False, - follow_symlinks=False, - follow_junctions=False, - on_error=on_error) - for path, _, filenames in walker: - for filename in filenames: - try: - path._make_child_relpath(filename).unlink() - except OSError as error: - if on_error is not None: - error._func = os.unlink - on_error(error) - try: - path.rmdir() - except OSError as error: - if on_error is not None: - error._func = os.rmdir - on_error(error) - - _rmtree.avoids_symlink_attacks = False - def lstat(self): """ Like stat(), except if the path points to a symlink, the symlink's @@ -1346,46 +1304,6 @@ def _scandir_fwalk(self, follow_symlinks, actions, dir_fd, entry): raise error return os.scandir(fd), fd - if {os.unlink, os.rmdir} <= os.supports_dir_fd: - # Version using fd-based APIs to protect against races - def _rmtree(self, *, on_error=None, ignore_errors=False, dir_fd=None): - """Recursively delete this directory tree.""" - if on_error is None and not ignore_errors: - def on_error(error): - raise - - walker = self._fwalk( - top_down=False, - follow_symlinks=False, - on_error=on_error, - dir_fd=dir_fd) - for path, dirnames, filenames, fd in walker: - for filename in filenames: - try: - os.unlink(filename, dir_fd=fd) - except OSError as error: - if on_error is not None: - error.filename = str(path._make_child_relpath(filename)) - error._func = os.unlink - on_error(error) - for dirname in dirnames: - try: - os.rmdir(dirname, dir_fd=fd) - except OSError as error: - if on_error is not None: - error.filename = str(path._make_child_relpath(dirname)) - error._func = os.rmdir - on_error(error) - if path == self: - try: - os.rmdir(self, dir_fd=dir_fd) - except OSError as error: - if on_error is not None: - error.filename = str(self) - error._func = os.rmdir - on_error(error) - _rmtree.avoids_symlink_attacks = True - class PosixPath(Path, PurePosixPath): """Path subclass for non-Windows systems. diff --git a/Lib/shutil.py b/Lib/shutil.py index bbc1abf6167579..c0258972a8aace 100644 --- a/Lib/shutil.py +++ b/Lib/shutil.py @@ -563,7 +563,56 @@ def copytree(src, dst, symlinks=False, ignore=None, copy_function=copy2, ignore_dangling_symlinks=ignore_dangling_symlinks, dirs_exist_ok=dirs_exist_ok) -_use_fd_functions = pathlib.Path._rmtree.avoids_symlink_attacks +# version vulnerable to race conditions +def _rmtree_unsafe(path, onexc): + try: + if path.is_symlink() or path.is_junction(): + raise OSError("Cannot call rmtree on a symbolic link") + except OSError as err: + onexc(os.path.islink, str(path), err) + return + + walker = path.walk( + top_down=False, + follow_symlinks=False, + follow_junctions=False, + on_error=lambda err: onexc(err._func, err.filename, err)) + for dirpath, _, filenames in walker: + for filename in filenames: + try: + dirpath._make_child_relpath(filename).unlink() + except OSError as err: + onexc(os.unlink, err.filename, err) + try: + dirpath.rmdir() + except OSError as err: + onexc(os.rmdir, err.filename, err) + +# Version using fd-based APIs to protect against races +def _rmtree_safe_fd(topfd, path, onexc): + walker = path._fwalk( + top_down=False, + follow_symlinks=False, + on_error=lambda err: onexc(err._func, err.filename, err), + dir_fd=topfd) + for dirpath, dirnames, filenames, fd in walker: + for filename in filenames: + try: + os.unlink(filename, dir_fd=fd) + except OSError as err: + onexc(os.unlink, str(dirpath._make_child_relpath(filename)), err) + for dirname in dirnames: + try: + os.rmdir(dirname, dir_fd=fd) + except OSError as err: + onexc(os.rmdir, str(dirpath._make_child_relpath(dirname)), err) + if dirpath == path: + try: + os.rmdir(path, dir_fd=topfd) + except OSError as err: + onexc(os.rmdir, str(path), err) + +_use_fd_functions = hasattr(pathlib.Path, '_fwalk') and {os.unlink, os.rmdir} <= os.supports_dir_fd def rmtree(path, ignore_errors=False, onerror=None, *, onexc=None, dir_fd=None): """Recursively delete a directory tree. @@ -591,17 +640,32 @@ def rmtree(path, ignore_errors=False, onerror=None, *, onexc=None, dir_fd=None): DeprecationWarning) sys.audit("shutil.rmtree", path, dir_fd) - handle_error = None - if onexc is not None: - def handle_error(error): - onexc(error._func, error.filename, error) - elif onerror is not None: - def handle_error(error): - exc_info = type(error), error, error.__traceback__ - onerror(error._func, error.filename, exc_info) + if ignore_errors: + def onexc(*args): + pass + elif onerror is None and onexc is None: + def onexc(*args): + raise + elif onexc is None: + if onerror is None: + def onexc(*args): + raise + else: + # delegate to onerror + def onexc(*args): + func, path, exc = args + if exc is None: + exc_info = None, None, None + else: + exc_info = type(exc), exc, exc.__traceback__ + return onerror(func, path, exc_info) if isinstance(path, bytes): path = os.fsdecode(path) - pathlib.Path(path)._rmtree(ignore_errors=ignore_errors, on_error=handle_error, dir_fd=dir_fd) + path = pathlib.Path(path) + if _use_fd_functions: + return _rmtree_safe_fd(dir_fd, path, onexc) + else: + return _rmtree_unsafe(path, onexc) # Allow introspection of whether or not the hardening against symlink # attacks is supported on the current platform From e856ccb910afeed2ae6289e49ffbe29bbb5fe327 Mon Sep 17 00:00:00 2001 From: barneygale Date: Wed, 12 Apr 2023 16:57:06 +0100 Subject: [PATCH 23/26] Restore missing dir_fd check --- Lib/shutil.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Lib/shutil.py b/Lib/shutil.py index c0258972a8aace..523fb8910ffec6 100644 --- a/Lib/shutil.py +++ b/Lib/shutil.py @@ -665,6 +665,8 @@ def onexc(*args): if _use_fd_functions: return _rmtree_safe_fd(dir_fd, path, onexc) else: + if dir_fd is not None: + raise NotImplementedError("dir_fd unavailable on this platform") return _rmtree_unsafe(path, onexc) # Allow introspection of whether or not the hardening against symlink From d1e349ca002759cf63a0a11df08aad301f695b08 Mon Sep 17 00:00:00 2001 From: barneygale Date: Wed, 12 Apr 2023 17:32:20 +0100 Subject: [PATCH 24/26] Remove unnecessary change to test --- Lib/test/test_shutil.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Lib/test/test_shutil.py b/Lib/test/test_shutil.py index 6f82925a71e203..156c0a2c56c72c 100644 --- a/Lib/test/test_shutil.py +++ b/Lib/test/test_shutil.py @@ -562,14 +562,14 @@ def test_rmtree_uses_safe_fd_version_if_available(self): d = os.path.join(tmp_dir, 'a') os.mkdir(d) try: - real_fwalk = pathlib.Path._fwalk + real_rmtree = shutil._rmtree_safe_fd class Called(Exception): pass def _raiser(*args, **kwargs): raise Called - pathlib.Path._fwalk = _raiser + shutil._rmtree_safe_fd = _raiser self.assertRaises(Called, shutil.rmtree, d) finally: - pathlib.Path._fwalk = real_fwalk + shutil._rmtree_safe_fd = real_rmtree else: self.assertFalse(shutil._use_fd_functions) self.assertFalse(shutil.rmtree.avoids_symlink_attacks) From 63e55e1ff4fa80a837f118bd42a36651a5ad8388 Mon Sep 17 00:00:00 2001 From: barneygale Date: Wed, 12 Apr 2023 18:26:47 +0100 Subject: [PATCH 25/26] Reduce memory usage a bit. --- Lib/pathlib.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Lib/pathlib.py b/Lib/pathlib.py index 0517db9a22a596..b4ac2d1e07e362 100644 --- a/Lib/pathlib.py +++ b/Lib/pathlib.py @@ -238,7 +238,8 @@ def _walk(top_down, on_error, follow_symlinks, follow_junctions, use_fd, actions follow_junctions or not entry.is_junction()): if not top_down: actions.append((_WalkAction.WALK, ( - path._make_child_relpath(entry.name), fd, entry))) + path._make_child_relpath(entry.name), fd, + entry if use_fd and not follow_symlinks else None))) dirnames.append(entry.name) else: filenames.append(entry.name) From f7c53525a10898574ff61f271a99d497805880c5 Mon Sep 17 00:00:00 2001 From: barneygale Date: Wed, 12 Apr 2023 18:27:52 +0100 Subject: [PATCH 26/26] Reduce diff --- Lib/shutil.py | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/Lib/shutil.py b/Lib/shutil.py index 523fb8910ffec6..d907c8a86ac38a 100644 --- a/Lib/shutil.py +++ b/Lib/shutil.py @@ -565,13 +565,6 @@ def copytree(src, dst, symlinks=False, ignore=None, copy_function=copy2, # version vulnerable to race conditions def _rmtree_unsafe(path, onexc): - try: - if path.is_symlink() or path.is_junction(): - raise OSError("Cannot call rmtree on a symbolic link") - except OSError as err: - onexc(os.path.islink, str(path), err) - return - walker = path.walk( top_down=False, follow_symlinks=False, @@ -667,6 +660,14 @@ def onexc(*args): else: if dir_fd is not None: raise NotImplementedError("dir_fd unavailable on this platform") + try: + if path.is_symlink() or path.is_junction(): + # symlinks to directories are forbidden, see bug #1669 + raise OSError("Cannot call rmtree on a symbolic link") + except OSError as err: + onexc(os.path.islink, str(path), err) + # can't continue even if onexc hook returns + return return _rmtree_unsafe(path, onexc) # Allow introspection of whether or not the hardening against symlink