Skip to content

GH-73991: Add pathlib.Path.copy() #119058

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 19 commits into from
Jun 14, 2024
Merged
Show file tree
Hide file tree
Changes from 7 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 15 additions & 0 deletions Doc/library/pathlib.rst
Original file line number Diff line number Diff line change
Expand Up @@ -1477,6 +1477,21 @@ example because the path doesn't exist).
available. In previous versions, :exc:`NotImplementedError` was raised.


.. method:: Path.copy(target, *, follow_symlinks=True)

Copy the contents of this file to the *target* file. If *target* specifies
a file that already exists, it will be replaced.

If *follow_symlinks* is false, and this file is a symbolic link, *target*
will be created as a symbolic link. If *follow_symlinks* is true and this
file is a symbolic link, *target* will be a copy of the symlink target.

This method doesn't guarantee whether permissions, ownership, and other
metadata is copied; it may vary by operating system and Python version.

.. versionadded:: 3.14


.. method:: Path.rename(target)

Rename this file or directory to the given *target*, and return a new Path
Expand Down
6 changes: 6 additions & 0 deletions Doc/whatsnew/3.14.rst
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,12 @@ ast
Added :func:`ast.compare` for comparing two ASTs.
(Contributed by Batuhan Taskaya and Jeremy Hylton in :issue:`15987`.)

pathlib
-------

* Add :meth:`pathlib.Path.copy`, which copies the content of one file to
another, like :func:`shutil.copyfile`.
(Contributed by Barney Gale in :gh:`73991`.)


Optimizations
Expand Down
153 changes: 152 additions & 1 deletion Lib/pathlib/_abc.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,9 +12,19 @@
"""

import functools
import os
import sys
from glob import _Globber, _no_recurse_symlinks
from errno import ENOTDIR, ELOOP
from errno import ENOTDIR, ELOOP, EBADF, EOPNOTSUPP, ETXTBSY, EXDEV
from stat import S_ISDIR, S_ISLNK, S_ISREG, S_ISSOCK, S_ISBLK, S_ISCHR, S_ISFIFO
try:
import fcntl
except ImportError:
fcntl = None
try:
import posix
except ImportError:
posix = None


__all__ = ["UnsupportedOperation"]
Expand All @@ -25,6 +35,88 @@ def _is_case_sensitive(parser):
return parser.normcase('Aa') == 'Aa'


def _get_copy_blocksize(infd):
"""Determine blocksize for fastcopying on Linux.
Hopefully the whole file will be copied in a single call.
The copying itself should be performed in a loop 'till EOF is
reached (0 return) so a blocksize smaller or bigger than the actual
file size should not make any difference, also in case the file
content changes while being copied.
"""
try:
blocksize = max(os.fstat(infd).st_size, 2 ** 23) # min 8 MiB
except OSError:
blocksize = 2 ** 27 # 128 MiB
# On 32-bit architectures truncate to 1 GiB to avoid OverflowError,
# see gh-82500.
if sys.maxsize < 2 ** 32:
blocksize = min(blocksize, 2 ** 30)
return blocksize


if fcntl and hasattr(fcntl, 'FICLONE'):
def _clonefd(source_fd, target_fd):
"""
Perform a lightweight copy of two files, where the data blocks are
copied only when modified. This is known as Copy on Write (CoW),
instantaneous copy or reflink.
"""
fcntl.ioctl(target_fd, fcntl.FICLONE, source_fd)
else:
_clonefd = None


if posix and hasattr(posix, '_fcopyfile'):
def _copyfd(source_fd, target_fd):
"""
Copy a regular file content using high-performance fcopyfile(3)
syscall (macOS).
"""
posix._fcopyfile(source_fd, target_fd, posix._COPYFILE_DATA)
elif hasattr(os, 'copy_file_range'):
def _copyfd(source_fd, target_fd):
"""
Copy data from one regular mmap-like fd to another by using a
high-performance copy_file_range(2) syscall that gives filesystems
an opportunity to implement the use of reflinks or server-side
copy.
This should work on Linux >= 4.5 only.
"""
blocksize = _get_copy_blocksize(source_fd)
offset = 0
while True:
sent = os.copy_file_range(source_fd, target_fd, blocksize,
offset_dst=offset)
if sent == 0:
break # EOF
offset += sent
elif hasattr(os, 'sendfile'):
def _copyfd(source_fd, target_fd):
"""Copy data from one regular mmap-like fd to another by using
high-performance sendfile(2) syscall.
This should work on Linux >= 2.6.33 only.
"""
blocksize = _get_copy_blocksize(source_fd)
offset = 0
while True:
sent = os.sendfile(target_fd, source_fd, offset, blocksize)
if sent == 0:
break # EOF
offset += sent
else:
_copyfd = None


def _copyfileobj(source_f, target_f):
"""
Copy data from file-like object source_f to file-like object target_f.
"""
read_source = source_f.read
write_target = target_f.write
while buf := read_source(1024 * 1024):
write_target(buf)


class UnsupportedOperation(NotImplementedError):
"""An exception that is raised when an unsupported operation is called on
a path object.
Expand Down Expand Up @@ -535,6 +627,15 @@ def samefile(self, other_path):
return (st.st_ino == other_st.st_ino and
st.st_dev == other_st.st_dev)

def _samefile_safe(self, other_path):
"""
Like samefile(), but returns False rather than raising OSError.
"""
try:
return self.samefile(other_path)
except (OSError, ValueError):
return False

def open(self, mode='r', buffering=-1, encoding=None,
errors=None, newline=None):
"""
Expand Down Expand Up @@ -783,6 +884,56 @@ def mkdir(self, mode=0o777, parents=False, exist_ok=False):
"""
raise UnsupportedOperation(self._unsupported_msg('mkdir()'))

def copy(self, target, follow_symlinks=True):
"""
Copy the contents of this file to the given target. If this file is a
symlink and follow_symlinks is false, a symlink will be created at the
target.
"""
if not isinstance(target, PathBase):
target = self.with_segments(target)
if self._samefile_safe(target):
raise OSError(f"{self!r} and {target!r} are the same file")
if not follow_symlinks and self.is_symlink():
target.symlink_to(self.readlink())
return
with self.open('rb') as source_f:
try:
with target.open('wb') as target_f:
try:
source_fd = source_f.fileno()
target_fd = target_f.fileno()
except Exception:
return _copyfileobj(source_f, target_f)
try:
# Use OS copy-on-write where available.
if _clonefd:
try:
return _clonefd(source_fd, target_fd)
except OSError as err:
if err.errno not in (EBADF, EOPNOTSUPP, ETXTBSY, EXDEV):
raise err

# Use OS copy where available.
if _copyfd:
return _copyfd(source_fd, target_fd)

# Last resort: copy between file objects.
return _copyfileobj(source_f, target_f)
except OSError as err:
# Produce more useful error messages.
err.filename = str(self)
err.filename2 = str(target)
raise err

except IsADirectoryError as e:
if not target.exists():
# Raise a less confusing exception.
raise FileNotFoundError(
f'Directory does not exist: {target}') from e
else:
raise

def rename(self, target):
"""
Rename this path to the target path.
Expand Down
17 changes: 17 additions & 0 deletions Lib/pathlib/_local.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,10 @@
import grp
except ImportError:
grp = None
try:
import _winapi
except ImportError:
_winapi = None

from ._abc import UnsupportedOperation, PurePathBase, PathBase

Expand Down Expand Up @@ -780,6 +784,19 @@ def mkdir(self, mode=0o777, parents=False, exist_ok=False):
if not exist_ok or not self.is_dir():
raise

if hasattr(_winapi, 'CopyFile2'):
def copy(self, target, follow_symlinks=True):
"""
Copy the contents of this file to the given target. If this file is a
symlink and follow_symlinks is false, a symlink will be created at the
target.
"""
if isinstance(target, PathBase) and not isinstance(target, Path):
return PathBase.copy(self, target, follow_symlinks=follow_symlinks)

flags = 0 if follow_symlinks else _winapi.COPY_FILE_COPY_SYMLINK
_winapi.CopyFile2(os.fspath(self), os.fspath(target), flags)

def chmod(self, mode, *, follow_symlinks=True):
"""
Change the permissions of the path, like os.chmod().
Expand Down
63 changes: 63 additions & 0 deletions Lib/test/test_pathlib/test_pathlib_abc.py
Original file line number Diff line number Diff line change
Expand Up @@ -1696,6 +1696,69 @@ def test_write_text_with_newlines(self):
self.assertEqual((p / 'fileA').read_bytes(),
b'abcde' + os_linesep_byte + b'fghlk' + os_linesep_byte + b'\rmnopq')

def test_copy_file(self):
base = self.cls(self.base)
source = base / 'fileA'
target = base / 'copyA'
source.copy(target)
self.assertTrue(target.exists())
self.assertEqual(source.read_text(), target.read_text())

def test_copy_directory(self):
base = self.cls(self.base)
source = base / 'dirA'
target = base / 'copyA'
with self.assertRaises(OSError):
source.copy(target)

@needs_symlinks
def test_copy_symlink_follow_symlinks_true(self):
base = self.cls(self.base)
source = base / 'linkA'
target = base / 'copyA'
source.copy(target)
self.assertTrue(target.exists())
self.assertFalse(target.is_symlink())
self.assertEqual(source.read_text(), target.read_text())

@needs_symlinks
def test_copy_symlink_follow_symlinks_false(self):
base = self.cls(self.base)
source = base / 'linkA'
target = base / 'copyA'
source.copy(target, follow_symlinks=False)
self.assertTrue(target.exists())
self.assertTrue(target.is_symlink())
self.assertEqual(source.readlink(), target.readlink())

def test_copy_to_existing_file(self):
base = self.cls(self.base)
source = base / 'fileA'
target = base / 'dirB' / 'fileB'
source.copy(target)
self.assertTrue(target.exists())
self.assertEqual(source.read_text(), target.read_text())

def test_copy_to_existing_directory(self):
base = self.cls(self.base)
source = base / 'fileA'
target = base / 'dirA'
with self.assertRaises(OSError):
source.copy(target)

@needs_symlinks
def test_copy_to_existing_symlink(self):
base = self.cls(self.base)
source = base / 'dirB' / 'fileB'
target = base / 'linkA'
real_target = base / 'fileA'
source.copy(target)
self.assertTrue(target.exists())
self.assertTrue(target.is_symlink())
self.assertTrue(real_target.exists())
self.assertFalse(real_target.is_symlink())
self.assertEqual(source.read_text(), real_target.read_text())

def test_iterdir(self):
P = self.cls
p = P(self.base)
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
Add :meth:`pathlib.Path.copy`, which copies the content of one file to another,
like :func:`shutil.copyfile`.