Skip to content

Commit bde708f

Browse files
apopple-nvidia authored and akpm00 committed
fs/dax: always remove DAX page-cache entries when breaking layouts
Prior to any truncation operation, file systems call dax_break_layout() to ensure pages in the range are not undergoing DMA. Later, DAX page-cache entries will be removed by truncate_folio_batch_exceptionals() in the generic page-cache code. However, this makes it possible for folios to be removed from the page-cache even though they are still DMA busy if the file-system hasn't called dax_break_layout(). It also means they can never be waited on in future because FS DAX will lose track of them once the page-cache entry has been deleted. Instead, it is better to delete the FS DAX entry when the file-system calls dax_break_layout() as part of its truncate operation. This ensures only idle pages can be removed from the FS DAX page-cache and makes it easy to detect if a file-system hasn't called dax_break_layout() prior to a truncate operation. Link: https://lkml.kernel.org/r/3be6115eaaa8d28fee37fcba3287be4f226a7d24.1740713401.git-series.apopple@nvidia.com Signed-off-by: Alistair Popple <[email protected]> Reviewed-by: Dan Williams <[email protected]> Tested-by: Alison Schofield <[email protected]> Cc: Alexander Gordeev <[email protected]> Cc: Asahi Lina <[email protected]> Cc: Balbir Singh <[email protected]> Cc: Bjorn Helgaas <[email protected]> Cc: Catalin Marinas <[email protected]> Cc: Christian Borntraeger <[email protected]> Cc: Christoph Hellwig <[email protected]> Cc: Chunyan Zhang <[email protected]> Cc: "Darrick J. Wong" <[email protected]> Cc: Dave Chinner <[email protected]> Cc: Dave Hansen <[email protected]> Cc: Dave Jiang <[email protected]> Cc: David Hildenbrand <[email protected]> Cc: Gerald Schaefer <[email protected]> Cc: Heiko Carstens <[email protected]> Cc: Huacai Chen <[email protected]> Cc: Ira Weiny <[email protected]> Cc: Jan Kara <[email protected]> Cc: Jason Gunthorpe <[email protected]> Cc: Jason Gunthorpe <[email protected]> Cc: John Hubbard <[email protected]> Cc: linmiaohe <[email protected]> Cc: Logan Gunthorpe <[email protected]> Cc: Matthew Wilcox (Oracle) <[email protected]> Cc: Michael "Camp Drill Sergeant" Ellerman <[email protected]> Cc: Nicholas Piggin <[email protected]> Cc: Peter Xu <[email protected]> Cc: Sven Schnelle <[email protected]> Cc: Ted Ts'o <[email protected]> Cc: Vasily Gorbik <[email protected]> Cc: Vishal Verma <[email protected]> Cc: Vivek Goyal <[email protected]> Cc: WANG Xuerui <[email protected]> Cc: Will Deacon <[email protected]> Signed-off-by: Andrew Morton <[email protected]>
1 parent d5b3afe commit bde708f

File tree

4 files changed

+59
-4
lines changed

4 files changed

+59
-4
lines changed

fs/dax.c

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -846,6 +846,36 @@ int dax_delete_mapping_entry(struct address_space *mapping, pgoff_t index)
846846
return ret;
847847
}
848848

849+
/*
 * dax_delete_mapping_range - delete the DAX entries of @mapping in a byte range
 * @mapping: address space whose i_pages xarray is walked
 * @start: byte offset of the start of the range (converted to a page index)
 * @end: byte offset of the end of the range; LLONG_MAX selects every index
 *       up to ULONG_MAX
 *
 * Walks @mapping->i_pages between the two page indices with the xarray lock
 * held (IRQ variant). Every value entry found is disassociated, replaced with
 * NULL, subtracted from @mapping->nrpages and then released with WAKE_ALL.
 * Entries that are not value entries are left in place.
 */
void dax_delete_mapping_range(struct address_space *mapping,
850+
loff_t start, loff_t end)
851+
{
852+
void *entry;
853+
pgoff_t start_idx = start >> PAGE_SHIFT;
854+
pgoff_t end_idx;
855+
XA_STATE(xas, &mapping->i_pages, start_idx);
856+
857+
/* end == LLONG_MAX means every page from start to the end of the file */
858+
if (end == LLONG_MAX)
859+
end_idx = ULONG_MAX;
860+
else
861+
end_idx = end >> PAGE_SHIFT;
862+
863+
xas_lock_irq(&xas);
864+
xas_for_each(&xas, entry, end_idx) {
865+
/* Only value entries are handled here; anything else is skipped. */
if (!xa_is_value(entry))
866+
continue;
867+
/*
 * NOTE(review): wait_entry_unlocked_exclusive() is defined outside
 * this hunk; presumably it waits until the entry is no longer locked
 * by someone else. A NULL result is treated as "nothing to delete".
 */
entry = wait_entry_unlocked_exclusive(&xas, entry);
868+
if (!entry)
869+
continue;
870+
dax_disassociate_entry(entry, mapping, true);
871+
xas_store(&xas, NULL);
872+
/* One entry accounts for 1UL << dax_entry_order(entry) pages. */
mapping->nrpages -= 1UL << dax_entry_order(entry);
873+
put_unlocked_entry(&xas, entry, WAKE_ALL);
874+
}
875+
xas_unlock_irq(&xas);
876+
}
877+
EXPORT_SYMBOL_GPL(dax_delete_mapping_range);
878+
849879
static int wait_page_idle(struct page *page,
850880
void (cb)(struct inode *),
851881
struct inode *inode)
@@ -857,6 +887,9 @@ static int wait_page_idle(struct page *page,
857887
/*
858888
* Unmaps the inode and waits for any DMA to complete prior to deleting the
859889
* DAX mapping entries for the range.
890+
*
891+
* For NOWAIT behavior, pass @cb as NULL to early-exit on first found
892+
* busy page
860893
*/
861894
int dax_break_layout(struct inode *inode, loff_t start, loff_t end,
862895
void (cb)(struct inode *))
@@ -871,10 +904,17 @@ int dax_break_layout(struct inode *inode, loff_t start, loff_t end,
871904
page = dax_layout_busy_page_range(inode->i_mapping, start, end);
872905
if (!page)
873906
break;
907+
if (!cb) {
908+
error = -ERESTARTSYS;
909+
break;
910+
}
874911

875912
error = wait_page_idle(page, cb, inode);
876913
} while (error == 0);
877914

915+
if (!page)
916+
dax_delete_mapping_range(inode->i_mapping, start, end);
917+
878918
return error;
879919
}
880920
EXPORT_SYMBOL_GPL(dax_break_layout);

fs/xfs/xfs_inode.c

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2735,7 +2735,6 @@ xfs_mmaplock_two_inodes_and_break_dax_layout(
27352735
struct xfs_inode *ip2)
27362736
{
27372737
int error;
2738-
struct page *page;
27392738

27402739
if (ip1->i_ino > ip2->i_ino)
27412740
swap(ip1, ip2);
@@ -2759,8 +2758,8 @@ xfs_mmaplock_two_inodes_and_break_dax_layout(
27592758
* need to unlock & lock the XFS_MMAPLOCK_EXCL which is not suitable
27602759
* for this nested lock case.
27612760
*/
2762-
page = dax_layout_busy_page(VFS_I(ip2)->i_mapping);
2763-
if (!dax_page_is_idle(page)) {
2761+
error = dax_break_layout(VFS_I(ip2), 0, -1, NULL);
2762+
if (error) {
27642763
xfs_iunlock(ip2, XFS_MMAPLOCK_EXCL);
27652764
xfs_iunlock(ip1, XFS_MMAPLOCK_EXCL);
27662765
goto again;

include/linux/dax.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -255,6 +255,8 @@ vm_fault_t dax_iomap_fault(struct vm_fault *vmf, unsigned int order,
255255
vm_fault_t dax_finish_sync_fault(struct vm_fault *vmf,
256256
unsigned int order, pfn_t pfn);
257257
int dax_delete_mapping_entry(struct address_space *mapping, pgoff_t index);
258+
void dax_delete_mapping_range(struct address_space *mapping,
259+
loff_t start, loff_t end);
258260
int dax_invalidate_mapping_entry_sync(struct address_space *mapping,
259261
pgoff_t index);
260262
int __must_check dax_break_layout(struct inode *inode, loff_t start,

mm/truncate.c

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -78,8 +78,22 @@ static void truncate_folio_batch_exceptionals(struct address_space *mapping,
7878

7979
if (dax_mapping(mapping)) {
8080
for (i = j; i < nr; i++) {
81-
if (xa_is_value(fbatch->folios[i]))
81+
if (xa_is_value(fbatch->folios[i])) {
82+
/*
83+
* File systems should already have called
84+
* dax_break_layout_entry() to remove all DAX
85+
* entries while holding a lock to prevent
86+
* establishing new entries. Therefore we
87+
* shouldn't find any here.
88+
*/
89+
WARN_ON_ONCE(1);
90+
91+
/*
92+
* Delete the mapping so truncate_pagecache()
93+
* doesn't loop forever.
94+
*/
8295
dax_delete_mapping_entry(mapping, indices[i]);
96+
}
8397
}
8498
goto out;
8599
}

0 commit comments

Comments
 (0)