
Commit 2ed25aa

ohartoov authored and rleon committed
IB/mlx5: Fix potential deadlock in MR deregistration
The issue arises when kzalloc() is invoked while holding umem_mutex or
any other lock acquired under umem_mutex. This is problematic because
kzalloc() can trigger fs_reclaim_acquire(), which may, in turn, invoke
mmu_notifier_invalidate_range_start(). This function can lead to
mlx5_ib_invalidate_range(), which attempts to acquire umem_mutex again,
resulting in a deadlock.

The problematic flow:

                 CPU0                  |              CPU1
---------------------------------------|------------------------------------------------
mlx5_ib_dereg_mr()                     |
 → revoke_mr()                         |
  → mutex_lock(&umem_odp->umem_mutex)  |
                                       | mlx5_mkey_cache_init()
                                       |  → mutex_lock(&dev->cache.rb_lock)
                                       |  → mlx5r_cache_create_ent_locked()
                                       |   → kzalloc(GFP_KERNEL)
                                       |    → fs_reclaim()
                                       |     → mmu_notifier_invalidate_range_start()
                                       |      → mlx5_ib_invalidate_range()
                                       |       → mutex_lock(&umem_odp->umem_mutex)
 → cache_ent_find_and_store()          |
  → mutex_lock(&dev->cache.rb_lock)    |

Additionally, when kzalloc() is called from within
cache_ent_find_and_store(), we encounter the same deadlock due to
re-acquisition of umem_mutex.

Solve by releasing umem_mutex in dereg_mr() after umr_revoke_mr() and
before acquiring rb_lock. This enforces a consistent lock ordering and
ensures that umem_mutex is not held while performing memory allocations
that could trigger the reclaim path.

The following lockdep warning demonstrates the deadlock:

 python3/20557 is trying to acquire lock:
 ffff888387542128 (&umem_odp->umem_mutex){+.+.}-{4:4}, at:
                  mlx5_ib_invalidate_range+0x5b/0x550 [mlx5_ib]

 but task is already holding lock:
 ffffffff82f6b840 (mmu_notifier_invalidate_range_start){+.+.}-{0:0}, at:
                  unmap_vmas+0x7b/0x1a0

 which lock already depends on the new lock.
 the existing dependency chain (in reverse order) is:

 -> #3 (mmu_notifier_invalidate_range_start){+.+.}-{0:0}:
        fs_reclaim_acquire+0x60/0xd0
        mem_cgroup_css_alloc+0x6f/0x9b0
        cgroup_init_subsys+0xa4/0x240
        cgroup_init+0x1c8/0x510
        start_kernel+0x747/0x760
        x86_64_start_reservations+0x25/0x30
        x86_64_start_kernel+0x73/0x80
        common_startup_64+0x129/0x138

 -> #2 (fs_reclaim){+.+.}-{0:0}:
        fs_reclaim_acquire+0x91/0xd0
        __kmalloc_cache_noprof+0x4d/0x4c0
        mlx5r_cache_create_ent_locked+0x75/0x620 [mlx5_ib]
        mlx5_mkey_cache_init+0x186/0x360 [mlx5_ib]
        mlx5_ib_stage_post_ib_reg_umr_init+0x3c/0x60 [mlx5_ib]
        __mlx5_ib_add+0x4b/0x190 [mlx5_ib]
        mlx5r_probe+0xd9/0x320 [mlx5_ib]
        auxiliary_bus_probe+0x42/0x70
        really_probe+0xdb/0x360
        __driver_probe_device+0x8f/0x130
        driver_probe_device+0x1f/0xb0
        __driver_attach+0xd4/0x1f0
        bus_for_each_dev+0x79/0xd0
        bus_add_driver+0xf0/0x200
        driver_register+0x6e/0xc0
        __auxiliary_driver_register+0x6a/0xc0
        do_one_initcall+0x5e/0x390
        do_init_module+0x88/0x240
        init_module_from_file+0x85/0xc0
        idempotent_init_module+0x104/0x300
        __x64_sys_finit_module+0x68/0xc0
        do_syscall_64+0x6d/0x140
        entry_SYSCALL_64_after_hwframe+0x4b/0x53

 -> #1 (&dev->cache.rb_lock){+.+.}-{4:4}:
        __mutex_lock+0x98/0xf10
        __mlx5_ib_dereg_mr+0x6f2/0x890 [mlx5_ib]
        mlx5_ib_dereg_mr+0x21/0x110 [mlx5_ib]
        ib_dereg_mr_user+0x85/0x1f0 [ib_core]
        uverbs_free_mr+0x19/0x30 [ib_uverbs]
        destroy_hw_idr_uobject+0x21/0x80 [ib_uverbs]
        uverbs_destroy_uobject+0x60/0x3d0 [ib_uverbs]
        uobj_destroy+0x57/0xa0 [ib_uverbs]
        ib_uverbs_cmd_verbs+0x4d5/0x1210 [ib_uverbs]
        ib_uverbs_ioctl+0x129/0x230 [ib_uverbs]
        __x64_sys_ioctl+0x596/0xaa0
        do_syscall_64+0x6d/0x140
        entry_SYSCALL_64_after_hwframe+0x4b/0x53

 -> #0 (&umem_odp->umem_mutex){+.+.}-{4:4}:
        __lock_acquire+0x1826/0x2f00
        lock_acquire+0xd3/0x2e0
        __mutex_lock+0x98/0xf10
        mlx5_ib_invalidate_range+0x5b/0x550 [mlx5_ib]
        __mmu_notifier_invalidate_range_start+0x18e/0x1f0
        unmap_vmas+0x182/0x1a0
        exit_mmap+0xf3/0x4a0
        mmput+0x3a/0x100
        do_exit+0x2b9/0xa90
        do_group_exit+0x32/0xa0
        get_signal+0xc32/0xcb0
        arch_do_signal_or_restart+0x29/0x1d0
        syscall_exit_to_user_mode+0x105/0x1d0
        do_syscall_64+0x79/0x140
        entry_SYSCALL_64_after_hwframe+0x4b/0x53

 Chain exists of:
   &dev->cache.rb_lock --> mmu_notifier_invalidate_range_start --> &umem_odp->umem_mutex

 Possible unsafe locking scenario:

        CPU0                            CPU1
        ----                            ----
   lock(&umem_odp->umem_mutex);
                                        lock(mmu_notifier_invalidate_range_start);
                                        lock(&umem_odp->umem_mutex);
   lock(&dev->cache.rb_lock);

  *** DEADLOCK ***

Fixes: abb604a ("RDMA/mlx5: Fix a race for an ODP MR which leads to CQE with error")
Signed-off-by: Or Har-Toov <[email protected]>
Reviewed-by: Michael Guralnik <[email protected]>
Link: https://patch.msgid.link/3c8f225a8a9fade647d19b014df1172544643e4a.1750061612.git.leon@kernel.org
Signed-off-by: Leon Romanovsky <[email protected]>
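The core pattern the fix enforces can be spelled out with a minimal,
illustrative C sketch. This is not the driver code: obj_mutex, cache_lock,
struct cache_entry and revoke_hw_access() are placeholder names introduced
here, with obj_mutex standing in for umem_odp->umem_mutex and cache_lock for
dev->cache.rb_lock. The point is that a GFP_KERNEL allocation may enter
direct reclaim, reclaim may fire the mmu notifier, and therefore any mutex
taken by the invalidation path must not be held across such an allocation.

#include <linux/mutex.h>
#include <linux/slab.h>

/* Placeholders for illustration only -- none of these exist in the driver. */
struct cache_entry { int placeholder; };
static int revoke_hw_access(void) { return 0; }   /* stands in for the HW revoke step */

/* Deadlock-prone: allocating while obj_mutex is held.  kzalloc(GFP_KERNEL)
 * may enter fs_reclaim, which can invoke the mmu notifier and contend on
 * obj_mutex again, exactly as in the lockdep chain above. */
static int cleanup_unsafe(struct mutex *obj_mutex, struct mutex *cache_lock)
{
        struct cache_entry *ent;

        mutex_lock(obj_mutex);
        mutex_lock(cache_lock);
        ent = kzalloc(sizeof(*ent), GFP_KERNEL);
        /* ent would be linked into the cache here */
        mutex_unlock(cache_lock);
        mutex_unlock(obj_mutex);
        return ent ? 0 : -ENOMEM;
}

/* Fixed ordering, loosely following the shape of mlx5_umr_revoke_mr_with_lock()
 * plus mlx5r_handle_mkey_cleanup(): obj_mutex is dropped before any allocation
 * or cache_lock acquisition. */
static int cleanup_safe(struct mutex *obj_mutex, struct mutex *cache_lock)
{
        struct cache_entry *ent;
        int ret;

        mutex_lock(obj_mutex);
        ret = revoke_hw_access();       /* the work that genuinely needs obj_mutex */
        mutex_unlock(obj_mutex);
        if (ret)
                return ret;

        mutex_lock(cache_lock);
        ent = kzalloc(sizeof(*ent), GFP_KERNEL);        /* obj_mutex no longer held */
        /* ent would be linked into the cache here */
        mutex_unlock(cache_lock);
        return ent ? 0 : -ENOMEM;
}

In the actual patch below, mlx5_umr_revoke_mr_with_lock() performs only the
UMR revoke under umem_mutex (and the dma-buf resv lock), while
mlx5r_handle_mkey_cleanup() takes rb_lock via cache_ent_find_and_store()
only after that mutex has been released.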
1 parent 8edab8a commit 2ed25aa

drivers/infiniband/hw/mlx5/mr.c

Lines changed: 47 additions & 14 deletions
@@ -2027,23 +2027,50 @@ void mlx5_ib_revoke_data_direct_mrs(struct mlx5_ib_dev *dev)
 	}
 }
 
-static int mlx5_revoke_mr(struct mlx5_ib_mr *mr)
+static int mlx5_umr_revoke_mr_with_lock(struct mlx5_ib_mr *mr)
 {
-	struct mlx5_ib_dev *dev = to_mdev(mr->ibmr.device);
-	struct mlx5_cache_ent *ent = mr->mmkey.cache_ent;
-	bool is_odp = is_odp_mr(mr);
 	bool is_odp_dma_buf = is_dmabuf_mr(mr) &&
-			      !to_ib_umem_dmabuf(mr->umem)->pinned;
-	bool from_cache = !!ent;
-	int ret = 0;
+			      !to_ib_umem_dmabuf(mr->umem)->pinned;
+	bool is_odp = is_odp_mr(mr);
+	int ret;
 
 	if (is_odp)
 		mutex_lock(&to_ib_umem_odp(mr->umem)->umem_mutex);
 
 	if (is_odp_dma_buf)
-		dma_resv_lock(to_ib_umem_dmabuf(mr->umem)->attach->dmabuf->resv, NULL);
+		dma_resv_lock(to_ib_umem_dmabuf(mr->umem)->attach->dmabuf->resv,
+			      NULL);
+
+	ret = mlx5r_umr_revoke_mr(mr);
+
+	if (is_odp) {
+		if (!ret)
+			to_ib_umem_odp(mr->umem)->private = NULL;
+		mutex_unlock(&to_ib_umem_odp(mr->umem)->umem_mutex);
+	}
+
+	if (is_odp_dma_buf) {
+		if (!ret)
+			to_ib_umem_dmabuf(mr->umem)->private = NULL;
+		dma_resv_unlock(
+			to_ib_umem_dmabuf(mr->umem)->attach->dmabuf->resv);
+	}
 
-	if (mr->mmkey.cacheable && !mlx5r_umr_revoke_mr(mr) && !cache_ent_find_and_store(dev, mr)) {
+	return ret;
+}
+
+static int mlx5r_handle_mkey_cleanup(struct mlx5_ib_mr *mr)
+{
+	bool is_odp_dma_buf = is_dmabuf_mr(mr) &&
+			      !to_ib_umem_dmabuf(mr->umem)->pinned;
+	struct mlx5_ib_dev *dev = to_mdev(mr->ibmr.device);
+	struct mlx5_cache_ent *ent = mr->mmkey.cache_ent;
+	bool is_odp = is_odp_mr(mr);
+	bool from_cache = !!ent;
+	int ret;
+
+	if (mr->mmkey.cacheable && !mlx5_umr_revoke_mr_with_lock(mr) &&
+	    !cache_ent_find_and_store(dev, mr)) {
 		ent = mr->mmkey.cache_ent;
 		/* upon storing to a clean temp entry - schedule its cleanup */
 		spin_lock_irq(&ent->mkeys_queue.lock);
@@ -2055,7 +2082,7 @@ static int mlx5_revoke_mr(struct mlx5_ib_mr *mr)
 			ent->tmp_cleanup_scheduled = true;
 		}
 		spin_unlock_irq(&ent->mkeys_queue.lock);
-		goto out;
+		return 0;
 	}
 
 	if (ent) {
@@ -2064,8 +2091,14 @@ static int mlx5_revoke_mr(struct mlx5_ib_mr *mr)
 		mr->mmkey.cache_ent = NULL;
 		spin_unlock_irq(&ent->mkeys_queue.lock);
 	}
+
+	if (is_odp)
+		mutex_lock(&to_ib_umem_odp(mr->umem)->umem_mutex);
+
+	if (is_odp_dma_buf)
+		dma_resv_lock(to_ib_umem_dmabuf(mr->umem)->attach->dmabuf->resv,
+			      NULL);
 	ret = destroy_mkey(dev, mr);
-out:
 	if (is_odp) {
 		if (!ret)
 			to_ib_umem_odp(mr->umem)->private = NULL;
@@ -2075,9 +2108,9 @@ static int mlx5_revoke_mr(struct mlx5_ib_mr *mr)
 	if (is_odp_dma_buf) {
 		if (!ret)
 			to_ib_umem_dmabuf(mr->umem)->private = NULL;
-		dma_resv_unlock(to_ib_umem_dmabuf(mr->umem)->attach->dmabuf->resv);
+		dma_resv_unlock(
+			to_ib_umem_dmabuf(mr->umem)->attach->dmabuf->resv);
 	}
-
 	return ret;
 }
 
@@ -2126,7 +2159,7 @@ static int __mlx5_ib_dereg_mr(struct ib_mr *ibmr)
 	}
 
 	/* Stop DMA */
-	rc = mlx5_revoke_mr(mr);
+	rc = mlx5r_handle_mkey_cleanup(mr);
 	if (rc)
 		return rc;
 