Skip to content

Commit 711741f

Browse files
pcacjrSteve French
authored andcommitted
smb: client: fix potential deadlock when reconnecting channels
Fix cifs_signal_cifsd_for_reconnect() to take the correct lock order and prevent the following deadlock from happening ====================================================== WARNING: possible circular locking dependency detected 6.16.0-rc3-build2+ #1301 Tainted: G S W ------------------------------------------------------ cifsd/6055 is trying to acquire lock: ffff88810ad56038 (&tcp_ses->srv_lock){+.+.}-{3:3}, at: cifs_signal_cifsd_for_reconnect+0x134/0x200 but task is already holding lock: ffff888119c64330 (&ret_buf->chan_lock){+.+.}-{3:3}, at: cifs_signal_cifsd_for_reconnect+0xcf/0x200 which lock already depends on the new lock. the existing dependency chain (in reverse order) is: -> #2 (&ret_buf->chan_lock){+.+.}-{3:3}: validate_chain+0x1cf/0x270 __lock_acquire+0x60e/0x780 lock_acquire.part.0+0xb4/0x1f0 _raw_spin_lock+0x2f/0x40 cifs_setup_session+0x81/0x4b0 cifs_get_smb_ses+0x771/0x900 cifs_mount_get_session+0x7e/0x170 cifs_mount+0x92/0x2d0 cifs_smb3_do_mount+0x161/0x460 smb3_get_tree+0x55/0x90 vfs_get_tree+0x46/0x180 do_new_mount+0x1b0/0x2e0 path_mount+0x6ee/0x740 do_mount+0x98/0xe0 __do_sys_mount+0x148/0x180 do_syscall_64+0xa4/0x260 entry_SYSCALL_64_after_hwframe+0x76/0x7e -> #1 (&ret_buf->ses_lock){+.+.}-{3:3}: validate_chain+0x1cf/0x270 __lock_acquire+0x60e/0x780 lock_acquire.part.0+0xb4/0x1f0 _raw_spin_lock+0x2f/0x40 cifs_match_super+0x101/0x320 sget+0xab/0x270 cifs_smb3_do_mount+0x1e0/0x460 smb3_get_tree+0x55/0x90 vfs_get_tree+0x46/0x180 do_new_mount+0x1b0/0x2e0 path_mount+0x6ee/0x740 do_mount+0x98/0xe0 __do_sys_mount+0x148/0x180 do_syscall_64+0xa4/0x260 entry_SYSCALL_64_after_hwframe+0x76/0x7e -> #0 (&tcp_ses->srv_lock){+.+.}-{3:3}: check_noncircular+0x95/0xc0 check_prev_add+0x115/0x2f0 validate_chain+0x1cf/0x270 __lock_acquire+0x60e/0x780 lock_acquire.part.0+0xb4/0x1f0 _raw_spin_lock+0x2f/0x40 cifs_signal_cifsd_for_reconnect+0x134/0x200 __cifs_reconnect+0x8f/0x500 cifs_handle_standard+0x112/0x280 cifs_demultiplex_thread+0x64d/0xbc0 kthread+0x2f7/0x310 ret_from_fork+0x2a/0x230 ret_from_fork_asm+0x1a/0x30 other info that might help us debug this: Chain exists of: &tcp_ses->srv_lock --> &ret_buf->ses_lock --> &ret_buf->chan_lock Possible unsafe locking scenario: CPU0 CPU1 ---- ---- lock(&ret_buf->chan_lock); lock(&ret_buf->ses_lock); lock(&ret_buf->chan_lock); lock(&tcp_ses->srv_lock); *** DEADLOCK *** 3 locks held by cifsd/6055: #0: ffffffff857de398 (&cifs_tcp_ses_lock){+.+.}-{3:3}, at: cifs_signal_cifsd_for_reconnect+0x7b/0x200 #1: ffff888119c64060 (&ret_buf->ses_lock){+.+.}-{3:3}, at: cifs_signal_cifsd_for_reconnect+0x9c/0x200 #2: ffff888119c64330 (&ret_buf->chan_lock){+.+.}-{3:3}, at: cifs_signal_cifsd_for_reconnect+0xcf/0x200 Cc: [email protected] Reported-by: David Howells <[email protected]> Fixes: d7d7a66 ("cifs: avoid use of global locks for high contention data") Reviewed-by: David Howells <[email protected]> Tested-by: David Howells <[email protected]> Signed-off-by: Paulo Alcantara (Red Hat) <[email protected]> Signed-off-by: David Howells <[email protected]> Signed-off-by: Steve French <[email protected]>
1 parent e97f954 commit 711741f

File tree

2 files changed

+37
-22
lines changed

2 files changed

+37
-22
lines changed

fs/smb/client/cifsglob.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -709,6 +709,7 @@ inc_rfc1001_len(void *buf, int count)
709709
struct TCP_Server_Info {
710710
struct list_head tcp_ses_list;
711711
struct list_head smb_ses_list;
712+
struct list_head rlist; /* reconnect list */
712713
spinlock_t srv_lock; /* protect anything here that is not protected */
713714
__u64 conn_id; /* connection identifier (useful for debugging) */
714715
int srv_count; /* reference counter */

fs/smb/client/connect.c

Lines changed: 36 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -124,6 +124,14 @@ static void smb2_query_server_interfaces(struct work_struct *work)
124124
(SMB_INTERFACE_POLL_INTERVAL * HZ));
125125
}
126126

127+
#define set_need_reco(server) \
128+
do { \
129+
spin_lock(&server->srv_lock); \
130+
if (server->tcpStatus != CifsExiting) \
131+
server->tcpStatus = CifsNeedReconnect; \
132+
spin_unlock(&server->srv_lock); \
133+
} while (0)
134+
127135
/*
128136
* Update the tcpStatus for the server.
129137
* This is used to signal the cifsd thread to call cifs_reconnect
@@ -137,39 +145,45 @@ void
137145
cifs_signal_cifsd_for_reconnect(struct TCP_Server_Info *server,
138146
bool all_channels)
139147
{
140-
struct TCP_Server_Info *pserver;
148+
struct TCP_Server_Info *nserver;
141149
struct cifs_ses *ses;
150+
LIST_HEAD(reco);
142151
int i;
143152

144-
/* If server is a channel, select the primary channel */
145-
pserver = SERVER_IS_CHAN(server) ? server->primary_server : server;
146-
147153
/* if we need to signal just this channel */
148154
if (!all_channels) {
149-
spin_lock(&server->srv_lock);
150-
if (server->tcpStatus != CifsExiting)
151-
server->tcpStatus = CifsNeedReconnect;
152-
spin_unlock(&server->srv_lock);
155+
set_need_reco(server);
153156
return;
154157
}
155158

156-
spin_lock(&cifs_tcp_ses_lock);
157-
list_for_each_entry(ses, &pserver->smb_ses_list, smb_ses_list) {
158-
if (cifs_ses_exiting(ses))
159-
continue;
160-
spin_lock(&ses->chan_lock);
161-
for (i = 0; i < ses->chan_count; i++) {
162-
if (!ses->chans[i].server)
159+
if (SERVER_IS_CHAN(server))
160+
server = server->primary_server;
161+
scoped_guard(spinlock, &cifs_tcp_ses_lock) {
162+
set_need_reco(server);
163+
list_for_each_entry(ses, &server->smb_ses_list, smb_ses_list) {
164+
spin_lock(&ses->ses_lock);
165+
if (ses->ses_status == SES_EXITING) {
166+
spin_unlock(&ses->ses_lock);
163167
continue;
164-
165-
spin_lock(&ses->chans[i].server->srv_lock);
166-
if (ses->chans[i].server->tcpStatus != CifsExiting)
167-
ses->chans[i].server->tcpStatus = CifsNeedReconnect;
168-
spin_unlock(&ses->chans[i].server->srv_lock);
168+
}
169+
spin_lock(&ses->chan_lock);
170+
for (i = 1; i < ses->chan_count; i++) {
171+
nserver = ses->chans[i].server;
172+
if (!nserver)
173+
continue;
174+
nserver->srv_count++;
175+
list_add(&nserver->rlist, &reco);
176+
}
177+
spin_unlock(&ses->chan_lock);
178+
spin_unlock(&ses->ses_lock);
169179
}
170-
spin_unlock(&ses->chan_lock);
171180
}
172-
spin_unlock(&cifs_tcp_ses_lock);
181+
182+
list_for_each_entry_safe(server, nserver, &reco, rlist) {
183+
list_del_init(&server->rlist);
184+
set_need_reco(server);
185+
cifs_put_tcp_session(server, 0);
186+
}
173187
}
174188

175189
/*

0 commit comments

Comments
 (0)