Move kernel_sock_shutdown() out of the tx_lock critical section to break
the circular lock dependency: tx_lock -> sk_lock (via inet_shutdown) vs
the reverse order in the block I/O submission path.

The lockdep splat shows:

  sk_lock-AF_INET6 --> &cmd->lock --> &nsock->tx_lock

When recv_work() detects a connection failure, it calls
nbd_mark_nsock_dead() under tx_lock, which calls kernel_sock_shutdown()
-> inet_shutdown() -> lock_sock(), creating the tx_lock -> sk_lock
dependency that conflicts with the reverse order in the I/O path.

Fix this by introducing a deferred shutdown mechanism:
nbd_mark_nsock_dead() records the socket needing shutdown in
nsock->shutdown_sock (with an extra file reference to prevent
use-after-free from the reconnect path), and the actual
kernel_sock_shutdown() is performed by nbd_nsock_deferred_shutdown()
after tx_lock is released at each call site. The helper uses xchg() to
guarantee the shutdown is executed exactly once even when multiple paths
race to consume it.

Fixes: f3733247ae7c ("nbd: handle single path failures gracefully")
Reported-by: syzbot+576095eed5658cbd9b63@syzkaller.appspotmail.com
Closes: https://syzkaller.appspot.com/bug?extid=576095eed5658cbd9b63
Signed-off-by: Yun Zhou
---
 drivers/block/nbd.c | 31 ++++++++++++++++++++++++++++++-
 1 file changed, 30 insertions(+), 1 deletion(-)

diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c
index 8f10762e90ef..497f3bbe5795 100644
--- a/drivers/block/nbd.c
+++ b/drivers/block/nbd.c
@@ -58,6 +58,7 @@ struct nbd_sock {
 	struct socket *sock;
 	struct mutex tx_lock;
 	struct request *pending;
+	struct socket *shutdown_sock;
 	int sent;
 	bool dead;
 	int fallback_index;
@@ -315,7 +316,14 @@ static void nbd_mark_nsock_dead(struct nbd_device *nbd, struct nbd_sock *nsock,
 		}
 	}
 	if (!nsock->dead) {
-		kernel_sock_shutdown(nsock->sock, SHUT_RDWR);
+		/*
+		 * Defer shutdown to after tx_lock is released to avoid
+		 * circular lock dependency (tx_lock -> sk_lock).
+		 * Hold an extra file reference so the socket remains
+		 * valid until the deferred shutdown completes.
+		 */
+		nsock->shutdown_sock = nsock->sock;
+		get_file(nsock->sock->file);
 		if (atomic_dec_return(&nbd->config->live_connections) == 0) {
 			if (test_and_clear_bit(NBD_RT_DISCONNECT_REQUESTED,
 					       &nbd->config->runtime_flags)) {
@@ -331,6 +339,20 @@ static void nbd_mark_nsock_dead(struct nbd_device *nbd, struct nbd_sock *nsock,
 	nsock->sent = 0;
 }
 
+/*
+ * Perform deferred socket shutdown outside of tx_lock.
+ * Uses xchg to guarantee only one caller performs the shutdown.
+ */
+static void nbd_nsock_deferred_shutdown(struct nbd_sock *nsock)
+{
+	struct socket *sock = xchg(&nsock->shutdown_sock, NULL);
+
+	if (sock) {
+		kernel_sock_shutdown(sock, SHUT_RDWR);
+		sockfd_put(sock);
+	}
+}
+
 static int nbd_set_size(struct nbd_device *nbd, loff_t bytesize, loff_t blksize)
 {
 	struct queue_limits lim;
@@ -410,6 +432,7 @@ static void sock_shutdown(struct nbd_device *nbd)
 		mutex_lock(&nsock->tx_lock);
 		nbd_mark_nsock_dead(nbd, nsock, 0);
 		mutex_unlock(&nsock->tx_lock);
+		nbd_nsock_deferred_shutdown(nsock);
 	}
 	dev_warn(disk_to_dev(nbd->disk), "shutting down sockets\n");
 }
@@ -502,6 +525,7 @@ static enum blk_eh_timer_return nbd_xmit_timeout(struct request *req)
 			if (cmd->cookie == nsock->cookie)
 				nbd_mark_nsock_dead(nbd, nsock, 1);
 			mutex_unlock(&nsock->tx_lock);
+			nbd_nsock_deferred_shutdown(nsock);
 		}
 		nbd_requeue_cmd(cmd);
 		mutex_unlock(&cmd->lock);
@@ -836,6 +860,7 @@ static void nbd_pending_cmd_work(struct work_struct *work)
 		wait_ms *= 2;
 	}
 	mutex_unlock(&nsock->tx_lock);
+	nbd_nsock_deferred_shutdown(nsock);
 	clear_bit(NBD_CMD_PARTIAL_SEND, &cmd->flags);
 out:
 	mutex_unlock(&cmd->lock);
@@ -1020,6 +1045,7 @@ static void recv_work(struct work_struct *work)
 	mutex_lock(&nsock->tx_lock);
 	nbd_mark_nsock_dead(nbd, nsock, 1);
 	mutex_unlock(&nsock->tx_lock);
+	nbd_nsock_deferred_shutdown(nsock);
 
 	atomic_dec(&config->recv_threads);
 	wake_up(&config->recv_wq);
@@ -1177,6 +1203,7 @@ static blk_status_t nbd_handle_cmd(struct nbd_cmd *cmd, int index)
 	ret = nbd_send_cmd(nbd, cmd, index);
 out:
 	mutex_unlock(&nsock->tx_lock);
+	nbd_nsock_deferred_shutdown(nsock);
 	nbd_config_put(nbd);
 	return ret;
 }
@@ -1391,6 +1418,8 @@ static int nbd_reconnect_socket(struct nbd_device *nbd, unsigned long arg)
 		args->nsock = nsock;
 		nsock->cookie++;
 		mutex_unlock(&nsock->tx_lock);
+		/* Complete any pending shutdown of the old socket */
+		nbd_nsock_deferred_shutdown(nsock);
 		sockfd_put(old);
 
 		clear_bit(NBD_RT_DISCONNECTED, &config->runtime_flags);
-- 
2.43.0