Followup of commit c51da3f7a161 ("net: remove sock_i_uid()") A recent syzbot report was the trigger for this change. Over the years, we had many problems caused by the read_lock[_bh](&sk->sk_callback_lock) in sock_i_uid(). We could fix smc_diag_dump_proto() or make a more radical move: Instead of waiting for new syzbot reports, cache the socket inode number in sk->sk_ino, so that we no longer need to acquire sk->sk_callback_lock in sock_i_ino(). This makes socket dumps faster (one less cache line miss, and two atomic ops avoided). Prior art: commit 25a9c8a4431c ("netlink: Add __sock_i_ino() for __netlink_diag_dump().") commit 4f9bf2a2f5aa ("tcp: Don't acquire inet_listen_hashbucket::lock with disabled BH.") commit efc3dbc37412 ("rds: Make rds_sock_lock BH rather than IRQ safe.") Fixes: d2d6422f8bd1 ("x86: Allow to enable PREEMPT_RT.") Reported-by: syzbot+50603c05bbdf4dfdaffa@syzkaller.appspotmail.com Closes: https://lore.kernel.org/netdev/68b73804.050a0220.3db4df.01d8.GAE@google.com/T/#u Signed-off-by: Eric Dumazet Cc: Sebastian Andrzej Siewior Cc: Kuniyuki Iwashima --- include/net/sock.h | 17 +++++++++++++---- net/core/sock.c | 22 ---------------------- net/mptcp/protocol.c | 1 - net/netlink/diag.c | 2 +- 4 files changed, 14 insertions(+), 28 deletions(-) diff --git a/include/net/sock.h b/include/net/sock.h index c8a4b283df6f..fb13322a11fc 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -285,6 +285,7 @@ struct sk_filter; * @sk_ack_backlog: current listen backlog * @sk_max_ack_backlog: listen backlog set in listen() * @sk_uid: user id of owner + * @sk_ino: inode number (zero if orphaned) * @sk_prefer_busy_poll: prefer busypolling over softirq processing * @sk_busy_poll_budget: napi processing budget when busypolling * @sk_priority: %SO_PRIORITY setting @@ -518,6 +519,7 @@ struct sock { u32 sk_ack_backlog; u32 sk_max_ack_backlog; kuid_t sk_uid; + unsigned long sk_ino; spinlock_t sk_peer_lock; int sk_bind_phc; struct pid *sk_peer_pid; @@ -2056,6 +2058,10 @@ static inline int sk_rx_queue_get(const struct sock *sk) static inline void sk_set_socket(struct sock *sk, struct socket *sock) { sk->sk_socket = sock; + if (sock) { + WRITE_ONCE(sk->sk_uid, SOCK_INODE(sock)->i_uid); + WRITE_ONCE(sk->sk_ino, SOCK_INODE(sock)->i_ino); + } } static inline wait_queue_head_t *sk_sleep(struct sock *sk) @@ -2077,6 +2083,7 @@ static inline void sock_orphan(struct sock *sk) sk_set_socket(sk, NULL); sk->sk_wq = NULL; /* Note: sk_uid is unchanged. */ + WRITE_ONCE(sk->sk_ino, 0); write_unlock_bh(&sk->sk_callback_lock); } @@ -2087,20 +2094,22 @@ static inline void sock_graft(struct sock *sk, struct socket *parent) rcu_assign_pointer(sk->sk_wq, &parent->wq); parent->sk = sk; sk_set_socket(sk, parent); - WRITE_ONCE(sk->sk_uid, SOCK_INODE(parent)->i_uid); security_sock_graft(sk, parent); write_unlock_bh(&sk->sk_callback_lock); } +static inline unsigned long sock_i_ino(const struct sock *sk) +{ + /* Paired with WRITE_ONCE() in sock_graft() and sock_orphan() */ + return READ_ONCE(sk->sk_ino); +} + static inline kuid_t sk_uid(const struct sock *sk) { /* Paired with WRITE_ONCE() in sockfs_setattr() */ return READ_ONCE(sk->sk_uid); } -unsigned long __sock_i_ino(struct sock *sk); -unsigned long sock_i_ino(struct sock *sk); - static inline kuid_t sock_net_uid(const struct net *net, const struct sock *sk) { return sk ? sk_uid(sk) : make_kuid(net->user_ns, 0); diff --git a/net/core/sock.c b/net/core/sock.c index 7c26ec8dce63..158bddd23134 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -2780,28 +2780,6 @@ void sock_pfree(struct sk_buff *skb) EXPORT_SYMBOL(sock_pfree); #endif /* CONFIG_INET */ -unsigned long __sock_i_ino(struct sock *sk) -{ - unsigned long ino; - - read_lock(&sk->sk_callback_lock); - ino = sk->sk_socket ? SOCK_INODE(sk->sk_socket)->i_ino : 0; - read_unlock(&sk->sk_callback_lock); - return ino; -} -EXPORT_SYMBOL(__sock_i_ino); - -unsigned long sock_i_ino(struct sock *sk) -{ - unsigned long ino; - - local_bh_disable(); - ino = __sock_i_ino(sk); - local_bh_enable(); - return ino; -} -EXPORT_SYMBOL(sock_i_ino); - /* * Allocate a skb from the socket's send buffer. */ diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index 9a287b75c1b3..e6fd97b21e9e 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -3554,7 +3554,6 @@ void mptcp_sock_graft(struct sock *sk, struct socket *parent) write_lock_bh(&sk->sk_callback_lock); rcu_assign_pointer(sk->sk_wq, &parent->wq); sk_set_socket(sk, parent); - WRITE_ONCE(sk->sk_uid, SOCK_INODE(parent)->i_uid); write_unlock_bh(&sk->sk_callback_lock); } diff --git a/net/netlink/diag.c b/net/netlink/diag.c index 61981e01fd6f..b8e58132e8af 100644 --- a/net/netlink/diag.c +++ b/net/netlink/diag.c @@ -168,7 +168,7 @@ static int __netlink_diag_dump(struct sk_buff *skb, struct netlink_callback *cb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, NLM_F_MULTI, - __sock_i_ino(sk)) < 0) { + sock_i_ino(sk)) < 0) { ret = 1; break; } -- 2.51.0.338.gd7d06c2dae-goog