UDP TX packets destructor is sock_wfree(). It suffers from a cache line bouncing in sock_def_write_space_wfree(). Instead of reading sk->sk_wmem_alloc after we just did an atomic RMW on it, use __refcount_sub_and_test() to get the old value for free, and pass the new value to sock_def_write_space_wfree(). Add __sock_writeable() helper. Signed-off-by: Eric Dumazet --- include/net/sock.h | 6 +++++- net/core/sock.c | 14 ++++++++------ 2 files changed, 13 insertions(+), 7 deletions(-) diff --git a/include/net/sock.h b/include/net/sock.h index 30ac2eb4ef9bf73743e3dc9e66c6c3059f34964e..7d9bfaaff913d3bce8d0a12df8987db96ee2bad6 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -2591,12 +2591,16 @@ static inline struct page_frag *sk_page_frag(struct sock *sk) bool sk_page_frag_refill(struct sock *sk, struct page_frag *pfrag); +static inline bool __sock_writeable(const struct sock *sk, int wmem_alloc) +{ + return wmem_alloc < (READ_ONCE(sk->sk_sndbuf) >> 1); +} /* * Default write policy as shown to user space via poll/select/SIGIO */ static inline bool sock_writeable(const struct sock *sk) { - return refcount_read(&sk->sk_wmem_alloc) < (READ_ONCE(sk->sk_sndbuf) >> 1); + return __sock_writeable(sk, refcount_read(&sk->sk_wmem_alloc)); } static inline gfp_t gfp_any(void) diff --git a/net/core/sock.c b/net/core/sock.c index 08ae20069b6d287745800710192396f76c8781b4..0ca3566cff83a8e6ee37e60a37a5a6f533203d0f 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -155,7 +155,7 @@ static DEFINE_MUTEX(proto_list_mutex); static LIST_HEAD(proto_list); -static void sock_def_write_space_wfree(struct sock *sk); +static void sock_def_write_space_wfree(struct sock *sk, int wmem_alloc); static void sock_def_write_space(struct sock *sk); /** @@ -2648,16 +2648,18 @@ EXPORT_SYMBOL_GPL(sk_setup_caps); */ void sock_wfree(struct sk_buff *skb) { - struct sock *sk = skb->sk; unsigned int len = skb->truesize; + struct sock *sk = skb->sk; bool free; + int old; if (!sock_flag(sk, SOCK_USE_WRITE_QUEUE)) { if (sock_flag(sk, SOCK_RCU_FREE) && sk->sk_write_space == sock_def_write_space) { rcu_read_lock(); - free = refcount_sub_and_test(len, &sk->sk_wmem_alloc); - sock_def_write_space_wfree(sk); + free = __refcount_sub_and_test(len, &sk->sk_wmem_alloc, + &old); + sock_def_write_space_wfree(sk, old - len); rcu_read_unlock(); if (unlikely(free)) __sk_free(sk); @@ -3589,12 +3591,12 @@ static void sock_def_write_space(struct sock *sk) * for SOCK_RCU_FREE sockets under RCU read section and after putting * ->sk_wmem_alloc. */ -static void sock_def_write_space_wfree(struct sock *sk) +static void sock_def_write_space_wfree(struct sock *sk, int wmem_alloc) { /* Do not wake up a writer until he can make "significant" * progress. --DaveM */ - if (sock_writeable(sk)) { + if (__sock_writeable(sk, wmem_alloc)) { struct socket_wq *wq = rcu_dereference(sk->sk_wq); /* rely on refcount_sub from sock_wfree() */ -- 2.51.0.858.gf9c4a03a3a-goog