From: Jason Xing - Split cq_lock into two smaller locks: cq_prod_lock and cq_cached_prod_lock - Avoid disabling/enabling interrupts in the hot xmit path In either xsk_cq_cancel_locked() or xsk_cq_reserve_locked() function, the race condition is only between multiple xsks sharing the same pool. They are all in the process context rather than interrupt context, so now the small lock named cq_cached_prod_lock can be used without handling interrupts. While cq_cached_prod_lock ensures the exclusive modification of @cached_prod, cq_prod_lock in xsk_cq_submit_addr_locked() only cares about @producer and the corresponding @desc. Neither of them needs to be consistent with @cached_prod, which is protected by cq_cached_prod_lock. That's the reason why the previous big lock can be split into two smaller ones. Frequently disabling and enabling interrupts is very time consuming in some cases, especially at per-descriptor granularity; this can now be avoided after this optimization, even when the pool is shared by multiple xsks. Signed-off-by: Jason Xing --- include/net/xsk_buff_pool.h | 13 +++++++++---- net/xdp/xsk.c | 14 ++++++-------- net/xdp/xsk_buff_pool.c | 3 ++- 3 files changed, 17 insertions(+), 13 deletions(-) diff --git a/include/net/xsk_buff_pool.h b/include/net/xsk_buff_pool.h index cac56e6b0869..92a2358c6ce3 100644 --- a/include/net/xsk_buff_pool.h +++ b/include/net/xsk_buff_pool.h @@ -85,11 +85,16 @@ struct xsk_buff_pool { bool unaligned; bool tx_sw_csum; void *addrs; - /* Mutual exclusion of the completion ring in the SKB mode. Two cases to protect: - * NAPI TX thread and sendmsg error paths in the SKB destructor callback and when - * sockets share a single cq when the same netdev and queue id is shared. + /* Mutual exclusion of the completion ring in the SKB mode. + * Protect: NAPI TX thread and sendmsg error paths in the SKB + * destructor callback. */ - spinlock_t cq_lock; + spinlock_t cq_prod_lock; + /* Mutual exclusion of the completion ring in the SKB mode. 
+ * Protect: when sockets share a single cq when the same netdev + * and queue id is shared. + */ + spinlock_t cq_cached_prod_lock; struct xdp_buff_xsk *free_heads[]; }; diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c index 76f797fcc49c..d254817b8a53 100644 --- a/net/xdp/xsk.c +++ b/net/xdp/xsk.c @@ -549,14 +549,13 @@ static int xsk_wakeup(struct xdp_sock *xs, u8 flags) static int xsk_cq_reserve_locked(struct xsk_buff_pool *pool) { bool lock = !list_is_singular(&pool->xsk_tx_list); - unsigned long flags; int ret; if (lock) - spin_lock_irqsave(&pool->cq_lock, flags); + spin_lock(&pool->cq_cached_prod_lock); ret = xskq_prod_reserve(pool->cq); if (lock) - spin_unlock_irqrestore(&pool->cq_lock, flags); + spin_unlock(&pool->cq_cached_prod_lock); return ret; } @@ -569,7 +568,7 @@ static void xsk_cq_submit_addr_locked(struct xsk_buff_pool *pool, unsigned long flags; u32 idx; - spin_lock_irqsave(&pool->cq_lock, flags); + spin_lock_irqsave(&pool->cq_prod_lock, flags); idx = xskq_get_prod(pool->cq); xskq_prod_write_addr(pool->cq, idx, @@ -586,19 +585,18 @@ static void xsk_cq_submit_addr_locked(struct xsk_buff_pool *pool, } } xskq_prod_submit_n(pool->cq, descs_processed); - spin_unlock_irqrestore(&pool->cq_lock, flags); + spin_unlock_irqrestore(&pool->cq_prod_lock, flags); } static void xsk_cq_cancel_locked(struct xsk_buff_pool *pool, u32 n) { bool lock = !list_is_singular(&pool->xsk_tx_list); - unsigned long flags; if (lock) - spin_lock_irqsave(&pool->cq_lock, flags); + spin_lock(&pool->cq_cached_prod_lock); xskq_prod_cancel_n(pool->cq, n); if (lock) - spin_unlock_irqrestore(&pool->cq_lock, flags); + spin_unlock(&pool->cq_cached_prod_lock); } static void xsk_inc_num_desc(struct sk_buff *skb) diff --git a/net/xdp/xsk_buff_pool.c b/net/xdp/xsk_buff_pool.c index aa9788f20d0d..add44bd09cae 100644 --- a/net/xdp/xsk_buff_pool.c +++ b/net/xdp/xsk_buff_pool.c @@ -94,7 +94,8 @@ struct xsk_buff_pool *xp_create_and_assign_umem(struct xdp_sock *xs, INIT_LIST_HEAD(&pool->xskb_list); 
INIT_LIST_HEAD(&pool->xsk_tx_list); spin_lock_init(&pool->xsk_tx_list_lock); - spin_lock_init(&pool->cq_lock); + spin_lock_init(&pool->cq_prod_lock); + spin_lock_init(&pool->cq_cached_prod_lock); refcount_set(&pool->users, 1); pool->fq = xs->fq_tmp; -- 2.41.3