From: Menglong Dong Currently, sk_busy_loop() is used for both the RX and TX paths. sk_busy_loop() calls napi_busy_loop() for the specified napi_id. However, some NIC drivers, such as virtio-net, have a separate TX NAPI. In this case, sk_busy_loop() doesn't work for the TX path, as it can only schedule the NAPI for the RX queue. Therefore, introduce sk_tx_busy_loop() for NIC drivers that support TX NAPI. It schedules the TX NAPI if one is available, and otherwise falls back to the NAPI identified by napi_id. Signed-off-by: Menglong Dong --- include/linux/netdevice.h | 1 + include/net/busy_poll.h | 41 ++++++++++++++++++++++++++++++++++++--- net/core/dev.c | 26 +++++++------------------ 3 files changed, 46 insertions(+), 22 deletions(-) diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 0e1e581efc5a..8a771b014d54 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -416,6 +416,7 @@ struct napi_struct { int napi_rmap_idx; int index; struct napi_config *config; + struct napi_struct *tx_napi; }; enum { diff --git a/include/net/busy_poll.h b/include/net/busy_poll.h index 6e172d0f6ef5..0959e80272c7 100644 --- a/include/net/busy_poll.h +++ b/include/net/busy_poll.h @@ -33,6 +33,12 @@ static inline bool napi_id_valid(unsigned int napi_id) #ifdef CONFIG_NET_RX_BUSY_POLL +enum { + NAPI_F_PREFER_BUSY_POLL = 1, + NAPI_F_END_ON_RESCHED = 2, + NAPI_F_TX_NAPI = 4, +}; + struct napi_struct; extern unsigned int sysctl_net_busy_read __read_mostly; extern unsigned int sysctl_net_busy_poll __read_mostly; @@ -49,9 +55,9 @@ static inline bool sk_can_busy_loop(const struct sock *sk) bool sk_busy_loop_end(void *p, unsigned long start_time); -void napi_busy_loop(unsigned int napi_id, - bool (*loop_end)(void *, unsigned long), - void *loop_end_arg, bool prefer_busy_poll, u16 budget); +void __napi_busy_loop(unsigned int napi_id, + bool (*loop_end)(void *, unsigned long), + void *loop_end_arg, unsigned int flags, u16 budget); void napi_busy_loop_rcu(unsigned int napi_id, bool (*loop_end)(void *, unsigned long), @@ -60,6 +66,17 @@ 
void napi_busy_loop_rcu(unsigned int napi_id, void napi_suspend_irqs(unsigned int napi_id); void napi_resume_irqs(unsigned int napi_id); +static inline void napi_busy_loop(unsigned int napi_id, + bool (*loop_end)(void *, unsigned long), + void *loop_end_arg, bool prefer_busy_poll, u16 budget) +{ + unsigned int flags = prefer_busy_poll ? NAPI_F_PREFER_BUSY_POLL : 0; + + rcu_read_lock(); + __napi_busy_loop(napi_id, loop_end, loop_end_arg, flags, budget); + rcu_read_unlock(); +} + #else /* CONFIG_NET_RX_BUSY_POLL */ static inline unsigned long net_busy_loop_on(void) { @@ -126,6 +143,24 @@ static inline void sk_busy_loop(struct sock *sk, int nonblock) #endif } +static inline void sk_tx_busy_loop(struct sock *sk, int nonblock) +{ +#ifdef CONFIG_NET_RX_BUSY_POLL + unsigned int napi_id = READ_ONCE(sk->sk_napi_id); + unsigned int flags = NAPI_F_TX_NAPI; + + if (READ_ONCE(sk->sk_prefer_busy_poll)) + flags |= NAPI_F_PREFER_BUSY_POLL; + + if (napi_id_valid(napi_id)) { + rcu_read_lock(); + __napi_busy_loop(napi_id, nonblock ? 
NULL : sk_busy_loop_end, sk, flags, + READ_ONCE(sk->sk_busy_poll_budget) ?: BUSY_POLL_BUDGET); + rcu_read_unlock(); + } +#endif +} + /* used in the NIC receive handler to mark the skb */ static inline void __skb_mark_napi_id(struct sk_buff *skb, const struct gro_node *gro) diff --git a/net/core/dev.c b/net/core/dev.c index 0c6c270d9f7d..645a2e851918 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -6878,11 +6878,6 @@ static void __busy_poll_stop(struct napi_struct *napi, unsigned long timeout) HRTIMER_MODE_REL_PINNED); } -enum { - NAPI_F_PREFER_BUSY_POLL = 1, - NAPI_F_END_ON_RESCHED = 2, -}; - static void busy_poll_stop(struct napi_struct *napi, void *have_poll_lock, unsigned flags, u16 budget) { @@ -6932,9 +6927,9 @@ static void busy_poll_stop(struct napi_struct *napi, void *have_poll_lock, local_bh_enable(); } -static void __napi_busy_loop(unsigned int napi_id, +void __napi_busy_loop(unsigned int napi_id, bool (*loop_end)(void *, unsigned long), - void *loop_end_arg, unsigned flags, u16 budget) + void *loop_end_arg, unsigned int flags, u16 budget) { unsigned long start_time = loop_end ? busy_loop_current_time() : 0; int (*napi_poll)(struct napi_struct *napi, int budget); @@ -6951,6 +6946,9 @@ static void __napi_busy_loop(unsigned int napi_id, if (!napi) return; + if ((flags & NAPI_F_TX_NAPI) && napi->tx_napi) + napi = napi->tx_napi; + if (!IS_ENABLED(CONFIG_PREEMPT_RT)) preempt_disable(); for (;;) { @@ -7015,6 +7013,7 @@ static void __napi_busy_loop(unsigned int napi_id, if (!IS_ENABLED(CONFIG_PREEMPT_RT)) preempt_enable(); } +EXPORT_SYMBOL(__napi_busy_loop); void napi_busy_loop_rcu(unsigned int napi_id, bool (*loop_end)(void *, unsigned long), @@ -7028,18 +7027,6 @@ void napi_busy_loop_rcu(unsigned int napi_id, __napi_busy_loop(napi_id, loop_end, loop_end_arg, flags, budget); } -void napi_busy_loop(unsigned int napi_id, - bool (*loop_end)(void *, unsigned long), - void *loop_end_arg, bool prefer_busy_poll, u16 budget) -{ - unsigned flags = prefer_busy_poll ? 
NAPI_F_PREFER_BUSY_POLL : 0; - - rcu_read_lock(); - __napi_busy_loop(napi_id, loop_end, loop_end_arg, flags, budget); - rcu_read_unlock(); -} -EXPORT_SYMBOL(napi_busy_loop); - void napi_suspend_irqs(unsigned int napi_id) { struct napi_struct *napi; @@ -7579,6 +7566,7 @@ void netif_napi_add_weight_locked(struct net_device *dev, napi->poll_owner = -1; #endif napi->list_owner = -1; + napi->tx_napi = NULL; set_bit(NAPI_STATE_SCHED, &napi->state); set_bit(NAPI_STATE_NPSVC, &napi->state); netif_napi_dev_list_add(dev, napi); -- 2.54.0