Currently, the refill work is a single global delayed work shared by all
receive queues. This commit makes the refill work per receive queue so that
the queues can be managed separately and further mistakes are avoided. It
also spares a successfully refilled queue from the napi_disable that the
global delayed refill work used to apply to every queue.

Signed-off-by: Bui Quang Minh
---
 drivers/net/virtio_net.c | 155 ++++++++++++++++++---------------------
 1 file changed, 72 insertions(+), 83 deletions(-)

diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index 1bb3aeca66c6..63126e490bda 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -379,6 +379,15 @@ struct receive_queue {
 	struct xdp_rxq_info xsk_rxq_info;
 
 	struct xdp_buff **xsk_buffs;
+
+	/* Is delayed refill enabled? */
+	bool refill_enabled;
+
+	/* The lock to synchronize the access to refill_enabled */
+	spinlock_t refill_lock;
+
+	/* Work struct for delayed refilling if we run low on memory. */
+	struct delayed_work refill;
 };
 
 #define VIRTIO_NET_RSS_MAX_KEY_SIZE 40
@@ -441,9 +450,6 @@ struct virtnet_info {
 	/* Packet virtio header size */
 	u8 hdr_len;
 
-	/* Work struct for delayed refilling if we run low on memory. */
-	struct delayed_work refill;
-
 	/* UDP tunnel support */
 	bool tx_tnl;
 
@@ -451,12 +457,6 @@ struct virtnet_info {
 
 	bool rx_tnl_csum;
 
-	/* Is delayed refill enabled? */
-	bool refill_enabled;
-
-	/* The lock to synchronize the access to refill_enabled */
-	spinlock_t refill_lock;
-
 	/* Work struct for config space updates */
 	struct work_struct config_work;
 
@@ -720,18 +720,18 @@ static void virtnet_rq_free_buf(struct virtnet_info *vi,
 	put_page(virt_to_head_page(buf));
 }
 
-static void enable_delayed_refill(struct virtnet_info *vi)
+static void enable_delayed_refill(struct receive_queue *rq)
 {
-	spin_lock_bh(&vi->refill_lock);
-	vi->refill_enabled = true;
-	spin_unlock_bh(&vi->refill_lock);
+	spin_lock_bh(&rq->refill_lock);
+	rq->refill_enabled = true;
+	spin_unlock_bh(&rq->refill_lock);
 }
 
-static void disable_delayed_refill(struct virtnet_info *vi)
+static void disable_delayed_refill(struct receive_queue *rq)
 {
-	spin_lock_bh(&vi->refill_lock);
-	vi->refill_enabled = false;
-	spin_unlock_bh(&vi->refill_lock);
+	spin_lock_bh(&rq->refill_lock);
+	rq->refill_enabled = false;
+	spin_unlock_bh(&rq->refill_lock);
 }
 
 static void enable_rx_mode_work(struct virtnet_info *vi)
@@ -2950,38 +2950,19 @@ static void virtnet_napi_disable(struct receive_queue *rq)
 
 static void refill_work(struct work_struct *work)
 {
-	struct virtnet_info *vi =
-		container_of(work, struct virtnet_info, refill.work);
+	struct receive_queue *rq =
+		container_of(work, struct receive_queue, refill.work);
 	bool still_empty;
-	int i;
-
-	for (i = 0; i < vi->curr_queue_pairs; i++) {
-		struct receive_queue *rq = &vi->rq[i];
-
-		/*
-		 * When queue API support is added in the future and the call
-		 * below becomes napi_disable_locked, this driver will need to
-		 * be refactored.
-		 *
-		 * One possible solution would be to:
-		 * - cancel refill_work with cancel_delayed_work (note:
-		 *   non-sync)
-		 * - cancel refill_work with cancel_delayed_work_sync in
-		 *   virtnet_remove after the netdev is unregistered
-		 * - wrap all of the work in a lock (perhaps the netdev
-		 *   instance lock)
-		 * - check netif_running() and return early to avoid a race
-		 */
-		napi_disable(&rq->napi);
-		still_empty = !try_fill_recv(vi, rq, GFP_KERNEL);
-		virtnet_napi_do_enable(rq->vq, &rq->napi);
+	napi_disable(&rq->napi);
+	still_empty = !try_fill_recv(rq->vq->vdev->priv, rq, GFP_KERNEL);
+	virtnet_napi_do_enable(rq->vq, &rq->napi);
 
-		/* In theory, this can happen: if we don't get any buffers in
-		 * we will *never* try to fill again.
-		 */
-		if (still_empty)
-			schedule_delayed_work(&vi->refill, HZ/2);
-	}
+	/* In theory, this can happen: if we don't get any buffers in
+	 * we will *never* try to fill again.
+	 */
+	if (still_empty)
+		schedule_delayed_work(&rq->refill, HZ / 2);
 }
 
 static int virtnet_receive_xsk_bufs(struct virtnet_info *vi,
@@ -3048,10 +3029,10 @@ static int virtnet_receive(struct receive_queue *rq, int budget,
 	if (rq->vq->num_free > min((unsigned int)budget,
 				   virtqueue_get_vring_size(rq->vq)) / 2) {
 		if (!try_fill_recv(vi, rq, GFP_ATOMIC)) {
-			spin_lock(&vi->refill_lock);
-			if (vi->refill_enabled)
-				schedule_delayed_work(&vi->refill, 0);
-			spin_unlock(&vi->refill_lock);
+			spin_lock(&rq->refill_lock);
+			if (rq->refill_enabled)
+				schedule_delayed_work(&rq->refill, 0);
+			spin_unlock(&rq->refill_lock);
 		}
 	}
 
@@ -3226,13 +3207,13 @@ static int virtnet_open(struct net_device *dev)
 	struct virtnet_info *vi = netdev_priv(dev);
 	int i, err;
 
-	enable_delayed_refill(vi);
-
 	for (i = 0; i < vi->max_queue_pairs; i++) {
-		if (i < vi->curr_queue_pairs)
+		if (i < vi->curr_queue_pairs) {
+			enable_delayed_refill(&vi->rq[i]);
 			/* Make sure we have some buffers: if oom use wq. */
 			if (!try_fill_recv(vi, &vi->rq[i], GFP_KERNEL))
-				schedule_delayed_work(&vi->refill, 0);
+				schedule_delayed_work(&vi->rq[i].refill, 0);
+		}
 
 		err = virtnet_enable_queue_pair(vi, i);
 		if (err < 0)
@@ -3251,10 +3232,9 @@ static int virtnet_open(struct net_device *dev)
 	return 0;
 
 err_enable_qp:
-	disable_delayed_refill(vi);
-	cancel_delayed_work_sync(&vi->refill);
-
 	for (i--; i >= 0; i--) {
+		disable_delayed_refill(&vi->rq[i]);
+		cancel_delayed_work_sync(&vi->rq[i].refill);
 		virtnet_disable_queue_pair(vi, i);
 		virtnet_cancel_dim(vi, &vi->rq[i].dim);
 	}
@@ -3447,14 +3427,15 @@ static void virtnet_rx_pause_all(struct virtnet_info *vi)
 {
 	int i;
 
-	/*
-	 * Make sure refill_work does not run concurrently to
-	 * avoid napi_disable race which leads to deadlock.
-	 */
-	disable_delayed_refill(vi);
-	cancel_delayed_work_sync(&vi->refill);
-	for (i = 0; i < vi->max_queue_pairs; i++)
+	for (i = 0; i < vi->max_queue_pairs; i++) {
+		/*
+		 * Make sure refill_work does not run concurrently to
+		 * avoid napi_disable race which leads to deadlock.
+		 */
+		disable_delayed_refill(&vi->rq[i]);
+		cancel_delayed_work_sync(&vi->rq[i].refill);
 		__virtnet_rx_pause(vi, &vi->rq[i]);
+	}
 }
 
 static void virtnet_rx_pause(struct virtnet_info *vi, struct receive_queue *rq)
@@ -3463,8 +3444,8 @@ static void virtnet_rx_pause(struct virtnet_info *vi, struct receive_queue *rq)
 	 * Make sure refill_work does not run concurrently to
 	 * avoid napi_disable race which leads to deadlock.
 	 */
-	disable_delayed_refill(vi);
-	cancel_delayed_work_sync(&vi->refill);
+	disable_delayed_refill(rq);
+	cancel_delayed_work_sync(&rq->refill);
 
 	__virtnet_rx_pause(vi, rq);
 }
@@ -3481,25 +3462,26 @@ static void __virtnet_rx_resume(struct virtnet_info *vi,
 		virtnet_napi_enable(rq);
 
 	if (schedule_refill)
-		schedule_delayed_work(&vi->refill, 0);
+		schedule_delayed_work(&rq->refill, 0);
 }
 
 static void virtnet_rx_resume_all(struct virtnet_info *vi)
 {
 	int i;
 
-	enable_delayed_refill(vi);
 	for (i = 0; i < vi->max_queue_pairs; i++) {
-		if (i < vi->curr_queue_pairs)
+		if (i < vi->curr_queue_pairs) {
+			enable_delayed_refill(&vi->rq[i]);
 			__virtnet_rx_resume(vi, &vi->rq[i], true);
-		else
+		} else {
 			__virtnet_rx_resume(vi, &vi->rq[i], false);
+		}
 	}
 }
 
 static void virtnet_rx_resume(struct virtnet_info *vi, struct receive_queue *rq)
 {
-	enable_delayed_refill(vi);
+	enable_delayed_refill(rq);
 	__virtnet_rx_resume(vi, rq, true);
 }
 
@@ -3830,10 +3812,16 @@ static int virtnet_set_queues(struct virtnet_info *vi, u16 queue_pairs)
 succ:
 	vi->curr_queue_pairs = queue_pairs;
 	/* virtnet_open() will refill when device is going to up. */
-	spin_lock_bh(&vi->refill_lock);
-	if (dev->flags & IFF_UP && vi->refill_enabled)
-		schedule_delayed_work(&vi->refill, 0);
-	spin_unlock_bh(&vi->refill_lock);
+	if (dev->flags & IFF_UP) {
+		int i;
+
+		for (i = 0; i < vi->curr_queue_pairs; i++) {
+			spin_lock_bh(&vi->rq[i].refill_lock);
+			if (vi->rq[i].refill_enabled)
+				schedule_delayed_work(&vi->rq[i].refill, 0);
+			spin_unlock_bh(&vi->rq[i].refill_lock);
+		}
+	}
 
 	return 0;
 }
@@ -3843,10 +3831,6 @@ static int virtnet_close(struct net_device *dev)
 	struct virtnet_info *vi = netdev_priv(dev);
 	int i;
 
-	/* Make sure NAPI doesn't schedule refill work */
-	disable_delayed_refill(vi);
-	/* Make sure refill_work doesn't re-enable napi! */
-	cancel_delayed_work_sync(&vi->refill);
 	/* Prevent the config change callback from changing carrier
 	 * after close
 	 */
@@ -3857,6 +3841,10 @@ static int virtnet_close(struct net_device *dev)
 	cancel_work_sync(&vi->config_work);
 
 	for (i = 0; i < vi->max_queue_pairs; i++) {
+		/* Make sure NAPI doesn't schedule refill work */
+		disable_delayed_refill(&vi->rq[i]);
+		/* Make sure refill_work doesn't re-enable napi! */
+		cancel_delayed_work_sync(&vi->rq[i].refill);
 		virtnet_disable_queue_pair(vi, i);
 		virtnet_cancel_dim(vi, &vi->rq[i].dim);
 	}
@@ -5802,7 +5790,6 @@ static int virtnet_restore_up(struct virtio_device *vdev)
 
 	virtio_device_ready(vdev);
 
-	enable_delayed_refill(vi);
 	enable_rx_mode_work(vi);
 
 	if (netif_running(vi->dev)) {
@@ -6559,8 +6546,9 @@ static int virtnet_alloc_queues(struct virtnet_info *vi)
 	if (!vi->rq)
 		goto err_rq;
 
-	INIT_DELAYED_WORK(&vi->refill, refill_work);
 	for (i = 0; i < vi->max_queue_pairs; i++) {
+		INIT_DELAYED_WORK(&vi->rq[i].refill, refill_work);
+		spin_lock_init(&vi->rq[i].refill_lock);
 		vi->rq[i].pages = NULL;
 		netif_napi_add_config(vi->dev, &vi->rq[i].napi, virtnet_poll,
 				      i);
@@ -6901,7 +6889,6 @@ static int virtnet_probe(struct virtio_device *vdev)
 
 	INIT_WORK(&vi->config_work, virtnet_config_changed_work);
 	INIT_WORK(&vi->rx_mode_work, virtnet_rx_mode_work);
-	spin_lock_init(&vi->refill_lock);
 
 	if (virtio_has_feature(vdev, VIRTIO_NET_F_MRG_RXBUF)) {
 		vi->mergeable_rx_bufs = true;
@@ -7165,7 +7152,9 @@ static int virtnet_probe(struct virtio_device *vdev)
 	net_failover_destroy(vi->failover);
 free_vqs:
 	virtio_reset_device(vdev);
-	cancel_delayed_work_sync(&vi->refill);
+	for (i = 0; i < vi->max_queue_pairs; i++)
+		cancel_delayed_work_sync(&vi->rq[i].refill);
+
 	free_receive_page_frags(vi);
 	virtnet_del_vqs(vi);
 free:
-- 
2.43.0
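As a side note for review, the per-queue state added above boils down to each
receive queue owning its refill flag, its lock and its deferred work item
instead of sharing one per-device set. Below is a minimal, hedged userspace
sketch of that pattern in plain C with pthreads; struct rxq, rx_out_of_buffers
and work_scheduled are illustrative names and stand-ins, not the driver's
kernel APIs.

#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

struct rxq {
	pthread_mutex_t refill_lock;	/* models rq->refill_lock */
	bool refill_enabled;		/* models rq->refill_enabled */
	bool work_scheduled;		/* stands in for queueing the per-queue delayed work */
};

static void enable_delayed_refill(struct rxq *q)
{
	pthread_mutex_lock(&q->refill_lock);
	q->refill_enabled = true;
	pthread_mutex_unlock(&q->refill_lock);
}

static void disable_delayed_refill(struct rxq *q)
{
	pthread_mutex_lock(&q->refill_lock);
	q->refill_enabled = false;
	pthread_mutex_unlock(&q->refill_lock);
}

/* Models the out-of-buffers path in virtnet_receive(): only queue the
 * per-queue refill work while refill is enabled for this queue. */
static void rx_out_of_buffers(struct rxq *q)
{
	pthread_mutex_lock(&q->refill_lock);
	if (q->refill_enabled)
		q->work_scheduled = true;
	pthread_mutex_unlock(&q->refill_lock);
}

int main(void)
{
	struct rxq q = { .refill_lock = PTHREAD_MUTEX_INITIALIZER };

	rx_out_of_buffers(&q);	/* still disabled: nothing is queued */
	printf("scheduled=%d\n", q.work_scheduled);
	enable_delayed_refill(&q);
	rx_out_of_buffers(&q);	/* enabled: the work gets queued */
	printf("scheduled=%d\n", q.work_scheduled);
	disable_delayed_refill(&q);
	return 0;
}

Because the flag, the lock and the work item all live in the queue, disabling
or cancelling refill for one queue no longer touches any other queue's NAPI.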
Calling napi_disable() on an already disabled napi can deadlock. Because the
delayed refill work calls napi_disable(), we must ensure that the refill work
is only enabled and scheduled after the rx queue's NAPI has been enabled.

Signed-off-by: Bui Quang Minh
---
 drivers/net/virtio_net.c | 31 ++++++++++++++++++++++++-------
 1 file changed, 24 insertions(+), 7 deletions(-)

diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index 63126e490bda..8016d2b378cf 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -3208,16 +3208,31 @@ static int virtnet_open(struct net_device *dev)
 	int i, err;
 
 	for (i = 0; i < vi->max_queue_pairs; i++) {
+		bool schedule_refill = false;
+
+		/* - We must call try_fill_recv before enabling napi of the same
+		 *   receive queue so that it doesn't race with the call in
+		 *   virtnet_receive.
+		 * - We must enable and schedule delayed refill work only when
+		 *   we have enabled all the receive queue's napi. Otherwise, in
+		 *   refill_work, we have a deadlock when calling napi_disable on
+		 *   an already disabled napi.
+		 */
 		if (i < vi->curr_queue_pairs) {
-			enable_delayed_refill(&vi->rq[i]);
 			/* Make sure we have some buffers: if oom use wq. */
 			if (!try_fill_recv(vi, &vi->rq[i], GFP_KERNEL))
-				schedule_delayed_work(&vi->rq[i].refill, 0);
+				schedule_refill = true;
 		}
 
 		err = virtnet_enable_queue_pair(vi, i);
 		if (err < 0)
 			goto err_enable_qp;
+
+		if (i < vi->curr_queue_pairs) {
+			enable_delayed_refill(&vi->rq[i]);
+			if (schedule_refill)
+				schedule_delayed_work(&vi->rq[i].refill, 0);
+		}
 	}
 
 	if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_STATUS)) {
@@ -3456,11 +3471,16 @@ static void __virtnet_rx_resume(struct virtnet_info *vi,
 	bool running = netif_running(vi->dev);
 	bool schedule_refill = false;
 
+	/* See the comment in virtnet_open for the ordering rule
+	 * of try_fill_recv, receive queue napi_enable and delayed
+	 * refill enable/schedule.
+	 */
 	if (refill && !try_fill_recv(vi, rq, GFP_KERNEL))
 		schedule_refill = true;
 	if (running)
 		virtnet_napi_enable(rq);
+	enable_delayed_refill(rq);
 
 	if (schedule_refill)
 		schedule_delayed_work(&rq->refill, 0);
 }
@@ -3470,18 +3490,15 @@ static void virtnet_rx_resume_all(struct virtnet_info *vi)
 	int i;
 
 	for (i = 0; i < vi->max_queue_pairs; i++) {
-		if (i < vi->curr_queue_pairs) {
-			enable_delayed_refill(&vi->rq[i]);
+		if (i < vi->curr_queue_pairs)
 			__virtnet_rx_resume(vi, &vi->rq[i], true);
-		} else {
+		else
 			__virtnet_rx_resume(vi, &vi->rq[i], false);
-		}
 	}
 }
 
 static void virtnet_rx_resume(struct virtnet_info *vi, struct receive_queue *rq)
 {
-	enable_delayed_refill(rq);
 	__virtnet_rx_resume(vi, rq, true);
 }
-- 
2.43.0

As enable_delayed_refill has been moved after napi_enable, it is possible that
a refill work needs to be scheduled in virtnet_receive but cannot be, because
delayed refill is still disabled. This can leave the receive side stuck: if we
do not have any receive buffers, nothing will trigger the refill logic. To
handle this case, make virtnet_receive set the rx queue's refill_pending flag,
so that a refill work is scheduled as soon as delayed refill is enabled again.

Signed-off-by: Bui Quang Minh
---
 drivers/net/virtio_net.c | 21 ++++++++++++---------
 1 file changed, 12 insertions(+), 9 deletions(-)

diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index 8016d2b378cf..ddc62dab2f9a 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -383,6 +383,9 @@ struct receive_queue {
 	/* Is delayed refill enabled? */
 	bool refill_enabled;
 
+	/* A refill work needs to be scheduled when delayed refill is enabled */
+	bool refill_pending;
+
 	/* The lock to synchronize the access to refill_enabled */
 	spinlock_t refill_lock;
 
@@ -720,10 +723,13 @@ static void virtnet_rq_free_buf(struct virtnet_info *vi,
 	put_page(virt_to_head_page(buf));
 }
 
-static void enable_delayed_refill(struct receive_queue *rq)
+static void enable_delayed_refill(struct receive_queue *rq,
+				  bool schedule_refill)
 {
 	spin_lock_bh(&rq->refill_lock);
 	rq->refill_enabled = true;
+	if (rq->refill_pending || schedule_refill)
+		schedule_delayed_work(&rq->refill, 0);
 	spin_unlock_bh(&rq->refill_lock);
 }
 
@@ -3032,6 +3038,8 @@ static int virtnet_receive(struct receive_queue *rq, int budget,
 			spin_lock(&rq->refill_lock);
 			if (rq->refill_enabled)
 				schedule_delayed_work(&rq->refill, 0);
+			else
+				rq->refill_pending = true;
 			spin_unlock(&rq->refill_lock);
 		}
 	}
@@ -3228,11 +3236,8 @@ static int virtnet_open(struct net_device *dev)
 		if (err < 0)
 			goto err_enable_qp;
 
-		if (i < vi->curr_queue_pairs) {
-			enable_delayed_refill(&vi->rq[i]);
-			if (schedule_refill)
-				schedule_delayed_work(&vi->rq[i].refill, 0);
-		}
+		if (i < vi->curr_queue_pairs)
+			enable_delayed_refill(&vi->rq[i], schedule_refill);
 	}
 
 	if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_STATUS)) {
@@ -3480,9 +3485,7 @@ static void __virtnet_rx_resume(struct virtnet_info *vi,
 	if (running)
 		virtnet_napi_enable(rq);
-	enable_delayed_refill(rq);
-
-	if (schedule_refill)
-		schedule_delayed_work(&rq->refill, 0);
+	enable_delayed_refill(rq, schedule_refill);
 }
 
 static void virtnet_rx_resume_all(struct virtnet_info *vi)
-- 
2.43.0
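To summarize the interplay of the last two patches for readers outside the
driver: a refill request that arrives while delayed refill is disabled must
not be lost, and the refill_pending flag records it so that re-enabling refill
replays the request under the same lock. What follows is a hedged userspace
sketch of just that hand-off in plain C; struct rxq, rx_needs_refill and
work_scheduled are invented for illustration and are not the driver's code.

#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

struct rxq {
	pthread_mutex_t refill_lock;
	bool refill_enabled;
	bool refill_pending;	/* a refill was requested while disabled */
	bool work_scheduled;	/* stands in for queueing the delayed work */
};

/* Models the NAPI receive path running out of receive buffers. */
static void rx_needs_refill(struct rxq *q)
{
	pthread_mutex_lock(&q->refill_lock);
	if (q->refill_enabled)
		q->work_scheduled = true;
	else
		q->refill_pending = true;	/* remember the request */
	pthread_mutex_unlock(&q->refill_lock);
}

/* Models enable_delayed_refill(rq, schedule_refill): re-enabling refill
 * replays any request that arrived while it was disabled. As in the patch,
 * the pending flag is checked under refill_lock, so a concurrent receive
 * path cannot slip in between the enable and the check. */
static void enable_delayed_refill(struct rxq *q, bool schedule_refill)
{
	pthread_mutex_lock(&q->refill_lock);
	q->refill_enabled = true;
	if (q->refill_pending || schedule_refill)
		q->work_scheduled = true;
	pthread_mutex_unlock(&q->refill_lock);
}

int main(void)
{
	struct rxq q = { .refill_lock = PTHREAD_MUTEX_INITIALIZER };

	rx_needs_refill(&q);	/* refill still disabled: request is recorded */
	printf("scheduled=%d pending=%d\n", q.work_scheduled, q.refill_pending);
	enable_delayed_refill(&q, false);	/* replays the pending request */
	printf("scheduled=%d pending=%d\n", q.work_scheduled, q.refill_pending);
	return 0;
}

The sketch mirrors the hunks above, including leaving refill_pending set on
replay; in this model an extra refill only costs a redundant fill attempt,
whereas a lost request would leave the queue empty with nothing to refill it.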