From: David Wei

When a process in a container wants to set up a memory provider, it
will use the virtual netdev and a mapped rxq, and call
net_mp_{open,close}_rxq to try to restart the queue. At this point,
proxy the queue restart onto the real rxq in the physical netdev. For
memory providers (io_uring zero-copy rx and devmem), this causes the
real rxq in the physical netdev to be filled from a memory provider
that has DMA-mapped memory from a process within a container.

Signed-off-by: David Wei
Co-developed-by: Daniel Borkmann
Signed-off-by: Daniel Borkmann
---
 include/net/page_pool/memory_provider.h |  4 +-
 net/core/netdev_rx_queue.c              | 57 +++++++++++++++++--------
 2 files changed, 41 insertions(+), 20 deletions(-)

diff --git a/include/net/page_pool/memory_provider.h b/include/net/page_pool/memory_provider.h
index ada4f968960a..b6f811c3416b 100644
--- a/include/net/page_pool/memory_provider.h
+++ b/include/net/page_pool/memory_provider.h
@@ -23,12 +23,12 @@ bool net_mp_niov_set_dma_addr(struct net_iov *niov, dma_addr_t addr);
 void net_mp_niov_set_page_pool(struct page_pool *pool, struct net_iov *niov);
 void net_mp_niov_clear_page_pool(struct net_iov *niov);
 
-int net_mp_open_rxq(struct net_device *dev, unsigned ifq_idx,
+int net_mp_open_rxq(struct net_device *dev, unsigned int rxq_idx,
 		    struct pp_memory_provider_params *p);
 int __net_mp_open_rxq(struct net_device *dev, unsigned int rxq_idx,
 		      const struct pp_memory_provider_params *p,
 		      struct netlink_ext_ack *extack);
-void net_mp_close_rxq(struct net_device *dev, unsigned ifq_idx,
+void net_mp_close_rxq(struct net_device *dev, unsigned int rxq_idx,
 		      struct pp_memory_provider_params *old_p);
 void __net_mp_close_rxq(struct net_device *dev, unsigned int rxq_idx,
 			const struct pp_memory_provider_params *old_p);
diff --git a/net/core/netdev_rx_queue.c b/net/core/netdev_rx_queue.c
index 686a6300df78..7d25a6014926 100644
--- a/net/core/netdev_rx_queue.c
+++ b/net/core/netdev_rx_queue.c
@@ -168,48 +168,63 @@ int __net_mp_open_rxq(struct net_device *dev, unsigned int rxq_idx,
 		      struct netlink_ext_ack *extack)
 {
 	struct netdev_rx_queue *rxq;
+	bool needs_unlock = false;
 	int ret;
 
 	if (!netdev_need_ops_lock(dev))
 		return -EOPNOTSUPP;
-
 	if (rxq_idx >= dev->real_num_rx_queues) {
 		NL_SET_ERR_MSG(extack, "rx queue index out of range");
 		return -ERANGE;
 	}
-	rxq_idx = array_index_nospec(rxq_idx, dev->real_num_rx_queues);
 
+	rxq_idx = array_index_nospec(rxq_idx, dev->real_num_rx_queues);
+	rxq = netif_get_rx_queue_peer_locked(&dev, &rxq_idx, &needs_unlock);
+	if (!rxq) {
+		NL_SET_ERR_MSG(extack, "rx queue peered to a virtual netdev");
+		return -EBUSY;
+	}
+	if (!dev->dev.parent) {
+		NL_SET_ERR_MSG(extack, "rx queue is mapped to a virtual netdev");
+		ret = -EBUSY;
+		goto out;
+	}
 	if (dev->cfg->hds_config != ETHTOOL_TCP_DATA_SPLIT_ENABLED) {
 		NL_SET_ERR_MSG(extack, "tcp-data-split is disabled");
-		return -EINVAL;
+		ret = -EINVAL;
+		goto out;
 	}
 	if (dev->cfg->hds_thresh) {
 		NL_SET_ERR_MSG(extack, "hds-thresh is not zero");
-		return -EINVAL;
+		ret = -EINVAL;
+		goto out;
 	}
 	if (dev_xdp_prog_count(dev)) {
 		NL_SET_ERR_MSG(extack, "unable to custom memory provider to device with XDP program attached");
-		return -EEXIST;
+		ret = -EEXIST;
+		goto out;
 	}
-
-	rxq = __netif_get_rx_queue(dev, rxq_idx);
 	if (rxq->mp_params.mp_ops) {
 		NL_SET_ERR_MSG(extack, "designated queue already memory provider bound");
-		return -EEXIST;
+		ret = -EEXIST;
+		goto out;
 	}
 #ifdef CONFIG_XDP_SOCKETS
 	if (rxq->pool) {
 		NL_SET_ERR_MSG(extack, "designated queue already in use by AF_XDP");
-		return -EBUSY;
+		ret = -EBUSY;
+		goto out;
 	}
 #endif
-
 	rxq->mp_params = *p;
 	ret = netdev_rx_queue_restart(dev, rxq_idx);
 	if (ret) {
 		rxq->mp_params.mp_ops = NULL;
 		rxq->mp_params.mp_priv = NULL;
 	}
+out:
+	if (needs_unlock)
+		netdev_unlock(dev);
 
 	return ret;
 }
@@ -224,38 +239,44 @@ int net_mp_open_rxq(struct net_device *dev, unsigned int rxq_idx,
 	return ret;
 }
 
-void __net_mp_close_rxq(struct net_device *dev, unsigned int ifq_idx,
+void __net_mp_close_rxq(struct net_device *dev, unsigned int rxq_idx,
 			const struct pp_memory_provider_params *old_p)
 {
 	struct netdev_rx_queue *rxq;
+	bool needs_unlock = false;
 	int err;
 
-	if (WARN_ON_ONCE(ifq_idx >= dev->real_num_rx_queues))
+	if (WARN_ON_ONCE(rxq_idx >= dev->real_num_rx_queues))
 		return;
 
-	rxq = __netif_get_rx_queue(dev, ifq_idx);
+	rxq = netif_get_rx_queue_peer_locked(&dev, &rxq_idx, &needs_unlock);
+	if (WARN_ON_ONCE(!rxq))
+		return;
 
 	/* Callers holding a netdev ref may get here after we already
 	 * went thru shutdown via dev_memory_provider_uninstall().
 	 */
 	if (dev->reg_state > NETREG_REGISTERED &&
 	    !rxq->mp_params.mp_ops)
-		return;
+		goto out;
 
 	if (WARN_ON_ONCE(rxq->mp_params.mp_ops != old_p->mp_ops ||
 			 rxq->mp_params.mp_priv != old_p->mp_priv))
-		return;
+		goto out;
 
 	rxq->mp_params.mp_ops = NULL;
 	rxq->mp_params.mp_priv = NULL;
-	err = netdev_rx_queue_restart(dev, ifq_idx);
+	err = netdev_rx_queue_restart(dev, rxq_idx);
 	WARN_ON(err && err != -ENETDOWN);
+out:
+	if (needs_unlock)
+		netdev_unlock(dev);
 }
 
-void net_mp_close_rxq(struct net_device *dev, unsigned ifq_idx,
+void net_mp_close_rxq(struct net_device *dev, unsigned int rxq_idx,
 		      struct pp_memory_provider_params *old_p)
 {
 	netdev_lock(dev);
-	__net_mp_close_rxq(dev, ifq_idx, old_p);
+	__net_mp_close_rxq(dev, rxq_idx, old_p);
 	netdev_unlock(dev);
 }
-- 
2.43.0
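
As a rough consumer-side illustration (not part of this patch, and the helper
names below are made up): an in-kernel memory provider user such as io_uring
zero-copy rx or devmem would bind against the container-visible netdev roughly
as sketched here. Only net_mp_open_rxq()/net_mp_close_rxq() and
struct pp_memory_provider_params come from the interfaces touched above; when
dev is a virtual netdev whose rxq is peered to a physical one, these paths now
resolve the real rxq and restart it on the physical netdev.

/* Hypothetical consumer-side sketch, not part of this patch. */
#include <net/page_pool/memory_provider.h>

static int my_mp_bind(struct net_device *dev, unsigned int rxq_idx,
		      struct pp_memory_provider_params *p)
{
	/* On a peered virtual netdev, the queue restart is proxied to
	 * the real rxq of the underlying physical netdev.
	 */
	return net_mp_open_rxq(dev, rxq_idx, p);
}

static void my_mp_unbind(struct net_device *dev, unsigned int rxq_idx,
			 struct pp_memory_provider_params *old_p)
{
	/* Unwinds the binding and restarts the (possibly peered) real rxq. */
	net_mp_close_rxq(dev, rxq_idx, old_p);
}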