From: David Wei When a process in a container wants to set up a memory provider, it will use the virtual netdev and a leased rxq, and call net_mp_{open,close}_rxq to try to restart the queue. At this point, proxy the queue restart on the real rxq in the physical netdev. For memory providers (io_uring zero-copy rx and devmem), this causes the real rxq in the physical netdev to be filled from a memory provider that has DMA-mapped memory from a process within a container. Signed-off-by: David Wei Co-developed-by: Daniel Borkmann Signed-off-by: Daniel Borkmann --- include/net/netdev_rx_queue.h | 4 ++ net/core/netdev_rx_queue.c | 100 +++++++++++++++++++++++++++------- 2 files changed, 83 insertions(+), 21 deletions(-) diff --git a/include/net/netdev_rx_queue.h b/include/net/netdev_rx_queue.h index 589192f67d97..0286c81fb156 100644 --- a/include/net/netdev_rx_queue.h +++ b/include/net/netdev_rx_queue.h @@ -75,4 +75,8 @@ enum netif_lease_dir { struct netdev_rx_queue * __netif_get_rx_queue_lease(struct net_device **dev, unsigned int *rxq, enum netif_lease_dir dir); +struct netdev_rx_queue * +netif_get_rx_queue_lease_locked(struct net_device **dev, unsigned int *rxq); +void netif_put_rx_queue_lease_locked(struct net_device *orig_dev, + struct net_device *dev); #endif /* _LINUX_NETDEV_RX_QUEUE_H */ diff --git a/net/core/netdev_rx_queue.c b/net/core/netdev_rx_queue.c index f30fe7550455..34e523f1e926 100644 --- a/net/core/netdev_rx_queue.c +++ b/net/core/netdev_rx_queue.c @@ -68,6 +68,29 @@ __netif_get_rx_queue_lease(struct net_device **dev, unsigned int *rxq_idx, return rxq; } +struct netdev_rx_queue * +netif_get_rx_queue_lease_locked(struct net_device **dev, unsigned int *rxq_idx) +{ + struct net_device *orig_dev = *dev; + struct netdev_rx_queue *rxq; + + /* Locking order is always from the virtual to the physical device, + * see netdev_nl_queue_create_doit(). 
+ */ + netdev_ops_assert_locked(orig_dev); + rxq = __netif_get_rx_queue_lease(dev, rxq_idx, NETIF_VIRT_TO_PHYS); + if (rxq && orig_dev != *dev) + netdev_lock(*dev); + return rxq; +} + +void netif_put_rx_queue_lease_locked(struct net_device *orig_dev, + struct net_device *dev) +{ + if (orig_dev != dev) + netdev_unlock(dev); +} + bool netif_rx_queue_lease_get_owner(struct net_device **dev, unsigned int *rxq_idx) { @@ -191,24 +214,15 @@ int netdev_rx_queue_restart(struct net_device *dev, unsigned int rxq_idx) } EXPORT_SYMBOL_NS_GPL(netdev_rx_queue_restart, "NETDEV_INTERNAL"); -int net_mp_open_rxq(struct net_device *dev, unsigned int rxq_idx, - const struct pp_memory_provider_params *p, - struct netlink_ext_ack *extack) +static int __net_mp_open_rxq(struct net_device *dev, unsigned int rxq_idx, + const struct pp_memory_provider_params *p, + struct netlink_ext_ack *extack) { const struct netdev_queue_mgmt_ops *qops = dev->queue_mgmt_ops; struct netdev_queue_config qcfg[2]; struct netdev_rx_queue *rxq; int ret; - if (!netdev_need_ops_lock(dev)) - return -EOPNOTSUPP; - - if (rxq_idx >= dev->real_num_rx_queues) { - NL_SET_ERR_MSG(extack, "rx queue index out of range"); - return -ERANGE; - } - rxq_idx = array_index_nospec(rxq_idx, dev->real_num_rx_queues); - if (dev->cfg->hds_config != ETHTOOL_TCP_DATA_SPLIT_ENABLED) { NL_SET_ERR_MSG(extack, "tcp-data-split is disabled"); return -EINVAL; @@ -255,17 +269,47 @@ int net_mp_open_rxq(struct net_device *dev, unsigned int rxq_idx, return ret; } -void net_mp_close_rxq(struct net_device *dev, unsigned int ifq_idx, - const struct pp_memory_provider_params *old_p) +int net_mp_open_rxq(struct net_device *dev, unsigned int rxq_idx, + const struct pp_memory_provider_params *p, + struct netlink_ext_ack *extack) +{ + struct net_device *orig_dev = dev; + int ret; + + if (!netdev_need_ops_lock(dev)) + return -EOPNOTSUPP; + + if (rxq_idx >= dev->real_num_rx_queues) { + NL_SET_ERR_MSG(extack, "rx queue index out of range"); + return -ERANGE; + 
} + + rxq_idx = array_index_nospec(rxq_idx, dev->real_num_rx_queues); + + if (!netif_get_rx_queue_lease_locked(&dev, &rxq_idx)) { + NL_SET_ERR_MSG(extack, "rx queue leased to a virtual netdev"); + return -EBUSY; + } + if (!dev->dev.parent) { + NL_SET_ERR_MSG(extack, "rx queue belongs to a virtual netdev"); + ret = -EOPNOTSUPP; + goto out; + } + + ret = __net_mp_open_rxq(dev, rxq_idx, p, extack); +out: + netif_put_rx_queue_lease_locked(orig_dev, dev); + return ret; +} + +static void __net_mp_close_rxq(struct net_device *dev, unsigned int rxq_idx, + const struct pp_memory_provider_params *old_p) { struct netdev_queue_config qcfg[2]; struct netdev_rx_queue *rxq; int err; - if (WARN_ON_ONCE(ifq_idx >= dev->real_num_rx_queues)) - return; - - rxq = __netif_get_rx_queue(dev, ifq_idx); + rxq = __netif_get_rx_queue(dev, rxq_idx); /* Callers holding a netdev ref may get here after we already * went thru shutdown via dev_memory_provider_uninstall(). @@ -278,10 +322,24 @@ void net_mp_close_rxq(struct net_device *dev, unsigned int ifq_idx, rxq->mp_params.mp_priv != old_p->mp_priv)) return; - netdev_queue_config(dev, ifq_idx, &qcfg[0]); + netdev_queue_config(dev, rxq_idx, &qcfg[0]); memset(&rxq->mp_params, 0, sizeof(rxq->mp_params)); - netdev_queue_config(dev, ifq_idx, &qcfg[1]); + netdev_queue_config(dev, rxq_idx, &qcfg[1]); - err = netdev_rx_queue_reconfig(dev, ifq_idx, &qcfg[0], &qcfg[1]); + err = netdev_rx_queue_reconfig(dev, rxq_idx, &qcfg[0], &qcfg[1]); WARN_ON(err && err != -ENETDOWN); } + +void net_mp_close_rxq(struct net_device *dev, unsigned int rxq_idx, + const struct pp_memory_provider_params *old_p) +{ + struct net_device *orig_dev = dev; + + if (WARN_ON_ONCE(rxq_idx >= dev->real_num_rx_queues)) + return; + if (WARN_ON_ONCE(!netif_get_rx_queue_lease_locked(&dev, &rxq_idx))) + return; + + __net_mp_close_rxq(dev, rxq_idx, old_p); + netif_put_rx_queue_lease_locked(orig_dev, dev); +} -- 2.43.0