For zerocopy (io_uring, devmem), there is an assumption that the parent device can do DMA. However, that is not always the case: scalable function netdevs [1] have the DMA device in the grandparent. This patch adds a helper for getting the DMA device for a netdev from its parent or grandparent if necessary. The NULL case is handled in the callers. devmem and io_uring are updated accordingly to use this helper instead of directly using the parent. [1] Documentation/networking/device_drivers/ethernet/mellanox/mlx5/switchdev.rst Signed-off-by: Dragos Tatulea Reviewed-by: Cosmin Ratio Reviewed-by: Tariq Toukan Reviewed-by: Pavel Begunkov Reviewed-by: Mina Almasry ---- Changes in v2 [2]: - Dropped the Fixes tag. - Added more documentation as requested. - Renamed the patch title to better reflect its purpose. Changes in v1 [1]: - Upgraded from RFC status. - Dropped driver specific bits for generic solution. - Implemented single patch as a fix as requested in RFC. - Handling of multi-PF netdevs will be handled in a subsequent patch series. [1] RFC: https://lore.kernel.org/all/20250702172433.1738947-2-dtatulea@nvidia.com/ [2] v2: https://lore.kernel.org/all/20250709124059.516095-2-dtatulea@nvidia.com/ --- include/linux/netdevice.h | 21 +++++++++++++++++++++ io_uring/zcrx.c | 2 +- net/core/devmem.c | 10 +++++++++- 3 files changed, 31 insertions(+), 2 deletions(-) diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 5847c20994d3..53aa63d6e5a3 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -5560,4 +5560,25 @@ extern struct net_device *blackhole_netdev; atomic_long_add((VAL), &(DEV)->stats.__##FIELD) #define DEV_STATS_READ(DEV, FIELD) atomic_long_read(&(DEV)->stats.__##FIELD) +static inline struct device *netdev_get_dma_dev(const struct net_device *dev) +{ + struct device *dma_dev = dev->dev.parent; + + if (!dma_dev) + return NULL; + + /* Common case: dma device is parent device of netdev. 
*/ + if (dma_dev->dma_mask) + return dma_dev; + + /* SF netdevs have an auxdev device as parent, the dma device being the + * grandparent. + */ + dma_dev = dma_dev->parent; + if (dma_dev && dma_dev->dma_mask) + return dma_dev; + + return NULL; +} + #endif /* _LINUX_NETDEVICE_H */ diff --git a/io_uring/zcrx.c b/io_uring/zcrx.c index 797247a34cb7..93462e5b2207 100644 --- a/io_uring/zcrx.c +++ b/io_uring/zcrx.c @@ -584,7 +584,7 @@ int io_register_zcrx_ifq(struct io_ring_ctx *ctx, goto err; } - ifq->dev = ifq->netdev->dev.parent; + ifq->dev = netdev_get_dma_dev(ifq->netdev); if (!ifq->dev) { ret = -EOPNOTSUPP; goto err; diff --git a/net/core/devmem.c b/net/core/devmem.c index b3a62ca0df65..881044e0ae0e 100644 --- a/net/core/devmem.c +++ b/net/core/devmem.c @@ -183,6 +183,7 @@ net_devmem_bind_dmabuf(struct net_device *dev, { struct net_devmem_dmabuf_binding *binding; static u32 id_alloc_next; + struct device *dma_dev; struct scatterlist *sg; struct dma_buf *dmabuf; unsigned int sg_idx, i; @@ -193,6 +194,13 @@ net_devmem_bind_dmabuf(struct net_device *dev, if (IS_ERR(dmabuf)) return ERR_CAST(dmabuf); + dma_dev = netdev_get_dma_dev(dev); + if (!dma_dev) { + err = -EOPNOTSUPP; + NL_SET_ERR_MSG(extack, "Device doesn't support dma"); + goto err_put_dmabuf; + } + binding = kzalloc_node(sizeof(*binding), GFP_KERNEL, dev_to_node(&dev->dev)); if (!binding) { @@ -209,7 +217,7 @@ net_devmem_bind_dmabuf(struct net_device *dev, binding->dmabuf = dmabuf; - binding->attachment = dma_buf_attach(binding->dmabuf, dev->dev.parent); + binding->attachment = dma_buf_attach(binding->dmabuf, dma_dev); if (IS_ERR(binding->attachment)) { err = PTR_ERR(binding->attachment); NL_SET_ERR_MSG(extack, "Failed to bind dmabuf to device"); -- 2.43.0