From: David Wei Add a nested peer field to the queue-get response that returns the peered ifindex, the queue id and, optionally, the netns id if the device resides in a different netns. Example with ynl client: # ip a [...] 4: enp10s0f0np0: mtu 1500 xdp/id:24 qdisc mq state UP group default qlen 1000 link/ether e8:eb:d3:a3:43:f6 brd ff:ff:ff:ff:ff:ff inet 10.0.0.2/24 scope global enp10s0f0np0 valid_lft forever preferred_lft forever inet6 fe80::eaeb:d3ff:fea3:43f6/64 scope link proto kernel_ll valid_lft forever preferred_lft forever [...] # ethtool -i enp10s0f0np0 driver: mlx5_core [...] # ip netns exec foo ./pyynl/cli.py \ --spec ~/netlink/specs/netdev.yaml \ --do queue-get \ --json '{"ifindex": 4, "id": 15, "type": "rx"}' {'id': 15, 'ifindex': 4, 'napi-id': 8227, 'peer': {'id': 1, 'ifindex': 8, 'netns-id': 0}, 'type': 'rx', 'xsk': {}} # ip netns list foo (id: 0) # ip netns exec foo ip a [...] 8: nk@NONE: mtu 1500 qdisc noqueue state UP group default qlen 1000 link/ether 00:00:00:00:00:00 brd ff:ff:ff:ff:ff:ff inet6 fe80::200:ff:fe00:0/64 scope link proto kernel_ll valid_lft forever preferred_lft forever [...] # ip netns exec foo ethtool -i nk driver: netkit [...] # ip netns exec foo ./pyynl/cli.py \ --spec ~/netlink/specs/netdev.yaml \ --do queue-get \ --json '{"ifindex": 8, "id": 1, "type": "rx"}' {'id': 1, 'ifindex': 8, 'type': 'rx'} Note that the caller of netdev_nl_queue_fill_one() holds the netdevice lock. For queue-get we do not lock both devices. When queues get {un,}peered, both devices are locked; thus, if __netif_get_rx_queue_peer() returns a non-NULL queue, the peer pointer points to a valid device. The netns-id is fetched via peernet2id_alloc(), similar to how it is done in OVS. 
Signed-off-by: David Wei Co-developed-by: Daniel Borkmann Signed-off-by: Daniel Borkmann Reviewed-by: Nikolay Aleksandrov --- Documentation/netlink/specs/netdev.yaml | 24 +++++++++++++++++ include/net/netdev_rx_queue.h | 9 +++++++ include/uapi/linux/netdev.h | 10 ++++++++ net/core/netdev-genl.c | 34 +++++++++++++++++++++++-- net/core/netdev_rx_queue.c | 19 +++++++++++--- tools/include/uapi/linux/netdev.h | 10 ++++++++ 6 files changed, 100 insertions(+), 6 deletions(-) diff --git a/Documentation/netlink/specs/netdev.yaml b/Documentation/netlink/specs/netdev.yaml index 1e24c7f76de0..e1735b486222 100644 --- a/Documentation/netlink/specs/netdev.yaml +++ b/Documentation/netlink/specs/netdev.yaml @@ -297,6 +297,24 @@ attribute-sets: - name: xsk-info attributes: [] + - + name: peer-info + attributes: + - + name: id + doc: Queue index of the netdevice to which the peer queue belongs. + type: u32 + - + name: ifindex + doc: ifindex of the netdevice to which the peer queue belongs. + type: u32 + - + name: netns-id + doc: | + Network namespace of the netdevice to which the peer queue belongs. + This is populated if the netdevices are not in the same network + namespace. + type: s32 - name: queue attributes: @@ -338,6 +356,11 @@ attribute-sets: doc: XSK information for this queue, if any. type: nest nested-attributes: xsk-info + - + name: peer + doc: Whether this queue was bound to another peer queue. 
+ type: nest + nested-attributes: peer-info - name: qstats doc: | @@ -723,6 +746,7 @@ operations: - dmabuf - io-uring - xsk + - peer dump: request: attributes: diff --git a/include/net/netdev_rx_queue.h b/include/net/netdev_rx_queue.h index 3a02d47e42bc..d2505d099400 100644 --- a/include/net/netdev_rx_queue.h +++ b/include/net/netdev_rx_queue.h @@ -92,4 +92,13 @@ netif_get_rx_queue_peer_locked(struct net_device **dev, unsigned int *rxq_idx); void netif_put_rx_queue_peer_locked(struct net_device *orig_dev, struct net_device *dev); + +enum netif_peer_dir { + NETIF_VIRT_TO_PHYS, + NETIF_PHYS_TO_VIRT, +}; + +struct netdev_rx_queue * +__netif_get_rx_queue_peer(struct net_device **dev, unsigned int *rxq_idx, + enum netif_peer_dir dir); #endif /* _LINUX_NETDEV_RX_QUEUE_H */ diff --git a/include/uapi/linux/netdev.h b/include/uapi/linux/netdev.h index 4ef04d0bc412..d4d5d9f86eee 100644 --- a/include/uapi/linux/netdev.h +++ b/include/uapi/linux/netdev.h @@ -150,6 +150,15 @@ enum { NETDEV_A_XSK_INFO_MAX = (__NETDEV_A_XSK_INFO_MAX - 1) }; +enum { + NETDEV_A_PEER_INFO_ID = 1, + NETDEV_A_PEER_INFO_IFINDEX, + NETDEV_A_PEER_INFO_NETNS_ID, + + __NETDEV_A_PEER_INFO_MAX, + NETDEV_A_PEER_INFO_MAX = (__NETDEV_A_PEER_INFO_MAX - 1) +}; + enum { NETDEV_A_QUEUE_ID = 1, NETDEV_A_QUEUE_IFINDEX, @@ -158,6 +167,7 @@ enum { NETDEV_A_QUEUE_DMABUF, NETDEV_A_QUEUE_IO_URING, NETDEV_A_QUEUE_XSK, + NETDEV_A_QUEUE_PEER, __NETDEV_A_QUEUE_MAX, NETDEV_A_QUEUE_MAX = (__NETDEV_A_QUEUE_MAX - 1) diff --git a/net/core/netdev-genl.c b/net/core/netdev-genl.c index 4fa7e881441f..5cf7a9ca2e4b 100644 --- a/net/core/netdev-genl.c +++ b/net/core/netdev-genl.c @@ -391,8 +391,11 @@ netdev_nl_queue_fill_one(struct sk_buff *rsp, struct net_device *netdev, u32 q_idx, u32 q_type, const struct genl_info *info) { struct pp_memory_provider_params *params; + struct net_device *orig_netdev = netdev; struct netdev_rx_queue *rxq; struct netdev_queue *txq; + u32 peer_q_idx = q_idx; + struct nlattr *nest; void *hdr; hdr = 
genlmsg_iput(rsp, info); @@ -410,6 +413,33 @@ netdev_nl_queue_fill_one(struct sk_buff *rsp, struct net_device *netdev, if (nla_put_napi_id(rsp, rxq->napi)) goto nla_put_failure; + if (__netif_get_rx_queue_peer(&netdev, &peer_q_idx, + NETIF_PHYS_TO_VIRT)) { + struct net *net, *peer_net; + + nest = nla_nest_start(rsp, NETDEV_A_QUEUE_PEER); + if (!nest) + goto nla_put_failure; + + if (nla_put_u32(rsp, NETDEV_A_PEER_INFO_ID, peer_q_idx) || + nla_put_u32(rsp, NETDEV_A_PEER_INFO_IFINDEX, + READ_ONCE(netdev->ifindex))) + goto nla_put_failure; + + rcu_read_lock(); + peer_net = dev_net_rcu(netdev); + net = dev_net_rcu(orig_netdev); + if (!net_eq(net, peer_net)) { + s32 id = peernet2id_alloc(net, peer_net, GFP_ATOMIC); + + if (nla_put_s32(rsp, NETDEV_A_PEER_INFO_NETNS_ID, id)) + goto nla_put_failure_unlock; + } + rcu_read_unlock(); + nla_nest_end(rsp, nest); + netdev = orig_netdev; + } + params = &rxq->mp_params; if (params->mp_ops && params->mp_ops->nl_fill(params->mp_priv, rsp, rxq)) @@ -419,7 +449,6 @@ netdev_nl_queue_fill_one(struct sk_buff *rsp, struct net_device *netdev, if (nla_put_empty_nest(rsp, NETDEV_A_QUEUE_XSK)) goto nla_put_failure; #endif - break; case NETDEV_QUEUE_TYPE_TX: txq = netdev_get_tx_queue(netdev, q_idx); @@ -434,9 +463,10 @@ netdev_nl_queue_fill_one(struct sk_buff *rsp, struct net_device *netdev, } genlmsg_end(rsp, hdr); - return 0; +nla_put_failure_unlock: + rcu_read_unlock(); nla_put_failure: genlmsg_cancel(rsp, hdr); return -EMSGSIZE; diff --git a/net/core/netdev_rx_queue.c b/net/core/netdev_rx_queue.c index 6eb12f3b969c..889b7382cdb6 100644 --- a/net/core/netdev_rx_queue.c +++ b/net/core/netdev_rx_queue.c @@ -42,14 +42,25 @@ void netdev_rx_queue_unpeer(struct net_device *src_dev, netdev_put(src_dev, &src_rxq->peer_tracker); } -static struct netdev_rx_queue * -__netif_get_rx_queue_peer(struct net_device **dev, unsigned int *rxq_idx) +static bool netif_peer_dir_ok(const struct net_device *dev, + enum netif_peer_dir dir) +{ + if (dir == 
NETIF_VIRT_TO_PHYS && !dev->dev.parent) + return true; + if (dir == NETIF_PHYS_TO_VIRT && dev->dev.parent) + return true; + return false; +} + +struct netdev_rx_queue * +__netif_get_rx_queue_peer(struct net_device **dev, unsigned int *rxq_idx, + enum netif_peer_dir dir) { struct net_device *orig_dev = *dev; struct netdev_rx_queue *rxq = __netif_get_rx_queue(orig_dev, *rxq_idx); if (rxq->peer) { - if (orig_dev->dev.parent) + if (!netif_peer_dir_ok(orig_dev, dir)) return NULL; rxq = rxq->peer; *rxq_idx = get_netdev_rx_queue_index(rxq); @@ -68,7 +79,7 @@ netif_get_rx_queue_peer_locked(struct net_device **dev, unsigned int *rxq_idx) * see netdev_nl_bind_queue_doit(). */ netdev_ops_assert_locked(orig_dev); - rxq = __netif_get_rx_queue_peer(dev, rxq_idx); + rxq = __netif_get_rx_queue_peer(dev, rxq_idx, NETIF_VIRT_TO_PHYS); if (rxq && orig_dev != *dev) netdev_lock(*dev); return rxq; diff --git a/tools/include/uapi/linux/netdev.h b/tools/include/uapi/linux/netdev.h index 4ef04d0bc412..d4d5d9f86eee 100644 --- a/tools/include/uapi/linux/netdev.h +++ b/tools/include/uapi/linux/netdev.h @@ -150,6 +150,15 @@ enum { NETDEV_A_XSK_INFO_MAX = (__NETDEV_A_XSK_INFO_MAX - 1) }; +enum { + NETDEV_A_PEER_INFO_ID = 1, + NETDEV_A_PEER_INFO_IFINDEX, + NETDEV_A_PEER_INFO_NETNS_ID, + + __NETDEV_A_PEER_INFO_MAX, + NETDEV_A_PEER_INFO_MAX = (__NETDEV_A_PEER_INFO_MAX - 1) +}; + enum { NETDEV_A_QUEUE_ID = 1, NETDEV_A_QUEUE_IFINDEX, @@ -158,6 +167,7 @@ enum { NETDEV_A_QUEUE_DMABUF, NETDEV_A_QUEUE_IO_URING, NETDEV_A_QUEUE_XSK, + NETDEV_A_QUEUE_PEER, __NETDEV_A_QUEUE_MAX, NETDEV_A_QUEUE_MAX = (__NETDEV_A_QUEUE_MAX - 1) -- 2.43.0