From: David Wei Add a ynl netdev family operation called bind-queue that _binds_ an rxq from a real netdev to a virtual netdev i.e. netkit or veth. This bound or _mapped_ rxq in the virtual netdev acts as a proxy for the parent real rxq, and can be used by processes running in a container to use memory providers (io_uring zero-copy rx or devmem) or AF_XDP. An early implementation had only driver-specific integration [0], but in order for other virtual devices to reuse, it makes sense to have this as a generic API. src-ifindex and src-queue-id are the real netdev and rxq respectively. dst-ifindex is the virtual netdev. Note that this op doesn't take dst-queue-id, because the expectation is that the op will _create_ a new rxq in the virtual netdev. The virtual netdev must have real_num_rx_queues less than num_rx_queues at the time of calling bind-queue. Signed-off-by: David Wei Co-developed-by: Daniel Borkmann Signed-off-by: Daniel Borkmann Link: https://bpfconf.ebpf.io/bpfconf2025/bpfconf2025_material/lsfmmbpf_2025_netkit_borkmann.pdf [0] --- Documentation/netlink/specs/netdev.yaml | 37 +++++++++++++++++++++++++ include/uapi/linux/netdev.h | 11 ++++++++ net/core/netdev-genl-gen.c | 14 ++++++++++ net/core/netdev-genl-gen.h | 1 + net/core/netdev-genl.c | 6 +++++ tools/include/uapi/linux/netdev.h | 11 ++++++++ 6 files changed, 80 insertions(+) diff --git a/Documentation/netlink/specs/netdev.yaml b/Documentation/netlink/specs/netdev.yaml index e00d3fa1c152..99a430ea8a9a 100644 --- a/Documentation/netlink/specs/netdev.yaml +++ b/Documentation/netlink/specs/netdev.yaml @@ -561,6 +561,29 @@ attribute-sets: type: u32 checks: min: 1 + - + name: queue-pair + attributes: + - + name: src-ifindex + doc: netdev ifindex of the physical device + type: u32 + checks: + min: 1 + - + name: src-queue-id + doc: netdev queue id of the physical device + type: u32 + - + name: dst-ifindex + doc: netdev ifindex of the virtual device + type: u32 + checks: + min: 1 + - + name: dst-queue-id + doc: 
netdev queue id of the virtual device + type: u32 operations: list: @@ -772,6 +795,20 @@ operations: attributes: - id + - + name: bind-queue + doc: Bind a physical netdev queue to a virtual one + attribute-set: queue-pair + do: + request: + attributes: + - src-ifindex + - src-queue-id + - dst-ifindex + reply: + attributes: + - dst-queue-id + kernel-family: headers: ["net/netdev_netlink.h"] sock-priv: struct netdev_nl_sock diff --git a/include/uapi/linux/netdev.h b/include/uapi/linux/netdev.h index 48eb49aa03d4..05e17765a39d 100644 --- a/include/uapi/linux/netdev.h +++ b/include/uapi/linux/netdev.h @@ -210,6 +210,16 @@ enum { NETDEV_A_DMABUF_MAX = (__NETDEV_A_DMABUF_MAX - 1) }; +enum { + NETDEV_A_QUEUE_PAIR_SRC_IFINDEX = 1, + NETDEV_A_QUEUE_PAIR_SRC_QUEUE_ID, + NETDEV_A_QUEUE_PAIR_DST_IFINDEX, + NETDEV_A_QUEUE_PAIR_DST_QUEUE_ID, + + __NETDEV_A_QUEUE_PAIR_MAX, + NETDEV_A_QUEUE_PAIR_MAX = (__NETDEV_A_QUEUE_PAIR_MAX - 1) +}; + enum { NETDEV_CMD_DEV_GET = 1, NETDEV_CMD_DEV_ADD_NTF, @@ -226,6 +236,7 @@ enum { NETDEV_CMD_BIND_RX, NETDEV_CMD_NAPI_SET, NETDEV_CMD_BIND_TX, + NETDEV_CMD_BIND_QUEUE, __NETDEV_CMD_MAX, NETDEV_CMD_MAX = (__NETDEV_CMD_MAX - 1) diff --git a/net/core/netdev-genl-gen.c b/net/core/netdev-genl-gen.c index e9a2a6f26cb7..10b2ab4dd500 100644 --- a/net/core/netdev-genl-gen.c +++ b/net/core/netdev-genl-gen.c @@ -106,6 +106,13 @@ static const struct nla_policy netdev_bind_tx_nl_policy[NETDEV_A_DMABUF_FD + 1] [NETDEV_A_DMABUF_FD] = { .type = NLA_U32, }, }; +/* NETDEV_CMD_BIND_QUEUE - do */ +static const struct nla_policy netdev_bind_queue_nl_policy[NETDEV_A_QUEUE_PAIR_DST_IFINDEX + 1] = { + [NETDEV_A_QUEUE_PAIR_SRC_IFINDEX] = NLA_POLICY_MIN(NLA_U32, 1), + [NETDEV_A_QUEUE_PAIR_SRC_QUEUE_ID] = { .type = NLA_U32, }, + [NETDEV_A_QUEUE_PAIR_DST_IFINDEX] = NLA_POLICY_MIN(NLA_U32, 1), +}; + /* Ops table for netdev */ static const struct genl_split_ops netdev_nl_ops[] = { { @@ -204,6 +211,13 @@ static const struct genl_split_ops netdev_nl_ops[] = { .maxattr = 
NETDEV_A_DMABUF_FD, .flags = GENL_CMD_CAP_DO, }, + { + .cmd = NETDEV_CMD_BIND_QUEUE, + .doit = netdev_nl_bind_queue_doit, + .policy = netdev_bind_queue_nl_policy, + .maxattr = NETDEV_A_QUEUE_PAIR_DST_IFINDEX, + .flags = GENL_CMD_CAP_DO, + }, }; static const struct genl_multicast_group netdev_nl_mcgrps[] = { diff --git a/net/core/netdev-genl-gen.h b/net/core/netdev-genl-gen.h index cf3fad74511f..309248fe2b9e 100644 --- a/net/core/netdev-genl-gen.h +++ b/net/core/netdev-genl-gen.h @@ -35,6 +35,7 @@ int netdev_nl_qstats_get_dumpit(struct sk_buff *skb, int netdev_nl_bind_rx_doit(struct sk_buff *skb, struct genl_info *info); int netdev_nl_napi_set_doit(struct sk_buff *skb, struct genl_info *info); int netdev_nl_bind_tx_doit(struct sk_buff *skb, struct genl_info *info); +int netdev_nl_bind_queue_doit(struct sk_buff *skb, struct genl_info *info); enum { NETDEV_NLGRP_MGMT, diff --git a/net/core/netdev-genl.c b/net/core/netdev-genl.c index 470fabbeacd9..b0aea27bf84e 100644 --- a/net/core/netdev-genl.c +++ b/net/core/netdev-genl.c @@ -1120,6 +1120,12 @@ int netdev_nl_bind_tx_doit(struct sk_buff *skb, struct genl_info *info) return err; } +int netdev_nl_bind_queue_doit(struct sk_buff *skb, struct genl_info *info) +{ + /* Not implemented yet: fail explicitly instead of falling off the end of a non-void handler. */ + return -EOPNOTSUPP; +} + void netdev_nl_sock_priv_init(struct netdev_nl_sock *priv) { INIT_LIST_HEAD(&priv->bindings); diff --git a/tools/include/uapi/linux/netdev.h b/tools/include/uapi/linux/netdev.h index 48eb49aa03d4..05e17765a39d 100644 --- a/tools/include/uapi/linux/netdev.h +++ b/tools/include/uapi/linux/netdev.h @@ -210,6 +210,16 @@ enum { NETDEV_A_DMABUF_MAX = (__NETDEV_A_DMABUF_MAX - 1) }; +enum { + NETDEV_A_QUEUE_PAIR_SRC_IFINDEX = 1, + NETDEV_A_QUEUE_PAIR_SRC_QUEUE_ID, + NETDEV_A_QUEUE_PAIR_DST_IFINDEX, + NETDEV_A_QUEUE_PAIR_DST_QUEUE_ID, + + __NETDEV_A_QUEUE_PAIR_MAX, + NETDEV_A_QUEUE_PAIR_MAX = (__NETDEV_A_QUEUE_PAIR_MAX - 1) +}; + enum { NETDEV_CMD_DEV_GET = 1, NETDEV_CMD_DEV_ADD_NTF, @@ -226,6 +236,7 @@ enum { NETDEV_CMD_BIND_RX, NETDEV_CMD_NAPI_SET, 
NETDEV_CMD_BIND_TX, + NETDEV_CMD_BIND_QUEUE, __NETDEV_CMD_MAX, NETDEV_CMD_MAX = (__NETDEV_CMD_MAX - 1) -- 2.43.0