When a BPF program that supports BPF_F_XDP_HAS_FRAGS is issuing bpf_xdp_adjust_tail and a large packet is injected via /dev/net/tun a crash occurs due to detecting a bad page state (page_pool leak). This is because xdp_buff does not record the type of memory and instead relies on the netdev receive queue xdp info. Since netif_alloc_rx_queues only calls xdp_rxq_info_reg mem.type is going to be set to MEM_TYPE_PAGE_SHARED. So shrinking will eventually call page_frag_free. But with current multi-buff support for BPF_F_XDP_HAS_FRAGS programs buffers are allocated via the page pool in skb_cow_data_for_xdp. To fix this issue use the same approach that is used in cpu_map_bpf_prog_run_xdp: declare an xdp_rxq_info structure on the stack instead of using the xdp_rxq_info structure from the netdev rx queue. And update mem.type to reflect how the buffers are allocated, in this case to MEM_TYPE_PAGE_POOL if skb_cow_data_for_xdp is used. Reported-by: syzbot+ff145014d6b0ce64a173@syzkaller.appspotmail.com Closes: https://lore.kernel.org/netdev/6756c37b.050a0220.a30f1.019a.GAE@google.com/ Fixes: e6d5dbdd20aa ("xdp: add multi-buff support for xdp running in generic mode") Signed-off-by: Octavian Purdila --- v2: - use a local xdp_rxq_info structure and update mem.type instead of skipping using page pool if the netdev xdp_rxq_info is not set to MEM_TYPE_PAGE_POOL (which is always the case currently) v1: https://lore.kernel.org/netdev/20250924060843.2280499-1-tavip@google.com/ include/linux/netdevice.h | 4 +++- kernel/bpf/cpumap.c | 2 +- kernel/bpf/devmap.c | 2 +- net/core/dev.c | 32 +++++++++++++++++++++----------- 4 files changed, 26 insertions(+), 14 deletions(-) diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index f3a3b761abfb..41585414e45c 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -78,6 +78,7 @@ struct udp_tunnel_nic_info; struct udp_tunnel_nic; struct bpf_prog; struct xdp_buff; +struct xdp_rxq_info; struct xdp_frame; struct xdp_metadata_ops; struct xdp_md; @@ -4149,7 +4150,8 @@ static inline void dev_consume_skb_any(struct sk_buff *skb) } u32 bpf_prog_run_generic_xdp(struct sk_buff *skb, struct xdp_buff *xdp, - const struct bpf_prog *xdp_prog); + const struct bpf_prog *xdp_prog, + struct xdp_rxq_info *rxq); void generic_xdp_tx(struct sk_buff *skb, const struct bpf_prog *xdp_prog); int do_xdp_generic(const struct bpf_prog *xdp_prog, struct sk_buff **pskb); int netif_rx(struct sk_buff *skb); diff --git a/kernel/bpf/cpumap.c b/kernel/bpf/cpumap.c index c46360b27871..a057a58ba969 100644 --- a/kernel/bpf/cpumap.c +++ b/kernel/bpf/cpumap.c @@ -145,7 +145,7 @@ static u32 cpu_map_bpf_prog_run_skb(struct bpf_cpu_map_entry *rcpu, for (u32 i = 0; i < skb_n; i++) { struct sk_buff *skb = skbs[i]; - act = bpf_prog_run_generic_xdp(skb, &xdp, rcpu->prog); + act = bpf_prog_run_generic_xdp(skb, &xdp, rcpu->prog, NULL); switch (act) { case XDP_PASS: skbs[pass++] = skb; diff --git a/kernel/bpf/devmap.c b/kernel/bpf/devmap.c index 482d284a1553..29459b79bacb 100644 --- a/kernel/bpf/devmap.c +++ b/kernel/bpf/devmap.c @@ -512,7 +512,7 @@ static u32 dev_map_bpf_prog_run_skb(struct sk_buff *skb, struct bpf_dtab_netdev __skb_pull(skb, skb->mac_len); xdp.txq = &txq; - act = bpf_prog_run_generic_xdp(skb, &xdp, dst->xdp_prog); + act = bpf_prog_run_generic_xdp(skb, &xdp, dst->xdp_prog, NULL); switch (act) { case XDP_PASS: __skb_push(skb, skb->mac_len); diff --git a/net/core/dev.c b/net/core/dev.c index 8d49b2198d07..365c43ffc9c1 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -5230,10 +5230,10 @@ static struct netdev_rx_queue *netif_get_rxqueue(struct sk_buff *skb) } u32 bpf_prog_run_generic_xdp(struct sk_buff *skb, struct xdp_buff *xdp, - const struct bpf_prog *xdp_prog) + const struct bpf_prog *xdp_prog, + struct xdp_rxq_info *rxq) { void *orig_data, *orig_data_end, *hard_start; - struct netdev_rx_queue *rxqueue; bool orig_bcast, orig_host; u32 mac_len, frame_sz; __be16 orig_eth_type; @@ -5251,8 +5251,9 @@ u32 bpf_prog_run_generic_xdp(struct sk_buff *skb, struct xdp_buff *xdp, frame_sz = (void *)skb_end_pointer(skb) - hard_start; frame_sz += SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); - rxqueue = netif_get_rxqueue(skb); - xdp_init_buff(xdp, frame_sz, &rxqueue->xdp_rxq); + if (!rxq) + rxq = &netif_get_rxqueue(skb)->xdp_rxq; + xdp_init_buff(xdp, frame_sz, rxq); xdp_prepare_buff(xdp, hard_start, skb_headroom(skb) - mac_len, skb_headlen(skb) + mac_len, true); if (skb_is_nonlinear(skb)) { @@ -5331,17 +5332,23 @@ u32 bpf_prog_run_generic_xdp(struct sk_buff *skb, struct xdp_buff *xdp, return act; } -static int -netif_skb_check_for_xdp(struct sk_buff **pskb, const struct bpf_prog *prog) +static int netif_skb_check_for_xdp(struct sk_buff **pskb, + const struct bpf_prog *prog, + struct xdp_rxq_info *rxq) { struct sk_buff *skb = *pskb; int err, hroom, troom; + struct page_pool *pool; + pool = this_cpu_read(system_page_pool.pool); local_lock_nested_bh(&system_page_pool.bh_lock); - err = skb_cow_data_for_xdp(this_cpu_read(system_page_pool.pool), pskb, prog); + err = skb_cow_data_for_xdp(pool, pskb, prog); local_unlock_nested_bh(&system_page_pool.bh_lock); - if (!err) + if (!err) { + rxq->mem.type = MEM_TYPE_PAGE_POOL; + rxq->mem.id = pool->xdp_mem_id; return 0; + } /* In case we have to go down the path and also linearize, * then lets do the pskb_expand_head() work just once here. @@ -5379,13 +5386,13 @@ static u32 netif_receive_generic_xdp(struct sk_buff **pskb, if (skb_cloned(skb) || skb_is_nonlinear(skb) || skb_headroom(skb) < XDP_PACKET_HEADROOM) { - if (netif_skb_check_for_xdp(pskb, xdp_prog)) + if (netif_skb_check_for_xdp(pskb, xdp_prog, xdp->rxq)) goto do_drop; } __skb_pull(*pskb, mac_len); - act = bpf_prog_run_generic_xdp(*pskb, xdp, xdp_prog); + act = bpf_prog_run_generic_xdp(*pskb, xdp, xdp_prog, xdp->rxq); switch (act) { case XDP_REDIRECT: case XDP_TX: @@ -5442,7 +5449,10 @@ int do_xdp_generic(const struct bpf_prog *xdp_prog, struct sk_buff **pskb) struct bpf_net_context __bpf_net_ctx, *bpf_net_ctx; if (xdp_prog) { - struct xdp_buff xdp; + struct xdp_rxq_info rxq = {}; + struct xdp_buff xdp = { + .rxq = &rxq, + }; u32 act; int err; -- 2.51.0.618.g983fd99d29-goog