xdp_build_skb_from_zc() allocated xdp->frame_sz bytes from the per-cpu
system_page_pool and built the skb head with napi_build_skb(). The
latter places skb_shared_info at the tail of the buffer, but the helper
sized the allocation as if the whole frame_sz were usable for data.
Whenever the packet plus reserved headroom approached frame_sz, the
head memcpy spilled into shinfo, corrupting ->flags (spuriously setting
SKBFL_ZEROCOPY_ENABLE) and ->nr_frags with packet content, which then
drove skb_copy_ubufs() off the end of frags[] on the RX path:

  UBSAN: array-index-out-of-bounds in include/linux/skbuff.h:2541
  index 113 is out of range for type 'skb_frag_t [17]'
  skb_copy_ubufs+0x7da/0x960
  ip_local_deliver_finish+0xcd/0x110
  ice_napi_poll+0xe4/0x2a0 [ice]

The overrun bytes come straight from the packet, so an on-wire sender
can corrupt kernel memory remotely whenever the XDP program returns
XDP_PASS.

Rather than patch the sizing math, switch to the pattern used by other
in-tree AF_XDP zero-copy drivers such as mlx5 and i40e, which use
napi_alloc_skb() sized to the actual packet followed by skb_put_data().
This sizes the head exactly for the data being copied, drops the
system_page_pool local_lock from this path, and removes the structural
mismatch between frame_sz and the skb head buffer. Frags are copied
into pages allocated with alloc_page(), one per frag, matching the
other drivers.

Fixes: 560d958c6c68 ("xsk: add generic XSk &xdp_buff -> skb conversion")
Cc: stable@vger.kernel.org
Signed-off-by: Lorenz Brun
---
 drivers/net/ethernet/intel/ice/ice_xsk.c |  2 +-
 include/net/libeth/xsk.h                 |  2 +-
 include/net/xdp.h                        |  3 +-
 net/core/xdp.c                           | 72 ++++++++----------
 4 files changed, 29 insertions(+), 50 deletions(-)

diff --git a/drivers/net/ethernet/intel/ice/ice_xsk.c b/drivers/net/ethernet/intel/ice/ice_xsk.c
index 0643017541c35..6c01a14fde150 100644
--- a/drivers/net/ethernet/intel/ice/ice_xsk.c
+++ b/drivers/net/ethernet/intel/ice/ice_xsk.c
@@ -653,7 +653,7 @@ int ice_clean_rx_irq_zc(struct ice_rx_ring *rx_ring,
 
 construct_skb:
 		/* XDP_PASS path */
-		skb = xdp_build_skb_from_zc(first);
+		skb = xdp_build_skb_from_zc(&rx_ring->q_vector->napi, first);
 		if (!skb) {
 			xsk_buff_free(first);
 			first = NULL;
diff --git a/include/net/libeth/xsk.h b/include/net/libeth/xsk.h
index 82b5d21aae878..922b4587acd3f 100644
--- a/include/net/libeth/xsk.h
+++ b/include/net/libeth/xsk.h
@@ -468,7 +468,7 @@ __libeth_xsk_run_pass(struct libeth_xdp_buff *xdp,
 	if (act != LIBETH_XDP_PASS)
 		return act != LIBETH_XDP_ABORTED;
 
-	skb = xdp_build_skb_from_zc(&xdp->base);
+	skb = xdp_build_skb_from_zc(napi, &xdp->base);
 	if (unlikely(!skb)) {
 		libeth_xsk_buff_free_slow(xdp);
 		return true;
diff --git a/include/net/xdp.h b/include/net/xdp.h
index aa742f413c358..fb2452243fd36 100644
--- a/include/net/xdp.h
+++ b/include/net/xdp.h
@@ -375,7 +375,8 @@ void xdp_warn(const char *msg, const char *func, const int line);
 #define XDP_WARN(msg) xdp_warn(msg, __func__, __LINE__)
 
 struct sk_buff *xdp_build_skb_from_buff(const struct xdp_buff *xdp);
-struct sk_buff *xdp_build_skb_from_zc(struct xdp_buff *xdp);
+struct sk_buff *xdp_build_skb_from_zc(struct napi_struct *napi,
+				      struct xdp_buff *xdp);
 struct xdp_frame *xdp_convert_zc_to_xdp_frame(struct xdp_buff *xdp);
 struct sk_buff *__xdp_build_skb_from_frame(struct xdp_frame *xdpf,
 					   struct sk_buff *skb,
diff --git a/net/core/xdp.c b/net/core/xdp.c
index 9890a30584ba7..54005b64e6cbb 100644
--- a/net/core/xdp.c
+++ b/net/core/xdp.c
@@ -677,16 +677,14 @@ EXPORT_SYMBOL_GPL(xdp_build_skb_from_buff);
  * xdp_copy_frags_from_zc - copy frags from XSk buff to skb
  * @skb: skb to copy frags to
  * @xdp: XSk &xdp_buff from which the frags will be copied
- * @pp: &page_pool backing page allocation, if available
  *
  * Copy all frags from XSk &xdp_buff to the skb to pass it up the stack.
- * Allocate a new buffer for each frag, copy it and attach to the skb.
+ * Allocate a new page for each frag, copy it and attach to the skb.
  *
- * Return: true on success, false on netmem allocation fail.
+ * Return: true on success, false on page allocation fail.
  */
 static noinline bool xdp_copy_frags_from_zc(struct sk_buff *skb,
-					    const struct xdp_buff *xdp,
-					    struct page_pool *pp)
+					    const struct xdp_buff *xdp)
 {
 	struct skb_shared_info *sinfo = skb_shinfo(skb);
 	const struct skb_shared_info *xinfo;
@@ -699,20 +697,18 @@ static noinline bool xdp_copy_frags_from_zc(struct sk_buff *skb,
 	for (u32 i = 0; i < nr_frags; i++) {
 		const skb_frag_t *frag = &xinfo->frags[i];
 		u32 len = skb_frag_size(frag);
-		u32 offset, truesize = len;
 		struct page *page;
 
-		page = page_pool_dev_alloc(pp, &offset, &truesize);
+		page = alloc_page(GFP_ATOMIC | __GFP_NOWARN);
 		if (unlikely(!page)) {
 			sinfo->nr_frags = i;
 			return false;
 		}
 
-		memcpy(page_address(page) + offset, skb_frag_address(frag),
-		       LARGEST_ALIGN(len));
-		__skb_fill_page_desc_noacc(sinfo, i, page, offset, len);
+		memcpy(page_address(page), skb_frag_address(frag), len);
+		__skb_fill_page_desc_noacc(sinfo, i, page, 0, len);
 
-		tsize += truesize;
+		tsize += PAGE_SIZE;
 		if (page_is_pfmemalloc(page))
 			flags |= XDP_FLAGS_FRAGS_PF_MEMALLOC;
 	}
@@ -725,49 +721,34 @@ static noinline bool xdp_copy_frags_from_zc(struct sk_buff *skb,
 
 /**
  * xdp_build_skb_from_zc - create an skb from XSk &xdp_buff
+ * @napi: NAPI instance the buffer was received on (provides the skb cache)
  * @xdp: source XSk buff
  *
  * Similar to xdp_build_skb_from_buff(), but for XSk frames. Allocate an skb
- * head, new buffer for the head, copy the data and initialize the skb fields.
- * If there are frags, allocate new buffers for them and copy.
- * Buffers are allocated from the system percpu pools to try recycling them.
- * If new skb was built successfully, @xdp is returned to XSk pool's freelist.
- * On error, it remains untouched and the caller must take care of this.
+ * sized to the packet from the NAPI cache, copy the head data, and copy
+ * any frags into freshly allocated pages.
+ *
+ * If a new skb was built successfully, @xdp is returned to the XSk pool's
+ * freelist. On error, it remains untouched and the caller must take care
+ * of this.
  *
  * Return: new &sk_buff on success, %NULL on error.
  */
-struct sk_buff *xdp_build_skb_from_zc(struct xdp_buff *xdp)
+struct sk_buff *xdp_build_skb_from_zc(struct napi_struct *napi,
+				      struct xdp_buff *xdp)
 {
 	const struct xdp_rxq_info *rxq = xdp->rxq;
-	u32 len = xdp->data_end - xdp->data_meta;
-	u32 truesize = xdp->frame_sz;
-	struct sk_buff *skb = NULL;
-	struct page_pool *pp;
-	int metalen;
-	void *data;
+	u32 totallen = xdp->data_end - xdp->data_meta;
+	u32 metalen = xdp->data - xdp->data_meta;
+	struct sk_buff *skb;
 
-	if (!IS_ENABLED(CONFIG_PAGE_POOL))
+	skb = napi_alloc_skb(napi, totallen);
+	if (unlikely(!skb))
 		return NULL;
 
-	local_lock_nested_bh(&system_page_pool.bh_lock);
-	pp = this_cpu_read(system_page_pool.pool);
-	data = page_pool_dev_alloc_va(pp, &truesize);
-	if (unlikely(!data))
-		goto out;
-
-	skb = napi_build_skb(data, truesize);
-	if (unlikely(!skb)) {
-		page_pool_free_va(pp, data, true);
-		goto out;
-	}
-
-	skb_mark_for_recycle(skb);
-	skb_reserve(skb, xdp->data_meta - xdp->data_hard_start);
+	skb_put_data(skb, xdp->data_meta, totallen);
 
-	memcpy(__skb_put(skb, len), xdp->data_meta, LARGEST_ALIGN(len));
-
-	metalen = xdp->data - xdp->data_meta;
-	if (metalen > 0) {
+	if (metalen) {
 		skb_metadata_set(skb, metalen);
 		__skb_pull(skb, metalen);
 	}
@@ -775,18 +756,15 @@ struct sk_buff *xdp_build_skb_from_zc(struct xdp_buff *xdp)
 	skb_record_rx_queue(skb, rxq->queue_index);
 
 	if (unlikely(xdp_buff_has_frags(xdp)) &&
-	    unlikely(!xdp_copy_frags_from_zc(skb, xdp, pp))) {
+	    unlikely(!xdp_copy_frags_from_zc(skb, xdp))) {
 		napi_consume_skb(skb, true);
-		skb = NULL;
-		goto out;
+		return NULL;
 	}
 
 	xsk_buff_free(xdp);
 
 	skb->protocol = eth_type_trans(skb, rxq->dev);
 
-out:
-	local_unlock_nested_bh(&system_page_pool.bh_lock);
 	return skb;
 }
 EXPORT_SYMBOL_GPL(xdp_build_skb_from_zc);
-- 
2.51.2