This patch enhances RX buffer handling in the mana driver by allocating
pages from a page pool and slicing them into MTU-sized fragments, rather
than dedicating a full page per packet. This approach is especially
beneficial on systems with large page sizes, such as 64KB.

Key improvements:

- Proper integration of page pool for RX buffer allocations.
- MTU-sized buffer slicing to improve memory utilization.
- Reduced overall per-RX-queue memory footprint.
- Automatic fallback to full-page buffers when:
  * Jumbo frames are enabled (MTU > PAGE_SIZE / 2).
  * The XDP path is active, to avoid complexities with fragment reuse.

Testing on VMs with 64KB pages shows around a 200% throughput
improvement. Memory efficiency is significantly improved due to reduced
wastage in page allocations. For example, with an MTU of 1500 we can now
fit 35 RX buffers in a single 64KB page, instead of one RX buffer per
page previously.

Tested:

- iperf3, iperf2, and nttcp benchmarks.
- Jumbo frames with MTU 9000.
- Native XDP programs (XDP_PASS, XDP_DROP, XDP_TX, XDP_REDIRECT) to
  exercise the XDP path in the driver.
- Memory leak detection (kmemleak).
- Driver load/unload, reboot, and stress scenarios.

Signed-off-by: Dipayaan Roy
Reviewed-by: Jacob Keller
Reviewed-by: Saurabh Sengar
Reviewed-by: Haiyang Zhang
---
Changes in v3:
- Retained the pre-allocated rxbuf logic for driver reconfig paths to
  better handle low-memory scenarios during reconfig.

Changes in v2:
- Fixed mana_xdp_set() to return an error code on failure instead of
  always returning 0.
- Moved all local variable declarations to the start of the function in
  mana_get_rxbuf_cfg().
- Removed unnecessary parentheses and wrapped lines to <= 80 chars.
- Used mana_xdp_get() for checking the bpf_prog.
- Factored repeated page put/free logic into a static helper function.
---
 .../net/ethernet/microsoft/mana/mana_bpf.c    |  36 ++++-
 drivers/net/ethernet/microsoft/mana/mana_en.c | 147 ++++++++++++------
 include/net/mana/mana.h                       |   4 +
 3 files changed, 137 insertions(+), 50 deletions(-)

diff --git a/drivers/net/ethernet/microsoft/mana/mana_bpf.c b/drivers/net/ethernet/microsoft/mana/mana_bpf.c
index d30721d4516f..1cf470b25167 100644
--- a/drivers/net/ethernet/microsoft/mana/mana_bpf.c
+++ b/drivers/net/ethernet/microsoft/mana/mana_bpf.c
@@ -174,6 +174,8 @@ static int mana_xdp_set(struct net_device *ndev, struct bpf_prog *prog,
 	struct mana_port_context *apc = netdev_priv(ndev);
 	struct bpf_prog *old_prog;
 	struct gdma_context *gc;
+	int err = 0;
+	bool dealloc_rxbufs_pre = false;
 
 	gc = apc->ac->gdma_dev->gdma_context;
 
@@ -198,15 +200,45 @@ static int mana_xdp_set(struct net_device *ndev, struct bpf_prog *prog,
 	if (old_prog)
 		bpf_prog_put(old_prog);
 
-	if (apc->port_is_up)
+	if (apc->port_is_up) {
+		/* Re-create rxq's after xdp prog was loaded or unloaded.
+		 * Ex: re create rxq's to switch from full pages to smaller
+		 * size page fragments when xdp prog is unloaded and vice-versa.
+		 */
+
+		/* Pre-allocate buffers to prevent failure in mana_attach later */
+		err = mana_pre_alloc_rxbufs(apc, ndev->mtu, apc->num_queues);
+		if (err) {
+			netdev_err(ndev,
+				   "Insufficient memory for rx buf config change\n");
+			goto out;
+		}
+		dealloc_rxbufs_pre = true;
+
+		err = mana_detach(ndev, false);
+		if (err) {
+			netdev_err(ndev, "mana_detach failed at xdp set: %d\n", err);
+			goto out;
+		}
+
+		err = mana_attach(ndev);
+		if (err) {
+			netdev_err(ndev, "mana_attach failed at xdp set: %d\n", err);
+			goto out;
+		}
+
 		mana_chn_setxdp(apc, prog);
+	}
 
 	if (prog)
 		ndev->max_mtu = MANA_XDP_MTU_MAX;
 	else
 		ndev->max_mtu = gc->adapter_mtu - ETH_HLEN;
 
-	return 0;
+out:
+	if (dealloc_rxbufs_pre)
+		mana_pre_dealloc_rxbufs(apc);
+	return err;
 }
 
 int mana_bpf(struct net_device *ndev, struct netdev_bpf *bpf)
diff --git a/drivers/net/ethernet/microsoft/mana/mana_en.c b/drivers/net/ethernet/microsoft/mana/mana_en.c
index a7973651ae51..591f3081d395 100644
--- a/drivers/net/ethernet/microsoft/mana/mana_en.c
+++ b/drivers/net/ethernet/microsoft/mana/mana_en.c
@@ -56,6 +56,14 @@ static bool mana_en_need_log(struct mana_port_context *apc, int err)
 	return true;
 }
 
+static void mana_put_rx_page(struct mana_rxq *rxq, struct page *page, bool from_pool)
+{
+	if (from_pool)
+		page_pool_put_full_page(rxq->page_pool, page, false);
+	else
+		put_page(page);
+}
+
 /* Microsoft Azure Network Adapter (MANA) functions */
 
 static int mana_open(struct net_device *ndev)
@@ -629,21 +637,40 @@ static void *mana_get_rxbuf_pre(struct mana_rxq *rxq, dma_addr_t *da)
 }
 
 /* Get RX buffer's data size, alloc size, XDP headroom based on MTU */
-static void mana_get_rxbuf_cfg(int mtu, u32 *datasize, u32 *alloc_size,
-			       u32 *headroom)
+static void mana_get_rxbuf_cfg(struct mana_port_context *apc,
+			       int mtu, u32 *datasize, u32 *alloc_size,
+			       u32 *headroom, u32 *frag_count)
 {
-	if (mtu > MANA_XDP_MTU_MAX)
-		*headroom = 0; /* no support for XDP */
-	else
-		*headroom = XDP_PACKET_HEADROOM;
+	u32 len, buf_size;
 
-	*alloc_size = SKB_DATA_ALIGN(mtu + MANA_RXBUF_PAD + *headroom);
+	/* Calculate datasize first (consistent across all cases) */
+	*datasize = mtu + ETH_HLEN;
 
-	/* Using page pool in this case, so alloc_size is PAGE_SIZE */
-	if (*alloc_size < PAGE_SIZE)
-		*alloc_size = PAGE_SIZE;
+	/* For xdp and jumbo frames make sure only one packet fits per page */
+	if (mtu + MANA_RXBUF_PAD > PAGE_SIZE / 2 || mana_xdp_get(apc)) {
+		if (mana_xdp_get(apc)) {
+			*headroom = XDP_PACKET_HEADROOM;
+			*alloc_size = PAGE_SIZE;
+		} else {
+			*headroom = 0; /* no support for XDP */
+			*alloc_size = SKB_DATA_ALIGN(mtu + MANA_RXBUF_PAD +
+						     *headroom);
+		}
 
-	*datasize = mtu + ETH_HLEN;
+		*frag_count = 1;
+		return;
+	}
+
+	/* Standard MTU case - optimize for multiple packets per page */
+	*headroom = 0;
+
+	/* Calculate base buffer size needed */
+	len = SKB_DATA_ALIGN(mtu + MANA_RXBUF_PAD + *headroom);
+	buf_size = ALIGN(len, MANA_RX_FRAG_ALIGNMENT);
+
+	/* Calculate how many packets can fit in a page */
+	*frag_count = PAGE_SIZE / buf_size;
+	*alloc_size = buf_size;
 }
 
 int mana_pre_alloc_rxbufs(struct mana_port_context *mpc, int new_mtu, int num_queues)
@@ -655,8 +682,9 @@ int mana_pre_alloc_rxbufs(struct mana_port_context *mpc, int new_mtu, int num_qu
 	void *va;
 	int i;
 
-	mana_get_rxbuf_cfg(new_mtu, &mpc->rxbpre_datasize,
-			   &mpc->rxbpre_alloc_size, &mpc->rxbpre_headroom);
+	mana_get_rxbuf_cfg(mpc, new_mtu, &mpc->rxbpre_datasize,
+			   &mpc->rxbpre_alloc_size, &mpc->rxbpre_headroom,
+			   &mpc->rxbpre_frag_count);
 
 	dev = mpc->ac->gdma_dev->gdma_context->dev;
 
@@ -1841,8 +1869,11 @@ static void mana_rx_skb(void *buf_va, bool from_pool,
 
 drop:
 	if (from_pool) {
-		page_pool_recycle_direct(rxq->page_pool,
-					 virt_to_head_page(buf_va));
+		if (rxq->frag_count == 1)
+			page_pool_recycle_direct(rxq->page_pool,
+						 virt_to_head_page(buf_va));
+		else
+			page_pool_free_va(rxq->page_pool, buf_va, true);
 	} else {
 		WARN_ON_ONCE(rxq->xdp_save_va); /* Save for reuse */
 
@@ -1858,33 +1889,43 @@ static void *mana_get_rxfrag(struct mana_rxq *rxq, struct device *dev,
 			     dma_addr_t *da, bool *from_pool)
 {
 	struct page *page;
+	u32 offset;
 	void *va;
 
-	*from_pool = false;
-	/* Reuse XDP dropped page if available */
-	if (rxq->xdp_save_va) {
-		va = rxq->xdp_save_va;
-		rxq->xdp_save_va = NULL;
-	} else {
-		page = page_pool_dev_alloc_pages(rxq->page_pool);
-		if (!page)
+	/* Don't use fragments for jumbo frames or XDP (i.e when fragment = 1 per page) */
+	if (rxq->frag_count == 1) {
+		/* Reuse XDP dropped page if available */
+		if (rxq->xdp_save_va) {
+			va = rxq->xdp_save_va;
+			page = virt_to_head_page(va);
+			rxq->xdp_save_va = NULL;
+		} else {
+			page = page_pool_dev_alloc_pages(rxq->page_pool);
+			if (!page)
+				return NULL;
+
+			*from_pool = true;
+			va = page_to_virt(page);
+		}
+
+		*da = dma_map_single(dev, va + rxq->headroom, rxq->datasize,
+				     DMA_FROM_DEVICE);
+		if (dma_mapping_error(dev, *da)) {
+			mana_put_rx_page(rxq, page, *from_pool);
 			return NULL;
+		}
 
-		*from_pool = true;
-		va = page_to_virt(page);
+		return va;
 	}
 
-	*da = dma_map_single(dev, va + rxq->headroom, rxq->datasize,
-			     DMA_FROM_DEVICE);
-	if (dma_mapping_error(dev, *da)) {
-		if (*from_pool)
-			page_pool_put_full_page(rxq->page_pool, page, false);
-		else
-			put_page(virt_to_head_page(va));
-
+	page = page_pool_dev_alloc_frag(rxq->page_pool, &offset, rxq->alloc_size);
+	if (!page)
 		return NULL;
-	}
+
+	va = page_to_virt(page) + offset;
+	*da = page_pool_get_dma_addr(page) + offset + rxq->headroom;
+	*from_pool = true;
 
 	return va;
 }
@@ -1901,9 +1942,9 @@ static void mana_refill_rx_oob(struct device *dev, struct mana_rxq *rxq,
 	va = mana_get_rxfrag(rxq, dev, &da, &from_pool);
 	if (!va)
 		return;
-
-	dma_unmap_single(dev, rxoob->sgl[0].address, rxq->datasize,
-			 DMA_FROM_DEVICE);
+	if (!rxoob->from_pool || rxq->frag_count == 1)
+		dma_unmap_single(dev, rxoob->sgl[0].address, rxq->datasize,
+				 DMA_FROM_DEVICE);
 
 	*old_buf = rxoob->buf_va;
 	*old_fp = rxoob->from_pool;
@@ -2314,15 +2355,15 @@ static void mana_destroy_rxq(struct mana_port_context *apc,
 		if (!rx_oob->buf_va)
 			continue;
 
-		dma_unmap_single(dev, rx_oob->sgl[0].address,
-				 rx_oob->sgl[0].size, DMA_FROM_DEVICE);
-
 		page = virt_to_head_page(rx_oob->buf_va);
 
-		if (rx_oob->from_pool)
-			page_pool_put_full_page(rxq->page_pool, page, false);
-		else
-			put_page(page);
+		if (rxq->frag_count == 1) {
+			dma_unmap_single(dev, rx_oob->sgl[0].address, rx_oob->sgl[0].size,
+					 DMA_FROM_DEVICE);
+			mana_put_rx_page(rxq, page, rx_oob->from_pool);
+		} else {
+			page_pool_free_va(rxq->page_pool, rx_oob->buf_va, true);
+		}
 
 		rx_oob->buf_va = NULL;
 	}
@@ -2428,11 +2469,22 @@ static int mana_create_page_pool(struct mana_rxq *rxq, struct gdma_context *gc)
 	struct page_pool_params pprm = {};
 	int ret;
 
-	pprm.pool_size = mpc->rx_queue_size;
+	pprm.pool_size = mpc->rx_queue_size / rxq->frag_count + 1;
 	pprm.nid = gc->numa_node;
 	pprm.napi = &rxq->rx_cq.napi;
 	pprm.netdev = rxq->ndev;
 	pprm.order = get_order(rxq->alloc_size);
+	pprm.queue_idx = rxq->rxq_idx;
+	pprm.dev = gc->dev;
+
+	/* Let the page pool do the dma map when page sharing with multiple fragments
+	 * enabled for rx buffers.
+	 */
+	if (rxq->frag_count > 1) {
+		pprm.flags = PP_FLAG_DMA_MAP | PP_FLAG_DMA_SYNC_DEV;
+		pprm.max_len = PAGE_SIZE;
+		pprm.dma_dir = DMA_FROM_DEVICE;
+	}
 
 	rxq->page_pool = page_pool_create(&pprm);
 
@@ -2471,9 +2523,8 @@ static struct mana_rxq *mana_create_rxq(struct mana_port_context *apc,
 	rxq->rxq_idx = rxq_idx;
 	rxq->rxobj = INVALID_MANA_HANDLE;
 
-	mana_get_rxbuf_cfg(ndev->mtu, &rxq->datasize, &rxq->alloc_size,
-			   &rxq->headroom);
-
+	mana_get_rxbuf_cfg(apc, ndev->mtu, &rxq->datasize, &rxq->alloc_size,
+			   &rxq->headroom, &rxq->frag_count);
 	/* Create page pool for RX queue */
 	err = mana_create_page_pool(rxq, gc);
 	if (err) {
diff --git a/include/net/mana/mana.h b/include/net/mana/mana.h
index e1030a7d2daa..0921485565c0 100644
--- a/include/net/mana/mana.h
+++ b/include/net/mana/mana.h
@@ -65,6 +65,8 @@ enum TRI_STATE {
 #define MANA_STATS_RX_COUNT 5
 #define MANA_STATS_TX_COUNT 11
 
+#define MANA_RX_FRAG_ALIGNMENT 64
+
 struct mana_stats_rx {
 	u64 packets;
 	u64 bytes;
@@ -328,6 +330,7 @@ struct mana_rxq {
 	u32 datasize;
 	u32 alloc_size;
 	u32 headroom;
+	u32 frag_count;
 
 	mana_handle_t rxobj;
 
@@ -510,6 +513,7 @@ struct mana_port_context {
 	u32 rxbpre_datasize;
 	u32 rxbpre_alloc_size;
 	u32 rxbpre_headroom;
+	u32 rxbpre_frag_count;
 
 	struct bpf_prog *bpf_prog;
 
-- 
2.43.0
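
For reference, the "35 RX buffers per 64KB page" example in the commit
message can be reproduced with the small userspace sketch below. It
mirrors the standard-MTU branch of mana_get_rxbuf_cfg(); the
skb_shared_info size (320 bytes) and the 64-byte cache line are assumed
values for a typical 64-bit build, not constants taken from this patch.

/* Standalone sketch of the RX fragment sizing math (not part of the patch). */
#include <stdio.h>

#define PAGE_SIZE_64K		65536	/* 64KB pages, as on the tested VMs */
#define SMP_CACHE_BYTES		64	/* assumed cache line size */
#define ETH_HLEN		14
#define SKB_SHINFO_SIZE		320	/* assumed sizeof(struct skb_shared_info) */
#define MANA_RX_FRAG_ALIGNMENT	64

#define ALIGN_UP(x, a)		(((x) + (a) - 1) & ~((a) - 1))
#define SKB_DATA_ALIGN(x)	ALIGN_UP(x, SMP_CACHE_BYTES)
#define MANA_RXBUF_PAD		(SKB_DATA_ALIGN(SKB_SHINFO_SIZE) + ETH_HLEN)

int main(void)
{
	unsigned int mtu = 1500, headroom = 0;
	unsigned int len, buf_size, frag_count;

	/* Same steps as the standard-MTU branch of mana_get_rxbuf_cfg() */
	len = SKB_DATA_ALIGN(mtu + MANA_RXBUF_PAD + headroom);
	buf_size = ALIGN_UP(len, MANA_RX_FRAG_ALIGNMENT);
	frag_count = PAGE_SIZE_64K / buf_size;

	/* With the assumptions above this prints: buf_size=1856 frag_count=35 */
	printf("buf_size=%u frag_count=%u\n", buf_size, frag_count);
	return 0;
}

With these assumptions each fragment occupies 1856 bytes, so a 64KB page
carries 35 RX buffers with 576 bytes left over, versus one buffer per
64KB page before this change.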