On some ARM64 platforms with 4K PAGE_SIZE, page_pool fragment allocation in the RX refill path can cause 15-20% throughput regression under high connection counts (>16 TCP streams). Add an ethtool private flag "full-page-rx" that allows the user to force one RX buffer per page, bypassing the page_pool fragment path. This restores line-rate (180+ Gbps) performance on affected platforms. Usage: ethtool --set-priv-flags eth0 full-page-rx on There is no behavioral change by default. The flag must be explicitly enabled by the user or udev rule. The existing single-buffer-per-page logic for XDP and jumbo frames is consolidated into a new helper mana_use_single_rxbuf_per_page() which is now the single decision point for both the automatic and user-controlled paths. Signed-off-by: Dipayaan Roy --- drivers/net/ethernet/microsoft/mana/mana_en.c | 22 ++++- .../ethernet/microsoft/mana/mana_ethtool.c | 89 +++++++++++++++++++ include/net/mana/mana.h | 8 ++ 3 files changed, 117 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/microsoft/mana/mana_en.c b/drivers/net/ethernet/microsoft/mana/mana_en.c index 49c65cc1697c..59a1626c2be1 100644 --- a/drivers/net/ethernet/microsoft/mana/mana_en.c +++ b/drivers/net/ethernet/microsoft/mana/mana_en.c @@ -744,6 +744,25 @@ static void *mana_get_rxbuf_pre(struct mana_rxq *rxq, dma_addr_t *da) return va; } +static bool +mana_use_single_rxbuf_per_page(struct mana_port_context *apc, u32 mtu) +{ + /* On some platforms with 4K PAGE_SIZE, page_pool fragment allocation + * in the RX refill path (~2kB buffer) can cause significant throughput + * regression under high connection counts. Allow user to force one RX + * buffer per page via ethtool private flag to bypass the fragment + * path. + */ + if (apc->priv_flags & BIT(MANA_PRIV_FLAG_USE_FULL_PAGE_RXBUF)) + return true; + + /* For xdp and jumbo frames make sure only one packet fits per page. */ + if (mtu + MANA_RXBUF_PAD > PAGE_SIZE / 2 || mana_xdp_get(apc)) + return true; + + return false; +} + /* Get RX buffer's data size, alloc size, XDP headroom based on MTU */ static void mana_get_rxbuf_cfg(struct mana_port_context *apc, int mtu, u32 *datasize, u32 *alloc_size, @@ -754,8 +773,7 @@ static void mana_get_rxbuf_cfg(struct mana_port_context *apc, /* Calculate datasize first (consistent across all cases) */ *datasize = mtu + ETH_HLEN; - /* For xdp and jumbo frames make sure only one packet fits per page */ - if (mtu + MANA_RXBUF_PAD > PAGE_SIZE / 2 || mana_xdp_get(apc)) { + if (mana_use_single_rxbuf_per_page(apc, mtu)) { if (mana_xdp_get(apc)) { *headroom = XDP_PACKET_HEADROOM; *alloc_size = PAGE_SIZE; diff --git a/drivers/net/ethernet/microsoft/mana/mana_ethtool.c b/drivers/net/ethernet/microsoft/mana/mana_ethtool.c index a28ca461c135..0547c903f613 100644 --- a/drivers/net/ethernet/microsoft/mana/mana_ethtool.c +++ b/drivers/net/ethernet/microsoft/mana/mana_ethtool.c @@ -133,6 +133,10 @@ static const struct mana_stats_desc mana_phy_stats[] = { { "hc_tc7_tx_pause_phy", offsetof(struct mana_ethtool_phy_stats, tx_pause_tc7_phy) }, }; +static const char mana_priv_flags[MANA_PRIV_FLAG_MAX][ETH_GSTRING_LEN] = { + [MANA_PRIV_FLAG_USE_FULL_PAGE_RXBUF] = "full-page-rx" +}; + static int mana_get_sset_count(struct net_device *ndev, int stringset) { struct mana_port_context *apc = netdev_priv(ndev); @@ -144,6 +148,10 @@ static int mana_get_sset_count(struct net_device *ndev, int stringset) ARRAY_SIZE(mana_phy_stats) + ARRAY_SIZE(mana_hc_stats) + num_queues * (MANA_STATS_RX_COUNT + MANA_STATS_TX_COUNT); + + case ETH_SS_PRIV_FLAGS: + return MANA_PRIV_FLAG_MAX; + default: return -EINVAL; } @@ -192,6 +200,14 @@ static void mana_get_strings_stats(struct mana_port_context *apc, u8 **data) } } +static void mana_get_strings_priv_flags(u8 **data) +{ + int i; + + for (i = 0; i < MANA_PRIV_FLAG_MAX; i++) + ethtool_puts(data, mana_priv_flags[i]); +} + static void mana_get_strings(struct net_device *ndev, u32 stringset, u8 *data) { struct mana_port_context *apc = netdev_priv(ndev); @@ -200,6 +216,9 @@ static void mana_get_strings(struct net_device *ndev, u32 stringset, u8 *data) case ETH_SS_STATS: mana_get_strings_stats(apc, &data); break; + case ETH_SS_PRIV_FLAGS: + mana_get_strings_priv_flags(&data); + break; default: break; } @@ -590,6 +609,74 @@ static int mana_get_link_ksettings(struct net_device *ndev, return 0; } +static u32 mana_get_priv_flags(struct net_device *ndev) +{ + struct mana_port_context *apc = netdev_priv(ndev); + + return apc->priv_flags; +} + +static int mana_set_priv_flags(struct net_device *ndev, u32 priv_flags) +{ + struct mana_port_context *apc = netdev_priv(ndev); + u32 changed = apc->priv_flags ^ priv_flags; + u32 old_priv_flags = apc->priv_flags; + bool schedule_port_reset = false; + int err = 0; + + if (!changed) + return 0; + + /* Reject unknown bits */ + if (priv_flags & ~GENMASK(MANA_PRIV_FLAG_MAX - 1, 0)) + return -EINVAL; + + if (changed & BIT(MANA_PRIV_FLAG_USE_FULL_PAGE_RXBUF)) { + apc->priv_flags = priv_flags; + + if (!apc->port_is_up) { + /* Port is down, flag updated to apply on next up + * so just return. + */ + return 0; + } + + /* Pre-allocate buffers to prevent failure in mana_attach + * later + */ + err = mana_pre_alloc_rxbufs(apc, ndev->mtu, apc->num_queues); + if (err) { + netdev_err(ndev, + "Insufficient memory for new allocations\n"); + apc->priv_flags = old_priv_flags; + return err; + } + + err = mana_detach(ndev, false); + if (err) { + netdev_err(ndev, "mana_detach failed: %d\n", err); + apc->priv_flags = old_priv_flags; + goto out; + } + + err = mana_attach(ndev); + if (err) { + netdev_err(ndev, "mana_attach failed: %d\n", err); + apc->priv_flags = old_priv_flags; + schedule_port_reset = true; + } + } + +out: + mana_pre_dealloc_rxbufs(apc); + + if (err && schedule_port_reset) + queue_work(apc->ac->per_port_queue_reset_wq, + &apc->queue_reset_work); + + return err; +} + const struct ethtool_ops mana_ethtool_ops = { .supported_coalesce_params = ETHTOOL_COALESCE_RX_CQE_FRAMES, .get_ethtool_stats = mana_get_ethtool_stats, @@ -608,4 +695,6 @@ const struct ethtool_ops mana_ethtool_ops = { .set_ringparam = mana_set_ringparam, .get_link_ksettings = mana_get_link_ksettings, .get_link = ethtool_op_get_link, + .get_priv_flags = mana_get_priv_flags, + .set_priv_flags = mana_set_priv_flags, }; diff --git a/include/net/mana/mana.h b/include/net/mana/mana.h index 3336688fed5e..fd87e3d6c1f4 100644 --- a/include/net/mana/mana.h +++ b/include/net/mana/mana.h @@ -30,6 +30,12 @@ enum TRI_STATE { TRI_STATE_TRUE = 1 }; +/* MANA ethtool private flag bit positions */ +enum mana_priv_flag_bits { + MANA_PRIV_FLAG_USE_FULL_PAGE_RXBUF = 0, + MANA_PRIV_FLAG_MAX, +}; + /* Number of entries for hardware indirection table must be in power of 2 */ #define MANA_INDIRECT_TABLE_MAX_SIZE 512 #define MANA_INDIRECT_TABLE_DEF_SIZE 64 @@ -531,6 +537,8 @@ struct mana_port_context { u32 rxbpre_headroom; u32 rxbpre_frag_count; + u32 priv_flags; + struct bpf_prog *bpf_prog; /* Create num_queues EQs, SQs, SQ-CQs, RQs and RQ-CQs, respectively. */ -- 2.43.0