The MANA hardware supports a maximum of 30 scatter-gather entries (SGEs)
per TX WQE. Exceeding this limit can cause TX failures.

Add an ndo_features_check() callback to validate the SKB layout before
transmission. For GSO SKBs that would exceed the hardware SGE limit,
clear NETIF_F_GSO_MASK to force software segmentation in the stack. Add
a fallback in mana_start_xmit() to linearize non-GSO SKBs that still
exceed the SGE limit.

Return NETDEV_TX_BUSY only when mana_gd_post_work_request() fails with
-ENOSPC; route all other errors to free_sgl_ptr so resources are freed
and the TX drop is recorded.

Co-developed-by: Dipayaan Roy
Signed-off-by: Dipayaan Roy
Signed-off-by: Aditya Garg
---
 drivers/net/ethernet/microsoft/mana/mana_en.c | 48 +++++++++++++++++--
 include/net/mana/gdma.h                       |  6 ++-
 include/net/mana/mana.h                       |  1 +
 3 files changed, 49 insertions(+), 6 deletions(-)

diff --git a/drivers/net/ethernet/microsoft/mana/mana_en.c b/drivers/net/ethernet/microsoft/mana/mana_en.c
index 0142fd98392c..1f95b644eba1 100644
--- a/drivers/net/ethernet/microsoft/mana/mana_en.c
+++ b/drivers/net/ethernet/microsoft/mana/mana_en.c
@@ -11,6 +11,7 @@
 #include
 #include
 #include
+#include
 #include
 #include
@@ -289,6 +290,21 @@ netdev_tx_t mana_start_xmit(struct sk_buff *skb, struct net_device *ndev)
 	cq = &apc->tx_qp[txq_idx].tx_cq;
 	tx_stats = &txq->stats;
 
+	if (MAX_SKB_FRAGS + 2 > MAX_TX_WQE_SGL_ENTRIES &&
+	    skb_shinfo(skb)->nr_frags + 2 > MAX_TX_WQE_SGL_ENTRIES) {
+		/* GSO skb with Hardware SGE limit exceeded is not expected here
+		 * as they are handled in mana_features_check() callback
+		 */
+		if (skb_is_gso(skb))
+			netdev_warn_once(ndev, "GSO enabled skb exceeds max SGE limit\n");
+		if (skb_linearize(skb)) {
+			netdev_warn_once(ndev, "Failed to linearize skb with nr_frags=%d and is_gso=%d\n",
+					 skb_shinfo(skb)->nr_frags,
+					 skb_is_gso(skb));
+			goto tx_drop_count;
+		}
+	}
+
 	pkg.tx_oob.s_oob.vcq_num = cq->gdma_id;
 	pkg.tx_oob.s_oob.vsq_frame = txq->vsq_frame;
 
@@ -402,8 +418,6 @@ netdev_tx_t mana_start_xmit(struct sk_buff *skb, struct net_device *ndev)
 		}
 	}
 
-	WARN_ON_ONCE(pkg.wqe_req.num_sge > MAX_TX_WQE_SGL_ENTRIES);
-
 	if (pkg.wqe_req.num_sge <= ARRAY_SIZE(pkg.sgl_array)) {
 		pkg.wqe_req.sgl = pkg.sgl_array;
 	} else {
@@ -438,9 +452,13 @@ netdev_tx_t mana_start_xmit(struct sk_buff *skb, struct net_device *ndev)
 
 	if (err) {
 		(void)skb_dequeue_tail(&txq->pending_skbs);
+		mana_unmap_skb(skb, apc);
 		netdev_warn(ndev, "Failed to post TX OOB: %d\n", err);
-		err = NETDEV_TX_BUSY;
-		goto tx_busy;
+		if (err == -ENOSPC) {
+			err = NETDEV_TX_BUSY;
+			goto tx_busy;
+		}
+		goto free_sgl_ptr;
 	}
 
 	err = NETDEV_TX_OK;
@@ -478,6 +496,25 @@ netdev_tx_t mana_start_xmit(struct sk_buff *skb, struct net_device *ndev)
 	return NETDEV_TX_OK;
 }
 
+static netdev_features_t mana_features_check(struct sk_buff *skb,
+					     struct net_device *ndev,
+					     netdev_features_t features)
+{
+	if (MAX_SKB_FRAGS + 2 > MAX_TX_WQE_SGL_ENTRIES &&
+	    skb_shinfo(skb)->nr_frags + 2 > MAX_TX_WQE_SGL_ENTRIES) {
+		/* Exceeds HW SGE limit.
+		 * GSO case:
+		 * Disable GSO so the stack will software-segment the skb
+		 * into smaller skbs that fit the SGE budget.
+		 * Non-GSO case:
+		 * The xmit path will attempt skb_linearize() as a fallback.
+		 */
+		if (skb_is_gso(skb))
+			features &= ~NETIF_F_GSO_MASK;
+	}
+	return features;
+}
+
 static void mana_get_stats64(struct net_device *ndev,
 			     struct rtnl_link_stats64 *st)
 {
@@ -838,6 +875,7 @@ static const struct net_device_ops mana_devops = {
 	.ndo_open		= mana_open,
 	.ndo_stop		= mana_close,
 	.ndo_select_queue	= mana_select_queue,
+	.ndo_features_check	= mana_features_check,
 	.ndo_start_xmit		= mana_start_xmit,
 	.ndo_validate_addr	= eth_validate_addr,
 	.ndo_get_stats64	= mana_get_stats64,
@@ -1606,7 +1644,7 @@ static int mana_move_wq_tail(struct gdma_queue *wq, u32 num_units)
 	return 0;
 }
 
-static void mana_unmap_skb(struct sk_buff *skb, struct mana_port_context *apc)
+void mana_unmap_skb(struct sk_buff *skb, struct mana_port_context *apc)
 {
 	struct mana_skb_head *ash = (struct mana_skb_head *)skb->head;
 	struct gdma_context *gc = apc->ac->gdma_dev->gdma_context;
diff --git a/include/net/mana/gdma.h b/include/net/mana/gdma.h
index 57df78cfbf82..b35ecc58fbab 100644
--- a/include/net/mana/gdma.h
+++ b/include/net/mana/gdma.h
@@ -591,6 +591,9 @@ enum {
 /* Driver can self reset on FPGA Reconfig EQE notification */
 #define GDMA_DRV_CAP_FLAG_1_HANDLE_RECONFIG_EQE BIT(17)
 
+/* Driver supports linearizing the skb when num_sge exceeds hardware limit */
+#define GDMA_DRV_CAP_FLAG_1_SKB_LINEARIZE BIT(20)
+
 #define GDMA_DRV_CAP_FLAGS1 \
 	(GDMA_DRV_CAP_FLAG_1_EQ_SHARING_MULTI_VPORT | \
 	 GDMA_DRV_CAP_FLAG_1_NAPI_WKDONE_FIX | \
@@ -599,7 +602,8 @@ enum {
 	 GDMA_DRV_CAP_FLAG_1_DEV_LIST_HOLES_SUP | \
 	 GDMA_DRV_CAP_FLAG_1_DYNAMIC_IRQ_ALLOC_SUPPORT | \
 	 GDMA_DRV_CAP_FLAG_1_SELF_RESET_ON_EQE | \
-	 GDMA_DRV_CAP_FLAG_1_HANDLE_RECONFIG_EQE)
+	 GDMA_DRV_CAP_FLAG_1_HANDLE_RECONFIG_EQE | \
+	 GDMA_DRV_CAP_FLAG_1_SKB_LINEARIZE)
 
 #define GDMA_DRV_CAP_FLAGS2 0
 
diff --git a/include/net/mana/mana.h b/include/net/mana/mana.h
index 0921485565c0..330e1bb088bb 100644
--- a/include/net/mana/mana.h
+++ b/include/net/mana/mana.h
@@ -580,6 +580,7 @@ int mana_set_bw_clamp(struct mana_port_context *apc, u32 speed,
 void mana_query_phy_stats(struct mana_port_context *apc);
 int mana_pre_alloc_rxbufs(struct mana_port_context *apc, int mtu, int num_queues);
 void mana_pre_dealloc_rxbufs(struct mana_port_context *apc);
+void mana_unmap_skb(struct sk_buff *skb, struct mana_port_context *apc);
 
 extern const struct ethtool_ops mana_ethtool_ops;
 extern struct dentry *mana_debugfs_root;
-- 
2.43.0
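
For reviewers, here is a small standalone model of the SGE-budget decision the
patch applies. This is not driver code: the helper name classify(), the
tx_action enum, and the test values are illustrative only. It assumes, as the
checks in the patch do, that a packet consumes nr_frags + 2 SGEs against the
30-entry hardware budget (MAX_TX_WQE_SGL_ENTRIES).

/* Standalone sketch of the SGE-budget decision; builds with any C compiler. */
#include <stdbool.h>
#include <stdio.h>

#define MAX_TX_WQE_SGL_ENTRIES	30	/* hardware SGE limit per TX WQE */

enum tx_action {
	TX_SEND_AS_IS,		/* fits the SGE budget */
	TX_SOFTWARE_GSO,	/* features_check clears NETIF_F_GSO_MASK */
	TX_LINEARIZE,		/* start_xmit falls back to skb_linearize() */
};

/* Decide what to do with a packet given its fragment count and GSO state. */
static enum tx_action classify(unsigned int nr_frags, bool is_gso)
{
	if (nr_frags + 2 <= MAX_TX_WQE_SGL_ENTRIES)
		return TX_SEND_AS_IS;

	return is_gso ? TX_SOFTWARE_GSO : TX_LINEARIZE;
}

int main(void)
{
	printf("17 frags, GSO:  %d\n", classify(17, true));	/* 0: send as is */
	printf("29 frags, GSO:  %d\n", classify(29, true));	/* 1: software GSO */
	printf("29 frags, !GSO: %d\n", classify(29, false));	/* 2: linearize */
	return 0;
}

The same nr_frags + 2 comparison appears in both mana_features_check() and
mana_start_xmit(), so the GSO path (software segmentation) and the non-GSO
path (linearize fallback) stay in agreement about what exceeds the budget.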