When queue allocation fails partway through, the error cleanup frees and NULLs apc->tx_qp and apc->rxqs. Multiple teardown paths such as mana_remove(), mana_change_mtu() recovery, and internal error handling in mana_alloc_queues() can subsequently call into functions that dereference these pointers without NULL checks: - mana_chn_setxdp() dereferences apc->rxqs[0], causing a NULL pointer dereference panic (CR2: 0000000000000000 at mana_chn_setxdp+0x26). - mana_destroy_vport() iterates apc->rxqs without a NULL check. - mana_fence_rqs() iterates apc->rxqs without a NULL check. - mana_dealloc_queues() iterates apc->tx_qp without a NULL check. Add NULL guards for apc->rxqs in mana_fence_rqs(), mana_destroy_vport(), and before the mana_chn_setxdp() call. Add a NULL guard for apc->tx_qp in mana_dealloc_queues() to skip TX queue draining when TX queues were never allocated or already freed. Fixes: ca9c54d2d6a5 ("net: mana: Add a driver for Microsoft Azure Network Adapter (MANA)") Reviewed-by: Haiyang Zhang Signed-off-by: Dipayaan Roy --- drivers/net/ethernet/microsoft/mana/mana_en.c | 70 +++++++++++-------- 1 file changed, 41 insertions(+), 29 deletions(-) diff --git a/drivers/net/ethernet/microsoft/mana/mana_en.c b/drivers/net/ethernet/microsoft/mana/mana_en.c index 9afc786b297a..0582803907a8 100644 --- a/drivers/net/ethernet/microsoft/mana/mana_en.c +++ b/drivers/net/ethernet/microsoft/mana/mana_en.c @@ -1727,6 +1727,9 @@ static void mana_fence_rqs(struct mana_port_context *apc) struct mana_rxq *rxq; int err; + if (!apc->rxqs) + return; + for (rxq_idx = 0; rxq_idx < apc->num_queues; rxq_idx++) { rxq = apc->rxqs[rxq_idx]; err = mana_fence_rq(apc, rxq); @@ -2858,13 +2861,16 @@ static void mana_destroy_vport(struct mana_port_context *apc) struct mana_rxq *rxq; u32 rxq_idx; - for (rxq_idx = 0; rxq_idx < apc->num_queues; rxq_idx++) { - rxq = apc->rxqs[rxq_idx]; - if (!rxq) - continue; + if (apc->rxqs) { - mana_destroy_rxq(apc, rxq, true); - apc->rxqs[rxq_idx] = NULL; + for (rxq_idx = 0; rxq_idx < apc->num_queues; rxq_idx++) { + rxq = apc->rxqs[rxq_idx]; + if (!rxq) + continue; + + mana_destroy_rxq(apc, rxq, true); + apc->rxqs[rxq_idx] = NULL; + } } mana_destroy_txq(apc); @@ -3269,7 +3275,8 @@ static int mana_dealloc_queues(struct net_device *ndev) if (apc->port_is_up) return -EINVAL; - mana_chn_setxdp(apc, NULL); + if (apc->rxqs) + mana_chn_setxdp(apc, NULL); if (gd->gdma_context->is_pf && !apc->ac->bm_hostmode) mana_pf_deregister_filter(apc); @@ -3287,33 +3294,38 @@ static int mana_dealloc_queues(struct net_device *ndev) * number of queues. */ - for (i = 0; i < apc->num_queues; i++) { - txq = &apc->tx_qp[i].txq; - tsleep = 1000; - while (atomic_read(&txq->pending_sends) > 0 && - time_before(jiffies, timeout)) { - usleep_range(tsleep, tsleep + 1000); - tsleep <<= 1; - } - if (atomic_read(&txq->pending_sends)) { - err = pcie_flr(to_pci_dev(gd->gdma_context->dev)); - if (err) { - netdev_err(ndev, "flr failed %d with %d pkts pending in txq %u\n", - err, atomic_read(&txq->pending_sends), - txq->gdma_txq_id); + if (apc->tx_qp) { + for (i = 0; i < apc->num_queues; i++) { + txq = &apc->tx_qp[i].txq; + tsleep = 1000; + while (atomic_read(&txq->pending_sends) > 0 && + time_before(jiffies, timeout)) { + usleep_range(tsleep, tsleep + 1000); + tsleep <<= 1; + } + if (atomic_read(&txq->pending_sends)) { + err = + pcie_flr(to_pci_dev(gd->gdma_context->dev)); + if (err) { + netdev_err(ndev, "flr failed %d with %d pkts pending in txq %u\n", + err, + atomic_read(&txq->pending_sends), + txq->gdma_txq_id); + } + break; } - break; } - } - for (i = 0; i < apc->num_queues; i++) { - txq = &apc->tx_qp[i].txq; - while ((skb = skb_dequeue(&txq->pending_skbs))) { - mana_unmap_skb(skb, apc); - dev_kfree_skb_any(skb); + for (i = 0; i < apc->num_queues; i++) { + txq = &apc->tx_qp[i].txq; + while ((skb = skb_dequeue(&txq->pending_skbs))) { + mana_unmap_skb(skb, apc); + dev_kfree_skb_any(skb); + } + atomic_set(&txq->pending_sends, 0); } - atomic_set(&txq->pending_sends, 0); } + /* We're 100% sure the queues can no longer be woken up, because * we're sure now mana_poll_tx_cq() can't be running. */ -- 2.43.0