From: Keita Morisaki Fix a race condition between ice_free_tx_tstamp_ring() and ice_tx_map() that can cause a NULL pointer dereference. ice_free_tx_tstamp_ring currently clears the ICE_TX_FLAGS_TXTIME flag after NULLing the tstamp_ring. This could allow a concurrent ice_tx_map call on another CPU to dereference the tstamp_ring, which could lead to a NULL pointer dereference. CPU A:ice_free_tx_tstamp_ring() | CPU B:ice_tx_map() --------------------------------|--------------------------------- tx_ring->tstamp_ring = NULL | | ice_is_txtime_cfg() -> true | tstamp_ring = tx_ring->tstamp_ring | tstamp_ring->count // NULL deref! flags &= ~ICE_TX_FLAGS_TXTIME | Fix by: 1. Reordering ice_free_tx_tstamp_ring() to clear the flag before NULLing the pointer, with smp_wmb() to ensure proper ordering. 2. Adding smp_rmb() in ice_tx_map() after the flag check to order the flag read before the pointer read, using READ_ONCE() for the pointer, and adding a NULL check as a safety net. 3. Converting tx_ring->flags from u8 to DECLARE_BITMAP() and using atomic bitops (set_bit(), clear_bit(), test_bit()) for all flag operations throughout the driver: - ICE_TX_RING_FLAGS_XDP - ICE_TX_RING_FLAGS_VLAN_L2TAG1 - ICE_TX_RING_FLAGS_VLAN_L2TAG2 - ICE_TX_RING_FLAGS_TXTIME Fixes: ccde82e909467 ("ice: add E830 Earliest TxTime First Offload support") Signed-off-by: Keita Morisaki Reviewed-by: Aleksandr Loktionov Tested-by: Rinitha S Signed-off-by: Jacob Keller --- drivers/net/ethernet/intel/ice/ice.h | 4 ++-- drivers/net/ethernet/intel/ice/ice_txrx.h | 16 ++++++++++------ drivers/net/ethernet/intel/ice/ice_dcb_lib.c | 2 +- drivers/net/ethernet/intel/ice/ice_lib.c | 4 ++-- drivers/net/ethernet/intel/ice/ice_txrx.c | 23 ++++++++++++++++------- 5 files changed, 31 insertions(+), 18 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice.h b/drivers/net/ethernet/intel/ice/ice.h index eb3a48330cc1..725b130dd3a2 100644 --- a/drivers/net/ethernet/intel/ice/ice.h +++ b/drivers/net/ethernet/intel/ice/ice.h @@ -753,7 +753,7 @@ static inline bool ice_is_xdp_ena_vsi(struct ice_vsi *vsi) static inline void ice_set_ring_xdp(struct ice_tx_ring *ring) { - ring->flags |= ICE_TX_FLAGS_RING_XDP; + set_bit(ICE_TX_RING_FLAGS_XDP, ring->flags); } /** @@ -778,7 +778,7 @@ static inline bool ice_is_txtime_ena(const struct ice_tx_ring *ring) */ static inline bool ice_is_txtime_cfg(const struct ice_tx_ring *ring) { - return !!(ring->flags & ICE_TX_FLAGS_TXTIME); + return test_bit(ICE_TX_RING_FLAGS_TXTIME, ring->flags); } /** diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.h b/drivers/net/ethernet/intel/ice/ice_txrx.h index b6547e1b7c42..5e517f219379 100644 --- a/drivers/net/ethernet/intel/ice/ice_txrx.h +++ b/drivers/net/ethernet/intel/ice/ice_txrx.h @@ -212,6 +212,14 @@ enum ice_rx_dtype { ICE_RX_DTYPE_SPLIT_ALWAYS = 2, }; +enum ice_tx_ring_flags { + ICE_TX_RING_FLAGS_XDP, + ICE_TX_RING_FLAGS_VLAN_L2TAG1, + ICE_TX_RING_FLAGS_VLAN_L2TAG2, + ICE_TX_RING_FLAGS_TXTIME, + ICE_TX_RING_FLAGS_NBITS, +}; + struct ice_pkt_ctx { u64 cached_phctime; __be16 vlan_proto; @@ -352,11 +360,7 @@ struct ice_tx_ring { u16 count; /* Number of descriptors */ u16 q_index; /* Queue number of ring */ - u8 flags; -#define ICE_TX_FLAGS_RING_XDP BIT(0) -#define ICE_TX_FLAGS_RING_VLAN_L2TAG1 BIT(1) -#define ICE_TX_FLAGS_RING_VLAN_L2TAG2 BIT(2) -#define ICE_TX_FLAGS_TXTIME BIT(3) + DECLARE_BITMAP(flags, ICE_TX_RING_FLAGS_NBITS); struct xsk_buff_pool *xsk_pool; @@ -398,7 +402,7 @@ static inline bool ice_ring_ch_enabled(struct ice_tx_ring *ring) static inline bool ice_ring_is_xdp(struct ice_tx_ring *ring) { - return !!(ring->flags & ICE_TX_FLAGS_RING_XDP); + return test_bit(ICE_TX_RING_FLAGS_XDP, ring->flags); } enum ice_container_type { diff --git a/drivers/net/ethernet/intel/ice/ice_dcb_lib.c b/drivers/net/ethernet/intel/ice/ice_dcb_lib.c index bd77f1c001ee..16aa25535152 100644 --- a/drivers/net/ethernet/intel/ice/ice_dcb_lib.c +++ b/drivers/net/ethernet/intel/ice/ice_dcb_lib.c @@ -943,7 +943,7 @@ ice_tx_prepare_vlan_flags_dcb(struct ice_tx_ring *tx_ring, /* if this is not already set it means a VLAN 0 + priority needs * to be offloaded */ - if (tx_ring->flags & ICE_TX_FLAGS_RING_VLAN_L2TAG2) + if (test_bit(ICE_TX_RING_FLAGS_VLAN_L2TAG2, tx_ring->flags)) first->tx_flags |= ICE_TX_FLAGS_HW_OUTER_SINGLE_VLAN; else first->tx_flags |= ICE_TX_FLAGS_HW_VLAN; diff --git a/drivers/net/ethernet/intel/ice/ice_lib.c b/drivers/net/ethernet/intel/ice/ice_lib.c index 689c6025ea82..837b71b7b2b7 100644 --- a/drivers/net/ethernet/intel/ice/ice_lib.c +++ b/drivers/net/ethernet/intel/ice/ice_lib.c @@ -1412,9 +1412,9 @@ static int ice_vsi_alloc_rings(struct ice_vsi *vsi) ring->count = vsi->num_tx_desc; ring->txq_teid = ICE_INVAL_TEID; if (dvm_ena) - ring->flags |= ICE_TX_FLAGS_RING_VLAN_L2TAG2; + set_bit(ICE_TX_RING_FLAGS_VLAN_L2TAG2, ring->flags); else - ring->flags |= ICE_TX_FLAGS_RING_VLAN_L2TAG1; + set_bit(ICE_TX_RING_FLAGS_VLAN_L2TAG1, ring->flags); WRITE_ONCE(vsi->tx_rings[i], ring); } diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.c b/drivers/net/ethernet/intel/ice/ice_txrx.c index 7be9c062949b..4ca1a0602307 100644 --- a/drivers/net/ethernet/intel/ice/ice_txrx.c +++ b/drivers/net/ethernet/intel/ice/ice_txrx.c @@ -190,9 +190,10 @@ void ice_free_tstamp_ring(struct ice_tx_ring *tx_ring) void ice_free_tx_tstamp_ring(struct ice_tx_ring *tx_ring) { ice_free_tstamp_ring(tx_ring); + clear_bit(ICE_TX_RING_FLAGS_TXTIME, tx_ring->flags); + smp_wmb(); /* order flag clear before pointer NULL */ kfree_rcu(tx_ring->tstamp_ring, rcu); - tx_ring->tstamp_ring = NULL; - tx_ring->flags &= ~ICE_TX_FLAGS_TXTIME; + WRITE_ONCE(tx_ring->tstamp_ring, NULL); } /** @@ -405,7 +406,7 @@ static int ice_alloc_tstamp_ring(struct ice_tx_ring *tx_ring) tx_ring->tstamp_ring = tstamp_ring; tstamp_ring->desc = NULL; tstamp_ring->count = ice_calc_ts_ring_count(tx_ring); - tx_ring->flags |= ICE_TX_FLAGS_TXTIME; + set_bit(ICE_TX_RING_FLAGS_TXTIME, tx_ring->flags); return 0; } @@ -1521,13 +1522,20 @@ ice_tx_map(struct ice_tx_ring *tx_ring, struct ice_tx_buf *first, return; if (ice_is_txtime_cfg(tx_ring)) { - struct ice_tstamp_ring *tstamp_ring = tx_ring->tstamp_ring; - u32 tstamp_count = tstamp_ring->count; - u32 j = tstamp_ring->next_to_use; + struct ice_tstamp_ring *tstamp_ring; + u32 tstamp_count, j; struct ice_ts_desc *ts_desc; struct timespec64 ts; u32 tstamp; + smp_rmb(); /* order flag read before pointer read */ + tstamp_ring = READ_ONCE(tx_ring->tstamp_ring); + if (unlikely(!tstamp_ring)) + goto ring_kick; + + tstamp_count = tstamp_ring->count; + j = tstamp_ring->next_to_use; + ts = ktime_to_timespec64(first->skb->tstamp); tstamp = ts.tv_nsec >> ICE_TXTIME_CTX_RESOLUTION_128NS; @@ -1555,6 +1563,7 @@ ice_tx_map(struct ice_tx_ring *tx_ring, struct ice_tx_buf *first, tstamp_ring->next_to_use = j; writel_relaxed(j, tstamp_ring->tail); } else { +ring_kick: writel_relaxed(i, tx_ring->tail); } return; @@ -1814,7 +1823,7 @@ ice_tx_prepare_vlan_flags(struct ice_tx_ring *tx_ring, struct ice_tx_buf *first) */ if (skb_vlan_tag_present(skb)) { first->vid = skb_vlan_tag_get(skb); - if (tx_ring->flags & ICE_TX_FLAGS_RING_VLAN_L2TAG2) + if (test_bit(ICE_TX_RING_FLAGS_VLAN_L2TAG2, tx_ring->flags)) first->tx_flags |= ICE_TX_FLAGS_HW_OUTER_SINGLE_VLAN; else first->tx_flags |= ICE_TX_FLAGS_HW_VLAN; -- 2.53.0.1066.g1eceb487f285