From: Alexander Lobakin

Place the fields of ice_{rx,tx}_ring that are used in the same pieces of
hotpath code closer to each other, and use
__cacheline_group_{begin,end}_aligned() to isolate the read-mostly,
read-write, and cold groups into separate cachelines, similarly to idpf.

Suggested-by: Jacob Keller
Reviewed-by: Aleksandr Loktionov
Reviewed-by: Jacob Keller
Signed-off-by: Alexander Lobakin
Reviewed-by: Paul Menzel
Signed-off-by: Tony Nguyen
---
 drivers/net/ethernet/intel/ice/ice_ethtool.c  |   1 -
 drivers/net/ethernet/intel/ice/ice_txrx.c     |   1 -
 drivers/net/ethernet/intel/ice/ice_txrx.h     | 122 ++++++++++--------
 drivers/net/ethernet/intel/ice/ice_txrx_lib.c |   3 -
 4 files changed, 70 insertions(+), 57 deletions(-)

diff --git a/drivers/net/ethernet/intel/ice/ice_ethtool.c b/drivers/net/ethernet/intel/ice/ice_ethtool.c
index 2c007669c197..c6bc29cfb8e6 100644
--- a/drivers/net/ethernet/intel/ice/ice_ethtool.c
+++ b/drivers/net/ethernet/intel/ice/ice_ethtool.c
@@ -3388,7 +3388,6 @@ ice_set_ringparam(struct net_device *netdev, struct ethtool_ringparam *ring,
 		 */
 		rx_rings[i].next_to_use = 0;
 		rx_rings[i].next_to_clean = 0;
-		rx_rings[i].next_to_alloc = 0;
 		*vsi->rx_rings[i] = rx_rings[i];
 	}
 	kfree(rx_rings);
diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.c b/drivers/net/ethernet/intel/ice/ice_txrx.c
index eea83b26b094..396326a6d5be 100644
--- a/drivers/net/ethernet/intel/ice/ice_txrx.c
+++ b/drivers/net/ethernet/intel/ice/ice_txrx.c
@@ -574,7 +574,6 @@ void ice_clean_rx_ring(struct ice_rx_ring *rx_ring)
 		       PAGE_SIZE);
 	memset(rx_ring->desc, 0, size);
 
-	rx_ring->next_to_alloc = 0;
 	rx_ring->next_to_clean = 0;
 	rx_ring->next_to_use = 0;
 }
diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.h b/drivers/net/ethernet/intel/ice/ice_txrx.h
index c51b1e60f717..b6547e1b7c42 100644
--- a/drivers/net/ethernet/intel/ice/ice_txrx.h
+++ b/drivers/net/ethernet/intel/ice/ice_txrx.h
@@ -267,34 +267,49 @@ struct ice_tstamp_ring {
 } ____cacheline_internodealigned_in_smp;
 
 struct ice_rx_ring {
-	/* CL1 - 1st cacheline starts here */
+	__cacheline_group_begin_aligned(read_mostly);
 	void *desc;			/* Descriptor ring memory */
 	struct page_pool *pp;
 	struct net_device *netdev;	/* netdev ring maps to */
-	struct ice_vsi *vsi;		/* Backreference to associated VSI */
 	struct ice_q_vector *q_vector;	/* Backreference to associated vector */
 	u8 __iomem *tail;
-	u16 q_index;			/* Queue number of ring */
-
-	u16 count;			/* Number of descriptors */
-	u16 reg_idx;			/* HW register index of the ring */
-	u16 next_to_alloc;
 
 	union {
 		struct libeth_fqe *rx_fqes;
 		struct xdp_buff **xdp_buf;
 	};
-	/* CL2 - 2nd cacheline starts here */
-	struct libeth_fqe *hdr_fqes;
+	u16 count;			/* Number of descriptors */
+	u8 ptp_rx;
+
+	u8 flags;
+#define ICE_RX_FLAGS_CRC_STRIP_DIS	BIT(2)
+#define ICE_RX_FLAGS_MULTIDEV		BIT(3)
+#define ICE_RX_FLAGS_RING_GCS		BIT(4)
+
+	u32 truesize;
+	struct page_pool *hdr_pp;
+	struct libeth_fqe *hdr_fqes;
+
+	struct bpf_prog *xdp_prog;
+	struct ice_tx_ring *xdp_ring;
+	struct xsk_buff_pool *xsk_pool;
+
+	/* stats structs */
+	struct ice_ring_stats *ring_stats;
+	struct ice_rx_ring *next;	/* pointer to next ring in q_vector */
+	u32 hdr_truesize;
+
+	struct xdp_rxq_info xdp_rxq;
+	__cacheline_group_end_aligned(read_mostly);
+
+	__cacheline_group_begin_aligned(read_write);
 	union {
 		struct libeth_xdp_buff_stash xdp;
 		struct libeth_xdp_buff *xsk;
 	};
-
-	/* CL3 - 3rd cacheline starts here */
 	union {
 		struct ice_pkt_ctx pkt_ctx;
 		struct {
@@ -302,75 +317,78 @@ struct ice_rx_ring {
 			__be16 vlan_proto;
 		};
 	};
-	struct bpf_prog *xdp_prog;
 	/* used in interrupt processing */
 	u16 next_to_use;
 	u16 next_to_clean;
+	__cacheline_group_end_aligned(read_write);
 
-	u32 hdr_truesize;
-	u32 truesize;
-
-	/* stats structs */
-	struct ice_ring_stats *ring_stats;
-
+	__cacheline_group_begin_aligned(cold);
 	struct rcu_head rcu;		/* to avoid race on free */
-	/* CL4 - 4th cacheline starts here */
+	struct ice_vsi *vsi;		/* Backreference to associated VSI */
 	struct ice_channel *ch;
-	struct ice_tx_ring *xdp_ring;
-	struct ice_rx_ring *next;	/* pointer to next ring in q_vector */
-	struct xsk_buff_pool *xsk_pool;
-	u16 rx_hdr_len;
-	u16 rx_buf_len;
+	dma_addr_t dma;			/* physical address of ring */
+	u16 q_index;			/* Queue number of ring */
+	u16 reg_idx;			/* HW register index of the ring */
 	u8 dcb_tc;			/* Traffic class of ring */
-	u8 ptp_rx;
-#define ICE_RX_FLAGS_CRC_STRIP_DIS	BIT(2)
-#define ICE_RX_FLAGS_MULTIDEV		BIT(3)
-#define ICE_RX_FLAGS_RING_GCS		BIT(4)
-	u8 flags;
-	/* CL5 - 5th cacheline starts here */
-	struct xdp_rxq_info xdp_rxq;
+
+	u16 rx_hdr_len;
+	u16 rx_buf_len;
+	__cacheline_group_end_aligned(cold);
 } ____cacheline_internodealigned_in_smp;
 
 struct ice_tx_ring {
-	/* CL1 - 1st cacheline starts here */
-	struct ice_tx_ring *next;	/* pointer to next ring in q_vector */
+	__cacheline_group_begin_aligned(read_mostly);
 	void *desc;			/* Descriptor ring memory */
 	struct device *dev;		/* Used for DMA mapping */
 	u8 __iomem *tail;
 	struct ice_tx_buf *tx_buf;
+
 	struct ice_q_vector *q_vector;	/* Backreference to associated vector */
 	struct net_device *netdev;	/* netdev ring maps to */
 	struct ice_vsi *vsi;		/* Backreference to associated VSI */
-	/* CL2 - 2nd cacheline starts here */
-	dma_addr_t dma;			/* physical address of ring */
-	struct xsk_buff_pool *xsk_pool;
-	u16 next_to_use;
-	u16 next_to_clean;
-	u16 q_handle;			/* Queue handle per TC */
-	u16 reg_idx;			/* HW register index of the ring */
+
 	u16 count;			/* Number of descriptors */
 	u16 q_index;			/* Queue number of ring */
-	u16 xdp_tx_active;
+
+	u8 flags;
+#define ICE_TX_FLAGS_RING_XDP		BIT(0)
+#define ICE_TX_FLAGS_RING_VLAN_L2TAG1	BIT(1)
+#define ICE_TX_FLAGS_RING_VLAN_L2TAG2	BIT(2)
+#define ICE_TX_FLAGS_TXTIME		BIT(3)
+
+	struct xsk_buff_pool *xsk_pool;
+
 	/* stats structs */
 	struct ice_ring_stats *ring_stats;
-	/* CL3 - 3rd cacheline starts here */
+	struct ice_tx_ring *next;	/* pointer to next ring in q_vector */
+
+	struct ice_tstamp_ring *tstamp_ring;
+	struct ice_ptp_tx *tx_tstamps;
+	__cacheline_group_end_aligned(read_mostly);
+
+	__cacheline_group_begin_aligned(read_write);
+	u16 next_to_use;
+	u16 next_to_clean;
+
+	u16 xdp_tx_active;
+	spinlock_t tx_lock;
+	__cacheline_group_end_aligned(read_write);
+
+	__cacheline_group_begin_aligned(cold);
 	struct rcu_head rcu;		/* to avoid race on free */
 	DECLARE_BITMAP(xps_state, ICE_TX_NBITS);	/* XPS Config State */
 	struct ice_channel *ch;
-	struct ice_ptp_tx *tx_tstamps;
-	spinlock_t tx_lock;
-	u32 txq_teid;			/* Added Tx queue TEID */
-	/* CL4 - 4th cacheline starts here */
-	struct ice_tstamp_ring *tstamp_ring;
-#define ICE_TX_FLAGS_RING_XDP		BIT(0)
-#define ICE_TX_FLAGS_RING_VLAN_L2TAG1	BIT(1)
-#define ICE_TX_FLAGS_RING_VLAN_L2TAG2	BIT(2)
-#define ICE_TX_FLAGS_TXTIME		BIT(3)
-	u8 flags;
+
+	dma_addr_t dma;			/* physical address of ring */
+	u16 q_handle;			/* Queue handle per TC */
+	u16 reg_idx;			/* HW register index of the ring */
 	u8 dcb_tc;			/* Traffic class of ring */
+	u16 quanta_prof_id;
+	u32 txq_teid;			/* Added Tx queue TEID */
+	__cacheline_group_end_aligned(cold);
 } ____cacheline_internodealigned_in_smp;
 
 static inline bool ice_ring_ch_enabled(struct ice_tx_ring *ring)
diff --git a/drivers/net/ethernet/intel/ice/ice_txrx_lib.c b/drivers/net/ethernet/intel/ice/ice_txrx_lib.c
index e68f3e5d35b4..e695a664e53d 100644
--- a/drivers/net/ethernet/intel/ice/ice_txrx_lib.c
+++ b/drivers/net/ethernet/intel/ice/ice_txrx_lib.c
@@ -20,9 +20,6 @@ void ice_release_rx_desc(struct ice_rx_ring *rx_ring, u16 val)
 
 	rx_ring->next_to_use = val;
 
-	/* update next to alloc since we have filled the ring */
-	rx_ring->next_to_alloc = val;
-
 	/* QRX_TAIL will be updated with any tail value, but hardware ignores
 	 * the lower 3 bits. This makes it so we only bump tail on meaningful
 	 * boundaries. Also, this allows us to bump tail on intervals of 8 up to
-- 
2.47.1
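
A note on the mechanics, for readers who have not seen the pattern before.
The next_to_alloc field is dropped outright rather than regrouped: the
sites removed above only ever wrote it (any read site would have had to be
touched for the patch to build), so the field carried no information. The
group markers come from include/linux/cache.h;
__cacheline_group_{begin,end}_aligned() expand to zero-size members
aligned and padded to SMP_CACHE_BYTES by default, so each group starts on
its own cacheline and per-packet writes to the read-write fields never
dirty the lines holding the read-mostly fields. A minimal sketch of the
pattern follows; struct demo_ring and its members are hypothetical and
only illustrate the grouping, they are not part of this patch:

	#include <linux/cache.h>
	#include <linux/types.h>

	struct demo_ring {
		/* set up once at ring init, only read in the hotpath */
		__cacheline_group_begin_aligned(read_mostly);
		void *desc;		/* descriptor ring memory */
		u16 count;		/* number of descriptors */
		__cacheline_group_end_aligned(read_mostly);

		/* written on every descriptor posted or completed */
		__cacheline_group_begin_aligned(read_write);
		u16 next_to_use;
		u16 next_to_clean;
		__cacheline_group_end_aligned(read_write);

		/* config / slow path only */
		__cacheline_group_begin_aligned(cold);
		u32 q_teid;
		__cacheline_group_end_aligned(cold);
	} ____cacheline_internodealigned_in_smp;

The same header also provides CACHELINE_ASSERT_GROUP_MEMBER() and
CACHELINE_ASSERT_GROUP_SIZE(), which a driver can use to assert the
resulting layout at build time so it does not regress silently.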