From: Jacob Keller The ice_fetch_u64_stats_per_ring function takes a pointer to the syncp from the ring stats to synchronize reading of the packet stats. It also takes a *copy* of the ice_q_stats fields instead of a pointer to the stats. This completely defeats the point of using the u64_stats API. We pass the stats by value, so they are static at the point of reading within the u64_stats_fetch_retry loop. Simplify the function to take a pointer to the ice_ring_stats instead of two separate parameters. Additionally, since we never call this outside of ice_main.c, make it a static function. Reviewed-by: Aleksandr Loktionov Signed-off-by: Jacob Keller Reviewed-by: Simon Horman Tested-by: Rinitha S (A Contingent worker at Intel) Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice.h | 3 --- drivers/net/ethernet/intel/ice/ice_main.c | 24 +++++++++-------------- 2 files changed, 9 insertions(+), 18 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice.h b/drivers/net/ethernet/intel/ice/ice.h index 00f75d87c73f..def7efa15447 100644 --- a/drivers/net/ethernet/intel/ice/ice.h +++ b/drivers/net/ethernet/intel/ice/ice.h @@ -957,9 +957,6 @@ u16 ice_get_avail_rxq_count(struct ice_pf *pf); int ice_vsi_recfg_qs(struct ice_vsi *vsi, int new_rx, int new_tx, bool locked); void ice_update_vsi_stats(struct ice_vsi *vsi); void ice_update_pf_stats(struct ice_pf *pf); -void -ice_fetch_u64_stats_per_ring(struct u64_stats_sync *syncp, - struct ice_q_stats stats, u64 *pkts, u64 *bytes); int ice_up(struct ice_vsi *vsi); int ice_down(struct ice_vsi *vsi); int ice_down_up(struct ice_vsi *vsi); diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c index de488185cd4a..6dc22e811ae5 100644 --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@ -6824,25 +6824,23 @@ int ice_up(struct ice_vsi *vsi) /** * ice_fetch_u64_stats_per_ring - get packets and bytes stats per ring - * @syncp: pointer to u64_stats_sync - * @stats: stats that pkts and bytes count will be taken from + * @stats: pointer to ring stats structure * @pkts: packets stats counter * @bytes: bytes stats counter * * This function fetches stats from the ring considering the atomic operations * that needs to be performed to read u64 values in 32 bit machine. 
*/ -void -ice_fetch_u64_stats_per_ring(struct u64_stats_sync *syncp, - struct ice_q_stats stats, u64 *pkts, u64 *bytes) +static void ice_fetch_u64_stats_per_ring(struct ice_ring_stats *stats, + u64 *pkts, u64 *bytes) { unsigned int start; do { - start = u64_stats_fetch_begin(syncp); - *pkts = stats.pkts; - *bytes = stats.bytes; - } while (u64_stats_fetch_retry(syncp, start)); + start = u64_stats_fetch_begin(&stats->syncp); + *pkts = stats->stats.pkts; + *bytes = stats->stats.bytes; + } while (u64_stats_fetch_retry(&stats->syncp, start)); } /** @@ -6866,9 +6864,7 @@ ice_update_vsi_tx_ring_stats(struct ice_vsi *vsi, ring = READ_ONCE(rings[i]); if (!ring || !ring->ring_stats) continue; - ice_fetch_u64_stats_per_ring(&ring->ring_stats->syncp, - ring->ring_stats->stats, &pkts, - &bytes); + ice_fetch_u64_stats_per_ring(ring->ring_stats, &pkts, &bytes); vsi_stats->tx_packets += pkts; vsi_stats->tx_bytes += bytes; vsi->tx_restart += ring->ring_stats->tx_stats.restart_q; @@ -6912,9 +6908,7 @@ static void ice_update_vsi_ring_stats(struct ice_vsi *vsi) struct ice_ring_stats *ring_stats; ring_stats = ring->ring_stats; - ice_fetch_u64_stats_per_ring(&ring_stats->syncp, - ring_stats->stats, &pkts, - &bytes); + ice_fetch_u64_stats_per_ring(ring_stats, &pkts, &bytes); vsi_stats->rx_packets += pkts; vsi_stats->rx_bytes += bytes; vsi->rx_buf_failed += ring_stats->rx_stats.alloc_buf_failed; -- 2.47.1 From: Jacob Keller The ice_qp_reset_stats function resets the stats for all rings on a VSI. It currently behaves differently for Tx and Rx rings. For Rx rings, it only clears the rx_stats, which does not include the pkt and byte counts. For Tx rings and XDP rings, it clears only the pkt and byte counts. We could add extra memset calls to cover both the stats and relevant tx/rx stats fields. Instead, let's convert stats into a struct_group which contains both the pkts and bytes fields as well as the Tx or Rx stats, and remove the ice_q_stats structure entirely. The only remaining user of ice_q_stats is the ice_q_stats_len function in ice_ethtool.c, which just counts the number of fields. Replace this with a simple multiplication by 2. I find this to be simpler to reason about than relying on knowing the layout of the ice_q_stats structure. Now that the stats field of the ice_ring_stats covers all of the statistic values, the ice_qp_reset_stats function will properly zero out all of the fields.
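For readers unfamiliar with struct_group() (defined in <linux/stddef.h>), a minimal sketch of the idea follows; the names here are illustrative, not the exact driver layout:

	struct example_ring_stats {
		struct u64_stats_sync syncp;
		/* struct_group() emits the members both anonymously, so each
		 * field stays individually addressable, and as a named
		 * struct, so the whole set can be addressed as one unit.
		 */
		struct_group(stats,
			u64 pkts;
			u64 bytes;
		);
	};

	/* given struct example_ring_stats *rs, one memset now clears
	 * every counter in the group:
	 */
	memset(&rs->stats, 0, sizeof(rs->stats));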
Reviewed-by: Aleksandr Loktionov Signed-off-by: Jacob Keller Reviewed-by: Simon Horman Tested-by: Rinitha S (A Contingent worker at Intel) Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_base.c | 4 ++-- drivers/net/ethernet/intel/ice/ice_ethtool.c | 4 ++-- drivers/net/ethernet/intel/ice/ice_lib.c | 7 ++++--- drivers/net/ethernet/intel/ice/ice_txrx.h | 18 ++++++++---------- 4 files changed, 16 insertions(+), 17 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice_base.c b/drivers/net/ethernet/intel/ice/ice_base.c index eadb1e3d12b3..afbff8aa9ceb 100644 --- a/drivers/net/ethernet/intel/ice/ice_base.c +++ b/drivers/net/ethernet/intel/ice/ice_base.c @@ -1414,8 +1414,8 @@ static void ice_qp_reset_stats(struct ice_vsi *vsi, u16 q_idx) if (!vsi_stat) return; - memset(&vsi_stat->rx_ring_stats[q_idx]->rx_stats, 0, - sizeof(vsi_stat->rx_ring_stats[q_idx]->rx_stats)); + memset(&vsi_stat->rx_ring_stats[q_idx]->stats, 0, + sizeof(vsi_stat->rx_ring_stats[q_idx]->stats)); memset(&vsi_stat->tx_ring_stats[q_idx]->stats, 0, sizeof(vsi_stat->tx_ring_stats[q_idx]->stats)); if (vsi->xdp_rings) diff --git a/drivers/net/ethernet/intel/ice/ice_ethtool.c b/drivers/net/ethernet/intel/ice/ice_ethtool.c index 3565a5d96c6d..91d234f31c02 100644 --- a/drivers/net/ethernet/intel/ice/ice_ethtool.c +++ b/drivers/net/ethernet/intel/ice/ice_ethtool.c @@ -33,8 +33,8 @@ static int ice_q_stats_len(struct net_device *netdev) { struct ice_netdev_priv *np = netdev_priv(netdev); - return ((np->vsi->alloc_txq + np->vsi->alloc_rxq) * - (sizeof(struct ice_q_stats) / sizeof(u64))); + /* One packets and one bytes count per queue */ + return ((np->vsi->alloc_txq + np->vsi->alloc_rxq) * 2); } #define ICE_PF_STATS_LEN ARRAY_SIZE(ice_gstrings_pf_stats) diff --git a/drivers/net/ethernet/intel/ice/ice_lib.c b/drivers/net/ethernet/intel/ice/ice_lib.c index 6dabac51e1f9..ecf4ba5c79c3 100644 --- a/drivers/net/ethernet/intel/ice/ice_lib.c +++ b/drivers/net/ethernet/intel/ice/ice_lib.c @@ -3440,7 +3440,8 @@ int ice_vsi_cfg_tc(struct ice_vsi *vsi, u8 ena_tc) * * This function assumes that caller has acquired a u64_stats_sync lock. 
*/ -static void ice_update_ring_stats(struct ice_q_stats *stats, u64 pkts, u64 bytes) +static void ice_update_ring_stats(struct ice_ring_stats *stats, + u64 pkts, u64 bytes) { stats->bytes += bytes; stats->pkts += pkts; @@ -3455,7 +3456,7 @@ static void ice_update_ring_stats(struct ice_q_stats *stats, u64 pkts, u64 bytes void ice_update_tx_ring_stats(struct ice_tx_ring *tx_ring, u64 pkts, u64 bytes) { u64_stats_update_begin(&tx_ring->ring_stats->syncp); - ice_update_ring_stats(&tx_ring->ring_stats->stats, pkts, bytes); + ice_update_ring_stats(tx_ring->ring_stats, pkts, bytes); u64_stats_update_end(&tx_ring->ring_stats->syncp); } @@ -3468,7 +3469,7 @@ void ice_update_tx_ring_stats(struct ice_tx_ring *tx_ring, u64 pkts, u64 bytes) void ice_update_rx_ring_stats(struct ice_rx_ring *rx_ring, u64 pkts, u64 bytes) { u64_stats_update_begin(&rx_ring->ring_stats->syncp); - ice_update_ring_stats(&rx_ring->ring_stats->stats, pkts, bytes); + ice_update_ring_stats(rx_ring->ring_stats, pkts, bytes); u64_stats_update_end(&rx_ring->ring_stats->syncp); } diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.h b/drivers/net/ethernet/intel/ice/ice_txrx.h index e440c55d9e9f..d5ad76b25f16 100644 --- a/drivers/net/ethernet/intel/ice/ice_txrx.h +++ b/drivers/net/ethernet/intel/ice/ice_txrx.h @@ -129,11 +129,6 @@ struct ice_tx_offload_params { u8 header_len; }; -struct ice_q_stats { - u64 pkts; - u64 bytes; -}; - struct ice_txq_stats { u64 restart_q; u64 tx_busy; @@ -149,12 +144,15 @@ struct ice_rxq_stats { struct ice_ring_stats { struct rcu_head rcu; /* to avoid race on free */ - struct ice_q_stats stats; struct u64_stats_sync syncp; - union { - struct ice_txq_stats tx_stats; - struct ice_rxq_stats rx_stats; - }; + struct_group(stats, + u64 pkts; + u64 bytes; + union { + struct ice_txq_stats tx_stats; + struct ice_rxq_stats rx_stats; + }; + ); }; enum ice_ring_state_t { -- 2.47.1 From: Jacob Keller The __ice_update_sample and __ice_get_ethtool_stats functions directly access the pkts and bytes counters from the ring stats. A following change is going to update the fields to the u64_stats_t type, which will need to be accessed appropriately. This will ensure that the accesses do not cause load/store tearing. Add helper functions similar to the ones used for updating the stats values, and use them. This ensures use of the syncp pointer on 32-bit architectures. Once the fields are updated to u64_stats_t, this will then properly avoid tearing on all architectures.
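For context, the u64_stats API pairs a writer-side update section with this reader-side retry loop; a generic sketch of both halves (not the exact driver code):

	/* writer, e.g. the Tx/Rx cleanup path */
	u64_stats_update_begin(&rs->syncp);
	rs->pkts += pkts;
	rs->bytes += bytes;
	u64_stats_update_end(&rs->syncp);

	/* reader, e.g. ethtool or DIM sampling */
	unsigned int start;

	do {
		start = u64_stats_fetch_begin(&rs->syncp);
		pkts = rs->pkts;
		bytes = rs->bytes;
	} while (u64_stats_fetch_retry(&rs->syncp, start));

On 64-bit kernels (without PREEMPT_RT) the seqcount compiles away entirely; on 32-bit kernels it is what makes the pair of 64-bit reads appear atomic.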
Reviewed-by: Aleksandr Loktionov Signed-off-by: Jacob Keller Reviewed-by: Simon Horman Tested-by: Rinitha S (A Contingent worker at Intel) Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_ethtool.c | 26 +++++++++----- drivers/net/ethernet/intel/ice/ice_lib.c | 36 ++++++++++++++++++++ drivers/net/ethernet/intel/ice/ice_lib.h | 6 ++++ drivers/net/ethernet/intel/ice/ice_txrx.c | 29 ++++++++-------- 4 files changed, 75 insertions(+), 22 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice_ethtool.c b/drivers/net/ethernet/intel/ice/ice_ethtool.c index 91d234f31c02..2c007669c197 100644 --- a/drivers/net/ethernet/intel/ice/ice_ethtool.c +++ b/drivers/net/ethernet/intel/ice/ice_ethtool.c @@ -1942,25 +1942,35 @@ __ice_get_ethtool_stats(struct net_device *netdev, rcu_read_lock(); ice_for_each_alloc_txq(vsi, j) { + u64 pkts, bytes; + tx_ring = READ_ONCE(vsi->tx_rings[j]); - if (tx_ring && tx_ring->ring_stats) { - data[i++] = tx_ring->ring_stats->stats.pkts; - data[i++] = tx_ring->ring_stats->stats.bytes; - } else { + if (!tx_ring || !tx_ring->ring_stats) { data[i++] = 0; data[i++] = 0; + continue; } + + ice_fetch_tx_ring_stats(tx_ring, &pkts, &bytes); + + data[i++] = pkts; + data[i++] = bytes; } ice_for_each_alloc_rxq(vsi, j) { + u64 pkts, bytes; + rx_ring = READ_ONCE(vsi->rx_rings[j]); - if (rx_ring && rx_ring->ring_stats) { - data[i++] = rx_ring->ring_stats->stats.pkts; - data[i++] = rx_ring->ring_stats->stats.bytes; - } else { + if (!rx_ring || !rx_ring->ring_stats) { data[i++] = 0; data[i++] = 0; + continue; } + + ice_fetch_rx_ring_stats(rx_ring, &pkts, &bytes); + + data[i++] = pkts; + data[i++] = bytes; } rcu_read_unlock(); diff --git a/drivers/net/ethernet/intel/ice/ice_lib.c b/drivers/net/ethernet/intel/ice/ice_lib.c index ecf4ba5c79c3..c0f77d962c30 100644 --- a/drivers/net/ethernet/intel/ice/ice_lib.c +++ b/drivers/net/ethernet/intel/ice/ice_lib.c @@ -3473,6 +3473,42 @@ void ice_update_rx_ring_stats(struct ice_rx_ring *rx_ring, u64 pkts, u64 bytes) u64_stats_update_end(&rx_ring->ring_stats->syncp); } +/** + * ice_fetch_tx_ring_stats - Fetch Tx ring packet and byte counters + * @ring: ring to update + * @pkts: number of processed packets + * @bytes: number of processed bytes + */ +void ice_fetch_tx_ring_stats(const struct ice_tx_ring *ring, + u64 *pkts, u64 *bytes) +{ + unsigned int start; + + do { + start = u64_stats_fetch_begin(&ring->ring_stats->syncp); + *pkts = ring->ring_stats->pkts; + *bytes = ring->ring_stats->bytes; + } while (u64_stats_fetch_retry(&ring->ring_stats->syncp, start)); +} + +/** + * ice_fetch_rx_ring_stats - Fetch Rx ring packet and byte counters + * @ring: ring to read + * @pkts: number of processed packets + * @bytes: number of processed bytes + */ +void ice_fetch_rx_ring_stats(const struct ice_rx_ring *ring, + u64 *pkts, u64 *bytes) +{ + unsigned int start; + + do { + start = u64_stats_fetch_begin(&ring->ring_stats->syncp); + *pkts = ring->ring_stats->pkts; + *bytes = ring->ring_stats->bytes; + } while (u64_stats_fetch_retry(&ring->ring_stats->syncp, start)); +} + /** * ice_is_dflt_vsi_in_use - check if the default forwarding VSI is being used * @pi: port info of the switch with default VSI diff --git a/drivers/net/ethernet/intel/ice/ice_lib.h b/drivers/net/ethernet/intel/ice/ice_lib.h index 2cb1eb98b9da..49454d98dcfe 100644 --- a/drivers/net/ethernet/intel/ice/ice_lib.h +++ b/drivers/net/ethernet/intel/ice/ice_lib.h @@ -92,6 +92,12 @@ void ice_update_tx_ring_stats(struct ice_tx_ring *ring, u64 pkts, u64 bytes); void 
ice_update_rx_ring_stats(struct ice_rx_ring *ring, u64 pkts, u64 bytes); +void ice_fetch_tx_ring_stats(const struct ice_tx_ring *ring, + u64 *pkts, u64 *bytes); + +void ice_fetch_rx_ring_stats(const struct ice_rx_ring *ring, + u64 *pkts, u64 *bytes); + void ice_write_intrl(struct ice_q_vector *q_vector, u8 intrl); void ice_write_itr(struct ice_ring_container *rc, u16 itr); void ice_set_q_vector_intrl(struct ice_q_vector *q_vector); diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.c b/drivers/net/ethernet/intel/ice/ice_txrx.c index ad76768a4232..f4196347b23a 100644 --- a/drivers/net/ethernet/intel/ice/ice_txrx.c +++ b/drivers/net/ethernet/intel/ice/ice_txrx.c @@ -1087,35 +1087,36 @@ static void __ice_update_sample(struct ice_q_vector *q_vector, struct dim_sample *sample, bool is_tx) { - u64 packets = 0, bytes = 0; + u64 total_packets = 0, total_bytes = 0, pkts, bytes; if (is_tx) { struct ice_tx_ring *tx_ring; ice_for_each_tx_ring(tx_ring, *rc) { - struct ice_ring_stats *ring_stats; - - ring_stats = tx_ring->ring_stats; - if (!ring_stats) + if (!tx_ring->ring_stats) continue; - packets += ring_stats->stats.pkts; - bytes += ring_stats->stats.bytes; + + ice_fetch_tx_ring_stats(tx_ring, &pkts, &bytes); + + total_packets += pkts; + total_bytes += bytes; } } else { struct ice_rx_ring *rx_ring; ice_for_each_rx_ring(rx_ring, *rc) { - struct ice_ring_stats *ring_stats; - - ring_stats = rx_ring->ring_stats; - if (!ring_stats) + if (!rx_ring->ring_stats) continue; - packets += ring_stats->stats.pkts; - bytes += ring_stats->stats.bytes; + + ice_fetch_rx_ring_stats(rx_ring, &pkts, &bytes); + + total_packets += pkts; + total_bytes += bytes; } } - dim_update_sample(q_vector->total_events, packets, bytes, sample); + dim_update_sample(q_vector->total_events, + total_packets, total_bytes, sample); sample->comp_ctr = 0; /* if dim settings get stale, like when not updated for 1 -- 2.47.1 From: Jacob Keller The ice Tx/Rx hotpath has a few statistics counters for tracking unexpected events. These values are stored as u64 but are not accumulated using the u64_stats API. This could result in torn loads/stores on some architectures. Even some 64-bit architectures could have issues, since the fields are not read or written using READ_ONCE() or WRITE_ONCE(). A following change is going to refactor the stats accumulator code to use the u64_stats API for all of these stats, and to use u64_stats_read and u64_stats_inc properly to prevent load/store tearing on all architectures. Using u64_stats_inc and the syncp pointer is slightly verbose and would be duplicated in a number of places in the Tx and Rx hot path. Add accessor macros for the cases where only a single stat value is touched at once. To keep lines short, also shorten the stats names and convert ice_txq_stats and ice_rxq_stats to struct_group. This will ease the transition to properly using the u64_stats API in the following change.
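For illustration, a hot-path site then needs only one line, e.g. ice_stats_inc(tx_ring->ring_stats, tx_busy). Once the following change converts the counters to u64_stats_t, that macro expands to the full u64_stats sequence; the open-coded equivalent is:

	u64_stats_update_begin(&ring->ring_stats->syncp);
	u64_stats_inc(&ring->ring_stats->tx_busy);
	u64_stats_update_end(&ring->ring_stats->syncp);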
Reviewed-by: Aleksandr Loktionov Signed-off-by: Jacob Keller Reviewed-by: Simon Horman Tested-by: Rinitha S (A Contingent worker at Intel) Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_main.c | 16 +++--- drivers/net/ethernet/intel/ice/ice_txrx.c | 16 +++--- drivers/net/ethernet/intel/ice/ice_txrx.h | 55 ++++++++++++++----- drivers/net/ethernet/intel/ice/ice_txrx_lib.c | 2 +- drivers/net/ethernet/intel/ice/ice_txrx_lib.h | 2 +- drivers/net/ethernet/intel/ice/ice_xsk.c | 4 +- 6 files changed, 60 insertions(+), 35 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c index 6dc22e811ae5..ced6ad86c8b2 100644 --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@ -159,8 +159,8 @@ static void ice_check_for_hang_subtask(struct ice_pf *pf) * prev_pkt would be negative if there was no * pending work. */ - packets = ring_stats->stats.pkts & INT_MAX; - if (ring_stats->tx_stats.prev_pkt == packets) { + packets = ice_stats_read(ring_stats, pkts) & INT_MAX; + if (ring_stats->tx.prev_pkt == packets) { /* Trigger sw interrupt to revive the queue */ ice_trigger_sw_intr(hw, tx_ring->q_vector); continue; @@ -170,7 +170,7 @@ static void ice_check_for_hang_subtask(struct ice_pf *pf) * to ice_get_tx_pending() */ smp_rmb(); - ring_stats->tx_stats.prev_pkt = + ring_stats->tx.prev_pkt = ice_get_tx_pending(tx_ring) ? packets : -1; } } @@ -6867,9 +6867,9 @@ ice_update_vsi_tx_ring_stats(struct ice_vsi *vsi, ice_fetch_u64_stats_per_ring(ring->ring_stats, &pkts, &bytes); vsi_stats->tx_packets += pkts; vsi_stats->tx_bytes += bytes; - vsi->tx_restart += ring->ring_stats->tx_stats.restart_q; - vsi->tx_busy += ring->ring_stats->tx_stats.tx_busy; - vsi->tx_linearize += ring->ring_stats->tx_stats.tx_linearize; + vsi->tx_restart += ring->ring_stats->tx_restart_q; + vsi->tx_busy += ring->ring_stats->tx_busy; + vsi->tx_linearize += ring->ring_stats->tx_linearize; } } @@ -6911,8 +6911,8 @@ static void ice_update_vsi_ring_stats(struct ice_vsi *vsi) ice_fetch_u64_stats_per_ring(ring_stats, &pkts, &bytes); vsi_stats->rx_packets += pkts; vsi_stats->rx_bytes += bytes; - vsi->rx_buf_failed += ring_stats->rx_stats.alloc_buf_failed; - vsi->rx_page_failed += ring_stats->rx_stats.alloc_page_failed; + vsi->rx_buf_failed += ring_stats->rx_buf_failed; + vsi->rx_page_failed += ring_stats->rx_page_failed; } /* update XDP Tx rings counters */ diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.c b/drivers/net/ethernet/intel/ice/ice_txrx.c index f4196347b23a..eea83b26b094 100644 --- a/drivers/net/ethernet/intel/ice/ice_txrx.c +++ b/drivers/net/ethernet/intel/ice/ice_txrx.c @@ -379,7 +379,7 @@ static bool ice_clean_tx_irq(struct ice_tx_ring *tx_ring, int napi_budget) if (netif_tx_queue_stopped(txring_txq(tx_ring)) && !test_bit(ICE_VSI_DOWN, vsi->state)) { netif_tx_wake_queue(txring_txq(tx_ring)); - ++tx_ring->ring_stats->tx_stats.restart_q; + ice_stats_inc(tx_ring->ring_stats, tx_restart_q); } } @@ -499,7 +499,7 @@ int ice_setup_tx_ring(struct ice_tx_ring *tx_ring) tx_ring->next_to_use = 0; tx_ring->next_to_clean = 0; - tx_ring->ring_stats->tx_stats.prev_pkt = -1; + tx_ring->ring_stats->tx.prev_pkt = -1; return 0; err: @@ -849,7 +849,7 @@ bool ice_alloc_rx_bufs(struct ice_rx_ring *rx_ring, unsigned int cleaned_count) addr = libeth_rx_alloc(&fq, ntu); if (addr == DMA_MAPPING_ERROR) { - rx_ring->ring_stats->rx_stats.alloc_page_failed++; + ice_stats_inc(rx_ring->ring_stats, rx_page_failed); break; } @@ -863,7 +863,7 @@ bool 
ice_alloc_rx_bufs(struct ice_rx_ring *rx_ring, unsigned int cleaned_count) addr = libeth_rx_alloc(&hdr_fq, ntu); if (addr == DMA_MAPPING_ERROR) { - rx_ring->ring_stats->rx_stats.alloc_page_failed++; + ice_stats_inc(rx_ring->ring_stats, rx_page_failed); libeth_rx_recycle_slow(fq.fqes[ntu].netmem); break; @@ -1045,7 +1045,7 @@ static int ice_clean_rx_irq(struct ice_rx_ring *rx_ring, int budget) /* exit if we failed to retrieve a buffer */ if (!skb) { libeth_xdp_return_buff_slow(xdp); - rx_ring->ring_stats->rx_stats.alloc_buf_failed++; + ice_stats_inc(rx_ring->ring_stats, rx_buf_failed); continue; } @@ -1363,7 +1363,7 @@ static int __ice_maybe_stop_tx(struct ice_tx_ring *tx_ring, unsigned int size) /* A reprieve! - use start_queue because it doesn't call schedule */ netif_tx_start_queue(txring_txq(tx_ring)); - ++tx_ring->ring_stats->tx_stats.restart_q; + ice_stats_inc(tx_ring->ring_stats, tx_restart_q); return 0; } @@ -2165,7 +2165,7 @@ ice_xmit_frame_ring(struct sk_buff *skb, struct ice_tx_ring *tx_ring) if (__skb_linearize(skb)) goto out_drop; count = ice_txd_use_count(skb->len); - tx_ring->ring_stats->tx_stats.tx_linearize++; + ice_stats_inc(tx_ring->ring_stats, tx_linearize); } /* need: 1 descriptor per page * PAGE_SIZE/ICE_MAX_DATA_PER_TXD, @@ -2176,7 +2176,7 @@ ice_xmit_frame_ring(struct sk_buff *skb, struct ice_tx_ring *tx_ring) */ if (ice_maybe_stop_tx(tx_ring, count + ICE_DESCS_PER_CACHE_LINE + ICE_DESCS_FOR_CTX_DESC)) { - tx_ring->ring_stats->tx_stats.tx_busy++; + ice_stats_inc(tx_ring->ring_stats, tx_busy); return NETDEV_TX_BUSY; } diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.h b/drivers/net/ethernet/intel/ice/ice_txrx.h index d5ad76b25f16..e7e4991c0934 100644 --- a/drivers/net/ethernet/intel/ice/ice_txrx.h +++ b/drivers/net/ethernet/intel/ice/ice_txrx.h @@ -129,19 +129,6 @@ struct ice_tx_offload_params { u8 header_len; }; -struct ice_txq_stats { - u64 restart_q; - u64 tx_busy; - u64 tx_linearize; - int prev_pkt; /* negative if no pending Tx descriptors */ -}; - -struct ice_rxq_stats { - u64 non_eop_descs; - u64 alloc_page_failed; - u64 alloc_buf_failed; -}; - struct ice_ring_stats { struct rcu_head rcu; /* to avoid race on free */ struct u64_stats_sync syncp; @@ -149,12 +136,50 @@ struct ice_ring_stats { u64 pkts; u64 bytes; union { - struct ice_txq_stats tx_stats; - struct ice_rxq_stats rx_stats; + struct_group(tx, + u64 tx_restart_q; + u64 tx_busy; + u64 tx_linearize; + /* negative if no pending Tx descriptors */ + int prev_pkt; + ); + struct_group(rx, + u64 rx_non_eop_descs; + u64 rx_page_failed; + u64 rx_buf_failed; + ); }; ); }; +/** + * ice_stats_read - Read a single ring stat value + * @stats: pointer to ring_stats structure for a queue + * @member: the ice_ring_stats member to read + * + * Shorthand for reading a single 64-bit stat value from struct + * ice_ring_stats. + * + * Return: the value of the requested stat. + */ +#define ice_stats_read(stats, member) ({ \ + struct ice_ring_stats *__stats = (stats); \ + __stats->member; \ +}) + +/** + * ice_stats_inc - Increment a single ring stat value + * @stats: pointer to the ring_stats structure for a queue + * @member: the ice_ring_stats member to increment + * + * Shorthand for incrementing a single 64-bit stat value in struct + * ice_ring_stats. 
+ */ +#define ice_stats_inc(stats, member) do { \ + struct ice_ring_stats *__stats = (stats); \ + __stats->member++; \ +} while (0) + enum ice_ring_state_t { ICE_TX_XPS_INIT_DONE, ICE_TX_NBITS, diff --git a/drivers/net/ethernet/intel/ice/ice_txrx_lib.c b/drivers/net/ethernet/intel/ice/ice_txrx_lib.c index 956da38d63b0..e68f3e5d35b4 100644 --- a/drivers/net/ethernet/intel/ice/ice_txrx_lib.c +++ b/drivers/net/ethernet/intel/ice/ice_txrx_lib.c @@ -480,7 +480,7 @@ int __ice_xmit_xdp_ring(struct xdp_buff *xdp, struct ice_tx_ring *xdp_ring, return ICE_XDP_CONSUMED; busy: - xdp_ring->ring_stats->tx_stats.tx_busy++; + ice_stats_inc(xdp_ring->ring_stats, tx_busy); return ICE_XDP_CONSUMED; } diff --git a/drivers/net/ethernet/intel/ice/ice_txrx_lib.h b/drivers/net/ethernet/intel/ice/ice_txrx_lib.h index 6a3f10f7a53f..f17990b68b62 100644 --- a/drivers/net/ethernet/intel/ice/ice_txrx_lib.h +++ b/drivers/net/ethernet/intel/ice/ice_txrx_lib.h @@ -38,7 +38,7 @@ ice_is_non_eop(const struct ice_rx_ring *rx_ring, if (likely(ice_test_staterr(rx_desc->wb.status_error0, ICE_RXD_EOF))) return false; - rx_ring->ring_stats->rx_stats.non_eop_descs++; + ice_stats_inc(rx_ring->ring_stats, rx_non_eop_descs); return true; } diff --git a/drivers/net/ethernet/intel/ice/ice_xsk.c b/drivers/net/ethernet/intel/ice/ice_xsk.c index 989ff1fd9110..953e68ed0f9a 100644 --- a/drivers/net/ethernet/intel/ice/ice_xsk.c +++ b/drivers/net/ethernet/intel/ice/ice_xsk.c @@ -497,7 +497,7 @@ static int ice_xmit_xdp_tx_zc(struct xdp_buff *xdp, return ICE_XDP_TX; busy: - xdp_ring->ring_stats->tx_stats.tx_busy++; + ice_stats_inc(xdp_ring->ring_stats, tx_busy); return ICE_XDP_CONSUMED; } @@ -659,7 +659,7 @@ int ice_clean_rx_irq_zc(struct ice_rx_ring *rx_ring, xsk_buff_free(first); first = NULL; - rx_ring->ring_stats->rx_stats.alloc_buf_failed++; + ice_stats_inc(rx_ring->ring_stats, rx_buf_failed); continue; } -- 2.47.1 From: Jacob Keller After several cleanups, the ice driver is now finally ready to convert all Tx and Rx ring stats to u64_stats_t and to proper use of the u64_stats API. The final remaining part to clean up is the VSI stats accumulation logic in ice_update_vsi_ring_stats(). Refactor the function and its helpers so that all stat values (and not just pkts and bytes) use the u64_stats APIs. The ice_fetch_u64_(tx|rx)_stats functions read the stat values using u64_stats_read and then copy them into local ice_vsi_(tx|rx)_stats structures. This does require making a new struct with the stat fields as u64. The ice_update_vsi_(tx|rx)_ring_stats functions call the fetch functions per ring and accumulate the result into one copy of the struct. This accumulated total is then used to update the relevant VSI fields. Since these are relatively small, the contents are all stored on the stack rather than allocating and freeing memory. Once the accumulator side is updated, the ice_stats_read and ice_stats_inc helpers and the other related functions all translate easily to u64_stats_read and u64_stats_inc. This completes the refactor and ensures that all stats accesses now make proper use of the API.
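As background, and paraphrasing include/linux/u64_stats_sync.h (so treat the per-architecture details as an approximation): u64_stats_t hides the architecture-specific counter representation behind one API:

	/* On 64-bit kernels u64_stats_t wraps a local64_t, so
	 * u64_stats_read(), u64_stats_inc() and u64_stats_add() are
	 * tear-free on their own and the syncp seqcount compiles away.
	 * On 32-bit kernels the counter is a plain u64, and the seqcount
	 * around the writer/reader sections is what prevents torn 64-bit
	 * accesses.
	 */
	u64_stats_update_begin(&rs->syncp);
	u64_stats_add(&rs->pkts, pkts);
	u64_stats_add(&rs->bytes, bytes);
	u64_stats_update_end(&rs->syncp);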
Reviewed-by: Aleksandr Loktionov Signed-off-by: Jacob Keller Reviewed-by: Simon Horman Tested-by: Rinitha S (A Contingent worker at Intel) Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_lib.c | 29 +--- drivers/net/ethernet/intel/ice/ice_main.c | 180 +++++++++++++++------- drivers/net/ethernet/intel/ice/ice_txrx.h | 28 ++-- 3 files changed, 147 insertions(+), 90 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice_lib.c b/drivers/net/ethernet/intel/ice/ice_lib.c index c0f77d962c30..17d92ba65128 100644 --- a/drivers/net/ethernet/intel/ice/ice_lib.c +++ b/drivers/net/ethernet/intel/ice/ice_lib.c @@ -3432,21 +3432,6 @@ int ice_vsi_cfg_tc(struct ice_vsi *vsi, u8 ena_tc) return ret; } -/** - * ice_update_ring_stats - Update ring statistics - * @stats: stats to be updated - * @pkts: number of processed packets - * @bytes: number of processed bytes - * - * This function assumes that caller has acquired a u64_stats_sync lock. - */ -static void ice_update_ring_stats(struct ice_ring_stats *stats, - u64 pkts, u64 bytes) -{ - stats->bytes += bytes; - stats->pkts += pkts; -} - /** * ice_update_tx_ring_stats - Update Tx ring specific counters * @tx_ring: ring to update @@ -3456,7 +3441,8 @@ static void ice_update_ring_stats(struct ice_ring_stats *stats, void ice_update_tx_ring_stats(struct ice_tx_ring *tx_ring, u64 pkts, u64 bytes) { u64_stats_update_begin(&tx_ring->ring_stats->syncp); - ice_update_ring_stats(tx_ring->ring_stats, pkts, bytes); + u64_stats_add(&tx_ring->ring_stats->pkts, pkts); + u64_stats_add(&tx_ring->ring_stats->bytes, bytes); u64_stats_update_end(&tx_ring->ring_stats->syncp); } @@ -3469,7 +3455,8 @@ void ice_update_tx_ring_stats(struct ice_tx_ring *tx_ring, u64 pkts, u64 bytes) void ice_update_rx_ring_stats(struct ice_rx_ring *rx_ring, u64 pkts, u64 bytes) { u64_stats_update_begin(&rx_ring->ring_stats->syncp); - ice_update_ring_stats(rx_ring->ring_stats, pkts, bytes); + u64_stats_add(&rx_ring->ring_stats->pkts, pkts); + u64_stats_add(&rx_ring->ring_stats->bytes, bytes); u64_stats_update_end(&rx_ring->ring_stats->syncp); } @@ -3486,8 +3473,8 @@ void ice_fetch_tx_ring_stats(const struct ice_tx_ring *ring, do { start = u64_stats_fetch_begin(&ring->ring_stats->syncp); - *pkts = ring->ring_stats->pkts; - *bytes = ring->ring_stats->bytes; + *pkts = u64_stats_read(&ring->ring_stats->pkts); + *bytes = u64_stats_read(&ring->ring_stats->bytes); } while (u64_stats_fetch_retry(&ring->ring_stats->syncp, start)); } @@ -3504,8 +3491,8 @@ void ice_fetch_rx_ring_stats(const struct ice_rx_ring *ring, do { start = u64_stats_fetch_begin(&ring->ring_stats->syncp); - *pkts = ring->ring_stats->pkts; - *bytes = ring->ring_stats->bytes; + *pkts = u64_stats_read(&ring->ring_stats->pkts); + *bytes = u64_stats_read(&ring->ring_stats->bytes); } while (u64_stats_fetch_retry(&ring->ring_stats->syncp, start)); } diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c index ced6ad86c8b2..79d97a902470 100644 --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@ -6822,54 +6822,132 @@ int ice_up(struct ice_vsi *vsi) return err; } +struct ice_vsi_tx_stats { + u64 pkts; + u64 bytes; + u64 tx_restart_q; + u64 tx_busy; + u64 tx_linearize; +}; + +struct ice_vsi_rx_stats { + u64 pkts; + u64 bytes; + u64 rx_non_eop_descs; + u64 rx_page_failed; + u64 rx_buf_failed; +}; + +/** + * ice_fetch_u64_tx_stats - get Tx stats from a ring + * @ring: the Tx ring to copy stats from + * @copy: temporary storage for the ring 
statistics + * + * Fetch the u64 stats from the ring using u64_stats_fetch. This ensures each + * stat value is self-consistent, though not necessarily consistent w.r.t + * other stats. + */ +static void ice_fetch_u64_tx_stats(struct ice_tx_ring *ring, + struct ice_vsi_tx_stats *copy) +{ + struct ice_ring_stats *stats = ring->ring_stats; + unsigned int start; + + do { + start = u64_stats_fetch_begin(&stats->syncp); + copy->pkts = u64_stats_read(&stats->pkts); + copy->bytes = u64_stats_read(&stats->bytes); + copy->tx_restart_q = u64_stats_read(&stats->tx_restart_q); + copy->tx_busy = u64_stats_read(&stats->tx_busy); + copy->tx_linearize = u64_stats_read(&stats->tx_linearize); + } while (u64_stats_fetch_retry(&stats->syncp, start)); +} + /** - * ice_fetch_u64_stats_per_ring - get packets and bytes stats per ring - * @stats: pointer to ring stats structure - * @pkts: packets stats counter - * @bytes: bytes stats counter + * ice_fetch_u64_rx_stats - get Rx stats from a ring + * @ring: the Rx ring to copy stats from + * @copy: temporary storage for the ring statistics * - * This function fetches stats from the ring considering the atomic operations - * that needs to be performed to read u64 values in 32 bit machine. + * Fetch the u64 stats from the ring using u64_stats_fetch. This ensures each + * stat value is self-consistent, though not necessarily consistent w.r.t + * other stats. */ -static void ice_fetch_u64_stats_per_ring(struct ice_ring_stats *stats, - u64 *pkts, u64 *bytes) +static void ice_fetch_u64_rx_stats(struct ice_rx_ring *ring, + struct ice_vsi_rx_stats *copy) { + struct ice_ring_stats *stats = ring->ring_stats; unsigned int start; do { start = u64_stats_fetch_begin(&stats->syncp); - *pkts = stats->stats.pkts; - *bytes = stats->stats.bytes; + copy->pkts = u64_stats_read(&stats->pkts); + copy->bytes = u64_stats_read(&stats->bytes); + copy->rx_non_eop_descs = + u64_stats_read(&stats->rx_non_eop_descs); + copy->rx_page_failed = u64_stats_read(&stats->rx_page_failed); + copy->rx_buf_failed = u64_stats_read(&stats->rx_buf_failed); } while (u64_stats_fetch_retry(&stats->syncp, start)); } /** * ice_update_vsi_tx_ring_stats - Update VSI Tx ring stats counters * @vsi: the VSI to be updated - * @vsi_stats: the stats struct to be updated + * @vsi_stats: accumulated stats for this VSI * @rings: rings to work on * @count: number of rings */ -static void -ice_update_vsi_tx_ring_stats(struct ice_vsi *vsi, - struct rtnl_link_stats64 *vsi_stats, - struct ice_tx_ring **rings, u16 count) +static void ice_update_vsi_tx_ring_stats(struct ice_vsi *vsi, + struct ice_vsi_tx_stats *vsi_stats, + struct ice_tx_ring **rings, u16 count) { + struct ice_vsi_tx_stats copy = {}; u16 i; for (i = 0; i < count; i++) { struct ice_tx_ring *ring; - u64 pkts = 0, bytes = 0; ring = READ_ONCE(rings[i]); if (!ring || !ring->ring_stats) continue; - ice_fetch_u64_stats_per_ring(ring->ring_stats, &pkts, &bytes); - vsi_stats->tx_packets += pkts; - vsi_stats->tx_bytes += bytes; - vsi->tx_restart += ring->ring_stats->tx_restart_q; - vsi->tx_busy += ring->ring_stats->tx_busy; - vsi->tx_linearize += ring->ring_stats->tx_linearize; + + ice_fetch_u64_tx_stats(ring, ©); + + vsi_stats->pkts += copy.pkts; + vsi_stats->bytes += copy.bytes; + vsi_stats->tx_restart_q += copy.tx_restart_q; + vsi_stats->tx_busy += copy.tx_busy; + vsi_stats->tx_linearize += copy.tx_linearize; + } +} + +/** + * ice_update_vsi_rx_ring_stats - Update VSI Rx ring stats counters + * @vsi: the VSI to be updated + * @vsi_stats: accumulated stats for this VSI + * 
@rings: rings to work on + * @count: number of rings + */ +static void ice_update_vsi_rx_ring_stats(struct ice_vsi *vsi, + struct ice_vsi_rx_stats *vsi_stats, + struct ice_rx_ring **rings, u16 count) +{ + struct ice_vsi_rx_stats copy = {}; + u16 i; + + for (i = 0; i < count; i++) { + struct ice_rx_ring *ring; + + ring = READ_ONCE(rings[i]); + if (!ring || !ring->ring_stats) + continue; + + ice_fetch_u64_rx_stats(ring, ©); + + vsi_stats->pkts += copy.pkts; + vsi_stats->bytes += copy.bytes; + vsi_stats->rx_non_eop_descs += copy.rx_non_eop_descs; + vsi_stats->rx_page_failed += copy.rx_page_failed; + vsi_stats->rx_buf_failed += copy.rx_buf_failed; } } @@ -6880,48 +6958,34 @@ ice_update_vsi_tx_ring_stats(struct ice_vsi *vsi, static void ice_update_vsi_ring_stats(struct ice_vsi *vsi) { struct rtnl_link_stats64 *net_stats, *stats_prev; - struct rtnl_link_stats64 *vsi_stats; + struct ice_vsi_tx_stats tx_stats = {}; + struct ice_vsi_rx_stats rx_stats = {}; struct ice_pf *pf = vsi->back; - u64 pkts, bytes; - int i; - - vsi_stats = kzalloc(sizeof(*vsi_stats), GFP_ATOMIC); - if (!vsi_stats) - return; - - /* reset non-netdev (extended) stats */ - vsi->tx_restart = 0; - vsi->tx_busy = 0; - vsi->tx_linearize = 0; - vsi->rx_buf_failed = 0; - vsi->rx_page_failed = 0; rcu_read_lock(); /* update Tx rings counters */ - ice_update_vsi_tx_ring_stats(vsi, vsi_stats, vsi->tx_rings, + ice_update_vsi_tx_ring_stats(vsi, &tx_stats, vsi->tx_rings, vsi->num_txq); /* update Rx rings counters */ - ice_for_each_rxq(vsi, i) { - struct ice_rx_ring *ring = READ_ONCE(vsi->rx_rings[i]); - struct ice_ring_stats *ring_stats; - - ring_stats = ring->ring_stats; - ice_fetch_u64_stats_per_ring(ring_stats, &pkts, &bytes); - vsi_stats->rx_packets += pkts; - vsi_stats->rx_bytes += bytes; - vsi->rx_buf_failed += ring_stats->rx_buf_failed; - vsi->rx_page_failed += ring_stats->rx_page_failed; - } + ice_update_vsi_rx_ring_stats(vsi, &rx_stats, vsi->rx_rings, + vsi->num_rxq); /* update XDP Tx rings counters */ if (ice_is_xdp_ena_vsi(vsi)) - ice_update_vsi_tx_ring_stats(vsi, vsi_stats, vsi->xdp_rings, + ice_update_vsi_tx_ring_stats(vsi, &tx_stats, vsi->xdp_rings, vsi->num_xdp_txq); rcu_read_unlock(); + /* Save non-netdev (extended) stats */ + vsi->tx_restart = tx_stats.tx_restart_q; + vsi->tx_busy = tx_stats.tx_busy; + vsi->tx_linearize = tx_stats.tx_linearize; + vsi->rx_buf_failed = rx_stats.rx_buf_failed; + vsi->rx_page_failed = rx_stats.rx_page_failed; + net_stats = &vsi->net_stats; stats_prev = &vsi->net_stats_prev; @@ -6931,18 +6995,16 @@ static void ice_update_vsi_ring_stats(struct ice_vsi *vsi) * let's skip this round. 
*/ if (likely(pf->stat_prev_loaded)) { - net_stats->tx_packets += vsi_stats->tx_packets - stats_prev->tx_packets; - net_stats->tx_bytes += vsi_stats->tx_bytes - stats_prev->tx_bytes; - net_stats->rx_packets += vsi_stats->rx_packets - stats_prev->rx_packets; - net_stats->rx_bytes += vsi_stats->rx_bytes - stats_prev->rx_bytes; + net_stats->tx_packets += tx_stats.pkts - stats_prev->tx_packets; + net_stats->tx_bytes += tx_stats.bytes - stats_prev->tx_bytes; + net_stats->rx_packets += rx_stats.pkts - stats_prev->rx_packets; + net_stats->rx_bytes += rx_stats.bytes - stats_prev->rx_bytes; } - stats_prev->tx_packets = vsi_stats->tx_packets; - stats_prev->tx_bytes = vsi_stats->tx_bytes; - stats_prev->rx_packets = vsi_stats->rx_packets; - stats_prev->rx_bytes = vsi_stats->rx_bytes; - - kfree(vsi_stats); + stats_prev->tx_packets = tx_stats.pkts; + stats_prev->tx_bytes = tx_stats.bytes; + stats_prev->rx_packets = rx_stats.pkts; + stats_prev->rx_bytes = rx_stats.bytes; } /** diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.h b/drivers/net/ethernet/intel/ice/ice_txrx.h index e7e4991c0934..c51b1e60f717 100644 --- a/drivers/net/ethernet/intel/ice/ice_txrx.h +++ b/drivers/net/ethernet/intel/ice/ice_txrx.h @@ -133,20 +133,20 @@ struct ice_ring_stats { struct rcu_head rcu; /* to avoid race on free */ struct u64_stats_sync syncp; struct_group(stats, - u64 pkts; - u64 bytes; + u64_stats_t pkts; + u64_stats_t bytes; union { struct_group(tx, - u64 tx_restart_q; - u64 tx_busy; - u64 tx_linearize; + u64_stats_t tx_restart_q; + u64_stats_t tx_busy; + u64_stats_t tx_linearize; /* negative if no pending Tx descriptors */ int prev_pkt; ); struct_group(rx, - u64 rx_non_eop_descs; - u64 rx_page_failed; - u64 rx_buf_failed; + u64_stats_t rx_non_eop_descs; + u64_stats_t rx_page_failed; + u64_stats_t rx_buf_failed; ); }; ); @@ -164,7 +164,13 @@ struct ice_ring_stats { */ #define ice_stats_read(stats, member) ({ \ struct ice_ring_stats *__stats = (stats); \ - __stats->member; \ + unsigned int start; \ + u64 val; \ + do { \ + start = u64_stats_fetch_begin(&__stats->syncp); \ + val = u64_stats_read(&__stats->member); \ + } while (u64_stats_fetch_retry(&__stats->syncp, start)); \ + val; \ }) /** @@ -177,7 +183,9 @@ struct ice_ring_stats { */ #define ice_stats_inc(stats, member) do { \ struct ice_ring_stats *__stats = (stats); \ - __stats->member++; \ + u64_stats_update_begin(&__stats->syncp); \ + u64_stats_inc(&__stats->member); \ + u64_stats_update_end(&__stats->syncp); \ } while (0) enum ice_ring_state_t { -- 2.47.1 From: Alexander Lobakin Place the fields in ice_{rx,tx}_ring used in the same pieces of hotpath code closer to each other and use __cacheline_group_{begin,end}_aligned() to isolate the read-mostly, read-write, and cold groups into separate cachelines, similarly to idpf.
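A minimal sketch of the pattern (the macros come from <linux/cache.h>; the struct and fields here are illustrative only, not the ice layout):

	struct example_ring {
		__cacheline_group_begin_aligned(read_mostly);
		void *desc;		/* written once at ring setup */
		u16 count;
		__cacheline_group_end_aligned(read_mostly);

		__cacheline_group_begin_aligned(read_write);
		u16 next_to_use;	/* modified in the hot path */
		u16 next_to_clean;
		__cacheline_group_end_aligned(read_write);

		__cacheline_group_begin_aligned(cold);
		dma_addr_t dma;		/* only touched at init/teardown */
		__cacheline_group_end_aligned(cold);
	};

Keeping frequently-written fields out of the cachelines that other CPUs mostly read means hot-path writers stop invalidating the read-mostly lines.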
Suggested-by: Jacob Keller Reviewed-by: Aleksandr Loktionov Reviewed-by: Jacob Keller Signed-off-by: Alexander Lobakin Reviewed-by: Paul Menzel Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_ethtool.c | 1 - drivers/net/ethernet/intel/ice/ice_txrx.c | 1 - drivers/net/ethernet/intel/ice/ice_txrx.h | 122 ++++++++++-------- drivers/net/ethernet/intel/ice/ice_txrx_lib.c | 3 - 4 files changed, 70 insertions(+), 57 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice_ethtool.c b/drivers/net/ethernet/intel/ice/ice_ethtool.c index 2c007669c197..c6bc29cfb8e6 100644 --- a/drivers/net/ethernet/intel/ice/ice_ethtool.c +++ b/drivers/net/ethernet/intel/ice/ice_ethtool.c @@ -3388,7 +3388,6 @@ ice_set_ringparam(struct net_device *netdev, struct ethtool_ringparam *ring, */ rx_rings[i].next_to_use = 0; rx_rings[i].next_to_clean = 0; - rx_rings[i].next_to_alloc = 0; *vsi->rx_rings[i] = rx_rings[i]; } kfree(rx_rings); diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.c b/drivers/net/ethernet/intel/ice/ice_txrx.c index eea83b26b094..396326a6d5be 100644 --- a/drivers/net/ethernet/intel/ice/ice_txrx.c +++ b/drivers/net/ethernet/intel/ice/ice_txrx.c @@ -574,7 +574,6 @@ void ice_clean_rx_ring(struct ice_rx_ring *rx_ring) PAGE_SIZE); memset(rx_ring->desc, 0, size); - rx_ring->next_to_alloc = 0; rx_ring->next_to_clean = 0; rx_ring->next_to_use = 0; } diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.h b/drivers/net/ethernet/intel/ice/ice_txrx.h index c51b1e60f717..b6547e1b7c42 100644 --- a/drivers/net/ethernet/intel/ice/ice_txrx.h +++ b/drivers/net/ethernet/intel/ice/ice_txrx.h @@ -267,34 +267,49 @@ struct ice_tstamp_ring { } ____cacheline_internodealigned_in_smp; struct ice_rx_ring { - /* CL1 - 1st cacheline starts here */ + __cacheline_group_begin_aligned(read_mostly); void *desc; /* Descriptor ring memory */ struct page_pool *pp; struct net_device *netdev; /* netdev ring maps to */ - struct ice_vsi *vsi; /* Backreference to associated VSI */ struct ice_q_vector *q_vector; /* Backreference to associated vector */ u8 __iomem *tail; - u16 q_index; /* Queue number of ring */ - - u16 count; /* Number of descriptors */ - u16 reg_idx; /* HW register index of the ring */ - u16 next_to_alloc; union { struct libeth_fqe *rx_fqes; struct xdp_buff **xdp_buf; }; - /* CL2 - 2nd cacheline starts here */ - struct libeth_fqe *hdr_fqes; + u16 count; /* Number of descriptors */ + u8 ptp_rx; + + u8 flags; +#define ICE_RX_FLAGS_CRC_STRIP_DIS BIT(2) +#define ICE_RX_FLAGS_MULTIDEV BIT(3) +#define ICE_RX_FLAGS_RING_GCS BIT(4) + + u32 truesize; + struct page_pool *hdr_pp; + struct libeth_fqe *hdr_fqes; + + struct bpf_prog *xdp_prog; + struct ice_tx_ring *xdp_ring; + struct xsk_buff_pool *xsk_pool; + + /* stats structs */ + struct ice_ring_stats *ring_stats; + struct ice_rx_ring *next; /* pointer to next ring in q_vector */ + u32 hdr_truesize; + + struct xdp_rxq_info xdp_rxq; + __cacheline_group_end_aligned(read_mostly); + + __cacheline_group_begin_aligned(read_write); union { struct libeth_xdp_buff_stash xdp; struct libeth_xdp_buff *xsk; }; - - /* CL3 - 3rd cacheline starts here */ union { struct ice_pkt_ctx pkt_ctx; struct { @@ -302,75 +317,78 @@ struct ice_rx_ring { __be16 vlan_proto; }; }; - struct bpf_prog *xdp_prog; /* used in interrupt processing */ u16 next_to_use; u16 next_to_clean; + __cacheline_group_end_aligned(read_write); - u32 hdr_truesize; - u32 truesize; - - /* stats structs */ - struct ice_ring_stats *ring_stats; - + __cacheline_group_begin_aligned(cold); struct rcu_head rcu; /* to avoid 
race on free */ - /* CL4 - 4th cacheline starts here */ + struct ice_vsi *vsi; /* Backreference to associated VSI */ struct ice_channel *ch; - struct ice_tx_ring *xdp_ring; - struct ice_rx_ring *next; /* pointer to next ring in q_vector */ - struct xsk_buff_pool *xsk_pool; - u16 rx_hdr_len; - u16 rx_buf_len; + dma_addr_t dma; /* physical address of ring */ + u16 q_index; /* Queue number of ring */ + u16 reg_idx; /* HW register index of the ring */ u8 dcb_tc; /* Traffic class of ring */ - u8 ptp_rx; -#define ICE_RX_FLAGS_CRC_STRIP_DIS BIT(2) -#define ICE_RX_FLAGS_MULTIDEV BIT(3) -#define ICE_RX_FLAGS_RING_GCS BIT(4) - u8 flags; - /* CL5 - 5th cacheline starts here */ - struct xdp_rxq_info xdp_rxq; + + u16 rx_hdr_len; + u16 rx_buf_len; + __cacheline_group_end_aligned(cold); } ____cacheline_internodealigned_in_smp; struct ice_tx_ring { - /* CL1 - 1st cacheline starts here */ - struct ice_tx_ring *next; /* pointer to next ring in q_vector */ + __cacheline_group_begin_aligned(read_mostly); void *desc; /* Descriptor ring memory */ struct device *dev; /* Used for DMA mapping */ u8 __iomem *tail; struct ice_tx_buf *tx_buf; + struct ice_q_vector *q_vector; /* Backreference to associated vector */ struct net_device *netdev; /* netdev ring maps to */ struct ice_vsi *vsi; /* Backreference to associated VSI */ - /* CL2 - 2nd cacheline starts here */ - dma_addr_t dma; /* physical address of ring */ - struct xsk_buff_pool *xsk_pool; - u16 next_to_use; - u16 next_to_clean; - u16 q_handle; /* Queue handle per TC */ - u16 reg_idx; /* HW register index of the ring */ + u16 count; /* Number of descriptors */ u16 q_index; /* Queue number of ring */ - u16 xdp_tx_active; + + u8 flags; +#define ICE_TX_FLAGS_RING_XDP BIT(0) +#define ICE_TX_FLAGS_RING_VLAN_L2TAG1 BIT(1) +#define ICE_TX_FLAGS_RING_VLAN_L2TAG2 BIT(2) +#define ICE_TX_FLAGS_TXTIME BIT(3) + + struct xsk_buff_pool *xsk_pool; + /* stats structs */ struct ice_ring_stats *ring_stats; - /* CL3 - 3rd cacheline starts here */ + struct ice_tx_ring *next; /* pointer to next ring in q_vector */ + + struct ice_tstamp_ring *tstamp_ring; + struct ice_ptp_tx *tx_tstamps; + __cacheline_group_end_aligned(read_mostly); + + __cacheline_group_begin_aligned(read_write); + u16 next_to_use; + u16 next_to_clean; + + u16 xdp_tx_active; + spinlock_t tx_lock; + __cacheline_group_end_aligned(read_write); + + __cacheline_group_begin_aligned(cold); struct rcu_head rcu; /* to avoid race on free */ DECLARE_BITMAP(xps_state, ICE_TX_NBITS); /* XPS Config State */ struct ice_channel *ch; - struct ice_ptp_tx *tx_tstamps; - spinlock_t tx_lock; - u32 txq_teid; /* Added Tx queue TEID */ - /* CL4 - 4th cacheline starts here */ - struct ice_tstamp_ring *tstamp_ring; -#define ICE_TX_FLAGS_RING_XDP BIT(0) -#define ICE_TX_FLAGS_RING_VLAN_L2TAG1 BIT(1) -#define ICE_TX_FLAGS_RING_VLAN_L2TAG2 BIT(2) -#define ICE_TX_FLAGS_TXTIME BIT(3) - u8 flags; + + dma_addr_t dma; /* physical address of ring */ + u16 q_handle; /* Queue handle per TC */ + u16 reg_idx; /* HW register index of the ring */ u8 dcb_tc; /* Traffic class of ring */ + u16 quanta_prof_id; + u32 txq_teid; /* Added Tx queue TEID */ + __cacheline_group_end_aligned(cold); } ____cacheline_internodealigned_in_smp; static inline bool ice_ring_ch_enabled(struct ice_tx_ring *ring) diff --git a/drivers/net/ethernet/intel/ice/ice_txrx_lib.c b/drivers/net/ethernet/intel/ice/ice_txrx_lib.c index e68f3e5d35b4..e695a664e53d 100644 --- a/drivers/net/ethernet/intel/ice/ice_txrx_lib.c +++ b/drivers/net/ethernet/intel/ice/ice_txrx_lib.c @@ -20,9 +20,6 @@ 
void ice_release_rx_desc(struct ice_rx_ring *rx_ring, u16 val) rx_ring->next_to_use = val; - /* update next to alloc since we have filled the ring */ - rx_ring->next_to_alloc = val; - /* QRX_TAIL will be updated with any tail value, but hardware ignores * the lower 3 bits. This makes it so we only bump tail on meaningful * boundaries. Also, this allows us to bump tail on intervals of 8 up to -- 2.47.1 From: YiFei Zhu The logic is similar to idpf_rx_hwtstamp, but the data is exported as a BPF kfunc instead of being appended to an skb, in order to support grabbing timestamps in xsk packets. An idpf_queue_has(PTP, rxq) condition is added to check that the queue supports PTP, similar to idpf_rx_process_skb_fields. Tested using an xsk connection and checking that xdp timestamps are retrievable in received packets. Cc: intel-wired-lan@lists.osuosl.org Signed-off-by: YiFei Zhu Signed-off-by: Mina Almasry Reviewed-by: Aleksandr Loktionov Reviewed-by: Alexander Lobakin Reviewed-by: Simon Horman Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/idpf/xdp.c | 31 +++++++++++++++++++++++++++ drivers/net/ethernet/intel/idpf/xdp.h | 20 +++++++++++++++++ 2 files changed, 51 insertions(+) diff --git a/drivers/net/ethernet/intel/idpf/xdp.c b/drivers/net/ethernet/intel/idpf/xdp.c index 0fe435fdbb6c..2b60f2a78684 100644 --- a/drivers/net/ethernet/intel/idpf/xdp.c +++ b/drivers/net/ethernet/intel/idpf/xdp.c @@ -2,6 +2,7 @@ /* Copyright (C) 2025 Intel Corporation */ #include "idpf.h" +#include "idpf_ptp.h" #include "idpf_virtchnl.h" #include "xdp.h" #include "xsk.h" @@ -398,8 +399,38 @@ static int idpf_xdpmo_rx_hash(const struct xdp_md *ctx, u32 *hash, pt); } +static int idpf_xdpmo_rx_timestamp(const struct xdp_md *ctx, u64 *timestamp) +{ + const struct libeth_xdp_buff *xdp = (typeof(xdp))ctx; + struct idpf_xdp_rx_desc desc __uninitialized; + const struct idpf_rx_queue *rxq; + u64 cached_time, ts_ns; + u32 ts_high; + + rxq = libeth_xdp_buff_to_rq(xdp, typeof(*rxq), xdp_rxq); + + if (!idpf_queue_has(PTP, rxq)) + return -ENODATA; + + idpf_xdp_get_qw1(&desc, xdp->desc); + + if (!(idpf_xdp_rx_ts_low(&desc) & VIRTCHNL2_RX_FLEX_TSTAMP_VALID)) + return -ENODATA; + + cached_time = READ_ONCE(rxq->cached_phc_time); + + idpf_xdp_get_qw3(&desc, xdp->desc); + + ts_high = idpf_xdp_rx_ts_high(&desc); + ts_ns = idpf_ptp_tstamp_extend_32b_to_64b(cached_time, ts_high); + + *timestamp = ts_ns; + return 0; +} + static const struct xdp_metadata_ops idpf_xdpmo = { .xmo_rx_hash = idpf_xdpmo_rx_hash, + .xmo_rx_timestamp = idpf_xdpmo_rx_timestamp, }; void idpf_xdp_set_features(const struct idpf_vport *vport) diff --git a/drivers/net/ethernet/intel/idpf/xdp.h b/drivers/net/ethernet/intel/idpf/xdp.h index 7ffc6955dfae..63e56f7d43e0 100644 --- a/drivers/net/ethernet/intel/idpf/xdp.h +++ b/drivers/net/ethernet/intel/idpf/xdp.h @@ -112,11 +112,13 @@ struct idpf_xdp_rx_desc { aligned_u64 qw1; #define IDPF_XDP_RX_BUF GENMASK_ULL(47, 32) #define IDPF_XDP_RX_EOP BIT_ULL(1) +#define IDPF_XDP_RX_TS_LOW GENMASK_ULL(31, 24) aligned_u64 qw2; #define IDPF_XDP_RX_HASH GENMASK_ULL(31, 0) aligned_u64 qw3; +#define IDPF_XDP_RX_TS_HIGH GENMASK_ULL(63, 32) } __aligned(4 * sizeof(u64)); static_assert(sizeof(struct idpf_xdp_rx_desc) == sizeof(struct virtchnl2_rx_flex_desc_adv_nic_3)); @@ -128,6 +130,8 @@ static_assert(sizeof(struct idpf_xdp_rx_desc) == #define idpf_xdp_rx_buf(desc) FIELD_GET(IDPF_XDP_RX_BUF, (desc)->qw1) #define idpf_xdp_rx_eop(desc) !!((desc)->qw1 & IDPF_XDP_RX_EOP) #define idpf_xdp_rx_hash(desc) FIELD_GET(IDPF_XDP_RX_HASH, (desc)->qw2) +#define
idpf_xdp_rx_ts_low(desc) FIELD_GET(IDPF_XDP_RX_TS_LOW, (desc)->qw1) +#define idpf_xdp_rx_ts_high(desc) FIELD_GET(IDPF_XDP_RX_TS_HIGH, (desc)->qw3) static inline void idpf_xdp_get_qw0(struct idpf_xdp_rx_desc *desc, @@ -149,6 +153,9 @@ idpf_xdp_get_qw1(struct idpf_xdp_rx_desc *desc, desc->qw1 = ((const typeof(desc))rxd)->qw1; #else desc->qw1 = ((u64)le16_to_cpu(rxd->buf_id) << 32) | + ((u64)rxd->ts_low << 24) | + ((u64)rxd->fflags1 << 16) | + ((u64)rxd->status_err1 << 8) | rxd->status_err0_qw1; #endif } @@ -166,6 +173,19 @@ idpf_xdp_get_qw2(struct idpf_xdp_rx_desc *desc, #endif } +static inline void +idpf_xdp_get_qw3(struct idpf_xdp_rx_desc *desc, + const struct virtchnl2_rx_flex_desc_adv_nic_3 *rxd) +{ +#ifdef __LIBETH_WORD_ACCESS + desc->qw3 = ((const typeof(desc))rxd)->qw3; +#else + desc->qw3 = ((u64)le32_to_cpu(rxd->ts_high) << 32) | + ((u64)le16_to_cpu(rxd->fmd6) << 16) | + le16_to_cpu(rxd->l2tag1); +#endif +} + void idpf_xdp_set_features(const struct idpf_vport *vport); int idpf_xdp(struct net_device *dev, struct netdev_bpf *xdp); -- 2.47.1
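For completeness, the new .xmo_rx_timestamp hook is what backs the standard bpf_xdp_metadata_rx_timestamp() kfunc on this driver; a minimal consumer-side sketch (the program itself is hypothetical and not part of this series):

	#include <linux/bpf.h>
	#include <bpf/bpf_helpers.h>

	extern int bpf_xdp_metadata_rx_timestamp(const struct xdp_md *ctx,
						 __u64 *timestamp) __ksym;

	SEC("xdp")
	int xdp_rx_ts(struct xdp_md *ctx)
	{
		__u64 ts;

		/* Returns 0 on success; idpf now returns -ENODATA when the
		 * queue has no PTP support or the descriptor carries no
		 * valid timestamp.
		 */
		if (!bpf_xdp_metadata_rx_timestamp(ctx, &ts))
			bpf_printk("rx hw timestamp: %llu ns", ts);

		return XDP_PASS;
	}

	char _license[] SEC("license") = "GPL";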