Support to turn on/off adaptive RX coalesce. When adaptive RX coalesce is on, update the dynamic ITR value based on statistics. Signed-off-by: Jiawen Wu --- .../net/ethernet/wangxun/libwx/wx_ethtool.c | 10 +- drivers/net/ethernet/wangxun/libwx/wx_lib.c | 208 ++++++++++++++++++ drivers/net/ethernet/wangxun/libwx/wx_type.h | 9 + .../net/ethernet/wangxun/libwx/wx_vf_lib.c | 2 +- .../net/ethernet/wangxun/libwx/wx_vf_lib.h | 1 + .../net/ethernet/wangxun/ngbe/ngbe_ethtool.c | 3 +- .../ethernet/wangxun/txgbe/txgbe_ethtool.c | 3 +- 7 files changed, 232 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/wangxun/libwx/wx_ethtool.c b/drivers/net/ethernet/wangxun/libwx/wx_ethtool.c index ebef99185bca..f2d888825659 100644 --- a/drivers/net/ethernet/wangxun/libwx/wx_ethtool.c +++ b/drivers/net/ethernet/wangxun/libwx/wx_ethtool.c @@ -303,6 +303,9 @@ int wx_get_coalesce(struct net_device *netdev, else ec->rx_coalesce_usecs = wx->rx_itr_setting >> 2; + if (wx->rx_itr_setting == 1) + ec->use_adaptive_rx_coalesce = 1; + /* if in mixed tx/rx queues per vector mode, report only rx settings */ if (wx->q_vector[0]->tx.count && wx->q_vector[0]->rx.count) return 0; @@ -363,10 +366,15 @@ int wx_set_coalesce(struct net_device *netdev, (ec->tx_coalesce_usecs > (max_eitr >> 2))) return -EINVAL; + if (ec->use_adaptive_rx_coalesce) { + wx->rx_itr_setting = 1; + return 0; + } + if (ec->rx_coalesce_usecs > 1) wx->rx_itr_setting = ec->rx_coalesce_usecs << 2; else - wx->rx_itr_setting = ec->rx_coalesce_usecs; + wx->rx_itr_setting = rx_itr_param; if (wx->rx_itr_setting != 1) rx_itr_param = wx->rx_itr_setting; diff --git a/drivers/net/ethernet/wangxun/libwx/wx_lib.c b/drivers/net/ethernet/wangxun/libwx/wx_lib.c index 723785ef87bb..ebc4281a8760 100644 --- a/drivers/net/ethernet/wangxun/libwx/wx_lib.c +++ b/drivers/net/ethernet/wangxun/libwx/wx_lib.c @@ -16,6 +16,7 @@ #include "wx_lib.h" #include "wx_ptp.h" #include "wx_hw.h" +#include "wx_vf_lib.h" /* Lookup table mapping the HW PTYPE to 
the bit field for decoding */ static struct wx_dec_ptype wx_ptype_lookup[256] = { @@ -832,6 +833,211 @@ static bool wx_clean_tx_irq(struct wx_q_vector *q_vector, return !!budget; } +static void wx_update_itr(struct wx_q_vector *q_vector, + struct wx_ring_container *ring_container) +{ + unsigned int itr = WX_ITR_ADAPTIVE_MIN_USECS | + WX_ITR_ADAPTIVE_LATENCY; + unsigned int avg_wire_size, packets, bytes; + unsigned long next_update = jiffies; + + /* If we don't have any rings just leave ourselves set for maximum + * possible latency so we take ourselves out of the equation. + */ + if (!ring_container->ring) + return; + + /* If we didn't update within up to 1 - 2 jiffies we can assume + * that either packets are coming in so slow there hasn't been + * any work, or that there is so much work that NAPI is dealing + * with interrupt moderation and we don't need to do anything. + */ + if (time_after(next_update, ring_container->next_update)) + goto clear_counts; + + packets = ring_container->total_packets; + + /* We have no packets to actually measure against. This means + * either one of the other queues on this vector is active or + * we are a Tx queue doing TSO with too high of an interrupt rate. + * + * When this occurs just tick up our delay by the minimum value + * and hope that this extra delay will prevent us from being called + * without any work on our queue. + */ + if (!packets) { + itr = (q_vector->itr >> 2) + WX_ITR_ADAPTIVE_MIN_INC; + if (itr > WX_ITR_ADAPTIVE_MAX_USECS) + itr = WX_ITR_ADAPTIVE_MAX_USECS; + itr += ring_container->itr & WX_ITR_ADAPTIVE_LATENCY; + goto clear_counts; + } + + bytes = ring_container->total_bytes; + + /* If packets are less than 4 and bytes are less than 9000 assume + * insufficient data to use bulk rate limiting approach. We are + * likely latency driven. 
+ */ + if (packets < 4 && bytes < 9000) { + itr = WX_ITR_ADAPTIVE_LATENCY; + goto adjust_by_size; + } + + /* Between 4 and 48 we can assume that our current interrupt delay + * is only slightly too low. As such we should increase it by a small + * fixed amount. + */ + if (packets < 48) { + itr = (q_vector->itr >> 2) + WX_ITR_ADAPTIVE_MIN_INC; + if (itr > WX_ITR_ADAPTIVE_MAX_USECS) + itr = WX_ITR_ADAPTIVE_MAX_USECS; + goto clear_counts; + } + + /* Between 48 and 96 is our "goldilocks" zone where we are working + * out "just right". Just report that our current ITR is good for us. + */ + if (packets < 96) { + itr = q_vector->itr >> 2; + goto clear_counts; + } + + /* If packet count is 96 or greater we are likely looking at a slight + * overrun of the delay we want. Try halving our delay to see if that + * will cut the number of packets in half per interrupt. + */ + if (packets < 256) { + itr = q_vector->itr >> 3; + if (itr < WX_ITR_ADAPTIVE_MIN_USECS) + itr = WX_ITR_ADAPTIVE_MIN_USECS; + goto clear_counts; + } + + /* The paths below assume we are dealing with a bulk ITR since number + * of packets is 256 or greater. We are just going to have to compute + * a value and try to bring the count under control, though for smaller + * packet sizes there isn't much we can do as NAPI polling will likely + * be kicking in sooner rather than later. + */ + itr = WX_ITR_ADAPTIVE_BULK; + +adjust_by_size: + /* If packet counts are 256 or greater we can assume we have a gross + * overestimation of what the rate should be. Instead of trying to fine + * tune it just use the formula below to try and dial in an exact value + * given the current packet size of the frame. 
+ */ + avg_wire_size = bytes / packets; + + /* The following is a crude approximation of: + * wmem_default / (size + overhead) = desired_pkts_per_int + * rate / bits_per_byte / (size + ethernet overhead) = pkt_rate + * (desired_pkt_rate / pkt_rate) * usecs_per_sec = ITR value + * + * Assuming wmem_default is 212992 and overhead is 640 bytes per + * packet, (256 skb, 64 headroom, 320 shared info), we can reduce the + * formula down to + * + * (170 * (size + 24)) / (size + 640) = ITR + * + * We first do some math on the packet size and then finally bitshift + * by 8 after rounding up. We also have to account for PCIe link speed + * difference as ITR scales based on this. + */ + if (avg_wire_size <= 60) { + /* Start at 50k ints/sec */ + avg_wire_size = 5120; + } else if (avg_wire_size <= 316) { + /* 50K ints/sec to 16K ints/sec */ + avg_wire_size *= 40; + avg_wire_size += 2720; + } else if (avg_wire_size <= 1084) { + /* 16K ints/sec to 9.2K ints/sec */ + avg_wire_size *= 15; + avg_wire_size += 11452; + } else if (avg_wire_size <= 1968) { + /* 9.2K ints/sec to 8K ints/sec */ + avg_wire_size *= 5; + avg_wire_size += 22420; + } else { + /* plateau at a limit of 8K ints/sec */ + avg_wire_size = 32256; + } + + /* If we are in low latency mode halve our delay which doubles the rate + * to somewhere between 100K and 16K ints/sec + */ + if (itr & WX_ITR_ADAPTIVE_LATENCY) + avg_wire_size >>= 1; + + /* Resultant value is 256 times larger than it needs to be. This + * gives us room to adjust the value as needed to either increase + * or decrease the value based on link speeds of 25G, 10G, 1G, etc. + * + * Use addition as we have already recorded the new latency flag + * for the ITR value. 
+ */ + switch (q_vector->wx->speed) { + case SPEED_25000: + itr += DIV_ROUND_UP(avg_wire_size, + WX_ITR_ADAPTIVE_MIN_INC * 512) * + WX_ITR_ADAPTIVE_MIN_INC; + break; + case SPEED_10000: + case SPEED_100: + default: + itr += DIV_ROUND_UP(avg_wire_size, + WX_ITR_ADAPTIVE_MIN_INC * 256) * + WX_ITR_ADAPTIVE_MIN_INC; + break; + case SPEED_1000: + case SPEED_10: + if (avg_wire_size > 8064) + avg_wire_size = 8064; + itr += DIV_ROUND_UP(avg_wire_size, + WX_ITR_ADAPTIVE_MIN_INC * 64) * + WX_ITR_ADAPTIVE_MIN_INC; + break; + } + +clear_counts: + /* write back value */ + ring_container->itr = itr; + + /* next update should occur within next jiffy */ + ring_container->next_update = next_update + 1; + + ring_container->total_bytes = 0; + ring_container->total_packets = 0; +} + +static void wx_set_itr(struct wx_q_vector *q_vector) +{ + struct wx *wx = q_vector->wx; + u32 new_itr; + + wx_update_itr(q_vector, &q_vector->tx); + wx_update_itr(q_vector, &q_vector->rx); + + /* use the smallest value of new ITR delay calculations */ + new_itr = min(q_vector->rx.itr, q_vector->tx.itr); + + /* Clear latency flag if set, shift into correct position */ + new_itr &= ~WX_ITR_ADAPTIVE_LATENCY; + new_itr <<= 2; + + if (new_itr != q_vector->itr) { + /* save the algorithm value here */ + q_vector->itr = new_itr; + + if (wx->pdev->is_virtfn) + wx_write_eitr_vf(q_vector); + else + wx_write_eitr(q_vector); + } +} + /** * wx_poll - NAPI polling RX/TX cleanup routine * @napi: napi struct with our devices info in it @@ -878,6 +1084,8 @@ static int wx_poll(struct napi_struct *napi, int budget) /* all work done, exit the polling mode */ if (likely(napi_complete_done(napi, work_done))) { + if (wx->rx_itr_setting == 1) + wx_set_itr(q_vector); if (netif_running(wx->netdev)) wx_intr_enable(wx, WX_INTR_Q(q_vector->v_idx)); } diff --git a/drivers/net/ethernet/wangxun/libwx/wx_type.h b/drivers/net/ethernet/wangxun/libwx/wx_type.h index 5c52a1db4024..3530e0ef32c5 100644 --- 
a/drivers/net/ethernet/wangxun/libwx/wx_type.h +++ b/drivers/net/ethernet/wangxun/libwx/wx_type.h @@ -416,6 +416,14 @@ enum WX_MSCA_CMD_value { #define WX_AML_MAX_EITR 0x00000FFFU #define WX_EM_MAX_EITR 0x00007FFCU +#define WX_ITR_ADAPTIVE_MIN_INC 2 +#define WX_ITR_ADAPTIVE_MIN_USECS 10 +#define WX_ITR_ADAPTIVE_MAX_USECS 84 +#define WX_ITR_ADAPTIVE_LATENCY 0x80 +#define WX_ITR_ADAPTIVE_BULK 0x00 +#define WX_ITR_ADAPTIVE_MASK_USECS (WX_ITR_ADAPTIVE_LATENCY - \ + WX_ITR_ADAPTIVE_MIN_INC) + /* transmit DMA Registers */ #define WX_PX_TR_BAL(_i) (0x03000 + ((_i) * 0x40)) #define WX_PX_TR_BAH(_i) (0x03004 + ((_i) * 0x40)) @@ -1030,6 +1038,7 @@ struct wx_rx_queue_stats { struct wx_ring_container { struct wx_ring *ring; /* pointer to linked list of rings */ + unsigned long next_update; /* jiffies value of next update */ unsigned int total_bytes; /* total bytes processed this int */ unsigned int total_packets; /* total packets processed this int */ u8 count; /* total number of rings in vector */ diff --git a/drivers/net/ethernet/wangxun/libwx/wx_vf_lib.c b/drivers/net/ethernet/wangxun/libwx/wx_vf_lib.c index 5d48df7a849f..7bcf7e90883b 100644 --- a/drivers/net/ethernet/wangxun/libwx/wx_vf_lib.c +++ b/drivers/net/ethernet/wangxun/libwx/wx_vf_lib.c @@ -10,7 +10,7 @@ #include "wx_vf.h" #include "wx_vf_lib.h" -static void wx_write_eitr_vf(struct wx_q_vector *q_vector) +void wx_write_eitr_vf(struct wx_q_vector *q_vector) { struct wx *wx = q_vector->wx; int v_idx = q_vector->v_idx; diff --git a/drivers/net/ethernet/wangxun/libwx/wx_vf_lib.h b/drivers/net/ethernet/wangxun/libwx/wx_vf_lib.h index 43ea126b79eb..a4bd23c92800 100644 --- a/drivers/net/ethernet/wangxun/libwx/wx_vf_lib.h +++ b/drivers/net/ethernet/wangxun/libwx/wx_vf_lib.h @@ -4,6 +4,7 @@ #ifndef _WX_VF_LIB_H_ #define _WX_VF_LIB_H_ +void wx_write_eitr_vf(struct wx_q_vector *q_vector); void wx_configure_msix_vf(struct wx *wx); int wx_write_uc_addr_list_vf(struct net_device *netdev); void wx_setup_psrtype_vf(struct wx *wx); 
diff --git a/drivers/net/ethernet/wangxun/ngbe/ngbe_ethtool.c b/drivers/net/ethernet/wangxun/ngbe/ngbe_ethtool.c index 7e2d9ec38a30..2ca127a7aa77 100644 --- a/drivers/net/ethernet/wangxun/ngbe/ngbe_ethtool.c +++ b/drivers/net/ethernet/wangxun/ngbe/ngbe_ethtool.c @@ -115,7 +115,8 @@ static int ngbe_set_channels(struct net_device *dev, static const struct ethtool_ops ngbe_ethtool_ops = { .supported_coalesce_params = ETHTOOL_COALESCE_USECS | - ETHTOOL_COALESCE_TX_MAX_FRAMES_IRQ, + ETHTOOL_COALESCE_TX_MAX_FRAMES_IRQ | + ETHTOOL_COALESCE_USE_ADAPTIVE_RX, .get_drvinfo = wx_get_drvinfo, .get_link = ethtool_op_get_link, .get_link_ksettings = wx_get_link_ksettings, diff --git a/drivers/net/ethernet/wangxun/txgbe/txgbe_ethtool.c b/drivers/net/ethernet/wangxun/txgbe/txgbe_ethtool.c index a4753402660e..86f3c106f1ed 100644 --- a/drivers/net/ethernet/wangxun/txgbe/txgbe_ethtool.c +++ b/drivers/net/ethernet/wangxun/txgbe/txgbe_ethtool.c @@ -538,7 +538,8 @@ static int txgbe_set_rxnfc(struct net_device *dev, struct ethtool_rxnfc *cmd) static const struct ethtool_ops txgbe_ethtool_ops = { .supported_coalesce_params = ETHTOOL_COALESCE_USECS | - ETHTOOL_COALESCE_TX_MAX_FRAMES_IRQ, + ETHTOOL_COALESCE_TX_MAX_FRAMES_IRQ | + ETHTOOL_COALESCE_USE_ADAPTIVE_RX, .get_drvinfo = wx_get_drvinfo, .nway_reset = wx_nway_reset, .get_link = ethtool_op_get_link, -- 2.48.1