From: Jacob Keller The 'numa_node' field in struct ixgbe_q_vector shadows the 'numa_node' accessor for struct device, which triggers a sparse warning about shadowing a built-in object. The stored value here is a plain NUMA node number, not a device attribute, so rename it to 'node' to avoid the shadow and keep the naming consistent with other Intel drivers. Update all three usage sites in ixgbe_lib.c and ixgbe_main.c. Signed-off-by: Jacob Keller Reviewed-by: Simon Horman Signed-off-by: Aleksandr Loktionov --- drivers/net/ethernet/intel/ixgbe/ixgbe.h | 2 +- drivers/net/ethernet/intel/ixgbe/ixgbe_lib.c | 2 +- drivers/net/ethernet/intel/ixgbe/ixgbe_main.c | 4 ++-- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe.h b/drivers/net/ethernet/intel/ixgbe/ixgbe.h index 9b82175..cf2df18 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe.h +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe.h @@ -515,7 +515,7 @@ struct ixgbe_q_vector { struct rcu_head rcu; /* to avoid race with update stats on free */ cpumask_t affinity_mask; - int numa_node; + int node; char name[IFNAMSIZ + 9]; /* for dynamic allocation of rings associated with this q_vector */ diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_lib.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_lib.c index 1db4bd5..5a4f05d 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_lib.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_lib.c @@ -864,7 +864,7 @@ static int ixgbe_alloc_q_vector(struct ixgbe_adapter *adapter, /* setup affinity mask and node */ if (cpu != -1) cpumask_set_cpu(cpu, &q_vector->affinity_mask); - q_vector->numa_node = node; + q_vector->node = node; #ifdef CONFIG_IXGBE_DCA /* initialize CPU for DCA */ diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index 2646ee6..65426a1 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -7074,7 +7074,7 @@ int ixgbe_setup_tx_resources(struct ixgbe_ring *tx_ring) size = sizeof(struct ixgbe_tx_buffer) * tx_ring->count; if (tx_ring->q_vector) - ring_node = tx_ring->q_vector->numa_node; + ring_node = tx_ring->q_vector->node; tx_ring->tx_buffer_info = vmalloc_node(size, ring_node); if (!tx_ring->tx_buffer_info) @@ -7175,7 +7175,7 @@ int ixgbe_setup_rx_resources(struct ixgbe_adapter *adapter, size = sizeof(struct ixgbe_rx_buffer) * rx_ring->count; if (rx_ring->q_vector) - ring_node = rx_ring->q_vector->numa_node; + ring_node = rx_ring->q_vector->node; rx_ring->rx_buffer_info = vmalloc_node(size, ring_node); if (!rx_ring->rx_buffer_info) -- 2.52.0 The variables used to store return values of kernel and driver functions throughout the ixgbe driver are declared as u32 in several places. Such functions return negative errno values on error (e.g. -EIO, -EFAULT), which are sign-extended negative integers. Storing them in an unsigned u32 silently wraps the value: -EIO (0xFFFFFFF7) stored in u32 becomes a large positive number, so any "if (status)" truthiness check still works by accident, but comparisons against specific negative error codes or propagation up the call stack produce wrong results. In the Linux kernel, u32 is reserved for fixed-width quantities used in hardware interfaces or protocol structures. Using it for generic error codes misleads reviewers into thinking the value is hardware-constrained. Change all such local variables from u32 to int driver-wide: one in ixgbe_main.c (ixgbe_resume), three in ixgbe_phy.c (ixgbe_identify_phy_generic, ixgbe_tn_check_overtemp, ixgbe_set_copper_phy_power), and six in ixgbe_x550.c (ixgbe_check_link_t_X550em, ixgbe_get_lasi_ext_t_x550em, ixgbe_enable_lasi_ext_t_x550em, ixgbe_handle_lasi_ext_t_x550em, ixgbe_ext_phy_t_x550em_get_link, ixgbe_setup_internal_phy_t_x550em). No functional change. Reviewed-by: Simon Horman Signed-off-by: Aleksandr Loktionov --- drivers/net/ethernet/intel/ixgbe/ixgbe_main.c | 2 +- drivers/net/ethernet/intel/ixgbe/ixgbe_phy.c | 6 +++--- drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c | 12 ++++++------ 3 files changed, 10 insertions(+), 10 deletions(-) diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index 65426a1..b6308c1 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -7528,7 +7528,7 @@ static int ixgbe_resume(struct device *dev_d) struct pci_dev *pdev = to_pci_dev(dev_d); struct ixgbe_adapter *adapter = pci_get_drvdata(pdev); struct net_device *netdev = adapter->netdev; - u32 err; + int err; adapter->hw.hw_addr = adapter->io_addr; diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_phy.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_phy.c index ab733e7..de8f6c6 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_phy.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_phy.c @@ -262,7 +262,7 @@ static bool ixgbe_probe_phy(struct ixgbe_hw *hw, u16 phy_addr) **/ int ixgbe_identify_phy_generic(struct ixgbe_hw *hw) { - u32 status = -EFAULT; + int status = -EFAULT; u32 phy_addr; if (!hw->phy.phy_semaphore_mask) { @@ -2811,7 +2811,7 @@ static void ixgbe_i2c_bus_clear(struct ixgbe_hw *hw) bool ixgbe_tn_check_overtemp(struct ixgbe_hw *hw) { u16 phy_data = 0; - u32 status; + int status; if (hw->device_id != IXGBE_DEV_ID_82599_T3_LOM) return false; @@ -2831,7 +2831,7 @@ bool ixgbe_tn_check_overtemp(struct ixgbe_hw *hw) **/ int ixgbe_set_copper_phy_power(struct ixgbe_hw *hw, bool on) { - u32 status; + int status; u16 reg; /* Bail if we don't have copper phy */ diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c index 76d2fa3..9b14f3b 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c @@ -1911,7 +1911,7 @@ static int ixgbe_check_link_t_X550em(struct ixgbe_hw *hw, bool *link_up, bool link_up_wait_to_complete) { - u32 status; + int status; u16 i, autoneg_status; if (hw->mac.ops.get_media_type(hw) != ixgbe_media_type_copper) @@ -2330,7 +2330,7 @@ static int ixgbe_get_link_capabilities_X550em(struct ixgbe_hw *hw, static int ixgbe_get_lasi_ext_t_x550em(struct ixgbe_hw *hw, bool *lsc, bool *is_overtemp) { - u32 status; + int status; u16 reg; *is_overtemp = false; @@ -2418,7 +2418,7 @@ static int ixgbe_get_lasi_ext_t_x550em(struct ixgbe_hw *hw, bool *lsc, static int ixgbe_enable_lasi_ext_t_x550em(struct ixgbe_hw *hw) { bool lsc, overtemp; - u32 status; + int status; u16 reg; /* Clear interrupt flags */ @@ -2512,7 +2512,7 @@ static int ixgbe_handle_lasi_ext_t_x550em(struct ixgbe_hw *hw, { struct ixgbe_phy_info *phy = &hw->phy; bool lsc; - u32 status; + int status; status = ixgbe_get_lasi_ext_t_x550em(hw, &lsc, is_overtemp); if (status) @@ -2606,7 +2606,7 @@ static int ixgbe_setup_kr_x550em(struct ixgbe_hw *hw) **/ static int ixgbe_ext_phy_t_x550em_get_link(struct ixgbe_hw *hw, bool *link_up) { - u32 ret; + int ret; u16 autoneg_status; *link_up = false; @@ -2642,7 +2642,7 @@ static int ixgbe_setup_internal_phy_t_x550em(struct ixgbe_hw *hw) { ixgbe_link_speed force_speed; bool link_up; - u32 status; + int status; u16 speed; if (hw->mac.ops.get_media_type(hw) != ixgbe_media_type_copper) -- 2.52.0 From: Piotr Skajewski When the same flow specification is added twice (same 5-tuple with different sw_idx values), ixgbe_add_ethtool_fdir_entry() silently programs the duplicate into hardware using a second FDIR table slot. This wastes a scarce FDIR entry and can cause confusing behaviour when deleting rules. Add a helper ixgbe_match_ethtool_fdir_entry() that walks the in-kernel filter list before programming hardware. If an entry with an identical filter (excluding the sw_idx) already exists, the new add request is rejected with -EEXIST. Signed-off-by: Piotr Skajewski Signed-off-by: Aleksandr Loktionov --- .../net/ethernet/intel/ixgbe/ixgbe_ethtool.c | 25 +++++++++++++++++++ 1 file changed, 23 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c index ba049b3..a2009df 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c @@ -2938,6 +2938,21 @@ static int ixgbe_flowspec_to_flow_type(struct ethtool_rx_flow_spec *fsp, return 1; } +static bool ixgbe_match_ethtool_fdir_entry(struct ixgbe_adapter *adapter, + struct ixgbe_fdir_filter *input) +{ + struct ixgbe_fdir_filter *rule = NULL; + + hlist_for_each_entry(rule, &adapter->fdir_filter_list, fdir_node) { + if (rule->sw_idx == input->sw_idx) + continue; + if (!memcmp(&rule->filter, &input->filter, + sizeof(rule->filter))) + return true; + } + return false; +} + static int ixgbe_add_ethtool_fdir_entry(struct ixgbe_adapter *adapter, struct ethtool_rxnfc *cmd) { @@ -2947,7 +2964,7 @@ static int ixgbe_add_ethtool_fdir_entry(struct ixgbe_adapter *adapter, struct ixgbe_fdir_filter *input; union ixgbe_atr_input mask; - u8 queue; - int err; + int err = -EINVAL; + u8 queue; if (!(adapter->flags & IXGBE_FLAG_FDIR_PERFECT_CAPABLE)) return -EOPNOTSUPP; @@ -3050,6 +3067,12 @@ static int ixgbe_add_ethtool_fdir_entry(struct ixgbe_adapter *adapter, /* apply mask and compute/store hash */ ixgbe_atr_compute_perfect_hash_82599(&input->filter, &mask); + /* check for a duplicate filter */ + if (ixgbe_match_ethtool_fdir_entry(adapter, input)) { + err = -EEXIST; + goto err_out_w_lock; + } + /* program filters to filter memory */ err = ixgbe_fdir_write_perfect_filter_82599(hw, &input->filter, input->sw_idx, queue); @@ -3065,7 +3088,7 @@ static int ixgbe_add_ethtool_fdir_entry(struct ixgbe_adapter *adapter, spin_unlock(&adapter->fdir_perfect_lock); err_out: kfree(input); - return -EINVAL; + return err; } static int ixgbe_del_ethtool_fdir_entry(struct ixgbe_adapter *adapter, -- 2.52.0 From: Maciej Rabeda ixgbe_disable_rx_buff_generic polls for SECRX_RDY with 40 iterations and a 1000 us (1 ms) busy-wait per iteration via udelay(), giving an exact total wait of 40 ms. On fast hardware the security block is typically ready well under 1 ms, so each iteration wastes up to 999 us of stalled initialization time. Replace udelay(1000) with usleep_range(10, 20) and raise the iteration limit to 4000. Because usleep_range(min, max) is guaranteed to sleep at least 'min' microseconds, 4000 * 10 us preserves the original 40 ms minimum wait before timeout. The nominal worst-case rises to ~80 ms (4000 * 20 us); on a loaded system actual scheduler wakeup latency may push this higher. This is acceptable because SECRX_RDY failing to assert is a non-fatal informational condition: the function just logs a debug message and returns success. On platforms where SECRX_RDY asserts quickly this reduces the typical stall from up to ~1 ms per iteration to ~10-20 us. The function is called only from process context, so usleep_range is appropriate. Signed-off-by: Maciej Rabeda Reviewed-by: Simon Horman Signed-off-by: Aleksandr Loktionov --- drivers/net/ethernet/intel/ixgbe/ixgbe_common.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c index 3ea6765..c85618c 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c @@ -2683,7 +2683,7 @@ int prot_autoc_write_generic(struct ixgbe_hw *hw, u32 reg_val, bool locked) **/ int ixgbe_disable_rx_buff_generic(struct ixgbe_hw *hw) { -#define IXGBE_MAX_SECRX_POLL 40 +#define IXGBE_MAX_SECRX_POLL 4000 int i; int secrxreg; @@ -2695,8 +2695,7 @@ int ixgbe_disable_rx_buff_generic(struct ixgbe_hw *hw) if (secrxreg & IXGBE_SECRXSTAT_SECRX_RDY) break; else - /* Use interrupt-safe sleep just in case */ - udelay(1000); + usleep_range(10, 20); } /* For informational purposes only */ -- 2.52.0 From: Jacob Keller Replace ktime_to_ns(ktime_get_real()) with the direct equivalent ktime_get_real_ns() in ixgbe_ptp_reset(). Using the combined helper avoids the unnecessary intermediate ktime_t variable and makes the intent clearer. Signed-off-by: Jacob Keller Signed-off-by: Aleksandr Loktionov Reviewed-by: Marcin Szycik Reviewed-by: Simon Horman --- drivers/net/ethernet/intel/ixgbe/ixgbe_ptp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_ptp.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_ptp.c index 6885d23..a7d1635 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_ptp.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_ptp.c @@ -1347,7 +1347,7 @@ void ixgbe_ptp_reset(struct ixgbe_adapter *adapter) spin_lock_irqsave(&adapter->tmreg_lock, flags); timecounter_init(&adapter->hw_tc, &adapter->hw_cc, - ktime_to_ns(ktime_get_real())); + ktime_get_real_ns()); spin_unlock_irqrestore(&adapter->tmreg_lock, flags); adapter->last_overflow_check = jiffies; -- 2.52.0 From: Jakub Chylkowski Both ixgbe_setup_phy_link_generic() and ixgbe_setup_phy_link_tnx() end with the same three-line sequence that reads MDIO_CTRL1, sets the MDIO_AN_CTRL1_RESTART bit, and writes MDIO_CTRL1 back. Factor it out into a static helper ixgbe_restart_auto_neg() and call it from both sites. While at it, also check the return value of phy.ops.read_reg() in the helper and skip the write on failure. The original inlined code ignored the read result and would OR MDIO_AN_CTRL1_RESTART into a stale autoneg_reg value (left over from the prior MDIO_AN_ADVERTISE write) and unconditionally write it back to MDIO_CTRL1 if the read failed. This is a small behavioral change: on read_reg() failure the restart write is now skipped instead of being issued with a potentially garbage value. Signed-off-by: Jakub Chylkowski Reviewed-by: Simon Horman Signed-off-by: Aleksandr Loktionov --- drivers/net/ethernet/intel/ixgbe/ixgbe_phy.c | 36 ++++++++++++-------- 1 file changed, 22 insertions(+), 14 deletions(-) diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_phy.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_phy.c index de8f6c6..c7387a4 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_phy.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_phy.c @@ -1089,6 +1089,26 @@ int ixgbe_mii_bus_init(struct ixgbe_hw *hw) return mdiobus_register(bus); } +/** + * ixgbe_restart_auto_neg - restart PHY autonegotiation + * @hw: pointer to hardware structure + * + * Sets the restart autoneg bit in MDIO_CTRL1 to trigger a new + * autonegotiation cycle. + **/ +static void ixgbe_restart_auto_neg(struct ixgbe_hw *hw) +{ + u16 autoneg_reg; + int status; + + status = hw->phy.ops.read_reg(hw, MDIO_CTRL1, MDIO_MMD_AN, + &autoneg_reg); + if (status) + return; + autoneg_reg |= MDIO_AN_CTRL1_RESTART; + hw->phy.ops.write_reg(hw, MDIO_CTRL1, MDIO_MMD_AN, autoneg_reg); +} + /** * ixgbe_setup_phy_link_generic - Set and restart autoneg * @hw: pointer to hardware structure @@ -1156,13 +1176,7 @@ int ixgbe_setup_phy_link_generic(struct ixgbe_hw *hw) return 0; /* Restart PHY autonegotiation and wait for completion */ - hw->phy.ops.read_reg(hw, MDIO_CTRL1, - MDIO_MMD_AN, &autoneg_reg); - - autoneg_reg |= MDIO_AN_CTRL1_RESTART; - - hw->phy.ops.write_reg(hw, MDIO_CTRL1, - MDIO_MMD_AN, autoneg_reg); + ixgbe_restart_auto_neg(hw); return status; } @@ -1386,13 +1400,7 @@ int ixgbe_setup_phy_link_tnx(struct ixgbe_hw *hw) return 0; /* Restart PHY autonegotiation and wait for completion */ - hw->phy.ops.read_reg(hw, MDIO_CTRL1, - MDIO_MMD_AN, &autoneg_reg); - - autoneg_reg |= MDIO_AN_CTRL1_RESTART; - - hw->phy.ops.write_reg(hw, MDIO_CTRL1, - MDIO_MMD_AN, autoneg_reg); + ixgbe_restart_auto_neg(hw); return 0; } -- 2.52.0 From: Alexander Duyck When operating in latency mode and the computed ITR is lower than the current setting, the algorithm can reduce the interrupt rate too aggressively in a single step. For a TCP workload this means the ACK stream (a latency-sensitive, low-packet-rate workload) can drive the moderation down to very high interrupt rates, starving CPU time from the sender side. After the speed-based ITR calculation is complete, check whether the result is in latency mode and would decrease below the current setting. If so, limit the decrease to at most IXGBE_ITR_ADAPTIVE_MIN_INC (2 us) per update. This ensures the number of interrupts grows by no more than 2x per adjustment step for latency-class workloads, dialling in smoothly rather than overshooting. Signed-off-by: Alexander Duyck Reviewed-by: Simon Horman Signed-off-by: Aleksandr Loktionov --- drivers/net/ethernet/intel/ixgbe/ixgbe_main.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index aea76b3..ba7b013 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -2888,6 +2888,17 @@ static void ixgbe_update_itr(struct ixgbe_q_vector *q_vector, break; } + /* In the case of a latency specific workload only allow us to + * reduce the ITR by at most 2us. By doing this we should dial + * in so that our number of interrupts is no more than 2x the number + * of packets for the least busy workload. So for example in the case + * of a TCP workload the ACK packets being received would set the + * interrupt rate as they are a latency specific workload. + */ + if ((itr & IXGBE_ITR_ADAPTIVE_LATENCY) && itr < ring_container->itr) + itr = max_t(unsigned int, itr, + ring_container->itr - IXGBE_ITR_ADAPTIVE_MIN_INC); + clear_counts: /* write back value */ ring_container->itr = itr; -- 2.52.0 From: Alexander Duyck ixgbe_set_itr() clears the mode flag (IXGBE_ITR_ADAPTIVE_LATENCY, bit 7) with the open-coded complement expression ~IXGBE_ITR_ADAPTIVE_LATENCY. This is equivalent to keeping only bits [6:0], i.e. the usecs sub-field. Add IXGBE_ITR_ADAPTIVE_MASK_USECS = IXGBE_ITR_ADAPTIVE_LATENCY - 1 = 0x7F to name this mask explicitly and replace the open-coded AND-NOT operation with the cleaner AND form. The two expressions are arithmetically identical; the change improves readability. Signed-off-by: Alexander Duyck Reviewed-by: Simon Horman Signed-off-by: Aleksandr Loktionov --- drivers/net/ethernet/intel/ixgbe/ixgbe.h | 1 + drivers/net/ethernet/intel/ixgbe/ixgbe_main.c | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe.h b/drivers/net/ethernet/intel/ixgbe/ixgbe.h index cf2df18..20e2a97 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe.h +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe.h @@ -478,6 +478,7 @@ static inline unsigned int ixgbe_rx_pg_order(struct ixgbe_ring *ring) #define IXGBE_ITR_ADAPTIVE_MAX_USECS 126 #define IXGBE_ITR_ADAPTIVE_LATENCY 0x80 #define IXGBE_ITR_ADAPTIVE_BULK 0x00 +#define IXGBE_ITR_ADAPTIVE_MASK_USECS (IXGBE_ITR_ADAPTIVE_LATENCY - 1) struct ixgbe_ring_container { struct ixgbe_ring *ring; /* pointer to linked list of rings */ diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index ba7b013..be40655 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -2959,7 +2959,7 @@ static void ixgbe_set_itr(struct ixgbe_q_vector *q_vector) new_itr = min(q_vector->rx.itr, q_vector->tx.itr); /* Clear latency flag if set, shift into correct position */ - new_itr &= ~IXGBE_ITR_ADAPTIVE_LATENCY; + new_itr &= IXGBE_ITR_ADAPTIVE_MASK_USECS; new_itr <<= 2; if (new_itr != q_vector->itr) { -- 2.52.0