Currently aci.lock is initialized too late. A bunch of ACI callbacks
using the lock are called prior it's initialized.
Commit 337369f8ce9e ("locking/mutex: Add MUTEX_WARN_ON() into fast path")
highlights that issue what results in call trace.
[ 4.092899] DEBUG_LOCKS_WARN_ON(lock->magic != lock)
[ 4.092910] WARNING: CPU: 0 PID: 578 at kernel/locking/mutex.c:154 mutex_lock+0x6d/0x80
[ 4.098757] Call Trace:
[ 4.098847]
[ 4.098922] ixgbe_aci_send_cmd+0x8c/0x1e0 [ixgbe]
[ 4.099108] ? hrtimer_try_to_cancel+0x18/0x110
[ 4.099277] ixgbe_aci_get_fw_ver+0x52/0xa0 [ixgbe]
[ 4.099460] ixgbe_check_fw_error+0x1fc/0x2f0 [ixgbe]
[ 4.099650] ? usleep_range_state+0x69/0xd0
[ 4.099811] ? usleep_range_state+0x8c/0xd0
[ 4.099964] ixgbe_probe+0x3b0/0x12d0 [ixgbe]
[ 4.100132] local_pci_probe+0x43/0xa0
[ 4.100267] work_for_cpu_fn+0x13/0x20
[ 4.101647]
Move aci.lock mutex initialization to ixgbe_sw_init() before any ACI
command is sent. Along with that move also related SWFW semaphore in
order to reduce size of ixgbe_probe() and that way all locks are
initialized in ixgbe_sw_init().
Reviewed-by: Michal Swiatkowski
Reviewed-by: Przemek Kitszel
Reviewed-by: Simon Horman
Reviewed-by: Aleksandr Loktionov
Fixes: 4600cdf9f5ac ("ixgbe: Enable link management in E610 device")
Signed-off-by: Jedrzej Jagielski
---
drivers/net/ethernet/intel/ixgbe/ixgbe_main.c | 15 ++++++++-------
1 file changed, 8 insertions(+), 7 deletions(-)
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
index 03d31e5b131d..18cae81dc794 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
@@ -6801,6 +6801,13 @@ static int ixgbe_sw_init(struct ixgbe_adapter *adapter,
break;
}
+ /* Make sure the SWFW semaphore is in a valid state */
+ if (hw->mac.ops.init_swfw_sync)
+ hw->mac.ops.init_swfw_sync(hw);
+
+ if (hw->mac.type == ixgbe_mac_e610)
+ mutex_init(&hw->aci.lock);
+
#ifdef IXGBE_FCOE
/* FCoE support exists, always init the FCoE lock */
spin_lock_init(&adapter->fcoe.lock);
@@ -11473,10 +11480,6 @@ static int ixgbe_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
if (err)
goto err_sw_init;
- /* Make sure the SWFW semaphore is in a valid state */
- if (hw->mac.ops.init_swfw_sync)
- hw->mac.ops.init_swfw_sync(hw);
-
if (ixgbe_check_fw_error(adapter))
return ixgbe_recovery_probe(adapter);
@@ -11680,8 +11683,6 @@ static int ixgbe_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
ether_addr_copy(hw->mac.addr, hw->mac.perm_addr);
ixgbe_mac_set_default_filter(adapter);
- if (hw->mac.type == ixgbe_mac_e610)
- mutex_init(&hw->aci.lock);
timer_setup(&adapter->service_timer, ixgbe_service_timer, 0);
if (ixgbe_removed(hw->hw_addr)) {
@@ -11837,9 +11838,9 @@ static int ixgbe_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
devl_unlock(adapter->devlink);
ixgbe_release_hw_control(adapter);
ixgbe_clear_interrupt_scheme(adapter);
+err_sw_init:
if (hw->mac.type == ixgbe_mac_e610)
mutex_destroy(&adapter->hw.aci.lock);
-err_sw_init:
ixgbe_disable_sriov(adapter);
adapter->flags2 &= ~IXGBE_FLAG2_SEARCH_FOR_SFP;
iounmap(adapter->io_addr);
--
2.31.1
There's another issue with aci.lock and previous patch uncovers it.
aci.lock is being destroyed during removing ixgbe while some of the
ixgbe closing routines are still ongoing. These routines use Admin
Command Interface which require taking aci.lock which has been already
destroyed what leads to call trace.
[ +0.000004] DEBUG_LOCKS_WARN_ON(lock->magic != lock)
[ +0.000007] WARNING: CPU: 12 PID: 10277 at kernel/locking/mutex.c:155 mutex_lock+0x5f/0x70
[ +0.000002] Call Trace:
[ +0.000003]
[ +0.000006] ixgbe_aci_send_cmd+0xc8/0x220 [ixgbe]
[ +0.000049] ? try_to_wake_up+0x29d/0x5d0
[ +0.000009] ixgbe_disable_rx_e610+0xc4/0x110 [ixgbe]
[ +0.000032] ixgbe_disable_rx+0x3d/0x200 [ixgbe]
[ +0.000027] ixgbe_down+0x102/0x3b0 [ixgbe]
[ +0.000031] ixgbe_close_suspend+0x28/0x90 [ixgbe]
[ +0.000028] ixgbe_close+0xfb/0x100 [ixgbe]
[ +0.000025] __dev_close_many+0xae/0x220
[ +0.000005] dev_close_many+0xc2/0x1a0
[ +0.000004] ? kernfs_should_drain_open_files+0x2a/0x40
[ +0.000005] unregister_netdevice_many_notify+0x204/0xb00
[ +0.000006] ? __kernfs_remove.part.0+0x109/0x210
[ +0.000006] ? kobj_kset_leave+0x4b/0x70
[ +0.000008] unregister_netdevice_queue+0xf6/0x130
[ +0.000006] unregister_netdev+0x1c/0x40
[ +0.000005] ixgbe_remove+0x216/0x290 [ixgbe]
[ +0.000021] pci_device_remove+0x42/0xb0
[ +0.000007] device_release_driver_internal+0x19c/0x200
[ +0.000008] driver_detach+0x48/0x90
[ +0.000003] bus_remove_driver+0x6d/0xf0
[ +0.000006] pci_unregister_driver+0x2e/0xb0
[ +0.000005] ixgbe_exit_module+0x1c/0xc80 [ixgbe]
Same as for the previous commit, the issue has been highlighted by the
commit 337369f8ce9e ("locking/mutex: Add MUTEX_WARN_ON() into fast path").
Move destroying aci.lock to the end of ixgbe_remove(), as this
simply fixes the issue.
Fixes: 4600cdf9f5ac ("ixgbe: Enable link management in E610 device")
Signed-off-by: Jedrzej Jagielski
---
drivers/net/ethernet/intel/ixgbe/ixgbe_main.c | 7 ++++---
1 file changed, 4 insertions(+), 3 deletions(-)
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
index 18cae81dc794..4aa4ca603584 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
@@ -11891,10 +11891,8 @@ static void ixgbe_remove(struct pci_dev *pdev)
set_bit(__IXGBE_REMOVING, &adapter->state);
cancel_work_sync(&adapter->service_task);
- if (adapter->hw.mac.type == ixgbe_mac_e610) {
+ if (adapter->hw.mac.type == ixgbe_mac_e610)
ixgbe_disable_link_status_events(adapter);
- mutex_destroy(&adapter->hw.aci.lock);
- }
if (adapter->mii_bus)
mdiobus_unregister(adapter->mii_bus);
@@ -11954,6 +11952,9 @@ static void ixgbe_remove(struct pci_dev *pdev)
disable_dev = !test_and_set_bit(__IXGBE_DISABLED, &adapter->state);
free_netdev(netdev);
+ if (adapter->hw.mac.type == ixgbe_mac_e610)
+ mutex_destroy(&adapter->hw.aci.lock);
+
if (disable_dev)
pci_disable_device(pdev);
}
--
2.31.1