Add support for the AER driver to handle PCIe errors. When a PCIe error occurs, the netdev watchdog Tx timeout sometimes fires before the AER error is reported, and the MMIO accesses performed during the resulting reset can block the CPU. To prevent this, check the PCIe error status in .ndo_tx_timeout. The ngbe implementation is not yet complete; it will be finished in the future. Signed-off-by: Jiawen Wu --- drivers/net/ethernet/wangxun/libwx/wx_err.c | 148 +++++++++++++++++- drivers/net/ethernet/wangxun/libwx/wx_err.h | 2 + drivers/net/ethernet/wangxun/libwx/wx_type.h | 4 + drivers/net/ethernet/wangxun/ngbe/ngbe_main.c | 33 +++- .../net/ethernet/wangxun/txgbe/txgbe_main.c | 30 +++- 5 files changed, 212 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/wangxun/libwx/wx_err.c b/drivers/net/ethernet/wangxun/libwx/wx_err.c index ee27f96735dc..aca52b9e8260 100644 --- a/drivers/net/ethernet/wangxun/libwx/wx_err.c +++ b/drivers/net/ethernet/wangxun/libwx/wx_err.c @@ -4,11 +4,124 @@ #include #include +#include #include "wx_type.h" #include "wx_lib.h" #include "wx_err.h" +/** + * wx_io_error_detected - called when PCI error is detected + * @pdev: Pointer to PCI device + * @state: The current pci connection state + * + * Return: pci_ers_result_t. + * + * This function is called after a PCI bus error affecting + * this device has been detected.
+ */ +static pci_ers_result_t wx_io_error_detected(struct pci_dev *pdev, + pci_channel_state_t state) +{ + struct wx *wx = pci_get_drvdata(pdev); + struct net_device *netdev; + + if (!wx) + return PCI_ERS_RESULT_DISCONNECT; + + netdev = wx->netdev; + if (!netif_device_present(netdev)) + return PCI_ERS_RESULT_DISCONNECT; + + if (state == pci_channel_io_perm_failure) + return PCI_ERS_RESULT_DISCONNECT; + + rtnl_lock(); + netif_device_detach(netdev); + set_bit(WX_FLAG_NEED_PCIE_RECOVERY, wx->flags); + wx_soft_quiesce(wx); + + if (!test_and_set_bit(WX_STATE_DISABLED, wx->state)) + pci_disable_device(pdev); + rtnl_unlock(); + + /* Request a slot reset. */ + return PCI_ERS_RESULT_NEED_RESET; +} + +/** + * wx_io_slot_reset - called after the pci bus has been reset. + * @pdev: Pointer to PCI device + * + * Return: pci_ers_result_t. + * + * Restart the card from scratch, as if from a cold-boot. + */ +static pci_ers_result_t wx_io_slot_reset(struct pci_dev *pdev) +{ + struct wx *wx = pci_get_drvdata(pdev); + pci_ers_result_t result; + + if (pci_enable_device_mem(pdev)) { + wx_err(wx, "Cannot re-enable PCI device after reset.\n"); + result = PCI_ERS_RESULT_DISCONNECT; + } else { + /* make sure all memory operations are done before clearing the flags */ + smp_mb__before_atomic(); + clear_bit(WX_STATE_DISABLED, wx->state); + clear_bit(WX_FLAG_NEED_PCIE_RECOVERY, wx->flags); + pci_set_master(pdev); + pci_restore_state(pdev); + pci_wake_from_d3(pdev, false); + + rtnl_lock(); + if (netif_running(wx->netdev) && wx->down_suspend) + wx->down_suspend(wx); + if (wx->do_reset) + wx->do_reset(wx->netdev, false); + rtnl_unlock(); + result = PCI_ERS_RESULT_RECOVERED; + } + + pci_aer_clear_nonfatal_status(pdev); + + return result; +} + +/** + * wx_io_resume - called when traffic can start flowing again. + * @pdev: Pointer to PCI device + * + * This callback is called when the error recovery driver tells us that + * it's OK to resume normal operation.
+ */ +static void wx_io_resume(struct pci_dev *pdev) +{ + struct wx *wx = pci_get_drvdata(pdev); + struct net_device *netdev; + int err; + + netdev = wx->netdev; + rtnl_lock(); + if (netif_running(netdev)) { + err = netdev->netdev_ops->ndo_open(netdev); + if (err) { + wx_err(wx, "Failed to open netdev after reset\n"); + goto out; + } + } + netif_device_attach(netdev); +out: + rtnl_unlock(); +} + +const struct pci_error_handlers wx_err_handler = { + .error_detected = wx_io_error_detected, + .slot_reset = wx_io_slot_reset, + .resume = wx_io_resume, +}; +EXPORT_SYMBOL(wx_err_handler); + static void wx_pf_reset_subtask(struct wx *wx) { if (!test_and_clear_bit(WX_FLAG_NEED_PF_RESET, wx->flags)) @@ -25,6 +138,9 @@ static void wx_reset_task(struct work_struct *work) rtnl_lock(); + if (test_bit(WX_FLAG_NEED_PCIE_RECOVERY, wx->flags)) + wx_soft_quiesce(wx); + if (test_bit(WX_STATE_DOWN, wx->state) || test_bit(WX_STATE_RESETTING, wx->state)) goto out; @@ -139,6 +255,33 @@ void wx_check_hang_subtask(struct wx *wx) } EXPORT_SYMBOL(wx_check_hang_subtask); +static bool wx_check_pcie_error(struct wx *wx) +{ + u16 vid, pci_cmd; + + pci_read_config_word(wx->pdev, PCI_VENDOR_ID, &vid); + pci_read_config_word(wx->pdev, PCI_COMMAND, &pci_cmd); + + /* PCIe link is lost or memory space is not accessible */ + if (vid == 0xFFFF || !(pci_cmd & 0x2)) + return true; + + return false; +} + +static void wx_tx_timeout_recovery(struct wx *wx) +{ + /* + * When a PCIe hardware error occurs, the driver should initiate a PCIe + * recovery mechanism. However, under current kernel policy this + * recovery flow relies on the AER driver. Therefore, a self-contained + * recovery mechanism is not implemented yet.
+ */ + set_bit(WX_FLAG_NEED_PCIE_RECOVERY, wx->flags); + wx_err(wx, "PCIe error detected during tx timeout\n"); + queue_work(wx->reset_wq, &wx->reset_task); +} + static void wx_tx_timeout_reset(struct wx *wx) { if (test_bit(WX_STATE_DOWN, wx->state)) @@ -153,7 +296,10 @@ void wx_tx_timeout(struct net_device *netdev, unsigned int __always_unused txque { struct wx *wx = netdev_priv(netdev); - wx_tx_timeout_reset(wx); + if (wx_check_pcie_error(wx)) + wx_tx_timeout_recovery(wx); + else + wx_tx_timeout_reset(wx); } EXPORT_SYMBOL(wx_tx_timeout); diff --git a/drivers/net/ethernet/wangxun/libwx/wx_err.h b/drivers/net/ethernet/wangxun/libwx/wx_err.h index 1eed13e48095..a6a82a263528 100644 --- a/drivers/net/ethernet/wangxun/libwx/wx_err.h +++ b/drivers/net/ethernet/wangxun/libwx/wx_err.h @@ -7,6 +7,8 @@ #ifndef _WX_ERR_H_ #define _WX_ERR_H_ +extern const struct pci_error_handlers wx_err_handler; + void wx_check_err_subtask(struct wx *wx); int wx_init_err_task(struct wx *wx); void wx_check_hang_subtask(struct wx *wx); diff --git a/drivers/net/ethernet/wangxun/libwx/wx_type.h b/drivers/net/ethernet/wangxun/libwx/wx_type.h index a8b4e84787f4..c2edb74881f2 100644 --- a/drivers/net/ethernet/wangxun/libwx/wx_type.h +++ b/drivers/net/ethernet/wangxun/libwx/wx_type.h @@ -1221,6 +1221,8 @@ enum wx_state { WX_STATE_PTP_RUNNING, WX_STATE_PTP_TX_IN_PROGRESS, WX_STATE_SERVICE_SCHED, + WX_STATE_DISABLED, + WX_STATE_RES_FREED, WX_STATE_NBITS /* must be last */ }; @@ -1288,6 +1290,7 @@ enum wx_pf_flags { WX_FLAG_RX_MERGE_ENABLED, WX_FLAG_TXHEAD_WB_ENABLED, WX_FLAG_NEED_PF_RESET, + WX_FLAG_NEED_PCIE_RECOVERY, WX_PF_FLAGS_NBITS /* must be last */ }; @@ -1409,6 +1412,7 @@ struct wx { void (*configure_fdir)(struct wx *wx); int (*setup_tc)(struct net_device *netdev, u8 tc); void (*do_reset)(struct net_device *netdev, bool reinit); + void (*down_suspend)(struct wx *wx); int (*ptp_setup_sdp)(struct wx *wx); void (*set_num_queues)(struct wx *wx); diff --git 
a/drivers/net/ethernet/wangxun/ngbe/ngbe_main.c b/drivers/net/ethernet/wangxun/ngbe/ngbe_main.c index 7dd3e12d48aa..7585d4fe4442 100644 --- a/drivers/net/ethernet/wangxun/ngbe/ngbe_main.c +++ b/drivers/net/ethernet/wangxun/ngbe/ngbe_main.c @@ -47,6 +47,22 @@ static const struct pci_device_id ngbe_pci_tbl[] = { { } }; +static void ngbe_down_suspend(struct wx *wx) +{ + if (test_and_set_bit(WX_STATE_RES_FREED, wx->state)) + return; + + phylink_stop(wx->phylink); + phylink_disconnect_phy(wx->phylink); + + wx_clean_all_tx_rings(wx); + wx_clean_all_rx_rings(wx); + + wx_free_irq(wx); + wx_free_isb_resources(wx); + wx_free_resources(wx); +} + /** * ngbe_init_type_code - Initialize the shared code * @wx: pointer to hardware structure @@ -135,6 +151,7 @@ static int ngbe_sw_init(struct wx *wx) wx->mbx.size = WX_VXMAILBOX_SIZE; wx->setup_tc = ngbe_setup_tc; wx->do_reset = ngbe_do_reset; + wx->down_suspend = ngbe_down_suspend; set_bit(0, &wx->fwd_bitmask); return 0; @@ -413,6 +430,9 @@ static void ngbe_disable_device(struct wx *wx) static void ngbe_reset(struct wx *wx) { + if (test_bit(WX_FLAG_NEED_PCIE_RECOVERY, wx->flags)) + return; + wx_flush_sw_mac_table(wx); wx_mac_set_default_filter(wx, wx->mac.addr); if (test_bit(WX_STATE_PTP_RUNNING, wx->state)) @@ -435,6 +455,7 @@ static void ngbe_up_complete(struct wx *wx) /* make sure to complete pre-operations */ smp_mb__before_atomic(); clear_bit(WX_STATE_DOWN, wx->state); + clear_bit(WX_STATE_RES_FREED, wx->state); wx_napi_enable_all(wx); /* enable transmits */ netif_tx_start_all_queues(wx->netdev); @@ -529,6 +550,9 @@ static int ngbe_close(struct net_device *netdev) { struct wx *wx = netdev_priv(netdev); + if (test_bit(WX_STATE_RES_FREED, wx->state)) + return 0; + wx_ptp_stop(wx); ngbe_down(wx); wx_free_irq(wx); @@ -566,7 +590,8 @@ static void ngbe_dev_shutdown(struct pci_dev *pdev, bool *enable_wake) *enable_wake = !!wufc; wx_control_hw(wx, false); - pci_disable_device(pdev); + if (!test_and_set_bit(WX_STATE_DISABLED, 
wx->state)) + pci_disable_device(pdev); } static void ngbe_shutdown(struct pci_dev *pdev) @@ -856,6 +881,7 @@ static int ngbe_probe(struct pci_dev *pdev, goto err_register; pci_set_drvdata(pdev, wx); + pci_save_state(pdev); return 0; @@ -911,7 +937,8 @@ static void ngbe_remove(struct pci_dev *pdev) kfree(wx->mac_table); wx_clear_interrupt_scheme(wx); - pci_disable_device(pdev); + if (!test_and_set_bit(WX_STATE_DISABLED, wx->state)) + pci_disable_device(pdev); } static int ngbe_suspend(struct pci_dev *pdev, pm_message_t state) @@ -938,6 +965,7 @@ static int ngbe_resume(struct pci_dev *pdev) wx_err(wx, "Cannot enable PCI device from suspend\n"); return err; } + clear_bit(WX_STATE_DISABLED, wx->state); pci_set_master(pdev); device_wakeup_disable(&pdev->dev); @@ -962,6 +990,7 @@ static struct pci_driver ngbe_driver = { .resume = ngbe_resume, .shutdown = ngbe_shutdown, .sriov_configure = wx_pci_sriov_configure, + .err_handler = &wx_err_handler, }; module_pci_driver(ngbe_driver); diff --git a/drivers/net/ethernet/wangxun/txgbe/txgbe_main.c b/drivers/net/ethernet/wangxun/txgbe/txgbe_main.c index f6e596eb9217..bee42ac234c2 100644 --- a/drivers/net/ethernet/wangxun/txgbe/txgbe_main.c +++ b/drivers/net/ethernet/wangxun/txgbe/txgbe_main.c @@ -155,6 +155,7 @@ static void txgbe_up_complete(struct wx *wx) /* make sure to complete pre-operations */ smp_mb__before_atomic(); clear_bit(WX_STATE_DOWN, wx->state); + clear_bit(WX_STATE_RES_FREED, wx->state); wx_napi_enable_all(wx); switch (wx->mac.type) { @@ -198,6 +199,9 @@ static void txgbe_reset(struct wx *wx) u8 old_addr[ETH_ALEN]; int err; + if (test_bit(WX_FLAG_NEED_PCIE_RECOVERY, wx->flags)) + return; + err = txgbe_reset_hw(wx); if (err != 0) wx_err(wx, "Hardware Error: %d\n", err); @@ -304,6 +308,20 @@ void txgbe_up(struct wx *wx) txgbe_up_complete(wx); } +static void txgbe_down_suspend(struct wx *wx) +{ + if (test_and_set_bit(WX_STATE_RES_FREED, wx->state)) + return; + + phylink_stop(wx->phylink); + wx_clean_all_tx_rings(wx); 
+ wx_clean_all_rx_rings(wx); + wx_free_irq(wx); + txgbe_free_misc_irq(wx->priv); + wx_free_resources(wx); + txgbe_fdir_filter_exit(wx); +} + /** * txgbe_init_type_code - Initialize the shared code * @wx: pointer to hardware structure @@ -420,6 +438,7 @@ static int txgbe_sw_init(struct wx *wx) wx->setup_tc = txgbe_setup_tc; wx->do_reset = txgbe_do_reset; + wx->down_suspend = txgbe_down_suspend; set_bit(0, &wx->fwd_bitmask); switch (wx->mac.type) { @@ -530,6 +549,9 @@ static int txgbe_close(struct net_device *netdev) { struct wx *wx = netdev_priv(netdev); + if (test_bit(WX_STATE_RES_FREED, wx->state)) + return 0; + wx_ptp_stop(wx); txgbe_down(wx); wx_free_irq(wx); @@ -556,7 +578,8 @@ static void txgbe_dev_shutdown(struct pci_dev *pdev) wx_control_hw(wx, false); - pci_disable_device(pdev); + if (!test_and_set_bit(WX_STATE_DISABLED, wx->state)) + pci_disable_device(pdev); } static void txgbe_shutdown(struct pci_dev *pdev) @@ -908,6 +931,7 @@ static int txgbe_probe(struct pci_dev *pdev, goto err_remove_phy; pci_set_drvdata(pdev, wx); + pci_save_state(pdev); netif_tx_stop_all_queues(netdev); @@ -982,7 +1006,8 @@ static void txgbe_remove(struct pci_dev *pdev) kfree(wx->mac_table); wx_clear_interrupt_scheme(wx); - pci_disable_device(pdev); + if (!test_and_set_bit(WX_STATE_DISABLED, wx->state)) + pci_disable_device(pdev); } static struct pci_driver txgbe_driver = { @@ -992,6 +1017,7 @@ static struct pci_driver txgbe_driver = { .remove = txgbe_remove, .shutdown = txgbe_shutdown, .sriov_configure = wx_pci_sriov_configure, + .err_handler = &wx_err_handler, }; module_pci_driver(txgbe_driver); -- 2.51.0