From: Michael Lo Activating AER ensures that enhanced error reporting feature are properly initialized, enabling more effective error management and helping to prevent system crashes caused by PCIe errors. [ 2932.266976] Unable to handle kernel paging request at virtual address ffffffc01099eac0 [ 2932.267800] pc : mt76_dma_add_buf+0x124/0x188 [mt76] [ 2932.267831] lr : mt76_dma_rx_fill+0x11c/0x1d8 [mt76] [ 2932.267845] sp : ffffffc016d9bbf0 [ 2932.267859] x29: ffffffc016d9bc10 x28: 0000000000000000 [ 2932.267885] x27: 0000000000000000 x26: ffffffb7855e50b8 [ 2932.267911] x25: ffffffb80d04f000 x24: 0000000000000000 [ 2932.267936] x23: 0000000000000ec0 x22: ffffffb796803648 [ 2932.267962] x21: ffffffb796801f80 x20: ffffffb7968035f8 [ 2932.267987] x19: 0000000000000ec0 x18: 0000000000000000 [ 2932.268012] x17: 000000004ec00000 x16: 000000000ec00000 [ 2932.268037] x15: ffffffc01099eac0 x14: 000000004ec00000 [ 2932.268063] x13: 00000000ffc5a000 x12: ffffffc016d9bc32 [ 2932.268088] x11: 00000000ffffffff x10: 0000000000000002 [ 2932.268113] x9 : 0000000000000000 x8 : 000000000000b4ac [ 2932.268138] x7 : 0000000000000a20 x6 : ffffffb6c1806400 [ 2932.268163] x5 : 0000000000000000 x4 : ffffffb80d04f000 [ 2932.268188] x3 : 0000000000000000 x2 : 0000000000000001 [ 2932.268213] x1 : 000000000ec04000 x0 : ffffffb7968035f8 [ 2932.268238] Call trace: [ 2932.268275] mt76_dma_add_buf+0x124/0x188 [mt76 (HASH:1029 4)] [ 2932.268309] mt76_dma_rx_reset+0xe8/0xfc [mt76 (HASH:1029 4)] [ 2932.268342] mt7921_wpdma_reset+0x188/0x1b0 [mt7921e (HASH:ee48 5)] [ 2932.268371] mt7921e_mac_reset+0x128/0x418 [mt7921e (HASH:ee48 5)] [ 2932.268403] mt7921_mac_reset_work+0xac/0x1a8 [mt7921_common (HASH:f721 6)] [ 2932.268427] process_one_work+0x188/0x514 [ 2932.268445] worker_thread+0x12c/0x300 [ 2932.268465] kthread+0x140/0x1fc [ 2932.268483] ret_from_fork+0x10/0x30 Due to hardware limitations - such as the lack of a connected hardware reset pin or the absence of host re-probe functionality - affected Wi-Fi devices may not fully recover to a normal operational state after certain errors, even with AER enabled. Signed-off-by: Michael Lo Signed-off-by: Ming Yen Hsieh --- drivers/net/wireless/mediatek/mt76/agg-rx.c | 9 +++ drivers/net/wireless/mediatek/mt76/dma.c | 6 ++ drivers/net/wireless/mediatek/mt76/mac80211.c | 3 + drivers/net/wireless/mediatek/mt76/mcu.c | 3 + .../net/wireless/mediatek/mt76/mt76_connac.h | 3 + .../wireless/mediatek/mt76/mt76_connac_mac.c | 3 + .../net/wireless/mediatek/mt76/mt7921/mac.c | 3 + .../net/wireless/mediatek/mt76/mt7921/main.c | 3 + .../net/wireless/mediatek/mt76/mt7921/pci.c | 64 +++++++++++++++++++ .../net/wireless/mediatek/mt76/mt792x_core.c | 8 +++ .../net/wireless/mediatek/mt76/mt792x_mac.c | 12 ++++ 11 files changed, 117 insertions(+) diff --git a/drivers/net/wireless/mediatek/mt76/agg-rx.c b/drivers/net/wireless/mediatek/mt76/agg-rx.c index 936ab1ca9246..89d45f5954a2 100644 --- a/drivers/net/wireless/mediatek/mt76/agg-rx.c +++ b/drivers/net/wireless/mediatek/mt76/agg-rx.c @@ -96,6 +96,9 @@ mt76_rx_aggr_reorder_work(struct work_struct *work) struct sk_buff_head frames; int nframes; + if (atomic_read(&dev->bus_hung) == 1) + return; + __skb_queue_head_init(&frames); local_bh_disable(); @@ -179,6 +182,9 @@ void mt76_rx_aggr_reorder(struct sk_buff *skb, struct sk_buff_head *frames) if (!tid) return; + if (atomic_read(&tid->dev->bus_hung) == 1) + return; + status->flag |= RX_FLAG_DUP_VALIDATED; spin_lock_bh(&tid->lock); @@ -246,6 +252,9 @@ int mt76_rx_aggr_start(struct mt76_dev *dev, struct mt76_wcid *wcid, u8 tidno, { struct mt76_rx_tid *tid; + if (atomic_read(&dev->bus_hung) == 1) + return -EIO; + mt76_rx_aggr_stop(dev, wcid, tidno); tid = kzalloc(struct_size(tid, reorder_buf, size), GFP_KERNEL); diff --git a/drivers/net/wireless/mediatek/mt76/dma.c b/drivers/net/wireless/mediatek/mt76/dma.c index 1fa7de1d2c45..2d508ddbc7b7 100644 --- a/drivers/net/wireless/mediatek/mt76/dma.c +++ b/drivers/net/wireless/mediatek/mt76/dma.c @@ -339,6 +339,9 @@ mt76_dma_add_buf(struct mt76_dev *dev, struct mt76_queue *q, int i, idx = -1; u32 ctrl, next; + if (atomic_read(&dev->bus_hung) == 1) + return idx; + if (txwi) { q->entry[q->head].txwi = DMA_DUMMY_DATA; q->entry[q->head].skip_buf0 = true; @@ -765,6 +768,9 @@ mt76_dma_rx_fill_buf(struct mt76_dev *dev, struct mt76_queue *q, int len = SKB_WITH_OVERHEAD(q->buf_size); int frames = 0; + if (atomic_read(&dev->bus_hung) == 1) + return 0; + if (!q->ndesc) return 0; diff --git a/drivers/net/wireless/mediatek/mt76/mac80211.c b/drivers/net/wireless/mediatek/mt76/mac80211.c index 09cc5e40ccf9..a70245672638 100644 --- a/drivers/net/wireless/mediatek/mt76/mac80211.c +++ b/drivers/net/wireless/mediatek/mt76/mac80211.c @@ -1549,6 +1549,9 @@ void mt76_rx_poll_complete(struct mt76_dev *dev, enum mt76_rxq_id q, struct sk_buff_head frames; struct sk_buff *skb; + if (atomic_read(&dev->bus_hung) == 1) + return; + __skb_queue_head_init(&frames); while ((skb = __skb_dequeue(&dev->rx_skb[q])) != NULL) { diff --git a/drivers/net/wireless/mediatek/mt76/mcu.c b/drivers/net/wireless/mediatek/mt76/mcu.c index 65d4c2adb538..2107c0c07f3e 100644 --- a/drivers/net/wireless/mediatek/mt76/mcu.c +++ b/drivers/net/wireless/mediatek/mt76/mcu.c @@ -58,6 +58,9 @@ int mt76_mcu_send_and_get_msg(struct mt76_dev *dev, int cmd, const void *data, { struct sk_buff *skb; + if (atomic_read(&dev->bus_hung) == 1) + return -EIO; + if (dev->mcu_ops->mcu_send_msg) return dev->mcu_ops->mcu_send_msg(dev, cmd, data, len, wait_resp); diff --git a/drivers/net/wireless/mediatek/mt76/mt76_connac.h b/drivers/net/wireless/mediatek/mt76/mt76_connac.h index 756719ce0e48..46b0f65320c1 100644 --- a/drivers/net/wireless/mediatek/mt76/mt76_connac.h +++ b/drivers/net/wireless/mediatek/mt76/mt76_connac.h @@ -333,6 +333,9 @@ static inline u8 mt76_connac_spe_idx(u8 antenna_mask) static inline void mt76_connac_irq_enable(struct mt76_dev *dev, u32 mask) { + if (atomic_read(&dev->bus_hung) == 1) + return; + mt76_set_irq_mask(dev, 0, 0, mask); tasklet_schedule(&dev->irq_tasklet); } diff --git a/drivers/net/wireless/mediatek/mt76/mt76_connac_mac.c b/drivers/net/wireless/mediatek/mt76/mt76_connac_mac.c index 0db00efe88b0..7a6db5e0e250 100644 --- a/drivers/net/wireless/mediatek/mt76/mt76_connac_mac.c +++ b/drivers/net/wireless/mediatek/mt76/mt76_connac_mac.c @@ -64,6 +64,9 @@ void mt76_connac_power_save_sched(struct mt76_phy *phy, { struct mt76_dev *dev = phy->dev; + if (atomic_read(&dev->bus_hung) == 1) + return; + if (mt76_is_usb(dev)) return; diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/mac.c b/drivers/net/wireless/mediatek/mt76/mt7921/mac.c index bce26389ab18..610aaf7eccff 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7921/mac.c +++ b/drivers/net/wireless/mediatek/mt76/mt7921/mac.c @@ -658,6 +658,9 @@ void mt7921_mac_reset_work(struct work_struct *work) struct mt76_connac_pm *pm = &dev->pm; int i, ret; + if (atomic_read(&dev->mt76.bus_hung) == 1) + return; + dev_dbg(dev->mt76.dev, "chip reset\n"); set_bit(MT76_RESET, &dev->mphy.state); dev->hw_full_reset = true; diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/main.c b/drivers/net/wireless/mediatek/mt76/mt7921/main.c index ef216153cdf0..ba85f3e5d0f8 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7921/main.c +++ b/drivers/net/wireless/mediatek/mt76/mt7921/main.c @@ -1002,6 +1002,9 @@ void mt7921_scan_work(struct work_struct *work) phy = (struct mt792x_phy *)container_of(work, struct mt792x_phy, scan_work.work); + if (atomic_read(&phy->dev->mt76.bus_hung) == 1) + return; + while (true) { struct mt76_connac2_mcu_rxd *rxd; struct sk_buff *skb; diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/pci.c b/drivers/net/wireless/mediatek/mt76/mt7921/pci.c index 71fba57db9be..019d7961d9d4 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7921/pci.c +++ b/drivers/net/wireless/mediatek/mt76/mt7921/pci.c @@ -363,6 +363,8 @@ static int mt7921_pci_probe(struct pci_dev *pdev, (mt7921_l1_rr(dev, MT_HW_REV) & 0xff); dev_info(mdev->dev, "ASIC revision: %04x\n", mdev->rev); + atomic_set(&mdev->bus_hung, 0); + ret = mt792x_wfsys_reset(dev); if (ret) goto err_free_dev; @@ -562,6 +564,67 @@ static void mt7921_pci_shutdown(struct pci_dev *pdev) static DEFINE_SIMPLE_DEV_PM_OPS(mt7921_pm_ops, mt7921_pci_suspend, mt7921_pci_resume); +static pci_ers_result_t mt7921_pci_error_detected(struct pci_dev *pdev, pci_channel_state_t state) +{ + struct mt76_dev *mdev = pci_get_drvdata(pdev); + struct mt792x_dev *dev = container_of(mdev, struct mt792x_dev, mt76); + struct ieee80211_hw *hw = mdev->hw; + struct mt792x_phy *phy = mt792x_hw_phy(hw); + struct net_device *netdev = pci_get_drvdata(pdev); + + if (state == pci_channel_io_normal) + return PCI_ERS_RESULT_CAN_RECOVER; + + if (atomic_read(&mdev->bus_hung) == 1) + return PCI_ERS_RESULT_NEED_RESET; + + atomic_set(&mdev->bus_hung, 1); + + set_bit(MT76_REMOVED, &mdev->phy.state); + + if (netif_running(netdev)) + netif_device_detach(netdev); + + cancel_delayed_work_sync(&phy->mt76->mac_work); + + cancel_delayed_work_sync(&dev->pm.ps_work); + cancel_work_sync(&dev->pm.wake_work); + mt76_connac_free_pending_tx_skbs(&dev->pm, NULL); + + mt792x_mutex_acquire(dev); + clear_bit(MT76_STATE_RUNNING, &phy->mt76->state); + mt76_connac_mcu_set_mac_enable(&dev->mt76, 0, false, false); + mt792x_mutex_release(dev); + + if (state == pci_channel_io_perm_failure) + return PCI_ERS_RESULT_DISCONNECT; + + tasklet_kill(&mdev->irq_tasklet); + + pci_disable_device(pdev); + + /* Request a slot reset. */ + return PCI_ERS_RESULT_NEED_RESET; +} + +static pci_ers_result_t mt7921_pci_error_slot_reset(struct pci_dev *pdev) +{ + pci_ers_result_t ret = PCI_ERS_RESULT_DISCONNECT; + + return ret; +} + +static void mt7921_pci_error_resume(struct pci_dev *pdev) +{ + return; +} + +static const struct pci_error_handlers mt7921_pci_err_handler = { + .error_detected = mt7921_pci_error_detected, + .slot_reset = mt7921_pci_error_slot_reset, + .resume = mt7921_pci_error_resume, +}; + static struct pci_driver mt7921_pci_driver = { .name = KBUILD_MODNAME, .id_table = mt7921_pci_device_table, @@ -569,6 +632,7 @@ static struct pci_driver mt7921_pci_driver = { .remove = mt7921_pci_remove, .shutdown = mt7921_pci_shutdown, .driver.pm = pm_sleep_ptr(&mt7921_pm_ops), + .err_handler = &mt7921_pci_err_handler, }; module_pci_driver(mt7921_pci_driver); diff --git a/drivers/net/wireless/mediatek/mt76/mt792x_core.c b/drivers/net/wireless/mediatek/mt76/mt792x_core.c index 65cff5302a5a..4f4aa26b359d 100644 --- a/drivers/net/wireless/mediatek/mt76/mt792x_core.c +++ b/drivers/net/wireless/mediatek/mt76/mt792x_core.c @@ -811,6 +811,10 @@ int mt792x_mcu_drv_pmctrl(struct mt792x_dev *dev) struct mt76_phy *mphy = &dev->mt76.phy; struct mt76_connac_pm *pm = &dev->pm; int err = 0; + struct mt76_dev *mdev = mphy->dev; + + if (atomic_read(&mdev->bus_hung) == 1) + return -EIO; mutex_lock(&pm->mutex); @@ -833,6 +837,10 @@ int mt792x_mcu_fw_pmctrl(struct mt792x_dev *dev) struct mt76_phy *mphy = &dev->mt76.phy; struct mt76_connac_pm *pm = &dev->pm; int err = 0; + struct mt76_dev *mdev = mphy->dev; + + if (atomic_read(&mdev->bus_hung) == 1) + return -EIO; mutex_lock(&pm->mutex); diff --git a/drivers/net/wireless/mediatek/mt76/mt792x_mac.c b/drivers/net/wireless/mediatek/mt76/mt792x_mac.c index f86e0ac91100..c813547a3562 100644 --- a/drivers/net/wireless/mediatek/mt76/mt792x_mac.c +++ b/drivers/net/wireless/mediatek/mt76/mt792x_mac.c @@ -13,6 +13,10 @@ void mt792x_mac_work(struct work_struct *work) mphy = (struct mt76_phy *)container_of(work, struct mt76_phy, mac_work.work); + + if (atomic_read(&mphy->dev->bus_hung) == 1) + return; + phy = mphy->priv; mt792x_mutex_acquire(phy->dev); @@ -322,6 +326,10 @@ void mt792x_pm_wake_work(struct work_struct *work) dev = (struct mt792x_dev *)container_of(work, struct mt792x_dev, pm.wake_work); + + if (atomic_read(&dev->mt76.bus_hung) == 1) + return; + mphy = dev->phy.mt76; if (!mt792x_mcu_drv_pmctrl(dev)) { @@ -357,6 +365,10 @@ void mt792x_pm_power_save_work(struct work_struct *work) dev = (struct mt792x_dev *)container_of(work, struct mt792x_dev, pm.ps_work.work); + + if (atomic_read(&dev->mt76.bus_hung) == 1) + return; + mphy = dev->phy.mt76; delta = dev->pm.idle_timeout; -- 2.34.1