Refactor the health reporting to: 1. Introduce a dedicated work item for TX timeouts, queued on the system workqueue via schedule_work(). This prevents calling devlink_health_report() (which may sleep) from an atomic context (the netdev tx_timeout callback). 2. Update statistics tracking and reporting context to separate TX Pause and RX Pause frames, allowing finer-grained stall analysis (local vs. link-partner-induced flow control storm). 3. Change the devlink recovery function to call phylink_mac_change(dev->phylink, false). This leverages the newly robust link_down path, which performs the necessary locking and conditional Lite Reset. Signed-off-by: Oleksij Rempel --- drivers/net/usb/lan78xx.c | 133 +++++++++++++++++++++++++------------- 1 file changed, 87 insertions(+), 46 deletions(-) diff --git a/drivers/net/usb/lan78xx.c b/drivers/net/usb/lan78xx.c index 9dadca4101bc..316a3a8d0534 100644 --- a/drivers/net/usb/lan78xx.c +++ b/drivers/net/usb/lan78xx.c @@ -425,15 +425,36 @@ struct lan78xx_stat_snapshot { ktime_t time; u64 tx_pause_total; + u64 rx_pause_total; u64 tx_unicast_total; u64 rx_total_frames; u64 rx_hw_drop_total; u64 rx_sw_packets_total; - u32 last_delta_pause; + u32 last_delta_rx_pause; + u32 last_delta_tx_pause; u32 last_delta_drops; }; +struct lan78xx_dump_ctx { + const char *msg; + ktime_t ts; /* Timestamp of detection */ + + union { + struct { + u64 delta_tx_pause; + u64 delta_rx_pause; + u64 delta_rx; + u64 delta_hw_drop; + u64 delta_sw_rx; + } fifo; + struct { + u32 int_sts; /* The ISR's view of INT_STS */ + u32 int_enp; /* The ISR's view of INT_ENP_CTL */ + } err; + }; +}; + struct irq_domain_data { struct irq_domain *irqdomain; unsigned int phyirq; @@ -505,27 +526,10 @@ struct lan78xx_net { struct devlink_health_reporter *fifo_reporter; struct devlink_health_reporter *internal_err_reporter; struct lan78xx_stat_snapshot snapshot; + struct work_struct tx_timeout_work; + struct lan78xx_dump_ctx timeout_ctx; }; -struct lan78xx_dump_ctx { - const char *msg; - ktime_t ts; /* Timestamp of detection */ - - union { - struct { -
u64 delta_pause; - u64 delta_rx; - u64 delta_hw_drop; - u64 delta_sw_rx; - } fifo; - struct { - u32 int_sts; /* The ISR's view of INT_STS */ - u32 int_enp; /* The ISR's view of INT_ENP_CTL */ - } err; - }; -}; - -/* Register Dump Map Structure */ struct lan78xx_reg_map { u32 reg; const char *name; @@ -966,7 +970,7 @@ static void lan78xx_check_stat_rollover(struct lan78xx_net *dev, static void lan78xx_check_stat_anomalies(struct lan78xx_net *dev) { - u64 delta_pause, delta_rx, delta_hw_drop, delta_sw_rx; + u64 delta_tx_pause, delta_rx_pause, delta_rx, delta_hw_drop, delta_sw_rx; struct lan78xx_dump_ctx ctx = {0}; struct lan78xx_stat_snapshot now; const char *anomaly_msg = NULL; @@ -976,6 +980,7 @@ static void lan78xx_check_stat_anomalies(struct lan78xx_net *dev) mutex_lock(&dev->stats.access_lock); now.tx_pause_total = dev->stats.curr_stat.tx_pause_frames; + now.rx_pause_total = dev->stats.curr_stat.rx_pause_frames; now.rx_total_frames = dev->stats.curr_stat.rx_unicast_frames + dev->stats.curr_stat.rx_broadcast_frames + dev->stats.curr_stat.rx_multicast_frames; @@ -985,17 +990,19 @@ static void lan78xx_check_stat_anomalies(struct lan78xx_net *dev) now.rx_sw_packets_total = dev->net->stats.rx_packets; - delta_pause = now.tx_pause_total - dev->snapshot.tx_pause_total; + delta_tx_pause = now.tx_pause_total - dev->snapshot.tx_pause_total; + delta_rx_pause = now.rx_pause_total - dev->snapshot.rx_pause_total; delta_rx = now.rx_total_frames - dev->snapshot.rx_total_frames; delta_hw_drop = now.rx_hw_drop_total - dev->snapshot.rx_hw_drop_total; delta_sw_rx = now.rx_sw_packets_total - dev->snapshot.rx_sw_packets_total; - now.last_delta_pause = (u32)delta_pause; + now.last_delta_tx_pause = (u32)delta_tx_pause; + now.last_delta_rx_pause = (u32)delta_rx_pause; now.last_delta_drops = (u32)delta_hw_drop; dev->snapshot = now; - if (delta_pause > LAN78XX_STALL_PAUSE_THRESH && delta_rx == 0) { + if (delta_tx_pause > LAN78XX_STALL_PAUSE_THRESH && delta_rx == 0) { anomaly_msg = "Stall: 
Pause Storm & No RX"; } else if (delta_hw_drop > LAN78XX_LIVELOCK_DROP_THRESH && delta_hw_drop > (delta_sw_rx * LAN78XX_LIVELOCK_DROP_RATIO)) { @@ -1008,10 +1015,11 @@ static void lan78xx_check_stat_anomalies(struct lan78xx_net *dev) /* 5. Reporting */ ctx.msg = anomaly_msg; ctx.ts = now.time; - ctx.fifo.delta_pause = delta_pause; - ctx.fifo.delta_rx = delta_rx; + ctx.fifo.delta_tx_pause = delta_tx_pause; + ctx.fifo.delta_rx_pause = delta_rx_pause; + ctx.fifo.delta_rx = delta_rx; ctx.fifo.delta_hw_drop = delta_hw_drop; - ctx.fifo.delta_sw_rx = delta_sw_rx; + ctx.fifo.delta_sw_rx = delta_sw_rx; netdev_warn(dev->net, "%s (HW Drops: +%llu, SW RX: +%llu)\n", ctx.msg, delta_hw_drop, delta_sw_rx); @@ -2495,6 +2503,24 @@ static void lan78xx_mac_config(struct phylink_config *config, unsigned int mode, ERR_PTR(ret)); } +static int lan78xx_configure_flowcontrol(struct lan78xx_net *dev, + bool tx_pause, bool rx_pause); +static int lan78xx_reset(struct lan78xx_net *dev); + +static void lan78xx_dump_status(struct lan78xx_net *dev, const char *msg) +{ + u32 int_sts, mac_tx, fct_tx_ctl, mac_rx, fct_rx_ctl; + + lan78xx_read_reg(dev, INT_STS, &int_sts); + lan78xx_read_reg(dev, MAC_TX, &mac_tx); + lan78xx_read_reg(dev, FCT_TX_CTL, &fct_tx_ctl); + lan78xx_read_reg(dev, MAC_RX, &mac_rx); + lan78xx_read_reg(dev, FCT_RX_CTL, &fct_rx_ctl); + + netdev_info(dev->net, "[%s] INT_STS: 0x%08x, MAC_TX: 0x%08x, FCT_TX: 0x%08x, MAC_RX: 0x%08x, FCT_RX: 0x%08x\n", + msg, int_sts, mac_tx, fct_tx_ctl, mac_rx, fct_rx_ctl); +} + static void lan78xx_mac_link_down(struct phylink_config *config, unsigned int mode, phy_interface_t interface) { @@ -4939,8 +4965,10 @@ static int lan78xx_fifo_dump(struct devlink_health_reporter *reporter, ktime_to_ns(ctx->ts)); devlink_fmsg_obj_nest_start(fmsg); - devlink_fmsg_u64_pair_put(fmsg, "trigger_delta_pause", - ctx->fifo.delta_pause); + devlink_fmsg_u64_pair_put(fmsg, "trigger_delta_tx_pause", + ctx->fifo.delta_tx_pause); + devlink_fmsg_u64_pair_put(fmsg, 
"trigger_delta_rx_pause", + ctx->fifo.delta_rx_pause); devlink_fmsg_u64_pair_put(fmsg, "trigger_delta_rx", ctx->fifo.delta_rx); devlink_fmsg_u64_pair_put(fmsg, "trigger_delta_hw_drop", @@ -4989,8 +5017,9 @@ static int lan78xx_fifo_recover(struct devlink_health_reporter *reporter, { struct lan78xx_net *dev = devlink_health_reporter_priv(reporter); - netdev_warn(dev->net, "Recovering from FIFO stall via Lite Reset\n"); - return lan78xx_reset(dev); + netdev_warn(dev->net, "Recovering via Lite Reset\n"); + phylink_mac_change(dev->phylink, false); + return 0; } static const struct devlink_health_reporter_ops lan78xx_fifo_ops = { @@ -5075,6 +5104,7 @@ static void lan78xx_disconnect(struct usb_interface *intf) lan78xx_health_cleanup(dev); if (dev->devlink) { + cancel_work_sync(&dev->tx_timeout_work); devlink_unregister(dev->devlink); devlink_free(dev->devlink); dev->devlink = NULL; @@ -5107,36 +5137,45 @@ static void lan78xx_disconnect(struct usb_interface *intf) usb_put_dev(udev); } +static void lan78xx_tx_timeout_work(struct work_struct *work) +{ + struct lan78xx_net *dev = container_of(work, struct lan78xx_net, + tx_timeout_work); + + devlink_health_report(dev->fifo_reporter, dev->timeout_ctx.msg, + &dev->timeout_ctx); +} + static void lan78xx_tx_timeout(struct net_device *net, unsigned int txqueue) { struct lan78xx_net *dev = netdev_priv(net); - struct lan78xx_dump_ctx ctx = {0}; - s64 diff_ms; + s64 diff_ms = 0; /* Calculate time since last health check */ - ctx.ts = ktime_get_real(); - diff_ms = ktime_ms_delta(ctx.ts, dev->snapshot.time); + dev->timeout_ctx.ts = ktime_get_real(); + diff_ms = ktime_ms_delta(dev->timeout_ctx.ts, dev->snapshot.time); /* We rely on the trend data captured during the last valid stat update * to infer the system state before the crash. 
*/ - if (dev->snapshot.last_delta_pause > LAN78XX_STALL_PAUSE_THRESH) - ctx.msg = "TX Timeout (Flow Control Storm?)"; + if (dev->snapshot.last_delta_rx_pause > LAN78XX_STALL_PAUSE_THRESH) + dev->timeout_ctx.msg = "TX Timeout (Link Partner Pause Storm?)"; + else if (dev->snapshot.last_delta_tx_pause > LAN78XX_STALL_PAUSE_THRESH) + dev->timeout_ctx.msg = "TX Timeout (Local Flow Control Storm?)"; else if (dev->snapshot.last_delta_drops > LAN78XX_TX_TIMEOUT_DROP_THRESH) - ctx.msg = "TX Timeout (FIFO Drop Storm?)"; + dev->timeout_ctx.msg = "TX Timeout (FIFO Drop Storm?)"; else - ctx.msg = "TX Timeout"; + dev->timeout_ctx.msg = "TX Timeout"; - ctx.fifo.delta_pause = dev->snapshot.last_delta_pause; - ctx.fifo.delta_hw_drop = dev->snapshot.last_delta_drops; + dev->timeout_ctx.fifo.delta_rx_pause = dev->snapshot.last_delta_rx_pause; + dev->timeout_ctx.fifo.delta_tx_pause = dev->snapshot.last_delta_tx_pause; + dev->timeout_ctx.fifo.delta_hw_drop = dev->snapshot.last_delta_drops; netdev_warn(dev->net, "%s (Last stat update: %lld ms ago)\n", - ctx.msg, diff_ms); + dev->timeout_ctx.msg, diff_ms); - devlink_health_report(dev->fifo_reporter, ctx.msg, &ctx); - - unlink_urbs(dev, &dev->txq); - napi_schedule(&dev->napi); + /* Defer report to worker to avoid sleeping in atomic context */ + schedule_work(&dev->tx_timeout_work); } static netdev_features_t lan78xx_features_check(struct sk_buff *skb, @@ -5542,6 +5581,8 @@ static int lan78xx_probe(struct usb_interface *intf, pm_runtime_set_autosuspend_delay(&udev->dev, DEFAULT_AUTOSUSPEND_DELAY); + INIT_WORK(&dev->tx_timeout_work, lan78xx_tx_timeout_work); + dev->devlink = devlink_alloc(&lan78xx_devlink_ops, sizeof(struct lan78xx_devlink_priv), &udev->dev); -- 2.47.3