Add devlink health support for diagnostics Signed-off-by: Oleksij Rempel --- drivers/net/usb/lan78xx.c | 388 +++++++++++++++++++++++++++++++++++++- 1 file changed, 387 insertions(+), 1 deletion(-) diff --git a/drivers/net/usb/lan78xx.c b/drivers/net/usb/lan78xx.c index ad620b56443b..221be42e06f4 100644 --- a/drivers/net/usb/lan78xx.c +++ b/drivers/net/usb/lan78xx.c @@ -27,6 +27,7 @@ #include #include #include +#include #include #include #include @@ -41,6 +42,11 @@ #define THROTTLE_JIFFIES (HZ / 8) #define UNLINK_TIMEOUT_MS 3 +#define LAN78XX_STALL_PAUSE_THRESH 100 +#define LAN78XX_LIVELOCK_DROP_THRESH 10000 +#define LAN78XX_LIVELOCK_DROP_RATIO 10 +#define LAN78XX_TX_TIMEOUT_DROP_THRESH 1000 + #define RX_MAX_QUEUE_MEMORY (60 * 1518) #define SS_USB_PKT_SIZE (1024) @@ -373,6 +379,10 @@ struct lan78xx_priv { u32 wol; }; +struct lan78xx_devlink_priv { + struct lan78xx_net *dev; +}; + enum skb_state { illegal = 0, tx_start, @@ -411,6 +421,19 @@ struct statstage { struct lan78xx_statstage64 curr_stat; }; +struct lan78xx_stat_snapshot { + ktime_t time; + + u64 tx_pause_total; + u64 tx_unicast_total; + u64 rx_total_frames; + u64 rx_hw_drop_total; + u64 rx_sw_packets_total; + + u32 last_delta_pause; + u32 last_delta_drops; +}; + struct irq_domain_data { struct irq_domain *irqdomain; unsigned int phyirq; @@ -477,6 +500,35 @@ struct lan78xx_net { struct phylink *phylink; struct phylink_config phylink_config; + + struct devlink *devlink; + struct devlink_health_reporter *fifo_reporter; + struct devlink_health_reporter *internal_err_reporter; + struct lan78xx_stat_snapshot snapshot; +}; + +struct lan78xx_dump_ctx { + const char *msg; + ktime_t ts; /* Timestamp of detection */ + + union { + struct { + u64 delta_pause; + u64 delta_rx; + u64 delta_hw_drop; + u64 delta_sw_rx; + } fifo; + struct { + u32 int_sts; /* The ISR's view of INT_STS */ + u32 int_enp; /* The ISR's view of INT_ENP_CTL */ + } err; + }; +}; + +/* Register Dump Map Structure */ +struct lan78xx_reg_map { + u32 reg; 
+ const char *name; }; /* use ethtool to change the level for any given device */ @@ -484,6 +536,87 @@ static int msg_level = -1; module_param(msg_level, int, 0); MODULE_PARM_DESC(msg_level, "Override default message level"); +/* Helper macro to map register to name string */ +#define LAN78XX_DUMP_REG(reg) { reg, #reg } + +static const struct lan78xx_reg_map lan78xx_fifo_regs[] = { + /* --- FIFO Control & Status --- + * specific enable/reset bits. + * used_bytes tells us if the bottleneck is USB (TX high) or MAC + * (RX high). + */ + LAN78XX_DUMP_REG(FCT_TX_CTL), + LAN78XX_DUMP_REG(FCT_RX_CTL), + + /* --- Data Path Usage --- + * Capture total buffer usage including USB endpoint overhead. + * If DP_STOR is high but FCT_USED is low, data is stuck in the USB + * layer. + */ + LAN78XX_DUMP_REG(TX_DP_STOR), + LAN78XX_DUMP_REG(RX_DP_STOR), + + /* --- FIFO Boundaries --- + * verify if the FIFO partitioning has been corrupted or misconfigured. + */ + LAN78XX_DUMP_REG(FCT_TX_FIFO_END), + LAN78XX_DUMP_REG(FCT_RX_FIFO_END), + + /* --- Flow Control --- + * Critical for "Pause Storm" debugging. + * Check if thresholds are set correctly and if Pause frames are enabled. + */ + LAN78XX_DUMP_REG(FCT_FLOW), + LAN78XX_DUMP_REG(FLOW), + + /* --- Configuration & Speed --- + * Mismatches between MAC speed (1G) and USB speed (HighSpeed) + * are the #1 cause of buffer overflows. + */ + LAN78XX_DUMP_REG(MAC_CR), /* MAC Speed/Duplex */ + LAN78XX_DUMP_REG(USB_CFG0), /* USB Speed/Burst Cap Enable */ + LAN78XX_DUMP_REG(BURST_CAP), /* USB Burst Size Limit */ + LAN78XX_DUMP_REG(BULK_IN_DLY), /* Inter-packet delay settings */ + + /* --- Debug Pointers --- + * Internal read/write pointers for the FIFO RAM. + * Helps detect if the hardware pointer logic has wrapped or frozen. + */ + LAN78XX_DUMP_REG(DP_SEL), + LAN78XX_DUMP_REG(DP_CMD), +}; + +static const struct lan78xx_reg_map lan78xx_err_regs[] = { + /* --- Interrupt Status --- + * The "Smoking Gun". 
Reveals if the error was triggered by: + * - MAC_ERR_INT: Internal logic overflow/underflow. + * - PHY_INT: Link loss or signal degradation. + * - TDFO/RDFO: FIFO Overflows (redundant but explicit). + */ + LAN78XX_DUMP_REG(INT_STS), + LAN78XX_DUMP_REG(INT_EP_CTL), + + /* --- System Health --- + * Check for invalid power states (D3 while active) or stuck resets. + * HW_CFG also contains the "Soft Reset" status bit. + */ + LAN78XX_DUMP_REG(HW_CFG), + LAN78XX_DUMP_REG(PMT_CTL), + + /* --- Bus Integrity --- + * USB_CFG1 contains Low Power Mode (LPM) and Suspend guards. + */ + LAN78XX_DUMP_REG(USB_CFG0), + LAN78XX_DUMP_REG(USB_CFG1), + + /* --- MAC Status --- + * Verify if the receiver is actually enabled (RXEN) and if + * filtering (Promiscuous/Multicast) is set as expected. + */ + LAN78XX_DUMP_REG(MAC_CR), + LAN78XX_DUMP_REG(MAC_RX), +}; + static struct sk_buff *lan78xx_get_buf(struct sk_buff_head *buf_pool) { if (skb_queue_empty(buf_pool)) @@ -831,12 +964,67 @@ static void lan78xx_check_stat_rollover(struct lan78xx_net *dev, memcpy(&dev->stats.saved, stats, sizeof(struct lan78xx_statstage)); } +static void lan78xx_check_stat_anomalies(struct lan78xx_net *dev) +{ + u64 delta_pause, delta_rx, delta_hw_drop, delta_sw_rx; + struct lan78xx_dump_ctx ctx = {0}; + struct lan78xx_stat_snapshot now; + const char *anomaly_msg = NULL; + + /* 1. 
Capture "Now" (Atomic-ish collection) */ + now.time = ktime_get_real(); + + mutex_lock(&dev->stats.access_lock); + now.tx_pause_total = dev->stats.curr_stat.tx_pause_frames; + now.rx_total_frames = dev->stats.curr_stat.rx_unicast_frames + + dev->stats.curr_stat.rx_broadcast_frames + + dev->stats.curr_stat.rx_multicast_frames; + now.rx_hw_drop_total = dev->stats.curr_stat.rx_dropped_frames; + now.tx_unicast_total = dev->stats.curr_stat.tx_unicast_frames; + mutex_unlock(&dev->stats.access_lock); + + now.rx_sw_packets_total = dev->net->stats.rx_packets; + + delta_pause = now.tx_pause_total - dev->snapshot.tx_pause_total; + delta_rx = now.rx_total_frames - dev->snapshot.rx_total_frames; + delta_hw_drop = now.rx_hw_drop_total - dev->snapshot.rx_hw_drop_total; + delta_sw_rx = now.rx_sw_packets_total - dev->snapshot.rx_sw_packets_total; + + now.last_delta_pause = (u32)delta_pause; + now.last_delta_drops = (u32)delta_hw_drop; + + dev->snapshot = now; + + if (delta_pause > LAN78XX_STALL_PAUSE_THRESH && delta_rx == 0) { + anomaly_msg = "Stall: Pause Storm & No RX"; + } else if (delta_hw_drop > LAN78XX_LIVELOCK_DROP_THRESH && + delta_hw_drop > (delta_sw_rx * LAN78XX_LIVELOCK_DROP_RATIO)) { + anomaly_msg = "Stall: RX Livelock Detected (Excessive Drop Ratio)"; + } + + if (!anomaly_msg) + return; + + /* 5. 
Reporting */ + ctx.msg = anomaly_msg; + ctx.ts = now.time; + ctx.fifo.delta_pause = delta_pause; + ctx.fifo.delta_rx = delta_rx; + ctx.fifo.delta_hw_drop = delta_hw_drop; + ctx.fifo.delta_sw_rx = delta_sw_rx; + + netdev_warn(dev->net, "%s (HW Drops: +%llu, SW RX: +%llu)\n", + ctx.msg, delta_hw_drop, delta_sw_rx); + + devlink_health_report(dev->fifo_reporter, ctx.msg, &ctx); +} + static void lan78xx_update_stats(struct lan78xx_net *dev) { + struct lan78xx_statstage lan78xx_stats; u32 *p, *count, *max; u64 *data; int i; - struct lan78xx_statstage lan78xx_stats; if (usb_autopm_get_interface(dev->intf) < 0) return; @@ -856,6 +1044,8 @@ static void lan78xx_update_stats(struct lan78xx_net *dev) mutex_unlock(&dev->stats.access_lock); + lan78xx_check_stat_anomalies(dev); + usb_autopm_put_interface(dev->intf); } @@ -1625,6 +1815,18 @@ static void lan78xx_status(struct lan78xx_net *dev, struct urb *urb) if (dev->domain_data.phyirq > 0) generic_handle_irq_safe(dev->domain_data.phyirq); + } else if (intdata & (INT_ENP_TDFO_INT | INT_ENP_TDFU_INT | + INT_ENP_RDFO_INT | INT_ENP_MAC_ERR_INT)) { + struct lan78xx_dump_ctx ctx = {0}; + + ctx.msg = "HW Interrupt Error"; + ctx.ts = ktime_get_real(); + ctx.err.int_sts = intdata; + + netdev_warn(dev->net, "HW Error detected: 0x%08x, triggering health report\n", + intdata); + + devlink_health_report(dev->internal_err_reporter, ctx.msg, &ctx); } else { netdev_warn(dev->net, "unexpected interrupt: 0x%08x\n", intdata); @@ -4705,6 +4907,148 @@ static void intr_complete(struct urb *urb) } } +static int lan78xx_dump_regs(struct lan78xx_net *dev, struct devlink_fmsg *fmsg, + const struct lan78xx_reg_map *map, size_t count) +{ + int ret, i; + u32 val; + + for (i = 0; i < count; i++) { + ret = lan78xx_read_reg(dev, map[i].reg, &val); + if (ret) + return ret; + devlink_fmsg_u32_pair_put(fmsg, map[i].name, val); + } + return 0; +} + +static int lan78xx_fifo_dump(struct devlink_health_reporter *reporter, + struct devlink_fmsg *fmsg, void *priv_ctx, 
+ struct netlink_ext_ack *extack) +{ + struct lan78xx_net *dev = devlink_health_reporter_priv(reporter); + struct lan78xx_dump_ctx *ctx = priv_ctx; + + /* 1. Context Snapshot: + * Dump the specific counters that triggered the threshold. + * Registers may have changed since the decision was made. + */ + if (ctx) { + devlink_fmsg_string_pair_put(fmsg, "trigger_reason", ctx->msg); + devlink_fmsg_u64_pair_put(fmsg, "timestamp_ns", + ktime_to_ns(ctx->ts)); + + devlink_fmsg_obj_nest_start(fmsg); + devlink_fmsg_u64_pair_put(fmsg, "trigger_delta_pause", + ctx->fifo.delta_pause); + devlink_fmsg_u64_pair_put(fmsg, "trigger_delta_rx", + ctx->fifo.delta_rx); + devlink_fmsg_u64_pair_put(fmsg, "trigger_delta_hw_drop", + ctx->fifo.delta_hw_drop); + devlink_fmsg_u64_pair_put(fmsg, "trigger_delta_sw_rx", + ctx->fifo.delta_sw_rx); + devlink_fmsg_obj_nest_end(fmsg); + } + + /* USB Speed is critical for interpreting throughput/stall issues */ + devlink_fmsg_u8_pair_put(fmsg, "usb_speed_enum", dev->udev->speed); + + /* 2. Live Register Dump */ + return lan78xx_dump_regs(dev, fmsg, lan78xx_fifo_regs, + ARRAY_SIZE(lan78xx_fifo_regs)); +} + +static int lan78xx_internal_err_dump(struct devlink_health_reporter *reporter, + struct devlink_fmsg *fmsg, void *priv_ctx, + struct netlink_ext_ack *extack) +{ + struct lan78xx_net *dev = devlink_health_reporter_priv(reporter); + struct lan78xx_dump_ctx *ctx = priv_ctx; + + /* Interrupt status is "write-1-to-clear" or cleared on read. + * We must dump the value seen by the ISR, not the current register + * value. 
+ */ + if (ctx) { + devlink_fmsg_string_pair_put(fmsg, "trigger_reason", ctx->msg); + devlink_fmsg_u64_pair_put(fmsg, "timestamp_ns", + ktime_to_ns(ctx->ts)); + + devlink_fmsg_u32_pair_put(fmsg, "trigger_int_sts", + ctx->err.int_sts); + devlink_fmsg_u32_pair_put(fmsg, "trigger_int_enp", + ctx->err.int_enp); + } + + return lan78xx_dump_regs(dev, fmsg, lan78xx_err_regs, + ARRAY_SIZE(lan78xx_err_regs)); +} + +static const struct devlink_health_reporter_ops lan78xx_fifo_ops = { + .name = "fifo", + .dump = lan78xx_fifo_dump, +}; + +static const struct devlink_health_reporter_ops lan78xx_internal_err_ops = { + .name = "internal_err", + .dump = lan78xx_internal_err_dump, +}; + +static int lan78xx_health_init(struct lan78xx_net *dev) +{ + dev->fifo_reporter = devlink_health_reporter_create(dev->devlink, + &lan78xx_fifo_ops, + dev); + if (IS_ERR(dev->fifo_reporter)) { + netdev_warn(dev->net, "Failed to create fifo reporter\n"); + + return PTR_ERR(dev->fifo_reporter); + } + + dev->internal_err_reporter = + devlink_health_reporter_create(dev->devlink, + &lan78xx_internal_err_ops, dev); + if (IS_ERR(dev->internal_err_reporter)) { + netdev_warn(dev->net, "Failed to create internal_err reporter\n"); + devlink_health_reporter_destroy(dev->fifo_reporter); + + return PTR_ERR(dev->internal_err_reporter); + } + + return 0; +} + +static void lan78xx_health_cleanup(struct lan78xx_net *dev) +{ + devlink_health_reporter_destroy(dev->fifo_reporter); + devlink_health_reporter_destroy(dev->internal_err_reporter); +} + +static int lan78xx_devlink_info_get(struct devlink *devlink, + struct devlink_info_req *req, + struct netlink_ext_ack *extack) +{ + struct lan78xx_devlink_priv *dl_priv = devlink_priv(devlink); + struct lan78xx_net *dev = dl_priv->dev; + char buf[16]; + + snprintf(buf, sizeof(buf), "0x%04X", dev->chipid); + devlink_info_version_fixed_put(req, + DEVLINK_INFO_VERSION_GENERIC_ASIC_ID, + buf); + + snprintf(buf, sizeof(buf), "0x%04X", dev->chiprev); + 
devlink_info_version_fixed_put(req, + DEVLINK_INFO_VERSION_GENERIC_ASIC_REV, + buf); + + return 0; +} + +static const struct devlink_ops lan78xx_devlink_ops = { + .info_get = lan78xx_devlink_info_get, +}; + static void lan78xx_disconnect(struct usb_interface *intf) { struct lan78xx_net *dev; @@ -4719,6 +5063,13 @@ static void lan78xx_disconnect(struct usb_interface *intf) udev = interface_to_usbdev(intf); net = dev->net; + lan78xx_health_cleanup(dev); + if (dev->devlink) { + devlink_unregister(dev->devlink); + devlink_free(dev->devlink); + dev->devlink = NULL; + } + rtnl_lock(); phylink_stop(dev->phylink); phylink_disconnect_phy(dev->phylink); @@ -4749,6 +5100,30 @@ static void lan78xx_disconnect(struct usb_interface *intf) static void lan78xx_tx_timeout(struct net_device *net, unsigned int txqueue) { struct lan78xx_net *dev = netdev_priv(net); + struct lan78xx_dump_ctx ctx = {0}; + s64 diff_ms; + + /* Calculate time since last health check */ + ctx.ts = ktime_get_real(); + diff_ms = ktime_ms_delta(ctx.ts, dev->snapshot.time); + + /* We rely on the trend data captured during the last valid stat update + * to infer the system state before the crash. 
+ */ + if (dev->snapshot.last_delta_pause > LAN78XX_STALL_PAUSE_THRESH) + ctx.msg = "TX Timeout (Flow Control Storm?)"; + else if (dev->snapshot.last_delta_drops > LAN78XX_TX_TIMEOUT_DROP_THRESH) + ctx.msg = "TX Timeout (FIFO Drop Storm?)"; + else + ctx.msg = "TX Timeout"; + + ctx.fifo.delta_pause = dev->snapshot.last_delta_pause; + ctx.fifo.delta_hw_drop = dev->snapshot.last_delta_drops; + + netdev_warn(dev->net, "%s (Last stat update: %lld ms ago)\n", + ctx.msg, diff_ms); + + devlink_health_report(dev->fifo_reporter, ctx.msg, &ctx); unlink_urbs(dev, &dev->txq); napi_schedule(&dev->napi); @@ -5157,6 +5532,17 @@ static int lan78xx_probe(struct usb_interface *intf, pm_runtime_set_autosuspend_delay(&udev->dev, DEFAULT_AUTOSUSPEND_DELAY); + dev->devlink = devlink_alloc(&lan78xx_devlink_ops, + sizeof(struct lan78xx_devlink_priv), + &udev->dev); + if (dev->devlink) { + struct lan78xx_devlink_priv *dl_priv = devlink_priv(dev->devlink); + + dl_priv->dev = dev; + devlink_register(dev->devlink); + lan78xx_health_init(dev); + } + return 0; phy_uninit: -- 2.47.3 Implement the recovery logic for the devlink FIFO health reporter. The recovery callback triggers a full hardware Lite Reset using lan78xx_reset(). This is a fast, reliable way to restore traffic in unattended embedded deployments when a FIFO stall is detected. 
Signed-off-by: Oleksij Rempel --- drivers/net/usb/lan78xx.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/drivers/net/usb/lan78xx.c b/drivers/net/usb/lan78xx.c index 221be42e06f4..9dadca4101bc 100644 --- a/drivers/net/usb/lan78xx.c +++ b/drivers/net/usb/lan78xx.c @@ -4984,8 +4984,18 @@ static int lan78xx_internal_err_dump(struct devlink_health_reporter *reporter, ARRAY_SIZE(lan78xx_err_regs)); } +static int lan78xx_fifo_recover(struct devlink_health_reporter *reporter, + void *priv_ctx, struct netlink_ext_ack *extack) +{ + struct lan78xx_net *dev = devlink_health_reporter_priv(reporter); + + netdev_warn(dev->net, "Recovering from FIFO stall via Lite Reset\n"); + return lan78xx_reset(dev); +} + static const struct devlink_health_reporter_ops lan78xx_fifo_ops = { .name = "fifo", + .recover = lan78xx_fifo_recover, .dump = lan78xx_fifo_dump, }; -- 2.47.3 Refactor the health reporting to: 1. Introduce a dedicated work item for TX timeouts, queued on the system workqueue with schedule_work(). This prevents calling devlink_health_report() (which may sleep) from an atomic context (netdev tx_timeout). 2. Update statistics tracking and reporting context to separate TX Pause and RX Pause frames, allowing finer-grained stall analysis (local vs. link partner induced flow control storm). 3. Change the devlink recovery function to call phylink_mac_change(false). This leverages the newly robust link_down path which performs the necessary locking and conditional Lite Reset. 
Signed-off-by: Oleksij Rempel --- drivers/net/usb/lan78xx.c | 133 +++++++++++++++++++++++++------------- 1 file changed, 87 insertions(+), 46 deletions(-) diff --git a/drivers/net/usb/lan78xx.c b/drivers/net/usb/lan78xx.c index 9dadca4101bc..316a3a8d0534 100644 --- a/drivers/net/usb/lan78xx.c +++ b/drivers/net/usb/lan78xx.c @@ -425,15 +425,36 @@ struct lan78xx_stat_snapshot { ktime_t time; u64 tx_pause_total; + u64 rx_pause_total; u64 tx_unicast_total; u64 rx_total_frames; u64 rx_hw_drop_total; u64 rx_sw_packets_total; - u32 last_delta_pause; + u32 last_delta_rx_pause; + u32 last_delta_tx_pause; u32 last_delta_drops; }; +struct lan78xx_dump_ctx { + const char *msg; + ktime_t ts; /* Timestamp of detection */ + + union { + struct { + u64 delta_tx_pause; + u64 delta_rx_pause; + u64 delta_rx; + u64 delta_hw_drop; + u64 delta_sw_rx; + } fifo; + struct { + u32 int_sts; /* The ISR's view of INT_STS */ + u32 int_enp; /* The ISR's view of INT_ENP_CTL */ + } err; + }; +}; + struct irq_domain_data { struct irq_domain *irqdomain; unsigned int phyirq; @@ -505,27 +526,10 @@ struct lan78xx_net { struct devlink_health_reporter *fifo_reporter; struct devlink_health_reporter *internal_err_reporter; struct lan78xx_stat_snapshot snapshot; + struct work_struct tx_timeout_work; + struct lan78xx_dump_ctx timeout_ctx; }; -struct lan78xx_dump_ctx { - const char *msg; - ktime_t ts; /* Timestamp of detection */ - - union { - struct { - u64 delta_pause; - u64 delta_rx; - u64 delta_hw_drop; - u64 delta_sw_rx; - } fifo; - struct { - u32 int_sts; /* The ISR's view of INT_STS */ - u32 int_enp; /* The ISR's view of INT_ENP_CTL */ - } err; - }; -}; - -/* Register Dump Map Structure */ struct lan78xx_reg_map { u32 reg; const char *name; @@ -966,7 +970,7 @@ static void lan78xx_check_stat_rollover(struct lan78xx_net *dev, static void lan78xx_check_stat_anomalies(struct lan78xx_net *dev) { - u64 delta_pause, delta_rx, delta_hw_drop, delta_sw_rx; + u64 delta_tx_pause, delta_rx_pause, delta_rx, 
delta_hw_drop, delta_sw_rx; struct lan78xx_dump_ctx ctx = {0}; struct lan78xx_stat_snapshot now; const char *anomaly_msg = NULL; @@ -976,6 +980,7 @@ static void lan78xx_check_stat_anomalies(struct lan78xx_net *dev) mutex_lock(&dev->stats.access_lock); now.tx_pause_total = dev->stats.curr_stat.tx_pause_frames; + now.rx_pause_total = dev->stats.curr_stat.rx_pause_frames; now.rx_total_frames = dev->stats.curr_stat.rx_unicast_frames + dev->stats.curr_stat.rx_broadcast_frames + dev->stats.curr_stat.rx_multicast_frames; @@ -985,17 +990,19 @@ static void lan78xx_check_stat_anomalies(struct lan78xx_net *dev) now.rx_sw_packets_total = dev->net->stats.rx_packets; - delta_pause = now.tx_pause_total - dev->snapshot.tx_pause_total; + delta_tx_pause = now.tx_pause_total - dev->snapshot.tx_pause_total; + delta_rx_pause = now.rx_pause_total - dev->snapshot.rx_pause_total; delta_rx = now.rx_total_frames - dev->snapshot.rx_total_frames; delta_hw_drop = now.rx_hw_drop_total - dev->snapshot.rx_hw_drop_total; delta_sw_rx = now.rx_sw_packets_total - dev->snapshot.rx_sw_packets_total; - now.last_delta_pause = (u32)delta_pause; + now.last_delta_tx_pause = (u32)delta_tx_pause; + now.last_delta_rx_pause = (u32)delta_rx_pause; now.last_delta_drops = (u32)delta_hw_drop; dev->snapshot = now; - if (delta_pause > LAN78XX_STALL_PAUSE_THRESH && delta_rx == 0) { + if (delta_tx_pause > LAN78XX_STALL_PAUSE_THRESH && delta_rx == 0) { anomaly_msg = "Stall: Pause Storm & No RX"; } else if (delta_hw_drop > LAN78XX_LIVELOCK_DROP_THRESH && delta_hw_drop > (delta_sw_rx * LAN78XX_LIVELOCK_DROP_RATIO)) { @@ -1008,10 +1015,11 @@ static void lan78xx_check_stat_anomalies(struct lan78xx_net *dev) /* 5. 
Reporting */ ctx.msg = anomaly_msg; ctx.ts = now.time; - ctx.fifo.delta_pause = delta_pause; - ctx.fifo.delta_rx = delta_rx; + ctx.fifo.delta_tx_pause = delta_tx_pause; + ctx.fifo.delta_rx_pause = delta_rx_pause; + ctx.fifo.delta_rx = delta_rx; ctx.fifo.delta_hw_drop = delta_hw_drop; - ctx.fifo.delta_sw_rx = delta_sw_rx; + ctx.fifo.delta_sw_rx = delta_sw_rx; netdev_warn(dev->net, "%s (HW Drops: +%llu, SW RX: +%llu)\n", ctx.msg, delta_hw_drop, delta_sw_rx); @@ -2495,6 +2503,24 @@ static void lan78xx_mac_config(struct phylink_config *config, unsigned int mode, ERR_PTR(ret)); } +static int lan78xx_configure_flowcontrol(struct lan78xx_net *dev, + bool tx_pause, bool rx_pause); +static int lan78xx_reset(struct lan78xx_net *dev); + +static void lan78xx_dump_status(struct lan78xx_net *dev, const char *msg) +{ + u32 int_sts, mac_tx, fct_tx_ctl, mac_rx, fct_rx_ctl; + + lan78xx_read_reg(dev, INT_STS, &int_sts); + lan78xx_read_reg(dev, MAC_TX, &mac_tx); + lan78xx_read_reg(dev, FCT_TX_CTL, &fct_tx_ctl); + lan78xx_read_reg(dev, MAC_RX, &mac_rx); + lan78xx_read_reg(dev, FCT_RX_CTL, &fct_rx_ctl); + + netdev_info(dev->net, "[%s] INT_STS: 0x%08x, MAC_TX: 0x%08x, FCT_TX: 0x%08x, MAC_RX: 0x%08x, FCT_RX: 0x%08x\n", + msg, int_sts, mac_tx, fct_tx_ctl, mac_rx, fct_rx_ctl); +} + static void lan78xx_mac_link_down(struct phylink_config *config, unsigned int mode, phy_interface_t interface) { @@ -4939,8 +4965,10 @@ static int lan78xx_fifo_dump(struct devlink_health_reporter *reporter, ktime_to_ns(ctx->ts)); devlink_fmsg_obj_nest_start(fmsg); - devlink_fmsg_u64_pair_put(fmsg, "trigger_delta_pause", - ctx->fifo.delta_pause); + devlink_fmsg_u64_pair_put(fmsg, "trigger_delta_tx_pause", + ctx->fifo.delta_tx_pause); + devlink_fmsg_u64_pair_put(fmsg, "trigger_delta_rx_pause", + ctx->fifo.delta_rx_pause); devlink_fmsg_u64_pair_put(fmsg, "trigger_delta_rx", ctx->fifo.delta_rx); devlink_fmsg_u64_pair_put(fmsg, "trigger_delta_hw_drop", @@ -4989,8 +5017,9 @@ static int lan78xx_fifo_recover(struct 
devlink_health_reporter *reporter, { struct lan78xx_net *dev = devlink_health_reporter_priv(reporter); - netdev_warn(dev->net, "Recovering from FIFO stall via Lite Reset\n"); - return lan78xx_reset(dev); + netdev_warn(dev->net, "Recovering via Lite Reset\n"); + phylink_mac_change(dev->phylink, false); + return 0; } static const struct devlink_health_reporter_ops lan78xx_fifo_ops = { @@ -5075,6 +5104,7 @@ static void lan78xx_disconnect(struct usb_interface *intf) lan78xx_health_cleanup(dev); if (dev->devlink) { + cancel_work_sync(&dev->tx_timeout_work); devlink_unregister(dev->devlink); devlink_free(dev->devlink); dev->devlink = NULL; @@ -5107,36 +5137,45 @@ static void lan78xx_disconnect(struct usb_interface *intf) usb_put_dev(udev); } +static void lan78xx_tx_timeout_work(struct work_struct *work) +{ + struct lan78xx_net *dev = container_of(work, struct lan78xx_net, + tx_timeout_work); + + devlink_health_report(dev->fifo_reporter, dev->timeout_ctx.msg, + &dev->timeout_ctx); +} + static void lan78xx_tx_timeout(struct net_device *net, unsigned int txqueue) { struct lan78xx_net *dev = netdev_priv(net); - struct lan78xx_dump_ctx ctx = {0}; - s64 diff_ms; + s64 diff_ms = 0; /* Calculate time since last health check */ - ctx.ts = ktime_get_real(); - diff_ms = ktime_ms_delta(ctx.ts, dev->snapshot.time); + dev->timeout_ctx.ts = ktime_get_real(); + diff_ms = ktime_ms_delta(dev->timeout_ctx.ts, dev->snapshot.time); /* We rely on the trend data captured during the last valid stat update * to infer the system state before the crash. 
*/ - if (dev->snapshot.last_delta_pause > LAN78XX_STALL_PAUSE_THRESH) - ctx.msg = "TX Timeout (Flow Control Storm?)"; + if (dev->snapshot.last_delta_rx_pause > LAN78XX_STALL_PAUSE_THRESH) + dev->timeout_ctx.msg = "TX Timeout (Link Partner Pause Storm?)"; + else if (dev->snapshot.last_delta_tx_pause > LAN78XX_STALL_PAUSE_THRESH) + dev->timeout_ctx.msg = "TX Timeout (Local Flow Control Storm?)"; else if (dev->snapshot.last_delta_drops > LAN78XX_TX_TIMEOUT_DROP_THRESH) - ctx.msg = "TX Timeout (FIFO Drop Storm?)"; + dev->timeout_ctx.msg = "TX Timeout (FIFO Drop Storm?)"; else - ctx.msg = "TX Timeout"; + dev->timeout_ctx.msg = "TX Timeout"; - ctx.fifo.delta_pause = dev->snapshot.last_delta_pause; - ctx.fifo.delta_hw_drop = dev->snapshot.last_delta_drops; + dev->timeout_ctx.fifo.delta_rx_pause = dev->snapshot.last_delta_rx_pause; + dev->timeout_ctx.fifo.delta_tx_pause = dev->snapshot.last_delta_tx_pause; + dev->timeout_ctx.fifo.delta_hw_drop = dev->snapshot.last_delta_drops; netdev_warn(dev->net, "%s (Last stat update: %lld ms ago)\n", - ctx.msg, diff_ms); + dev->timeout_ctx.msg, diff_ms); - devlink_health_report(dev->fifo_reporter, ctx.msg, &ctx); - - unlink_urbs(dev, &dev->txq); - napi_schedule(&dev->napi); + /* Defer report to worker to avoid sleeping in atomic context */ + schedule_work(&dev->tx_timeout_work); } static netdev_features_t lan78xx_features_check(struct sk_buff *skb, @@ -5542,6 +5581,8 @@ static int lan78xx_probe(struct usb_interface *intf, pm_runtime_set_autosuspend_delay(&udev->dev, DEFAULT_AUTOSUSPEND_DELAY); + INIT_WORK(&dev->tx_timeout_work, lan78xx_tx_timeout_work); + dev->devlink = devlink_alloc(&lan78xx_devlink_ops, sizeof(struct lan78xx_devlink_priv), &udev->dev); -- 2.47.3 Add a debugfs file (inject_error) to allow users to trigger specific hardware errors (e.g., Burst Cap Violation, RX FIFO Overflow, USB PHY destabilization) for testing the newly introduced health and recovery mechanisms. 
Signed-off-by: Oleksij Rempel --- drivers/net/usb/lan78xx.c | 71 +++++++++++++++++++++++++++++++++++++++ drivers/net/usb/lan78xx.h | 4 +++ 2 files changed, 75 insertions(+) diff --git a/drivers/net/usb/lan78xx.c b/drivers/net/usb/lan78xx.c index 316a3a8d0534..ae721025cf3d 100644 --- a/drivers/net/usb/lan78xx.c +++ b/drivers/net/usb/lan78xx.c @@ -3,6 +3,7 @@ * Copyright (C) 2015 Microchip Technology */ #include +#include #include #include #include @@ -519,6 +520,8 @@ struct lan78xx_net { struct irq_domain_data domain_data; + struct dentry *debugfs_pdev; + struct phylink *phylink; struct phylink_config phylink_config; @@ -5088,6 +5091,68 @@ static const struct devlink_ops lan78xx_devlink_ops = { .info_get = lan78xx_devlink_info_get, }; +static ssize_t lan78xx_inject_write(struct file *file, const char __user *user_buf, + size_t count, loff_t *ppos) +{ + struct lan78xx_net *dev = file->private_data; + char buf[32]; + int val, ret; + u32 reg_val; + + if (count >= sizeof(buf)) + return -EINVAL; + + if (copy_from_user(buf, user_buf, count)) + return -EFAULT; + buf[count] = 0; + + if (kstrtoint(buf, 0, &val)) + return -EINVAL; + + switch (val) { + case 1: /* Trigger Burst Cap Violation (Hang UTX) */ + /* Enable Burst Cap Enforcement */ + ret = lan78xx_read_reg(dev, USB_CFG0, &reg_val); + if (ret < 0) + return ret; + reg_val |= USB_CFG_BCE_; + lan78xx_write_reg(dev, USB_CFG0, reg_val); + + /* Set illegal Burst Cap size (512 bytes < Max Frame) */ + lan78xx_write_reg(dev, BURST_CAP, 0x01); + break; + + case 2: /* Trigger RX FIFO Overflow (Hold UTX in Reset) */ + ret = lan78xx_read_reg(dev, USB_CFG0, &reg_val); + if (ret < 0) + return ret; + reg_val |= USB_CFG0_UTX_RESET_; + lan78xx_write_reg(dev, USB_CFG0, reg_val); + break; + + case 3: /* Destabilize USB PHY (Invalid HS State) */ + ret = lan78xx_read_reg(dev, LAN78XX_USB2_TEST_REG, &reg_val); + if (ret < 0) + return ret; + /* Set bits 15:14 to '10' (Binary) - Defined as "Invalid combination" */ + reg_val &= ~(0x3 << 14); + reg_val 
|= (0x2 << 14); + lan78xx_write_reg(dev, LAN78XX_USB2_TEST_REG, reg_val); + break; + + default: + return -EINVAL; + } + + return count; +} + +static const struct file_operations lan78xx_inject_fops = { + .open = simple_open, + .write = lan78xx_inject_write, + .llseek = default_llseek, +}; + static void lan78xx_disconnect(struct usb_interface *intf) { struct lan78xx_net *dev; @@ -5102,6 +5167,8 @@ static void lan78xx_disconnect(struct usb_interface *intf) udev = interface_to_usbdev(intf); net = dev->net; + debugfs_remove_recursive(dev->debugfs_pdev); + lan78xx_health_cleanup(dev); if (dev->devlink) { cancel_work_sync(&dev->tx_timeout_work); @@ -5594,6 +5661,10 @@ static int lan78xx_probe(struct usb_interface *intf, lan78xx_health_init(dev); } + dev->debugfs_pdev = debugfs_create_dir(netdev_name(netdev), NULL); + debugfs_create_file("inject_error", 0200, dev->debugfs_pdev, dev, + &lan78xx_inject_fops); + return 0; phy_uninit: diff --git a/drivers/net/usb/lan78xx.h b/drivers/net/usb/lan78xx.h index 968e5e5faee0..16666a998441 100644 --- a/drivers/net/usb/lan78xx.h +++ b/drivers/net/usb/lan78xx.h @@ -366,6 +366,7 @@ #define USB_CFG_MAX_DEV_SPEED_SS_ (0x00008000) #define USB_CFG_MAX_DEV_SPEED_HS_ (0x00000000) #define USB_CFG_MAX_DEV_SPEED_FS_ (0x00002000) +#define USB_CFG0_UTX_RESET_ (0x00000400) #define USB_CFG_PHY_BOOST_MASK_ (0x00000180) #define USB_CFG_PHY_BOOST_PLUS_12_ (0x00000180) #define USB_CFG_PHY_BOOST_PLUS_8_ (0x00000100) @@ -876,4 +877,7 @@ #define OTP_TPVSR_VAL (OTP_BASE_ADDR + 4 * 0x3A) #define OTP_TPVHR_VAL (OTP_BASE_ADDR + 4 * 0x3B) #define OTP_TPVSA_VAL (OTP_BASE_ADDR + 4 * 0x3C) + +#define LAN78XX_USB2_TEST_REG (0x12C4) + #endif /* _LAN78XX_H */ -- 2.47.3