There are various circumstances in which a lane halt, or a lane reset, will fail to complete. If this happens, it will hang the kernel, which only implements a busy loop with no timeout. The circumstances in which this will happen are all bugs in nature: - if we try to power off a powered off lane - if we try to power off a lane that uses a PLL locked onto the wrong refclk frequency (wrong RCW, but SoC boots anyway) Actually, unbounded loops in the kernel are a bad practice, so let's use read_poll_timeout() with a custom function that reads both LNaTRSTCTL (lane transmit control register) and LNaRRSTCTL (lane receive control register) and returns true when the request is done in both directions. The HLT_REQ bit has to clear, whereas the RST_DONE bit has to get set. Because of the new potential timeout error in lynx_28g_power_off() and lynx_28g_power_on(), this needs to be checked for at call sites. Before, these functions only returned zero. Suggested-by: Josua Mayer Link: https://lore.kernel.org/lkml/d0c8bbf8-a0c5-469f-a148-de2235948c0f@solid-run.com/ Signed-off-by: Vladimir Oltean --- part 1 -> part 2: - minor commit message fixups Patch made its last appearance in v3 from part 1: https://lore.kernel.org/linux-phy/20250926180505.760089-16-vladimir.oltean@nxp.com/ (old) part 1 change log: v2->v3: patch is new drivers/phy/freescale/phy-fsl-lynx-28g.c | 96 ++++++++++++++++++------ 1 file changed, 74 insertions(+), 22 deletions(-) diff --git a/drivers/phy/freescale/phy-fsl-lynx-28g.c b/drivers/phy/freescale/phy-fsl-lynx-28g.c index 7ada581bbe4c..048c24c48803 100644 --- a/drivers/phy/freescale/phy-fsl-lynx-28g.c +++ b/drivers/phy/freescale/phy-fsl-lynx-28g.c @@ -254,6 +254,12 @@ #define CR(x) ((x) * 4) +#define LYNX_28G_LANE_HALT_SLEEP_US 100 +#define LYNX_28G_LANE_HALT_TIMEOUT_US 1000000 + +#define LYNX_28G_LANE_RESET_SLEEP_US 100 +#define LYNX_28G_LANE_RESET_TIMEOUT_US 1000000 + enum lynx_28g_eq_type { EQ_TYPE_NO_EQ = 0, EQ_TYPE_2TAP = 1, @@ -672,10 +678,29 @@ static void lynx_28g_lane_set_pll(struct lynx_28g_lane *lane, } } +static bool lynx_28g_lane_halt_done(struct lynx_28g_lane *lane) +{ + u32 trstctl = lynx_28g_lane_read(lane, LNaTRSTCTL); + u32 rrstctl = lynx_28g_lane_read(lane, LNaRRSTCTL); + + return !(trstctl & LNaTRSTCTL_HLT_REQ) && + !(rrstctl & LNaRRSTCTL_HLT_REQ); +} + +static bool lynx_28g_lane_reset_done(struct lynx_28g_lane *lane) +{ + u32 trstctl = lynx_28g_lane_read(lane, LNaTRSTCTL); + u32 rrstctl = lynx_28g_lane_read(lane, LNaRRSTCTL); + + return (trstctl & LNaTRSTCTL_RST_DONE) && + (rrstctl & LNaRRSTCTL_RST_DONE); +} + static int lynx_28g_power_off(struct phy *phy) { struct lynx_28g_lane *lane = phy_get_drvdata(phy); - u32 trstctl, rrstctl; + bool done; + int err; if (!lane->powered_up) return 0; @@ -687,11 +712,15 @@ static int lynx_28g_power_off(struct phy *phy) LNaRRSTCTL_HLT_REQ); /* Wait until the halting process is complete */ - do { - trstctl = lynx_28g_lane_read(lane, LNaTRSTCTL); - rrstctl = lynx_28g_lane_read(lane, LNaRRSTCTL); - } while ((trstctl & LNaTRSTCTL_HLT_REQ) || - (rrstctl & LNaRRSTCTL_HLT_REQ)); + err = read_poll_timeout(lynx_28g_lane_halt_done, done, done, + LYNX_28G_LANE_HALT_SLEEP_US, + LYNX_28G_LANE_HALT_TIMEOUT_US, + false, lane); + if (err) { + dev_err(&phy->dev, "Lane %c halt failed: %pe\n", + 'A' + lane->id, ERR_PTR(err)); + return err; + } lane->powered_up = false; @@ -701,7 +730,8 @@ static int lynx_28g_power_off(struct phy *phy) static int lynx_28g_power_on(struct phy *phy) { struct lynx_28g_lane *lane = phy_get_drvdata(phy); - u32 trstctl, rrstctl; + bool done; + int err; if (lane->powered_up) return 0; @@ -713,11 +743,15 @@ static int lynx_28g_power_on(struct phy *phy) LNaRRSTCTL_RST_REQ); /* Wait until the reset sequence is completed */ - do { - trstctl = lynx_28g_lane_read(lane, LNaTRSTCTL); - rrstctl = lynx_28g_lane_read(lane, LNaRRSTCTL); - } while (!(trstctl & LNaTRSTCTL_RST_DONE) || - !(rrstctl & LNaRRSTCTL_RST_DONE)); + err = read_poll_timeout(lynx_28g_lane_reset_done, done, done, + LYNX_28G_LANE_RESET_SLEEP_US, + LYNX_28G_LANE_RESET_TIMEOUT_US, + false, lane); + if (err) { + dev_err(&phy->dev, "Lane %c reset failed: %pe\n", + 'A' + lane->id, ERR_PTR(err)); + return err; + } lane->powered_up = true; @@ -1132,8 +1166,11 @@ static int lynx_28g_set_mode(struct phy *phy, enum phy_mode mode, int submode) /* If the lane is powered up, put the lane into the halt state while * the reconfiguration is being done. */ - if (powered_up) - lynx_28g_power_off(phy); + if (powered_up) { + err = lynx_28g_power_off(phy); + if (err) + return err; + } err = lynx_28g_lane_disable_pcvt(lane, lane->mode); if (err) @@ -1146,8 +1183,16 @@ static int lynx_28g_set_mode(struct phy *phy, enum phy_mode mode, int submode) lane->mode = lane_mode; out: - if (powered_up) - lynx_28g_power_on(phy); + if (powered_up) { + int err2 = lynx_28g_power_on(phy); + /* + * Don't overwrite a failed protocol converter disable error + * code with a successful lane power on error code, but + * propagate a failed lane power on error. + */ + if (!err) + err = err2; + } return err; } @@ -1180,9 +1225,8 @@ static int lynx_28g_init(struct phy *phy) * probe time. */ lane->powered_up = true; - lynx_28g_power_off(phy); - return 0; + return lynx_28g_power_off(phy); } static const struct phy_ops lynx_28g_ops = { @@ -1240,7 +1284,7 @@ static void lynx_28g_cdr_lock_check(struct work_struct *work) struct lynx_28g_priv *priv = work_to_lynx(work); struct lynx_28g_lane *lane; u32 rrstctl; - int i; + int err, i; for (i = priv->info->first_lane; i < LYNX_28G_NUM_LANE; i++) { lane = &priv->lane[i]; @@ -1258,9 +1302,17 @@ static void lynx_28g_cdr_lock_check(struct work_struct *work) if (!(rrstctl & LNaRRSTCTL_CDR_LOCK)) { lynx_28g_lane_rmw(lane, LNaRRSTCTL, LNaRRSTCTL_RST_REQ, LNaRRSTCTL_RST_REQ); - do { - rrstctl = lynx_28g_lane_read(lane, LNaRRSTCTL); - } while (!(rrstctl & LNaRRSTCTL_RST_DONE)); + + err = read_poll_timeout(lynx_28g_lane_read, rrstctl, + !!(rrstctl & LNaRRSTCTL_RST_DONE), + LYNX_28G_LANE_RESET_SLEEP_US, + LYNX_28G_LANE_RESET_TIMEOUT_US, + false, lane, LNaRRSTCTL); + if (err) { + dev_warn_once(&lane->phy->dev, + "Lane %c receiver reset failed: %pe\n", + 'A' + lane->id, ERR_PTR(err)); + } } mutex_unlock(&lane->phy->mutex); -- 2.34.1