From: Shahar Shitrit

mlx5e_reporter_rx_timeout() is currently invoked synchronously in the
driver's open error flow. This causes the thread holding
priv->state_lock to attempt acquiring the devlink lock, which can
result in a circular dependency with other devlink operations.

For example:

- Devlink health diagnose flow:
  - __devlink_nl_pre_doit() acquires the devlink lock.
  - devlink_nl_health_reporter_diagnose_doit() invokes the driver's
    diagnose callback.
  - mlx5e_rx_reporter_diagnose() then attempts to acquire
    priv->state_lock.

- Driver open flow:
  - mlx5e_open() acquires priv->state_lock.
  - If an error occurs, the devlink health reporter may be invoked,
    attempting to acquire the devlink lock.

To prevent this circular locking scenario, defer the RX timeout
recovery by scheduling it via a workqueue. This ensures that the
recovery work acquires the locks in a consistent order: first the
devlink lock, then priv->state_lock.

Additionally, make the recovery work acquire the netdev instance lock
to safely synchronize with the open/close channel flows, similar to
mlx5e_tx_timeout_work. Repeatedly attempt to acquire the netdev
instance lock until it is taken or the target RQ is no longer active,
as indicated by the MLX5E_STATE_CHANNELS_ACTIVE bit.

Fixes: 32c57fb26863 ("net/mlx5e: Report and recover from rx timeout")
Signed-off-by: Shahar Shitrit
Reviewed-by: Cosmin Ratiu
Reviewed-by: Dragos Tatulea
Signed-off-by: Tariq Toukan
---
 drivers/net/ethernet/mellanox/mlx5/core/en.h  |  1 +
 .../mellanox/mlx5/core/en/reporter_rx.c       |  7 +++++
 .../net/ethernet/mellanox/mlx5/core/en_main.c | 26 ++++++++++++++++++-
 3 files changed, 33 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h
index 5b0d03b3efe8..48bcd6813aff 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h
@@ -728,6 +728,7 @@ struct mlx5e_rq {
         struct xsk_buff_pool  *xsk_pool;
 
         struct work_struct     recover_work;
+        struct work_struct     rx_timeout_work;
 
         /* control */
         struct mlx5_wq_ctrl    wq_ctrl;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c
index e75759533ae0..16c44d628eda 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c
@@ -170,16 +170,23 @@ static int mlx5e_rx_reporter_err_rq_cqe_recover(void *ctx)
 static int mlx5e_rx_reporter_timeout_recover(void *ctx)
 {
         struct mlx5_eq_comp *eq;
+        struct mlx5e_priv *priv;
         struct mlx5e_rq *rq;
         int err;
 
         rq = ctx;
+        priv = rq->priv;
+
+        mutex_lock(&priv->state_lock);
+
         eq = rq->cq.mcq.eq;
 
         err = mlx5e_health_channel_eq_recover(rq->netdev, eq, rq->cq.ch_stats);
         if (err && rq->icosq)
                 clear_bit(MLX5E_SQ_STATE_ENABLED, &rq->icosq->state);
 
+        mutex_unlock(&priv->state_lock);
+
         return err;
 }
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index ea822c69d137..16d818943487 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -707,6 +707,27 @@ static void mlx5e_rq_err_cqe_work(struct work_struct *recover_work)
         mlx5e_reporter_rq_cqe_err(rq);
 }
 
+static void mlx5e_rq_timeout_work(struct work_struct *timeout_work)
+{
+        struct mlx5e_rq *rq = container_of(timeout_work,
+                                           struct mlx5e_rq,
+                                           rx_timeout_work);
+
+        /* Acquire netdev instance lock to synchronize with channel close and
+         * reopen flows. Either successfully obtain the lock, or detect that
+         * channels are closing for another reason, making this work no longer
+         * necessary.
+         */
+        while (!netdev_trylock(rq->netdev)) {
+                if (!test_bit(MLX5E_STATE_CHANNELS_ACTIVE, &rq->priv->state))
+                        return;
+                msleep(20);
+        }
+
+        mlx5e_reporter_rx_timeout(rq);
+        netdev_unlock(rq->netdev);
+}
+
 static int mlx5e_alloc_mpwqe_rq_drop_page(struct mlx5e_rq *rq)
 {
         rq->wqe_overflow.page = alloc_page(GFP_KERNEL);
@@ -830,6 +851,7 @@ static int mlx5e_alloc_rq(struct mlx5e_params *params,
         rqp->wq.db_numa_node = node;
 
         INIT_WORK(&rq->recover_work, mlx5e_rq_err_cqe_work);
+        INIT_WORK(&rq->rx_timeout_work, mlx5e_rq_timeout_work);
 
         if (params->xdp_prog)
                 bpf_prog_inc(params->xdp_prog);
@@ -1204,7 +1226,8 @@ int mlx5e_wait_for_min_rx_wqes(struct mlx5e_rq *rq, int wait_time)
         netdev_warn(rq->netdev, "Failed to get min RX wqes on Channel[%d] RQN[0x%x] wq cur_sz(%d) min_rx_wqes(%d)\n",
                     rq->ix, rq->rqn, mlx5e_rqwq_get_cur_sz(rq), min_wqes);
 
-        mlx5e_reporter_rx_timeout(rq);
+        queue_work(rq->priv->wq, &rq->rx_timeout_work);
+
         return -ETIMEDOUT;
 }
 
@@ -1375,6 +1398,7 @@ void mlx5e_close_rq(struct mlx5e_rq *rq)
         if (rq->dim)
                 cancel_work_sync(&rq->dim->work);
         cancel_work_sync(&rq->recover_work);
+        cancel_work_sync(&rq->rx_timeout_work);
         mlx5e_destroy_rq(rq);
         mlx5e_free_rx_descs(rq);
         mlx5e_free_rq(rq);
-- 
2.31.1