From: Shahar Shitrit When packets are lost and the device loses track of TLS records, the device attempts to resync by tracking TLS records and requests an async resync from software for this TLS connection. The TLS module handles such device RX resync requests by logging record headers and comparing them with the record tcp_sn when provided by the device. It also increments rcd_delta to track how far the current record tcp_sn is from the tcp_sn of the original resync request. If the device later responds with a matching tcp_sn, the TLS module approves the tcp_sn for resync. However, the device response may be delayed or never arrive, particularly due to traffic-related issues such as packet drops or reordering. In such cases, the TLS module remains unaware that resync will not complete, and continues performing unnecessary work by logging headers and incrementing rcd_delta, which can eventually exceed the threshold and trigger a WARN(). For example, this was observed when the device got out of tracking, causing mlx5e_ktls_handle_get_psv_completion() to fail and ultimately leading to the rcd_delta warning. To address this, call tls_offload_rx_resync_async_request_cancel() to cancel the resync request and stop resync tracking in such error cases. Also, increment the tls_resync_req_skip counter to track these cancellations. Signed-off-by: Shahar Shitrit Signed-off-by: Tariq Toukan --- .../mellanox/mlx5/core/en_accel/ktls_rx.c | 29 +++++++++++++++++-- .../mellanox/mlx5/core/en_accel/ktls_txrx.h | 4 +++ .../net/ethernet/mellanox/mlx5/core/en_rx.c | 4 +++ 3 files changed, 34 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_rx.c index 65ccb33edafb..ec9400de95c0 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_rx.c @@ -339,14 +339,19 @@ static void resync_handle_work(struct work_struct *work) if (unlikely(test_bit(MLX5E_PRIV_RX_FLAG_DELETING, priv_rx->flags))) { mlx5e_ktls_priv_rx_put(priv_rx); + priv_rx->rq_stats->tls_resync_req_skip++; + tls_offload_rx_resync_async_request_cancel(priv_rx->sk); return; } c = resync->priv->channels.c[priv_rx->rxq]; sq = &c->async_icosq; - if (resync_post_get_progress_params(sq, priv_rx)) + if (resync_post_get_progress_params(sq, priv_rx)) { + priv_rx->rq_stats->tls_resync_req_skip++; + tls_offload_rx_resync_async_request_cancel(priv_rx->sk); mlx5e_ktls_priv_rx_put(priv_rx); + } } static void resync_init(struct mlx5e_ktls_rx_resync_ctx *resync, @@ -431,8 +436,11 @@ void mlx5e_ktls_handle_get_psv_completion(struct mlx5e_icosq_wqe_info *wi, priv_rx = buf->priv_rx; dev = mlx5_core_dma_dev(sq->channel->mdev); - if (unlikely(test_bit(MLX5E_PRIV_RX_FLAG_DELETING, priv_rx->flags))) + if (unlikely(test_bit(MLX5E_PRIV_RX_FLAG_DELETING, priv_rx->flags))) { + priv_rx->rq_stats->tls_resync_req_skip++; + tls_offload_rx_resync_async_request_cancel(priv_rx->sk); goto out; + } dma_sync_single_for_cpu(dev, buf->dma_addr, PROGRESS_PARAMS_PADDED_SIZE, DMA_FROM_DEVICE); @@ -443,6 +451,7 @@ void mlx5e_ktls_handle_get_psv_completion(struct mlx5e_icosq_wqe_info *wi, if (tracker_state != MLX5E_TLS_PROGRESS_PARAMS_RECORD_TRACKER_STATE_TRACKING || auth_state != MLX5E_TLS_PROGRESS_PARAMS_AUTH_STATE_NO_OFFLOAD) { priv_rx->rq_stats->tls_resync_req_skip++; + tls_offload_rx_resync_async_request_cancel(priv_rx->sk); goto out; } @@ -472,8 +481,10 @@ static bool resync_queue_get_psv(struct sock *sk) resync = &priv_rx->resync; mlx5e_ktls_priv_rx_get(priv_rx); - if (unlikely(!queue_work(resync->priv->tls->rx_wq, &resync->work))) + if (unlikely(!queue_work(resync->priv->tls->rx_wq, &resync->work))) { mlx5e_ktls_priv_rx_put(priv_rx); + return false; + } return true; } @@ -557,6 +568,18 @@ void mlx5e_ktls_rx_resync(struct net_device *netdev, struct sock *sk, resync_handle_seq_match(priv_rx, c); } +void +mlx5e_ktls_rx_resync_async_request_cancel(struct mlx5e_icosq_wqe_info *wi) +{ + struct mlx5e_ktls_offload_context_rx *priv_rx; + struct mlx5e_ktls_rx_resync_buf *buf; + + buf = wi->tls_get_params.buf; + priv_rx = buf->priv_rx; + priv_rx->rq_stats->tls_resync_req_skip++; + tls_offload_rx_resync_async_request_cancel(priv_rx->sk); +} + /* End of resync section */ void mlx5e_ktls_handle_rx_skb(struct mlx5e_rq *rq, struct sk_buff *skb, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_txrx.h b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_txrx.h index f87b65c560ea..cb08799769ee 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_txrx.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_txrx.h @@ -29,6 +29,10 @@ void mlx5e_ktls_handle_get_psv_completion(struct mlx5e_icosq_wqe_info *wi, void mlx5e_ktls_tx_handle_resync_dump_comp(struct mlx5e_txqsq *sq, struct mlx5e_tx_wqe_info *wi, u32 *dma_fifo_cc); + +void +mlx5e_ktls_rx_resync_async_request_cancel(struct mlx5e_icosq_wqe_info *wi); + static inline bool mlx5e_ktls_tx_try_handle_resync_dump_comp(struct mlx5e_txqsq *sq, struct mlx5e_tx_wqe_info *wi, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c index b8c609d91d11..c713e683ef1d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c @@ -1035,6 +1035,10 @@ int mlx5e_poll_ico_cq(struct mlx5e_cq *cq) netdev_WARN_ONCE(cq->netdev, "Bad OP in ICOSQ CQE: 0x%x\n", get_cqe_opcode(cqe)); +#ifdef CONFIG_MLX5_EN_TLS + if (wi->wqe_type == MLX5E_ICOSQ_WQE_GET_PSV_TLS) + mlx5e_ktls_rx_resync_async_request_cancel(wi); +#endif mlx5e_dump_error_cqe(&sq->cq, sq->sqn, (struct mlx5_err_cqe *)cqe); mlx5_wq_cyc_wqe_dump(&sq->wq, ci, wi->num_wqebbs); -- 2.31.1