From: William Tu

Move the async_icosq spinlock from the mlx5e_channel structure into the
mlx5e_icosq structure itself, for better encapsulation and to let a
later patch reuse it for other ICOSQ use cases.

Changes:
- Add a spinlock_t lock field to struct mlx5e_icosq
- Remove the async_icosq_lock field from struct mlx5e_channel
- Initialize the new lock in mlx5e_open_icosq()
- Update all lock usage in ktls_rx.c and en_main.c to use sq->lock
  instead of c->async_icosq_lock

Signed-off-by: William Tu
Signed-off-by: Tariq Toukan
---
 drivers/net/ethernet/mellanox/mlx5/core/en.h  |  4 ++--
 .../mellanox/mlx5/core/en_accel/ktls_rx.c     | 18 +++++++++---------
 .../net/ethernet/mellanox/mlx5/core/en_main.c | 12 +++++++-----
 3 files changed, 18 insertions(+), 16 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h
index 3ada7c16adfb..70bc878bd2c2 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h
@@ -545,6 +545,8 @@ struct mlx5e_icosq {
 	u32 sqn;
 	u16 reserved_room;
 	unsigned long state;
+	/* icosq can be accessed from any CPU - the spinlock protects it. */
+	spinlock_t lock;
 	struct mlx5e_ktls_resync_resp *ktls_resync;

 	/* control path */
@@ -777,8 +779,6 @@ struct mlx5e_channel {

 	/* Async ICOSQ */
 	struct mlx5e_icosq async_icosq;
-	/* async_icosq can be accessed from any CPU - the spinlock protects it. */
-	spinlock_t async_icosq_lock;

 	/* data path - accessed per napi poll */
 	const struct cpumask *aff_mask;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_rx.c
index da2d1eb52c13..8bc8231f521f 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_rx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_rx.c
@@ -203,7 +203,7 @@ static int post_rx_param_wqes(struct mlx5e_channel *c,

 	err = 0;
 	sq = &c->async_icosq;
-	spin_lock_bh(&c->async_icosq_lock);
+	spin_lock_bh(&sq->lock);

 	cseg = post_static_params(sq, priv_rx);
 	if (IS_ERR(cseg))
@@ -214,7 +214,7 @@ static int post_rx_param_wqes(struct mlx5e_channel *c,
 	mlx5e_notify_hw(&sq->wq, sq->pc, sq->uar_map, cseg);

 unlock:
-	spin_unlock_bh(&c->async_icosq_lock);
+	spin_unlock_bh(&sq->lock);

 	return err;
@@ -277,10 +277,10 @@ resync_post_get_progress_params(struct mlx5e_icosq *sq,

 	buf->priv_rx = priv_rx;

-	spin_lock_bh(&sq->channel->async_icosq_lock);
+	spin_lock_bh(&sq->lock);

 	if (unlikely(!mlx5e_icosq_can_post_wqe(sq, MLX5E_KTLS_GET_PROGRESS_WQEBBS))) {
-		spin_unlock_bh(&sq->channel->async_icosq_lock);
+		spin_unlock_bh(&sq->lock);
 		err = -ENOSPC;
 		goto err_dma_unmap;
 	}
@@ -311,7 +311,7 @@ resync_post_get_progress_params(struct mlx5e_icosq *sq,
 	icosq_fill_wi(sq, pi, &wi);
 	sq->pc++;
 	mlx5e_notify_hw(&sq->wq, sq->pc, sq->uar_map, cseg);
-	spin_unlock_bh(&sq->channel->async_icosq_lock);
+	spin_unlock_bh(&sq->lock);

 	return 0;
@@ -413,9 +413,9 @@ static void resync_handle_seq_match(struct mlx5e_ktls_offload_context_rx *priv_r
 		return;

 	if (!napi_if_scheduled_mark_missed(&c->napi)) {
-		spin_lock_bh(&c->async_icosq_lock);
+		spin_lock_bh(&sq->lock);
 		mlx5e_trigger_irq(sq);
-		spin_unlock_bh(&c->async_icosq_lock);
+		spin_unlock_bh(&sq->lock);
 	}
 }
@@ -772,7 +772,7 @@ bool mlx5e_ktls_rx_handle_resync_list(struct mlx5e_channel *c, int budget)
 	clear_bit(MLX5E_SQ_STATE_PENDING_TLS_RX_RESYNC, &sq->state);
 	spin_unlock(&ktls_resync->lock);

-	spin_lock(&c->async_icosq_lock);
+	spin_lock(&sq->lock);
 	for (j = 0; j < i; j++) {
 		struct mlx5_wqe_ctrl_seg *cseg;
@@ -791,7 +791,7 @@ bool mlx5e_ktls_rx_handle_resync_list(struct mlx5e_channel *c, int budget)
 	}
 	if (db_cseg)
 		mlx5e_notify_hw(&sq->wq, sq->pc, sq->uar_map, db_cseg);
-	spin_unlock(&c->async_icosq_lock);
+	spin_unlock(&sq->lock);

 	priv_rx->rq_stats->tls_resync_res_ok += j;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index 5ec0f5ca45b4..590707dc6f0e 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -2075,6 +2075,8 @@ static int mlx5e_open_icosq(struct mlx5e_channel *c, struct mlx5e_params *params
 	if (err)
 		goto err_free_icosq;

+	spin_lock_init(&sq->lock);
+
 	if (param->is_tls) {
 		sq->ktls_resync = mlx5e_ktls_rx_resync_create_resp_list();
 		if (IS_ERR(sq->ktls_resync)) {
@@ -2631,8 +2633,6 @@ static int mlx5e_open_queues(struct mlx5e_channel *c,
 	if (err)
 		goto err_close_rx_cq;

-	spin_lock_init(&c->async_icosq_lock);
-
 	err = mlx5e_open_icosq(c, params, &cparam->async_icosq, &c->async_icosq,
 			       mlx5e_async_icosq_err_cqe_work);
 	if (err)
@@ -2751,9 +2751,11 @@ static int mlx5e_channel_stats_alloc(struct mlx5e_priv *priv, int ix, int cpu)

 void mlx5e_trigger_napi_icosq(struct mlx5e_channel *c)
 {
-	spin_lock_bh(&c->async_icosq_lock);
-	mlx5e_trigger_irq(&c->async_icosq);
-	spin_unlock_bh(&c->async_icosq_lock);
+	struct mlx5e_icosq *async_icosq = &c->async_icosq;
+
+	spin_lock_bh(&async_icosq->lock);
+	mlx5e_trigger_irq(async_icosq);
+	spin_unlock_bh(&async_icosq->lock);
 }

 void mlx5e_trigger_napi_sched(struct napi_struct *napi)
--
2.31.1

From: William Tu

Before the cited commit, the ICOSQ was used to post a NOP WQE to
trigger a hardware interrupt and start NAPI, but this mechanism
suffers from a race condition: mlx5e_alloc_rx_mpwqe may post UMR WQEs
to the ICOSQ _before_ the NOP WQE is posted. The cited commit fixes the
issue by replacing the ICOSQ with the async ICOSQ as the queue used to
post the NOP WQE that triggers the hardware interrupt and NAPI.

This patch changes it back, replacing the async ICOSQ with the regular
ICOSQ so that later patches can save the async ICOSQ's memory, and
solves the race by adding a new SQ state, MLX5E_SQ_STATE_LOCK_NEEDED,
that synchronizes the start of NAPI.

What it does:
- Switch the trigger path from the async ICOSQ to the regular ICOSQ,
  reducing the need for the async SQ.
- Introduce MLX5E_SQ_STATE_LOCK_NEEDED and
  mlx5e_icosq_sync_lock()/unlock() to prevent the race where UMR WQEs
  could be posted before the NOP WQE used to trigger NAPI (see the
  sketch below).
- Use synchronize_net() once per trigger cycle to quiesce in-flight
  softirqs before serializing the NOP WQE and any UMR postings via the
  ICOSQ lock.
- Wrap the ICOSQ UMR posting in en_rx.c and xsk/rx.c with the new
  conditional lock.
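For reference, a minimal sketch of the pattern the UMR posting paths
follow (simplified from this patch's en_rx.c and xsk/rx.c hunks;
"wqebbs" stands in for the actual WQE size in basic blocks):

	enum mlx5e_lock_type locked;
	u16 pi;

	/* No-op unless the trigger path has set
	 * MLX5E_SQ_STATE_LOCK_NEEDED; otherwise takes sq->lock, with or
	 * without disabling BHs depending on the calling context.
	 */
	locked = mlx5e_icosq_sync_lock(icosq);

	pi = mlx5e_icosq_get_next_pi(icosq, wqebbs);
	/* ... build and fill the UMR WQE at pi ... */
	icosq->pc += wqebbs;

	mlx5e_icosq_sync_unlock(icosq, locked);

This keeps the common case (no concurrent trigger in flight) lock-free
in the NAPI path.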
Signed-off-by: William Tu
Signed-off-by: Tariq Toukan
---
 drivers/net/ethernet/mellanox/mlx5/core/en.h  | 40 ++++++++++++++++++-
 .../mellanox/mlx5/core/en/reporter_tx.c       |  1 +
 .../ethernet/mellanox/mlx5/core/en/xsk/rx.c   |  3 ++
 .../net/ethernet/mellanox/mlx5/core/en_main.c | 14 +++++--
 .../net/ethernet/mellanox/mlx5/core/en_rx.c   |  3 ++
 5 files changed, 56 insertions(+), 5 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h
index 70bc878bd2c2..9ee07fa19896 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h
@@ -388,6 +388,7 @@ enum {
 	MLX5E_SQ_STATE_DIM,
 	MLX5E_SQ_STATE_PENDING_XSK_TX,
 	MLX5E_SQ_STATE_PENDING_TLS_RX_RESYNC,
+	MLX5E_SQ_STATE_LOCK_NEEDED,
 	MLX5E_NUM_SQ_STATES, /* Must be kept last */
 };
@@ -751,7 +752,7 @@ struct mlx5e_rq {

 enum mlx5e_channel_state {
 	MLX5E_CHANNEL_STATE_XSK,
-	MLX5E_CHANNEL_NUM_STATES
+	MLX5E_CHANNEL_NUM_STATES, /* Must be kept last */
 };

 struct mlx5e_channel {
@@ -801,6 +802,43 @@ struct mlx5e_channel {
 	struct dim_cq_moder tx_cq_moder;
 };

+enum mlx5e_lock_type {
+	MLX5E_LOCK_TYPE_NONE,
+	MLX5E_LOCK_TYPE_SOFTIRQ,
+	MLX5E_LOCK_TYPE_BH,
+};
+
+static inline enum mlx5e_lock_type
+mlx5e_icosq_sync_lock(struct mlx5e_icosq *sq)
+{
+	if (!test_bit(MLX5E_SQ_STATE_LOCK_NEEDED, &sq->state))
+		return MLX5E_LOCK_TYPE_NONE;
+
+	if (in_softirq()) {
+		spin_lock(&sq->lock);
+		return MLX5E_LOCK_TYPE_SOFTIRQ;
+	}
+
+	spin_lock_bh(&sq->lock);
+	return MLX5E_LOCK_TYPE_BH;
+}
+
+static inline void mlx5e_icosq_sync_unlock(struct mlx5e_icosq *sq,
+					   enum mlx5e_lock_type lock_type)
+{
+	switch (lock_type) {
+	case MLX5E_LOCK_TYPE_SOFTIRQ:
+		spin_unlock(&sq->lock);
+		break;
+	case MLX5E_LOCK_TYPE_BH:
+		spin_unlock_bh(&sq->lock);
+		break;
+	case MLX5E_LOCK_TYPE_NONE:
+	default:
+		break;
+	}
+}
+
 struct mlx5e_ptp;

 struct mlx5e_channels {
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c
index 9e2cf191ed30..4adc1adf9897 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c
@@ -15,6 +15,7 @@ static const char * const sq_sw_state_type_name[] = {
 	[MLX5E_SQ_STATE_DIM] = "dim",
 	[MLX5E_SQ_STATE_PENDING_XSK_TX] = "pending_xsk_tx",
 	[MLX5E_SQ_STATE_PENDING_TLS_RX_RESYNC] = "pending_tls_rx_resync",
+	[MLX5E_SQ_STATE_LOCK_NEEDED] = "lock_needed",
 };

 static int mlx5e_wait_for_sq_flush(struct mlx5e_txqsq *sq)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.c
index 2b05536d564a..a96fd7f65485 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.c
@@ -21,6 +21,7 @@ int mlx5e_xsk_alloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix)
 	struct mlx5e_mpw_info *wi = mlx5e_get_mpw_info(rq, ix);
 	struct mlx5e_icosq *icosq = rq->icosq;
 	struct mlx5_wq_cyc *wq = &icosq->wq;
+	enum mlx5e_lock_type sync_locked;
 	struct mlx5e_umr_wqe *umr_wqe;
 	struct xdp_buff **xsk_buffs;
 	int batch, i;
@@ -47,6 +48,7 @@ int mlx5e_xsk_alloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix)
 		goto err_reuse_batch;
 	}

+	sync_locked = mlx5e_icosq_sync_lock(icosq);
 	pi = mlx5e_icosq_get_next_pi(icosq, rq->mpwqe.umr_wqebbs);
 	umr_wqe = mlx5_wq_cyc_get_wqe(wq, pi);
 	memcpy(umr_wqe, &rq->mpwqe.umr_wqe, sizeof(struct mlx5e_umr_wqe));
@@ -143,6 +145,7 @@ int mlx5e_xsk_alloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix)
 	};

 	icosq->pc += rq->mpwqe.umr_wqebbs;
+	mlx5e_icosq_sync_unlock(icosq, sync_locked);

 	icosq->doorbell_cseg = &umr_wqe->hdr.ctrl;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index 590707dc6f0e..80fb09d902f5 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -2751,11 +2751,17 @@ static int mlx5e_channel_stats_alloc(struct mlx5e_priv *priv, int ix, int cpu)

 void mlx5e_trigger_napi_icosq(struct mlx5e_channel *c)
 {
-	struct mlx5e_icosq *async_icosq = &c->async_icosq;
+	enum mlx5e_lock_type locked_type;

-	spin_lock_bh(&async_icosq->lock);
-	mlx5e_trigger_irq(async_icosq);
-	spin_unlock_bh(&async_icosq->lock);
+	if (!test_and_set_bit(MLX5E_SQ_STATE_LOCK_NEEDED, &c->icosq.state))
+		synchronize_net();
+
+	locked_type = mlx5e_icosq_sync_lock(&c->icosq);
+	mlx5e_trigger_irq(&c->icosq);
+	if (locked_type != MLX5E_LOCK_TYPE_NONE)
+		mlx5e_icosq_sync_unlock(&c->icosq, locked_type);
+
+	clear_bit(MLX5E_SQ_STATE_LOCK_NEEDED, &c->icosq.state);
 }

 void mlx5e_trigger_napi_sched(struct napi_struct *napi)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
index 1f6930c77437..b54844d80922 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
@@ -776,6 +776,7 @@ static int mlx5e_alloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix)
 	struct mlx5e_icosq *sq = rq->icosq;
 	struct mlx5e_frag_page *frag_page;
 	struct mlx5_wq_cyc *wq = &sq->wq;
+	enum mlx5e_lock_type sync_locked;
 	struct mlx5e_umr_wqe *umr_wqe;
 	u32 offset; /* 17-bit value with MTT. */
 	u16 pi;
@@ -788,6 +789,7 @@ static int mlx5e_alloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix)
 		goto err;
 	}

+	sync_locked = mlx5e_icosq_sync_lock(sq);
 	pi = mlx5e_icosq_get_next_pi(sq, rq->mpwqe.umr_wqebbs);
 	umr_wqe = mlx5_wq_cyc_get_wqe(wq, pi);
 	memcpy(umr_wqe, &rq->mpwqe.umr_wqe, sizeof(struct mlx5e_umr_wqe));
@@ -835,6 +837,7 @@ static int mlx5e_alloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix)
 	};

 	sq->pc += rq->mpwqe.umr_wqebbs;
+	mlx5e_icosq_sync_unlock(sq, sync_locked);

 	sq->doorbell_cseg = &umr_wqe->hdr.ctrl;
--
2.31.1

From: William Tu

Dynamically allocate the async ICOSQ. ICO (Internal Communication
Operations) queues are used by the driver to communicate with the HW;
they carry no traffic. The mlx5 driver currently has sync and async ICO
send queues, where "async" means the queue is not necessarily protected
by the NAPI context. This patch prepares for a later patch that detects
whether the async ICOSQ is actually needed and creates it only then
(see the note below).
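A note on the design choice: turning the embedded struct into a pointer
is what makes conditional existence possible. Once c->async_icosq is a
pointer, its users can be guarded with a NULL check, e.g. (a sketch of
what the later patch does; the embedded struct offered no such option):

	if (c->async_icosq)
		mlx5e_cq_arm(&c->async_icosq->cq);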
Signed-off-by: William Tu
Signed-off-by: Tariq Toukan
---
 drivers/net/ethernet/mellanox/mlx5/core/en.h  |  2 +-
 .../ethernet/mellanox/mlx5/core/en/xsk/tx.c   |  6 +-
 .../mellanox/mlx5/core/en_accel/ktls_rx.c     |  8 +--
 .../mellanox/mlx5/core/en_accel/ktls_txrx.h   |  4 +-
 .../net/ethernet/mellanox/mlx5/core/en_main.c | 67 ++++++++++++++-----
 .../net/ethernet/mellanox/mlx5/core/en_txrx.c |  7 +-
 6 files changed, 66 insertions(+), 28 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h
index 9ee07fa19896..3a68fe651760 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h
@@ -779,7 +779,7 @@ struct mlx5e_channel {
 	struct mlx5e_xdpsq xsksq;

 	/* Async ICOSQ */
-	struct mlx5e_icosq async_icosq;
+	struct mlx5e_icosq *async_icosq;

 	/* data path - accessed per napi poll */
 	const struct cpumask *aff_mask;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/tx.c
index a59199ed590d..9e33156fac8a 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/tx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/tx.c
@@ -26,10 +26,12 @@ int mlx5e_xsk_wakeup(struct net_device *dev, u32 qid, u32 flags)
 	 * active and not polled by NAPI. Return 0, because the upcoming
 	 * activate will trigger the IRQ for us.
 	 */
-	if (unlikely(!test_bit(MLX5E_SQ_STATE_ENABLED, &c->async_icosq.state)))
+	if (unlikely(!test_bit(MLX5E_SQ_STATE_ENABLED,
+			       &c->async_icosq->state)))
 		return 0;

-	if (test_and_set_bit(MLX5E_SQ_STATE_PENDING_XSK_TX, &c->async_icosq.state))
+	if (test_and_set_bit(MLX5E_SQ_STATE_PENDING_XSK_TX,
+			     &c->async_icosq->state))
 		return 0;

 	mlx5e_trigger_napi_icosq(c);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_rx.c
index 8bc8231f521f..5d8fe252799e 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_rx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_rx.c
@@ -202,7 +202,7 @@ static int post_rx_param_wqes(struct mlx5e_channel *c,
 	int err;

 	err = 0;
-	sq = &c->async_icosq;
+	sq = c->async_icosq;
 	spin_lock_bh(&sq->lock);

 	cseg = post_static_params(sq, priv_rx);
@@ -344,7 +344,7 @@ static void resync_handle_work(struct work_struct *work)
 	}

 	c = resync->priv->channels.c[priv_rx->rxq];
-	sq = &c->async_icosq;
+	sq = c->async_icosq;

 	if (resync_post_get_progress_params(sq, priv_rx)) {
 		priv_rx->rq_stats->tls_resync_req_skip++;
@@ -371,7 +371,7 @@ static void resync_handle_seq_match(struct mlx5e_ktls_offload_context_rx *priv_r
 	struct mlx5e_icosq *sq;
 	bool trigger_poll;

-	sq = &c->async_icosq;
+	sq = c->async_icosq;
 	ktls_resync = sq->ktls_resync;
 	trigger_poll = false;
@@ -753,7 +753,7 @@ bool mlx5e_ktls_rx_handle_resync_list(struct mlx5e_channel *c, int budget)
 	LIST_HEAD(local_list);
 	int i, j;

-	sq = &c->async_icosq;
+	sq = c->async_icosq;
 	if (unlikely(!test_bit(MLX5E_SQ_STATE_ENABLED, &sq->state)))
 		return false;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_txrx.h b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_txrx.h
index cb08799769ee..abc1d92a912a 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_txrx.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_txrx.h
@@ -50,7 +50,9 @@ bool mlx5e_ktls_rx_handle_resync_list(struct mlx5e_channel *c, int budget);
 static inline bool
 mlx5e_ktls_rx_pending_resync_list(struct mlx5e_channel *c, int budget)
 {
-	return budget && test_bit(MLX5E_SQ_STATE_PENDING_TLS_RX_RESYNC, &c->async_icosq.state);
+	return budget && c->async_icosq &&
+	       test_bit(MLX5E_SQ_STATE_PENDING_TLS_RX_RESYNC,
+			&c->async_icosq->state);
 }

 static inline void
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index 80fb09d902f5..2b2504bd2c67 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -2590,6 +2590,47 @@ static int mlx5e_open_rxq_rq(struct mlx5e_channel *c, struct mlx5e_params *param
 	return mlx5e_open_rq(params, rq_params, NULL, cpu_to_node(c->cpu), q_counter, &c->rq);
 }

+static struct mlx5e_icosq *
+mlx5e_open_async_icosq(struct mlx5e_channel *c,
+		       struct mlx5e_params *params,
+		       struct mlx5e_channel_param *cparam,
+		       struct mlx5e_create_cq_param *ccp)
+{
+	struct dim_cq_moder icocq_moder = {0, 0};
+	struct mlx5e_icosq *async_icosq;
+	int err;
+
+	async_icosq = kvzalloc_node(sizeof(*async_icosq), GFP_KERNEL,
+				    cpu_to_node(c->cpu));
+	if (!async_icosq)
+		return ERR_PTR(-ENOMEM);
+
+	err = mlx5e_open_cq(c->mdev, icocq_moder, &cparam->async_icosq.cqp, ccp,
+			    &async_icosq->cq);
+	if (err)
+		goto err_free_async_icosq;
+
+	err = mlx5e_open_icosq(c, params, &cparam->async_icosq, async_icosq,
+			       mlx5e_async_icosq_err_cqe_work);
+	if (err)
+		goto err_close_async_icosq_cq;
+
+	return async_icosq;
+
+err_close_async_icosq_cq:
+	mlx5e_close_cq(&async_icosq->cq);
+err_free_async_icosq:
+	kvfree(async_icosq);
+	return ERR_PTR(err);
+}
+
+static void mlx5e_close_async_icosq(struct mlx5e_icosq *async_icosq)
+{
+	mlx5e_close_icosq(async_icosq);
+	mlx5e_close_cq(&async_icosq->cq);
+	kvfree(async_icosq);
+}
+
 static int mlx5e_open_queues(struct mlx5e_channel *c,
 			     struct mlx5e_params *params,
 			     struct mlx5e_channel_param *cparam)
@@ -2601,15 +2642,10 @@ static int mlx5e_open_queues(struct mlx5e_channel *c,

 	mlx5e_build_create_cq_param(&ccp, c);

-	err = mlx5e_open_cq(c->mdev, icocq_moder, &cparam->async_icosq.cqp, &ccp,
-			    &c->async_icosq.cq);
-	if (err)
-		return err;
-
 	err = mlx5e_open_cq(c->mdev, icocq_moder, &cparam->icosq.cqp, &ccp,
 			    &c->icosq.cq);
 	if (err)
-		goto err_close_async_icosq_cq;
+		return err;

 	err = mlx5e_open_tx_cqs(c, params, &ccp, cparam);
 	if (err)
@@ -2633,10 +2669,11 @@ static int mlx5e_open_queues(struct mlx5e_channel *c,
 	if (err)
 		goto err_close_rx_cq;

-	err = mlx5e_open_icosq(c, params, &cparam->async_icosq, &c->async_icosq,
-			       mlx5e_async_icosq_err_cqe_work);
-	if (err)
+	c->async_icosq = mlx5e_open_async_icosq(c, params, cparam, &ccp);
+	if (IS_ERR(c->async_icosq)) {
+		err = PTR_ERR(c->async_icosq);
 		goto err_close_rq_xdpsq_cq;
+	}

 	mutex_init(&c->icosq_recovery_lock);
@@ -2672,7 +2709,7 @@ static int mlx5e_open_queues(struct mlx5e_channel *c,
 	mlx5e_close_icosq(&c->icosq);

 err_close_async_icosq:
-	mlx5e_close_icosq(&c->async_icosq);
+	mlx5e_close_async_icosq(c->async_icosq);

 err_close_rq_xdpsq_cq:
 	if (c->xdp)
@@ -2691,9 +2728,6 @@ static int mlx5e_open_queues(struct mlx5e_channel *c,
 err_close_icosq_cq:
 	mlx5e_close_cq(&c->icosq.cq);

-err_close_async_icosq_cq:
-	mlx5e_close_cq(&c->async_icosq.cq);
-
 	return err;
 }
@@ -2707,7 +2741,7 @@ static void mlx5e_close_queues(struct mlx5e_channel *c)
 	mlx5e_close_sqs(c);
 	mlx5e_close_icosq(&c->icosq);
 	mutex_destroy(&c->icosq_recovery_lock);
-	mlx5e_close_icosq(&c->async_icosq);
+	mlx5e_close_async_icosq(c->async_icosq);
 	if (c->xdp)
 		mlx5e_close_cq(&c->rq_xdpsq.cq);
 	mlx5e_close_cq(&c->rq.cq);
@@ -2715,7 +2749,6 @@ static void mlx5e_close_queues(struct mlx5e_channel *c)
 	mlx5e_close_xdpredirect_sq(c->xdpsq);
 	mlx5e_close_tx_cqs(c);
 	mlx5e_close_cq(&c->icosq.cq);
-	mlx5e_close_cq(&c->async_icosq.cq);
 }

 static u8 mlx5e_enumerate_lag_port(struct mlx5_core_dev *mdev, int ix)
@@ -2881,7 +2914,7 @@ static void mlx5e_activate_channel(struct mlx5e_channel *c)
 	for (tc = 0; tc < c->num_tc; tc++)
 		mlx5e_activate_txqsq(&c->sq[tc]);
 	mlx5e_activate_icosq(&c->icosq);
-	mlx5e_activate_icosq(&c->async_icosq);
+	mlx5e_activate_icosq(c->async_icosq);

 	if (test_bit(MLX5E_CHANNEL_STATE_XSK, c->state))
 		mlx5e_activate_xsk(c);
@@ -2902,7 +2935,7 @@ static void mlx5e_deactivate_channel(struct mlx5e_channel *c)
 	else
 		mlx5e_deactivate_rq(&c->rq);

-	mlx5e_deactivate_icosq(&c->async_icosq);
+	mlx5e_deactivate_icosq(c->async_icosq);
 	mlx5e_deactivate_icosq(&c->icosq);
 	for (tc = 0; tc < c->num_tc; tc++)
 		mlx5e_deactivate_txqsq(&c->sq[tc]);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c
index 76108299ea57..57c54265dbda 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c
@@ -180,11 +180,12 @@ int mlx5e_napi_poll(struct napi_struct *napi, int budget)
 	busy |= work_done == budget;

 	mlx5e_poll_ico_cq(&c->icosq.cq);
-	if (mlx5e_poll_ico_cq(&c->async_icosq.cq))
+	if (mlx5e_poll_ico_cq(&c->async_icosq->cq))
 		/* Don't clear the flag if nothing was polled to prevent
 		 * queueing more WQEs and overflowing the async ICOSQ.
 		 */
-		clear_bit(MLX5E_SQ_STATE_PENDING_XSK_TX, &c->async_icosq.state);
+		clear_bit(MLX5E_SQ_STATE_PENDING_XSK_TX,
+			  &c->async_icosq->state);

 	/* Keep after async ICOSQ CQ poll */
 	if (unlikely(mlx5e_ktls_rx_pending_resync_list(c, budget)))
@@ -236,7 +237,7 @@ int mlx5e_napi_poll(struct napi_struct *napi, int budget)
 	mlx5e_cq_arm(&rq->cq);
 	mlx5e_cq_arm(&c->icosq.cq);
-	mlx5e_cq_arm(&c->async_icosq.cq);
+	mlx5e_cq_arm(&c->async_icosq->cq);

 	if (c->xdpsq)
 		mlx5e_cq_arm(&c->xdpsq->cq);
--
2.31.1

From: William Tu

The async ICOSQ is only required by TLS RX (for the resync flow) and by
XSK TX. Create it only when these features are enabled, instead of
always allocating it. This reduces per-channel memory usage, saves
hardware resources, improves latency, and decreases the default number
of SQs (from 4 to 3) and CQs (from 5 to 4). It also speeds up channel
open/close operations for a netdev when the async ICOSQ is not needed.

Currently, enabling TLS RX triggers no channel reset. As a result, the
async ICOSQ allocation is not triggered either, causing a NULL pointer
crash. One solution is to reset the channels every time TLS RX is
toggled. However, that is not straightforward, as the offload state
matters only at connection creation and can outlive a channel reset.
Instead, introduce a new field, 'ktls_rx_was_enabled': when TLS RX is
enabled for the first time, reset the channels, create the async
ICOSQ, and set the field. From that point on, there is no need to
reset the channels on any further TLS RX enable/disable; the async
ICOSQ will always be needed, and thus always created.

For XSK TX, the async ICOSQ is used in the wakeup flow, and a channel
serving an XSK pool is guaranteed to have the async ICOSQ allocated.

This improves the latency of interface up/down operations when it
applies.

Perf numbers:
NIC: Connect-X7.
Setup: 248 channels.
Interface up + down:
Before: 2.605 secs
After:  2.246 secs (1.16x faster)

Signed-off-by: William Tu
Signed-off-by: Tariq Toukan
---
 drivers/net/ethernet/mellanox/mlx5/core/en.h  |  1 +
 .../mellanox/mlx5/core/en_accel/ktls.c        | 10 +++++--
 .../net/ethernet/mellanox/mlx5/core/en_main.c | 30 ++++++++++++-------
 .../net/ethernet/mellanox/mlx5/core/en_txrx.c |  5 ++--
 4 files changed, 32 insertions(+), 14 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h
index 3a68fe651760..fea26a3a1c87 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h
@@ -958,6 +958,7 @@ struct mlx5e_priv {
 	u8 max_opened_tc;
 	bool tx_ptp_opened;
 	bool rx_ptp_opened;
+	bool ktls_rx_was_enabled;
 	struct kernel_hwtstamp_config hwtstamp_config;
 	u16 q_counter[MLX5_SD_MAX_GROUP_SZ];
 	u16 drop_rq_q_counter;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.c
index e3e57c849436..1c2cc2aad2b0 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.c
@@ -135,10 +135,15 @@ int mlx5e_ktls_set_feature_rx(struct net_device *netdev, bool enable)
 	int err = 0;

 	mutex_lock(&priv->state_lock);
-	if (enable)
+	if (enable) {
 		err = mlx5e_accel_fs_tcp_create(priv->fs);
-	else
+		if (!err && !priv->ktls_rx_was_enabled) {
+			priv->ktls_rx_was_enabled = true;
+			mlx5e_safe_reopen_channels(priv);
+		}
+	} else {
 		mlx5e_accel_fs_tcp_destroy(priv->fs);
+	}
 	mutex_unlock(&priv->state_lock);

 	return err;
@@ -161,6 +166,7 @@ int mlx5e_ktls_init_rx(struct mlx5e_priv *priv)
 			destroy_workqueue(priv->tls->rx_wq);
 			return err;
 		}
+		priv->ktls_rx_was_enabled = true;
 	}

 	return 0;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index 2b2504bd2c67..d1dbba1a7a2f 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -2633,7 +2633,8 @@ static void mlx5e_close_async_icosq(struct mlx5e_icosq *async_icosq)

 static int mlx5e_open_queues(struct mlx5e_channel *c,
 			     struct mlx5e_params *params,
-			     struct mlx5e_channel_param *cparam)
+			     struct mlx5e_channel_param *cparam,
+			     bool async_icosq_needed)
 {
 	const struct net_device_ops *netdev_ops = c->netdev->netdev_ops;
 	struct dim_cq_moder icocq_moder = {0, 0};
@@ -2669,10 +2670,13 @@ static int mlx5e_open_queues(struct mlx5e_channel *c,
 	if (err)
 		goto err_close_rx_cq;

-	c->async_icosq = mlx5e_open_async_icosq(c, params, cparam, &ccp);
-	if (IS_ERR(c->async_icosq)) {
-		err = PTR_ERR(c->async_icosq);
-		goto err_close_rq_xdpsq_cq;
+	if (async_icosq_needed) {
+		c->async_icosq = mlx5e_open_async_icosq(c, params, cparam,
+							&ccp);
+		if (IS_ERR(c->async_icosq)) {
+			err = PTR_ERR(c->async_icosq);
+			goto err_close_rq_xdpsq_cq;
+		}
 	}

 	mutex_init(&c->icosq_recovery_lock);
@@ -2709,7 +2713,8 @@ static int mlx5e_open_queues(struct mlx5e_channel *c,
 	mlx5e_close_icosq(&c->icosq);

 err_close_async_icosq:
-	mlx5e_close_async_icosq(c->async_icosq);
+	if (c->async_icosq)
+		mlx5e_close_async_icosq(c->async_icosq);

 err_close_rq_xdpsq_cq:
 	if (c->xdp)
@@ -2741,7 +2746,8 @@ static void mlx5e_close_queues(struct mlx5e_channel *c)
 	mlx5e_close_sqs(c);
 	mlx5e_close_icosq(&c->icosq);
 	mutex_destroy(&c->icosq_recovery_lock);
-	mlx5e_close_async_icosq(c->async_icosq);
+	if (c->async_icosq)
+		mlx5e_close_async_icosq(c->async_icosq);
 	if (c->xdp)
 		mlx5e_close_cq(&c->rq_xdpsq.cq);
 	mlx5e_close_cq(&c->rq.cq);
@@ -2827,6 +2833,7 @@ static int mlx5e_open_channel(struct mlx5e_priv *priv, int ix,
 	struct mlx5e_channel_param *cparam;
 	struct mlx5_core_dev *mdev;
 	struct mlx5e_xsk_param xsk;
+	bool async_icosq_needed;
 	struct mlx5e_channel *c;
 	unsigned int irq;
 	int vec_ix;
@@ -2876,7 +2883,8 @@ static int mlx5e_open_channel(struct mlx5e_priv *priv, int ix,
 	netif_napi_add_config_locked(netdev, &c->napi, mlx5e_napi_poll, ix);
 	netif_napi_set_irq_locked(&c->napi, irq);

-	err = mlx5e_open_queues(c, params, cparam);
+	async_icosq_needed = !!xsk_pool || priv->ktls_rx_was_enabled;
+	err = mlx5e_open_queues(c, params, cparam, async_icosq_needed);
 	if (unlikely(err))
 		goto err_napi_del;
@@ -2914,7 +2922,8 @@ static void mlx5e_activate_channel(struct mlx5e_channel *c)
 	for (tc = 0; tc < c->num_tc; tc++)
 		mlx5e_activate_txqsq(&c->sq[tc]);
 	mlx5e_activate_icosq(&c->icosq);
-	mlx5e_activate_icosq(c->async_icosq);
+	if (c->async_icosq)
+		mlx5e_activate_icosq(c->async_icosq);

 	if (test_bit(MLX5E_CHANNEL_STATE_XSK, c->state))
 		mlx5e_activate_xsk(c);
@@ -2935,7 +2944,8 @@ static void mlx5e_deactivate_channel(struct mlx5e_channel *c)
 	else
 		mlx5e_deactivate_rq(&c->rq);

-	mlx5e_deactivate_icosq(c->async_icosq);
+	if (c->async_icosq)
+		mlx5e_deactivate_icosq(c->async_icosq);
 	mlx5e_deactivate_icosq(&c->icosq);
 	for (tc = 0; tc < c->num_tc; tc++)
 		mlx5e_deactivate_txqsq(&c->sq[tc]);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c
index 57c54265dbda..ec7391f38642 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c
@@ -180,7 +180,7 @@ int mlx5e_napi_poll(struct napi_struct *napi, int budget)
 	busy |= work_done == budget;

 	mlx5e_poll_ico_cq(&c->icosq.cq);
-	if (mlx5e_poll_ico_cq(&c->async_icosq->cq))
+	if (c->async_icosq && mlx5e_poll_ico_cq(&c->async_icosq->cq))
 		/* Don't clear the flag if nothing was polled to prevent
 		 * queueing more WQEs and overflowing the async ICOSQ.
 		 */
@@ -237,7 +238,7 @@ int mlx5e_napi_poll(struct napi_struct *napi, int budget)
 	mlx5e_cq_arm(&rq->cq);
 	mlx5e_cq_arm(&c->icosq.cq);
-	mlx5e_cq_arm(&c->async_icosq->cq);
+	if (c->async_icosq)
+		mlx5e_cq_arm(&c->async_icosq->cq);

 	if (c->xdpsq)
 		mlx5e_cq_arm(&c->xdpsq->cq);
--
2.31.1

The XDP features state might depend on the state of other features,
like HW-LRO / HW-GRO. In general, move the re-evaluation and
announcement of the XDP features (xdp_set_features_flag_locked) into
the flows where the configuration actually changes; there is no point
in updating them elsewhere. This is also a more appropriate place, as
it modifies the announced features while the channels are inactive,
avoiding the small window between channel activation and the proper
setting of the XDP features (see the sketch below).
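For illustration, the ordering concern looks roughly like this (a
simplified sketch, not verbatim driver code):

	/* before: features were announced only after the new channels
	 * were already activated, leaving a small window
	 */
	mlx5e_safe_switch_params(priv, &new_params, NULL, NULL, true);
	mlx5e_set_xdp_feature(netdev);

	/* after: the switch flow itself calls mlx5e_set_xdp_feature()
	 * while the new channels are still inactive, so callers drop
	 * their own calls
	 */
	mlx5e_safe_switch_params(priv, &new_params, NULL, NULL, true);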
Signed-off-by: Tariq Toukan
Reviewed-by: Dragos Tatulea
Reviewed-by: William Tu
---
 drivers/net/ethernet/mellanox/mlx5/core/en.h       |  2 +-
 .../net/ethernet/mellanox/mlx5/core/en_ethtool.c   | 10 +---------
 drivers/net/ethernet/mellanox/mlx5/core/en_main.c  | 13 ++++++-------
 drivers/net/ethernet/mellanox/mlx5/core/en_rep.c   |  2 +-
 4 files changed, 9 insertions(+), 18 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h
index fea26a3a1c87..f857ed407cb0 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h
@@ -1288,7 +1288,7 @@ void mlx5e_netdev_attach_nic_profile(struct mlx5e_priv *priv);
 void mlx5e_set_netdev_mtu_boundaries(struct mlx5e_priv *priv);
 void mlx5e_build_nic_params(struct mlx5e_priv *priv, struct mlx5e_xsk *xsk, u16 mtu);
-void mlx5e_set_xdp_feature(struct net_device *netdev);
+void mlx5e_set_xdp_feature(struct mlx5e_priv *priv);
 netdev_features_t mlx5e_features_check(struct sk_buff *skb,
 				       struct net_device *netdev,
 				       netdev_features_t features);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
index 01b8f05a23db..73d567ccd289 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
@@ -2286,7 +2286,6 @@ static int set_pflag_rx_striding_rq(struct net_device *netdev, bool enable)
 	struct mlx5e_priv *priv = netdev_priv(netdev);
 	struct mlx5_core_dev *mdev = priv->mdev;
 	struct mlx5e_params new_params;
-	int err;

 	if (enable) {
 		/* Checking the regular RQ here; mlx5e_validate_xsk_param called
@@ -2307,14 +2306,7 @@ static int set_pflag_rx_striding_rq(struct net_device *netdev, bool enable)
 	MLX5E_SET_PFLAG(&new_params, MLX5E_PFLAG_RX_STRIDING_RQ, enable);
 	mlx5e_set_rq_type(mdev, &new_params);

-	err = mlx5e_safe_switch_params(priv, &new_params, NULL, NULL, true);
-	if (err)
-		return err;
-
-	/* update XDP supported features */
-	mlx5e_set_xdp_feature(netdev);
-
-	return 0;
+	return mlx5e_safe_switch_params(priv, &new_params, NULL, NULL, true);
 }

 static int set_pflag_rx_no_csum_complete(struct net_device *netdev, bool enable)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index d1dbba1a7a2f..078fd591c540 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -3417,6 +3417,7 @@ static int mlx5e_switch_priv_params(struct mlx5e_priv *priv,
 		}
 	}

+	mlx5e_set_xdp_feature(priv);
 	return 0;
 }
@@ -3448,6 +3449,7 @@ static int mlx5e_switch_priv_channels(struct mlx5e_priv *priv,
 		}
 	}

+	mlx5e_set_xdp_feature(priv);
 	if (!MLX5_CAP_GEN(priv->mdev, tis_tir_td_order))
 		mlx5e_close_channels(old_chs);
 	priv->profile->update_rx(priv);
@@ -4461,10 +4463,10 @@ static int mlx5e_handle_feature(struct net_device *netdev,
 	return 0;
 }

-void mlx5e_set_xdp_feature(struct net_device *netdev)
+void mlx5e_set_xdp_feature(struct mlx5e_priv *priv)
 {
-	struct mlx5e_priv *priv = netdev_priv(netdev);
 	struct mlx5e_params *params = &priv->channels.params;
+	struct net_device *netdev = priv->netdev;
 	xdp_features_t val;

 	if (!netdev->netdev_ops->ndo_bpf ||
@@ -4513,9 +4515,6 @@ int mlx5e_set_features(struct net_device *netdev, netdev_features_t features)
 			return -EINVAL;
 	}

-	/* update XDP supported features */
-	mlx5e_set_xdp_feature(netdev);
-
 	return 0;
 }
@@ -5911,7 +5910,7 @@ static void mlx5e_build_nic_netdev(struct net_device *netdev)
 	netdev->netmem_tx = true;

 	netif_set_tso_max_size(netdev, GSO_MAX_SIZE);
-	mlx5e_set_xdp_feature(netdev);
+	mlx5e_set_xdp_feature(priv);
 	mlx5e_set_netdev_dev_addr(netdev);
 	mlx5e_macsec_build_netdev(priv);
 	mlx5e_ipsec_build_netdev(priv);
@@ -6009,7 +6008,7 @@ static int mlx5e_nic_init(struct mlx5_core_dev *mdev,
 	mlx5e_psp_register(priv);

 	/* update XDP supported features */
-	mlx5e_set_xdp_feature(netdev);
+	mlx5e_set_xdp_feature(priv);
 	if (take_rtnl)
 		rtnl_unlock();
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
index 0335ca8277ef..ee9595109649 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
@@ -867,7 +867,7 @@ static void mlx5e_build_rep_params(struct net_device *netdev)
 	if (take_rtnl)
 		rtnl_lock();
 	/* update XDP supported features */
-	mlx5e_set_xdp_feature(netdev);
+	mlx5e_set_xdp_feature(priv);
 	if (take_rtnl)
 		rtnl_unlock();
--
2.31.1

Save per-channel resources by default: open the XDP-redirect-target SQ
only when an XDP program is loaded. As no better API exists, the
XDP-redirect-target SQ can still be made available by loading a dummy
XDP program. This improves the latency of interface up/down operations
when the feature is disabled.

Perf numbers:
NIC: Connect-X7.
Setup: 248 channels.

Interface up + down:
Before: 2.246 secs
After:  1.798 secs (1.25x faster)

Signed-off-by: Tariq Toukan
Reviewed-by: Dragos Tatulea
Reviewed-by: William Tu
---
 .../net/ethernet/mellanox/mlx5/core/en_main.c | 23 +++++++++----------
 1 file changed, 11 insertions(+), 12 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index 078fd591c540..23a0b50b9dbd 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -2652,7 +2652,7 @@ static int mlx5e_open_queues(struct mlx5e_channel *c,
 	if (err)
 		goto err_close_icosq_cq;

-	if (netdev_ops->ndo_xdp_xmit) {
+	if (netdev_ops->ndo_xdp_xmit && c->xdp) {
 		c->xdpsq = mlx5e_open_xdpredirect_sq(c, params, cparam, &ccp);
 		if (IS_ERR(c->xdpsq)) {
 			err = PTR_ERR(c->xdpsq);
@@ -4467,19 +4467,18 @@ void mlx5e_set_xdp_feature(struct mlx5e_priv *priv)
 {
 	struct mlx5e_params *params = &priv->channels.params;
 	struct net_device *netdev = priv->netdev;
-	xdp_features_t val;
+	xdp_features_t val = 0;

-	if (!netdev->netdev_ops->ndo_bpf ||
-	    params->packet_merge.type != MLX5E_PACKET_MERGE_NONE) {
-		xdp_set_features_flag_locked(netdev, 0);
-		return;
-	}
+	if (netdev->netdev_ops->ndo_bpf &&
+	    params->packet_merge.type == MLX5E_PACKET_MERGE_NONE)
+		val = NETDEV_XDP_ACT_BASIC | NETDEV_XDP_ACT_REDIRECT |
+		      NETDEV_XDP_ACT_XSK_ZEROCOPY |
+		      NETDEV_XDP_ACT_RX_SG;
+
+	if (netdev->netdev_ops->ndo_xdp_xmit && params->xdp_prog)
+		val |= NETDEV_XDP_ACT_NDO_XMIT |
+		       NETDEV_XDP_ACT_NDO_XMIT_SG;

-	val = NETDEV_XDP_ACT_BASIC | NETDEV_XDP_ACT_REDIRECT |
-	      NETDEV_XDP_ACT_XSK_ZEROCOPY |
-	      NETDEV_XDP_ACT_RX_SG |
-	      NETDEV_XDP_ACT_NDO_XMIT |
-	      NETDEV_XDP_ACT_NDO_XMIT_SG;
 	xdp_set_features_flag_locked(netdev, val);
 }
--
2.31.1