From: Cosmin Ratiu The driver allocates a single doorbell per device and uses it for all Send Queues (SQs). This can become a bottleneck due to the high number of concurrent MMIO accesses when ringing the same doorbell from many channels. This patch makes the doorbells used by channel queues configurable. mlx5e_channel_pick_doorbell() is added to select the doorbell to be used for a given channel, picking the default for now. When opening a channel, the selected doorbell is saved to the channel struct and used whenever channel-related queues are created. Finally, 'uar_page' is added to 'struct mlx5e_create_sq_param' to control which doorbell to use when allocating an SQ, since that can happen outside channel context (e.g. for PTP). Signed-off-by: Cosmin Ratiu Reviewed-by: Dragos Tatulea Signed-off-by: Tariq Toukan --- drivers/net/ethernet/mellanox/mlx5/core/en.h | 1 + .../ethernet/mellanox/mlx5/core/en/params.h | 1 + .../net/ethernet/mellanox/mlx5/core/en/ptp.c | 4 +++- .../net/ethernet/mellanox/mlx5/core/en/ptp.h | 1 + .../net/ethernet/mellanox/mlx5/core/en_main.c | 18 ++++++++++++++---- 5 files changed, 20 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index 0dd3bc0f4caa..9c73165653bf 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -788,6 +788,7 @@ struct mlx5e_channel { int vec_ix; int sd_ix; int cpu; + struct mlx5_sq_bfreg *bfreg; /* Sync between icosq recovery and XSK enable/disable. */ struct mutex icosq_recovery_lock; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/params.h b/drivers/net/ethernet/mellanox/mlx5/core/en/params.h index e3edf79dde5f..00617c65fe3c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/params.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/params.h @@ -51,6 +51,7 @@ struct mlx5e_create_sq_param { u32 tisn; u8 tis_lst_sz; u8 min_inline_mode; + u32 uar_page; }; /* Striding RQ dynamic parameters */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c b/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c index 7c1d9a9ea464..a392578a063c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c @@ -334,7 +334,7 @@ static int mlx5e_ptp_alloc_txqsq(struct mlx5e_ptp *c, int txq_ix, sq->mdev = mdev; sq->ch_ix = MLX5E_PTP_CHANNEL_IX; sq->txq_ix = txq_ix; - sq->uar_map = mdev->priv.bfreg.map; + sq->uar_map = c->bfreg->map; sq->min_inline_mode = params->tx_min_inline_mode; sq->hw_mtu = MLX5E_SW2HW_MTU(params, params->sw_mtu); sq->stats = &c->priv->ptp_stats.sq[tc]; @@ -486,6 +486,7 @@ static int mlx5e_ptp_open_txqsq(struct mlx5e_ptp *c, u32 tisn, csp.wq_ctrl = &txqsq->wq_ctrl; csp.min_inline_mode = txqsq->min_inline_mode; csp.ts_cqe_to_dest_cqn = ptpsq->ts_cq.mcq.cqn; + csp.uar_page = c->bfreg->index; err = mlx5e_create_sq_rdy(c->mdev, sqp, &csp, 0, &txqsq->sqn); if (err) @@ -900,6 +901,7 @@ int mlx5e_ptp_open(struct mlx5e_priv *priv, struct mlx5e_params *params, c->num_tc = mlx5e_get_dcb_num_tc(params); c->stats = &priv->ptp_stats.ch; c->lag_port = lag_port; + c->bfreg = &mdev->priv.bfreg; err = mlx5e_ptp_set_state(c, params); if (err) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.h b/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.h index 883c044852f1..1b3c9648220b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.h @@ -66,6 +66,7 @@ struct mlx5e_ptp { struct mlx5_core_dev *mdev; struct hwtstamp_config *tstamp; DECLARE_BITMAP(state, MLX5E_PTP_STATE_NUM_STATES); + struct mlx5_sq_bfreg *bfreg; }; static inline bool mlx5e_use_ptpsq(struct sk_buff *skb) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 02a538ec2ecb..0425f0e3d3a0 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -1532,7 +1532,7 @@ static int mlx5e_alloc_xdpsq(struct mlx5e_channel *c, sq->pdev = c->pdev; sq->mkey_be = c->mkey_be; sq->channel = c; - sq->uar_map = mdev->priv.bfreg.map; + sq->uar_map = c->bfreg->map; sq->min_inline_mode = params->tx_min_inline_mode; sq->hw_mtu = MLX5E_SW2HW_MTU(params, params->sw_mtu) - ETH_FCS_LEN; sq->xsk_pool = xsk_pool; @@ -1617,7 +1617,7 @@ static int mlx5e_alloc_icosq(struct mlx5e_channel *c, int err; sq->channel = c; - sq->uar_map = mdev->priv.bfreg.map; + sq->uar_map = c->bfreg->map; sq->reserved_room = param->stop_room; param->wq.db_numa_node = cpu_to_node(c->cpu); @@ -1702,7 +1702,7 @@ static int mlx5e_alloc_txqsq(struct mlx5e_channel *c, sq->priv = c->priv; sq->ch_ix = c->ix; sq->txq_ix = txq_ix; - sq->uar_map = mdev->priv.bfreg.map; + sq->uar_map = c->bfreg->map; sq->min_inline_mode = params->tx_min_inline_mode; sq->hw_mtu = MLX5E_SW2HW_MTU(params, params->sw_mtu); sq->max_sq_mpw_wqebbs = mlx5e_get_max_sq_aligned_wqebbs(mdev); @@ -1778,7 +1778,7 @@ static int mlx5e_create_sq(struct mlx5_core_dev *mdev, MLX5_SET(sqc, sqc, flush_in_error_en, 1); MLX5_SET(wq, wq, wq_type, MLX5_WQ_TYPE_CYCLIC); - MLX5_SET(wq, wq, uar_page, mdev->priv.bfreg.index); + MLX5_SET(wq, wq, uar_page, csp->uar_page); MLX5_SET(wq, wq, log_wq_pg_sz, csp->wq_ctrl->buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT); MLX5_SET64(wq, wq, dbr_addr, csp->wq_ctrl->db.dma); @@ -1882,6 +1882,7 @@ int mlx5e_open_txqsq(struct mlx5e_channel *c, u32 tisn, int txq_ix, csp.cqn = sq->cq.mcq.cqn; csp.wq_ctrl = &sq->wq_ctrl; csp.min_inline_mode = sq->min_inline_mode; + csp.uar_page = c->bfreg->index; err = mlx5e_create_sq_rdy(c->mdev, param, &csp, qos_queue_group_id, &sq->sqn); if (err) goto err_free_txqsq; @@ -2052,6 +2053,7 @@ static int mlx5e_open_icosq(struct mlx5e_channel *c, struct mlx5e_params *params csp.cqn = sq->cq.mcq.cqn; csp.wq_ctrl = &sq->wq_ctrl; csp.min_inline_mode = params->tx_min_inline_mode; + csp.uar_page = c->bfreg->index; err = mlx5e_create_sq_rdy(c->mdev, param, &csp, 0, &sq->sqn); if (err) goto err_free_icosq; @@ -2112,6 +2114,7 @@ int mlx5e_open_xdpsq(struct mlx5e_channel *c, struct mlx5e_params *params, csp.cqn = sq->cq.mcq.cqn; csp.wq_ctrl = &sq->wq_ctrl; csp.min_inline_mode = sq->min_inline_mode; + csp.uar_page = c->bfreg->index; set_bit(MLX5E_SQ_STATE_ENABLED, &sq->state); err = mlx5e_create_sq_rdy(c->mdev, param, &csp, 0, &sq->sqn); @@ -2740,6 +2743,11 @@ void mlx5e_trigger_napi_sched(struct napi_struct *napi) local_bh_enable(); } +static void mlx5e_channel_pick_doorbell(struct mlx5e_channel *c) +{ + c->bfreg = &c->mdev->priv.bfreg; +} + static int mlx5e_open_channel(struct mlx5e_priv *priv, int ix, struct mlx5e_params *params, struct xsk_buff_pool *xsk_pool, @@ -2794,6 +2802,8 @@ static int mlx5e_open_channel(struct mlx5e_priv *priv, int ix, c->aff_mask = irq_get_effective_affinity_mask(irq); c->lag_port = mlx5e_enumerate_lag_port(mdev, ix); + mlx5e_channel_pick_doorbell(c); + netif_napi_add_config_locked(netdev, &c->napi, mlx5e_napi_poll, ix); netif_napi_set_irq_locked(&c->napi, irq); -- 2.31.1