From: Ben Ben-Ishay After the ULP consumed the buffers of the offloaded request, it calls the ddp_teardown op to release the NIC mapping for them and allow the NIC to reuse the HW contexts associated with offloading this IO. We do a fast/async un-mapping via UMR WQE. In this case, the ULP does holds off with completing the request towards the upper/application layers until the HW unmapping is done. When the corresponding CQE is received, a notification is done via the the teardown_done ddp callback advertised by the ULP in the ddp context. Signed-off-by: Ben Ben-Ishay Signed-off-by: Boris Pismenny Signed-off-by: Or Gerlitz Signed-off-by: Yoray Zack Signed-off-by: Aurelien Aptel Reviewed-by: Tariq Toukan --- .../net/ethernet/mellanox/mlx5/core/en/txrx.h | 4 ++ .../mellanox/mlx5/core/en_accel/nvmeotcp.c | 58 +++++++++++++++++-- .../mellanox/mlx5/core/en_accel/nvmeotcp.h | 1 + .../net/ethernet/mellanox/mlx5/core/en_rx.c | 6 ++ 4 files changed, 63 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h b/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h index 61e9a8539983..ca32a68a22f7 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h @@ -73,6 +73,7 @@ enum mlx5e_icosq_wqe_type { #endif #ifdef CONFIG_MLX5_EN_NVMEOTCP MLX5E_ICOSQ_WQE_UMR_NVMEOTCP, + MLX5E_ICOSQ_WQE_UMR_NVMEOTCP_INVALIDATE, MLX5E_ICOSQ_WQE_SET_PSV_NVMEOTCP, #endif }; @@ -264,6 +265,9 @@ struct mlx5e_icosq_wqe_info { struct { struct mlx5e_nvmeotcp_queue *queue; } nvmeotcp_q; + struct { + struct mlx5e_nvmeotcp_queue_entry *entry; + } nvmeotcp_qe; #endif }; }; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/nvmeotcp.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/nvmeotcp.c index 48dd242af2bb..639a9187d88c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/nvmeotcp.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/nvmeotcp.c @@ -173,6 +173,13 @@ build_nvmeotcp_klm_umr(struct mlx5e_nvmeotcp_queue *queue, cseg->qpn_ds = cpu_to_be32((sqn << MLX5_WQE_CTRL_QPN_SHIFT) | ds_cnt); cseg->general_id = cpu_to_be32(id); + if (!klm_entries) { /* this is invalidate */ + ucseg->mkey_mask = cpu_to_be64(MLX5_MKEY_MASK_FREE); + ucseg->flags = MLX5_UMR_INLINE; + mkc->status = MLX5_MKEY_STATUS_FREE; + return; + } + if (klm_type == KLM_UMR && !klm_offset) { ucseg->mkey_mask = cpu_to_be64(MLX5_MKEY_MASK_XLT_OCT_SIZE | MLX5_MKEY_MASK_LEN | @@ -285,8 +292,8 @@ build_nvmeotcp_static_params(struct mlx5e_nvmeotcp_queue *queue, static void mlx5e_nvmeotcp_fill_wi(struct mlx5e_nvmeotcp_queue *nvmeotcp_queue, - struct mlx5e_icosq *sq, u32 wqebbs, u16 pi, - enum wqe_type type) + struct mlx5e_icosq *sq, u32 wqebbs, + u16 pi, u16 ccid, enum wqe_type type) { struct mlx5e_icosq_wqe_info *wi = &sq->db.wqe_info[pi]; @@ -298,6 +305,10 @@ mlx5e_nvmeotcp_fill_wi(struct mlx5e_nvmeotcp_queue *nvmeotcp_queue, wi->wqe_type = MLX5E_ICOSQ_WQE_SET_PSV_NVMEOTCP; wi->nvmeotcp_q.queue = nvmeotcp_queue; break; + case KLM_INV_UMR: + wi->wqe_type = MLX5E_ICOSQ_WQE_UMR_NVMEOTCP_INVALIDATE; + wi->nvmeotcp_qe.entry = &nvmeotcp_queue->ccid_table[ccid]; + break; default: /* cases where no further action is required upon * completion, such as ddp setup @@ -319,7 +330,7 @@ mlx5e_nvmeotcp_rx_post_static_params_wqe(struct mlx5e_nvmeotcp_queue *queue, wqebbs = MLX5E_TRANSPORT_SET_STATIC_PARAMS_WQEBBS; pi = mlx5e_icosq_get_next_pi(sq, wqebbs); wqe = MLX5E_TRANSPORT_FETCH_SET_STATIC_PARAMS_WQE(sq, pi); - mlx5e_nvmeotcp_fill_wi(NULL, sq, wqebbs, pi, BSF_UMR); + mlx5e_nvmeotcp_fill_wi(NULL, sq, wqebbs, pi, 0, BSF_UMR); build_nvmeotcp_static_params(queue, wqe, resync_seq, queue->crc_rx); sq->pc += wqebbs; mlx5e_notify_hw(&sq->wq, sq->pc, sq->uar_map, &wqe->ctrl); @@ -337,7 +348,7 @@ mlx5e_nvmeotcp_rx_post_progress_params_wqe(struct mlx5e_nvmeotcp_queue *queue, wqebbs = MLX5E_NVMEOTCP_PROGRESS_PARAMS_WQEBBS; pi = mlx5e_icosq_get_next_pi(sq, wqebbs); wqe = MLX5E_NVMEOTCP_FETCH_PROGRESS_PARAMS_WQE(sq, pi); - mlx5e_nvmeotcp_fill_wi(queue, sq, wqebbs, pi, SET_PSV_UMR); + mlx5e_nvmeotcp_fill_wi(queue, sq, wqebbs, pi, 0, SET_PSV_UMR); build_nvmeotcp_progress_params(queue, wqe, seq); sq->pc += wqebbs; mlx5e_notify_hw(&sq->wq, sq->pc, sq->uar_map, &wqe->ctrl); @@ -363,7 +374,7 @@ post_klm_wqe(struct mlx5e_nvmeotcp_queue *queue, wqebbs = DIV_ROUND_UP(wqe_sz, MLX5_SEND_WQE_BB); pi = mlx5e_icosq_get_next_pi(sq, wqebbs); wqe = MLX5E_NVMEOTCP_FETCH_KLM_WQE(sq, pi); - mlx5e_nvmeotcp_fill_wi(queue, sq, wqebbs, pi, wqe_type); + mlx5e_nvmeotcp_fill_wi(queue, sq, wqebbs, pi, ccid, wqe_type); build_nvmeotcp_klm_umr(queue, wqe, ccid, cur_klm_entries, klm_offset, klm_length, wqe_type); sq->pc += wqebbs; @@ -378,7 +389,10 @@ mlx5e_nvmeotcp_post_klm_wqe(struct mlx5e_nvmeotcp_queue *queue, struct mlx5e_icosq *sq = &queue->sq; u32 klm_offset = 0, wqes, i; - wqes = DIV_ROUND_UP(klm_length, queue->max_klms_per_wqe); + if (wqe_type == KLM_INV_UMR) + wqes = 1; + else + wqes = DIV_ROUND_UP(klm_length, queue->max_klms_per_wqe); spin_lock_bh(&queue->sq_lock); @@ -905,12 +919,44 @@ void mlx5e_nvmeotcp_ctx_complete(struct mlx5e_icosq_wqe_info *wi) complete(&queue->static_params_done); } +void mlx5e_nvmeotcp_ddp_inv_done(struct mlx5e_icosq_wqe_info *wi) +{ + struct mlx5e_nvmeotcp_queue_entry *q_entry = wi->nvmeotcp_qe.entry; + struct mlx5e_nvmeotcp_queue *queue = q_entry->queue; + struct mlx5_core_dev *mdev = queue->priv->mdev; + struct ulp_ddp_io *ddp = q_entry->ddp; + const struct ulp_ddp_ulp_ops *ulp_ops; + + dma_unmap_sg(mdev->device, ddp->sg_table.sgl, + ddp->nents, DMA_FROM_DEVICE); + + q_entry->sgl_length = 0; + + ulp_ops = inet_csk(queue->sk)->icsk_ulp_ddp_ops; + if (ulp_ops && ulp_ops->ddp_teardown_done) + ulp_ops->ddp_teardown_done(q_entry->ddp_ctx); +} + static void mlx5e_nvmeotcp_ddp_teardown(struct net_device *netdev, struct sock *sk, struct ulp_ddp_io *ddp, void *ddp_ctx) { + struct mlx5e_nvmeotcp_queue_entry *q_entry; + struct mlx5e_nvmeotcp_queue *queue; + + queue = container_of(ulp_ddp_get_ctx(sk), struct mlx5e_nvmeotcp_queue, + ulp_ddp_ctx); + q_entry = &queue->ccid_table[ddp->command_id]; + WARN_ONCE(q_entry->sgl_length == 0, + "Invalidation of empty sgl (CID 0x%x, queue 0x%x)\n", + ddp->command_id, queue->id); + + q_entry->ddp_ctx = ddp_ctx; + q_entry->queue = queue; + + mlx5e_nvmeotcp_post_klm_wqe(queue, KLM_INV_UMR, ddp->command_id, 0); } static void diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/nvmeotcp.h b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/nvmeotcp.h index 4850c19e18c7..67805adc6fdf 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/nvmeotcp.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/nvmeotcp.h @@ -113,6 +113,7 @@ void mlx5e_nvmeotcp_cleanup(struct mlx5e_priv *priv); struct mlx5e_nvmeotcp_queue * mlx5e_nvmeotcp_get_queue(struct mlx5e_nvmeotcp *nvmeotcp, int id); void mlx5e_nvmeotcp_put_queue(struct mlx5e_nvmeotcp_queue *queue); +void mlx5e_nvmeotcp_ddp_inv_done(struct mlx5e_icosq_wqe_info *wi); void mlx5e_nvmeotcp_ctx_complete(struct mlx5e_icosq_wqe_info *wi); static inline void mlx5e_nvmeotcp_init_rx(struct mlx5e_priv *priv) {} void mlx5e_nvmeotcp_cleanup_rx(struct mlx5e_priv *priv); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c index 3823844b08c0..51b24b0525af 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c @@ -963,6 +963,9 @@ void mlx5e_free_icosq_descs(struct mlx5e_icosq *sq) break; #endif #ifdef CONFIG_MLX5_EN_NVMEOTCP + case MLX5E_ICOSQ_WQE_UMR_NVMEOTCP_INVALIDATE: + mlx5e_nvmeotcp_ddp_inv_done(wi); + break; case MLX5E_ICOSQ_WQE_SET_PSV_NVMEOTCP: mlx5e_nvmeotcp_ctx_complete(wi); break; @@ -1073,6 +1076,9 @@ int mlx5e_poll_ico_cq(struct mlx5e_cq *cq, int budget) #ifdef CONFIG_MLX5_EN_NVMEOTCP case MLX5E_ICOSQ_WQE_UMR_NVMEOTCP: break; + case MLX5E_ICOSQ_WQE_UMR_NVMEOTCP_INVALIDATE: + mlx5e_nvmeotcp_ddp_inv_done(wi); + break; case MLX5E_ICOSQ_WQE_SET_PSV_NVMEOTCP: mlx5e_nvmeotcp_ctx_complete(wi); break; -- 2.34.1