Query dma-buf TPH metadata when registering a dma-buf MR for peer-to-peer access and translate the returned steering tag into an mlx5 ST index. The DMAH path keeps priority; dma-buf metadata is the fallback when no DMAH is supplied. Track per-MR ownership of the allocated ST index and release it on MR setup failure, destroy, and before re-entering the FRMR pool. Free mlx5_st_idx_data when its refcount reaches zero to fix a pre-existing leak in mlx5_st_dealloc_index(). Signed-off-by: Zhiping Zhang --- drivers/infiniband/hw/mlx5/mlx5_ib.h | 6 ++ drivers/infiniband/hw/mlx5/mr.c | 86 ++++++++++++++++++- .../net/ethernet/mellanox/mlx5/core/lib/st.c | 28 ++++-- include/linux/mlx5/driver.h | 7 ++ 4 files changed, 115 insertions(+), 12 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index e156dc4d7529..4ab867392267 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -721,6 +721,12 @@ struct mlx5_ib_mr { u8 revoked :1; /* Indicates previous dmabuf page fault occurred */ u8 dmabuf_faulted:1; + /* Set when the MR owns dmabuf_st_index and must + * release it via mlx5_st_dealloc_index() once the + * firmware mkey is no longer referencing it. 
+ */ + u8 dmabuf_st_owned:1; + u16 dmabuf_st_index; struct mlx5_ib_mkey null_mmkey; }; }; diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c index 3b6da45061a5..8059b5e4da97 100644 --- a/drivers/infiniband/hw/mlx5/mr.c +++ b/drivers/infiniband/hw/mlx5/mr.c @@ -38,6 +38,7 @@ #include #include #include +#include #include #include #include "dm.h" @@ -46,6 +47,8 @@ #include "data_direct.h" #include "dmah.h" +MODULE_IMPORT_NS("DMA_BUF"); + static int mkey_max_umr_order(struct mlx5_ib_dev *dev) { if (MLX5_CAP_GEN(dev->mdev, umr_extended_translation_offset)) @@ -899,6 +902,63 @@ static struct dma_buf_attach_ops mlx5_ib_dmabuf_attach_ops = { .invalidate_mappings = mlx5_ib_dmabuf_invalidate_cb, }; +/* + * Query TPH metadata from @dmabuf and translate the raw steering tag into + * an mlx5 ST index. On success *@st_index is set and the caller becomes the + * owner of *@st_index (must be released with mlx5_st_dealloc_index() + * once the firmware mkey no longer references it). On any failure + * *@st_index and *@ph are left as the no-TPH defaults set by the caller. + * + * @dmabuf must already be referenced by the caller (e.g. via the umem's + * attachment) so we don't re-resolve the user's fd here and avoid a + * dup2() TOCTOU between umem creation and TPH lookup. 
+ */ +static void get_tph_mr_dmabuf(struct mlx5_ib_dev *dev, struct dma_buf *dmabuf, + u16 *st_index, u8 *ph) +{ + u8 req_type; + u16 steering_tag; + u8 st_width; + int ret; + + if (!dmabuf->ops->get_tph) + return; + + req_type = pcie_tph_enabled_req_type(dev->mdev->pdev); + switch (req_type) { + case PCI_TPH_REQ_TPH_ONLY: + st_width = 8; + break; + case PCI_TPH_REQ_EXT_TPH: + st_width = 16; + break; + default: + return; + } + + ret = dmabuf->ops->get_tph(dmabuf, &steering_tag, ph, st_width); + if (ret) { + mlx5_ib_dbg(dev, "get_tph failed (%d)\n", ret); + *ph = MLX5_IB_NO_PH; + return; + } + + ret = mlx5_st_alloc_index_by_tag(dev->mdev, steering_tag, st_index); + if (ret) { + *ph = MLX5_IB_NO_PH; + mlx5_ib_dbg(dev, "st_alloc_index_by_tag failed (%d)\n", ret); + } +} + +static void mlx5_ib_mr_put_dmabuf_st(struct mlx5_ib_mr *mr) +{ + if (mr->umem && mr->dmabuf_st_owned) { + mlx5_st_dealloc_index(mr_to_mdev(mr)->mdev, + mr->dmabuf_st_index); + mr->dmabuf_st_owned = 0; + } +} + static struct ib_mr * reg_user_mr_dmabuf(struct ib_pd *pd, struct device *dma_device, u64 offset, u64 length, u64 virt_addr, @@ -941,16 +1001,26 @@ reg_user_mr_dmabuf(struct ib_pd *pd, struct device *dma_device, ph = dmah->ph; if (dmah->valid_fields & BIT(IB_DMAH_CPU_ID_EXISTS)) st_index = mdmah->st_index; + } else { + get_tph_mr_dmabuf(dev, umem_dmabuf->attach->dmabuf, + &st_index, &ph); } mr = alloc_cacheable_mr(pd, &umem_dmabuf->umem, virt_addr, access_flags, access_mode, st_index, ph); if (IS_ERR(mr)) { + if (!dmah && st_index != MLX5_MKC_PCIE_TPH_NO_STEERING_TAG_INDEX) + mlx5_st_dealloc_index(dev->mdev, st_index); ib_umem_release(&umem_dmabuf->umem); return ERR_CAST(mr); } + if (!dmah && st_index != MLX5_MKC_PCIE_TPH_NO_STEERING_TAG_INDEX) { + mr->dmabuf_st_index = st_index; + mr->dmabuf_st_owned = 1; + } + mlx5_ib_dbg(dev, "mkey 0x%x\n", mr->mmkey.key); atomic_add(ib_umem_num_pages(mr->umem), &dev->mdev->priv.reg_pages); @@ -1377,9 +1447,17 @@ static int mlx5r_handle_mkey_cleanup(struct 
mlx5_ib_mr *mr) bool is_odp = is_odp_mr(mr); int ret; - if (mr->ibmr.frmr.pool && !mlx5_umr_revoke_mr_with_lock(mr) && - !ib_frmr_pool_push(mr->ibmr.device, &mr->ibmr)) - return 0; + if (mr->ibmr.frmr.pool && !mlx5_umr_revoke_mr_with_lock(mr)) { + /* + * The mkey has been revoked: firmware no longer references + * dmabuf_st_index, so release it before this mr can re-enter + * the FRMR cache for reuse by another registration. + */ + mlx5_ib_mr_put_dmabuf_st(mr); + + if (!ib_frmr_pool_push(mr->ibmr.device, &mr->ibmr)) + return 0; + } if (is_odp) mutex_lock(&to_ib_umem_odp(mr->umem)->umem_mutex); @@ -1400,6 +1478,8 @@ static int mlx5r_handle_mkey_cleanup(struct mlx5_ib_mr *mr) dma_resv_unlock( to_ib_umem_dmabuf(mr->umem)->attach->dmabuf->resv); } + if (!ret) + mlx5_ib_mr_put_dmabuf_st(mr); return ret; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/st.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/st.c index 997be91f0a13..8929c17c88bc 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/st.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/st.c @@ -29,7 +29,7 @@ struct mlx5_st *mlx5_st_create(struct mlx5_core_dev *dev) u8 direct_mode = 0; u16 num_entries; u32 tbl_loc; - int ret; + int ret = 0; if (!MLX5_CAP_GEN(dev, mkey_pcie_tph)) return NULL; @@ -92,23 +92,18 @@ void mlx5_st_destroy(struct mlx5_core_dev *dev) kfree(st); } -int mlx5_st_alloc_index(struct mlx5_core_dev *dev, enum tph_mem_type mem_type, - unsigned int cpu_uid, u16 *st_index) +int mlx5_st_alloc_index_by_tag(struct mlx5_core_dev *dev, u16 tag, + u16 *st_index) { struct mlx5_st_idx_data *idx_data; struct mlx5_st *st = dev->st; unsigned long index; u32 xa_id; - u16 tag; - int ret; + int ret = 0; if (!st) return -EOPNOTSUPP; - ret = pcie_tph_get_cpu_st(dev->pdev, mem_type, cpu_uid, &tag); - if (ret) - return ret; - if (st->direct_mode) { *st_index = tag; return 0; @@ -152,6 +147,20 @@ int mlx5_st_alloc_index(struct mlx5_core_dev *dev, enum tph_mem_type mem_type, mutex_unlock(&st->lock); 
return ret; } +EXPORT_SYMBOL_GPL(mlx5_st_alloc_index_by_tag); + +int mlx5_st_alloc_index(struct mlx5_core_dev *dev, enum tph_mem_type mem_type, + unsigned int cpu_uid, u16 *st_index) +{ + u16 tag; + int ret; + + ret = pcie_tph_get_cpu_st(dev->pdev, mem_type, cpu_uid, &tag); + if (ret) + return ret; + + return mlx5_st_alloc_index_by_tag(dev, tag, st_index); +} EXPORT_SYMBOL_GPL(mlx5_st_alloc_index); int mlx5_st_dealloc_index(struct mlx5_core_dev *dev, u16 st_index) @@ -175,6 +184,7 @@ int mlx5_st_dealloc_index(struct mlx5_core_dev *dev, u16 st_index) if (refcount_dec_and_test(&idx_data->usecount)) { xa_erase(&st->idx_xa, st_index); + kfree(idx_data); /* We leave PCI config space as was before, no mkey will refer to it */ } diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 04b96c5abb57..523a9ab0ae1e 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -1166,10 +1166,17 @@ int mlx5_dm_sw_icm_dealloc(struct mlx5_core_dev *dev, enum mlx5_sw_icm_type type u64 length, u16 uid, phys_addr_t addr, u32 obj_id); #ifdef CONFIG_PCIE_TPH +int mlx5_st_alloc_index_by_tag(struct mlx5_core_dev *dev, u16 tag, + u16 *st_index); int mlx5_st_alloc_index(struct mlx5_core_dev *dev, enum tph_mem_type mem_type, unsigned int cpu_uid, u16 *st_index); int mlx5_st_dealloc_index(struct mlx5_core_dev *dev, u16 st_index); #else +static inline int mlx5_st_alloc_index_by_tag(struct mlx5_core_dev *dev, + u16 tag, u16 *st_index) +{ + return -EOPNOTSUPP; +} static inline int mlx5_st_alloc_index(struct mlx5_core_dev *dev, enum tph_mem_type mem_type, unsigned int cpu_uid, u16 *st_index) -- 2.53.0-Meta