Applying the corrupted patch by hand mangled the format string, put the s in the right place. Cc: stable@vger.kernel.org Fixes: 654a27f25530 ("RDMA/ionic: bound node_desc sysfs read with %.64s") Reported-by: Brad Spengler Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/ionic/ionic_ibdev.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/ionic/ionic_ibdev.c b/drivers/infiniband/hw/ionic/ionic_ibdev.c index 0382a64839d26a..73a616ae350236 100644 --- a/drivers/infiniband/hw/ionic/ionic_ibdev.c +++ b/drivers/infiniband/hw/ionic/ionic_ibdev.c @@ -185,7 +185,7 @@ static ssize_t hca_type_show(struct device *device, struct ionic_ibdev *dev = rdma_device_to_drv_device(device, struct ionic_ibdev, ibdev); - return sysfs_emit(buf, "%s.64\n", dev->ibdev.node_desc); + return sysfs_emit(buf, "%.64s\n", dev->ibdev.node_desc); } static DEVICE_ATTR_RO(hca_type); -- 2.43.0 Sashiko points out the check for inlen==0 got missed, the ={} was not redundant, put it back. Fixes: a9cd442a5347 ("RDMA: Remove redundant = {} for udata req structs") Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/qp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index 8f50e7342a7694..76d4021475f49b 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -4692,7 +4692,7 @@ int mlx5_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, struct mlx5_ib_dev *dev = to_mdev(ibqp->device); struct mlx5_ib_modify_qp_resp resp = {}; struct mlx5_ib_qp *qp = to_mqp(ibqp); - struct mlx5_ib_modify_qp ucmd; + struct mlx5_ib_modify_qp ucmd = {}; enum ib_qp_type qp_type; enum ib_qp_state cur_state, new_state; int err = -EINVAL; -- 2.43.0 mlx5 has a common pattern implementing a device-global singleton resource where it checks the resource pointer for !NULL and then skips obtaining the lock. This is not ordered properly as observing !NULL doesn't mean that all the data under that pointer is also visible on this CPU when the lock is not taken. Use a release/acquire pairing to explicitly manage this. Pointed out by sashiko, Codex found more cases. Fixes: 5895e70f2e6e ("IB/mlx5: Allocate resources just before first QP/SRQ is created") Fixes: 638420115cc4 ("IB/mlx5: Create UMR QP just before first reg_mr occurs") Link: https://sashiko.dev/#/patchset/SYBPR01MB7881E1E0970268BD69C0BA75AF2B2%40SYBPR01MB7881.ausprd01.prod.outlook.com Assisted-by: Codex:GPT-5.5 Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/main.c | 8 ++++---- drivers/infiniband/hw/mlx5/umr.c | 4 ++-- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 109661c2ac12b0..73fab8a376933d 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -3310,7 +3310,7 @@ int mlx5_ib_dev_res_cq_init(struct mlx5_ib_dev *dev) * devr->c0 is set once, never changed until device unload. * Avoid taking the mutex if initialization is already done. */ - if (devr->c0) + if (smp_load_acquire(&devr->c0)) return 0; mutex_lock(&devr->cq_lock); @@ -3336,7 +3336,7 @@ int mlx5_ib_dev_res_cq_init(struct mlx5_ib_dev *dev) } devr->p0 = pd; - devr->c0 = cq; + smp_store_release(&devr->c0, cq); unlock: mutex_unlock(&devr->cq_lock); @@ -3354,7 +3354,7 @@ int mlx5_ib_dev_res_srq_init(struct mlx5_ib_dev *dev) * devr->s1 is set once, never changed until device unload. * Avoid taking the mutex if initialization is already done. */ - if (devr->s1) + if (smp_load_acquire(&devr->s1)) return 0; mutex_lock(&devr->srq_lock); @@ -3395,7 +3395,7 @@ int mlx5_ib_dev_res_srq_init(struct mlx5_ib_dev *dev) } devr->s0 = s0; - devr->s1 = s1; + smp_store_release(&devr->s1, s1); unlock: mutex_unlock(&devr->srq_lock); diff --git a/drivers/infiniband/hw/mlx5/umr.c b/drivers/infiniband/hw/mlx5/umr.c index 29488fba21a034..f2139474be3751 100644 --- a/drivers/infiniband/hw/mlx5/umr.c +++ b/drivers/infiniband/hw/mlx5/umr.c @@ -147,7 +147,7 @@ int mlx5r_umr_resource_init(struct mlx5_ib_dev *dev) * UMR qp is set once, never changed until device unload. * Avoid taking the mutex if initialization is already done. */ - if (dev->umrc.qp) + if (smp_load_acquire(&dev->umrc.qp)) return 0; mutex_lock(&dev->umrc.init_lock); @@ -185,7 +185,7 @@ int mlx5r_umr_resource_init(struct mlx5_ib_dev *dev) sema_init(&dev->umrc.sem, MAX_UMR_WR); mutex_init(&dev->umrc.lock); dev->umrc.state = MLX5_UMR_STATE_ACTIVE; - dev->umrc.qp = qp; + smp_store_release(&dev->umrc.qp, qp); mutex_unlock(&dev->umrc.init_lock); return 0; -- 2.43.0 Sashiko points out that rx_hash_key_len comes from a uAPI structure and is blindly passed to memcpy, allowing the userspace to trash kernel memory. Bounds check it so the memcpy cannot overflow. Cc: stable@vger.kernel.org Fixes: 0266a177631d ("RDMA/mana_ib: Add a driver for Microsoft Azure Network Adapter") Link: https://sashiko.dev/#/patchset/0-v2-1c49eeb88c48%2B91-rdma_udata_rep_jgg%40nvidia.com?part=1 Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mana/qp.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/infiniband/hw/mana/qp.c b/drivers/infiniband/hw/mana/qp.c index 645581359cee0b..f7bb0d1f0f8034 100644 --- a/drivers/infiniband/hw/mana/qp.c +++ b/drivers/infiniband/hw/mana/qp.c @@ -21,6 +21,9 @@ static int mana_ib_cfg_vport_steering(struct mana_ib_dev *dev, gc = mdev_to_gc(dev); + if (rx_hash_key_len > sizeof(req->hashkey)) + return -EINVAL; + req_buf_size = struct_size(req, indir_tab, MANA_INDIRECT_TABLE_DEF_SIZE); req = kzalloc(req_buf_size, GFP_KERNEL); if (!req) -- 2.43.0 Sashiko points out that the user can specify WQs sharing the same CQ as a part of the uAPI and this will trigger the WARN_ON() then go on to corrupt the kernel. Just reject it outright and fail the QP creation. Cc: stable@vger.kernel.org Fixes: c15d7802a424 ("RDMA/mana_ib: Add CQ interrupt support for RAW QP") Link: https://sashiko.dev/#/patchset/0-v2-1c49eeb88c48%2B91-rdma_udata_rep_jgg%40nvidia.com?part=1 Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mana/cq.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/hw/mana/cq.c b/drivers/infiniband/hw/mana/cq.c index f4cbe21763bf11..2d682428ef202a 100644 --- a/drivers/infiniband/hw/mana/cq.c +++ b/drivers/infiniband/hw/mana/cq.c @@ -137,8 +137,9 @@ int mana_ib_install_cq_cb(struct mana_ib_dev *mdev, struct mana_ib_cq *cq) if (cq->queue.id >= gc->max_num_cqs) return -EINVAL; - /* Create CQ table entry */ - WARN_ON(gc->cq_table[cq->queue.id]); + /* Create CQ table entry, sharing a CQ between WQs is not supported */ + if (gc->cq_table[cq->queue.id]) + return -EINVAL; if (cq->queue.kmem) gdma_cq = cq->queue.kmem; else -- 2.43.0 Sashiko points out there are two bugs here in the error unwind flow, both related to how the WQ table is unwound. First there is a double i-- on the first failure path due to the while loop having a i--, remove it. Second if mana_ib_install_cq_cb() fails then mana_create_wq_obj() is not undone due to the above i--. Cc: stable@vger.kernel.org Fixes: c15d7802a424 ("RDMA/mana_ib: Add CQ interrupt support for RAW QP") Link: https://sashiko.dev/#/patchset/0-v2-1c49eeb88c48%2B91-rdma_udata_rep_jgg%40nvidia.com?part=1 Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mana/qp.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/drivers/infiniband/hw/mana/qp.c b/drivers/infiniband/hw/mana/qp.c index f7bb0d1f0f8034..8e1f052d0ec976 100644 --- a/drivers/infiniband/hw/mana/qp.c +++ b/drivers/infiniband/hw/mana/qp.c @@ -176,11 +176,8 @@ static int mana_ib_create_qp_rss(struct ib_qp *ibqp, struct ib_pd *pd, ret = mana_create_wq_obj(mpc, mpc->port_handle, GDMA_RQ, &wq_spec, &cq_spec, &wq->rx_object); - if (ret) { - /* Do cleanup starting with index i-1 */ - i--; + if (ret) goto fail; - } /* The GDMA regions are now owned by the WQ object */ wq->queue.gdma_region = GDMA_INVALID_DMA_REGION; @@ -200,8 +197,10 @@ static int mana_ib_create_qp_rss(struct ib_qp *ibqp, struct ib_pd *pd, /* Create CQ table entry */ ret = mana_ib_install_cq_cb(mdev, cq); - if (ret) + if (ret) { + mana_destroy_wq_obj(mpc, GDMA_RQ, wq->rx_object); goto fail; + } } resp.num_entries = i; -- 2.43.0 Sashiko points out that mana_ib_cfg_vport_steering() is leaked, the normal destroy path cleans it up. Cc: stable@vger.kernel.org Fixes: 0266a177631d ("RDMA/mana_ib: Add a driver for Microsoft Azure Network Adapter") Link: https://sashiko.dev/#/patchset/0-v1-e911b76a94d1%2B65d95-rdma_udata_rep_jgg%40nvidia.com?part=4 Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mana/qp.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/mana/qp.c b/drivers/infiniband/hw/mana/qp.c index 8e1f052d0ec976..0fbcf449c134b5 100644 --- a/drivers/infiniband/hw/mana/qp.c +++ b/drivers/infiniband/hw/mana/qp.c @@ -217,13 +217,15 @@ static int mana_ib_create_qp_rss(struct ib_qp *ibqp, struct ib_pd *pd, ibdev_dbg(&mdev->ib_dev, "Failed to copy to udata create rss-qp, %d\n", ret); - goto fail; + goto err_disable_vport_rx; } kfree(mana_ind_table); return 0; +err_disable_vport_rx: + mana_disable_vport_rx(mpc); fail: while (i-- > 0) { ibwq = ind_tbl->ind_tbl[i]; -- 2.43.0 The intention of this code is to find matching entries exactly, the driver never creates phys_addr's with different lens so the current expression is not a bug, but it doesn't make sense and confuses review tooling. Search for exact match instead. Link: https://sashiko.dev/#/patchset/0-v1-e911b76a94d1%2B65d95-rdma_udata_rep_jgg%40nvidia.com?part=4 Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/ocrdma/ocrdma_verbs.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c index c17e2a54dbcaf9..463c9a5703fc4e 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c @@ -215,7 +215,7 @@ static void ocrdma_del_mmap(struct ocrdma_ucontext *uctx, u64 phy_addr, mutex_lock(&uctx->mm_list_lock); list_for_each_entry_safe(mm, tmp, &uctx->mm_head, entry) { - if (len != mm->key.len && phy_addr != mm->key.phy_addr) + if (len != mm->key.len || phy_addr != mm->key.phy_addr) continue; list_del(&mm->entry); @@ -233,7 +233,7 @@ static bool ocrdma_search_mmap(struct ocrdma_ucontext *uctx, u64 phy_addr, mutex_lock(&uctx->mm_list_lock); list_for_each_entry(mm, &uctx->mm_head, entry) { - if (len != mm->key.len && phy_addr != mm->key.phy_addr) + if (len != mm->key.len || phy_addr != mm->key.phy_addr) continue; found = true; -- 2.43.0 Sashiko points out that pd->uctx isn't initialized until late in the function so all these error flow references are NULL and will crash. Use the uctx that isn't NULL. Cc: stable@vger.kernel.org Fixes: fe2caefcdf58 ("RDMA/ocrdma: Add driver for Emulex OneConnect IBoE RDMA adapter") Link: https://sashiko.dev/#/patchset/0-v1-e911b76a94d1%2B65d95-rdma_udata_rep_jgg%40nvidia.com?part=4 Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/ocrdma/ocrdma_verbs.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c index 463c9a5703fc4e..a88cc5d84af828 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c @@ -620,9 +620,9 @@ static int ocrdma_copy_pd_uresp(struct ocrdma_dev *dev, struct ocrdma_pd *pd, ucopy_err: if (pd->dpp_enabled) - ocrdma_del_mmap(pd->uctx, dpp_page_addr, PAGE_SIZE); + ocrdma_del_mmap(uctx, dpp_page_addr, PAGE_SIZE); dpp_map_err: - ocrdma_del_mmap(pd->uctx, db_page_addr, db_page_size); + ocrdma_del_mmap(uctx, db_page_addr, db_page_size); return status; } -- 2.43.0 Sashiko points out that pvrdma_uar_free() is already called within pvrdma_dealloc_ucontext(), so calling it before triggers a double free. Cc: stable@vger.kernel.org Fixes: 29c8d9eba550 ("IB: Add vmw_pvrdma driver") Link: https://sashiko.dev/#/patchset/0-v1-e911b76a94d1%2B65d95-rdma_udata_rep_jgg%40nvidia.com?part=4 Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.c index bcd43dc30e21c6..c7c2b41060e526 100644 --- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.c +++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.c @@ -322,7 +322,7 @@ int pvrdma_alloc_ucontext(struct ib_ucontext *uctx, struct ib_udata *udata) uresp.qp_tab_size = vdev->dsr->caps.max_qp; ret = ib_copy_to_udata(udata, &uresp, sizeof(uresp)); if (ret) { - pvrdma_uar_free(vdev, &context->uar); + /* pvrdma_dealloc_ucontext() also frees the UAR */ pvrdma_dealloc_ucontext(&context->ibucontext); return -EFAULT; } -- 2.43.0 Sashiko points out that mlx4_srq_alloc() was not undone during error unwind, add the missing call to mlx4_srq_free(). Cc: stable@vger.kernel.org Fixes: 225c7b1feef1 ("IB/mlx4: Add a driver Mellanox ConnectX InfiniBand adapters") Link: https://sashiko.dev/#/patchset/0-v1-e911b76a94d1%2B65d95-rdma_udata_rep_jgg%40nvidia.com?part=8 Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx4/srq.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/mlx4/srq.c b/drivers/infiniband/hw/mlx4/srq.c index 5b23e5f8b84aca..767840736d583b 100644 --- a/drivers/infiniband/hw/mlx4/srq.c +++ b/drivers/infiniband/hw/mlx4/srq.c @@ -194,13 +194,15 @@ int mlx4_ib_create_srq(struct ib_srq *ib_srq, if (udata) if (ib_copy_to_udata(udata, &srq->msrq.srqn, sizeof (__u32))) { err = -EFAULT; - goto err_wrid; + goto err_srq; } init_attr->attr.max_wr = srq->msrq.max - 1; return 0; +err_srq: + mlx4_srq_free(dev->dev, &srq->msrq); err_wrid: if (udata) mlx4_ib_db_unmap_user(ucontext, &srq->db); -- 2.43.0 Sashiko points out the radix_tree itself is RCU safe, but nothing ever frees the mlx4_srq struct with RCU, and it isn't even accessed within the RCU critical section. It also will crash if an event is delivered before the srq object is finished initializing. Use the spinlock since it isn't easy to make RCU work, use refcount_inc_not_zero() to protect against partially initialized objects, and order the refcount_set() to be after the srq is fully initialized. Cc: stable@vger.kernel.org Fixes: 30353bfc43a1 ("net/mlx4_core: Use RCU to perform radix tree lookup for SRQ") Link: https://sashiko.dev/#/patchset/0-v2-1c49eeb88c48%2B91-rdma_udata_rep_jgg%40nvidia.com?part=5 Signed-off-by: Jason Gunthorpe --- drivers/net/ethernet/mellanox/mlx4/srq.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx4/srq.c b/drivers/net/ethernet/mellanox/mlx4/srq.c index dd890f5d7b725c..8711689120f302 100644 --- a/drivers/net/ethernet/mellanox/mlx4/srq.c +++ b/drivers/net/ethernet/mellanox/mlx4/srq.c @@ -44,13 +44,14 @@ void mlx4_srq_event(struct mlx4_dev *dev, u32 srqn, int event_type) { struct mlx4_srq_table *srq_table = &mlx4_priv(dev)->srq_table; struct mlx4_srq *srq; + unsigned long flags; - rcu_read_lock(); + spin_lock_irqsave(&srq_table->lock, flags); srq = radix_tree_lookup(&srq_table->tree, srqn & (dev->caps.num_srqs - 1)); - rcu_read_unlock(); - if (srq) - refcount_inc(&srq->refcount); - else { + if (!srq || !refcount_inc_not_zero(&srq->refcount)) + srq = NULL; + spin_unlock_irqrestore(&srq_table->lock, flags); + if (!srq) { mlx4_warn(dev, "Async event for bogus SRQ %08x\n", srqn); return; } @@ -203,8 +204,8 @@ int mlx4_srq_alloc(struct mlx4_dev *dev, u32 pdn, u32 cqn, u16 xrcd, if (err) goto err_radix; - refcount_set(&srq->refcount, 1); init_completion(&srq->free); + refcount_set_release(&srq->refcount, 1); return 0; -- 2.43.0 Sashiko points out that once the srq memory is stored into the xarray by alloc_srqc() it can immediately be looked up by: xa_lock(&srq_table->xa); srq = xa_load(&srq_table->xa, srqn & (hr_dev->caps.num_srqs - 1)); if (srq) refcount_inc(&srq->refcount); xa_unlock(&srq_table->xa); Which will fail refcount debug because the refcount is 0 and then crash: srq->event(srq, event_type); Because event is NULL. Use refcount_inc_not_zero() instead to ensure a partially prepared srq is never retrieved from the event handler and fix the ordering of the initialization so refcount becomes 1 only after it is fully ready. All the initialization must be done before calling free_srqc() since it depends on the completion and refcount. Fixes: 9a4435375cd1 ("IB/hns: Add driver files for hns RoCE driver") Link: https://sashiko.dev/#/patchset/0-v1-e911b76a94d1%2B65d95-rdma_udata_rep_jgg%40nvidia.com?part=3 Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hns/hns_roce_srq.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/infiniband/hw/hns/hns_roce_srq.c b/drivers/infiniband/hw/hns/hns_roce_srq.c index cb848e8e6bbd76..8b94cbdfa54dfa 100644 --- a/drivers/infiniband/hw/hns/hns_roce_srq.c +++ b/drivers/infiniband/hw/hns/hns_roce_srq.c @@ -16,8 +16,8 @@ void hns_roce_srq_event(struct hns_roce_dev *hr_dev, u32 srqn, int event_type) xa_lock(&srq_table->xa); srq = xa_load(&srq_table->xa, srqn & (hr_dev->caps.num_srqs - 1)); - if (srq) - refcount_inc(&srq->refcount); + if (srq && !refcount_inc_not_zero(&srq->refcount)) + srq = NULL; xa_unlock(&srq_table->xa); if (!srq) { @@ -470,6 +470,10 @@ int hns_roce_create_srq(struct ib_srq *ib_srq, if (ret) goto err_srqn; + srq->event = hns_roce_ib_srq_event; + init_completion(&srq->free); + refcount_set_release(&srq->refcount, 1); + if (udata) { resp.cap_flags = srq->cap_flags; resp.srqn = srq->srqn; @@ -480,10 +484,6 @@ int hns_roce_create_srq(struct ib_srq *ib_srq, } } - srq->event = hns_roce_ib_srq_event; - refcount_set(&srq->refcount, 1); - init_completion(&srq->free); - return 0; err_srqc: -- 2.43.0 Similar to the SRQ case the hr_qp is stored in the xarray before it is fully initialized. Unlike the SRQ case the error unwinds do not wait for the completion so keep the refcount 0 until the function succeeds. Fixes: 9a4435375cd1 ("IB/hns: Add driver files for hns RoCE driver") Suggested-by: Junxian Huang Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hns/hns_roce_qp.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/infiniband/hw/hns/hns_roce_qp.c b/drivers/infiniband/hw/hns/hns_roce_qp.c index a27ea85bb06323..f94ba98871f0d0 100644 --- a/drivers/infiniband/hw/hns/hns_roce_qp.c +++ b/drivers/infiniband/hw/hns/hns_roce_qp.c @@ -47,8 +47,8 @@ static struct hns_roce_qp *hns_roce_qp_lookup(struct hns_roce_dev *hr_dev, xa_lock_irqsave(&hr_dev->qp_table_xa, flags); qp = __hns_roce_qp_lookup(hr_dev, qpn); - if (qp) - refcount_inc(&qp->refcount); + if (qp && !refcount_inc_not_zero(&qp->refcount)) + qp = NULL; xa_unlock_irqrestore(&hr_dev->qp_table_xa, flags); if (!qp) @@ -1251,8 +1251,8 @@ static int hns_roce_create_qp_common(struct hns_roce_dev *hr_dev, hr_qp->ibqp.qp_num = hr_qp->qpn; hr_qp->event = hns_roce_ib_qp_event; - refcount_set(&hr_qp->refcount, 1); init_completion(&hr_qp->free); + refcount_set_release(&hr_qp->refcount, 1); return 0; -- 2.43.0 Sashiko points out that hns_roce_qp_remove() requires the caller to hold locks. The error flow in hns_roce_create_qp_common() doesn't hold those locks for the error unwind so it risks corrupting memory. Grab the same locks the other two callers use. Cc: stable@vger.kernel.org Fixes: e088a685eae9 ("RDMA/hns: Support rq record doorbell for the user space") Link: https://sashiko.dev/#/patchset/0-v2-1c49eeb88c48%2B91-rdma_udata_rep_jgg%40nvidia.com?part=9 Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hns/hns_roce_qp.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/infiniband/hw/hns/hns_roce_qp.c b/drivers/infiniband/hw/hns/hns_roce_qp.c index f94ba98871f0d0..bf04ee84a94392 100644 --- a/drivers/infiniband/hw/hns/hns_roce_qp.c +++ b/drivers/infiniband/hw/hns/hns_roce_qp.c @@ -1171,6 +1171,7 @@ static int hns_roce_create_qp_common(struct hns_roce_dev *hr_dev, struct hns_roce_ib_create_qp_resp resp = {}; struct ib_device *ibdev = &hr_dev->ib_dev; struct hns_roce_ib_create_qp ucmd = {}; + unsigned long flags; int ret; mutex_init(&hr_qp->mutex); @@ -1257,7 +1258,13 @@ static int hns_roce_create_qp_common(struct hns_roce_dev *hr_dev, return 0; err_flow_ctrl: + spin_lock_irqsave(&hr_dev->qp_list_lock, flags); + hns_roce_lock_cqs(init_attr->send_cq ? to_hr_cq(init_attr->send_cq) : NULL, + init_attr->recv_cq ? to_hr_cq(init_attr->recv_cq) : NULL); hns_roce_qp_remove(hr_dev, hr_qp); + hns_roce_unlock_cqs(init_attr->send_cq ? to_hr_cq(init_attr->send_cq) : NULL, + init_attr->recv_cq ? to_hr_cq(init_attr->recv_cq) : NULL); + spin_unlock_irqrestore(&hr_dev->qp_list_lock, flags); err_store: free_qpc(hr_dev, hr_qp); err_qpc: -- 2.43.0