Remove the io_ring_ctx arg from io_region_pin_pages() and
io_region_allocate_pages(); it isn't used.

Signed-off-by: David Wei
---
 io_uring/memmap.c | 12 +++++-------
 1 file changed, 5 insertions(+), 7 deletions(-)

diff --git a/io_uring/memmap.c b/io_uring/memmap.c
index aa388ecd4754..d1318079c337 100644
--- a/io_uring/memmap.c
+++ b/io_uring/memmap.c
@@ -131,9 +131,8 @@ static int io_region_init_ptr(struct io_mapped_region *mr)
 	return 0;
 }
 
-static int io_region_pin_pages(struct io_ring_ctx *ctx,
-			       struct io_mapped_region *mr,
-			       struct io_uring_region_desc *reg)
+static int io_region_pin_pages(struct io_mapped_region *mr,
+			       struct io_uring_region_desc *reg)
 {
 	unsigned long size = mr->nr_pages << PAGE_SHIFT;
 	struct page **pages;
@@ -150,8 +149,7 @@ static int io_region_pin_pages(struct io_ring_ctx *ctx,
 	return 0;
 }
 
-static int io_region_allocate_pages(struct io_ring_ctx *ctx,
-				    struct io_mapped_region *mr,
+static int io_region_allocate_pages(struct io_mapped_region *mr,
 				    struct io_uring_region_desc *reg,
 				    unsigned long mmap_offset)
 {
@@ -219,9 +217,9 @@ int io_create_region(struct io_ring_ctx *ctx, struct io_mapped_region *mr,
 	mr->nr_pages = nr_pages;
 
 	if (reg->flags & IORING_MEM_REGION_TYPE_USER)
-		ret = io_region_pin_pages(ctx, mr, reg);
+		ret = io_region_pin_pages(mr, reg);
 	else
-		ret = io_region_allocate_pages(ctx, mr, reg, mmap_offset);
+		ret = io_region_allocate_pages(mr, reg, mmap_offset);
 	if (ret)
 		goto out_free;
 
-- 
2.47.3
Refactor io_free_region() to take user_struct directly, instead of
accessing it from the ring ctx.

Signed-off-by: David Wei
---
 io_uring/io_uring.c | 6 +++---
 io_uring/kbuf.c     | 4 ++--
 io_uring/memmap.c   | 8 ++++----
 io_uring/memmap.h   | 2 +-
 io_uring/register.c | 6 +++---
 io_uring/zcrx.c     | 2 +-
 6 files changed, 14 insertions(+), 14 deletions(-)

diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c
index 200b6c4bb2cc..7d42748774f8 100644
--- a/io_uring/io_uring.c
+++ b/io_uring/io_uring.c
@@ -2798,8 +2798,8 @@ static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events, u32 flags,
 
 static void io_rings_free(struct io_ring_ctx *ctx)
 {
-	io_free_region(ctx, &ctx->sq_region);
-	io_free_region(ctx, &ctx->ring_region);
+	io_free_region(ctx->user, &ctx->sq_region);
+	io_free_region(ctx->user, &ctx->ring_region);
 	ctx->rings = NULL;
 	ctx->sq_sqes = NULL;
 }
@@ -2884,7 +2884,7 @@ static __cold void io_ring_ctx_free(struct io_ring_ctx *ctx)
 	io_eventfd_unregister(ctx);
 	io_free_alloc_caches(ctx);
 	io_destroy_buffers(ctx);
-	io_free_region(ctx, &ctx->param_region);
+	io_free_region(ctx->user, &ctx->param_region);
 	mutex_unlock(&ctx->uring_lock);
 	if (ctx->sq_creds)
 		put_cred(ctx->sq_creds);
diff --git a/io_uring/kbuf.c b/io_uring/kbuf.c
index c034c90396bc..8a329556f8df 100644
--- a/io_uring/kbuf.c
+++ b/io_uring/kbuf.c
@@ -428,7 +428,7 @@ static int io_remove_buffers_legacy(struct io_ring_ctx *ctx,
 static void io_put_bl(struct io_ring_ctx *ctx, struct io_buffer_list *bl)
 {
 	if (bl->flags & IOBL_BUF_RING)
-		io_free_region(ctx, &bl->region);
+		io_free_region(ctx->user, &bl->region);
 	else
 		io_remove_buffers_legacy(ctx, bl, -1U);
 
@@ -672,7 +672,7 @@ int io_register_pbuf_ring(struct io_ring_ctx *ctx, void __user *arg)
 	io_buffer_add_list(ctx, bl, reg.bgid);
 	return 0;
 fail:
-	io_free_region(ctx, &bl->region);
+	io_free_region(ctx->user, &bl->region);
 	kfree(bl);
 	return ret;
 }
diff --git a/io_uring/memmap.c b/io_uring/memmap.c
index d1318079c337..b1054fe94568 100644
--- a/io_uring/memmap.c
+++ b/io_uring/memmap.c
@@ -88,7 +88,7 @@ enum {
 	IO_REGION_F_SINGLE_REF		= 4,
 };
 
-void io_free_region(struct io_ring_ctx *ctx, struct io_mapped_region *mr)
+void io_free_region(struct user_struct *user, struct io_mapped_region *mr)
 {
 	if (mr->pages) {
 		long nr_refs = mr->nr_pages;
@@ -105,8 +105,8 @@ void io_free_region(struct io_ring_ctx *ctx, struct io_mapped_region *mr)
 	}
 	if ((mr->flags & IO_REGION_F_VMAP) && mr->ptr)
 		vunmap(mr->ptr);
-	if (mr->nr_pages && ctx->user)
-		__io_unaccount_mem(ctx->user, mr->nr_pages);
+	if (mr->nr_pages && user)
+		__io_unaccount_mem(user, mr->nr_pages);
 
 	memset(mr, 0, sizeof(*mr));
 }
@@ -228,7 +228,7 @@ int io_create_region(struct io_ring_ctx *ctx, struct io_mapped_region *mr,
 		goto out_free;
 	return 0;
 out_free:
-	io_free_region(ctx, mr);
+	io_free_region(ctx->user, mr);
 	return ret;
 }
diff --git a/io_uring/memmap.h b/io_uring/memmap.h
index 58002976e0c3..a7c476f499d5 100644
--- a/io_uring/memmap.h
+++ b/io_uring/memmap.h
@@ -16,7 +16,7 @@ unsigned long io_uring_get_unmapped_area(struct file *file, unsigned long addr,
 					 unsigned long flags);
 int io_uring_mmap(struct file *file, struct vm_area_struct *vma);
 
-void io_free_region(struct io_ring_ctx *ctx, struct io_mapped_region *mr);
+void io_free_region(struct user_struct *user, struct io_mapped_region *mr);
 int io_create_region(struct io_ring_ctx *ctx, struct io_mapped_region *mr,
 		     struct io_uring_region_desc *reg,
 		     unsigned long mmap_offset);
diff --git a/io_uring/register.c b/io_uring/register.c
index 1a3e05be6e7b..023f5e7a18da 100644
--- a/io_uring/register.c
+++ b/io_uring/register.c
@@ -381,8 +381,8 @@ struct io_ring_ctx_rings {
 static void io_register_free_rings(struct io_ring_ctx *ctx,
 				   struct io_ring_ctx_rings *r)
 {
-	io_free_region(ctx, &r->sq_region);
-	io_free_region(ctx, &r->ring_region);
+	io_free_region(ctx->user, &r->sq_region);
+	io_free_region(ctx->user, &r->ring_region);
 }
 
 #define swap_old(ctx, o, n, field)		\
@@ -604,7 +604,7 @@ static int io_register_mem_region(struct io_ring_ctx *ctx, void __user *uarg)
 	if (ret)
 		return ret;
 	if (copy_to_user(rd_uptr, &rd, sizeof(rd))) {
-		io_free_region(ctx, &region);
+		io_free_region(ctx->user, &region);
 		return -EFAULT;
 	}
 
diff --git a/io_uring/zcrx.c b/io_uring/zcrx.c
index a816f5902091..d15453884004 100644
--- a/io_uring/zcrx.c
+++ b/io_uring/zcrx.c
@@ -378,7 +378,7 @@ static int io_allocate_rbuf_ring(struct io_zcrx_ifq *ifq,
 
 static void io_free_rbuf_ring(struct io_zcrx_ifq *ifq)
 {
-	io_free_region(ifq->ctx, &ifq->region);
+	io_free_region(ifq->ctx->user, &ifq->region);
 	ifq->rq_ring = NULL;
 	ifq->rqes = NULL;
 }
-- 
2.47.3
Refactor io_{un}account_mem() to take user_struct and mm_struct
directly, instead of accessing them from the ring ctx.

Signed-off-by: David Wei
---
 io_uring/rsrc.c | 26 ++++++++++++++------------
 io_uring/rsrc.h |  6 ++++--
 io_uring/zcrx.c |  5 +++--
 3 files changed, 21 insertions(+), 16 deletions(-)

diff --git a/io_uring/rsrc.c b/io_uring/rsrc.c
index d787c16dc1c3..59135fe84082 100644
--- a/io_uring/rsrc.c
+++ b/io_uring/rsrc.c
@@ -56,27 +56,29 @@ int __io_account_mem(struct user_struct *user, unsigned long nr_pages)
 	return 0;
 }
 
-void io_unaccount_mem(struct io_ring_ctx *ctx, unsigned long nr_pages)
+void io_unaccount_mem(struct user_struct *user, struct mm_struct *mm_account,
+		      unsigned long nr_pages)
 {
-	if (ctx->user)
-		__io_unaccount_mem(ctx->user, nr_pages);
+	if (user)
+		__io_unaccount_mem(user, nr_pages);
 
-	if (ctx->mm_account)
-		atomic64_sub(nr_pages, &ctx->mm_account->pinned_vm);
+	if (mm_account)
+		atomic64_sub(nr_pages, &mm_account->pinned_vm);
 }
 
-int io_account_mem(struct io_ring_ctx *ctx, unsigned long nr_pages)
+int io_account_mem(struct user_struct *user, struct mm_struct *mm_account,
+		   unsigned long nr_pages)
 {
 	int ret;
 
-	if (ctx->user) {
-		ret = __io_account_mem(ctx->user, nr_pages);
+	if (user) {
+		ret = __io_account_mem(user, nr_pages);
 		if (ret)
 			return ret;
 	}
 
-	if (ctx->mm_account)
-		atomic64_add(nr_pages, &ctx->mm_account->pinned_vm);
+	if (mm_account)
+		atomic64_add(nr_pages, &mm_account->pinned_vm);
 
 	return 0;
 }
@@ -145,7 +147,7 @@ static void io_buffer_unmap(struct io_ring_ctx *ctx, struct io_mapped_ubuf *imu)
 	}
 
 	if (imu->acct_pages)
-		io_unaccount_mem(ctx, imu->acct_pages);
+		io_unaccount_mem(ctx->user, ctx->mm_account, imu->acct_pages);
 	imu->release(imu->priv);
 	io_free_imu(ctx, imu);
 }
@@ -684,7 +686,7 @@ static int io_buffer_account_pin(struct io_ring_ctx *ctx, struct page **pages,
 	if (!imu->acct_pages)
 		return 0;
 
-	ret = io_account_mem(ctx, imu->acct_pages);
+	ret = io_account_mem(ctx->user, ctx->mm_account, imu->acct_pages);
 	if (ret)
 		imu->acct_pages = 0;
 	return ret;
diff --git a/io_uring/rsrc.h b/io_uring/rsrc.h
index a3ca6ba66596..d603f6a47f5e 100644
--- a/io_uring/rsrc.h
+++ b/io_uring/rsrc.h
@@ -120,8 +120,10 @@ int io_files_update(struct io_kiocb *req, unsigned int issue_flags);
 int io_files_update_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe);
 
 int __io_account_mem(struct user_struct *user, unsigned long nr_pages);
-int io_account_mem(struct io_ring_ctx *ctx, unsigned long nr_pages);
-void io_unaccount_mem(struct io_ring_ctx *ctx, unsigned long nr_pages);
+int io_account_mem(struct user_struct *user, struct mm_struct *mm_account,
+		   unsigned long nr_pages);
+void io_unaccount_mem(struct user_struct *user, struct mm_struct *mm_account,
+		      unsigned long nr_pages);
 
 static inline void __io_unaccount_mem(struct user_struct *user,
 				      unsigned long nr_pages)
diff --git a/io_uring/zcrx.c b/io_uring/zcrx.c
index d15453884004..30d3a7b3c407 100644
--- a/io_uring/zcrx.c
+++ b/io_uring/zcrx.c
@@ -200,7 +200,7 @@ static int io_import_umem(struct io_zcrx_ifq *ifq,
 	}
 
 	mem->account_pages = io_count_account_pages(pages, nr_pages);
-	ret = io_account_mem(ifq->ctx, mem->account_pages);
+	ret = io_account_mem(ifq->ctx->user, ifq->ctx->mm_account, mem->account_pages);
 	if (ret < 0)
 		mem->account_pages = 0;
 
@@ -389,7 +389,8 @@ static void io_zcrx_free_area(struct io_zcrx_area *area)
 	io_release_area_mem(&area->mem);
 
 	if (area->mem.account_pages)
-		io_unaccount_mem(area->ifq->ctx, area->mem.account_pages);
+		io_unaccount_mem(area->ifq->ctx->user, area->ifq->ctx->mm_account,
+				 area->mem.account_pages);
 
 	kvfree(area->freelist);
 	kvfree(area->nia.niovs);
-- 
2.47.3
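With io_account_mem() and io_unaccount_mem() taking the user_struct and
mm_struct directly, memory accounting no longer needs a ring ctx at all.
A minimal sketch of a caller that only holds its own accounting handles;
my_pinned_buf and its helpers are hypothetical, not part of the series:

struct my_pinned_buf {
	struct user_struct	*user;		/* may be NULL */
	struct mm_struct	*mm_account;	/* may be NULL */
	unsigned long		acct_pages;
};

static int my_pinned_buf_charge(struct my_pinned_buf *buf,
				unsigned long nr_pages)
{
	int ret;

	/* charge against the saved handles, no io_ring_ctx needed */
	ret = io_account_mem(buf->user, buf->mm_account, nr_pages);
	if (ret)
		return ret;
	buf->acct_pages += nr_pages;
	return 0;
}

static void my_pinned_buf_uncharge(struct my_pinned_buf *buf)
{
	if (buf->acct_pages)
		io_unaccount_mem(buf->user, buf->mm_account, buf->acct_pages);
	buf->acct_pages = 0;
}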
Add an io_zcrx_ifq arg to io_zcrx_free_area(). A QOL change to reduce
line widths.

Signed-off-by: David Wei
---
 io_uring/zcrx.c | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/io_uring/zcrx.c b/io_uring/zcrx.c
index 30d3a7b3c407..5c90404283ff 100644
--- a/io_uring/zcrx.c
+++ b/io_uring/zcrx.c
@@ -383,9 +383,10 @@ static void io_free_rbuf_ring(struct io_zcrx_ifq *ifq)
 	ifq->rqes = NULL;
 }
 
-static void io_zcrx_free_area(struct io_zcrx_area *area)
+static void io_zcrx_free_area(struct io_zcrx_ifq *ifq,
+			      struct io_zcrx_area *area)
 {
-	io_zcrx_unmap_area(area->ifq, area);
+	io_zcrx_unmap_area(ifq, area);
 	io_release_area_mem(&area->mem);
 
 	if (area->mem.account_pages)
@@ -464,7 +465,7 @@ static int io_zcrx_create_area(struct io_zcrx_ifq *ifq,
 	return 0;
 err:
 	if (area)
-		io_zcrx_free_area(area);
+		io_zcrx_free_area(ifq, area);
 	return ret;
 }
 
@@ -523,7 +524,7 @@ static void io_zcrx_ifq_free(struct io_zcrx_ifq *ifq)
 	io_close_queue(ifq);
 
 	if (ifq->area)
-		io_zcrx_free_area(ifq->area);
+		io_zcrx_free_area(ifq, ifq->area);
 
 	if (ifq->dev)
 		put_device(ifq->dev);
-- 
2.47.3

In preparation for removing ifq->ctx and making the ifq lifetime
independent of the ring ctx, add user_struct and mm_struct pointers to
io_zcrx_ifq.

In the ifq cleanup path, these are the only fields from the main ring
ctx used to do accounting. Taking a copy in the ifq allows ifq->ctx to
be removed later, along with the ctx->refs held by the ifq.

Signed-off-by: David Wei
---
 io_uring/zcrx.c | 24 ++++++++++++++++++------
 io_uring/zcrx.h |  2 ++
 2 files changed, 20 insertions(+), 6 deletions(-)

diff --git a/io_uring/zcrx.c b/io_uring/zcrx.c
index 5c90404283ff..774efbce8cb6 100644
--- a/io_uring/zcrx.c
+++ b/io_uring/zcrx.c
@@ -200,7 +200,7 @@ static int io_import_umem(struct io_zcrx_ifq *ifq,
 	}
 
 	mem->account_pages = io_count_account_pages(pages, nr_pages);
-	ret = io_account_mem(ifq->ctx->user, ifq->ctx->mm_account, mem->account_pages);
+	ret = io_account_mem(ifq->user, ifq->mm_account, mem->account_pages);
 	if (ret < 0)
 		mem->account_pages = 0;
 
@@ -344,7 +344,8 @@ static void io_zcrx_get_niov_uref(struct net_iov *niov)
 	atomic_inc(io_get_user_counter(niov));
 }
 
-static int io_allocate_rbuf_ring(struct io_zcrx_ifq *ifq,
+static int io_allocate_rbuf_ring(struct io_ring_ctx *ctx,
+				 struct io_zcrx_ifq *ifq,
 				 struct io_uring_zcrx_ifq_reg *reg,
 				 struct io_uring_region_desc *rd,
 				 u32 id)
@@ -362,7 +363,7 @@ static int io_allocate_rbuf_ring(struct io_zcrx_ifq *ifq,
 	mmap_offset = IORING_MAP_OFF_ZCRX_REGION;
 	mmap_offset += id << IORING_OFF_PBUF_SHIFT;
 
-	ret = io_create_region(ifq->ctx, &ifq->region, rd, mmap_offset);
+	ret = io_create_region(ctx, &ifq->region, rd, mmap_offset);
 	if (ret < 0)
 		return ret;
 
@@ -378,7 +379,7 @@ static int io_allocate_rbuf_ring(struct io_zcrx_ifq *ifq,
 
 static void io_free_rbuf_ring(struct io_zcrx_ifq *ifq)
 {
-	io_free_region(ifq->ctx->user, &ifq->region);
+	io_free_region(ifq->user, &ifq->region);
 	ifq->rq_ring = NULL;
 	ifq->rqes = NULL;
 }
@@ -390,7 +391,7 @@ static void io_zcrx_free_area(struct io_zcrx_ifq *ifq,
 	io_release_area_mem(&area->mem);
 
 	if (area->mem.account_pages)
-		io_unaccount_mem(area->ifq->ctx->user, area->ifq->ctx->mm_account,
+		io_unaccount_mem(ifq->user, ifq->mm_account,
 				 area->mem.account_pages);
 
 	kvfree(area->freelist);
 	kvfree(area->nia.niovs);
@@ -525,6 +526,9 @@ static void io_zcrx_ifq_free(struct io_zcrx_ifq *ifq)
 	if (ifq->area)
 		io_zcrx_free_area(ifq, ifq->area);
 
+	free_uid(ifq->user);
+	if (ifq->mm_account)
+		mmdrop(ifq->mm_account);
 	if (ifq->dev)
 		put_device(ifq->dev);
 
@@ -588,6 +592,14 @@ int io_register_zcrx_ifq(struct io_ring_ctx *ctx,
 	ifq = io_zcrx_ifq_alloc(ctx);
 	if (!ifq)
 		return -ENOMEM;
+	if (ctx->user) {
+		get_uid(ctx->user);
+		ifq->user = ctx->user;
+	}
+	if (ctx->mm_account) {
+		mmgrab(ctx->mm_account);
+		ifq->mm_account = ctx->mm_account;
+	}
 	ifq->rq_entries = reg.rq_entries;
 
 	scoped_guard(mutex, &ctx->mmap_lock) {
@@ -597,7 +609,7 @@ int io_register_zcrx_ifq(struct io_ring_ctx *ctx,
 		goto ifq_free;
 	}
 
-	ret = io_allocate_rbuf_ring(ifq, &reg, &rd, id);
+	ret = io_allocate_rbuf_ring(ctx, ifq, &reg, &rd, id);
 	if (ret)
 		goto err;
 
diff --git a/io_uring/zcrx.h b/io_uring/zcrx.h
index 33ef61503092..8d828dc9b0e4 100644
--- a/io_uring/zcrx.h
+++ b/io_uring/zcrx.h
@@ -42,6 +42,8 @@ struct io_zcrx_ifq {
 	struct io_ring_ctx		*ctx;
 	struct io_zcrx_area		*area;
 	unsigned			niov_shift;
+	struct user_struct		*user;
+	struct mm_struct		*mm_account;
 
 	spinlock_t			rq_lock ____cacheline_aligned_in_smp;
 	struct io_uring			*rq_ring;
-- 
2.47.3
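The lifetime rule being set up here is the usual one for caching a
user_struct and mm_struct beyond the registering task: take a reference
when the pointer is copied into the ifq, drop it when the ifq is freed.
A condensed sketch of that pattern with hypothetical helper names (the
series open-codes this in io_register_zcrx_ifq() and io_zcrx_ifq_free()):

/* at registration time, copy and pin the accounting handles */
static void my_ifq_adopt_accounting(struct io_zcrx_ifq *ifq,
				    struct io_ring_ctx *ctx)
{
	if (ctx->user)
		ifq->user = get_uid(ctx->user);	/* bump uid refcount */
	if (ctx->mm_account) {
		mmgrab(ctx->mm_account);	/* pins the mm_struct, not the address space */
		ifq->mm_account = ctx->mm_account;
	}
}

/* at teardown, release the references taken above */
static void my_ifq_release_accounting(struct io_zcrx_ifq *ifq)
{
	free_uid(ifq->user);			/* NULL-safe */
	if (ifq->mm_account)
		mmdrop(ifq->mm_account);
}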
In preparation for removing the ref on ctx->refs held by an ifq and
removing io_shutdown_zcrx_ifqs(), move io_unregister_zcrx_ifqs() down
such that it can call io_zcrx_scrub().

Signed-off-by: David Wei
---
 io_uring/zcrx.c | 44 ++++++++++++++++++++++----------------------
 1 file changed, 22 insertions(+), 22 deletions(-)

diff --git a/io_uring/zcrx.c b/io_uring/zcrx.c
index 774efbce8cb6..b3f3d55d2f63 100644
--- a/io_uring/zcrx.c
+++ b/io_uring/zcrx.c
@@ -662,28 +662,6 @@ int io_register_zcrx_ifq(struct io_ring_ctx *ctx,
 	return ret;
 }
 
-void io_unregister_zcrx_ifqs(struct io_ring_ctx *ctx)
-{
-	struct io_zcrx_ifq *ifq;
-
-	lockdep_assert_held(&ctx->uring_lock);
-
-	while (1) {
-		scoped_guard(mutex, &ctx->mmap_lock) {
-			unsigned long id = 0;
-
-			ifq = xa_find(&ctx->zcrx_ctxs, &id, ULONG_MAX, XA_PRESENT);
-			if (ifq)
-				xa_erase(&ctx->zcrx_ctxs, id);
-		}
-		if (!ifq)
-			break;
-		io_zcrx_ifq_free(ifq);
-	}
-
-	xa_destroy(&ctx->zcrx_ctxs);
-}
-
 static struct net_iov *__io_zcrx_get_free_niov(struct io_zcrx_area *area)
 {
 	unsigned niov_idx;
@@ -749,6 +727,28 @@ void io_shutdown_zcrx_ifqs(struct io_ring_ctx *ctx)
 	}
 }
 
+void io_unregister_zcrx_ifqs(struct io_ring_ctx *ctx)
+{
+	struct io_zcrx_ifq *ifq;
+
+	lockdep_assert_held(&ctx->uring_lock);
+
+	while (1) {
+		scoped_guard(mutex, &ctx->mmap_lock) {
+			unsigned long id = 0;
+
+			ifq = xa_find(&ctx->zcrx_ctxs, &id, ULONG_MAX, XA_PRESENT);
+			if (ifq)
+				xa_erase(&ctx->zcrx_ctxs, id);
+		}
+		if (!ifq)
+			break;
+		io_zcrx_ifq_free(ifq);
+	}
+
+	xa_destroy(&ctx->zcrx_ctxs);
+}
+
 static inline u32 io_zcrx_rqring_entries(struct io_zcrx_ifq *ifq)
 {
 	u32 entries;
-- 
2.47.3
Add a refcount to struct io_zcrx_ifq to track the number of rings that
share it. For now, this is only ever 1, i.e. not shared.

This refcount replaces the ref that the ifq holds on ctx->refs via the
page pool memory provider. That ref was used to keep the ifq around
until the ring ctx is freed, i.e. until ctx->refs drops to 0. But with
the ifq now refcounted directly by the ring, and ifq->ctx removed, this
is no longer necessary.

Since ifqs no longer hold refs to the ring ctx, there is no need to
split ifq cleanup into two steps: io_shutdown_zcrx_ifqs() in
io_ring_exit_work() while waiting for ctx->refs to drop to 0, and
io_unregister_zcrx_ifqs() after. Remove io_shutdown_zcrx_ifqs().

An ifq now behaves like a normal refcounted object; the last ref from a
ring frees the ifq.

Signed-off-by: David Wei
---
 io_uring/io_uring.c |  5 -----
 io_uring/zcrx.c     | 24 +++++-------------------
 io_uring/zcrx.h     |  6 +-----
 3 files changed, 6 insertions(+), 29 deletions(-)

diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c
index 7d42748774f8..8af5efda9c11 100644
--- a/io_uring/io_uring.c
+++ b/io_uring/io_uring.c
@@ -3042,11 +3042,6 @@ static __cold void io_ring_exit_work(struct work_struct *work)
 			io_cqring_overflow_kill(ctx);
 			mutex_unlock(&ctx->uring_lock);
 		}
-		if (!xa_empty(&ctx->zcrx_ctxs)) {
-			mutex_lock(&ctx->uring_lock);
-			io_shutdown_zcrx_ifqs(ctx);
-			mutex_unlock(&ctx->uring_lock);
-		}
 
 		if (ctx->flags & IORING_SETUP_DEFER_TASKRUN)
 			io_move_task_work_from_local(ctx);
diff --git a/io_uring/zcrx.c b/io_uring/zcrx.c
index b3f3d55d2f63..6324dfa61ce0 100644
--- a/io_uring/zcrx.c
+++ b/io_uring/zcrx.c
@@ -479,7 +479,6 @@ static struct io_zcrx_ifq *io_zcrx_ifq_alloc(struct io_ring_ctx *ctx)
 		return NULL;
 
 	ifq->if_rxq = -1;
-	ifq->ctx = ctx;
 	spin_lock_init(&ifq->rq_lock);
 	mutex_init(&ifq->pp_lock);
 	return ifq;
@@ -592,6 +591,7 @@ int io_register_zcrx_ifq(struct io_ring_ctx *ctx,
 	ifq = io_zcrx_ifq_alloc(ctx);
 	if (!ifq)
 		return -ENOMEM;
+	refcount_set(&ifq->refs, 1);
 	if (ctx->user) {
 		get_uid(ctx->user);
 		ifq->user = ctx->user;
@@ -714,19 +714,6 @@ static void io_zcrx_scrub(struct io_zcrx_ifq *ifq)
 	}
 }
 
-void io_shutdown_zcrx_ifqs(struct io_ring_ctx *ctx)
-{
-	struct io_zcrx_ifq *ifq;
-	unsigned long index;
-
-	lockdep_assert_held(&ctx->uring_lock);
-
-	xa_for_each(&ctx->zcrx_ctxs, index, ifq) {
-		io_zcrx_scrub(ifq);
-		io_close_queue(ifq);
-	}
-}
-
 void io_unregister_zcrx_ifqs(struct io_ring_ctx *ctx)
 {
 	struct io_zcrx_ifq *ifq;
@@ -743,7 +730,10 @@ void io_unregister_zcrx_ifqs(struct io_ring_ctx *ctx)
 		}
 		if (!ifq)
 			break;
-		io_zcrx_ifq_free(ifq);
+		if (refcount_dec_and_test(&ifq->refs)) {
+			io_zcrx_scrub(ifq);
+			io_zcrx_ifq_free(ifq);
+		}
 	}
 
 	xa_destroy(&ctx->zcrx_ctxs);
@@ -894,15 +884,11 @@ static int io_pp_zc_init(struct page_pool *pp)
 	if (ret)
 		return ret;
 
-	percpu_ref_get(&ifq->ctx->refs);
 	return 0;
 }
 
 static void io_pp_zc_destroy(struct page_pool *pp)
 {
-	struct io_zcrx_ifq *ifq = io_pp_to_ifq(pp);
-
-	percpu_ref_put(&ifq->ctx->refs);
 }
 
 static int io_pp_nl_fill(void *mp_priv, struct sk_buff *rsp,
diff --git a/io_uring/zcrx.h b/io_uring/zcrx.h
index 8d828dc9b0e4..5951f127298c 100644
--- a/io_uring/zcrx.h
+++ b/io_uring/zcrx.h
@@ -39,9 +39,9 @@ struct io_zcrx_area {
 };
 
 struct io_zcrx_ifq {
-	struct io_ring_ctx		*ctx;
 	struct io_zcrx_area		*area;
 	unsigned			niov_shift;
+	refcount_t			refs;
 	struct user_struct		*user;
 	struct mm_struct		*mm_account;
 
@@ -70,7 +70,6 @@ int io_zcrx_return_bufs(struct io_ring_ctx *ctx,
 int io_register_zcrx_ifq(struct io_ring_ctx *ctx,
 			 struct io_uring_zcrx_ifq_reg __user *arg);
 void io_unregister_zcrx_ifqs(struct io_ring_ctx *ctx);
-void io_shutdown_zcrx_ifqs(struct io_ring_ctx *ctx);
 int io_zcrx_recv(struct io_kiocb *req, struct io_zcrx_ifq *ifq,
 		 struct socket *sock, unsigned int flags,
 		 unsigned issue_flags, unsigned int *len);
@@ -85,9 +84,6 @@ static inline int io_register_zcrx_ifq(struct io_ring_ctx *ctx,
 static inline void io_unregister_zcrx_ifqs(struct io_ring_ctx *ctx)
 {
 }
-static inline void io_shutdown_zcrx_ifqs(struct io_ring_ctx *ctx)
-{
-}
 static inline int io_zcrx_recv(struct io_kiocb *req, struct io_zcrx_ifq *ifq,
 				struct socket *sock, unsigned int flags,
 				unsigned issue_flags, unsigned int *len)
-- 
2.47.3
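The resulting lifecycle is the standard refcount_t pattern: the
registering ring starts the count at 1, every ring that later attaches
takes another reference, and whichever ring drops the last reference
scrubs and frees the ifq. A sketch with hypothetical get/put helpers
(the series open-codes the put inside io_unregister_zcrx_ifqs()):

static void my_zcrx_ifq_get(struct io_zcrx_ifq *ifq)
{
	/* another ring now shares this ifq */
	refcount_inc(&ifq->refs);
}

static void my_zcrx_ifq_put(struct io_zcrx_ifq *ifq)
{
	if (refcount_dec_and_test(&ifq->refs)) {
		io_zcrx_scrub(ifq);	/* return outstanding buffers */
		io_zcrx_ifq_free(ifq);	/* last user frees the ifq */
	}
}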
Add a way to share an ifq from a src ring that is real (i.e. bound to a
HW RX queue) with other rings. This is done by passing a new flag
IORING_ZCRX_IFQ_REG_SHARE in the registration struct
io_uring_zcrx_ifq_reg, alongside the fd of the src ring and its ifq id
to be shared.

Signed-off-by: David Wei
---
 include/uapi/linux/io_uring.h |  4 +++
 io_uring/zcrx.c               | 65 ++++++++++++++++++++++++++++++++++-
 2 files changed, 68 insertions(+), 1 deletion(-)

diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h
index 04797a9b76bc..4da4552a4215 100644
--- a/include/uapi/linux/io_uring.h
+++ b/include/uapi/linux/io_uring.h
@@ -1063,6 +1063,10 @@ struct io_uring_zcrx_area_reg {
 	__u64	__resv2[2];
 };
 
+enum io_uring_zcrx_ifq_reg_flags {
+	IORING_ZCRX_IFQ_REG_SHARE	= 1,
+};
+
 /*
  * Argument for IORING_REGISTER_ZCRX_IFQ
  */
diff --git a/io_uring/zcrx.c b/io_uring/zcrx.c
index 6324dfa61ce0..094bd595d517 100644
--- a/io_uring/zcrx.c
+++ b/io_uring/zcrx.c
@@ -22,10 +22,10 @@
 #include 
 
 #include "io_uring.h"
-#include "kbuf.h"
 #include "memmap.h"
 #include "zcrx.h"
 #include "rsrc.h"
+#include "register.h"
 
 #define IO_ZCRX_AREA_SUPPORTED_FLAGS	(IORING_ZCRX_AREA_DMABUF)
 
@@ -546,6 +546,67 @@ struct io_mapped_region *io_zcrx_get_region(struct io_ring_ctx *ctx,
 	return ifq ? &ifq->region : NULL;
 }
 
+static int io_share_zcrx_ifq(struct io_ring_ctx *ctx,
+			     struct io_uring_zcrx_ifq_reg __user *arg,
+			     struct io_uring_zcrx_ifq_reg *reg)
+{
+	struct io_ring_ctx *src_ctx;
+	struct io_zcrx_ifq *src_ifq;
+	struct file *file;
+	int src_fd, ret;
+	u32 src_id, id;
+
+	src_fd = reg->if_idx;
+	src_id = reg->if_rxq;
+
+	file = io_uring_register_get_file(src_fd, false);
+	if (IS_ERR(file))
+		return PTR_ERR(file);
+
+	src_ctx = file->private_data;
+	if (src_ctx == ctx)
+		return -EBADFD;
+
+	mutex_unlock(&ctx->uring_lock);
+	mutex_lock(&src_ctx->uring_lock);
+
+	src_ifq = xa_load(&src_ctx->zcrx_ctxs, src_id);
+	if (!src_ifq) {
+		mutex_unlock(&src_ctx->uring_lock);
+		fput(file);
+		mutex_lock(&ctx->uring_lock);
+		return -EINVAL;
+	}
+
+	refcount_inc(&src_ifq->refs);
+	mutex_unlock(&src_ctx->uring_lock);
+	fput(file);
+	mutex_lock(&ctx->uring_lock);
+
+	scoped_guard(mutex, &ctx->mmap_lock) {
+		ret = xa_alloc(&ctx->zcrx_ctxs, &id, NULL, xa_limit_31b, GFP_KERNEL);
+		if (ret)
+			return ret;
+	}
+
+	reg->zcrx_id = id;
+	if (copy_to_user(arg, reg, sizeof(*reg))) {
+		ret = -EFAULT;
+		goto err;
+	}
+
+	scoped_guard(mutex, &ctx->mmap_lock) {
+		ret = -ENOMEM;
+		if (xa_store(&ctx->zcrx_ctxs, id, src_ifq, GFP_KERNEL))
+			goto err;
+	}
+	return 0;
+err:
+	scoped_guard(mutex, &ctx->mmap_lock)
+		xa_erase(&ctx->zcrx_ctxs, id);
+	return ret;
+}
+
 int io_register_zcrx_ifq(struct io_ring_ctx *ctx,
 			 struct io_uring_zcrx_ifq_reg __user *arg)
 {
@@ -571,6 +632,8 @@ int io_register_zcrx_ifq(struct io_ring_ctx *ctx,
 		return -EINVAL;
 	if (copy_from_user(&reg, arg, sizeof(reg)))
 		return -EFAULT;
+	if (reg.flags & IORING_ZCRX_IFQ_REG_SHARE)
+		return io_share_zcrx_ifq(ctx, arg, &reg);
 	if (copy_from_user(&rd, u64_to_user_ptr(reg.region_ptr), sizeof(rd)))
 		return -EFAULT;
 	if (!mem_is_zero(&reg.__resv, sizeof(reg.__resv)) ||
-- 
2.47.3
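From userspace, a second ring attaches to an existing ifq through the
same IORING_REGISTER_ZCRX_IFQ registration, setting
IORING_ZCRX_IFQ_REG_SHARE and reusing if_idx for the fd of the source
ring and if_rxq for the ifq id on that ring, as io_share_zcrx_ifq()
reads them; the id for the new ring comes back in zcrx_id. A hedged
sketch, assuming liburing's io_uring_register_ifq() as the registration
wrapper:

#include <liburing.h>

/* Attach to an ifq already registered on src_ring_fd with id src_zcrx_id. */
static int share_zcrx_ifq(struct io_uring *ring, int src_ring_fd,
			  __u32 src_zcrx_id)
{
	struct io_uring_zcrx_ifq_reg reg = {
		.flags	= IORING_ZCRX_IFQ_REG_SHARE,
		.if_idx	= src_ring_fd,	/* reused: fd of the source ring */
		.if_rxq	= src_zcrx_id,	/* reused: ifq id on the source ring */
	};
	int ret;

	ret = io_uring_register_ifq(ring, &reg);
	if (ret)
		return ret;
	/* reg.zcrx_id now holds this ring's id for the shared ifq */
	return 0;
}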