A minor cleanup: precompute the SQ size first instead of branching on
array_size() in io_allocate_scq_urings().

Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
---
 io_uring/io_uring.c | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c
index e8af963d3233..f9fc297e2fce 100644
--- a/io_uring/io_uring.c
+++ b/io_uring/io_uring.c
@@ -3607,6 +3607,7 @@ static __cold int io_allocate_scq_urings(struct io_ring_ctx *ctx,
 	struct io_uring_region_desc rd;
 	struct io_rings *rings;
 	size_t size, sq_array_offset;
+	size_t sqe_size;
 	int ret;
 
 	/* make sure these are sane, as we already accounted them */
@@ -3636,10 +3637,11 @@ static __cold int io_allocate_scq_urings(struct io_ring_ctx *ctx,
 	rings->sq_ring_entries = p->sq_entries;
 	rings->cq_ring_entries = p->cq_entries;
 
+	sqe_size = sizeof(struct io_uring_sqe);
 	if (p->flags & IORING_SETUP_SQE128)
-		size = array_size(2 * sizeof(struct io_uring_sqe), p->sq_entries);
-	else
-		size = array_size(sizeof(struct io_uring_sqe), p->sq_entries);
+		sqe_size *= 2;
+
+	size = array_size(sqe_size, p->sq_entries);
 	if (size == SIZE_MAX) {
 		io_rings_free(ctx);
 		return -EOVERFLOW;
-- 
2.49.0


It's good practice to validate parameters before doing any heavy work
like queue allocations. Do that for io_allocate_scq_urings().
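For illustration, a minimal userspace sketch of the saturating size
check being hoisted here; the kernel's array_size() in
include/linux/overflow.h behaves the same way and returns SIZE_MAX when
the multiplication overflows (the constants below are illustrative
only):

	#include <stdint.h>
	#include <stdio.h>

	/* userspace stand-in for array_size(): saturate to SIZE_MAX */
	static size_t array_size_demo(size_t elem, size_t n)
	{
		size_t bytes;

		if (__builtin_mul_overflow(elem, n, &bytes))
			return SIZE_MAX;
		return bytes;
	}

	int main(void)
	{
		size_t sqe_size = 64;	/* sizeof(struct io_uring_sqe) */

		sqe_size *= 2;		/* IORING_SETUP_SQE128 */
		/* sane request: 128 * 4096 = 524288 */
		printf("%zu\n", array_size_demo(sqe_size, 4096));
		/* overflow reported as SIZE_MAX, caller returns -EOVERFLOW */
		printf("%d\n", array_size_demo(SIZE_MAX / 2, 3) == SIZE_MAX);
		return 0;
	}

Failing such a request before any ring memory is allocated avoids the
io_rings_free() unwind that the old placement of the check needed.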
Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
---
 io_uring/io_uring.c | 28 ++++++++++++----------------
 1 file changed, 12 insertions(+), 16 deletions(-)

diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c
index f9fc297e2fce..1e8566b39b52 100644
--- a/io_uring/io_uring.c
+++ b/io_uring/io_uring.c
@@ -3606,21 +3606,27 @@ static __cold int io_allocate_scq_urings(struct io_ring_ctx *ctx,
 {
 	struct io_uring_region_desc rd;
 	struct io_rings *rings;
-	size_t size, sq_array_offset;
-	size_t sqe_size;
+	size_t sq_array_offset;
+	size_t sq_size, cq_size, sqe_size;
 	int ret;
 
 	/* make sure these are sane, as we already accounted them */
 	ctx->sq_entries = p->sq_entries;
 	ctx->cq_entries = p->cq_entries;
 
-	size = rings_size(ctx->flags, p->sq_entries, p->cq_entries,
+	sqe_size = sizeof(struct io_uring_sqe);
+	if (p->flags & IORING_SETUP_SQE128)
+		sqe_size *= 2;
+	sq_size = array_size(sqe_size, p->sq_entries);
+	if (sq_size == SIZE_MAX)
+		return -EOVERFLOW;
+	cq_size = rings_size(ctx->flags, p->sq_entries, p->cq_entries,
 			  &sq_array_offset);
-	if (size == SIZE_MAX)
+	if (cq_size == SIZE_MAX)
 		return -EOVERFLOW;
 
 	memset(&rd, 0, sizeof(rd));
-	rd.size = PAGE_ALIGN(size);
+	rd.size = PAGE_ALIGN(cq_size);
 	if (ctx->flags & IORING_SETUP_NO_MMAP) {
 		rd.user_addr = p->cq_off.user_addr;
 		rd.flags |= IORING_MEM_REGION_TYPE_USER;
@@ -3637,18 +3643,8 @@ static __cold int io_allocate_scq_urings(struct io_ring_ctx *ctx,
 	rings->sq_ring_entries = p->sq_entries;
 	rings->cq_ring_entries = p->cq_entries;
 
-	sqe_size = sizeof(struct io_uring_sqe);
-	if (p->flags & IORING_SETUP_SQE128)
-		sqe_size *= 2;
-
-	size = array_size(sqe_size, p->sq_entries);
-	if (size == SIZE_MAX) {
-		io_rings_free(ctx);
-		return -EOVERFLOW;
-	}
-
 	memset(&rd, 0, sizeof(rd));
-	rd.size = PAGE_ALIGN(size);
+	rd.size = PAGE_ALIGN(sq_size);
 	if (ctx->flags & IORING_SETUP_NO_MMAP) {
 		rd.user_addr = p->sq_off.user_addr;
 		rd.flags |= IORING_MEM_REGION_TYPE_USER;
-- 
2.49.0


io_create_region_mmap_safe() is only needed when the created region is
exposed to userspace via mmap. io_register_resize_rings() creates its
regions locally on the stack, so the plain, non-mmap_safe version of
the helper is enough.
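As a rough userspace analogue (illustrative names, not kernel API): a
lock is only needed for the publish step that makes an object reachable
by another thread, here standing in for the mmap path; an object that
is still private to its creator needs none.

	#include <pthread.h>
	#include <stdio.h>
	#include <string.h>

	struct region {
		int nr_pages;
	};

	static pthread_mutex_t mmap_lock = PTHREAD_MUTEX_INITIALIZER;
	static struct region *visible;	/* what the "mmap" side looks up */

	static void create_region(struct region *r)
	{
		/* still private to the caller: no locking needed */
		memset(r, 0, sizeof(*r));
		r->nr_pages = 4;
	}

	static void publish_region(struct region *r)
	{
		/* only this step must be serialized against lookups */
		pthread_mutex_lock(&mmap_lock);
		visible = r;
		pthread_mutex_unlock(&mmap_lock);
	}

	int main(void)
	{
		static struct region r;

		create_region(&r);	/* the io_create_region() part */
		publish_region(&r);	/* what the _mmap_safe variant added */
		printf("%d pages visible\n", visible->nr_pages);
		return 0;
	}

The resize path never publishes these stack-local regions directly; it
swaps them in later under the proper locks, so the extra publish step
in the helper buys nothing there.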
Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
---
 io_uring/register.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/io_uring/register.c b/io_uring/register.c
index 2e4717f1357c..a809d95153e4 100644
--- a/io_uring/register.c
+++ b/io_uring/register.c
@@ -432,7 +432,7 @@ static int io_register_resize_rings(struct io_ring_ctx *ctx, void __user *arg)
 		rd.user_addr = p.cq_off.user_addr;
 		rd.flags |= IORING_MEM_REGION_TYPE_USER;
 	}
-	ret = io_create_region_mmap_safe(ctx, &n.ring_region, &rd, IORING_OFF_CQ_RING);
+	ret = io_create_region(ctx, &n.ring_region, &rd, IORING_OFF_CQ_RING);
 	if (ret) {
 		io_register_free_rings(ctx, &p, &n);
 		return ret;
@@ -472,7 +472,7 @@ static int io_register_resize_rings(struct io_ring_ctx *ctx, void __user *arg)
 		rd.user_addr = p.sq_off.user_addr;
 		rd.flags |= IORING_MEM_REGION_TYPE_USER;
 	}
-	ret = io_create_region_mmap_safe(ctx, &n.sq_region, &rd, IORING_OFF_SQES);
+	ret = io_create_region(ctx, &n.sq_region, &rd, IORING_OFF_SQES);
 	if (ret) {
 		io_register_free_rings(ctx, &p, &n);
 		return ret;
-- 
2.49.0


io_register_free_rings() doesn't use its "struct io_uring_params"
parameter; remove it.

Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
---
 io_uring/register.c | 11 +++++------
 1 file changed, 5 insertions(+), 6 deletions(-)

diff --git a/io_uring/register.c b/io_uring/register.c
index a809d95153e4..f7f71f035b0d 100644
--- a/io_uring/register.c
+++ b/io_uring/register.c
@@ -379,7 +379,6 @@ struct io_ring_ctx_rings {
 };
 
 static void io_register_free_rings(struct io_ring_ctx *ctx,
-				   struct io_uring_params *p,
 				   struct io_ring_ctx_rings *r)
 {
 	io_free_region(ctx, &r->sq_region);
@@ -434,7 +433,7 @@ static int io_register_resize_rings(struct io_ring_ctx *ctx, void __user *arg)
 	}
 	ret = io_create_region(ctx, &n.ring_region, &rd, IORING_OFF_CQ_RING);
 	if (ret) {
-		io_register_free_rings(ctx, &p, &n);
+		io_register_free_rings(ctx, &n);
 		return ret;
 	}
 	n.rings = io_region_get_ptr(&n.ring_region);
@@ -453,7 +452,7 @@ static int io_register_resize_rings(struct io_ring_ctx *ctx, void __user *arg)
 	WRITE_ONCE(n.rings->cq_ring_entries, p.cq_entries);
 
 	if (copy_to_user(arg, &p, sizeof(p))) {
-		io_register_free_rings(ctx, &p, &n);
+		io_register_free_rings(ctx, &n);
 		return -EFAULT;
 	}
 
@@ -462,7 +461,7 @@ static int io_register_resize_rings(struct io_ring_ctx *ctx, void __user *arg)
 	else
 		size = array_size(sizeof(struct io_uring_sqe), p.sq_entries);
 	if (size == SIZE_MAX) {
-		io_register_free_rings(ctx, &p, &n);
+		io_register_free_rings(ctx, &n);
 		return -EOVERFLOW;
 	}
 
@@ -474,7 +473,7 @@ static int io_register_resize_rings(struct io_ring_ctx *ctx, void __user *arg)
 	}
 	ret = io_create_region(ctx, &n.sq_region, &rd, IORING_OFF_SQES);
 	if (ret) {
-		io_register_free_rings(ctx, &p, &n);
+		io_register_free_rings(ctx, &n);
 		return ret;
 	}
 	n.sq_sqes = io_region_get_ptr(&n.sq_region);
@@ -564,7 +563,7 @@ static int io_register_resize_rings(struct io_ring_ctx *ctx, void __user *arg)
 out:
 	spin_unlock(&ctx->completion_lock);
 	mutex_unlock(&ctx->mmap_lock);
-	io_register_free_rings(ctx, &p, to_free);
+	io_register_free_rings(ctx, to_free);
 
 	if (ctx->sq_data)
 		io_sq_thread_unpark(ctx->sq_data);
-- 
2.49.0


io_free_region() tolerates empty regions, but there is no reason to
rely on that either. If the first io_create_region() in
io_register_resize_rings() fails, just return the error without
attempting to clean anything up.

Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
---
 io_uring/register.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/io_uring/register.c b/io_uring/register.c
index f7f71f035b0d..b11550ed940c 100644
--- a/io_uring/register.c
+++ b/io_uring/register.c
@@ -432,10 +432,9 @@ static int io_register_resize_rings(struct io_ring_ctx *ctx, void __user *arg)
 		rd.flags |= IORING_MEM_REGION_TYPE_USER;
 	}
 	ret = io_create_region(ctx, &n.ring_region, &rd, IORING_OFF_CQ_RING);
-	if (ret) {
-		io_register_free_rings(ctx, &n);
+	if (ret)
 		return ret;
-	}
+
 	n.rings = io_region_get_ptr(&n.ring_region);
 
 	/*
-- 
2.49.0


The kbuf ring is published by io_buffer_add_list(), which correctly
protects it with the mmap_lock; there is no need to use
io_create_region_mmap_safe() before that point, as the region is not
yet exposed to userspace via mmap.

Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
---
 io_uring/kbuf.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/io_uring/kbuf.c b/io_uring/kbuf.c
index aad655e38672..e271b44ff73e 100644
--- a/io_uring/kbuf.c
+++ b/io_uring/kbuf.c
@@ -630,7 +630,7 @@ int io_register_pbuf_ring(struct io_ring_ctx *ctx, void __user *arg)
 		rd.user_addr = reg.ring_addr;
 		rd.flags |= IORING_MEM_REGION_TYPE_USER;
 	}
-	ret = io_create_region_mmap_safe(ctx, &bl->region, &rd, mmap_offset);
+	ret = io_create_region(ctx, &bl->region, &rd, mmap_offset);
 	if (ret)
 		goto fail;
 	br = io_region_get_ptr(&bl->region);
-- 
2.49.0


io_register_mem_region() can end up removing a region right after
publishing it, e.g. when the copy_to_user() of the region descriptor
fails. This non-atomicity is annoying. Do it in two steps instead:
create the region first, and publish it only once the rest of the
handling is done. Remove the now-unused io_create_region_mmap_safe(),
which was assumed to be a temporary solution from day one.
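io_region_publish() leans on guard(mutex) from linux/cleanup.h, which
drops the lock automatically when the enclosing scope ends. A minimal
userspace sketch of that scoped-unlock idea, using the GCC/Clang
cleanup attribute (illustrative names, not the kernel implementation):

	#include <pthread.h>
	#include <stdio.h>

	static pthread_mutex_t mmap_lock = PTHREAD_MUTEX_INITIALIZER;

	static void mutex_unlocker(pthread_mutex_t **m)
	{
		pthread_mutex_unlock(*m);
	}

	/* lock now, unlock automatically when the scope exits */
	#define guard_mutex(m) \
		pthread_mutex_t *_guard \
			__attribute__((cleanup(mutex_unlocker))) = \
			(pthread_mutex_lock(m), (m))

	struct region { int nr_pages; };

	static struct region param_region;	/* the published copy */

	static void region_publish(struct region *src, struct region *dst)
	{
		guard_mutex(&mmap_lock);
		*dst = *src;	/* hand over fully set up state in one go */
	}

	int main(void)
	{
		/* step one: create and set up privately, may still fail */
		struct region tmp = { .nr_pages = 2 };

		/* step two: publish last, once nothing can fail anymore */
		region_publish(&tmp, &param_region);
		printf("%d\n", param_region.nr_pages);
		return 0;
	}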
Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
---
 io_uring/memmap.c   | 21 ---------------------
 io_uring/memmap.h   | 12 ++++++++++++
 io_uring/register.c | 11 ++++++-----
 3 files changed, 18 insertions(+), 26 deletions(-)

diff --git a/io_uring/memmap.c b/io_uring/memmap.c
index 2e99dffddfc5..aa388ecd4754 100644
--- a/io_uring/memmap.c
+++ b/io_uring/memmap.c
@@ -234,27 +234,6 @@ int io_create_region(struct io_ring_ctx *ctx, struct io_mapped_region *mr,
 	return ret;
 }
 
-int io_create_region_mmap_safe(struct io_ring_ctx *ctx, struct io_mapped_region *mr,
-			       struct io_uring_region_desc *reg,
-			       unsigned long mmap_offset)
-{
-	struct io_mapped_region tmp_mr;
-	int ret;
-
-	memcpy(&tmp_mr, mr, sizeof(tmp_mr));
-	ret = io_create_region(ctx, &tmp_mr, reg, mmap_offset);
-	if (ret)
-		return ret;
-
-	/*
-	 * Once published mmap can find it without holding only the ->mmap_lock
-	 * and not ->uring_lock.
-	 */
-	guard(mutex)(&ctx->mmap_lock);
-	memcpy(mr, &tmp_mr, sizeof(tmp_mr));
-	return 0;
-}
-
 static struct io_mapped_region *io_mmap_get_region(struct io_ring_ctx *ctx,
 						   loff_t pgoff)
 {
diff --git a/io_uring/memmap.h b/io_uring/memmap.h
index 08419684e4bc..58002976e0c3 100644
--- a/io_uring/memmap.h
+++ b/io_uring/memmap.h
@@ -36,4 +36,16 @@ static inline bool io_region_is_set(struct io_mapped_region *mr)
 	return !!mr->nr_pages;
 }
 
+static inline void io_region_publish(struct io_ring_ctx *ctx,
+				     struct io_mapped_region *src_region,
+				     struct io_mapped_region *dst_region)
+{
+	/*
+	 * Once published, mmap can find the region while holding just the
+	 * ->mmap_lock and not ->uring_lock.
+	 */
+	guard(mutex)(&ctx->mmap_lock);
+	*dst_region = *src_region;
+}
+
 #endif
diff --git a/io_uring/register.c b/io_uring/register.c
index b11550ed940c..43eb02004824 100644
--- a/io_uring/register.c
+++ b/io_uring/register.c
@@ -576,6 +576,7 @@ static int io_register_mem_region(struct io_ring_ctx *ctx, void __user *uarg)
 	struct io_uring_mem_region_reg reg;
 	struct io_uring_region_desc __user *rd_uptr;
 	struct io_uring_region_desc rd;
+	struct io_mapped_region region = {};
 	int ret;
 
 	if (io_region_is_set(&ctx->param_region))
@@ -599,20 +600,20 @@ static int io_register_mem_region(struct io_ring_ctx *ctx, void __user *uarg)
 	    !(ctx->flags & IORING_SETUP_R_DISABLED))
 		return -EINVAL;
 
-	ret = io_create_region_mmap_safe(ctx, &ctx->param_region, &rd,
-					 IORING_MAP_OFF_PARAM_REGION);
+	ret = io_create_region(ctx, &region, &rd, IORING_MAP_OFF_PARAM_REGION);
 	if (ret)
 		return ret;
 
 	if (copy_to_user(rd_uptr, &rd, sizeof(rd))) {
-		guard(mutex)(&ctx->mmap_lock);
-		io_free_region(ctx, &ctx->param_region);
+		io_free_region(ctx, &region);
 		return -EFAULT;
 	}
 
 	if (reg.flags & IORING_MEM_REGION_REG_WAIT_ARG) {
-		ctx->cq_wait_arg = io_region_get_ptr(&ctx->param_region);
+		ctx->cq_wait_arg = io_region_get_ptr(&region);
 		ctx->cq_wait_size = rd.size;
 	}
+
+	io_region_publish(ctx, &region, &ctx->param_region);
 	return 0;
 }
-- 
2.49.0