Add IORING_OP_DUP, a new operation similar to dup3(). The source file can be given either as a regular file descriptor or as a direct descriptor; the destination is always a regular file descriptor. Unlike dup3(), the destination is marked O_CLOEXEC by default; pass IORING_DUP_NO_CLOEXEC in sqe->dup_flags to disable that. The direct descriptor variant is useful for installing a direct descriptor at a chosen fd and closing whatever was previously open at that fd with a single acquisition of the `file_lock` in `struct files_struct`. Combined with IORING_OP_ACCEPT or IORING_OP_OPENAT2 using direct descriptors, it can reduce lock contention in multithreaded applications. Signed-off-by: Daniele Di Proietto --- include/uapi/linux/io_uring.h | 10 +++++++ io_uring/opdef.c | 8 ++++++ io_uring/openclose.c | 49 +++++++++++++++++++++++++++++++++++ io_uring/openclose.h | 3 +++ 4 files changed, 70 insertions(+) diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h index 1ff16141c8a5..472bebeb569d 100644 --- a/include/uapi/linux/io_uring.h +++ b/include/uapi/linux/io_uring.h @@ -74,6 +74,7 @@ struct io_uring_sqe { __u32 install_fd_flags; __u32 nop_flags; __u32 pipe_flags; + __u32 dup_flags; }; __u64 user_data; /* data to be passed back at completion time */ /* pack this to avoid bogus arm OABI complaints */ @@ -90,6 +91,7 @@ struct io_uring_sqe { __u32 file_index; __u32 zcrx_ifq_idx; __u32 optlen; + __s32 dup_new_fd; struct { __u16 addr_len; __u16 __pad3[1]; @@ -316,6 +318,7 @@ enum io_uring_op { IORING_OP_PIPE, IORING_OP_NOP128, IORING_OP_URING_CMD128, + IORING_OP_DUP, /* this goes last, obviously */ IORING_OP_LAST, @@ -475,6 +478,13 @@ enum io_uring_msg_ring_flags { */ #define IORING_FIXED_FD_NO_CLOEXEC (1U << 0) +/* + * IORING_OP_DUP flags (sqe->dup_flags) + * + * IORING_DUP_NO_CLOEXEC Don't mark the fd as O_CLOEXEC + */ +#define IORING_DUP_NO_CLOEXEC (1U << 0) + /* * IORING_OP_NOP flags (sqe->nop_flags) * diff --git a/io_uring/opdef.c b/io_uring/opdef.c index 91a23baf415e..34103b9108f6 100644 --- a/io_uring/opdef.c +++ b/io_uring/opdef.c @@ -599,6 +599,11 @@ const struct io_issue_def io_issue_defs[] = { .prep = io_uring_cmd_prep, .issue = io_uring_cmd, }, +
[IORING_OP_DUP] = { + .needs_file = 1, + .prep = io_dup_prep, + .issue = io_dup, + }, }; const struct io_cold_def io_cold_defs[] = { @@ -857,6 +862,9 @@ const struct io_cold_def io_cold_defs[] = { .sqe_copy = io_uring_cmd_sqe_copy, .cleanup = io_uring_cmd_cleanup, }, + [IORING_OP_DUP] = { + .name = "DUP", + }, }; const char *io_uring_get_opcode(u8 opcode) diff --git a/io_uring/openclose.c b/io_uring/openclose.c index c71242915dad..f7a6d45cba17 100644 --- a/io_uring/openclose.c +++ b/io_uring/openclose.c @@ -39,6 +39,12 @@ struct io_fixed_install { unsigned int o_flags; }; +struct io_dup { /* per-request state for IORING_OP_DUP */ + struct file *file; /* source file; not assigned by io_dup_prep() -- presumably filled in by the io_uring core because the opcode sets .needs_file (TODO confirm) */ + int new_fd; /* destination descriptor, taken from sqe->dup_new_fd */ + unsigned int o_flags; /* O_CLOEXEC, or 0 when IORING_DUP_NO_CLOEXEC was passed */ +}; + static bool io_openat_force_async(struct io_open *open) { /* @@ -446,3 +452,46 @@ int io_pipe(struct io_kiocb *req, unsigned int issue_flags) fput(files[1]); return ret; } + +int io_dup_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) +{ + unsigned int flags; + struct io_dup *id; + int new_fd; + + if (sqe->off || sqe->addr || sqe->len || sqe->buf_index || sqe->addr3) + return -EINVAL; + + flags = READ_ONCE(sqe->dup_flags); + if (flags & ~IORING_DUP_NO_CLOEXEC) + return -EINVAL; + + new_fd = READ_ONCE(sqe->dup_new_fd); + if (new_fd < 0) + return -EBADF; + + /* ensure the task's creds are used when installing/receiving fds */ + if (req->flags & REQ_F_CREDS) + return -EPERM; + /* default to O_CLOEXEC, disable if IORING_DUP_NO_CLOEXEC is set */ + id = io_kiocb_to_cmd(req, struct io_dup); + id->o_flags = O_CLOEXEC; + if (flags & IORING_DUP_NO_CLOEXEC) + id->o_flags = 0; + id->new_fd = new_fd; + + return 0; +} + /* Issue handler for IORING_OP_DUP. Uses the new_fd and o_flags that io_dup_prep() stored after validating the SQE (unused fields must be zero, only IORING_DUP_NO_CLOEXEC is accepted as a flag, new_fd must be non-negative, REQ_F_CREDS is refused). Hands new_fd, file and o_flags to replace_fd(); a negative return marks the request as failed, and the return value is posted as the completion result either way. Always returns IOU_COMPLETE. NOTE(review): issue_flags is not consulted here -- confirm replace_fd() is acceptable to call from a non-blocking issue attempt. */ +int io_dup(struct io_kiocb *req, unsigned int issue_flags) +{ + struct io_dup *id; + int ret; + + id = io_kiocb_to_cmd(req, struct io_dup); + ret = replace_fd(id->new_fd, id->file, id->o_flags); + if (ret < 0) + req_set_fail(req); + io_req_set_res(req, ret, 0); + return IOU_COMPLETE; +} diff --git a/io_uring/openclose.h b/io_uring/openclose.h index 566739920658..86c91ad33714 100644 --- a/io_uring/openclose.h +++ b/io_uring/openclose.h @@ -21,3 +21,6 @@ int io_pipe(struct 
io_kiocb *req, unsigned int issue_flags); int io_install_fixed_fd_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe); int io_install_fixed_fd(struct io_kiocb *req, unsigned int issue_flags); + +int io_dup_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe); +int io_dup(struct io_kiocb *req, unsigned int issue_flags); -- 2.43.0