Add infrastructure for delivering I/O commands to the ublk server in
batches, preparing for the upcoming UBLK_U_IO_FETCH_IO_CMDS feature.

Key components:

- struct ublk_batch_fcmd: represents a batch fetch uring_cmd that
  receives multiple I/O tags in a single operation, using io_uring's
  multishot command for efficient ublk I/O delivery.

- ublk_batch_dispatch(): batch version of ublk_dispatch_req() that:

  * pulls multiple request tags from the events FIFO (lock-free reader)
  * prepares each I/O for delivery (including auto buffer registration)
  * delivers the tags to userspace via a single uring_cmd notification
  * handles partial failures by restoring undelivered tags to the FIFO

The batch approach significantly reduces notification overhead by
aggregating multiple I/O completions into a single uring_cmd, while
keeping the same I/O processing semantics as individual operations.

Error handling keeps the system consistent: if copying tags to the
selected buffer or posting the CQE fails, the undelivered tags are
restored to the FIFO for retry.

This runs in task work context, scheduled via
io_uring_cmd_complete_in_task() or called directly from ->uring_cmd(),
enabling efficient batch processing without blocking the I/O
submission path.

Signed-off-by: Ming Lei
---
 drivers/block/ublk_drv.c      | 123 ++++++++++++++++++++++++++++++++++
 include/uapi/linux/ublk_cmd.h |   6 ++
 2 files changed, 129 insertions(+)

diff --git a/drivers/block/ublk_drv.c b/drivers/block/ublk_drv.c
index 0f955592ebd5..2ed2adc93df6 100644
--- a/drivers/block/ublk_drv.c
+++ b/drivers/block/ublk_drv.c
@@ -91,6 +91,12 @@
 		UBLK_BATCH_F_HAS_BUF_ADDR | \
 		UBLK_BATCH_F_AUTO_BUF_REG_FALLBACK)
 
+/* ublk batch fetch uring_cmd */
+struct ublk_batch_fcmd {
+	struct io_uring_cmd *cmd;
+	unsigned short buf_group;
+};
+
 struct ublk_uring_cmd_pdu {
 	/*
 	 * Store requests in same batch temporarily for queuing them to
@@ -623,6 +629,32 @@ static wait_queue_head_t ublk_idr_wq;	/* wait until one idr is freed */
 
 static DEFINE_MUTEX(ublk_ctl_mutex);
 
+static void ublk_batch_deinit_fetch_buf(struct ublk_batch_io_data *data,
+					struct ublk_batch_fcmd *fcmd,
+					int res)
+{
+	io_uring_cmd_done(fcmd->cmd, res, 0, data->issue_flags);
+	fcmd->cmd = NULL;
+}
+
+static int ublk_batch_fetch_post_cqe(struct ublk_batch_fcmd *fcmd,
+				     struct io_br_sel *sel,
+				     unsigned int issue_flags)
+{
+	if (io_uring_mshot_cmd_post_cqe(fcmd->cmd, sel, issue_flags))
+		return -ENOBUFS;
+	return 0;
+}
+
+static ssize_t ublk_batch_copy_io_tags(struct ublk_batch_fcmd *fcmd,
+				       void __user *buf, const u16 *tag_buf,
+				       unsigned int len)
+{
+	if (copy_to_user(buf, tag_buf, len))
+		return -EFAULT;
+	return len;
+}
+
 #define UBLK_MAX_UBLKS		UBLK_MINORS
 
 /*
@@ -1424,6 +1456,97 @@ static void ublk_dispatch_req(struct ublk_queue *ubq,
 	}
 }
 
+static bool __ublk_batch_prep_dispatch(struct ublk_batch_io_data *data,
+				       unsigned short tag)
+{
+	struct ublk_queue *ubq = data->ubq;
+	struct ublk_device *ub = ubq->dev;
+	struct ublk_io *io = &ubq->ios[tag];
+	struct request *req = blk_mq_tag_to_rq(ub->tag_set.tags[ubq->q_id], tag);
+	enum auto_buf_reg_res res = AUTO_BUF_REG_FALLBACK;
+	struct io_uring_cmd *cmd = data->cmd;
+
+	if (!ublk_start_io(ubq, req, io))
+		return false;
+
+	if (ublk_support_auto_buf_reg(ubq) && ublk_rq_has_data(req))
+		res = __ublk_do_auto_buf_reg(ubq, req, io, cmd,
+					     data->issue_flags);
+
+	ublk_io_lock(io);
+	ublk_prep_auto_buf_reg_io(ubq, req, io, cmd, res == AUTO_BUF_REG_OK);
+	ublk_io_unlock(io);
+
+	return res != AUTO_BUF_REG_FAIL;
+}
+
+static void ublk_batch_prep_dispatch(struct ublk_batch_io_data *data,
+				     unsigned short *tag_buf,
+				     unsigned int len)
+{
+	int i;
+
+	for (i = 0; i < len; i += 1) {
+		unsigned short tag = tag_buf[i];
+
+		if (!__ublk_batch_prep_dispatch(data, tag))
+			tag_buf[i] = UBLK_BATCH_IO_UNUSED_TAG;
+	}
+}
+
+#define MAX_NR_TAG	128
+static int __ublk_batch_dispatch(struct ublk_batch_io_data *data,
+				 struct ublk_batch_fcmd *fcmd)
+{
+	unsigned short tag_buf[MAX_NR_TAG];
+	struct io_br_sel sel;
+	size_t len = 0;
+	int ret;
+
+	sel = io_uring_cmd_buffer_select(fcmd->cmd, fcmd->buf_group, &len,
+					 data->issue_flags);
+	if (sel.val < 0)
+		return sel.val;
+	if (!sel.addr)
+		return -ENOBUFS;
+
+	/* single reader needn't lock; each kfifo element (tag) is 2 bytes */
+	len = min(len, sizeof(tag_buf)) / 2;
+	len = kfifo_out(&data->ubq->evts_fifo, tag_buf, len);
+
+	ublk_batch_prep_dispatch(data, tag_buf, len);
+
+	sel.val = ublk_batch_copy_io_tags(fcmd, sel.addr, tag_buf, len * 2);
+	ret = ublk_batch_fetch_post_cqe(fcmd, &sel, data->issue_flags);
+	if (unlikely(ret < 0)) {
+		int res = kfifo_in_spinlocked_noirqsave(&data->ubq->evts_fifo,
+				tag_buf, len, &data->ubq->evts_lock);
+
+		pr_warn("%s: copy tags or post CQE failure, move back "
+			"tags(%d %lu) ret %d\n", __func__, res, len,
+			ret);
+	}
+	return ret;
+}
+
+static __maybe_unused int
+ublk_batch_dispatch(struct ublk_batch_io_data *data,
+		    struct ublk_batch_fcmd *fcmd)
+{
+	int ret = 0;
+
+	while (!ublk_io_evts_empty(data->ubq)) {
+		ret = __ublk_batch_dispatch(data, fcmd);
+		if (ret <= 0)
+			break;
+	}
+
+	if (ret < 0)
+		ublk_batch_deinit_fetch_buf(data, fcmd, ret);
+
+	return ret;
+}
+
 static void ublk_cmd_tw_cb(struct io_uring_cmd *cmd,
 			   unsigned int issue_flags)
 {
diff --git a/include/uapi/linux/ublk_cmd.h b/include/uapi/linux/ublk_cmd.h
index 695b38522995..fbd3582bc203 100644
--- a/include/uapi/linux/ublk_cmd.h
+++ b/include/uapi/linux/ublk_cmd.h
@@ -553,6 +553,12 @@ struct ublk_elem_header {
 	__u32	result;		/* I/O completion result (commit only) */
 };
 
+/*
+ * If this tag value is observed in the buffer filled by
+ * `UBLK_U_IO_FETCH_IO_CMDS`, the ublk server can simply ignore it.
+ */
+#define UBLK_BATCH_IO_UNUSED_TAG	(__u16)(-1)
+
 /*
  * uring_cmd buffer structure
  *
-- 
2.47.0
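
For context, a rough sketch of the userspace side this prepares for: once
UBLK_U_IO_FETCH_IO_CMDS is wired up, the server-provided buffer is filled
with __u16 tags (len * 2 bytes are copied), and entries the kernel failed
to prepare are marked UBLK_BATCH_IO_UNUSED_TAG so the server can skip them.
The sketch below is illustrative only and not part of this patch; it
assumes the CQE result reports the number of bytes copied into the
selected buffer, and handle_io_tag() is a hypothetical per-tag handler.

	#include <stddef.h>
	#include <stdint.h>

	#define UBLK_BATCH_IO_UNUSED_TAG	((uint16_t)-1)

	/* hypothetical per-tag handler: look up the I/O slot and serve it */
	static void handle_io_tag(uint16_t tag)
	{
		(void)tag;
	}

	/* walk a tag buffer delivered by one batch fetch completion */
	static void consume_fetched_tags(const void *buf, size_t res_bytes)
	{
		const uint16_t *tags = buf;
		size_t i, nr = res_bytes / sizeof(uint16_t);

		for (i = 0; i < nr; i++) {
			/* tags that failed preparation in the kernel are unused */
			if (tags[i] == UBLK_BATCH_IO_UNUSED_TAG)
				continue;
			handle_io_tag(tags[i]);
		}
	}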