Zoned writes may be requeued. This happens if a block driver returns
BLK_STS_RESOURCE, if requests are requeued to handle SCSI unit
attentions, or if the SCSI error handler requeues requests after error
handling has finished. A later patch enables write pipelining and
increases the number of pending writes per zone. If multiple writes are
pending per zone, write requests may be requeued in a different order
than the order in which they were submitted. Restore the request order
if requests are requeued.

Add RQF_DONTPREP to RQF_NOMERGE_FLAGS because this patch may cause
RQF_DONTPREP requests to be sent to the code that checks whether a
request can be merged, and RQF_DONTPREP requests must not be merged.

Cc: Damien Le Moal
Cc: Christoph Hellwig
Signed-off-by: Bart Van Assche
---
 block/bfq-iosched.c    |  2 ++
 block/blk-mq.c         | 20 +++++++++++++++++++-
 block/blk-mq.h         |  2 ++
 block/kyber-iosched.c  |  2 ++
 block/mq-deadline.c    |  7 ++++++-
 include/linux/blk-mq.h |  2 +-
 6 files changed, 32 insertions(+), 3 deletions(-)

diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c
index 50e51047e1fe..297a37b38bbb 100644
--- a/block/bfq-iosched.c
+++ b/block/bfq-iosched.c
@@ -6259,6 +6259,8 @@ static void bfq_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq,
 
 	if (flags & BLK_MQ_INSERT_AT_HEAD) {
 		list_add(&rq->queuelist, &bfqd->dispatch);
+	} else if (flags & BLK_MQ_INSERT_ORDERED) {
+		blk_mq_insert_ordered(rq, &bfqd->dispatch);
 	} else if (!bfqq) {
 		list_add_tail(&rq->queuelist, &bfqd->dispatch);
 	} else {
diff --git a/block/blk-mq.c b/block/blk-mq.c
index 2af9c59ff2ad..7f43138f8a09 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -1583,7 +1583,9 @@ static void blk_mq_requeue_work(struct work_struct *work)
 		 * already. Insert it into the hctx dispatch list to avoid
 		 * block layer merges for the request.
 		 */
-		if (rq->rq_flags & RQF_DONTPREP)
+		if (blk_mq_preserve_order(rq))
+			blk_mq_insert_request(rq, BLK_MQ_INSERT_ORDERED);
+		else if (rq->rq_flags & RQF_DONTPREP)
 			blk_mq_request_bypass_insert(rq, 0);
 		else
 			blk_mq_insert_request(rq, BLK_MQ_INSERT_AT_HEAD);
@@ -2617,6 +2619,20 @@ static void blk_mq_insert_requests(struct blk_mq_hw_ctx *hctx,
 	blk_mq_run_hw_queue(hctx, run_queue_async);
 }
 
+void blk_mq_insert_ordered(struct request *rq, struct list_head *list)
+{
+	struct request_queue *q = rq->q;
+	struct request *rq2;
+
+	list_for_each_entry(rq2, list, queuelist)
+		if (rq2->q == q && blk_rq_pos(rq2) > blk_rq_pos(rq))
+			break;
+
+	/* Insert rq before rq2. If rq2 is the list head, append at the end. */
+	list_add_tail(&rq->queuelist, &rq2->queuelist);
+}
+EXPORT_SYMBOL_GPL(blk_mq_insert_ordered);
+
 static void blk_mq_insert_request(struct request *rq, blk_insert_t flags)
 {
 	struct request_queue *q = rq->q;
@@ -2671,6 +2687,8 @@ static void blk_mq_insert_request(struct request *rq, blk_insert_t flags)
 		spin_lock(&ctx->lock);
 		if (flags & BLK_MQ_INSERT_AT_HEAD)
 			list_add(&rq->queuelist, &ctx->rq_lists[hctx->type]);
+		else if (flags & BLK_MQ_INSERT_ORDERED)
+			blk_mq_insert_ordered(rq, &ctx->rq_lists[hctx->type]);
 		else
 			list_add_tail(&rq->queuelist, &ctx->rq_lists[hctx->type]);
 
diff --git a/block/blk-mq.h b/block/blk-mq.h
index affb2e14b56e..393660311a56 100644
--- a/block/blk-mq.h
+++ b/block/blk-mq.h
@@ -40,8 +40,10 @@ enum {
 
 typedef unsigned int __bitwise blk_insert_t;
 #define BLK_MQ_INSERT_AT_HEAD	((__force blk_insert_t)0x01)
+#define BLK_MQ_INSERT_ORDERED	((__force blk_insert_t)0x02)
 
 void blk_mq_submit_bio(struct bio *bio);
+void blk_mq_insert_ordered(struct request *rq, struct list_head *list);
 int blk_mq_poll(struct request_queue *q, blk_qc_t cookie,
 		struct io_comp_batch *iob, unsigned int flags);
 void blk_mq_exit_queue(struct request_queue *q);
diff --git a/block/kyber-iosched.c b/block/kyber-iosched.c
index 70cbc7b2deb4..e1ed9e9206fe 100644
--- a/block/kyber-iosched.c
+++ b/block/kyber-iosched.c
@@ -591,6 +591,8 @@ static void kyber_insert_requests(struct blk_mq_hw_ctx *hctx,
 		trace_block_rq_insert(rq);
 		if (flags & BLK_MQ_INSERT_AT_HEAD)
 			list_move(&rq->queuelist, head);
+		else if (flags & BLK_MQ_INSERT_ORDERED)
+			blk_mq_insert_ordered(rq, head);
 		else
 			list_move_tail(&rq->queuelist, head);
 		sbitmap_set_bit(&khd->kcq_map[sched_domain],
diff --git a/block/mq-deadline.c b/block/mq-deadline.c
index b9b7cdf1d3c9..1226ad3876ab 100644
--- a/block/mq-deadline.c
+++ b/block/mq-deadline.c
@@ -686,7 +686,12 @@ static void dd_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq,
 		 * set expire time and add to fifo list
 		 */
 		rq->fifo_time = jiffies + dd->fifo_expire[data_dir];
-		list_add_tail(&rq->queuelist, &per_prio->fifo_list[data_dir]);
+		if (flags & BLK_MQ_INSERT_ORDERED)
+			blk_mq_insert_ordered(rq,
+					      &per_prio->fifo_list[data_dir]);
+		else
+			list_add_tail(&rq->queuelist,
+				      &per_prio->fifo_list[data_dir]);
 	}
 }
 
diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
index 30d7cd1b0484..1c516151fff0 100644
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -86,7 +86,7 @@ enum rqf_flags {
 
 /* flags that prevent us from merging requests: */
 #define RQF_NOMERGE_FLAGS \
-	(RQF_STARTED | RQF_FLUSH_SEQ | RQF_SPECIAL_PAYLOAD)
+	(RQF_STARTED | RQF_FLUSH_SEQ | RQF_DONTPREP | RQF_SPECIAL_PAYLOAD)
 
 enum mq_rq_state {
 	MQ_RQ_IDLE	= 0,
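As an aside for reviewers, below is a minimal user-space sketch of the
ordered-insert technique that blk_mq_insert_ordered() implements: walk
the pending list until the first request from the same queue whose
start sector lies beyond the new request, and insert in front of it;
if no such request exists, append at the tail. The names fake_rq and
insert_ordered are simplified stand-ins invented for this sketch, not
kernel code.

#include <stdio.h>
#include <stdlib.h>

/* Simplified stand-in for struct request. */
struct fake_rq {
	int q;			/* stand-in for rq->q */
	unsigned long long pos;	/* stand-in for blk_rq_pos(rq) */
	struct fake_rq *next;
};

/*
 * Stop at the first same-queue request that starts beyond rq and
 * insert before it; if the walk reaches the end of the list, the
 * new request is appended at the tail.
 */
static void insert_ordered(struct fake_rq **head, struct fake_rq *rq)
{
	struct fake_rq **pp = head;

	while (*pp && !((*pp)->q == rq->q && (*pp)->pos > rq->pos))
		pp = &(*pp)->next;

	rq->next = *pp;
	*pp = rq;
}

int main(void)
{
	/* Requeue four writes to the same zone in a scrambled order. */
	static const unsigned long long requeue_order[] = { 8, 0, 16, 4 };
	struct fake_rq *head = NULL;

	for (int i = 0; i < 4; i++) {
		struct fake_rq *rq = malloc(sizeof(*rq));

		rq->q = 1;
		rq->pos = requeue_order[i];
		insert_ordered(&head, rq);
	}

	/* Prints "0 4 8 16": the submission (LBA) order is restored. */
	for (struct fake_rq *rq = head; rq; rq = rq->next)
		printf("%llu ", rq->pos);
	printf("\n");

	return 0;
}

The kernel version needs no explicit tail branch: when
list_for_each_entry() terminates without hitting the break, rq2 points
at the list head, and list_add_tail(&rq->queuelist, &rq2->queuelist) on
a circular list inserts just before the head, i.e. at the end of the
list, which is what the comment in blk_mq_insert_ordered() refers to.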