One of the optimizations in the block layer is that the software queues
are bypassed if it is expected that the block driver will accept a
request. This can cause request reordering, even for requests submitted
from the same CPU core. This patch preserves the order of sequential
zoned writes submitted from a given CPU core by always inserting these
requests into the appropriate software queue.

Cc: Damien Le Moal
Cc: Christoph Hellwig
Signed-off-by: Bart Van Assche
---
 block/blk-mq.c         | 35 +++++++++++++++++++++++++++++++++--
 include/linux/blk-mq.h | 11 +++++++++++
 include/linux/blkdev.h | 17 +++++++++++++++++
 3 files changed, 61 insertions(+), 2 deletions(-)

diff --git a/block/blk-mq.c b/block/blk-mq.c
index b2fdeaac0efb..e7958cfddfbf 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -1549,6 +1549,35 @@ void blk_mq_requeue_request(struct request *rq, bool kick_requeue_list)
 }
 EXPORT_SYMBOL(blk_mq_requeue_request);
 
+/*
+ * Whether the block layer should preserve the order of @rq relative to other
+ * requests submitted to the same software queue.
+ */
+static bool blk_mq_preserve_order(struct request *rq)
+{
+	return blk_pipeline_zwr(rq->q) && blk_rq_is_seq_zoned_write(rq);
+}
+
+/*
+ * Whether the order should be preserved for any request in @list. Returns
+ * %true if and only if zoned write pipelining is enabled and @list contains
+ * one or more sequential zoned writes.
+ */
+static bool blk_mq_preserve_order_for_list(struct request_queue *q,
+					   struct list_head *list)
+{
+	struct request *rq;
+
+	if (!blk_pipeline_zwr(q))
+		return false;
+
+	list_for_each_entry(rq, list, queuelist)
+		if (blk_rq_is_seq_zoned_write(rq))
+			return true;
+
+	return false;
+}
+
 static void blk_mq_requeue_work(struct work_struct *work)
 {
 	struct request_queue *q =
@@ -2578,7 +2607,8 @@ static void blk_mq_insert_requests(struct blk_mq_hw_ctx *hctx,
 	 * Try to issue requests directly if the hw queue isn't busy to save an
 	 * extra enqueue & dequeue to the sw queue.
 	 */
-	if (!hctx->dispatch_busy && !run_queue_async) {
+	if (!hctx->dispatch_busy && !run_queue_async &&
+	    !blk_mq_preserve_order_for_list(hctx->queue, list)) {
 		blk_mq_run_dispatch_ops(hctx->queue,
 				blk_mq_try_issue_list_directly(hctx, list));
 		if (list_empty(list))
@@ -3230,7 +3260,8 @@ void blk_mq_submit_bio(struct bio *bio)
 
 	hctx = rq->mq_hctx;
 	if ((rq->rq_flags & RQF_USE_SCHED) ||
-	    (hctx->dispatch_busy && (q->nr_hw_queues == 1 || !is_sync))) {
+	    (hctx->dispatch_busy && (q->nr_hw_queues == 1 || !is_sync)) ||
+	    blk_mq_preserve_order(rq)) {
 		blk_mq_insert_request(rq, 0);
 		blk_mq_run_hw_queue(hctx, true);
 	} else {
diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
index b54506b3b76d..b88b870aaf8f 100644
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -1211,4 +1211,15 @@ static inline int blk_rq_map_sg(struct request *rq, struct scatterlist *sglist)
 }
 void blk_dump_rq_flags(struct request *, char *);
 
+static inline bool blk_rq_is_seq_zoned_write(struct request *rq)
+{
+	switch (req_op(rq)) {
+	case REQ_OP_WRITE:
+	case REQ_OP_WRITE_ZEROES:
+		return bdev_zone_is_seq(rq->q->disk->part0, blk_rq_pos(rq));
+	default:
+		return false;
+	}
+}
+
 #endif /* BLK_MQ_H */
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index b97132252ec2..f18523e841a4 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -865,6 +865,18 @@ static inline unsigned int disk_nr_zones(struct gendisk *disk)
 	return disk->nr_zones;
 }
 
+/*
+ * blk_pipeline_zwr() - Whether or not sequential zoned writes will be
+ *	pipelined per zone.
+ * @q: request queue pointer.
+ *
+ * Return: %true if and only if zoned writes will be pipelined per zone.
+ */
+static inline bool blk_pipeline_zwr(struct request_queue *q)
+{
+	return q->limits.features & BLK_FEAT_ORDERED_HWQ;
+}
+
 /**
  * bio_needs_zone_write_plugging - Check if a BIO needs to be handled with zone
  * write plugging
@@ -951,6 +963,11 @@ static inline bool bdev_zone_is_seq(struct block_device *bdev, sector_t sector)
 	return false;
 }
 
+static inline bool blk_pipeline_zwr(struct request_queue *q)
+{
+	return false;
+}
+
 static inline bool bio_needs_zone_write_plugging(struct bio *bio)
 {
 	return false;
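
Note (illustration only, not part of this patch): a minimal sketch of how a
blk-mq driver whose hardware queues preserve the order of submitted writes
could opt in to zoned write pipelining. It assumes the BLK_FEAT_ORDERED_HWQ
flag introduced elsewhere in this series and the queue_limits-based
blk_mq_alloc_disk() API; the my_zoned_dev structure and helper below are
hypothetical.

#include <linux/blk-mq.h>
#include <linux/blkdev.h>

struct my_zoned_dev {
	struct blk_mq_tag_set	tag_set;
	struct gendisk		*disk;
};

static int my_zoned_dev_add_disk(struct my_zoned_dev *dev)
{
	/*
	 * Advertise a zoned disk whose hardware queues preserve the order of
	 * the writes submitted to them, so the block layer may pipeline
	 * sequential zoned writes instead of serializing them.
	 */
	struct queue_limits lim = {
		.features = BLK_FEAT_ZONED | BLK_FEAT_ORDERED_HWQ,
	};
	struct gendisk *disk;

	disk = blk_mq_alloc_disk(&dev->tag_set, &lim, dev);
	if (IS_ERR(disk))
		return PTR_ERR(disk);

	dev->disk = disk;
	return 0;
}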