Software that submits zoned writes, e.g. a filesystem, may submit zoned writes from multiple CPU cores as long as the zoned writes are serialized per zone. Submitting bios from different CPUs may cause bio reordering if e.g. different bios reach the storage device through different queues. Prepare for preserving the order of pipelined zoned writes per zone by adding the 'rq_cpu' argument to blk_zone_plug_bio(). This argument tells blk_zone_plug_bio() from which CPU a cached request has been allocated. The cached request will only be used if it matches the CPU from which zoned writes are being submitted for the zone associated with the bio. Cc: Damien Le Moal Cc: Christoph Hellwig Signed-off-by: Bart Van Assche --- block/blk-mq.c | 7 +++---- block/blk-zoned.c | 5 ++++- drivers/md/dm.c | 5 ++--- include/linux/blkdev.h | 5 +++-- 4 files changed, 12 insertions(+), 10 deletions(-) diff --git a/block/blk-mq.c b/block/blk-mq.c index 33b639653b5d..31c0db1fc217 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -3219,10 +3219,9 @@ void blk_mq_submit_bio(struct bio *bio) if (blk_mq_attempt_bio_merge(q, bio, nr_segs)) goto queue_exit; - if (bio_needs_zone_write_plugging(bio)) { - if (blk_zone_plug_bio(bio, nr_segs)) - goto queue_exit; - } + if (bio_needs_zone_write_plugging(bio) && + blk_zone_plug_bio(bio, nr_segs, rq ? rq->mq_ctx->cpu : -1)) + goto queue_exit; new_request: if (rq) { diff --git a/block/blk-zoned.c b/block/blk-zoned.c index 1b5923c1a149..dfc77fc44837 100644 --- a/block/blk-zoned.c +++ b/block/blk-zoned.c @@ -1127,6 +1127,9 @@ static void blk_zone_wplug_handle_native_zone_append(struct bio *bio) * blk_zone_plug_bio - Handle a zone write BIO with zone write plugging * @bio: The BIO being submitted * @nr_segs: The number of physical segments of @bio + * @rq_cpu: software queue onto which a request will be queued. -1 if the caller + * has not yet decided onto which software queue to queue the request or if + * the bio won't be converted into a request. 
* * Handle write, write zeroes and zone append operations requiring emulation * using zone write plugging. @@ -1135,7 +1138,7 @@ static void blk_zone_wplug_handle_native_zone_append(struct bio *bio) * write plug. Otherwise, return false to let the submission path process * @bio normally. */ -bool blk_zone_plug_bio(struct bio *bio, unsigned int nr_segs) +bool blk_zone_plug_bio(struct bio *bio, unsigned int nr_segs, int rq_cpu) { struct block_device *bdev = bio->bi_bdev; diff --git a/drivers/md/dm.c b/drivers/md/dm.c index a44e8c2dccee..cd0ec4a39b4d 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -1804,9 +1804,8 @@ static inline bool dm_zone_bio_needs_split(struct bio *bio) static inline bool dm_zone_plug_bio(struct mapped_device *md, struct bio *bio) { - if (!bio_needs_zone_write_plugging(bio)) - return false; - return blk_zone_plug_bio(bio, 0); + return bio_needs_zone_write_plugging(bio) && + blk_zone_plug_bio(bio, 0, -1); } static blk_status_t __send_zone_reset_all_emulated(struct clone_info *ci, diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 2c2579d4b7ed..88fdbd6b1ac0 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -904,7 +904,7 @@ static inline bool bio_needs_zone_write_plugging(struct bio *bio) } } -bool blk_zone_plug_bio(struct bio *bio, unsigned int nr_segs); +bool blk_zone_plug_bio(struct bio *bio, unsigned int nr_segs, int rq_cpu); /** * disk_zone_capacity - returns the zone capacity of zone containing @sector @@ -944,7 +944,8 @@ static inline bool bio_needs_zone_write_plugging(struct bio *bio) return false; } -static inline bool blk_zone_plug_bio(struct bio *bio, unsigned int nr_segs) +static inline bool blk_zone_plug_bio(struct bio *bio, unsigned int nr_segs, + int rq_cpu) { return false; }