When loop device is configured with DIRECT_IO, NOWAIT IOs can hang because of a deadlock scenario: - loop IO can be throttled by rqos - backing file IOs are always added to tail of per-task ->bio_list When loop IO is throttled, the dependent backing file IO cannot be handled, resulting in deadlock. Introduce BD_LOWLEVEL_BIO_FIRST flag and apply it for loop DIRECT_IO mode, so block layer goes through __submit_bio_noacct() in which the low level IOs are handled first. This ensures proper bio submission order and prevents NOWAIT IO hangs in loop direct I/O mode. Fixes: 0ba93a906dda ("loop: try to handle loop aio command via NOWAIT IO first") Signed-off-by: Ming Lei --- block/blk-core.c | 3 ++- drivers/block/loop.c | 12 ++++++++++++ include/linux/blk_types.h | 1 + 3 files changed, 15 insertions(+), 1 deletion(-) diff --git a/block/blk-core.c b/block/blk-core.c index 14ae73eebe0d..3ae9eebc9fab 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -751,7 +751,8 @@ void submit_bio_noacct_nocheck(struct bio *bio, bool split) bio_list_add_head(&current->bio_list[0], bio); else bio_list_add(&current->bio_list[0], bio); - } else if (!bdev_test_flag(bio->bi_bdev, BD_HAS_SUBMIT_BIO)) { + } else if (!bdev_test_flag(bio->bi_bdev, BD_HAS_SUBMIT_BIO | + BD_LOWLEVEL_BIO_FIRST)) { __submit_bio_noacct_mq(bio); } else { __submit_bio_noacct(bio); diff --git a/drivers/block/loop.c b/drivers/block/loop.c index 705373b9668d..d305622568ed 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -208,6 +208,18 @@ static inline void loop_update_dio(struct loop_device *lo) if ((lo->lo_flags & LO_FLAGS_DIRECT_IO) && !lo_can_use_dio(lo)) lo->lo_flags &= ~LO_FLAGS_DIRECT_IO; + + /* + * NOWAIT is applied for direct IO mode, so backing file IOs are + * submitted in loop IO context. BD_LOWLEVEL_BIO_FIRST has to be + * set for avoiding IO deadlock which is triggered when loop IO + * and backing file IO are reordered, meanwhile loop IO is throttled + * by block layer RQOS. 
+ */ + if (lo->lo_flags & LO_FLAGS_DIRECT_IO) + bdev_set_flag(lo->lo_disk->part0, BD_LOWLEVEL_BIO_FIRST); + else + bdev_clear_flag(lo->lo_disk->part0, BD_LOWLEVEL_BIO_FIRST); } /** diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index cbbcb9051ec3..71b96f17ca27 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -54,6 +54,7 @@ struct block_device { #ifdef CONFIG_FAIL_MAKE_REQUEST #define BD_MAKE_IT_FAIL (1u<<12) #endif +#define BD_LOWLEVEL_BIO_FIRST (1u<<13) dev_t bd_dev; struct address_space *bd_mapping; /* page cache */ -- 2.47.0