When blk_mq_flush_plug_list() dispatches requests, stacking drivers (like
loop with NOWAIT support) may submit bios to backing devices from within
their queue_rq() callback. These backing device IOs can add new requests
back to the same plug->mq_list that's currently being iterated, causing
list corruption.

The corruption occurs in this call path:

  submit_bio() with active plug
    blk_mq_submit_bio()
      blk_add_rq_to_plug()                        - adds loop request
        if plug is full:
          blk_mq_flush_plug_list(plug, false)
            blk_mq_dispatch_list(&plug->mq_list, ...)
              rq_list_pop(&plug->mq_list)         # Iterating
                loop_queue_rq()
                  lo_rw_aio_nowait()
                    file->f_op->write_iter()      # Backing file IO
                      submit_bio()                # New bio!
                        blk_mq_submit_bio()
                          blk_add_rq_to_plug()
                            rq_list_add_tail(&plug->mq_list, rq)  # CORRUPTION!

Fix this by extracting the dispatch logic into __blk_mq_flush_plug_list()
and making blk_mq_flush_plug_list() loop:

1. Save plug state (rq_count, has_elevator, multiple_queues)
2. Reset plug state to allow new requests
3. Swap plug->mq_list to a local list
4. Dispatch the local list via __blk_mq_flush_plug_list()
5. Repeat if new requests were added during dispatch

This ensures:
- No list corruption: each iteration works on a detached copy
- Complete flush: keeps flushing until no new requests are added
- Handles nesting: recursive calls see rq_count=0 and return early

Signed-off-by: Ming Lei
---
 block/blk-mq.c | 64 +++++++++++++++++++++++++++++++++++++-------------
 1 file changed, 48 insertions(+), 16 deletions(-)

diff --git a/block/blk-mq.c b/block/blk-mq.c
index f2650c97a75e..128c2bc28d94 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -2955,10 +2955,30 @@ static void blk_mq_dispatch_multiple_queue_requests(struct rq_list *rqs)
 	} while (!rq_list_empty(rqs));
 }
 
+static void __blk_mq_flush_plug_list(struct rq_list *list,
+				     unsigned int depth,
+				     bool has_elevator,
+				     bool multiple_queues,
+				     bool from_schedule)
+{
+	if (!has_elevator && !from_schedule) {
+		if (multiple_queues) {
+			blk_mq_dispatch_multiple_queue_requests(list);
+			return;
+		}
+
+		blk_mq_dispatch_queue_requests(list, depth);
+		if (rq_list_empty(list))
+			return;
+	}
+
+	do {
+		blk_mq_dispatch_list(list, from_schedule);
+	} while (!rq_list_empty(list));
+}
+
 void blk_mq_flush_plug_list(struct blk_plug *plug, bool from_schedule)
 {
-	unsigned int depth;
-
 	/*
 	 * We may have been called recursively midway through handling
 	 * plug->mq_list via a schedule() in the driver's queue_rq() callback.
@@ -2968,23 +2988,35 @@ void blk_mq_flush_plug_list(struct blk_plug *plug, bool from_schedule)
 	 */
 	if (plug->rq_count == 0)
 		return;
-	depth = plug->rq_count;
-	plug->rq_count = 0;
 
-	if (!plug->has_elevator && !from_schedule) {
-		if (plug->multiple_queues) {
-			blk_mq_dispatch_multiple_queue_requests(&plug->mq_list);
-			return;
-		}
+	/*
+	 * Flush requests in a loop to handle cases where queue_rq() callback
+	 * adds new requests back to the plug (e.g., stacking drivers like loop
+	 * submitting bios to backing devices). Keep flushing until no new
+	 * requests are added.
+	 */
+	do {
+		struct rq_list reqs;
+		unsigned int depth;
+		bool has_elevator, multiple_queues;
 
-		blk_mq_dispatch_queue_requests(&plug->mq_list, depth);
-		if (rq_list_empty(&plug->mq_list))
-			return;
-	}
+		depth = plug->rq_count;
+		plug->rq_count = 0;
+		has_elevator = plug->has_elevator;
+		plug->has_elevator = false;
+		multiple_queues = plug->multiple_queues;
+		plug->multiple_queues = false;
 
-	do {
-		blk_mq_dispatch_list(&plug->mq_list, from_schedule);
-	} while (!rq_list_empty(&plug->mq_list));
+		/*
+		 * Swap plug->mq_list to a local list to allow new requests
+		 * being added to plug->mq_list during dispatching.
+		 */
+		reqs = plug->mq_list;
+		rq_list_init(&plug->mq_list);
+
+		__blk_mq_flush_plug_list(&reqs, depth, has_elevator,
+					 multiple_queues, from_schedule);
+	} while (plug->rq_count > 0);
 }
 
 static void blk_mq_try_issue_list_directly(struct blk_mq_hw_ctx *hctx,
-- 
2.47.0