While nr_hw_queue update allocates tagset tags, it acquires
->pcpu_alloc_mutex after ->freeze_lock is acquired or the queue is
frozen. This potentially creates a circular locking dependency
involving ->fs_reclaim if reclaim is triggered simultaneously in a
code path which acquires ->pcpu_alloc_mutex first. As the queue is
already frozen while nr_hw_queue update allocates tagsets, reclaim
can't make forward progress, and thus it could cause a deadlock, as
reported in the lockdep splat[1].

Fix this by pre-allocating tagset tags before we freeze the queues
during nr_hw_queue update. The pre-allocated tagset tags can then be
safely installed and used after the queues are frozen.

Reported-by: Yi Zhang
Closes: https://lore.kernel.org/all/CAHj4cs8F=OV9s3La2kEQ34YndgfZP-B5PHS4Z8_b9euKG6J4mw@mail.gmail.com/ [1]
Signed-off-by: Nilay Shroff
---
 block/blk-mq.c | 53 +++++++++++++++++++++++++++++++++++---------------
 1 file changed, 37 insertions(+), 16 deletions(-)

diff --git a/block/blk-mq.c b/block/blk-mq.c
index d5602ff62c7c..687fd47786a6 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -4793,10 +4793,10 @@ static void blk_mq_update_queue_map(struct blk_mq_tag_set *set)
 	}
 }
 
-static int blk_mq_realloc_tag_set_tags(struct blk_mq_tag_set *set,
-				       int new_nr_hw_queues)
+static int blk_mq_prealloc_tag_set_tags(struct blk_mq_tag_set *set,
+		int new_nr_hw_queues, struct blk_mq_tags ***tags)
 {
-	struct blk_mq_tags **new_tags;
+	struct blk_mq_tags **new_tags = NULL;
 	int i;
 
 	if (set->nr_hw_queues >= new_nr_hw_queues)
@@ -4807,24 +4807,42 @@ static int blk_mq_realloc_tag_set_tags(struct blk_mq_tag_set *set,
 	if (!new_tags)
 		return -ENOMEM;
 
-	if (set->tags)
-		memcpy(new_tags, set->tags, set->nr_hw_queues *
-		       sizeof(*set->tags));
-	kfree(set->tags);
-	set->tags = new_tags;
-
 	for (i = set->nr_hw_queues; i < new_nr_hw_queues; i++) {
-		if (!__blk_mq_alloc_map_and_rqs(set, i)) {
-			while (--i >= set->nr_hw_queues)
-				__blk_mq_free_map_and_rqs(set, i);
-			return -ENOMEM;
+		if (blk_mq_is_shared_tags(set->flags))
+			new_tags[i] = set->shared_tags;
+		else {
+			new_tags[i] = blk_mq_alloc_map_and_rqs(set, i,
+					set->queue_depth);
+			if (!new_tags[i])
+				goto out_unwind;
 		}
 		cond_resched();
 	}
 
 done:
-	set->nr_hw_queues = new_nr_hw_queues;
+	*tags = new_tags;
 	return 0;
+out_unwind:
+	while (--i >= set->nr_hw_queues) {
+		if (!blk_mq_is_shared_tags(set->flags))
+			blk_mq_free_map_and_rqs(set, new_tags[i], i);
+	}
+	return -ENOMEM;
+}
+
+static void blk_mq_init_tag_set_tags(struct blk_mq_tag_set *set,
+		int new_nr_hw_queues, struct blk_mq_tags **new_tags)
+{
+	if (set->nr_hw_queues >= new_nr_hw_queues)
+		goto done;
+
+	if (set->tags)
+		memcpy(new_tags, set->tags, set->nr_hw_queues *
+				sizeof(*set->tags));
+	kfree(set->tags);
+	set->tags = new_tags;
+done:
+	set->nr_hw_queues = new_nr_hw_queues;
 }
 
 /*
@@ -5113,6 +5131,7 @@ static void __blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set,
 	unsigned int memflags;
 	int i;
 	struct xarray elv_tbl;
+	struct blk_mq_tags **new_tags;
 	bool queues_frozen = false;
 
 	lockdep_assert_held(&set->tag_list_lock);
@@ -5147,11 +5166,13 @@ static void __blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set,
 		if (blk_mq_elv_switch_none(q, &elv_tbl))
 			goto switch_back;
 
+	if (blk_mq_prealloc_tag_set_tags(set, nr_hw_queues, &new_tags) < 0)
+		goto switch_back;
+
 	list_for_each_entry(q, &set->tag_list, tag_set_list)
 		blk_mq_freeze_queue_nomemsave(q);
 	queues_frozen = true;
-	if (blk_mq_realloc_tag_set_tags(set, nr_hw_queues) < 0)
-		goto switch_back;
+	blk_mq_init_tag_set_tags(set, nr_hw_queues, new_tags);
 
 fallback:
 	blk_mq_update_queue_map(set);
-- 
2.52.0
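
For reference, the lock cycle the commit message describes can be reproduced
in miniature outside the kernel. Below is a minimal userspace sketch using
pthread mutexes as stand-ins for the three locks involved; the mutex names
mirror the commit message, but none of this is kernel code, and the file name
(deadlock_demo.c) is made up for the example. Thread A models the old
nr_hw_queue update path (freeze first, allocate later: freeze_lock ->
pcpu_alloc_mutex); thread B models a concurrent percpu allocation whose
reclaim must wait on the frozen queue (pcpu_alloc_mutex -> fs_reclaim ->
freeze_lock). Bounded trylock loops are used so the demo reports the would-be
deadlock instead of actually hanging.

#include <pthread.h>
#include <stdio.h>
#include <unistd.h>

static pthread_mutex_t freeze_lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_mutex_t pcpu_alloc_mutex = PTHREAD_MUTEX_INITIALIZER;
static pthread_mutex_t fs_reclaim = PTHREAD_MUTEX_INITIALIZER;

/* Retry for ~1s; returns 0 if acquired, -1 if it looks deadlocked. */
static int try_lock_patiently(pthread_mutex_t *m, const char *lock,
			      const char *who)
{
	for (int i = 0; i < 100; i++) {
		if (pthread_mutex_trylock(m) == 0)
			return 0;
		usleep(10 * 1000);
	}
	printf("%s: stuck waiting for %s -> lock ordering cycle\n", who, lock);
	return -1;
}

/* Old ordering in nr_hw_queue update: freeze queues, then allocate. */
static void *nr_hw_queue_update(void *arg)
{
	(void)arg;
	pthread_mutex_lock(&freeze_lock);	/* queues now "frozen" */
	usleep(100 * 1000);			/* let thread B get going */
	if (try_lock_patiently(&pcpu_alloc_mutex, "pcpu_alloc_mutex",
			       "updater") == 0)
		pthread_mutex_unlock(&pcpu_alloc_mutex);
	pthread_mutex_unlock(&freeze_lock);
	return NULL;
}

/* Allocation path that enters reclaim, which needs the frozen queue. */
static void *alloc_with_reclaim(void *arg)
{
	(void)arg;
	pthread_mutex_lock(&pcpu_alloc_mutex);
	usleep(100 * 1000);			/* let thread A freeze first */
	pthread_mutex_lock(&fs_reclaim);	/* allocation enters reclaim */
	if (try_lock_patiently(&freeze_lock, "freeze_lock", "reclaim") == 0)
		pthread_mutex_unlock(&freeze_lock);
	pthread_mutex_unlock(&fs_reclaim);
	pthread_mutex_unlock(&pcpu_alloc_mutex);
	return NULL;
}

int main(void)
{
	pthread_t a, b;

	pthread_create(&a, NULL, nr_hw_queue_update, NULL);
	pthread_create(&b, NULL, alloc_with_reclaim, NULL);
	pthread_join(a, NULL);
	pthread_join(b, NULL);
	return 0;
}

Build with "cc -pthread deadlock_demo.c". Both threads back off after about a
second and report the cycle rather than hanging. The patch breaks the same
cycle in the kernel because the pre-allocation step now runs (and drops
->pcpu_alloc_mutex) before any queue is frozen, so the updater never holds
both sides of the inversion at once.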