The queue should not be frozen under rq_qos_mutex; see, for example, commit
9730763f4756 ("block: correct locking order for protecting blk-wbt
parameters"). This means the current implementation of rq_qos_add() is
problematic. Add a new helper and prepare to fix this problem in the
following patches.

Signed-off-by: Yu Kuai
---
 block/blk-rq-qos.c | 27 +++++++++++++++++++++++++++
 block/blk-rq-qos.h |  2 ++
 2 files changed, 29 insertions(+)

diff --git a/block/blk-rq-qos.c b/block/blk-rq-qos.c
index 654478dfbc20..353397d7e126 100644
--- a/block/blk-rq-qos.c
+++ b/block/blk-rq-qos.c
@@ -322,6 +322,33 @@ void rq_qos_exit(struct request_queue *q)
 	mutex_unlock(&q->rq_qos_mutex);
 }
 
+int rq_qos_add_freezed(struct rq_qos *rqos, struct gendisk *disk,
+		       enum rq_qos_id id, const struct rq_qos_ops *ops)
+{
+	struct request_queue *q = disk->queue;
+
+	WARN_ON_ONCE(q->mq_freeze_depth == 0);
+	lockdep_assert_held(&q->rq_qos_mutex);
+
+	if (rq_qos_id(q, id))
+		return -EBUSY;
+
+	rqos->disk = disk;
+	rqos->id = id;
+	rqos->ops = ops;
+	rqos->next = q->rq_qos;
+	q->rq_qos = rqos;
+	blk_queue_flag_set(QUEUE_FLAG_QOS_ENABLED, q);
+
+	if (rqos->ops->debugfs_attrs) {
+		mutex_lock(&q->debugfs_mutex);
+		blk_mq_debugfs_register_rqos(rqos);
+		mutex_unlock(&q->debugfs_mutex);
+	}
+
+	return 0;
+}
+
 int rq_qos_add(struct rq_qos *rqos, struct gendisk *disk, enum rq_qos_id id,
 	       const struct rq_qos_ops *ops)
 {
diff --git a/block/blk-rq-qos.h b/block/blk-rq-qos.h
index b538f2c0febc..4a7fec01600b 100644
--- a/block/blk-rq-qos.h
+++ b/block/blk-rq-qos.h
@@ -87,6 +87,8 @@ static inline void rq_wait_init(struct rq_wait *rq_wait)
 
 int rq_qos_add(struct rq_qos *rqos, struct gendisk *disk, enum rq_qos_id id,
 	       const struct rq_qos_ops *ops);
+int rq_qos_add_freezed(struct rq_qos *rqos, struct gendisk *disk,
+		       enum rq_qos_id id, const struct rq_qos_ops *ops);
 void rq_qos_del(struct rq_qos *rqos);
 
 typedef bool (acquire_inflight_cb_t)(struct rq_wait *rqw, void *private_data);
-- 
2.51.0

wbt_init() can be called from the sysfs attribute and from
wbt_enable_default(), however, the lock orders are inverted:

- queue_wb_lat_store() freezes the queue first, and then wbt_init() holds
  rq_qos_mutex. In this case the queue will be frozen again inside
  rq_qos_add(); however, freezing the queue recursively here is
  ineffective;
- wbt_enable_default() from an elevator switch holds rq_qos_mutex first,
  and then rq_qos_add() freezes the queue;

Fix this problem by converting wbt_init() to use the new helper
rq_qos_add_freezed(), and for wbt_enable_default(), freeze the queue
before calling wbt_init().

Fixes: a13bd91be223 ("block/rq_qos: protect rq_qos apis with a new lock")
Signed-off-by: Yu Kuai
---
 block/blk-wbt.c | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/block/blk-wbt.c b/block/blk-wbt.c
index eb8037bae0bd..a784f6d338b4 100644
--- a/block/blk-wbt.c
+++ b/block/blk-wbt.c
@@ -724,8 +724,12 @@ void wbt_enable_default(struct gendisk *disk)
 	if (!blk_queue_registered(q))
 		return;
 
-	if (queue_is_mq(q) && enable)
+	if (queue_is_mq(q) && enable) {
+		unsigned int memflags = blk_mq_freeze_queue(q);
+
 		wbt_init(disk);
+		blk_mq_unfreeze_queue(q, memflags);
+	}
 }
 EXPORT_SYMBOL_GPL(wbt_enable_default);
 
@@ -922,7 +926,7 @@ int wbt_init(struct gendisk *disk)
 	 * Assign rwb and add the stats callback.
	 */
 	mutex_lock(&q->rq_qos_mutex);
-	ret = rq_qos_add(&rwb->rqos, disk, RQ_QOS_WBT, &wbt_rqos_ops);
+	ret = rq_qos_add_freezed(&rwb->rqos, disk, RQ_QOS_WBT, &wbt_rqos_ops);
 	mutex_unlock(&q->rq_qos_mutex);
 	if (ret)
 		goto err_free;
-- 
2.51.0

Like wbt, rq_qos_add() can be called from two paths, and the lock orders
are inverted:

- From ioc_qos_write(), the queue is already frozen before rq_qos_add();
- From ioc_cost_model_write(), rq_qos_add() is called directly;

Fix this problem by converting ioc_cost_model_write() to use
blkg_conf_open_bdev_frozen(); then, since all rq_qos_add() callers already
freeze the queue, convert to use rq_qos_add_freezed().

Signed-off-by: Yu Kuai
---
 block/blk-iocost.c | 17 ++++++++---------
 1 file changed, 8 insertions(+), 9 deletions(-)

diff --git a/block/blk-iocost.c b/block/blk-iocost.c
index 5bfd70311359..233c9749bfc9 100644
--- a/block/blk-iocost.c
+++ b/block/blk-iocost.c
@@ -2927,7 +2927,7 @@ static int blk_iocost_init(struct gendisk *disk)
 	 * called before policy activation completion, can't assume that the
 	 * target bio has an iocg associated and need to test for NULL iocg.
 	 */
-	ret = rq_qos_add(&ioc->rqos, disk, RQ_QOS_COST, &ioc_rqos_ops);
+	ret = rq_qos_add_freezed(&ioc->rqos, disk, RQ_QOS_COST, &ioc_rqos_ops);
 	if (ret)
 		goto err_free_ioc;
 
@@ -3410,7 +3410,7 @@ static ssize_t ioc_cost_model_write(struct kernfs_open_file *of, char *input,
 {
 	struct blkg_conf_ctx ctx;
 	struct request_queue *q;
-	unsigned int memflags;
+	unsigned long memflags;
 	struct ioc *ioc;
 	u64 u[NR_I_LCOEFS];
 	bool user;
@@ -3419,9 +3419,11 @@ static ssize_t ioc_cost_model_write(struct kernfs_open_file *of, char *input,
 
 	blkg_conf_init(&ctx, input);
 
-	ret = blkg_conf_open_bdev(&ctx);
-	if (ret)
+	memflags = blkg_conf_open_bdev_frozen(&ctx);
+	if (IS_ERR_VALUE(memflags)) {
+		ret = memflags;
 		goto err;
+	}
 
 	body = ctx.body;
 	q = bdev_get_queue(ctx.bdev);
@@ -3438,7 +3440,6 @@ static ssize_t ioc_cost_model_write(struct kernfs_open_file *of, char *input,
 		ioc = q_to_ioc(q);
 	}
 
-	memflags = blk_mq_freeze_queue(q);
 	blk_mq_quiesce_queue(q);
 
 	spin_lock_irq(&ioc->lock);
@@ -3490,20 +3491,18 @@ static ssize_t ioc_cost_model_write(struct kernfs_open_file *of, char *input,
 	spin_unlock_irq(&ioc->lock);
 
 	blk_mq_unquiesce_queue(q);
-	blk_mq_unfreeze_queue(q, memflags);
 
-	blkg_conf_exit(&ctx);
+	blkg_conf_exit_frozen(&ctx, memflags);
 	return nbytes;
 
 einval:
 	spin_unlock_irq(&ioc->lock);
 
 	blk_mq_unquiesce_queue(q);
-	blk_mq_unfreeze_queue(q, memflags);
 
 	ret = -EINVAL;
 err:
-	blkg_conf_exit(&ctx);
+	blkg_conf_exit_frozen(&ctx, memflags);
 	return ret;
 }
 
-- 
2.51.0

Currently blk-iolatency holds rq_qos_mutex first and then calls
rq_qos_add() to freeze the queue. Fix this problem by converting
iolatency_set_limit() to use blkg_conf_open_bdev_frozen(), and convert to
use rq_qos_add_freezed().
Signed-off-by: Yu Kuai
---
 block/blk-iolatency.c | 15 +++++++++------
 1 file changed, 9 insertions(+), 6 deletions(-)

diff --git a/block/blk-iolatency.c b/block/blk-iolatency.c
index 45bd18f68541..1565352b176d 100644
--- a/block/blk-iolatency.c
+++ b/block/blk-iolatency.c
@@ -764,8 +764,8 @@ static int blk_iolatency_init(struct gendisk *disk)
 	if (!blkiolat)
 		return -ENOMEM;
 
-	ret = rq_qos_add(&blkiolat->rqos, disk, RQ_QOS_LATENCY,
-			 &blkcg_iolatency_ops);
+	ret = rq_qos_add_freezed(&blkiolat->rqos, disk, RQ_QOS_LATENCY,
+				 &blkcg_iolatency_ops);
 	if (ret)
 		goto err_free;
 	ret = blkcg_activate_policy(disk, &blkcg_policy_iolatency);
@@ -831,16 +831,19 @@ static ssize_t iolatency_set_limit(struct kernfs_open_file *of, char *buf,
 	struct blkcg_gq *blkg;
 	struct blkg_conf_ctx ctx;
 	struct iolatency_grp *iolat;
+	unsigned long memflags;
 	char *p, *tok;
 	u64 lat_val = 0;
 	u64 oldval;
-	int ret;
+	int ret = 0;
 
 	blkg_conf_init(&ctx, buf);
 
-	ret = blkg_conf_open_bdev(&ctx);
-	if (ret)
+	memflags = blkg_conf_open_bdev_frozen(&ctx);
+	if (IS_ERR_VALUE(memflags)) {
+		ret = memflags;
 		goto out;
+	}
 
 	/*
 	 * blk_iolatency_init() may fail after rq_qos_add() succeeds which can
@@ -890,7 +893,7 @@ static ssize_t iolatency_set_limit(struct kernfs_open_file *of, char *buf,
 		iolatency_clear_scaling(blkg);
 	ret = 0;
 out:
-	blkg_conf_exit(&ctx);
+	blkg_conf_exit_frozen(&ctx, memflags);
 	return ret ?: nbytes;
 }
 
-- 
2.51.0

Now that there are no callers of the old rq_qos_add(), remove it, and
rename rq_qos_add_freezed() back to rq_qos_add().

Signed-off-by: Yu Kuai
---
 block/blk-iocost.c    |  2 +-
 block/blk-iolatency.c |  4 ++--
 block/blk-rq-qos.c    | 42 ++----------------------------------------
 block/blk-rq-qos.h    |  6 ++----
 block/blk-wbt.c       |  2 +-
 5 files changed, 8 insertions(+), 48 deletions(-)

diff --git a/block/blk-iocost.c b/block/blk-iocost.c
index 233c9749bfc9..0948f628386f 100644
--- a/block/blk-iocost.c
+++ b/block/blk-iocost.c
@@ -2927,7 +2927,7 @@ static int blk_iocost_init(struct gendisk *disk)
 	 * called before policy activation completion, can't assume that the
 	 * target bio has an iocg associated and need to test for NULL iocg.
	 */
-	ret = rq_qos_add_freezed(&ioc->rqos, disk, RQ_QOS_COST, &ioc_rqos_ops);
+	ret = rq_qos_add(&ioc->rqos, disk, RQ_QOS_COST, &ioc_rqos_ops);
 	if (ret)
 		goto err_free_ioc;
 
diff --git a/block/blk-iolatency.c b/block/blk-iolatency.c
index 1565352b176d..5b18125e21c9 100644
--- a/block/blk-iolatency.c
+++ b/block/blk-iolatency.c
@@ -764,8 +764,8 @@ static int blk_iolatency_init(struct gendisk *disk)
 	if (!blkiolat)
 		return -ENOMEM;
 
-	ret = rq_qos_add_freezed(&blkiolat->rqos, disk, RQ_QOS_LATENCY,
-				 &blkcg_iolatency_ops);
+	ret = rq_qos_add(&blkiolat->rqos, disk, RQ_QOS_LATENCY,
+			 &blkcg_iolatency_ops);
 	if (ret)
 		goto err_free;
 	ret = blkcg_activate_policy(disk, &blkcg_policy_iolatency);
diff --git a/block/blk-rq-qos.c b/block/blk-rq-qos.c
index 353397d7e126..3a49af00b738 100644
--- a/block/blk-rq-qos.c
+++ b/block/blk-rq-qos.c
@@ -322,8 +322,8 @@ void rq_qos_exit(struct request_queue *q)
 	mutex_unlock(&q->rq_qos_mutex);
 }
 
-int rq_qos_add_freezed(struct rq_qos *rqos, struct gendisk *disk,
-		       enum rq_qos_id id, const struct rq_qos_ops *ops)
+int rq_qos_add(struct rq_qos *rqos, struct gendisk *disk,
+	       enum rq_qos_id id, const struct rq_qos_ops *ops)
 {
 	struct request_queue *q = disk->queue;
 
@@ -349,44 +349,6 @@ int rq_qos_add_freezed(struct rq_qos *rqos, struct gendisk *disk,
 	return 0;
 }
 
-int rq_qos_add(struct rq_qos *rqos, struct gendisk *disk, enum rq_qos_id id,
-	       const struct rq_qos_ops *ops)
-{
-	struct request_queue *q = disk->queue;
-	unsigned int memflags;
-
-	lockdep_assert_held(&q->rq_qos_mutex);
-
-	rqos->disk = disk;
-	rqos->id = id;
-	rqos->ops = ops;
-
-	/*
-	 * No IO can be in-flight when adding rqos, so freeze queue, which
-	 * is fine since we only support rq_qos for blk-mq queue.
-	 */
-	memflags = blk_mq_freeze_queue(q);
-
-	if (rq_qos_id(q, rqos->id))
-		goto ebusy;
-	rqos->next = q->rq_qos;
-	q->rq_qos = rqos;
-	blk_queue_flag_set(QUEUE_FLAG_QOS_ENABLED, q);
-
-	blk_mq_unfreeze_queue(q, memflags);
-
-	if (rqos->ops->debugfs_attrs) {
-		mutex_lock(&q->debugfs_mutex);
-		blk_mq_debugfs_register_rqos(rqos);
-		mutex_unlock(&q->debugfs_mutex);
-	}
-
-	return 0;
-ebusy:
-	blk_mq_unfreeze_queue(q, memflags);
-	return -EBUSY;
-}
-
 void rq_qos_del(struct rq_qos *rqos)
 {
 	struct request_queue *q = rqos->disk->queue;
diff --git a/block/blk-rq-qos.h b/block/blk-rq-qos.h
index 4a7fec01600b..8bbf178c16b0 100644
--- a/block/blk-rq-qos.h
+++ b/block/blk-rq-qos.h
@@ -85,10 +85,8 @@ static inline void rq_wait_init(struct rq_wait *rq_wait)
 	init_waitqueue_head(&rq_wait->wait);
 }
 
-int rq_qos_add(struct rq_qos *rqos, struct gendisk *disk, enum rq_qos_id id,
-	       const struct rq_qos_ops *ops);
-int rq_qos_add_freezed(struct rq_qos *rqos, struct gendisk *disk,
-		       enum rq_qos_id id, const struct rq_qos_ops *ops);
+int rq_qos_add(struct rq_qos *rqos, struct gendisk *disk,
+	       enum rq_qos_id id, const struct rq_qos_ops *ops);
 void rq_qos_del(struct rq_qos *rqos);
 
 typedef bool (acquire_inflight_cb_t)(struct rq_wait *rqw, void *private_data);
diff --git a/block/blk-wbt.c b/block/blk-wbt.c
index a784f6d338b4..d7f1e6ba1790 100644
--- a/block/blk-wbt.c
+++ b/block/blk-wbt.c
@@ -926,7 +926,7 @@ int wbt_init(struct gendisk *disk)
 	 * Assign rwb and add the stats callback.
 	 */
 	mutex_lock(&q->rq_qos_mutex);
-	ret = rq_qos_add_freezed(&rwb->rqos, disk, RQ_QOS_WBT, &wbt_rqos_ops);
+	ret = rq_qos_add(&rwb->rqos, disk, RQ_QOS_WBT, &wbt_rqos_ops);
 	mutex_unlock(&q->rq_qos_mutex);
 	if (ret)
 		goto err_free;
-- 
2.51.0
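
For reference, a minimal sketch of the locking order the series establishes
for registering an rq_qos policy: freeze the queue first, then take
rq_qos_mutex, then call the add helper, mirroring what wbt_enable_default()
does after patch 2. example_add_policy() below is a hypothetical caller used
only for illustration and is not part of any patch; it uses the helper under
its intermediate name rq_qos_add_freezed() (renamed to rq_qos_add() by the
final patch) and omits error handling beyond the return value.

/*
 * Hypothetical caller, illustration only: register an rq_qos policy with
 * the lock order this series requires (freeze before rq_qos_mutex).
 */
static int example_add_policy(struct gendisk *disk, struct rq_qos *rqos,
			      const struct rq_qos_ops *ops)
{
	struct request_queue *q = disk->queue;
	/* Freeze the queue before taking rq_qos_mutex, never the reverse. */
	unsigned int memflags = blk_mq_freeze_queue(q);
	int ret;

	mutex_lock(&q->rq_qos_mutex);
	ret = rq_qos_add_freezed(rqos, disk, RQ_QOS_WBT, ops);
	mutex_unlock(&q->rq_qos_mutex);

	blk_mq_unfreeze_queue(q, memflags);
	return ret;
}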