Add support to submit a bio per-path. In addition, for failover, add
support to requeue a failed bio.

NVMe has almost like-for-like equivalents here:
- nvme_available_path() -> mpath_available_path()
- nvme_requeue_work() -> mpath_requeue_work()
- nvme_ns_head_submit_bio() -> mpath_bdev_submit_bio()

For failover, a driver may want to re-submit a bio, so add support to
clone a bio prior to submission.

A bio which is submitted to a per-path device has flag REQ_MPATH set,
same as what is done for NVMe with REQ_NVME_MPATH.

Signed-off-by: John Garry
---
 include/linux/multipath.h | 15 +++++++
 lib/multipath.c           | 92 ++++++++++++++++++++++++++++++++++++++-
 2 files changed, 106 insertions(+), 1 deletion(-)

diff --git a/include/linux/multipath.h b/include/linux/multipath.h
index c964a1aba9c42..d557fb9bab4c9 100644
--- a/include/linux/multipath.h
+++ b/include/linux/multipath.h
@@ -3,6 +3,7 @@
 #define _LIBMULTIPATH_H
 
 #include
+#include
 #include
 
 extern const struct block_device_operations mpath_ops;
@@ -40,10 +41,12 @@ struct mpath_device {
 };
 
 struct mpath_head_template {
+	bool (*available_path)(struct mpath_device *, bool *);
 	bool (*is_disabled)(struct mpath_device *);
 	bool (*is_optimized)(struct mpath_device *);
 	enum mpath_access_state (*get_access_state)(struct mpath_device *);
 	enum mpath_iopolicy_e (*get_iopolicy)(struct mpath_head *);
+	struct bio *(*clone_bio)(struct bio *);
 	const struct attribute_group **device_groups;
 };
 
@@ -56,12 +59,23 @@ struct mpath_head {
 
 	struct kref ref;
 
+	struct bio_list requeue_list; /* list for requeuing bio */
+	spinlock_t requeue_lock;
+	struct work_struct requeue_work; /* work struct for requeue */
+
 	unsigned long flags;
 	struct mpath_device __rcu *current_path[MAX_NUMNODES];
 	const struct mpath_head_template *mpdt;
 	void *drvdata;
 };
 
+#define REQ_MPATH	REQ_DRV
+
+static inline bool is_mpath_request(struct request *req)
+{
+	return req->cmd_flags & REQ_MPATH;
+}
+
 static inline struct mpath_disk *mpath_bd_device_to_disk(struct device *dev)
 {
 	return dev_get_drvdata(dev);
@@ -82,6 +96,7 @@ int mpath_set_iopolicy(const char *val, int *iopolicy);
 int mpath_get_iopolicy(char *buf, int iopolicy);
 int mpath_get_head(struct mpath_head *mpath_head);
 void mpath_put_head(struct mpath_head *mpath_head);
+void mpath_requeue_work(struct work_struct *work);
 struct mpath_head *mpath_alloc_head(void);
 void mpath_put_disk(struct mpath_disk *mpath_disk);
 void mpath_remove_disk(struct mpath_disk *mpath_disk);
diff --git a/lib/multipath.c b/lib/multipath.c
index 65a0d2d2bf524..b494b35e8dccc 100644
--- a/lib/multipath.c
+++ b/lib/multipath.c
@@ -5,6 +5,7 @@
  */
 #include
 #include
+#include
 
 static struct mpath_device *mpath_find_path(struct mpath_head *mpath_head);
 
@@ -227,7 +228,6 @@ static struct mpath_device *mpath_numa_path(struct mpath_head *mpath_head,
 	return mpath_device;
 }
 
-__maybe_unused
 static struct mpath_device *mpath_find_path(struct mpath_head *mpath_head)
 {
 	enum mpath_iopolicy_e iopolicy =
@@ -243,6 +243,66 @@ static struct mpath_device *mpath_find_path(struct mpath_head *mpath_head)
 	}
 }
 
+static bool mpath_available_path(struct mpath_head *mpath_head)
+{
+	struct mpath_device *mpath_device;
+
+	if (!test_bit(MPATH_HEAD_DISK_LIVE, &mpath_head->flags))
+		return false;
+
+	list_for_each_entry_srcu(mpath_device, &mpath_head->dev_list, siblings,
+				 srcu_read_lock_held(&mpath_head->srcu)) {
+		bool available = false;
+
+		if (!mpath_head->mpdt->available_path(mpath_device,
+						      &available))
+			continue;
+		if (available)
+			return true;
+	}
+
+	return false;
+}
+
+static void mpath_bdev_submit_bio(struct bio *bio)
+{
+	struct mpath_disk *mpath_disk = bio->bi_bdev->bd_disk->private_data;
+	struct mpath_head *mpath_head = mpath_disk->mpath_head;
+	struct device *dev = mpath_disk->parent;
+	struct mpath_device *mpath_device;
+	int srcu_idx;
+
+	bio = bio_split_to_limits(bio);
+	if (!bio)
+		return;
+
+	srcu_idx = srcu_read_lock(&mpath_head->srcu);
+	mpath_device = mpath_find_path(mpath_head);
+
+	if (likely(mpath_device)) {
+		bio->bi_opf |= REQ_MPATH;
+		if (mpath_head->mpdt->clone_bio)
+			bio = mpath_head->mpdt->clone_bio(bio);
+		trace_block_bio_remap(bio, disk_devt(mpath_device->disk),
+				      bio->bi_iter.bi_sector);
+		bio_set_dev(bio, mpath_device->disk->part0);
+
+		submit_bio_noacct(bio);
+	} else if (mpath_available_path(mpath_head)) {
+		dev_warn_ratelimited(dev, "no usable path - requeuing I/O\n");
+
+		spin_lock_irq(&mpath_head->requeue_lock);
+		bio_list_add(&mpath_head->requeue_list, bio);
+		spin_unlock_irq(&mpath_head->requeue_lock);
+	} else {
+		dev_warn_ratelimited(dev, "no available path - failing I/O\n");
+
+		bio_io_error(bio);
+	}
+
+	srcu_read_unlock(&mpath_head->srcu, srcu_idx);
+}
+
 static void mpath_free_head(struct kref *ref)
 {
 	struct mpath_head *mpath_head =
@@ -310,6 +370,7 @@ const struct block_device_operations mpath_ops = {
 	.owner		= THIS_MODULE,
 	.open		= mpath_bdev_open,
 	.release	= mpath_bdev_release,
+	.submit_bio	= mpath_bdev_submit_bio,
 };
 EXPORT_SYMBOL_GPL(mpath_ops);
 
@@ -327,6 +388,24 @@ static void multipath_partition_scan_work(struct work_struct *work)
 	mutex_unlock(&mpath_disk->disk->open_mutex);
 }
 
+void mpath_requeue_work(struct work_struct *work)
+{
+	struct mpath_head *mpath_head =
+		container_of(work, struct mpath_head, requeue_work);
+	struct bio *bio, *next;
+
+	spin_lock_irq(&mpath_head->requeue_lock);
+	next = bio_list_get(&mpath_head->requeue_list);
+	spin_unlock_irq(&mpath_head->requeue_lock);
+
+	while ((bio = next) != NULL) {
+		next = bio->bi_next;
+		bio->bi_next = NULL;
+		submit_bio_noacct(bio);
+	}
+}
+EXPORT_SYMBOL_GPL(mpath_requeue_work);
+
 void mpath_remove_disk(struct mpath_disk *mpath_disk)
 {
 	struct mpath_head *mpath_head = mpath_disk->mpath_head;
@@ -334,6 +413,12 @@ void mpath_remove_disk(struct mpath_disk *mpath_disk)
 	if (test_and_clear_bit(MPATH_HEAD_DISK_LIVE, &mpath_head->flags)) {
 		struct gendisk *disk = mpath_disk->disk;
 
+		/*
+		 * requeue I/O after MPATH_HEAD_DISK_LIVE has been cleared
+		 * to allow multipath to fail all I/O.
+		 */
+		kblockd_schedule_work(&mpath_head->requeue_work);
+
 		mpath_synchronize(mpath_head);
 		del_gendisk(disk);
 	}
@@ -409,6 +494,7 @@ void mpath_device_set_live(struct mpath_disk *mpath_disk,
 	mutex_unlock(&mpath_head->lock);
 
 	mpath_synchronize(mpath_head);
+	kblockd_schedule_work(&mpath_head->requeue_work);
 }
 EXPORT_SYMBOL_GPL(mpath_device_set_live);
 
@@ -424,6 +510,10 @@ struct mpath_head *mpath_alloc_head(void)
 	mutex_init(&mpath_head->lock);
 	kref_init(&mpath_head->ref);
 
+	INIT_WORK(&mpath_head->requeue_work, mpath_requeue_work);
+	spin_lock_init(&mpath_head->requeue_lock);
+	bio_list_init(&mpath_head->requeue_list);
+
 	ret = init_srcu_struct(&mpath_head->srcu);
 	if (ret) {
 		kfree(mpath_head);
-- 
2.43.5