Add a macro for the max queue depth which is supported. Signed-off-by: John Garry --- drivers/scsi/scsi.c | 2 +- drivers/scsi/scsi_priv.h | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/scsi/scsi.c b/drivers/scsi/scsi.c index 76cdad063f7bc..28c9bbf439db6 100644 --- a/drivers/scsi/scsi.c +++ b/drivers/scsi/scsi.c @@ -204,7 +204,7 @@ void scsi_finish_command(struct scsi_cmnd *cmd) */ int scsi_device_max_queue_depth(struct scsi_device *sdev) { - return min_t(int, sdev->host->can_queue, 4096); + return min_t(int, sdev->host->can_queue, SCSI_MAX_QUEUE_DEPTH); } /** diff --git a/drivers/scsi/scsi_priv.h b/drivers/scsi/scsi_priv.h index d07ec15d6c002..679752c5f8bba 100644 --- a/drivers/scsi/scsi_priv.h +++ b/drivers/scsi/scsi_priv.h @@ -18,6 +18,8 @@ struct scsi_nl_hdr; #define SCSI_CMD_RETRIES_NO_LIMIT -1 +#define SCSI_MAX_QUEUE_DEPTH 4096 + /* * Error codes used by scsi-ml internally. These must not be used by drivers. */ -- 2.43.5 For a scsi_device to support multipath, introduce structure scsi_mpath_device to hold multipath-specific details. Like the NS structure for NVME, scsi_mpath_device holds the mpath_device structure used for device management and path selection. Two module params are introduced to enable multipath: - scsi_multipath - scsi_multipath_always SCSI multipath will only be available under either of the following conditions: - scsi_multipath enabled and ALUA supported and unique ID available in VPD page 83. - scsi_multipath_always enabled and unique ID available in VPD page 83 The scsi_device structure contains a pointer to scsi_mpath_device, which indicates whether multipath is enabled or disabled for the scsi_device. 
Signed-off-by: John Garry --- drivers/scsi/Kconfig | 10 +++ drivers/scsi/Makefile | 1 + drivers/scsi/scsi.c | 8 +- drivers/scsi/scsi_multipath.c | 158 ++++++++++++++++++++++++++++++++++ drivers/scsi/scsi_scan.c | 4 + drivers/scsi/scsi_sysfs.c | 2 + include/scsi/scsi_device.h | 2 + include/scsi/scsi_multipath.h | 55 ++++++++++++ 8 files changed, 239 insertions(+), 1 deletion(-) create mode 100644 drivers/scsi/scsi_multipath.c create mode 100644 include/scsi/scsi_multipath.h diff --git a/drivers/scsi/Kconfig b/drivers/scsi/Kconfig index 19d0884479a24..cfab7ad1e3c2c 100644 --- a/drivers/scsi/Kconfig +++ b/drivers/scsi/Kconfig @@ -76,6 +76,16 @@ config SCSI_LIB_KUNIT_TEST If unsure say N. +config SCSI_MULTIPATH + bool "SCSI multipath support" + depends on SCSI_MOD + select LIBMULTIPATH + help + This option enables support for native SCSI multipath support for + SCSI host. + + If unsure say N. + comment "SCSI support type (disk, tape, CD-ROM)" depends on SCSI diff --git a/drivers/scsi/Makefile b/drivers/scsi/Makefile index 16de3e41f94c4..64b7a82828b81 100644 --- a/drivers/scsi/Makefile +++ b/drivers/scsi/Makefile @@ -168,6 +168,7 @@ scsi_mod-y += scsi_trace.o scsi_logging.o scsi_mod-$(CONFIG_PM) += scsi_pm.o scsi_mod-$(CONFIG_SCSI_DH) += scsi_dh.o scsi_mod-$(CONFIG_BLK_DEV_BSG) += scsi_bsg.o +scsi_mod-$(CONFIG_SCSI_MULTIPATH) += scsi_multipath.o hv_storvsc-y := storvsc_drv.o diff --git a/drivers/scsi/scsi.c b/drivers/scsi/scsi.c index 28c9bbf439db6..99920715a9896 100644 --- a/drivers/scsi/scsi.c +++ b/drivers/scsi/scsi.c @@ -64,6 +64,7 @@ #include #include #include +#include #include #include "scsi_priv.h" @@ -1042,12 +1043,16 @@ static int __init init_scsi(void) error = scsi_sysfs_register(); if (error) goto cleanup_sysctl; + error = scsi_multipath_init(); + if (error) + goto cleanup_sysfs; scsi_netlink_init(); printk(KERN_NOTICE "SCSI subsystem initialized\n"); return 0; - +cleanup_sysfs: + scsi_sysfs_unregister(); cleanup_sysctl: scsi_exit_sysctl(); cleanup_hosts: @@ 
-1066,6 +1071,7 @@ static int __init init_scsi(void) static void __exit exit_scsi(void) { scsi_netlink_exit(); + scsi_multipath_exit(); scsi_sysfs_unregister(); scsi_exit_sysctl(); scsi_exit_hosts(); diff --git a/drivers/scsi/scsi_multipath.c b/drivers/scsi/scsi_multipath.c new file mode 100644 index 0000000000000..04e0bad3d9204 --- /dev/null +++ b/drivers/scsi/scsi_multipath.c @@ -0,0 +1,158 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (c) 2026 Oracle Corp + * + */ + +#include +#include +#include +#include +#include +#include + +#include "scsi_priv.h" + +bool scsi_multipath; +static bool scsi_multipath_always; + +static int multipath_param_set(const char *val, const struct kernel_param *kp) +{ + int ret; + bool *arg = kp->arg; + + ret = param_set_bool(val, kp); + if (ret) + return ret; + + if (scsi_multipath_always && !*arg) { + pr_err("Can't disable multipath when multipath_always_on is configured.\n"); + *arg = true; + return -EINVAL; + } + + return 0; +} + +static const struct kernel_param_ops multipath_param_ops = { + .set = multipath_param_set, + .get = param_get_bool, +}; + +module_param_cb(scsi_multipath, &multipath_param_ops, &scsi_multipath, 0444); +MODULE_PARM_DESC(scsi_multipath, "turn on native multipath support"); + +static int multipath_always_on_set(const char *val, + const struct kernel_param *kp) +{ + int ret; + bool *arg = kp->arg; + + ret = param_set_bool(val, kp); + if (ret < 0) + return ret; + + if (*arg) + scsi_multipath = true; + + return 0; +} + +static const struct kernel_param_ops multipath_always_on_ops = { + .set = multipath_always_on_set, + .get = param_get_bool, +}; + +module_param_cb(scsi_multipath_always, &multipath_always_on_ops, + &scsi_multipath_always, 0444); +MODULE_PARM_DESC(scsi_multipath_always, + "create multipath node always even for no ALUA support"); + +static int scsi_mpath_unique_lun_id(struct scsi_device *sdev) +{ + struct scsi_mpath_device *scsi_mpath_dev = sdev->scsi_mpath_dev; + int ret; + + ret = 
scsi_vpd_lun_id(sdev, scsi_mpath_dev->device_id_str, + SCSI_MPATH_DEVICE_ID_LEN); + if (ret < 0) + return ret; + + return 0; +} + +static int scsi_multipath_sdev_init(struct scsi_device *sdev) +{ + struct Scsi_Host *shost = sdev->host; + struct scsi_mpath_device *scsi_mpath_dev; + struct mpath_device *mpath_device; + + scsi_mpath_dev = kzalloc(sizeof(*scsi_mpath_dev), GFP_KERNEL); + if (!scsi_mpath_dev) + return -ENOMEM; + scsi_mpath_dev->sdev = sdev; + sdev->scsi_mpath_dev = scsi_mpath_dev; + + mpath_device = &scsi_mpath_dev->mpath_device; + mpath_device->numa_node = dev_to_node(shost->dma_dev); + + return 0; +} + +static void scsi_multipath_sdev_uninit(struct scsi_device *sdev) +{ + kfree(sdev->scsi_mpath_dev); + sdev->scsi_mpath_dev = NULL; +} + +int scsi_mpath_dev_alloc(struct scsi_device *sdev) +{ + int ret; + + if (!scsi_multipath) + return 0; + + if (!scsi_device_tpgs(sdev) && !scsi_multipath_always) { + sdev_printk(KERN_NOTICE, sdev, "tpgs are required for multipath support\n"); + return 0; + } + + ret = scsi_multipath_sdev_init(sdev); + if (ret) + return ret; + + ret = scsi_mpath_unique_lun_id(sdev); + if (ret < 0) { + ret = 0; + goto out_uninit; + } + + return 0; + +out_uninit: + scsi_multipath_sdev_uninit(sdev); + return ret; +} + +void scsi_mpath_dev_release(struct scsi_device *sdev) +{ + struct scsi_mpath_device *scsi_mpath_dev = sdev->scsi_mpath_dev; + + if (!scsi_mpath_dev) + return; + + scsi_multipath_sdev_uninit(sdev); + +} + +int __init scsi_multipath_init(void) +{ + return 0; +} + +void __exit scsi_multipath_exit(void) +{ +} + +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("scsi_multipath"); diff --git a/drivers/scsi/scsi_scan.c b/drivers/scsi/scsi_scan.c index 7acbfcfc2172e..e22d3245d4b65 100644 --- a/drivers/scsi/scsi_scan.c +++ b/drivers/scsi/scsi_scan.c @@ -46,6 +46,7 @@ #include #include #include +#include #include "scsi_priv.h" #include "scsi_logging.h" @@ -1122,6 +1123,9 @@ static int scsi_add_lun(struct scsi_device *sdev, unsigned char 
*inq_result, sdev->max_queue_depth = sdev->queue_depth; WARN_ON_ONCE(sdev->max_queue_depth > sdev->budget_map.depth); + if (scsi_mpath_dev_alloc(sdev)) + return SCSI_SCAN_NO_RESPONSE; + /* * Ok, the device is now all set up, we can * register it and tell the rest of the kernel diff --git a/drivers/scsi/scsi_sysfs.c b/drivers/scsi/scsi_sysfs.c index 99eb0a30df615..0d69e27600a7a 100644 --- a/drivers/scsi/scsi_sysfs.c +++ b/drivers/scsi/scsi_sysfs.c @@ -23,6 +23,7 @@ #include #include #include +#include #include "scsi_priv.h" #include "scsi_logging.h" @@ -455,6 +456,7 @@ static void scsi_device_dev_release(struct device *dev) might_sleep(); scsi_dh_release_device(sdev); + scsi_mpath_dev_release(sdev); parent = sdev->sdev_gendev.parent; diff --git a/include/scsi/scsi_device.h b/include/scsi/scsi_device.h index d32f5841f4f85..52974dba0a724 100644 --- a/include/scsi/scsi_device.h +++ b/include/scsi/scsi_device.h @@ -279,6 +279,8 @@ struct scsi_device { struct device sdev_gendev, sdev_dev; + struct scsi_mpath_device *scsi_mpath_dev; + struct work_struct requeue_work; struct scsi_device_handler *handler; diff --git a/include/scsi/scsi_multipath.h b/include/scsi/scsi_multipath.h new file mode 100644 index 0000000000000..ca00ea10cd5db --- /dev/null +++ b/include/scsi/scsi_multipath.h @@ -0,0 +1,55 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _SCSI_SCSI_MULTIPATH_H +#define _SCSI_SCSI_MULTIPATH_H + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#ifdef CONFIG_SCSI_MULTIPATH +#define SCSI_MPATH_DEVICE_ID_LEN 40 + +struct scsi_mpath_device { + struct mpath_device mpath_device; + struct scsi_device *sdev; + + char device_id_str[SCSI_MPATH_DEVICE_ID_LEN]; +}; +#define to_scsi_mpath_device(d) \ + container_of(d, struct scsi_mpath_device, mpath_device) + +int scsi_mpath_dev_alloc(struct scsi_device *sdev); +void scsi_mpath_dev_release(struct scsi_device *sdev); +int 
scsi_multipath_init(void); +void scsi_multipath_exit(void); +#else /* CONFIG_SCSI_MULTIPATH */ + +struct scsi_mpath_device { +}; + +static inline int scsi_mpath_dev_alloc(struct scsi_device *sdev) +{ + return 0; +} +static inline void scsi_mpath_dev_release(struct scsi_device *sdev) +{ +} +static inline int scsi_multipath_init(void) +{ + return 0; +} +static inline void scsi_multipath_exit(void) +{ +} +#endif /* CONFIG_SCSI_MULTIPATH */ +#endif /* _SCSI_SCSI_MULTIPATH_H */ -- 2.43.5 Introduce a scsi_device head structure - scsi_mpath_head - to manage multipathing for a scsi_device. This is similar to the nvme_ns_head structure. There is no reference in scsi_mpath_head to any disk, as this would be managed by the scsi_disk driver. A list of scsi_mpath_head structures is maintained so that matching multipathed scsi_device's can be looked up. Matching is done through the scsi_device unique id. Signed-off-by: John Garry --- drivers/scsi/scsi_multipath.c | 147 ++++++++++++++++++++++++++++++++++ drivers/scsi/scsi_sysfs.c | 3 + include/scsi/scsi_multipath.h | 29 +++++++ 3 files changed, 179 insertions(+) diff --git a/drivers/scsi/scsi_multipath.c b/drivers/scsi/scsi_multipath.c index 04e0bad3d9204..49316269fad8e 100644 --- a/drivers/scsi/scsi_multipath.c +++ b/drivers/scsi/scsi_multipath.c @@ -16,6 +16,10 @@ bool scsi_multipath; static bool scsi_multipath_always; +static LIST_HEAD(scsi_mpath_heads_list); +static DEFINE_MUTEX(scsi_mpath_heads_lock); +static DEFINE_IDA(scsi_multipath_dev_ida); + static int multipath_param_set(const char *val, const struct kernel_param *kp) { int ret; @@ -99,6 +103,73 @@ static int scsi_multipath_sdev_init(struct scsi_device *sdev) return 0; } +struct mpath_head_template smpdt_pr = { +}; + +static struct scsi_mpath_head *scsi_mpath_alloc_head(void) +{ + struct scsi_mpath_head *scsi_mpath_head; + int ret; + + scsi_mpath_head = kzalloc(sizeof(*scsi_mpath_head), GFP_KERNEL); + if (!scsi_mpath_head) + return NULL; + + ida_init(&scsi_mpath_head->ida); + 
mutex_init(&scsi_mpath_head->lock); + + scsi_mpath_head->mpath_head = mpath_alloc_head(); + if (IS_ERR(scsi_mpath_head->mpath_head)) + goto out_free; + scsi_mpath_head->mpath_head->mpdt = &smpdt_pr; + scsi_mpath_head->mpath_head->drvdata = scsi_mpath_head; + + scsi_mpath_head->index = ida_alloc(&scsi_multipath_dev_ida, GFP_KERNEL); + if (scsi_mpath_head->index < 0) + goto out_put_head; + + device_initialize(&scsi_mpath_head->dev); + ret = dev_set_name(&scsi_mpath_head->dev, "%d", scsi_mpath_head->index); + if (ret) { + put_device(&scsi_mpath_head->dev); + goto out_free_ida; + } + + return scsi_mpath_head; + +out_free_ida: + ida_free(&scsi_multipath_dev_ida, scsi_mpath_head->index); +out_put_head: + mpath_put_head(scsi_mpath_head->mpath_head); +out_free: + kfree(scsi_mpath_head); + return NULL; +} + +static struct scsi_mpath_head *scsi_mpath_find_head( + struct scsi_mpath_device *scsi_mpath_dev) +{ + struct scsi_mpath_head *scsi_mpath_head; + int ret; + + mutex_lock(&scsi_mpath_heads_lock); + list_for_each_entry(scsi_mpath_head, &scsi_mpath_heads_list, entry) { + ret = scsi_mpath_get_head(scsi_mpath_head); + if (ret) + continue; + if (strncmp(scsi_mpath_head->wwid, + scsi_mpath_dev->device_id_str, + SCSI_MPATH_DEVICE_ID_LEN) == 0) { + + mutex_unlock(&scsi_mpath_heads_lock); + return scsi_mpath_head; + } + scsi_mpath_put_head(scsi_mpath_head); + } + + return NULL; +} + static void scsi_multipath_sdev_uninit(struct scsi_device *sdev) { kfree(sdev->scsi_mpath_dev); @@ -107,6 +178,7 @@ static void scsi_multipath_sdev_uninit(struct scsi_device *sdev) int scsi_mpath_dev_alloc(struct scsi_device *sdev) { + struct scsi_mpath_head *scsi_mpath_head; int ret; if (!scsi_multipath) @@ -127,13 +199,75 @@ int scsi_mpath_dev_alloc(struct scsi_device *sdev) goto out_uninit; } + scsi_mpath_head = scsi_mpath_find_head(sdev->scsi_mpath_dev); + if (scsi_mpath_head) + goto found; + /* scsi_mpath_disks_list lock held */ + scsi_mpath_head = scsi_mpath_alloc_head(); + if (!scsi_mpath_head) 
+ goto out_uninit; + + strcpy(scsi_mpath_head->wwid, sdev->scsi_mpath_dev->device_id_str); + + ret = device_add(&scsi_mpath_head->dev); + if (ret) + goto out_put_head; + + list_add_tail(&scsi_mpath_head->entry, &scsi_mpath_heads_list); + + mutex_unlock(&scsi_mpath_heads_lock); + sdev->scsi_mpath_dev->scsi_mpath_head = scsi_mpath_head; + +found: + sdev->scsi_mpath_dev->index = ida_alloc(&scsi_mpath_head->ida, GFP_KERNEL); + if (sdev->scsi_mpath_dev->index < 0) { + ret = sdev->scsi_mpath_dev->index; + goto out_put_head; + } + + mutex_lock(&scsi_mpath_head->lock); + scsi_mpath_head->dev_count++; + mutex_unlock(&scsi_mpath_head->lock); + + sdev->scsi_mpath_dev->scsi_mpath_head = scsi_mpath_head; return 0; +out_put_head: + scsi_mpath_put_head(scsi_mpath_head); out_uninit: + mutex_unlock(&scsi_mpath_heads_lock); scsi_multipath_sdev_uninit(sdev); return ret; } +static void scsi_mpath_remove_head(struct scsi_mpath_device *scsi_mpath_dev) +{ + struct scsi_mpath_head *scsi_mpath_head = + scsi_mpath_dev->scsi_mpath_head; + bool last_path = false; + + mutex_lock(&scsi_mpath_head->lock); + scsi_mpath_head->dev_count--; + if (scsi_mpath_head->dev_count == 0) + last_path = true; + mutex_unlock(&scsi_mpath_head->lock); + + if (last_path) + device_del(&scsi_mpath_head->dev); + + scsi_mpath_dev->scsi_mpath_head = NULL; + scsi_mpath_put_head(scsi_mpath_head); +} + +void scsi_mpath_remove_device(struct scsi_mpath_device *scsi_mpath_dev) +{ + struct scsi_mpath_head *scsi_mpath_head = scsi_mpath_dev->scsi_mpath_head; + + ida_free(&scsi_mpath_head->ida, scsi_mpath_dev->index); + + scsi_mpath_remove_head(scsi_mpath_dev); +} + void scsi_mpath_dev_release(struct scsi_device *sdev) { struct scsi_mpath_device *scsi_mpath_dev = sdev->scsi_mpath_dev; @@ -142,8 +276,21 @@ void scsi_mpath_dev_release(struct scsi_device *sdev) return; scsi_multipath_sdev_uninit(sdev); +} + +int scsi_mpath_get_head(struct scsi_mpath_head *scsi_mpath_head) +{ + if (!get_device(&scsi_mpath_head->dev)) + return 
-ENXIO; + return 0; +} +EXPORT_SYMBOL_GPL(scsi_mpath_get_head); +void scsi_mpath_put_head(struct scsi_mpath_head *scsi_mpath_head) +{ + put_device(&scsi_mpath_head->dev); } +EXPORT_SYMBOL_GPL(scsi_mpath_put_head); int __init scsi_multipath_init(void) { diff --git a/drivers/scsi/scsi_sysfs.c b/drivers/scsi/scsi_sysfs.c index 0d69e27600a7a..287a683e89ae5 100644 --- a/drivers/scsi/scsi_sysfs.c +++ b/drivers/scsi/scsi_sysfs.c @@ -1447,6 +1447,9 @@ void __scsi_remove_device(struct scsi_device *sdev) } else put_device(&sdev->sdev_dev); + if (sdev->scsi_mpath_dev) + scsi_mpath_remove_device(sdev->scsi_mpath_dev); + /* * Stop accepting new requests and wait until all queuecommand() and * scsi_run_queue() invocations have finished before tearing down the diff --git a/include/scsi/scsi_multipath.h b/include/scsi/scsi_multipath.h index ca00ea10cd5db..38953b05a44dc 100644 --- a/include/scsi/scsi_multipath.h +++ b/include/scsi/scsi_multipath.h @@ -19,9 +19,22 @@ #ifdef CONFIG_SCSI_MULTIPATH #define SCSI_MPATH_DEVICE_ID_LEN 40 +struct scsi_mpath_head { + char wwid[SCSI_MPATH_DEVICE_ID_LEN]; + struct list_head entry; + int dev_count; + struct ida ida; + struct mutex lock; + struct mpath_head *mpath_head; + struct device dev; + int index; +}; + struct scsi_mpath_device { struct mpath_device mpath_device; struct scsi_device *sdev; + int index; + struct scsi_mpath_head *scsi_mpath_head; char device_id_str[SCSI_MPATH_DEVICE_ID_LEN]; }; @@ -32,8 +45,13 @@ int scsi_mpath_dev_alloc(struct scsi_device *sdev); void scsi_mpath_dev_release(struct scsi_device *sdev); int scsi_multipath_init(void); void scsi_multipath_exit(void); +void scsi_mpath_remove_device(struct scsi_mpath_device *scsi_mpath_dev); +int scsi_mpath_get_head(struct scsi_mpath_head *); +void scsi_mpath_put_head(struct scsi_mpath_head *); #else /* CONFIG_SCSI_MULTIPATH */ +struct scsi_mpath_head { +}; struct scsi_mpath_device { }; @@ -51,5 +69,16 @@ static inline int scsi_multipath_init(void) static inline void 
scsi_multipath_exit(void) { } +static inline void scsi_mpath_remove_device(struct scsi_mpath_device + *scsi_mpath_dev) +{ +} +static inline int scsi_mpath_get_head(struct scsi_mpath_head *) +{ + return 0; +} +static inline void scsi_mpath_put_head(struct scsi_mpath_head *) +{ +} #endif /* CONFIG_SCSI_MULTIPATH */ #endif /* _SCSI_SCSI_MULTIPATH_H */ -- 2.43.5 Introduce a new class for multipathed devices, scsi_mpath_device_class. The purpose of this class is for managing the scsi_mpath_head.dev member. The naming for the scsi_device structure is in the form H:C:I:L, where H is host, C is channel, I is ID, and L is lun. However, for a multipathed scsi_device, all the naming members may be different between member scsi_device's. As such, just use a simple single-number naming index for each scsi_mpath_head. The sysfs device folder will have links to the scsi_device's, so it will be possible to look up the member scsi_device's. An example sysfs entry is as follows: # ls -l /sys/class/scsi_mpath_device/0/ total 0 drwxr-xr-x 2 root root 0 Feb 24 11:56 power lrwxrwxrwx 1 root root 0 Feb 24 11:56 subsystem -> ../../../../class/scsi_mpath_device -rw-r--r-- 1 root root 4096 Feb 24 11:55 uevent -r--r--r-- 1 root root 4096 Feb 24 11:56 wwid # cat /sys/class/scsi_mpath_device/0/wwid naa.600140505200a986f0043c9afa1fd077 Signed-off-by: John Garry --- drivers/scsi/scsi_multipath.c | 67 ++++++++++++++++++++++++++++++++++- 1 file changed, 66 insertions(+), 1 deletion(-) diff --git a/drivers/scsi/scsi_multipath.c b/drivers/scsi/scsi_multipath.c index 49316269fad8e..05af178921cb4 100644 --- a/drivers/scsi/scsi_multipath.c +++ b/drivers/scsi/scsi_multipath.c @@ -85,6 +85,69 @@ static int scsi_mpath_unique_lun_id(struct scsi_device *sdev) return 0; } +static void scsi_mpath_delete_head(struct scsi_mpath_head *scsi_mpath_head) +{ + mutex_lock(&scsi_mpath_heads_lock); + list_del_init(&scsi_mpath_head->entry); + mutex_unlock(&scsi_mpath_heads_lock); +} + +static void 
scsi_mpath_head_release(struct device *dev) +{ + struct scsi_mpath_head *scsi_mpath_head = + container_of(dev, struct scsi_mpath_head, dev); + struct mpath_head *mpath_head = scsi_mpath_head->mpath_head; + + scsi_mpath_delete_head(scsi_mpath_head); + ida_free(&scsi_multipath_dev_ida, scsi_mpath_head->index); + mpath_put_head(mpath_head); + kfree(scsi_mpath_head); +} + +static ssize_t scsi_mpath_device_wwid_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct scsi_mpath_head *scsi_mpath_head = + container_of(dev, struct scsi_mpath_head, dev); + + return sysfs_emit(buf, "%s\n", scsi_mpath_head->wwid); +} + +static DEVICE_ATTR(wwid, S_IRUGO, scsi_mpath_device_wwid_show, NULL); + +static struct attribute *scsi_mpath_device_attrs[] = { + &dev_attr_wwid.attr, + NULL +}; + +static const struct attribute_group scsi_mpath_device_attrs_group = { + .attrs = scsi_mpath_device_attrs, +}; + +static bool scsi_multipath_sysfs_group_visible(struct kobject *kobj) +{ + return true; +} + +static bool scsi_multipath_sysfs_attr_visible(struct kobject *kobj, + struct attribute *attr, int n) +{ + return false; +} +DEFINE_SYSFS_GROUP_VISIBLE(scsi_multipath_sysfs) + +const struct attribute_group *scsi_mpath_device_groups[] = { + &scsi_mpath_device_attrs_group, + NULL +}; + +static const struct class scsi_mpath_device_class = { + .name = "scsi_mpath_device", + .dev_groups = scsi_mpath_device_groups, + .dev_release = scsi_mpath_head_release, +}; + static int scsi_multipath_sdev_init(struct scsi_device *sdev) { struct Scsi_Host *shost = sdev->host; @@ -129,6 +192,7 @@ static struct scsi_mpath_head *scsi_mpath_alloc_head(void) goto out_put_head; device_initialize(&scsi_mpath_head->dev); + scsi_mpath_head->dev.class = &scsi_mpath_device_class; ret = dev_set_name(&scsi_mpath_head->dev, "%d", scsi_mpath_head->index); if (ret) { put_device(&scsi_mpath_head->dev); @@ -294,11 +358,12 @@ EXPORT_SYMBOL_GPL(scsi_mpath_put_head); int __init scsi_multipath_init(void) { - 
return 0; + return class_register(&scsi_mpath_device_class); } void __exit scsi_multipath_exit(void) { + class_unregister(&scsi_mpath_device_class); } MODULE_LICENSE("GPL"); -- 2.43.5 Provide a link in sysfs from a scsi_mpath_device to member scsi_device's. An example is as follows: # ls -l /sys/class/scsi_mpath_device/0/multipath/ total 0 lrwxrwxrwx 1 root root 0 Feb 24 12:01 8:0:0:0 -> ../../../../platform/host8/session1/target8:0:0/8:0:0:0 lrwxrwxrwx 1 root root 0 Feb 24 12:01 9:0:0:0 -> ../../../../platform/host9/session2/target9:0:0/9:0:0:0 Signed-off-by: John Garry --- drivers/scsi/scsi_multipath.c | 45 +++++++++++++++++++++++++++++++++++ drivers/scsi/scsi_sysfs.c | 5 ++++ include/scsi/scsi_multipath.h | 9 +++++++ 3 files changed, 59 insertions(+) diff --git a/drivers/scsi/scsi_multipath.c b/drivers/scsi/scsi_multipath.c index 05af178921cb4..ac55f9f39a5b2 100644 --- a/drivers/scsi/scsi_multipath.c +++ b/drivers/scsi/scsi_multipath.c @@ -125,6 +125,15 @@ static const struct attribute_group scsi_mpath_device_attrs_group = { .attrs = scsi_mpath_device_attrs, }; +static struct attribute dummy_attr = { + .name = "dummy", +}; + +static struct attribute *scsi_mpath_attrs[] = { + &dummy_attr, + NULL +}; + static bool scsi_multipath_sysfs_group_visible(struct kobject *kobj) { return true; @@ -137,11 +146,47 @@ static bool scsi_multipath_sysfs_attr_visible(struct kobject *kobj, } DEFINE_SYSFS_GROUP_VISIBLE(scsi_multipath_sysfs) +static const struct attribute_group scsi_mpath_attr_group = { + .name = "multipath", + .attrs = scsi_mpath_attrs, + .is_visible = SYSFS_GROUP_VISIBLE(scsi_multipath_sysfs), +}; + const struct attribute_group *scsi_mpath_device_groups[] = { &scsi_mpath_device_attrs_group, + &scsi_mpath_attr_group, NULL }; +void scsi_mpath_add_sysfs_link(struct scsi_device *sdev) +{ + struct device *target = &sdev->sdev_gendev; + struct scsi_mpath_head *scsi_mpath_head = + sdev->scsi_mpath_dev->scsi_mpath_head; + struct device *source = &scsi_mpath_head->dev; + 
int error; + + error = sysfs_add_link_to_group(&source->kobj, "multipath", + &target->kobj, dev_name(target)); + if (error) { + sdev_printk(KERN_INFO, sdev, "Failed to create mpath sysfs link, errno=%d\n", + error); + } +} +EXPORT_SYMBOL_GPL(scsi_mpath_add_sysfs_link); + +void scsi_mpath_remove_sysfs_link(struct scsi_device *sdev) +{ + struct device *target = &sdev->sdev_gendev; + struct scsi_mpath_head *scsi_mpath_head = + sdev->scsi_mpath_dev->scsi_mpath_head; + struct device *source = &scsi_mpath_head->dev; + + sysfs_remove_link_from_group(&source->kobj, "multipath", + dev_name(target)); +} +EXPORT_SYMBOL_GPL(scsi_mpath_remove_sysfs_link); + static const struct class scsi_mpath_device_class = { .name = "scsi_mpath_device", .dev_groups = scsi_mpath_device_groups, diff --git a/drivers/scsi/scsi_sysfs.c b/drivers/scsi/scsi_sysfs.c index 287a683e89ae5..3b03ee00c8df3 100644 --- a/drivers/scsi/scsi_sysfs.c +++ b/drivers/scsi/scsi_sysfs.c @@ -1389,6 +1389,9 @@ int scsi_sysfs_add_sdev(struct scsi_device *sdev) transport_add_device(&sdev->sdev_gendev); sdev->is_visible = 1; + if (sdev->scsi_mpath_dev) + scsi_mpath_add_sysfs_link(sdev); + if (IS_ENABLED(CONFIG_BLK_DEV_BSG)) { sdev->bsg_dev = scsi_bsg_register_queue(sdev); if (IS_ERR(sdev->bsg_dev)) { @@ -1441,6 +1444,8 @@ void __scsi_remove_device(struct scsi_device *sdev) if (IS_ENABLED(CONFIG_BLK_DEV_BSG) && sdev->bsg_dev) bsg_unregister_queue(sdev->bsg_dev); + if (sdev->scsi_mpath_dev) + scsi_mpath_remove_sysfs_link(sdev); device_unregister(&sdev->sdev_dev); transport_remove_device(dev); device_del(dev); diff --git a/include/scsi/scsi_multipath.h b/include/scsi/scsi_multipath.h index 38953b05a44dc..d8102df329d6b 100644 --- a/include/scsi/scsi_multipath.h +++ b/include/scsi/scsi_multipath.h @@ -46,6 +46,8 @@ void scsi_mpath_dev_release(struct scsi_device *sdev); int scsi_multipath_init(void); void scsi_multipath_exit(void); void scsi_mpath_remove_device(struct scsi_mpath_device *scsi_mpath_dev); +void 
scsi_mpath_add_sysfs_link(struct scsi_device *sdev); +void scsi_mpath_remove_sysfs_link(struct scsi_device *sdev); int scsi_mpath_get_head(struct scsi_mpath_head *); void scsi_mpath_put_head(struct scsi_mpath_head *); #else /* CONFIG_SCSI_MULTIPATH */ @@ -80,5 +82,12 @@ static inline int scsi_mpath_get_head(struct scsi_mpath_head *) static inline void scsi_mpath_put_head(struct scsi_mpath_head *) { } + +static inline void scsi_mpath_add_sysfs_link(struct scsi_device *sdev) +{ +} +static inline void scsi_mpath_remove_sysfs_link(struct scsi_device *sdev) +{ +} #endif /* CONFIG_SCSI_MULTIPATH */ #endif /* _SCSI_SCSI_MULTIPATH_H */ -- 2.43.5 Add support to set the multipath iopolicy. The iopolicy member is per scsi_mpath_head structure. A module param is added so that the default iopolicy may be set. Signed-off-by: John Garry --- drivers/scsi/scsi_multipath.c | 57 +++++++++++++++++++++++++++++++++++ include/scsi/scsi_multipath.h | 1 + 2 files changed, 58 insertions(+) diff --git a/drivers/scsi/scsi_multipath.c b/drivers/scsi/scsi_multipath.c index ac55f9f39a5b2..4b7984e7e74ba 100644 --- a/drivers/scsi/scsi_multipath.c +++ b/drivers/scsi/scsi_multipath.c @@ -72,6 +72,23 @@ module_param_cb(scsi_multipath_always, &multipath_always_on_ops, MODULE_PARM_DESC(scsi_multipath_always, "create multipath node always even for no ALUA support"); +static int iopolicy = MPATH_IOPOLICY_NUMA; + +static int scsi_set_iopolicy(const char *val, const struct kernel_param *kp) +{ + return mpath_set_iopolicy(val, &iopolicy); +} + +static int scsi_get_iopolicy(char *buf, const struct kernel_param *kp) +{ + return mpath_get_iopolicy(buf, iopolicy); +} + +module_param_call(iopolicy, scsi_set_iopolicy, scsi_get_iopolicy, + &iopolicy, 0644); +MODULE_PARM_DESC(iopolicy, + "Default multipath I/O policy; 'numa' (default), 'round-robin' or 'queue-depth'"); + static int scsi_mpath_unique_lun_id(struct scsi_device *sdev) { struct scsi_mpath_device *scsi_mpath_dev = sdev->scsi_mpath_dev; @@ -116,8 +133,40 
@@ static ssize_t scsi_mpath_device_wwid_show(struct device *dev, static DEVICE_ATTR(wwid, S_IRUGO, scsi_mpath_device_wwid_show, NULL); +static void scsi_mpath_device_iopolicy_store_update(void *data) +{ + struct scsi_mpath_head *scsi_mpath_head = data; + struct mpath_head *mpath_head = scsi_mpath_head->mpath_head; + + mpath_clear_paths(mpath_head); + kblockd_schedule_work(&mpath_head->requeue_work); +} + +static ssize_t scsi_mpath_device_iopolicy_store(struct device *dev, + struct device_attribute *attr, const char *buf, size_t count) +{ + struct scsi_mpath_head *scsi_mpath_head = + container_of(dev, struct scsi_mpath_head, dev); + + return mpath_iopolicy_store(&scsi_mpath_head->iopolicy, buf, count, + scsi_mpath_device_iopolicy_store_update, scsi_mpath_head); +} + +static ssize_t scsi_mpath_device_iopolicy_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct scsi_mpath_head *scsi_mpath_head = + container_of(dev, struct scsi_mpath_head, dev); + + return mpath_iopolicy_show(&scsi_mpath_head->iopolicy, buf); +} + +static DEVICE_ATTR(iopolicy, S_IRUGO | S_IWUSR, + scsi_mpath_device_iopolicy_show, scsi_mpath_device_iopolicy_store); + static struct attribute *scsi_mpath_device_attrs[] = { &dev_attr_wwid.attr, + &dev_attr_iopolicy.attr, NULL }; @@ -211,7 +260,15 @@ static int scsi_multipath_sdev_init(struct scsi_device *sdev) return 0; } +static enum mpath_iopolicy_e scsi_mpath_get_iopolicy(struct mpath_head *mpath_head) +{ + struct scsi_mpath_head *scsi_mpath_head = mpath_head->drvdata; + + return mpath_read_iopolicy(&scsi_mpath_head->iopolicy); +} + struct mpath_head_template smpdt_pr = { + .get_iopolicy = scsi_mpath_get_iopolicy, }; static struct scsi_mpath_head *scsi_mpath_alloc_head(void) diff --git a/include/scsi/scsi_multipath.h b/include/scsi/scsi_multipath.h index d8102df329d6b..8dbe1c3784d2c 100644 --- a/include/scsi/scsi_multipath.h +++ b/include/scsi/scsi_multipath.h @@ -25,6 +25,7 @@ struct scsi_mpath_head { int dev_count; struct 
ida ida; struct mutex lock; + struct mpath_iopolicy iopolicy; struct mpath_head *mpath_head; struct device dev; int index; -- 2.43.5 For failover handling, we must resubmit each bio. However, unlike NVMe, for SCSI there is no guarantee that any bio submitted is either all or none completed. As such, for SCSI, for failover handling we will take the approach to just re-submit the original bio. For this clone and submit each bio. Signed-off-by: John Garry --- drivers/scsi/scsi_multipath.c | 51 ++++++++++++++++++++++++++++++++++- include/scsi/scsi_multipath.h | 1 + 2 files changed, 51 insertions(+), 1 deletion(-) diff --git a/drivers/scsi/scsi_multipath.c b/drivers/scsi/scsi_multipath.c index 4b7984e7e74ba..d79a92ec0cf6c 100644 --- a/drivers/scsi/scsi_multipath.c +++ b/drivers/scsi/scsi_multipath.c @@ -89,6 +89,14 @@ module_param_call(iopolicy, scsi_set_iopolicy, scsi_get_iopolicy, MODULE_PARM_DESC(iopolicy, "Default multipath I/O policy; 'numa' (default), 'round-robin' or 'queue-depth'"); +struct scsi_mpath_clone_bio { + struct bio *master_bio; + struct bio clone; +}; + +#define scsi_mpath_to_master_bio(bio) \ + container_of(bio, struct scsi_mpath_clone_bio, clone) + static int scsi_mpath_unique_lun_id(struct scsi_device *sdev) { struct scsi_mpath_device *scsi_mpath_dev = sdev->scsi_mpath_dev; @@ -116,6 +124,7 @@ static void scsi_mpath_head_release(struct device *dev) struct mpath_head *mpath_head = scsi_mpath_head->mpath_head; scsi_mpath_delete_head(scsi_mpath_head); + bioset_exit(&scsi_mpath_head->bio_pool); ida_free(&scsi_multipath_dev_ida, scsi_mpath_head->index); mpath_put_head(mpath_head); kfree(scsi_mpath_head); @@ -260,6 +269,39 @@ static int scsi_multipath_sdev_init(struct scsi_device *sdev) return 0; } +static void scsi_mpath_clone_end_io(struct bio *clone) +{ + struct scsi_mpath_clone_bio *scsi_mpath_clone_bio = + scsi_mpath_to_master_bio(clone); + struct bio *master_bio = scsi_mpath_clone_bio->master_bio; + + master_bio->bi_status = clone->bi_status; +
bio_put(clone); + bio_endio(master_bio); +} + +static struct bio *scsi_mpath_clone_bio(struct bio *bio) +{ + struct mpath_disk *mpath_disk = bio->bi_bdev->bd_disk->private_data; + struct mpath_head *mpath_head = mpath_disk->mpath_head; + struct scsi_mpath_clone_bio *scsi_mpath_clone_bio; + struct scsi_mpath_head *scsi_mpath_head = mpath_head->drvdata; + struct bio *clone; + + clone = bio_alloc_clone(bio->bi_bdev, bio, GFP_NOWAIT, + &scsi_mpath_head->bio_pool); + if (!clone) + return NULL; + + clone->bi_end_io = scsi_mpath_clone_end_io; + + scsi_mpath_clone_bio = container_of(clone, + struct scsi_mpath_clone_bio, clone); + scsi_mpath_clone_bio->master_bio = bio; + + return clone; +} + static enum mpath_iopolicy_e scsi_mpath_get_iopolicy(struct mpath_head *mpath_head) { struct scsi_mpath_head *scsi_mpath_head = mpath_head->drvdata; @@ -269,6 +311,7 @@ static enum mpath_iopolicy_e scsi_mpath_get_iopolicy(struct mpath_head *mpath_he struct mpath_head_template smpdt_pr = { .get_iopolicy = scsi_mpath_get_iopolicy, + .clone_bio = scsi_mpath_clone_bio, }; static struct scsi_mpath_head *scsi_mpath_alloc_head(void) @@ -283,9 +326,13 @@ static struct scsi_mpath_head *scsi_mpath_alloc_head(void) ida_init(&scsi_mpath_head->ida); mutex_init(&scsi_mpath_head->lock); + if (bioset_init(&scsi_mpath_head->bio_pool, SCSI_MAX_QUEUE_DEPTH, + offsetof(struct scsi_mpath_clone_bio, clone), + BIOSET_NEED_BVECS|BIOSET_PERCPU_CACHE)) + goto out_free; scsi_mpath_head->mpath_head = mpath_alloc_head(); if (IS_ERR(scsi_mpath_head->mpath_head)) - goto out_free; + goto out_bioset_exit; scsi_mpath_head->mpath_head->mpdt = &smpdt_pr; scsi_mpath_head->mpath_head->drvdata = scsi_mpath_head; @@ -307,6 +354,8 @@ static struct scsi_mpath_head *scsi_mpath_alloc_head(void) ida_free(&scsi_multipath_dev_ida, scsi_mpath_head->index); out_put_head: mpath_put_head(scsi_mpath_head->mpath_head); +out_bioset_exit: + bioset_exit(&scsi_mpath_head->bio_pool); out_free: kfree(scsi_mpath_head); return NULL; diff --git 
a/include/scsi/scsi_multipath.h b/include/scsi/scsi_multipath.h index 8dbe1c3784d2c..bd99ea017379d 100644 --- a/include/scsi/scsi_multipath.h +++ b/include/scsi/scsi_multipath.h @@ -26,6 +26,7 @@ struct scsi_mpath_head { struct ida ida; struct mutex lock; struct mpath_iopolicy iopolicy; + struct bio_set bio_pool; struct mpath_head *mpath_head; struct device dev; int index; -- 2.43.5 Add scsi_mpath_dev_clear_path() to clear a device path when it becomes blocked, and call from __scsi_internal_device_block_nowait(). Signed-off-by: John Garry --- drivers/scsi/scsi_lib.c | 3 +++ drivers/scsi/scsi_multipath.c | 11 +++++++++++ include/scsi/scsi_multipath.h | 5 +++++ 3 files changed, 19 insertions(+) diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c index 93031326ac3ee..ab224cd61f3ae 100644 --- a/drivers/scsi/scsi_lib.c +++ b/drivers/scsi/scsi_lib.c @@ -33,6 +33,7 @@ #include #include #include /* scsi_init_limits() */ +#include #include #include @@ -2898,6 +2899,8 @@ EXPORT_SYMBOL(scsi_target_resume); static int __scsi_internal_device_block_nowait(struct scsi_device *sdev) { + if (sdev->scsi_mpath_dev) + scsi_mpath_dev_clear_path(sdev->scsi_mpath_dev); if (scsi_device_set_state(sdev, SDEV_BLOCK)) return scsi_device_set_state(sdev, SDEV_CREATED_BLOCK); diff --git a/drivers/scsi/scsi_multipath.c b/drivers/scsi/scsi_multipath.c index d79a92ec0cf6c..c3e0f792e921f 100644 --- a/drivers/scsi/scsi_multipath.c +++ b/drivers/scsi/scsi_multipath.c @@ -151,6 +151,17 @@ static void scsi_mpath_device_iopolicy_store_update(void *data) kblockd_schedule_work(&mpath_head->requeue_work); } +void scsi_mpath_dev_clear_path(struct scsi_mpath_device *scsi_mpath_dev) +{ + struct mpath_device *mpath_device = &scsi_mpath_dev->mpath_device; + struct scsi_mpath_head *scsi_mpath_head = scsi_mpath_dev->scsi_mpath_head; + struct mpath_head *mpath_head = scsi_mpath_head->mpath_head; + + if (mpath_clear_current_path(mpath_head, mpath_device)) + mpath_synchronize(mpath_head); +} 
+EXPORT_SYMBOL_GPL(scsi_mpath_dev_clear_path); + static ssize_t scsi_mpath_device_iopolicy_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { diff --git a/include/scsi/scsi_multipath.h b/include/scsi/scsi_multipath.h index bd99ea017379d..79e6860243e74 100644 --- a/include/scsi/scsi_multipath.h +++ b/include/scsi/scsi_multipath.h @@ -47,6 +47,7 @@ int scsi_mpath_dev_alloc(struct scsi_device *sdev); void scsi_mpath_dev_release(struct scsi_device *sdev); int scsi_multipath_init(void); void scsi_multipath_exit(void); +void scsi_mpath_dev_clear_path(struct scsi_mpath_device *scsi_mpath_dev); void scsi_mpath_remove_device(struct scsi_mpath_device *scsi_mpath_dev); void scsi_mpath_add_sysfs_link(struct scsi_device *sdev); void scsi_mpath_remove_sysfs_link(struct scsi_device *sdev); @@ -73,6 +74,10 @@ static inline int scsi_multipath_init(void) static inline void scsi_multipath_exit(void) { } +static inline void scsi_mpath_dev_clear_path( + struct scsi_mpath_device *scsi_mpath_dev) +{ +} static inline void scsi_mpath_remove_device(struct scsi_mpath_device *scsi_mpath_dev) { -- 2.43.5 For a scmd which suffers failover, requeue the master bio of each bio attached to its request. A handler is added in the scsi_driver structure to look up an mpath_disk from a request. This is needed because the scsi_disk structure will manage the mpath_disk, and the core code has no method to look this up from the scsi_cmnd. Failover occurs when the scsi_cmnd has failed and it is discovered that the original scsi_device has transport down.
Signed-off-by: John Garry --- drivers/scsi/scsi_error.c | 12 ++++++ drivers/scsi/scsi_lib.c | 9 +++- drivers/scsi/scsi_multipath.c | 80 +++++++++++++++++++++++++++++++++++ include/scsi/scsi.h | 1 + include/scsi/scsi_driver.h | 3 ++ include/scsi/scsi_multipath.h | 14 ++++++ 6 files changed, 118 insertions(+), 1 deletion(-) diff --git a/drivers/scsi/scsi_error.c b/drivers/scsi/scsi_error.c index f869108fd9693..0fd1b46764c3f 100644 --- a/drivers/scsi/scsi_error.c +++ b/drivers/scsi/scsi_error.c @@ -40,6 +40,7 @@ #include #include #include +#include #include #include "scsi_priv.h" @@ -1901,12 +1902,16 @@ bool scsi_noretry_cmd(struct scsi_cmnd *scmd) enum scsi_disposition scsi_decide_disposition(struct scsi_cmnd *scmd) { enum scsi_disposition rtn; + struct request *req = scsi_cmd_to_rq(scmd); /* * if the device is offline, then we clearly just pass the result back * up to the top level. */ if (!scsi_device_online(scmd->device)) { + if (scsi_is_mpath_request(req)) + return scsi_mpath_failover_disposition(scmd); + SCSI_LOG_ERROR_RECOVERY(5, scmd_printk(KERN_INFO, scmd, "%s: device offline - report as SUCCESS\n", __func__)); return SUCCESS; @@ -2070,6 +2075,13 @@ enum scsi_disposition scsi_decide_disposition(struct scsi_cmnd *scmd) maybe_retry: + /* + * For SCSI Multipath check if there are path errors to + * trigger failover to available path + */ + if (scsi_is_mpath_request(req)) + return scsi_mpath_failover_disposition(scmd); + /* we requeue for retry because the error was retryable, and * the request was not marked fast fail. 
Note that above, * even if the request is marked fast fail, we still requeue diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c index ab224cd61f3ae..7ed0defc8161e 100644 --- a/drivers/scsi/scsi_lib.c +++ b/drivers/scsi/scsi_lib.c @@ -1550,7 +1550,7 @@ static void scsi_complete(struct request *rq) atomic_inc(&cmd->device->ioerr_cnt); disposition = scsi_decide_disposition(cmd); - if (disposition != SUCCESS && scsi_cmd_runtime_exceeced(cmd)) + if (disposition != SUCCESS && disposition != FAILOVER && scsi_cmd_runtime_exceeced(cmd)) disposition = SUCCESS; scsi_log_completion(cmd, disposition); @@ -1565,6 +1565,9 @@ static void scsi_complete(struct request *rq) case ADD_TO_MLQUEUE: scsi_queue_insert(cmd, SCSI_MLQUEUE_DEVICE_BUSY); break; + case FAILOVER: + scsi_mpath_failover_req(rq); + break; default: scsi_eh_scmd_add(cmd); break; @@ -1935,6 +1938,10 @@ static blk_status_t scsi_queue_rq(struct blk_mq_hw_ctx *hctx, if (req->rq_flags & RQF_DONTPREP) scsi_mq_uninit_cmd(cmd); scsi_run_queue_async(sdev); + if (!scsi_device_online(sdev) && scsi_is_mpath_request(req)) { + scsi_mpath_failover_req(req); + return 0; + } break; } return ret; diff --git a/drivers/scsi/scsi_multipath.c b/drivers/scsi/scsi_multipath.c index c3e0f792e921f..16b1f84fc552c 100644 --- a/drivers/scsi/scsi_multipath.c +++ b/drivers/scsi/scsi_multipath.c @@ -518,6 +518,86 @@ void scsi_mpath_put_head(struct scsi_mpath_head *scsi_mpath_head) } EXPORT_SYMBOL_GPL(scsi_mpath_put_head); +bool scsi_is_mpath_request(struct request *req) +{ + return is_mpath_request(req); +} +EXPORT_SYMBOL_GPL(scsi_is_mpath_request); + +static inline void bio_list_add_clone_master(struct bio_list *bl, + struct bio *clone) +{ + struct scsi_mpath_clone_bio *scsi_mpath_clone_bio; + struct bio *master_bio; + + if (clone->bi_next) + bio_list_add_clone_master(bl, clone->bi_next); + + scsi_mpath_clone_bio = scsi_mpath_to_master_bio(clone); + master_bio = scsi_mpath_clone_bio->master_bio; + + if (bl->tail) + bl->tail->bi_next = 
master_bio; + else + bl->head = master_bio; + + bl->tail = master_bio; + + bio_put(clone); +} + +void scsi_mpath_failover_req(struct request *req) +{ + struct scsi_cmnd *scmd = blk_mq_rq_to_pdu(req); + struct scsi_device *sdev = scmd->device; + struct scsi_driver *drv = to_scsi_driver(sdev->sdev_gendev.driver); + struct mpath_disk *mpath_disk = drv->to_mpath_disk(req); + struct scsi_mpath_device *scsi_mpath_dev = sdev->scsi_mpath_dev; + struct mpath_head *mpath_head = mpath_disk->mpath_head; + unsigned long flags; + + scsi_mpath_dev_clear_path(scsi_mpath_dev); + + spin_lock_irqsave(&mpath_head->requeue_lock, flags); + bio_list_add_clone_master(&mpath_head->requeue_list, req->bio); + spin_unlock_irqrestore(&mpath_head->requeue_lock, flags); + req->bio = NULL; + req->biotail = NULL; + req->__data_len = 0; + + /* End old request with clone detached */ + scmd->result = 0; + blk_mq_end_request(req, 0); + + kblockd_schedule_work(&mpath_head->requeue_work); +} + +static inline bool scsi_is_mpath_error(struct scsi_cmnd *scmd) +{ + struct scsi_device *sdev = scmd->device; + + if (sdev->sdev_state == SDEV_TRANSPORT_OFFLINE) + return true; + return false; +} + +int scsi_mpath_failover_disposition(struct scsi_cmnd *scmd) +{ + struct request *req = scsi_cmd_to_rq(scmd); + + if (is_mpath_request(req)) { + if (scsi_is_mpath_error(scmd) || + blk_queue_dying(req->q)) + return FAILOVER; + return NEEDS_RETRY; + } else { + if (blk_queue_dying(req->q)) + return SUCCESS; + } + + return SUCCESS; +} + int __init scsi_multipath_init(void) { return class_register(&scsi_mpath_device_class); diff --git a/include/scsi/scsi.h b/include/scsi/scsi.h index 96b3503666703..544153a01b3fd 100644 --- a/include/scsi/scsi.h +++ b/include/scsi/scsi.h @@ -103,6 +103,7 @@ enum scsi_disposition { TIMEOUT_ERROR = 0x2007, SCSI_RETURN_NOT_HANDLED = 0x2008, FAST_IO_FAIL = 0x2009, + FAILOVER = 0x2010, }; /* diff --git a/include/scsi/scsi_driver.h b/include/scsi/scsi_driver.h index c0e89996bdb3f..85e792dc4db50 
100644 --- a/include/scsi/scsi_driver.h +++ b/include/scsi/scsi_driver.h @@ -19,6 +19,9 @@ struct scsi_driver { int (*done)(struct scsi_cmnd *); int (*eh_action)(struct scsi_cmnd *, int); void (*eh_reset)(struct scsi_cmnd *); + #ifdef CONFIG_SCSI_MULTIPATH + struct mpath_disk *(*to_mpath_disk)(struct request *); + #endif }; #define to_scsi_driver(drv) \ container_of((drv), struct scsi_driver, gendrv) diff --git a/include/scsi/scsi_multipath.h b/include/scsi/scsi_multipath.h index 79e6860243e74..07db217edb085 100644 --- a/include/scsi/scsi_multipath.h +++ b/include/scsi/scsi_multipath.h @@ -43,6 +43,9 @@ struct scsi_mpath_device { #define to_scsi_mpath_device(d) \ container_of(d, struct scsi_mpath_device, mpath_device) +void scsi_mpath_failover_req(struct request *); +int scsi_mpath_failover_disposition(struct scsi_cmnd *); +bool scsi_is_mpath_request(struct request *req); int scsi_mpath_dev_alloc(struct scsi_device *sdev); void scsi_mpath_dev_release(struct scsi_device *sdev); int scsi_multipath_init(void); @@ -60,6 +63,17 @@ struct scsi_mpath_head { struct scsi_mpath_device { }; +static inline void scsi_mpath_failover_req(struct request *) +{ +} +static inline int scsi_mpath_failover_disposition(struct scsi_cmnd *) +{ + return 0; +} +static inline bool scsi_is_mpath_request(struct request *req) +{ + return false; +} static inline int scsi_mpath_dev_alloc(struct scsi_device *sdev) { return 0; -- 2.43.5 Add scsi_mpath_{start,end}_request() to handle updating private multipath request data, like nvme_mpath_{start,end}_request(). Since we may need to update mpath_disk data, add callbacks in scsi_driver to actually do this work for the scsi driver.
Signed-off-by: John Garry --- drivers/scsi/scsi_lib.c | 4 ++++ include/scsi/scsi_driver.h | 2 ++ include/scsi/scsi_multipath.h | 24 ++++++++++++++++++++++++ 3 files changed, 30 insertions(+) diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c index 7ed0defc8161e..61179caa7b2c8 100644 --- a/drivers/scsi/scsi_lib.c +++ b/drivers/scsi/scsi_lib.c @@ -654,6 +654,8 @@ static bool scsi_end_request(struct request *req, blk_status_t error, */ destroy_rcu_head(&cmd->rcu); + scsi_mpath_end_request(req); + /* * In the MQ case the command gets freed by __blk_mq_end_request, * so we have to do all cleanup that depends on it earlier. @@ -1887,6 +1889,8 @@ static blk_status_t scsi_queue_rq(struct blk_mq_hw_ctx *hctx, memset(cmd->sense_buffer, 0, SCSI_SENSE_BUFFERSIZE); cmd->submitter = SUBMITTED_BY_BLOCK_LAYER; + scsi_mpath_start_request(req); + blk_mq_start_request(req); if (blk_mq_is_reserved_rq(req)) { reason = shost->hostt->queue_reserved_command(shost, cmd); diff --git a/include/scsi/scsi_driver.h b/include/scsi/scsi_driver.h index 85e792dc4db50..44e50229a75e7 100644 --- a/include/scsi/scsi_driver.h +++ b/include/scsi/scsi_driver.h @@ -20,6 +20,8 @@ struct scsi_driver { int (*eh_action)(struct scsi_cmnd *, int); void (*eh_reset)(struct scsi_cmnd *); #ifdef CONFIG_SCSI_MULTIPATH + void (*mpath_start_cmd)(struct scsi_cmnd *); + void (*mpath_end_cmd)(struct scsi_cmnd *); struct mpath_disk *(*to_mpath_disk)(struct request *); #endif }; diff --git a/include/scsi/scsi_multipath.h b/include/scsi/scsi_multipath.h index 07db217edb085..6cb3107260952 100644 --- a/include/scsi/scsi_multipath.h +++ b/include/scsi/scsi_multipath.h @@ -56,6 +56,23 @@ void scsi_mpath_add_sysfs_link(struct scsi_device *sdev); void scsi_mpath_remove_sysfs_link(struct scsi_device *sdev); int scsi_mpath_get_head(struct scsi_mpath_head *); void scsi_mpath_put_head(struct scsi_mpath_head *); + +static inline void scsi_mpath_start_request(struct request *req) +{ + struct scsi_cmnd *cmd = 
blk_mq_rq_to_pdu(req); + + if (is_mpath_request(req)) + scsi_cmd_to_driver(cmd)->mpath_start_cmd(cmd); +} + +static inline void scsi_mpath_end_request(struct request *req) +{ + struct scsi_cmnd *cmd = blk_mq_rq_to_pdu(req); + + if (is_mpath_request(req)) + scsi_cmd_to_driver(cmd)->mpath_end_cmd(cmd); +} + #else /* CONFIG_SCSI_MULTIPATH */ struct scsi_mpath_head { @@ -104,6 +121,13 @@ static inline void scsi_mpath_put_head(struct scsi_mpath_head *) { } +static inline void scsi_mpath_start_request(struct request *) +{ +} +static inline void scsi_mpath_end_request(struct request *) +{ +} + static inline void scsi_mpath_add_sysfs_link(struct scsi_device *sdev) { } -- 2.43.5 Add a callback for the scsi_mpath_ioctl.bdev_ioctl . Since this is concerned with the mpath_disk, we rely on the scsi_driver to handle the ioctl. Signed-off-by: John Garry --- drivers/scsi/scsi_multipath.c | 21 +++++++++++++++++++++ include/scsi/scsi_driver.h | 2 ++ 2 files changed, 23 insertions(+) diff --git a/drivers/scsi/scsi_multipath.c b/drivers/scsi/scsi_multipath.c index 16b1f84fc552c..36f13605b44e7 100644 --- a/drivers/scsi/scsi_multipath.c +++ b/drivers/scsi/scsi_multipath.c @@ -320,7 +320,28 @@ static enum mpath_iopolicy_e scsi_mpath_get_iopolicy(struct mpath_head *mpath_he return mpath_read_iopolicy(&scsi_mpath_head->iopolicy); } +static int scsi_mpath_ioctl(struct block_device *bdev, + struct mpath_device *mpath_device, + blk_mode_t mode, unsigned int cmd, + unsigned long arg, int srcu_idx) +{ + struct gendisk *disk = bdev->bd_disk; + struct mpath_disk *mpath_disk = mpath_gendisk_to_disk(disk); + struct mpath_head *mpath_head = mpath_disk->mpath_head; + struct scsi_mpath_device *scsi_mpath_dev = + to_scsi_mpath_device(mpath_device); + struct scsi_device *sdev = scsi_mpath_dev->sdev; + struct scsi_driver *drv = to_scsi_driver(sdev->sdev_gendev.driver); + int err; + + err = drv->mpath_ioctl(sdev, mode & BLK_OPEN_WRITE, cmd, arg); + + mpath_head_read_unlock(mpath_head, srcu_idx); +
return err; +} + struct mpath_head_template smpdt_pr = { + .bdev_ioctl = scsi_mpath_ioctl, .get_iopolicy = scsi_mpath_get_iopolicy, .clone_bio = scsi_mpath_clone_bio, }; diff --git a/include/scsi/scsi_driver.h b/include/scsi/scsi_driver.h index 44e50229a75e7..799071b8bdee2 100644 --- a/include/scsi/scsi_driver.h +++ b/include/scsi/scsi_driver.h @@ -22,6 +22,8 @@ struct scsi_driver { #ifdef CONFIG_SCSI_MULTIPATH void (*mpath_start_cmd)(struct scsi_cmnd *); void (*mpath_end_cmd)(struct scsi_cmnd *); + int (*mpath_ioctl)(struct scsi_device *sdev, blk_mode_t mode, + unsigned int cmd, unsigned long arg); struct mpath_disk *(*to_mpath_disk)(struct request *); #endif }; -- 2.43.5 Until ALUA is supported, just always say that the path is optimized. In addition, just add basic scsi_device state tests for checking on path state. Signed-off-by: John Garry --- drivers/scsi/scsi_multipath.c | 45 +++++++++++++++++++++++++++++++++++ 1 file changed, 45 insertions(+) diff --git a/drivers/scsi/scsi_multipath.c b/drivers/scsi/scsi_multipath.c index 36f13605b44e7..6aeac20a350ff 100644 --- a/drivers/scsi/scsi_multipath.c +++ b/drivers/scsi/scsi_multipath.c @@ -340,8 +340,53 @@ static int scsi_mpath_ioctl(struct block_device *bdev, return err; } +static bool scsi_mpath_is_disabled(struct mpath_device *mpath_device) +{ + struct scsi_mpath_device *scsi_mpath_dev = + to_scsi_mpath_device(mpath_device); + struct scsi_device *sdev = scsi_mpath_dev->sdev; + enum scsi_device_state sdev_state = sdev->sdev_state; + + if (sdev_state == SDEV_RUNNING || sdev_state == SDEV_CANCEL) + return false; + + return true; +} + +static bool scsi_mpath_is_optimized(struct mpath_device *mpath_device) +{ + if (scsi_mpath_is_disabled(mpath_device)) + return false; + return true; +} + +/* Until we have ALUA support, we're always optimised */ +static enum mpath_access_state scsi_mpath_get_access_state( + struct mpath_device *mpath_device) +{ + if (scsi_mpath_is_disabled(mpath_device)) + return MPATH_STATE_INVALID; 
+ return MPATH_STATE_OPTIMIZED; +} + +static bool scsi_mpath_available_path(struct mpath_device *mpath_device, bool *available) +{ + struct scsi_mpath_device *scsi_mpath_dev = + to_scsi_mpath_device(mpath_device); + struct scsi_device *sdev = scsi_mpath_dev->sdev; + + if (scsi_device_blocked(sdev)) + return false; + + return scsi_device_online(sdev); +} + struct mpath_head_template smpdt_pr = { + .is_disabled = scsi_mpath_is_disabled, + .is_optimized = scsi_mpath_is_optimized, + .get_access_state = scsi_mpath_get_access_state, .bdev_ioctl = scsi_mpath_ioctl, + .available_path = scsi_mpath_available_path, .get_iopolicy = scsi_mpath_get_iopolicy, .clone_bio = scsi_mpath_clone_bio, }; -- 2.43.5 Set disk device_groups as mpath_device_groups, as this gives us the "multipath" sysfs device groups. Signed-off-by: John Garry --- drivers/scsi/scsi_multipath.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/scsi/scsi_multipath.c b/drivers/scsi/scsi_multipath.c index 6aeac20a350ff..73afcbaf2d7de 100644 --- a/drivers/scsi/scsi_multipath.c +++ b/drivers/scsi/scsi_multipath.c @@ -389,6 +389,7 @@ struct mpath_head_template smpdt_pr = { .available_path = scsi_mpath_available_path, .get_iopolicy = scsi_mpath_get_iopolicy, .clone_bio = scsi_mpath_clone_bio, + .device_groups = mpath_device_groups, }; static struct scsi_mpath_head *scsi_mpath_alloc_head(void) -- 2.43.5 Add an mpath_pr_ops structure with callbacks. Since PR ops are related to mpath_disk, add new structure type scsi_mpath_pr_ops, which allows PR ops to be executed for a scsi_device. Since PR ops are related to mpath_disk, provide a scsi_driver member to allow the scsi_disk driver to set its PR ops callbacks.
Signed-off-by: John Garry --- drivers/scsi/scsi_multipath.c | 110 ++++++++++++++++++++++++++++++++++ include/scsi/scsi_driver.h | 1 + include/scsi/scsi_multipath.h | 17 ++++++ 3 files changed, 128 insertions(+) diff --git a/drivers/scsi/scsi_multipath.c b/drivers/scsi/scsi_multipath.c index 73afcbaf2d7de..1489c7e979167 100644 --- a/drivers/scsi/scsi_multipath.c +++ b/drivers/scsi/scsi_multipath.c @@ -381,6 +381,115 @@ static bool scsi_mpath_available_path(struct mpath_device *mpath_device, bool *a return scsi_device_online(sdev); } +static int scsi_mpath_pr_register(struct mpath_device *mpath_device, + u64 old_key, u64 new_key, u32 flags) +{ + struct scsi_mpath_device *scsi_mpath_dev = + to_scsi_mpath_device(mpath_device); + struct scsi_device *sdev = scsi_mpath_dev->sdev; + struct scsi_driver *drv = to_scsi_driver(sdev->sdev_gendev.driver); + + if (!drv->mpath_pr_ops) + return -EOPNOTSUPP; + + return drv->mpath_pr_ops->pr_register(sdev, old_key, new_key, flags); +} + +static int scsi_mpath_pr_reserve(struct mpath_device *mpath_device, u64 key, + enum pr_type type, u32 flags) +{ + struct scsi_mpath_device *scsi_mpath_dev = + to_scsi_mpath_device(mpath_device); + struct scsi_device *sdev = scsi_mpath_dev->sdev; + struct scsi_driver *drv = to_scsi_driver(sdev->sdev_gendev.driver); + + if (!drv->mpath_pr_ops) + return -EOPNOTSUPP; + + return drv->mpath_pr_ops->pr_reserve(sdev, key, type, flags); +} + +static int scsi_mpath_pr_release(struct mpath_device *mpath_device, u64 key, + enum pr_type type) +{ + struct scsi_mpath_device *scsi_mpath_dev = + to_scsi_mpath_device(mpath_device); + struct scsi_device *sdev = scsi_mpath_dev->sdev; + struct scsi_driver *drv = to_scsi_driver(sdev->sdev_gendev.driver); + + if (!drv->mpath_pr_ops) + return -EOPNOTSUPP; + + return drv->mpath_pr_ops->pr_release(sdev, key, type); +} + +static int scsi_mpath_pr_preempt(struct mpath_device *mpath_device, + u64 old_key, u64 new_key, enum pr_type type, + bool abort) +{ + struct 
scsi_mpath_device *scsi_mpath_dev = + to_scsi_mpath_device(mpath_device); + struct scsi_device *sdev = scsi_mpath_dev->sdev; + struct scsi_driver *drv = to_scsi_driver(sdev->sdev_gendev.driver); + + if (!drv->mpath_pr_ops) + return -EOPNOTSUPP; + + return drv->mpath_pr_ops->pr_preempt(sdev, old_key, new_key, + type, abort); +} + +static int scsi_mpath_pr_clear(struct mpath_device *mpath_device, u64 key) +{ + struct scsi_mpath_device *scsi_mpath_dev = + to_scsi_mpath_device(mpath_device); + struct scsi_device *sdev = scsi_mpath_dev->sdev; + struct scsi_driver *drv = to_scsi_driver(sdev->sdev_gendev.driver); + + if (!drv->mpath_pr_ops) + return -EOPNOTSUPP; + + return drv->mpath_pr_ops->pr_clear(sdev, key); +} + +static int scsi_mpath_pr_read_keys(struct mpath_device *mpath_device, + struct pr_keys *keys_info) +{ + struct scsi_mpath_device *scsi_mpath_dev = + to_scsi_mpath_device(mpath_device); + struct scsi_device *sdev = scsi_mpath_dev->sdev; + struct scsi_driver *drv = to_scsi_driver(sdev->sdev_gendev.driver); + + if (!drv->mpath_pr_ops) + return -EOPNOTSUPP; + + return drv->mpath_pr_ops->pr_read_keys(sdev, keys_info); +} + +static int scsi_mpath_pr_read_reservation(struct mpath_device *mpath_device, + struct pr_held_reservation *rsv) +{ + struct scsi_mpath_device *scsi_mpath_dev = + to_scsi_mpath_device(mpath_device); + struct scsi_device *sdev = scsi_mpath_dev->sdev; + struct scsi_driver *drv = to_scsi_driver(sdev->sdev_gendev.driver); + + if (!drv->mpath_pr_ops) + return -EOPNOTSUPP; + + return drv->mpath_pr_ops->pr_read_reservation(sdev, rsv); +} + +static const struct mpath_pr_ops scsi_mpath_pr_ops = { + .pr_register = scsi_mpath_pr_register, + .pr_reserve = scsi_mpath_pr_reserve, + .pr_release = scsi_mpath_pr_release, + .pr_preempt = scsi_mpath_pr_preempt, + .pr_clear = scsi_mpath_pr_clear, + .pr_read_keys = scsi_mpath_pr_read_keys, + .pr_read_reservation = scsi_mpath_pr_read_reservation, +}; + struct mpath_head_template smpdt_pr = { .is_disabled = 
scsi_mpath_is_disabled, .is_optimized = scsi_mpath_is_optimized, @@ -389,6 +498,7 @@ struct mpath_head_template smpdt_pr = { .available_path = scsi_mpath_available_path, .get_iopolicy = scsi_mpath_get_iopolicy, .clone_bio = scsi_mpath_clone_bio, + .pr_ops = &scsi_mpath_pr_ops, .device_groups = mpath_device_groups, }; diff --git a/include/scsi/scsi_driver.h b/include/scsi/scsi_driver.h index 799071b8bdee2..2aaa5d270d818 100644 --- a/include/scsi/scsi_driver.h +++ b/include/scsi/scsi_driver.h @@ -25,6 +25,7 @@ struct scsi_driver { int (*mpath_ioctl)(struct scsi_device *sdev, blk_mode_t mode, unsigned int cmd, unsigned long arg); struct mpath_disk *(*to_mpath_disk)(struct request *); + const struct scsi_mpath_pr_ops *mpath_pr_ops; #endif }; #define to_scsi_driver(drv) \ diff --git a/include/scsi/scsi_multipath.h b/include/scsi/scsi_multipath.h index 6cb3107260952..cb63c6536b854 100644 --- a/include/scsi/scsi_multipath.h +++ b/include/scsi/scsi_multipath.h @@ -40,6 +40,23 @@ struct scsi_mpath_device { char device_id_str[SCSI_MPATH_DEVICE_ID_LEN]; }; + +struct scsi_mpath_pr_ops { + int (*pr_register)(struct scsi_device *, u64 old_key, + u64 new_key, u32 flags); + int (*pr_reserve)(struct scsi_device *e, u64 key, + enum pr_type type, u32 flags); + int (*pr_release)(struct scsi_device *, u64 key, + enum pr_type type); + int (*pr_preempt)(struct scsi_device *, u64 old_key, + u64 new_key, enum pr_type type, bool abort); + int (*pr_clear)(struct scsi_device *, u64 key); + int (*pr_read_keys)(struct scsi_device *, + struct pr_keys *keys_info); + int (*pr_read_reservation)(struct scsi_device *, + struct pr_held_reservation *rsv); +}; + #define to_scsi_mpath_device(d) \ container_of(d, struct scsi_mpath_device, mpath_device) -- 2.43.5 Refactor each PR op into a helper which accepts a scsi_disk, as this is required for supporting scsi-multipath PR ops.
For scsi-multipath, the multipath PR ops are passed a mpath_device, which can be converted to a scsi_disk pointer (which we are providing an API for here). Signed-off-by: John Garry --- drivers/scsi/sd.c | 88 ++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 71 insertions(+), 17 deletions(-) diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c index f50b92e632018..cea3ab54c4417 100644 --- a/drivers/scsi/sd.c +++ b/drivers/scsi/sd.c @@ -1958,10 +1958,9 @@ static int sd_scsi_to_pr_err(struct scsi_sense_hdr *sshdr, int result) } } -static int sd_pr_in_command(struct block_device *bdev, u8 sa, +static int sd_pr_in_command(struct scsi_disk *sdkp, u8 sa, unsigned char *data, int data_len) { - struct scsi_disk *sdkp = scsi_disk(bdev->bd_disk); struct scsi_device *sdev = sdkp->device; struct scsi_sense_hdr sshdr; u8 cmd[10] = { PERSISTENT_RESERVE_IN, sa }; @@ -2000,7 +1999,8 @@ static int sd_pr_in_command(struct block_device *bdev, u8 sa, return sd_scsi_to_pr_err(&sshdr, result); } -static int sd_pr_read_keys(struct block_device *bdev, struct pr_keys *keys_info) +static int sd_pr_read_keys_disk(struct scsi_disk *sdkp, + struct pr_keys *keys_info) { int result, i, data_offset, num_copy_keys; u32 num_keys = keys_info->num_keys; @@ -2021,7 +2021,7 @@ static int sd_pr_read_keys(struct block_device *bdev, struct pr_keys *keys_info) if (!data) return -ENOMEM; - result = sd_pr_in_command(bdev, READ_KEYS, data, data_len); + result = sd_pr_in_command(sdkp, READ_KEYS, data, data_len); if (result) goto free_data; @@ -2041,15 +2041,22 @@ static int sd_pr_read_keys(struct block_device *bdev, struct pr_keys *keys_info) return result; } -static int sd_pr_read_reservation(struct block_device *bdev, - struct pr_held_reservation *rsv) +static int sd_pr_read_keys(struct block_device *bdev, + struct pr_keys *keys_info) { struct scsi_disk *sdkp = scsi_disk(bdev->bd_disk); + + return sd_pr_read_keys_disk(sdkp, keys_info); +} + +static int sd_pr_read_reservation_disk(struct scsi_disk 
*sdkp, + struct pr_held_reservation *rsv) +{ struct scsi_device *sdev = sdkp->device; u8 data[24] = { }; int result, len; - result = sd_pr_in_command(bdev, READ_RESERVATION, data, sizeof(data)); + result = sd_pr_in_command(sdkp, READ_RESERVATION, data, sizeof(data)); if (result) return result; @@ -2070,11 +2077,17 @@ static int sd_pr_read_reservation(struct block_device *bdev, rsv->type = scsi_pr_type_to_block(data[21] & 0x0f); return 0; } +static int sd_pr_read_reservation(struct block_device *bdev, + struct pr_held_reservation *rsv) +{ + struct scsi_disk *sdkp = scsi_disk(bdev->bd_disk); -static int sd_pr_out_command(struct block_device *bdev, u8 sa, u64 key, + return sd_pr_read_reservation_disk(sdkp, rsv); +} + +static int sd_pr_out_command(struct scsi_disk *sdkp, u8 sa, u64 key, u64 sa_key, enum scsi_pr_type type, u8 flags) { - struct scsi_disk *sdkp = scsi_disk(bdev->bd_disk); struct scsi_device *sdev = sdkp->device; struct scsi_sense_hdr sshdr; struct scsi_failure failure_defs[] = { @@ -2123,41 +2136,82 @@ static int sd_pr_out_command(struct block_device *bdev, u8 sa, u64 key, return sd_scsi_to_pr_err(&sshdr, result); } -static int sd_pr_register(struct block_device *bdev, u64 old_key, u64 new_key, +static int sd_pr_register_disk(struct scsi_disk *sdkp, u64 old_key, u64 new_key, u32 flags) { if (flags & ~PR_FL_IGNORE_KEY) return -EOPNOTSUPP; - return sd_pr_out_command(bdev, (flags & PR_FL_IGNORE_KEY) ? 0x06 : 0x00, + + return sd_pr_out_command(sdkp, + (flags & PR_FL_IGNORE_KEY) ? 
0x06 : 0x00, old_key, new_key, 0, (1 << 0) /* APTPL */); } -static int sd_pr_reserve(struct block_device *bdev, u64 key, enum pr_type type, +static int sd_pr_register(struct block_device *bdev, u64 old_key, u64 new_key, u32 flags) +{ + struct scsi_disk *sdkp = scsi_disk(bdev->bd_disk); + + return sd_pr_register_disk(sdkp, old_key, new_key, flags); +} + +static int sd_pr_reserve_disk(struct scsi_disk *sdkp, u64 key, + enum pr_type type, u32 flags) { if (flags) return -EOPNOTSUPP; - return sd_pr_out_command(bdev, 0x01, key, 0, + return sd_pr_out_command(sdkp, 0x01, key, 0, + block_pr_type_to_scsi(type), 0); +} + +static int sd_pr_reserve(struct block_device *bdev, u64 key, + enum pr_type type, u32 flags) +{ + struct scsi_disk *sdkp = scsi_disk(bdev->bd_disk); + + return sd_pr_reserve_disk(sdkp, key, type, flags); +} + +static int sd_pr_release_disk(struct scsi_disk *sdkp, u64 key, + enum pr_type type) +{ + return sd_pr_out_command(sdkp, 0x02, key, 0, block_pr_type_to_scsi(type), 0); } static int sd_pr_release(struct block_device *bdev, u64 key, enum pr_type type) { - return sd_pr_out_command(bdev, 0x02, key, 0, + struct scsi_disk *sdkp = scsi_disk(bdev->bd_disk); + + return sd_pr_release_disk(sdkp, key, type); +} + +static int sd_pr_preempt_disk(struct scsi_disk *sdkp, u64 old_key, u64 new_key, + enum pr_type type, bool abort) +{ + return sd_pr_out_command(sdkp, abort ? 0x05 : 0x04, old_key, new_key, block_pr_type_to_scsi(type), 0); } static int sd_pr_preempt(struct block_device *bdev, u64 old_key, u64 new_key, enum pr_type type, bool abort) { - return sd_pr_out_command(bdev, abort ? 
0x05 : 0x04, old_key, new_key, - block_pr_type_to_scsi(type), 0); + struct scsi_disk *sdkp = scsi_disk(bdev->bd_disk); + + return sd_pr_preempt_disk(sdkp, old_key, new_key, type, abort); +} + +static int sd_pr_clear_disk(struct scsi_disk *sdkp, u64 key) +{ + return sd_pr_out_command(sdkp, 0x03, key, 0, 0, 0); } static int sd_pr_clear(struct block_device *bdev, u64 key) { - return sd_pr_out_command(bdev, 0x03, key, 0, 0, 0); + struct scsi_disk *sdkp = scsi_disk(bdev->bd_disk); + + return sd_pr_clear_disk(sdkp, key); } static const struct pr_ops sd_pr_ops = { -- 2.43.5 Add a new class, sd_mpath_disk_class, which is the multipath version of the scsi_disk class. Structure sd_mpath_disk is introduced to manage the mpath_disk. Signed-off-by: John Garry --- drivers/scsi/sd.c | 43 ++++++++++++++++++++++++++++++++++++++++++- drivers/scsi/sd.h | 3 +++ 2 files changed, 45 insertions(+), 1 deletion(-) diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c index cea3ab54c4417..222e28ed44e9b 100644 --- a/drivers/scsi/sd.c +++ b/drivers/scsi/sd.c @@ -70,6 +70,7 @@ #include #include #include +#include #include "sd.h" #include "scsi_priv.h" @@ -115,6 +116,39 @@ static DEFINE_IDA(sd_index_ida); static mempool_t *sd_page_pool; static struct lock_class_key sd_bio_compl_lkclass; +#ifdef CONFIG_SCSI_MULTIPATH +struct sd_mpath_disk { + struct mpath_disk *mpath_disk; +}; + +static void sd_mpath_disk_release(struct device *dev) +{ +} + +static const struct class sd_mpath_disk_class = { + .name = "scsi_mpath_disk", + .dev_release = sd_mpath_disk_release, +}; + +static int sd_mpath_class_register(void) +{ + return class_register(&sd_mpath_disk_class); +} + +static void sd_mpath_class_unregister(void) +{ + class_unregister(&sd_mpath_disk_class); +} +#else /* CONFIG_SCSI_MULTIPATH */ +static int sd_mpath_class_register(void) +{ + return 0; +} + +static void sd_mpath_class_unregister(void) +{ +} +#endif static const char *sd_cache_types[] = { "write through", "none", "write back", @@ -4464,11 
+4498,15 @@ static int __init init_sd(void) if (err) goto err_out; + err = sd_mpath_class_register(); + if (err) + goto err_out_class; + sd_page_pool = mempool_create_page_pool(SD_MEMPOOL_SIZE, 0); if (!sd_page_pool) { printk(KERN_ERR "sd: can't init discard page pool\n"); err = -ENOMEM; - goto err_out_class; + goto err_out_mpath_class; } err = scsi_register_driver(&sd_template.gendrv); @@ -4479,6 +4517,8 @@ static int __init init_sd(void) err_out_driver: mempool_destroy(sd_page_pool); +err_out_mpath_class: + sd_mpath_class_unregister(); err_out_class: class_unregister(&sd_disk_class); err_out: @@ -4502,6 +4542,7 @@ static void __exit exit_sd(void) mempool_destroy(sd_page_pool); class_unregister(&sd_disk_class); + sd_mpath_class_unregister(); for (i = 0; i < SD_MAJORS; i++) unregister_blkdev(sd_major(i), "sd"); diff --git a/drivers/scsi/sd.h b/drivers/scsi/sd.h index 574af82430169..304b24644d942 100644 --- a/drivers/scsi/sd.h +++ b/drivers/scsi/sd.h @@ -83,6 +83,9 @@ struct zoned_disk_info { struct scsi_disk { struct scsi_device *device; + #ifdef CONFIG_SCSI_MULTIPATH + struct sd_mpath_disk *sd_mpath_disk; + #endif /* * disk_dev is used to show attributes in /sys/class/scsi_disk/, -- 2.43.5 These do the same as nvme_mpath_{start,end}_request() Signed-off-by: John Garry --- drivers/scsi/sd.c | 55 +++++++++++++++++++++++++++++++++++ include/scsi/scsi_cmnd.h | 5 ++++ include/scsi/scsi_multipath.h | 1 + 3 files changed, 61 insertions(+) diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c index 222e28ed44e9b..845d392456549 100644 --- a/drivers/scsi/sd.c +++ b/drivers/scsi/sd.c @@ -1546,6 +1546,57 @@ static void sd_uninit_command(struct scsi_cmnd *SCpnt) mempool_free(rq->special_vec.bv_page, sd_page_pool); } +#ifdef CONFIG_SCSI_MULTIPATH +static void sd_mpath_start_command(struct scsi_cmnd *scmd) +{ + struct request *req = scsi_cmd_to_rq(scmd); + struct scsi_disk *sdkp = scsi_disk(req->q->disk); + struct sd_mpath_disk *sd_mpath_disk = sdkp->sd_mpath_disk; + struct 
mpath_disk *mpath_disk = sd_mpath_disk->mpath_disk; + struct scsi_device *sdev = scmd->device; + struct mpath_head *mpath_head = mpath_disk->mpath_head; + struct scsi_mpath_head *scsi_mpath_head = mpath_head->drvdata; + struct gendisk *disk = mpath_disk->disk; + + if (mpath_qd_iopolicy(&scsi_mpath_head->iopolicy) && + !(scmd->flags & SCMD_MPATH_CNT_ACTIVE)) { + struct scsi_mpath_device *scsi_mpath_dev = sdev->scsi_mpath_dev; + + atomic_inc(&scsi_mpath_dev->nr_active); + scmd->flags |= SCMD_MPATH_CNT_ACTIVE; + } + + if (!blk_queue_io_stat(disk->queue) || blk_rq_is_passthrough(req) || + (scmd->flags & SCMD_MPATH_IO_STATS)) + return; + + scmd->flags |= SCMD_MPATH_IO_STATS; + scmd->start_time = bdev_start_io_acct(disk->part0, req_op(req), + jiffies); +} + +static void sd_mpath_end_command(struct scsi_cmnd *scmd) +{ + struct request *req = scsi_cmd_to_rq(scmd); + struct scsi_disk *sdkp = scsi_disk(req->q->disk); + struct sd_mpath_disk *sd_mpath_disk = sdkp->sd_mpath_disk; + struct mpath_disk *mpath_disk = sd_mpath_disk->mpath_disk; + struct scsi_device *sdev = scmd->device; + + if (scmd->flags & SCMD_MPATH_CNT_ACTIVE) { + struct scsi_mpath_device *scsi_mpath_dev = sdev->scsi_mpath_dev; + + atomic_dec_if_positive(&scsi_mpath_dev->nr_active); + } + + if (!(scmd->flags & SCMD_MPATH_IO_STATS)) + return; + bdev_end_io_acct(mpath_disk->disk->part0, req_op(req), + blk_rq_bytes(req) >> SECTOR_SHIFT, + scmd->start_time); +} +#endif + static bool sd_need_revalidate(struct gendisk *disk, struct scsi_disk *sdkp) { if (sdkp->device->removable || sdkp->write_prot) { @@ -4468,6 +4519,10 @@ static struct scsi_driver sd_template = { .resume = sd_resume, .init_command = sd_init_command, .uninit_command = sd_uninit_command, + #ifdef CONFIG_SCSI_MULTIPATH + .mpath_start_cmd = sd_mpath_start_command, + .mpath_end_cmd = sd_mpath_end_command, + #endif .done = sd_done, .eh_action = sd_eh_action, .eh_reset = sd_eh_reset, diff --git a/include/scsi/scsi_cmnd.h b/include/scsi/scsi_cmnd.h index 
8ecfb94049db5..c6571a36e577b 100644 --- a/include/scsi/scsi_cmnd.h +++ b/include/scsi/scsi_cmnd.h @@ -60,6 +60,8 @@ struct scsi_pointer { #define SCMD_FAIL_IF_RECOVERING (1 << 4) /* flags preserved across unprep / reprep */ #define SCMD_PRESERVED_FLAGS (SCMD_INITIALIZED | SCMD_FAIL_IF_RECOVERING) +#define SCMD_MPATH_IO_STATS (1 << 5) +#define SCMD_MPATH_CNT_ACTIVE (1 << 6) /* for scmd->state */ #define SCMD_STATE_COMPLETE 0 @@ -139,6 +141,9 @@ struct scsi_cmnd { * to release this memory. (The memory * obtained by scsi_malloc is guaranteed * to be at an address < 16Mb). */ + #ifdef CONFIG_SCSI_MULTIPATH + unsigned long start_time; + #endif int result; /* Status code from lower level driver */ }; diff --git a/include/scsi/scsi_multipath.h b/include/scsi/scsi_multipath.h index cb63c6536b854..2011447f482d6 100644 --- a/include/scsi/scsi_multipath.h +++ b/include/scsi/scsi_multipath.h @@ -36,6 +36,7 @@ struct scsi_mpath_device { struct mpath_device mpath_device; struct scsi_device *sdev; int index; + atomic_t nr_active; struct scsi_mpath_head *scsi_mpath_head; char device_id_str[SCSI_MPATH_DEVICE_ID_LEN]; -- 2.43.5 Add scsi_driver.mpath_ioctl callback. 
Signed-off-by: John Garry --- drivers/scsi/sd.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c index 845d392456549..b807452a4bdc3 100644 --- a/drivers/scsi/sd.c +++ b/drivers/scsi/sd.c @@ -4070,6 +4070,19 @@ static int sd_format_disk_name(char *prefix, int index, char *buf, int buflen) return 0; } +#ifdef CONFIG_SCSI_MULTIPATH +static int sd_mpath_ioctl(struct scsi_device *sdp, blk_mode_t mode, + unsigned int cmd, unsigned long arg) +{ + struct scsi_disk *sdkp = dev_get_drvdata(&sdp->sdev_gendev); + struct gendisk *disk = sdkp->disk; + struct block_device *bdev = disk->part0; + + return sd_ioctl(bdev, mode, cmd, arg); +} + +#else /* CONFIG_SCSI_MULTIPATH */ +#endif /** * sd_probe - called during driver initialization and whenever a * new scsi device is attached to the system. It is called once @@ -4522,6 +4535,7 @@ static struct scsi_driver sd_template = { #ifdef CONFIG_SCSI_MULTIPATH .mpath_start_cmd = sd_mpath_start_command, .mpath_end_cmd = sd_mpath_end_command, + .mpath_ioctl = sd_mpath_ioctl, #endif .done = sd_done, .eh_action = sd_eh_action, -- 2.43.5 Add sd_mpath_pr_ops, whose callbacks call into the disk variants of the sd PR ops. 
Signed-off-by: John Garry --- drivers/scsi/sd.c | 56 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 56 insertions(+) diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c index b807452a4bdc3..f94a3b696dcab 100644 --- a/drivers/scsi/sd.c +++ b/drivers/scsi/sd.c @@ -4081,6 +4081,61 @@ static int sd_mpath_ioctl(struct scsi_device *sdp, blk_mode_t mode, return sd_ioctl(bdev, mode, cmd, arg); } +static int sd_mpath_pr_register(struct scsi_device *sdp, u64 old_key, + u64 new_key, u32 flags) +{ + return sd_pr_register_disk(dev_get_drvdata(&sdp->sdev_gendev), old_key, + new_key, flags); +} + +static int sd_mpath_pr_reserve(struct scsi_device *sdp, u64 key, + enum pr_type type, u32 flags) +{ + return sd_pr_reserve_disk(dev_get_drvdata(&sdp->sdev_gendev), key, type, + flags); +} + +static int sd_mpath_pr_release(struct scsi_device *sdp, u64 key, + enum pr_type type) +{ + return sd_pr_release_disk(dev_get_drvdata(&sdp->sdev_gendev), key, type); +} + +static int sd_mpath_pr_preempt(struct scsi_device *sdp, u64 old, + u64 new, enum pr_type type, bool abort) +{ + return sd_pr_preempt_disk(dev_get_drvdata(&sdp->sdev_gendev), old, new, + type, abort); +} + +static int sd_mpath_pr_clear(struct scsi_device *sdp, u64 key) +{ + return sd_pr_clear_disk(dev_get_drvdata(&sdp->sdev_gendev), key); +} + +static int sd_mpath_pr_read_keys(struct scsi_device *sdp, + struct pr_keys *keys_info) +{ + return sd_pr_read_keys_disk(dev_get_drvdata(&sdp->sdev_gendev), + keys_info); +} + +static int sd_mpath_pr_read_reservation(struct scsi_device *sdp, + struct pr_held_reservation *resv) +{ + return sd_pr_read_reservation_disk(dev_get_drvdata(&sdp->sdev_gendev), + resv); +} + +static const struct scsi_mpath_pr_ops sd_mpath_pr_ops = { + .pr_register = sd_mpath_pr_register, + .pr_reserve = sd_mpath_pr_reserve, + .pr_release = sd_mpath_pr_release, + .pr_preempt = sd_mpath_pr_preempt, + .pr_clear = sd_mpath_pr_clear, + .pr_read_keys = sd_mpath_pr_read_keys, + .pr_read_reservation = 
sd_mpath_pr_read_reservation, +}; #else /* CONFIG_SCSI_MULTIPATH */ #endif /** @@ -4536,6 +4591,7 @@ static struct scsi_driver sd_template = { .mpath_start_cmd = sd_mpath_start_command, .mpath_end_cmd = sd_mpath_end_command, .mpath_ioctl = sd_mpath_ioctl, + .mpath_pr_ops = &sd_mpath_pr_ops, #endif .done = sd_done, .eh_action = sd_eh_action, -- 2.43.5 Add a function to find the associated mpath_disk for a request. Signed-off-by: John Garry --- drivers/scsi/sd.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c index f94a3b696dcab..9617878b53ec6 100644 --- a/drivers/scsi/sd.c +++ b/drivers/scsi/sd.c @@ -4081,6 +4081,14 @@ static int sd_mpath_ioctl(struct scsi_device *sdp, blk_mode_t mode, return sd_ioctl(bdev, mode, cmd, arg); } +static struct mpath_disk *sd_mpath_to_disk(struct request *req) +{ + struct scsi_disk *sdkp = req->part->bd_disk->private_data; + struct sd_mpath_disk *sd_mpath_disk = sdkp->sd_mpath_disk; + + return sd_mpath_disk->mpath_disk; +} + static int sd_mpath_pr_register(struct scsi_device *sdp, u64 old_key, u64 new_key, u32 flags) { @@ -4592,6 +4600,7 @@ static struct scsi_driver sd_template = { .mpath_end_cmd = sd_mpath_end_command, .mpath_ioctl = sd_mpath_ioctl, .mpath_pr_ops = &sd_mpath_pr_ops, + .to_mpath_disk = sd_mpath_to_disk, #endif .done = sd_done, .eh_action = sd_eh_action, -- 2.43.5 Add support to attach a multipath disk. We still allocate the gendisk per path, and this is required for the per-path submission. However, those gendisks are marked as hidden. Those disks are named sdX:Y, where X is the multipath disk index and Y is the per-path index. A global list of sd_mpath_disks is kept for matching scsi_device's. The multipath gendisk has the name and disk->major/minor set to mimic a scsi_disk. 
The following is an example of relevant scsi_disk and block sysfs directories: $ ls -l /sys/block/ | grep sdc lrwxrwxrwx 1 root root 0 Feb 24 16:01 sdc -> ../devices/virtual/scsi_mpath_disk/0/sdc lrwxrwxrwx 1 root root 0 Feb 24 16:01 sdc:0 -> ../devices/platform/host8/session1/target8:0:0/8:0:0:0/block/sdc:0 lrwxrwxrwx 1 root root 0 Feb 24 16:02 sdc:1 -> ../devices/platform/host9/session2/target9:0:0/9:0:0:0/block/sdc:1 $ ls -l /sys/class/scsi_mpath_disk/0/ total 0 drwxr-xr-x 2 root root 0 Feb 24 16:03 power drwxr-xr-x 11 root root 0 Feb 24 16:01 sdc lrwxrwxrwx 1 root root 0 Feb 24 16:01 subsystem -> ../../../../class/scsi_mpath_disk -rw-r--r-- 1 root root 4096 Feb 24 16:01 uevent $ ls -l /sys/class/scsi_mpath_disk/0/sdc/multipath/ total 0 lrwxrwxrwx 1 root root 0 Feb 24 16:20 sdc:0 -> ../../../../../platform/host8/session1/target8:0:0/8:0:0:0/block/sdc:0 lrwxrwxrwx 1 root root 0 Feb 24 16:20 sdc:1 -> ../../../../../platform/host9/session2/target9:0:0/9:0:0:0/block/sdc:1 $ ls -l /dev/sdc* brw-rw---- 1 root disk 8, 32 Feb 24 16:01 /dev/sdc brw-rw---- 1 root disk 8, 33 Feb 24 16:01 /dev/sdc1 brw-rw---- 1 root disk 8, 34 Feb 24 16:01 /dev/sdc2 $ lsblk /dev/sdc NAME MAJ:MIN RM SIZE RO TYPE MOUNTPOINTS sdc 8:32 0 600M 0 disk |-sdc1 8:33 0 9M 0 part `-sdc2 8:34 0 568M 0 part Signed-off-by: John Garry --- drivers/scsi/sd.c | 376 +++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 358 insertions(+), 18 deletions(-) diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c index 9617878b53ec6..409c0937764d9 100644 --- a/drivers/scsi/sd.c +++ b/drivers/scsi/sd.c @@ -117,12 +117,33 @@ static DEFINE_IDA(sd_index_ida); static mempool_t *sd_page_pool; static struct lock_class_key sd_bio_compl_lkclass; #ifdef CONFIG_SCSI_MULTIPATH +static LIST_HEAD(sd_mpath_disks_list); +static DEFINE_MUTEX(sd_mpath_disks_lock); + struct sd_mpath_disk { + struct device dev; + int disk_index; + int disk_count; + struct list_head entry; + struct mutex lock; struct mpath_disk *mpath_disk; + 
struct scsi_mpath_head *scsi_mpath_head; }; static void sd_mpath_disk_release(struct device *dev) { + struct sd_mpath_disk *sd_mpath_disk = + container_of(dev, struct sd_mpath_disk, dev); + struct scsi_mpath_head *scsi_mpath_head = + sd_mpath_disk->scsi_mpath_head; + struct mpath_disk *mpath_disk = sd_mpath_disk->mpath_disk; + + mpath_put_disk(mpath_disk); + + ida_free(&sd_index_ida, sd_mpath_disk->disk_index); + scsi_mpath_put_head(scsi_mpath_head); + + kfree(sd_mpath_disk); } static const struct class sd_mpath_disk_class = { @@ -4144,7 +4165,302 @@ static const struct scsi_mpath_pr_ops sd_mpath_pr_ops = { .pr_read_keys = sd_mpath_pr_read_keys, .pr_read_reservation = sd_mpath_pr_read_reservation, }; + +static int sd_mpath_revalidate_head(struct scsi_disk *sdkp) +{ + struct sd_mpath_disk *sd_mpath_disk = sdkp->sd_mpath_disk; + struct mpath_disk *mpath_disk = sd_mpath_disk->mpath_disk;; + struct gendisk *disk = mpath_disk->disk; + struct queue_limits *sdkp_lim = &sdkp->disk->queue->limits; + struct queue_limits lim; + unsigned int memflags; + int ret; + + lim = queue_limits_start_update(disk->queue); + memflags = blk_mq_freeze_queue(disk->queue); + + lim.logical_block_size = sdkp_lim->logical_block_size; + lim.physical_block_size = sdkp_lim->physical_block_size; + lim.io_min = sdkp_lim->io_min; + lim.io_opt = sdkp_lim->io_opt; + + queue_limits_stack_bdev(&lim, sdkp->disk->part0, 0, + disk->disk_name); + + /* TODO: setup integrity limits */ + lim.max_write_streams = sdkp_lim->max_write_streams; + lim.write_stream_granularity = sdkp_lim->write_stream_granularity; + ret = queue_limits_commit_update(disk->queue, &lim); + + set_capacity_and_notify(disk, get_capacity(sdkp->disk)); + + blk_mq_unfreeze_queue(disk->queue, memflags); + + return ret; +} +static int sd_mpath_get_disk(struct sd_mpath_disk *sd_mpath_disk) +{ + if (!get_device(&sd_mpath_disk->dev)) + return -ENXIO; + return 0; +} + +static void sd_mpath_put_disk(struct sd_mpath_disk *sd_mpath_disk) +{ + 
put_device(&sd_mpath_disk->dev); +} + +static struct sd_mpath_disk *sd_mpath_find_disk(struct scsi_device *sdp) +{ + struct scsi_mpath_device *scsi_mpath_dev = sdp->scsi_mpath_dev; + struct sd_mpath_disk *sd_mpath_disk; + int ret; + + mutex_lock(&sd_mpath_disks_lock); + list_for_each_entry(sd_mpath_disk, &sd_mpath_disks_list, entry) { + struct scsi_mpath_head *scsi_mpath_head; + struct mpath_disk *mpath_disk; + struct mpath_head *mpath_head; + + ret = sd_mpath_get_disk(sd_mpath_disk); + if (ret) + continue; + mpath_disk = sd_mpath_disk->mpath_disk; + mpath_head = mpath_disk->mpath_head; + scsi_mpath_head = mpath_head->drvdata; + + if (strncmp(scsi_mpath_head->wwid, + scsi_mpath_dev->device_id_str, + SCSI_MPATH_DEVICE_ID_LEN) == 0) { + + mutex_unlock(&sd_mpath_disks_lock); + return sd_mpath_disk; + } + sd_mpath_put_disk(sd_mpath_disk); + } + + return NULL; +} + +static void sd_mpath_add_disk(struct scsi_disk *sdkp) +{ + struct scsi_device *sdp = sdkp->device; + struct scsi_mpath_device *scsi_mpath_dev = sdp->scsi_mpath_dev; + struct mpath_device *mpath_device = &scsi_mpath_dev->mpath_device; + struct sd_mpath_disk *sd_mpath_disk = sdkp->sd_mpath_disk; + struct mpath_disk *mpath_disk = sd_mpath_disk->mpath_disk; + struct mpath_head *mpath_head = mpath_disk->mpath_head; + + mpath_device->disk = sdkp->disk; + mpath_add_device(mpath_head, mpath_device); + mpath_device_set_live(mpath_disk, mpath_device); +} + +static int sd_mpath_probe(struct scsi_disk *sdkp) +{ + struct scsi_device *sdp = sdkp->device; + struct scsi_mpath_device *scsi_mpath_dev = sdp->scsi_mpath_dev; + struct device *dma_dev = sdp->host->dma_dev; + struct scsi_mpath_head *scsi_mpath_head = + scsi_mpath_dev->scsi_mpath_head; + struct sd_mpath_disk *sd_mpath_disk; + struct mpath_head *mpath_head = scsi_mpath_head->mpath_head; + struct queue_limits lim; + struct gendisk *disk; + int error; + + /* + * sd_mpath_disks_list is kept locked if no disk found. + * Otherwise an extra reference is taken. 
+ */ + sd_mpath_disk = sd_mpath_find_disk(sdp); + if (sd_mpath_disk) { + mutex_lock(&sd_mpath_disk->lock); + sd_mpath_disk->disk_count++; + mutex_unlock(&sd_mpath_disk->lock); + goto found; + } + + sd_mpath_disk = kzalloc(sizeof(*sd_mpath_disk), GFP_KERNEL); + if (!sd_mpath_disk) { + error = -ENOMEM; + goto out_unlock; + } + + sd_mpath_disk->scsi_mpath_head = scsi_mpath_head; + device_initialize(&sd_mpath_disk->dev); + mutex_init(&sd_mpath_disk->lock); + sd_mpath_disk->dev.class = &sd_mpath_disk_class; + + blk_set_stacking_limits(&lim); + lim.dma_alignment = 3; + lim.features |= BLK_FEAT_IO_STAT | BLK_FEAT_NOWAIT | + BLK_FEAT_POLL | BLK_FEAT_ATOMIC_WRITES; + + sd_mpath_disk->mpath_disk = mpath_alloc_head_disk(&lim, + dev_to_node(dma_dev)); + if (!sd_mpath_disk->mpath_disk) { + error = -ENOMEM; + goto out_free_disk; + } + disk = sd_mpath_disk->mpath_disk->disk; + mpath_get_head(mpath_head); /* undone in mpath_free_disk() */ + + sd_mpath_disk->mpath_disk->mpath_head = mpath_head; + sd_mpath_disk->mpath_disk->parent = &sd_mpath_disk->dev; + + error = ida_alloc(&sd_index_ida, GFP_KERNEL); + if (error < 0) { + sdev_printk(KERN_WARNING, sdp, "sd_probe: memory exhausted.\n"); + goto out_put_disk; + } + sd_mpath_disk->disk_index = error; + error = sd_format_disk_name("sd", sd_mpath_disk->disk_index, + disk->disk_name, DISK_NAME_LEN); + if (error) + goto out_free_index; + + error = dev_set_name(&sd_mpath_disk->dev, "%s", + dev_name(&scsi_mpath_head->dev)); + if (error) + goto out_free_index; + + /* undone in sd_mpath_disk_release() */ + scsi_mpath_get_head(scsi_mpath_head); + + error = device_add(&sd_mpath_disk->dev); + if (error) { + put_device(&sd_mpath_disk->dev); + goto out_unlock; + } + + list_add_tail(&sd_mpath_disk->entry, &sd_mpath_disks_list); + disk->major = sd_major((sd_mpath_disk->disk_index & 0xf0) >> 4); + disk->first_minor = ((sd_mpath_disk->disk_index & 0xf) << 4) | + (sd_mpath_disk->disk_index & 0xfff00); + disk->minors = SD_MINORS; + + 
sd_mpath_disk->disk_count = 1; + mutex_unlock(&sd_mpath_disks_lock); + +found: + sdkp->sd_mpath_disk = sd_mpath_disk; + sdkp->disk->flags |= GENHD_FL_HIDDEN; + snprintf(sdkp->disk->disk_name, DISK_NAME_LEN, "%s:%d", + sd_mpath_disk->mpath_disk->disk->disk_name, + scsi_mpath_dev->index); + + sdkp->index = -1; + return 0; + +out_free_index: + ida_free(&sd_index_ida, sd_mpath_disk->disk_index); +out_put_disk: + mpath_put_disk(sd_mpath_disk->mpath_disk); +out_free_disk: + kfree(sd_mpath_disk); +out_unlock: + mutex_unlock(&sd_mpath_disks_lock); + return error; +} + +static void sd_mpath_remove(struct scsi_disk *sdkp) +{ + struct sd_mpath_disk *sd_mpath_disk = sdkp->sd_mpath_disk; + struct scsi_device *sdp = sdkp->device; + struct scsi_mpath_device *scsi_mpath_dev = sdp->scsi_mpath_dev; + struct mpath_device *mpath_device = &scsi_mpath_dev->mpath_device; + struct mpath_disk *mpath_disk = sd_mpath_disk->mpath_disk; + struct mpath_head *mpath_head = mpath_disk->mpath_head; + bool remove = false; + + mpath_synchronize(mpath_head); + + if (mpath_clear_current_path(mpath_head, mpath_device)) + mpath_synchronize(mpath_head); + + mpath_delete_device(mpath_head, mpath_device); + + mutex_lock(&sd_mpath_disk->lock); + sd_mpath_disk->disk_count--; + /* delayed removal not yet supported */ + if (!sd_mpath_disk->disk_count) { + mutex_lock(&sd_mpath_disks_lock); + list_del_init(&sd_mpath_disk->entry); + mutex_unlock(&sd_mpath_disks_lock); + + remove = true; + } + mutex_unlock(&sd_mpath_disk->lock); + mpath_remove_sysfs_link(mpath_disk, mpath_device); + mpath_device->disk = NULL; + + if (remove) { + device_del(&sd_mpath_disk->dev); + mpath_remove_disk(mpath_disk); + } + sd_mpath_put_disk(sd_mpath_disk); +} + +/* + * Always calls for a failed probe, so we need to handle that some structures + * have not been setup. 
+ */ +static void sd_mpath_fail_probe(struct scsi_disk *sdkp) +{ + struct sd_mpath_disk *sd_mpath_disk = sdkp->sd_mpath_disk; + struct scsi_mpath_device *scsi_mpath_dev; + struct mpath_device *mpath_device; + struct scsi_device *sdp = sdkp->device; + struct mpath_disk *mpath_disk; + bool remove = false; + + if (!sd_mpath_disk) + return; + + mpath_disk = sd_mpath_disk->mpath_disk; + scsi_mpath_dev = sdp->scsi_mpath_dev; + mpath_device = &scsi_mpath_dev->mpath_device; + + mutex_lock(&sd_mpath_disk->lock); + sd_mpath_disk->disk_count--; + if (!sd_mpath_disk->disk_count) { + mutex_lock(&sd_mpath_disks_lock); + list_del_init(&sd_mpath_disk->entry); + mutex_unlock(&sd_mpath_disks_lock); + + remove = true; + } + mutex_unlock(&sd_mpath_disk->lock); + mpath_device->disk = NULL; + + if (remove) { + device_del(&sd_mpath_disk->dev); + mpath_remove_disk(mpath_disk); + } + sd_mpath_put_disk(sd_mpath_disk); +} + #else /* CONFIG_SCSI_MULTIPATH */ +static int sd_mpath_probe(struct scsi_disk *sdkp) +{ + return 0; +} +static void sd_mpath_remove(struct scsi_disk *sdkp) +{ + return; +} +static void sd_mpath_fail_probe(struct scsi_disk *sdkp) +{ + +} +static int sd_mpath_revalidate_head(struct scsi_disk *sdkp) +{ + return 0; +} +static void sd_mpath_add_disk(struct scsi_disk *sdkp) +{ +} #endif /** * sd_probe - called during driver initialization and whenever a @@ -4198,22 +4514,33 @@ static int sd_probe(struct device *dev) &sd_bio_compl_lkclass); if (!gd) goto out_free; + sdkp->disk = gd; + sdkp->device = sdp; - index = ida_alloc(&sd_index_ida, GFP_KERNEL); - if (index < 0) { - sdev_printk(KERN_WARNING, sdp, "sd_probe: memory exhausted.\n"); - goto out_put; - } + if (sdp->scsi_mpath_dev) { + error = sd_mpath_probe(sdkp); + if (error) + goto out_put; + } else { + index = ida_alloc(&sd_index_ida, GFP_KERNEL); + if (index < 0) { + sdev_printk(KERN_WARNING, sdp, "sd_probe: memory exhausted.\n"); + goto out_put; + } - error = sd_format_disk_name("sd", index, gd->disk_name, DISK_NAME_LEN); 
- if (error) { - sdev_printk(KERN_WARNING, sdp, "SCSI disk (sd) name length exceeded.\n"); - goto out_free_index; + error = sd_format_disk_name("sd", index, gd->disk_name, + DISK_NAME_LEN); + if (error) { + sdev_printk(KERN_WARNING, sdp, "SCSI disk (sd) name length exceeded.\n"); + goto out_free_index; + } + sdkp->index = index; + + gd->major = sd_major((index & 0xf0) >> 4); + gd->first_minor = ((index & 0xf) << 4) | (index & 0xfff00); + gd->minors = SD_MINORS; } - sdkp->device = sdp; - sdkp->disk = gd; - sdkp->index = index; sdkp->max_retries = SD_MAX_RETRIES; atomic_set(&sdkp->openers, 0); atomic_set(&sdkp->device->ioerr_cnt, 0); @@ -4233,16 +4560,13 @@ static int sd_probe(struct device *dev) error = device_add(&sdkp->disk_dev); if (error) { + sd_mpath_fail_probe(sdkp); put_device(&sdkp->disk_dev); goto out; } dev_set_drvdata(dev, sdkp); - gd->major = sd_major((index & 0xf0) >> 4); - gd->first_minor = ((index & 0xf) << 4) | (index & 0xfff00); - gd->minors = SD_MINORS; - gd->fops = &sd_fops; gd->private_data = sdkp; @@ -4260,6 +4584,12 @@ static int sd_probe(struct device *dev) sd_revalidate_disk(gd); + if (sdp->scsi_mpath_dev) { + error = sd_mpath_revalidate_head(sdkp); + if (error) + sdev_printk(KERN_WARNING, sdp, "could not revalidate multipath limits\n"); + } + if (sdp->removable) { gd->flags |= GENHD_FL_REMOVABLE; gd->events |= DISK_EVENT_MEDIA_CHANGE; @@ -4274,11 +4604,15 @@ static int sd_probe(struct device *dev) error = device_add_disk(dev, gd, NULL); if (error) { + sd_mpath_fail_probe(sdkp); device_unregister(&sdkp->disk_dev); put_disk(gd); goto out; } + if (sdp->scsi_mpath_dev) + sd_mpath_add_disk(sdkp); + if (sdkp->security) { sdkp->opal_dev = init_opal_dev(sdkp, &sd_sec_submit); if (sdkp->opal_dev) @@ -4292,7 +4626,8 @@ static int sd_probe(struct device *dev) return 0; out_free_index: - ida_free(&sd_index_ida, index); + if (index >= 0) + ida_free(&sd_index_ida, index); out_put: put_disk(gd); out_free: @@ -4316,6 +4651,10 @@ static int sd_probe(struct 
device *dev) static int sd_remove(struct device *dev) { struct scsi_disk *sdkp = dev_get_drvdata(dev); + struct scsi_device *sdp = sdkp->device; // new code + + if (sdp->scsi_mpath_dev) + sd_mpath_remove(sdkp); scsi_autopm_get_device(sdkp->device); @@ -4332,7 +4671,8 @@ static void scsi_disk_release(struct device *dev) { struct scsi_disk *sdkp = to_scsi_disk(dev); - ida_free(&sd_index_ida, sdkp->index); + if (sdkp->index >= 0) + ida_free(&sd_index_ida, sdkp->index); put_device(&sdkp->device->sdev_gendev); free_opal_dev(sdkp->opal_dev); -- 2.43.5 Add a mpath_dev file so that the multipath disk can be looked up from per-path gendisk directory. The following is an example of this usage: $ ls -l /dev/sdc brw-rw---- 1 root disk 8, 32 Feb 24 16:08 /dev/sdc $ cat /sys/class/scsi_mpath_disk/0/sdc/multipath/sdc:0/mpath_dev 8:32 This can be used by a util like lsscsi, which would find that the gendisk for the per-path scsi_device is missing. Signed-off-by: John Garry --- drivers/scsi/sd.c | 49 ++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 48 insertions(+), 1 deletion(-) diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c index 409c0937764d9..f5922a9fe6c1b 100644 --- a/drivers/scsi/sd.c +++ b/drivers/scsi/sd.c @@ -4198,6 +4198,51 @@ static int sd_mpath_revalidate_head(struct scsi_disk *sdkp) return ret; } + +static ssize_t sd_mpath_dev_show(struct device *dev, + struct device_attribute *attr, char *page) +{ + struct gendisk *gd = dev_to_disk(dev); + struct scsi_disk *sdkp = gd->private_data; + struct sd_mpath_disk *sd_mpath_disk = sdkp->sd_mpath_disk; + struct mpath_disk *mpath_disk = sd_mpath_disk->mpath_disk; + struct gendisk *disk = mpath_disk->disk; + struct device *disk_dev = disk_to_dev(disk); + + return print_dev_t(page, disk_dev->devt); +} +static DEVICE_ATTR(mpath_dev, 0444, sd_mpath_dev_show, NULL); + +static struct attribute *sd_mpath_dev_attrs[] = { + &dev_attr_mpath_dev.attr, + NULL +}; + +static umode_t sd_mpath_dev_attr_is_visible(struct 
kobject *kobj, + struct attribute *attr, int i) +{ + struct device *dev = kobj_to_dev(kobj); + struct gendisk *gd = dev_to_disk(dev); + struct scsi_disk *sdkp = gd->private_data; + struct scsi_device *sdev = sdkp->device; + struct scsi_mpath_device *scsi_mpath_device = sdev->scsi_mpath_dev; + + if (!scsi_mpath_device) + return 0; + + return attr->mode; +} + +static const struct attribute_group sd_mpath_dev_attr_group = { + .is_visible = sd_mpath_dev_attr_is_visible, + .attrs = sd_mpath_dev_attrs, +}; + +static const struct attribute_group *sd_mpath_dev_groups[] = { + &sd_mpath_dev_attr_group, + NULL +}; + static int sd_mpath_get_disk(struct sd_mpath_disk *sd_mpath_disk) { if (!get_device(&sd_mpath_disk->dev)) @@ -4461,6 +4506,8 @@ static int sd_mpath_revalidate_head(struct scsi_disk *sdkp) static void sd_mpath_add_disk(struct scsi_disk *sdkp) { } + +#define sd_mpath_dev_groups NULL #endif /** * sd_probe - called during driver initialization and whenever a @@ -4602,7 +4649,7 @@ static int sd_probe(struct device *dev) sdp->host->rpm_autosuspend_delay); } - error = device_add_disk(dev, gd, NULL); + error = device_add_disk(dev, gd, sd_mpath_dev_groups); if (error) { sd_mpath_fail_probe(sdkp); device_unregister(&sdkp->disk_dev); -- 2.43.5 Add an attribute to show multipath NUMA node per-path (scsi_disk). 
The following is an example of reading the file: $ cat /sys/devices/platform/host8/session1/target8:0:0/8:0:0:0/block/sdc:0/mpath_numa_nodes 0-3 $ cat /sys/devices/platform/host9/session2/target9:0:0/9:0:0:0/block/sdc:1/mpath_numa_nodes $ Signed-off-by: John Garry --- drivers/scsi/sd.c | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c index f5922a9fe6c1b..52d9bc34bd666 100644 --- a/drivers/scsi/sd.c +++ b/drivers/scsi/sd.c @@ -4213,8 +4213,28 @@ static ssize_t sd_mpath_dev_show(struct device *dev, } static DEVICE_ATTR(mpath_dev, 0444, sd_mpath_dev_show, NULL); +static ssize_t sd_mpath_numa_nodes_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct gendisk *gd = dev_to_disk(dev); + struct scsi_disk *sdkp = gd->private_data; + struct scsi_device *sdev = sdkp->device; + struct scsi_mpath_device *scsi_mpath_dev = sdev->scsi_mpath_dev; + struct mpath_device *mpath_device = &scsi_mpath_dev->mpath_device; + struct sd_mpath_disk *sd_mpath_disk = sdkp->sd_mpath_disk; + struct mpath_disk *mpath_disk = sd_mpath_disk->mpath_disk; + struct mpath_head *mpath_head = mpath_disk->mpath_head; + struct scsi_mpath_head *scsi_mpath_head = mpath_head->drvdata; + struct mpath_iopolicy *mpath_iopolicy = &scsi_mpath_head->iopolicy; + + return mpath_numa_nodes_show(mpath_head, mpath_device, + mpath_iopolicy, buf); +} +static DEVICE_ATTR(mpath_numa_nodes, 0444, sd_mpath_numa_nodes_show, NULL); + static struct attribute *sd_mpath_dev_attrs[] = { &dev_attr_mpath_dev.attr, + &dev_attr_mpath_numa_nodes.attr, NULL }; -- 2.43.5 Add a mpath_queue_depth file so that the multipath dynamic queue depth can be looked up from the per-path gendisk (scsi_disk) directory.
Signed-off-by: John Garry --- drivers/scsi/sd.c | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c index 52d9bc34bd666..27f64560335a4 100644 --- a/drivers/scsi/sd.c +++ b/drivers/scsi/sd.c @@ -4232,9 +4232,30 @@ static ssize_t sd_mpath_numa_nodes_show(struct device *dev, } static DEVICE_ATTR(mpath_numa_nodes, 0444, sd_mpath_numa_nodes_show, NULL); +/* sysfs show callback for mpath_queue_depth: emits the nr_active counter of this path's mpath_device as a decimal line. */ static ssize_t sd_mpath_queue_depth_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct gendisk *gd = dev_to_disk(dev); + struct scsi_disk *sdkp = gd->private_data; + struct scsi_device *sdev = sdkp->device; + struct scsi_mpath_device *scsi_mpath_device = sdev->scsi_mpath_dev; + struct mpath_device *mpath_device = &scsi_mpath_device->mpath_device; + struct sd_mpath_disk *sd_mpath_disk = sdkp->sd_mpath_disk; + struct mpath_disk *mpath_disk = sd_mpath_disk->mpath_disk; + struct mpath_head *mpath_head = mpath_disk->mpath_head; + struct scsi_mpath_head *scsi_mpath_head = mpath_head->drvdata; + + /* When mpath_qd_iopolicy() is false for the head's iopolicy, return 0 without writing anything to buf. */ if (!mpath_qd_iopolicy(&scsi_mpath_head->iopolicy)) + return 0; + + return sysfs_emit(buf, "%d\n", atomic_read(&mpath_device->nr_active)); +} +static DEVICE_ATTR(mpath_queue_depth, 0444, sd_mpath_queue_depth_show, NULL); + static struct attribute *sd_mpath_dev_attrs[] = { &dev_attr_mpath_dev.attr, &dev_attr_mpath_numa_nodes.attr, + &dev_attr_mpath_queue_depth.attr, NULL }; -- 2.43.5