Ensure that fmb_lock is held by pcibios_enable_device() and pcibios_disable_device() when calling zpci_fmb_enable_device() or zpci_fmb_disable_device(), respectively. Additionally, assert that the fmb_lock is held within the latter two functions to prevent future race conditions regarding new callers. Fixes: af0a8a8453f7 ("s390/pci: implement pcibios_add_device") Fixes: 944239c59e93 ("s390/pci: implement pcibios_release_device") Cc: stable@vger.kernel.org Signed-off-by: Omar Elghoul --- arch/s390/pci/pci.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/arch/s390/pci/pci.c b/arch/s390/pci/pci.c index 39bd2adfc240..2910d4038d39 100644 --- a/arch/s390/pci/pci.c +++ b/arch/s390/pci/pci.c @@ -173,6 +173,8 @@ int zpci_fmb_enable_device(struct zpci_dev *zdev) unsigned long flags; u8 cc, status; + lockdep_assert_held(&zdev->fmb_lock); + if (zdev->fmb || sizeof(*zdev->fmb) < zdev->fmb_length) return -EINVAL; @@ -211,6 +213,8 @@ int zpci_fmb_disable_device(struct zpci_dev *zdev) struct zpci_fib fib = {0}; u8 cc, status; + lockdep_assert_held(&zdev->fmb_lock); + if (!zdev->fmb) return -EINVAL; @@ -639,7 +643,9 @@ int pcibios_enable_device(struct pci_dev *pdev, int mask) struct zpci_dev *zdev = to_zpci(pdev); zpci_debug_init_device(zdev, dev_name(&pdev->dev)); + mutex_lock(&zdev->fmb_lock); zpci_fmb_enable_device(zdev); + mutex_unlock(&zdev->fmb_lock); return pci_enable_resources(pdev, mask); } @@ -648,7 +654,9 @@ void pcibios_disable_device(struct pci_dev *pdev) { struct zpci_dev *zdev = to_zpci(pdev); + mutex_lock(&zdev->fmb_lock); zpci_fmb_disable_device(zdev); + mutex_unlock(&zdev->fmb_lock); zpci_debug_exit_device(zdev); } -- 2.54.0 Introduce a function zpci_fmb_reenable_device() that checks for the state of the FMB and reuses the same buffer where appropriate. If the FMB was not previously enabled, enable it for the device. Call this function during a zPCI device re-enablement, which in turn implicitly ensures that the FMB is is enabled for host devices during their KVM registration. Besides re-enabling the FMB itself in zpci_fmb_reenable_device() also clear out the software counters, such that a program resetting an FMB sees all counters start from zero as expected. Separate this clearing of software counters out into zpci_fmb_clear_iommu_ctrs() and reuse it in zpci_fmb_enable_device() and zpci_fmb_reenable_device(). Likewise separate the FMB enable logic into zpci_fmb_do_enable() to be reused in the same two functions. Signed-off-by: Omar Elghoul --- arch/s390/include/asm/pci.h | 1 + arch/s390/pci/pci.c | 96 +++++++++++++++++++++++++++++-------- 2 files changed, 78 insertions(+), 19 deletions(-) diff --git a/arch/s390/include/asm/pci.h b/arch/s390/include/asm/pci.h index 5dcf35f0f325..65014e52d559 100644 --- a/arch/s390/include/asm/pci.h +++ b/arch/s390/include/asm/pci.h @@ -323,6 +323,7 @@ void zpci_remove_parent_msi_domain(struct zpci_bus *zbus); /* FMB */ int zpci_fmb_enable_device(struct zpci_dev *); int zpci_fmb_disable_device(struct zpci_dev *); +int zpci_fmb_reenable_device(struct zpci_dev *zdev); /* Debug */ int zpci_debug_init(void); diff --git a/arch/s390/pci/pci.c b/arch/s390/pci/pci.c index 2910d4038d39..652f0b7e8893 100644 --- a/arch/s390/pci/pci.c +++ b/arch/s390/pci/pci.c @@ -164,24 +164,10 @@ int zpci_unregister_ioat(struct zpci_dev *zdev, u8 dmaas) return cc; } -/* Modify PCI: Set PCI function measurement parameters */ -int zpci_fmb_enable_device(struct zpci_dev *zdev) +static void zpci_fmb_clear_iommu_ctrs(struct zpci_dev *zdev) { - u64 req = ZPCI_CREATE_REQ(zdev->fh, 0, ZPCI_MOD_FC_SET_MEASURE); struct zpci_iommu_ctrs *ctrs; - struct zpci_fib fib = {0}; - unsigned long flags; - u8 cc, status; - - lockdep_assert_held(&zdev->fmb_lock); - - if (zdev->fmb || sizeof(*zdev->fmb) < zdev->fmb_length) - return -EINVAL; - - zdev->fmb = kmem_cache_zalloc(zdev_fmb_cache, GFP_KERNEL); - if (!zdev->fmb) - return -ENOMEM; - WARN_ON((u64) zdev->fmb & 0xf); + unsigned long flags = 0; /* reset software counters */ spin_lock_irqsave(&zdev->dom_lock, flags); @@ -194,17 +180,49 @@ int zpci_fmb_enable_device(struct zpci_dev *zdev) atomic64_set(&ctrs->sync_rpcits, 0); } spin_unlock_irqrestore(&zdev->dom_lock, flags); +} +static int zpci_fmb_do_enable(struct zpci_dev *zdev) +{ + /* This helper assumes that zdev->fmb is already allocated and thus only + * takes care of the actual enablement. + */ + u64 req = ZPCI_CREATE_REQ(zdev->fh, 0, ZPCI_MOD_FC_SET_MEASURE); + struct zpci_fib fib = {0}; + u8 cc, status; fib.fmb_addr = virt_to_phys(zdev->fmb); fib.gd = zdev->gisa; cc = zpci_mod_fc(req, &fib, &status); - if (cc) { + + return cc ? -EIO : 0; +} + +/* Modify PCI: Set PCI function measurement parameters */ +int zpci_fmb_enable_device(struct zpci_dev *zdev) +{ + int rc; + + lockdep_assert_held(&zdev->fmb_lock); + + if (zdev->fmb || sizeof(*zdev->fmb) < zdev->fmb_length) + return -EINVAL; + + zdev->fmb = kmem_cache_zalloc(zdev_fmb_cache, GFP_KERNEL); + if (!zdev->fmb) + return -ENOMEM; + WARN_ON((u64) zdev->fmb & 0xf); + + zpci_fmb_clear_iommu_ctrs(zdev); + + rc = zpci_fmb_do_enable(zdev); + if (rc) { kmem_cache_free(zdev_fmb_cache, zdev->fmb); zdev->fmb = NULL; } - return cc ? -EIO : 0; + return rc; } +EXPORT_SYMBOL_GPL(zpci_fmb_enable_device); /* Modify PCI: Disable PCI function measurement */ int zpci_fmb_disable_device(struct zpci_dev *zdev) @@ -231,6 +249,41 @@ int zpci_fmb_disable_device(struct zpci_dev *zdev) } return cc ? -EIO : 0; } +EXPORT_SYMBOL_GPL(zpci_fmb_disable_device); + +int zpci_fmb_reenable_device(struct zpci_dev *zdev) +{ + u64 req = ZPCI_CREATE_REQ(zdev->fh, 0, ZPCI_MOD_FC_SET_MEASURE); + struct zpci_fib fib = {0}; + u8 cc, status; + int rc; + + lockdep_assert_held(&zdev->fmb_lock); + + if (!zdev->fmb) + return zpci_fmb_enable_device(zdev); + + fib.gd = zdev->gisa; + cc = zpci_mod_fc(req, &fib, &status); /* Disable function measurement */ + + /* Unlike in zpci_fmb_disable_device(), cc == 3 is not a valid state here + * because we are re-enabling function measurement for the same function + * handle. + */ + if (cc) + return -EIO; + + zpci_fmb_clear_iommu_ctrs(zdev); + + rc = zpci_fmb_do_enable(zdev); + if (rc) { + kmem_cache_free(zdev_fmb_cache, zdev->fmb); + zdev->fmb = NULL; + } + + return rc; +} +EXPORT_SYMBOL_GPL(zpci_fmb_reenable_device); static int zpci_cfg_load(struct zpci_dev *zdev, int offset, u32 *val, u8 len) { @@ -737,9 +790,14 @@ int zpci_reenable_device(struct zpci_dev *zdev) } rc = zpci_iommu_register_ioat(zdev, &status); - if (rc) + if (rc) { zpci_disable_device(zdev); + return rc; + } + mutex_lock(&zdev->fmb_lock); + zpci_fmb_reenable_device(zdev); + mutex_unlock(&zdev->fmb_lock); return rc; } EXPORT_SYMBOL_GPL(zpci_reenable_device); -- 2.54.0 Introduce new VFIO features for zPCI devices to provide FMB passthrough to userspace. Allow the user to enable or disable the FMB using the SET-only feature VFIO_DEVICE_FEATURE_ZPCI_FMB_ENABLE. Likewise allow the user to read the latest FMB using the GET-only feature VFIO_DEVICE_FEATURE_ZPCI_FMB_READ in the case where the FMB is enabled. Signed-off-by: Omar Elghoul --- drivers/vfio/pci/vfio_pci_core.c | 4 +++ drivers/vfio/pci/vfio_pci_priv.h | 18 ++++++++++ drivers/vfio/pci/vfio_pci_zdev.c | 57 ++++++++++++++++++++++++++++++++ include/uapi/linux/vfio.h | 29 ++++++++++++++++ 4 files changed, 108 insertions(+) diff --git a/drivers/vfio/pci/vfio_pci_core.c b/drivers/vfio/pci/vfio_pci_core.c index 050e7542952e..44e8e5526eae 100644 --- a/drivers/vfio/pci/vfio_pci_core.c +++ b/drivers/vfio/pci/vfio_pci_core.c @@ -1569,6 +1569,10 @@ int vfio_pci_core_ioctl_feature(struct vfio_device *device, u32 flags, return vfio_pci_core_feature_token(vdev, flags, arg, argsz); case VFIO_DEVICE_FEATURE_DMA_BUF: return vfio_pci_core_feature_dma_buf(vdev, flags, arg, argsz); + case VFIO_DEVICE_FEATURE_ZPCI_FMB_ENABLE: + return vfio_pci_zdev_feature_fmb_enable(vdev, flags, arg, argsz); + case VFIO_DEVICE_FEATURE_ZPCI_FMB_READ: + return vfio_pci_zdev_feature_fmb_read(vdev, flags, arg, argsz); default: return -ENOTTY; } diff --git a/drivers/vfio/pci/vfio_pci_priv.h b/drivers/vfio/pci/vfio_pci_priv.h index fca9d0dfac90..b7db064a6a95 100644 --- a/drivers/vfio/pci/vfio_pci_priv.h +++ b/drivers/vfio/pci/vfio_pci_priv.h @@ -93,6 +93,10 @@ int vfio_pci_info_zdev_add_caps(struct vfio_pci_core_device *vdev, struct vfio_info_cap *caps); int vfio_pci_zdev_open_device(struct vfio_pci_core_device *vdev); void vfio_pci_zdev_close_device(struct vfio_pci_core_device *vdev); +int vfio_pci_zdev_feature_fmb_enable(struct vfio_pci_core_device *vdev, u32 flags, + void __user *arg, size_t argsz); +int vfio_pci_zdev_feature_fmb_read(struct vfio_pci_core_device *vdev, u32 flags, + void __user *arg, size_t argsz); #else static inline int vfio_pci_info_zdev_add_caps(struct vfio_pci_core_device *vdev, struct vfio_info_cap *caps) @@ -107,6 +111,20 @@ static inline int vfio_pci_zdev_open_device(struct vfio_pci_core_device *vdev) static inline void vfio_pci_zdev_close_device(struct vfio_pci_core_device *vdev) {} + +static inline int vfio_pci_zdev_feature_fmb_enable(struct vfio_pci_core_device *vdev, + u32 flags, void __user *arg, + size_t argsz) +{ + return -ENOTTY; +} + +static inline int vfio_pci_zdev_feature_fmb_read(struct vfio_pci_core_device *vdev, + u32 flags, void __user *arg, + size_t argsz) +{ + return -ENOTTY; +} #endif static inline bool vfio_pci_is_vga(struct pci_dev *pdev) diff --git a/drivers/vfio/pci/vfio_pci_zdev.c b/drivers/vfio/pci/vfio_pci_zdev.c index 0990fdb146b7..09454495ee23 100644 --- a/drivers/vfio/pci/vfio_pci_zdev.c +++ b/drivers/vfio/pci/vfio_pci_zdev.c @@ -167,3 +167,60 @@ void vfio_pci_zdev_close_device(struct vfio_pci_core_device *vdev) if (zpci_kvm_hook.kvm_unregister) zpci_kvm_hook.kvm_unregister(zdev); } + +int vfio_pci_zdev_feature_fmb_enable(struct vfio_pci_core_device *vdev, u32 flags, + void __user *arg, size_t argsz) +{ + struct zpci_dev *zdev; + struct vfio_device_feature_zpci_fmb_enable fmb_enable; + int ret; + + ret = vfio_check_feature(flags, argsz, VFIO_DEVICE_FEATURE_SET, sizeof(fmb_enable)); + if (ret != 1) + return ret; + + zdev = to_zpci(vdev->pdev); + if (!zdev) + return -ENODEV; + + guard(mutex)(&zdev->fmb_lock); + + if (copy_from_user(&fmb_enable, arg, sizeof(fmb_enable))) + return -EFAULT; + + if (fmb_enable.enabled) + return zpci_fmb_reenable_device(zdev); + return zpci_fmb_disable_device(zdev); +} + +int vfio_pci_zdev_feature_fmb_read(struct vfio_pci_core_device *vdev, u32 flags, + void __user *arg, size_t argsz) +{ + struct zpci_dev *zdev; + struct vfio_device_feature_zpci_fmb_read fmb_read; + struct zpci_fmb fmb_temp = {0}; + int ret; + + ret = vfio_check_feature(flags, argsz, VFIO_DEVICE_FEATURE_GET, sizeof(fmb_read)); + if (ret != 1) + return ret; + + zdev = to_zpci(vdev->pdev); + if (!zdev) + return -ENODEV; + + guard(mutex)(&zdev->fmb_lock); + + if (!zdev->fmb) + return -ENOMSG; + if (copy_from_user(&fmb_read, arg, sizeof(fmb_read))) + return -EFAULT; + if (!fmb_read.data) + return -EINVAL; + + memcpy(&fmb_temp, zdev->fmb, zdev->fmb_length); + if (copy_to_user(fmb_read.data, &fmb_temp, zdev->fmb_length)) + return -EFAULT; + + return 0; +} diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h index 5de618a3a5ee..3988e8690e0b 100644 --- a/include/uapi/linux/vfio.h +++ b/include/uapi/linux/vfio.h @@ -1534,6 +1534,35 @@ struct vfio_device_feature_dma_buf { */ #define VFIO_DEVICE_FEATURE_MIG_PRECOPY_INFOv2 12 +/** + * Upon VFIO_DEVICE_FEATURE_SET, enable or disable FMB for the VFIO zPCI device. + * + * enabled is treated as a bool, so any non-zero value evaluates to true. This + * feature fails on attempt to double enable/disable. + * + * Returns: 0 on success, -1 and errno set appropriately on error. + */ +#define VFIO_DEVICE_FEATURE_ZPCI_FMB_ENABLE 13 + +struct vfio_device_feature_zpci_fmb_enable { + __u8 enabled; +}; + +/** + * Upon VFIO_DEVICE_FEATURE_GET, provide FMB passthrough for VFIO zPCI devices. + * + * The user-provided buffer must be at least fmb_length large, where fmb_length + * is reported in VFIO_DEVICE_INFO_CAP_ZPCI_BASE. + * + * Returns: 0 on success, -1 and errno set appropriately on error. errno==ENOMSG + * when the FMB is not enabled. + */ +#define VFIO_DEVICE_FEATURE_ZPCI_FMB_READ 14 + +struct vfio_device_feature_zpci_fmb_read { + void __user *data; +}; + /* -------- API for Type1 VFIO IOMMU -------- */ /** -- 2.54.0 Introduce a fence over enabling or disabling FMB via sysfs when the zPCI device is associated with a KVM. This will allow a KVM guest to use FMB passthrough and avoid the edge-case where the host disables FMB while the guest is still using it, which may cause partial counter resets and inconsistent reads which have no parallel in the architecture. With this patch, the userspace driver, likely QEMU, is still able to enable or disable the FMB using the VFIO device feature introduced in the previous patch, effectively securing what is associated with the VM state and isolating it from other processes on the host. For VFIO devices that are not associated with a KVM (i.e., for userspace drivers other than QEMU), this fence does not take effect. Signed-off-by: Omar Elghoul Reviewed-by: Niklas Schnelle --- arch/s390/pci/pci_debug.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/arch/s390/pci/pci_debug.c b/arch/s390/pci/pci_debug.c index c7ed7bf254b5..a2dc79418c21 100644 --- a/arch/s390/pci/pci_debug.c +++ b/arch/s390/pci/pci_debug.c @@ -149,9 +149,15 @@ static ssize_t pci_perf_seq_write(struct file *file, const char __user *ubuf, if (!zdev) return 0; + mutex_lock(&zdev->kzdev_lock); + if (zdev->kzdev) { + rc = -EPERM; + goto out_unlock_kzdev; + } + rc = kstrtoul_from_user(ubuf, count, 10, &val); if (rc) - return rc; + goto out_unlock_kzdev; mutex_lock(&zdev->fmb_lock); switch (val) { @@ -163,6 +169,9 @@ static ssize_t pci_perf_seq_write(struct file *file, const char __user *ubuf, break; } mutex_unlock(&zdev->fmb_lock); + +out_unlock_kzdev: + mutex_unlock(&zdev->kzdev_lock); return rc ? rc : count; } -- 2.54.0