Add foundational infrastructure for vfio-nvme, enabling support for live
migration of NVMe devices via the VFIO framework.

The following components are included:

- Core driver skeleton for vfio-nvme support under drivers/vfio/pci/nvme/
- Definitions of basic data structures used in live migration
  (e.g., nvmevf_pci_core_device and nvmevf_migration_file)
- Implementation of helper routines for managing migration file state
- Integration of PCI driver callbacks and error handling logic
- Registration with vfio-pci-core through nvmevf_pci_ops
- Initial support for VFIO migration states and device open/close flows

Subsequent patches will build upon this base to implement actual live
migration commands and complete the vfio device state handling logic.

Signed-off-by: Lei Rao <lei.rao@intel.com>
Signed-off-by: Max Gurtovoy <mgurtovoy@nvidia.com>
Signed-off-by: Chaitanya Kulkarni <kch@nvidia.com>
---
 drivers/vfio/pci/Kconfig       |   2 +
 drivers/vfio/pci/Makefile      |   2 +
 drivers/vfio/pci/nvme/Kconfig  |  10 ++
 drivers/vfio/pci/nvme/Makefile |   3 +
 drivers/vfio/pci/nvme/nvme.c   | 196 +++++++++++++++++++++++++++++++++
 drivers/vfio/pci/nvme/nvme.h   |  36 ++++++
 6 files changed, 249 insertions(+)
 create mode 100644 drivers/vfio/pci/nvme/Kconfig
 create mode 100644 drivers/vfio/pci/nvme/Makefile
 create mode 100644 drivers/vfio/pci/nvme/nvme.c
 create mode 100644 drivers/vfio/pci/nvme/nvme.h

diff --git a/drivers/vfio/pci/Kconfig b/drivers/vfio/pci/Kconfig
index 2b0172f54665..8f94429e7adc 100644
--- a/drivers/vfio/pci/Kconfig
+++ b/drivers/vfio/pci/Kconfig
@@ -67,4 +67,6 @@ source "drivers/vfio/pci/nvgrace-gpu/Kconfig"
 
 source "drivers/vfio/pci/qat/Kconfig"
 
+source "drivers/vfio/pci/nvme/Kconfig"
+
 endmenu
diff --git a/drivers/vfio/pci/Makefile b/drivers/vfio/pci/Makefile
index cf00c0a7e55c..be8c4b5ee0ba 100644
--- a/drivers/vfio/pci/Makefile
+++ b/drivers/vfio/pci/Makefile
@@ -10,6 +10,8 @@ obj-$(CONFIG_VFIO_PCI) += vfio-pci.o
 
 obj-$(CONFIG_MLX5_VFIO_PCI) += mlx5/
 
+obj-$(CONFIG_NVME_VFIO_PCI) += nvme/
+
 obj-$(CONFIG_HISI_ACC_VFIO_PCI) += hisilicon/
 
obj-$(CONFIG_PDS_VFIO_PCI) += pds/ diff --git a/drivers/vfio/pci/nvme/Kconfig b/drivers/vfio/pci/nvme/Kconfig new file mode 100644 index 000000000000..12e0eaba0de1 --- /dev/null +++ b/drivers/vfio/pci/nvme/Kconfig @@ -0,0 +1,10 @@ +# SPDX-License-Identifier: GPL-2.0-only +config NVME_VFIO_PCI + tristate "VFIO support for NVMe PCI devices" + depends on NVME_CORE + depends on VFIO_PCI_CORE + help + This provides migration support for NVMe devices using the + VFIO framework. + + If you don't know what to do here, say N. diff --git a/drivers/vfio/pci/nvme/Makefile b/drivers/vfio/pci/nvme/Makefile new file mode 100644 index 000000000000..2f4a0ad3d9cf --- /dev/null +++ b/drivers/vfio/pci/nvme/Makefile @@ -0,0 +1,3 @@ +# SPDX-License-Identifier: GPL-2.0-only +obj-$(CONFIG_NVME_VFIO_PCI) += nvme-vfio-pci.o +nvme-vfio-pci-y := nvme.o diff --git a/drivers/vfio/pci/nvme/nvme.c b/drivers/vfio/pci/nvme/nvme.c new file mode 100644 index 000000000000..08bee3274207 --- /dev/null +++ b/drivers/vfio/pci/nvme/nvme.c @@ -0,0 +1,196 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (c) 2022, INTEL CORPORATION. All rights reserved + * Copyright (c) 2022, NVIDIA CORPORATION. 
All rights reserved + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "nvme.h" + +static void nvmevf_disable_fd(struct nvmevf_migration_file *migf) +{ + mutex_lock(&migf->lock); + + /* release the device states buffer */ + kvfree(migf->vf_data); + migf->vf_data = NULL; + migf->disabled = true; + migf->total_length = 0; + migf->filp->f_pos = 0; + mutex_unlock(&migf->lock); +} + +static void nvmevf_disable_fds(struct nvmevf_pci_core_device *nvmevf_dev) +{ + if (nvmevf_dev->resuming_migf) { + nvmevf_disable_fd(nvmevf_dev->resuming_migf); + fput(nvmevf_dev->resuming_migf->filp); + nvmevf_dev->resuming_migf = NULL; + } + + if (nvmevf_dev->saving_migf) { + nvmevf_disable_fd(nvmevf_dev->saving_migf); + fput(nvmevf_dev->saving_migf->filp); + nvmevf_dev->saving_migf = NULL; + } +} + +static void nvmevf_state_mutex_unlock(struct nvmevf_pci_core_device *nvmevf_dev) +{ + lockdep_assert_held(&nvmevf_dev->state_mutex); +again: + spin_lock(&nvmevf_dev->reset_lock); + if (nvmevf_dev->deferred_reset) { + nvmevf_dev->deferred_reset = false; + spin_unlock(&nvmevf_dev->reset_lock); + nvmevf_dev->mig_state = VFIO_DEVICE_STATE_RUNNING; + nvmevf_disable_fds(nvmevf_dev); + goto again; + } + mutex_unlock(&nvmevf_dev->state_mutex); + spin_unlock(&nvmevf_dev->reset_lock); +} + +static struct nvmevf_pci_core_device *nvmevf_drvdata(struct pci_dev *pdev) +{ + struct vfio_pci_core_device *core_device = dev_get_drvdata(&pdev->dev); + + return container_of(core_device, struct nvmevf_pci_core_device, + core_device); +} + +static int nvmevf_pci_open_device(struct vfio_device *core_vdev) +{ + struct nvmevf_pci_core_device *nvmevf_dev; + struct vfio_pci_core_device *vdev; + int ret; + + nvmevf_dev = container_of(core_vdev, struct nvmevf_pci_core_device, + core_device.vdev); + vdev = &nvmevf_dev->core_device; + + ret = vfio_pci_core_enable(vdev); + if (ret) + return ret; + + if (nvmevf_dev->migrate_cap) + 
nvmevf_dev->mig_state = VFIO_DEVICE_STATE_RUNNING; + vfio_pci_core_finish_enable(vdev); + return 0; +} + +static void nvmevf_pci_close_device(struct vfio_device *core_vdev) +{ + struct nvmevf_pci_core_device *nvmevf_dev; + + nvmevf_dev = container_of(core_vdev, struct nvmevf_pci_core_device, + core_device.vdev); + + if (nvmevf_dev->migrate_cap) { + mutex_lock(&nvmevf_dev->state_mutex); + nvmevf_disable_fds(nvmevf_dev); + nvmevf_state_mutex_unlock(nvmevf_dev); + } + + vfio_pci_core_close_device(core_vdev); +} + +static const struct vfio_device_ops nvmevf_pci_ops = { + .name = "nvme-vfio-pci", + .release = vfio_pci_core_release_dev, + .open_device = nvmevf_pci_open_device, + .close_device = nvmevf_pci_close_device, + .ioctl = vfio_pci_core_ioctl, + .device_feature = vfio_pci_core_ioctl_feature, + .read = vfio_pci_core_read, + .write = vfio_pci_core_write, + .mmap = vfio_pci_core_mmap, + .request = vfio_pci_core_request, + .match = vfio_pci_core_match, +}; + +static int nvmevf_pci_probe(struct pci_dev *pdev, + const struct pci_device_id *id) +{ + struct nvmevf_pci_core_device *nvmevf_dev; + int ret; + + nvmevf_dev = vfio_alloc_device(nvmevf_pci_core_device, core_device.vdev, + &pdev->dev, &nvmevf_pci_ops); + if (IS_ERR(nvmevf_dev)) + return PTR_ERR(nvmevf_dev); + + dev_set_drvdata(&pdev->dev, &nvmevf_dev->core_device); + ret = vfio_pci_core_register_device(&nvmevf_dev->core_device); + if (ret) + goto out_put_dev; + + return 0; + +out_put_dev: + vfio_put_device(&nvmevf_dev->core_device.vdev); + return ret; +} + +static void nvmevf_pci_remove(struct pci_dev *pdev) +{ + struct nvmevf_pci_core_device *nvmevf_dev = nvmevf_drvdata(pdev); + + vfio_pci_core_unregister_device(&nvmevf_dev->core_device); + vfio_put_device(&nvmevf_dev->core_device.vdev); +} + +static void nvmevf_pci_aer_reset_done(struct pci_dev *pdev) +{ + struct nvmevf_pci_core_device *nvmevf_dev = nvmevf_drvdata(pdev); + + if (!nvmevf_dev->migrate_cap) + return; + + /* + * As the higher VFIO layers are holding 
locks across reset and using + * those same locks with the mm_lock we need to prevent ABBA deadlock + * with the state_mutex and mm_lock. + * In case the state_mutex was taken already we defer the cleanup work + * to the unlock flow of the other running context. + */ + spin_lock(&nvmevf_dev->reset_lock); + nvmevf_dev->deferred_reset = true; + if (!mutex_trylock(&nvmevf_dev->state_mutex)) { + spin_unlock(&nvmevf_dev->reset_lock); + return; + } + spin_unlock(&nvmevf_dev->reset_lock); + nvmevf_state_mutex_unlock(nvmevf_dev); +} + +static const struct pci_error_handlers nvmevf_err_handlers = { + .reset_done = nvmevf_pci_aer_reset_done, + .error_detected = vfio_pci_core_aer_err_detected, +}; + +static struct pci_driver nvmevf_pci_driver = { + .name = KBUILD_MODNAME, + .probe = nvmevf_pci_probe, + .remove = nvmevf_pci_remove, + .err_handler = &nvmevf_err_handlers, + .driver_managed_dma = true, +}; + +module_pci_driver(nvmevf_pci_driver); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Chaitanya Kulkarni "); +MODULE_DESCRIPTION("NVMe VFIO PCI - VFIO PCI driver with live migration support for NVMe"); diff --git a/drivers/vfio/pci/nvme/nvme.h b/drivers/vfio/pci/nvme/nvme.h new file mode 100644 index 000000000000..ee602254679e --- /dev/null +++ b/drivers/vfio/pci/nvme/nvme.h @@ -0,0 +1,36 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2022, INTEL CORPORATION. All rights reserved + * Copyright (c) 2022, NVIDIA CORPORATION. 
All rights reserved + */ + +#ifndef NVME_VFIO_PCI_H +#define NVME_VFIO_PCI_H + +#include +#include +#include + +struct nvmevf_migration_file { + struct file *filp; + struct mutex lock; + bool disabled; + u8 *vf_data; + size_t total_length; +}; + +struct nvmevf_pci_core_device { + struct vfio_pci_core_device core_device; + int vf_id; + u8 migrate_cap:1; + u8 deferred_reset:1; + /* protect migration state */ + struct mutex state_mutex; + enum vfio_device_mig_state mig_state; + /* protect the reset_done flow */ + spinlock_t reset_lock; + struct nvmevf_migration_file *resuming_migf; + struct nvmevf_migration_file *saving_migf; +}; + +#endif /* NVME_VFIO_PCI_H */ -- 2.40.0