The Intel IOMMU driver already supports replacing IOMMU domain attachments with PASIDs. Add support for replacing a domain attached with no_pasid. This includes replacing domains in legacy mode. Signed-off-by: Samiullah Khawaja --- drivers/iommu/intel/iommu.c | 107 ++++++++++++++++++++++++++---------- 1 file changed, 77 insertions(+), 30 deletions(-) diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index 134302fbcd92..c0e359fd3ee1 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -1140,6 +1140,7 @@ static void context_present_cache_flush(struct intel_iommu *iommu, u16 did, } static int domain_context_mapping_one(struct dmar_domain *domain, + struct dmar_domain *old_domain, struct intel_iommu *iommu, u8 bus, u8 devfn) { @@ -1148,7 +1149,8 @@ static int domain_context_mapping_one(struct dmar_domain *domain, u16 did = domain_id_iommu(domain, iommu); int translation = CONTEXT_TT_MULTI_LEVEL; struct pt_iommu_vtdss_hw_info pt_info; - struct context_entry *context; + struct context_entry *context, new_context; + u16 did_old; int ret; if (WARN_ON(!intel_domain_is_ss_paging(domain))) @@ -1166,26 +1168,44 @@ static int domain_context_mapping_one(struct dmar_domain *domain, goto out_unlock; ret = 0; - if (context_present(context) && !context_copied(iommu, bus, devfn)) + if (!old_domain && (context_present(context) && !context_copied(iommu, bus, devfn))) goto out_unlock; + if (old_domain) { + did_old = context_domain_id(context); + WARN_ON(did_old != domain_id_iommu(old_domain, iommu)); + } + copied_context_tear_down(iommu, context, bus, devfn); - context_clear_entry(context); - context_set_domain_id(context, did); + context_set_domain_id(&new_context, did); if (info && info->ats_supported) translation = CONTEXT_TT_DEV_IOTLB; else translation = CONTEXT_TT_MULTI_LEVEL; - context_set_address_root(context, pt_info.ssptptr); - context_set_address_width(context, pt_info.aw); - context_set_translation_type(context, translation); - 
context_set_fault_enable(context); - context_set_present(context); + context_set_address_root(&new_context, pt_info.ssptptr); + context_set_address_width(&new_context, pt_info.aw); + context_set_translation_type(&new_context, translation); + context_set_fault_enable(&new_context); + context_set_present(&new_context); + + *context = new_context; if (!ecap_coherent(iommu->ecap)) clflush_cache_range(context, sizeof(*context)); - context_present_cache_flush(iommu, did, bus, devfn); + + /* + * Spec 6.5.3.3, changing a present context entry requires, + * - IOTLB invalidation for each effected Domain. + * - Issue Device IOTLB invalidation for function. + */ + if (old_domain) { + intel_context_flush_no_pasid(info, context, did); + intel_context_flush_no_pasid(info, context, did_old); + } else { + context_present_cache_flush(iommu, did, bus, devfn); + } + ret = 0; out_unlock: @@ -1194,30 +1214,39 @@ static int domain_context_mapping_one(struct dmar_domain *domain, return ret; } +struct domain_context_mapping_data { + struct dmar_domain *domain; + struct dmar_domain *old_domain; +}; + static int domain_context_mapping_cb(struct pci_dev *pdev, u16 alias, void *opaque) { struct device_domain_info *info = dev_iommu_priv_get(&pdev->dev); struct intel_iommu *iommu = info->iommu; - struct dmar_domain *domain = opaque; + struct domain_context_mapping_data *data = opaque; - return domain_context_mapping_one(domain, iommu, + return domain_context_mapping_one(data->domain, data->old_domain, iommu, PCI_BUS_NUM(alias), alias & 0xff); } static int -domain_context_mapping(struct dmar_domain *domain, struct device *dev) +domain_context_mapping(struct dmar_domain *domain, + struct dmar_domain *old_domain, struct device *dev) { struct device_domain_info *info = dev_iommu_priv_get(dev); struct intel_iommu *iommu = info->iommu; u8 bus = info->bus, devfn = info->devfn; + struct domain_context_mapping_data data; int ret; if (!dev_is_pci(dev)) - return domain_context_mapping_one(domain, iommu, 
bus, devfn); + return domain_context_mapping_one(domain, old_domain, iommu, bus, devfn); + data.domain = domain; + data.old_domain = old_domain; ret = pci_for_each_dma_alias(to_pci_dev(dev), - domain_context_mapping_cb, domain); + domain_context_mapping_cb, &data); if (ret) return ret; @@ -1309,18 +1338,28 @@ static int domain_setup_first_level(struct intel_iommu *iommu, pt_info.gcr3_pt, flags, old); } -static int dmar_domain_attach_device(struct dmar_domain *domain, - struct device *dev) +static int device_replace_dmar_domain(struct dmar_domain *domain, + struct dmar_domain *old_domain, + struct device *dev) { struct device_domain_info *info = dev_iommu_priv_get(dev); struct intel_iommu *iommu = info->iommu; unsigned long flags; int ret; + if (old_domain && dev_is_real_dma_subdevice(dev)) + return -EOPNOTSUPP; + ret = domain_attach_iommu(domain, iommu); if (ret) return ret; + if (old_domain) { + spin_lock_irqsave(&info->domain->lock, flags); + list_del(&info->link); + spin_unlock_irqrestore(&info->domain->lock, flags); + } + info->domain = domain; info->domain_attached = true; spin_lock_irqsave(&domain->lock, flags); @@ -1331,27 +1370,27 @@ static int dmar_domain_attach_device(struct dmar_domain *domain, return 0; if (!sm_supported(iommu)) - ret = domain_context_mapping(domain, dev); + ret = domain_context_mapping(domain, old_domain, dev); else if (intel_domain_is_fs_paging(domain)) ret = domain_setup_first_level(iommu, domain, dev, - IOMMU_NO_PASID, NULL); + IOMMU_NO_PASID, &old_domain->domain); else if (intel_domain_is_ss_paging(domain)) ret = domain_setup_second_level(iommu, domain, dev, - IOMMU_NO_PASID, NULL); + IOMMU_NO_PASID, &old_domain->domain); else if (WARN_ON(true)) ret = -EINVAL; - if (ret) - goto out_block_translation; + if (!ret) + ret = cache_tag_assign_domain(domain, dev, IOMMU_NO_PASID); - ret = cache_tag_assign_domain(domain, dev, IOMMU_NO_PASID); if (ret) - goto out_block_translation; + device_block_translation(dev); - return 0; + if 
(old_domain) { + cache_tag_unassign_domain(old_domain, dev, IOMMU_NO_PASID); + domain_detach_iommu(old_domain, iommu); + } -out_block_translation: - device_block_translation(dev); return ret; } @@ -3127,19 +3166,27 @@ static int intel_iommu_attach_device(struct iommu_domain *domain, struct device *dev, struct iommu_domain *old) { + struct device_domain_info *info = dev_iommu_priv_get(dev); int ret; - device_block_translation(dev); + if (dev_is_real_dma_subdevice(dev) || + domain->type != __IOMMU_DOMAIN_PAGING || + !info->domain || &info->domain->domain != old) + old = NULL; + + if (!old) + device_block_translation(dev); ret = paging_domain_compatible(domain, dev); if (ret) return ret; - ret = iopf_for_domain_set(domain, dev); + ret = iopf_for_domain_replace(domain, old, dev); if (ret) return ret; - ret = dmar_domain_attach_device(to_dmar_domain(domain), dev); + ret = device_replace_dmar_domain(to_dmar_domain(domain), + old ? to_dmar_domain(old) : NULL, dev); if (ret) iopf_for_domain_remove(domain, dev); -- 2.52.0.351.gbe84eed79e-goog Add API to init a struct iommu using an already opened iommufd instance and attach devices to it. 
Signed-off-by: Samiullah Khawaja --- .../vfio/lib/include/libvfio/iommu.h | 2 + .../lib/include/libvfio/vfio_pci_device.h | 2 + tools/testing/selftests/vfio/lib/iommu.c | 60 +++++++++++++++++-- .../selftests/vfio/lib/vfio_pci_device.c | 16 ++++- 4 files changed, 74 insertions(+), 6 deletions(-) diff --git a/tools/testing/selftests/vfio/lib/include/libvfio/iommu.h b/tools/testing/selftests/vfio/lib/include/libvfio/iommu.h index 5c9b9dc6d993..9e96da1e6fd3 100644 --- a/tools/testing/selftests/vfio/lib/include/libvfio/iommu.h +++ b/tools/testing/selftests/vfio/lib/include/libvfio/iommu.h @@ -29,10 +29,12 @@ struct iommu { int container_fd; int iommufd; u32 ioas_id; + u32 hwpt_id; struct list_head dma_regions; }; struct iommu *iommu_init(const char *iommu_mode); +struct iommu *iommufd_iommu_init(int iommufd, u32 dev_id); void iommu_cleanup(struct iommu *iommu); int __iommu_map(struct iommu *iommu, struct dma_region *region); diff --git a/tools/testing/selftests/vfio/lib/include/libvfio/vfio_pci_device.h b/tools/testing/selftests/vfio/lib/include/libvfio/vfio_pci_device.h index 2858885a89bb..1143ceb6a9b8 100644 --- a/tools/testing/selftests/vfio/lib/include/libvfio/vfio_pci_device.h +++ b/tools/testing/selftests/vfio/lib/include/libvfio/vfio_pci_device.h @@ -19,6 +19,7 @@ struct vfio_pci_device { const char *bdf; int fd; int group_fd; + u32 dev_id; struct iommu *iommu; @@ -65,6 +66,7 @@ void vfio_pci_config_access(struct vfio_pci_device *device, bool write, #define vfio_pci_config_writew(_d, _o, _v) vfio_pci_config_write(_d, _o, _v, u16) #define vfio_pci_config_writel(_d, _o, _v) vfio_pci_config_write(_d, _o, _v, u32) +void vfio_pci_device_attach_iommu(struct vfio_pci_device *device, struct iommu *iommu); void vfio_pci_irq_enable(struct vfio_pci_device *device, u32 index, u32 vector, int count); void vfio_pci_irq_disable(struct vfio_pci_device *device, u32 index); diff --git a/tools/testing/selftests/vfio/lib/iommu.c b/tools/testing/selftests/vfio/lib/iommu.c index 
58b7fb7430d4..2c67d7e24d0c 100644 --- a/tools/testing/selftests/vfio/lib/iommu.c +++ b/tools/testing/selftests/vfio/lib/iommu.c @@ -408,6 +408,18 @@ struct iommu_iova_range *iommu_iova_ranges(struct iommu *iommu, u32 *nranges) return ranges; } +static u32 iommufd_hwpt_alloc(struct iommu *iommu, u32 dev_id) +{ + struct iommu_hwpt_alloc args = { + .size = sizeof(args), + .pt_id = iommu->ioas_id, + .dev_id = dev_id, + }; + + ioctl_assert(iommu->iommufd, IOMMU_HWPT_ALLOC, &args); + return args.out_hwpt_id; +} + static u32 iommufd_ioas_alloc(int iommufd) { struct iommu_ioas_alloc args = { @@ -418,11 +430,9 @@ static u32 iommufd_ioas_alloc(int iommufd) return args.out_ioas_id; } -struct iommu *iommu_init(const char *iommu_mode) +static struct iommu *iommu_alloc(const char *iommu_mode) { - const char *container_path; struct iommu *iommu; - int version; iommu = calloc(1, sizeof(*iommu)); VFIO_ASSERT_NOT_NULL(iommu); @@ -430,6 +440,16 @@ struct iommu *iommu_init(const char *iommu_mode) INIT_LIST_HEAD(&iommu->dma_regions); iommu->mode = lookup_iommu_mode(iommu_mode); + return iommu; +} + +struct iommu *iommu_init(const char *iommu_mode) +{ + const char *container_path; + struct iommu *iommu; + int version; + + iommu = iommu_alloc(iommu_mode); container_path = iommu->mode->container_path; if (container_path) { @@ -453,10 +473,42 @@ struct iommu *iommu_init(const char *iommu_mode) return iommu; } +struct iommu *iommufd_iommu_init(int iommufd, u32 dev_id) +{ + struct iommu *iommu; + + iommu = iommu_alloc("iommufd"); + + iommu->iommufd = dup(iommufd); + VFIO_ASSERT_GT(iommu->iommufd, 0); + + iommu->ioas_id = iommufd_ioas_alloc(iommu->iommufd); + iommu->hwpt_id = iommufd_hwpt_alloc(iommu, dev_id); + + return iommu; +} + +static void iommufd_iommu_cleanup(struct iommu *iommu) +{ + struct iommu_destroy args = { + .size = sizeof(args), + }; + + if (iommu->hwpt_id) { + args.id = iommu->hwpt_id; + ioctl_assert(iommu->iommufd, IOMMU_DESTROY, &args); + } + + args.id = iommu->ioas_id; + 
ioctl_assert(iommu->iommufd, IOMMU_DESTROY, &args); + + VFIO_ASSERT_EQ(close(iommu->iommufd), 0); +} + void iommu_cleanup(struct iommu *iommu) { if (iommu->iommufd) - VFIO_ASSERT_EQ(close(iommu->iommufd), 0); + iommufd_iommu_cleanup(iommu); else VFIO_ASSERT_EQ(close(iommu->container_fd), 0); diff --git a/tools/testing/selftests/vfio/lib/vfio_pci_device.c b/tools/testing/selftests/vfio/lib/vfio_pci_device.c index fac4c0ecadef..9bc1f5ade5c4 100644 --- a/tools/testing/selftests/vfio/lib/vfio_pci_device.c +++ b/tools/testing/selftests/vfio/lib/vfio_pci_device.c @@ -298,7 +298,7 @@ const char *vfio_pci_get_cdev_path(const char *bdf) return cdev_path; } -static void vfio_device_bind_iommufd(int device_fd, int iommufd) +static int vfio_device_bind_iommufd(int device_fd, int iommufd) { struct vfio_device_bind_iommufd args = { .argsz = sizeof(args), @@ -306,6 +306,7 @@ static void vfio_device_bind_iommufd(int device_fd, int iommufd) }; ioctl_assert(device_fd, VFIO_DEVICE_BIND_IOMMUFD, &args); + return args.out_devid; } static void vfio_device_attach_iommufd_pt(int device_fd, u32 pt_id) @@ -326,10 +327,21 @@ static void vfio_pci_iommufd_setup(struct vfio_pci_device *device, const char *b VFIO_ASSERT_GE(device->fd, 0); free((void *)cdev_path); - vfio_device_bind_iommufd(device->fd, device->iommu->iommufd); + device->dev_id = vfio_device_bind_iommufd(device->fd, device->iommu->iommufd); vfio_device_attach_iommufd_pt(device->fd, device->iommu->ioas_id); } +void vfio_pci_device_attach_iommu(struct vfio_pci_device *device, struct iommu *iommu) +{ + u32 pt_id = iommu->ioas_id; + + if (iommu->hwpt_id) + pt_id = iommu->hwpt_id; + + VFIO_ASSERT_NE(pt_id, 0); + vfio_device_attach_iommufd_pt(device->fd, pt_id); +} + struct vfio_pci_device *vfio_pci_device_init(const char *bdf, struct iommu *iommu) { struct vfio_pci_device *device; -- 2.52.0.351.gbe84eed79e-goog Add a test that does iommufd hwpt replace while a DMA is ongoing. 
This verifies that the IOMMU domain can be replaced hitlessly, without disrupting the DMA. Note that the new domain is attached only after the required DMA memory has been mapped at the same IOVA in the new domain. Signed-off-by: Samiullah Khawaja --- tools/testing/selftests/vfio/Makefile | 1 + .../vfio/vfio_iommufd_hwpt_replace_test.c | 151 ++++++++++++++++++ 2 files changed, 152 insertions(+) create mode 100644 tools/testing/selftests/vfio/vfio_iommufd_hwpt_replace_test.c diff --git a/tools/testing/selftests/vfio/Makefile b/tools/testing/selftests/vfio/Makefile index 3c796ca99a50..09a1e57cc77d 100644 --- a/tools/testing/selftests/vfio/Makefile +++ b/tools/testing/selftests/vfio/Makefile @@ -1,5 +1,6 @@ CFLAGS = $(KHDR_INCLUDES) TEST_GEN_PROGS += vfio_dma_mapping_test +TEST_GEN_PROGS += vfio_iommufd_hwpt_replace_test TEST_GEN_PROGS += vfio_iommufd_setup_test TEST_GEN_PROGS += vfio_pci_device_test TEST_GEN_PROGS += vfio_pci_device_init_perf_test diff --git a/tools/testing/selftests/vfio/vfio_iommufd_hwpt_replace_test.c b/tools/testing/selftests/vfio/vfio_iommufd_hwpt_replace_test.c new file mode 100644 index 000000000000..efef3233494f --- /dev/null +++ b/tools/testing/selftests/vfio/vfio_iommufd_hwpt_replace_test.c @@ -0,0 +1,151 @@ +// SPDX-License-Identifier: GPL-2.0-only +#include +#include + +#include +#include + +#include + +#include "kselftest_harness.h" + +static const char *device_bdf; + +static void region_setup(struct iommu *iommu, + struct iova_allocator *iova_allocator, + struct dma_region *region, u64 size) +{ + const int flags = MAP_SHARED | MAP_ANONYMOUS; + const int prot = PROT_READ | PROT_WRITE; + void *vaddr; + + vaddr = mmap(NULL, size, prot, flags, -1, 0); + VFIO_ASSERT_NE(vaddr, MAP_FAILED); + + region->vaddr = vaddr; + region->iova = iova_allocator_alloc(iova_allocator, size); + region->size = size; + + iommu_map(iommu, region); +} + +static void region_teardown(struct iommu *iommu, struct dma_region *region) +{ + iommu_unmap(iommu, region); + 
VFIO_ASSERT_EQ(munmap(region->vaddr, region->size), 0); +} + +FIXTURE(vfio_iommufd_replace_hwpt_test) { + struct iommu *iommu; + struct vfio_pci_device *device; + struct iova_allocator *iova_allocator; + struct dma_region memcpy_region; + void *vaddr; + + u64 size; + void *src; + void *dst; + iova_t src_iova; + iova_t dst_iova; +}; + +FIXTURE_SETUP(vfio_iommufd_replace_hwpt_test) +{ + struct vfio_pci_driver *driver; + + self->iommu = iommu_init("iommufd"); + self->device = vfio_pci_device_init(device_bdf, self->iommu); + self->iova_allocator = iova_allocator_init(self->iommu); + + driver = &self->device->driver; + + region_setup(self->iommu, self->iova_allocator, &self->memcpy_region, SZ_1G); + region_setup(self->iommu, self->iova_allocator, &driver->region, SZ_2M); + + if (driver->ops) + vfio_pci_driver_init(self->device); + + self->size = self->memcpy_region.size / 2; + self->src = self->memcpy_region.vaddr; + self->dst = self->src + self->size; + + self->src_iova = to_iova(self->device, self->src); + self->dst_iova = to_iova(self->device, self->dst); +} + +FIXTURE_TEARDOWN(vfio_iommufd_replace_hwpt_test) +{ + struct vfio_pci_driver *driver = &self->device->driver; + + if (driver->ops) + vfio_pci_driver_remove(self->device); + + region_teardown(self->iommu, &self->memcpy_region); + region_teardown(self->iommu, &driver->region); + + iova_allocator_cleanup(self->iova_allocator); + vfio_pci_device_cleanup(self->device); + iommu_cleanup(self->iommu); +} + +FIXTURE_VARIANT(vfio_iommufd_replace_hwpt_test) { + bool replace_hwpt; +}; + +FIXTURE_VARIANT_ADD(vfio_iommufd_replace_hwpt_test, domain_replace) { + .replace_hwpt = true, +}; + +FIXTURE_VARIANT_ADD(vfio_iommufd_replace_hwpt_test, noreplace) { + .replace_hwpt = false, +}; + +TEST_F(vfio_iommufd_replace_hwpt_test, memcpy) +{ + struct dma_region memcpy_region, driver_region; + struct iommu *iommu2; + + if (self->device->driver.ops) { + memset(self->src, 'x', self->size); + memset(self->dst, 'y', self->size); + + 
vfio_pci_driver_memcpy_start(self->device, + self->src_iova, + self->dst_iova, + self->size, + 100); + } + + if (variant->replace_hwpt) { + iommu2 = iommufd_iommu_init(self->iommu->iommufd, + self->device->dev_id); + + memcpy_region = self->memcpy_region; + driver_region = self->device->driver.region; + + iommu_map(iommu2, &memcpy_region); + iommu_map(iommu2, &driver_region); + + vfio_pci_device_attach_iommu(self->device, iommu2); + } + + if (self->device->driver.ops) { + ASSERT_EQ(0, vfio_pci_driver_memcpy_wait(self->device)); + ASSERT_EQ(0, memcmp(self->src, self->dst, self->size)); + } + + if (variant->replace_hwpt) { + vfio_pci_device_attach_iommu(self->device, self->iommu); + + iommu_unmap(iommu2, &memcpy_region); + iommu_unmap(iommu2, &driver_region); + iommu_cleanup(iommu2); + } +} + +int main(int argc, char *argv[]) +{ + device_bdf = vfio_selftests_get_bdf(&argc, argv); + + return test_harness_run(argc, argv); +} -- 2.52.0.351.gbe84eed79e-goog