Expand the VFIO DMABUF revocation state to three states: not revoked, temporarily revoked, and permanently revoked. The first two cover the existing transient revocation, e.g. across a function reset; a DMABUF enters the permanently revoked state in response to an ioctl(DMA_BUF_IOCTL_REVOKE) request. When that request is made, dynamic imports are removed, PTEs are zapped, and the state is changed such that no future mappings/imports are allowed. This is useful for reclaiming VFIO PCI BAR ranges previously delegated to a subordinate process: the driver process can ensure that the loans are closed down before repurposing the exported ranges. Signed-off-by: Matt Evans --- drivers/vfio/pci/vfio_pci_dmabuf.c | 64 +++++++++++++++++++++++++----- 1 file changed, 53 insertions(+), 11 deletions(-) diff --git a/drivers/vfio/pci/vfio_pci_dmabuf.c b/drivers/vfio/pci/vfio_pci_dmabuf.c index bebb496bd0f2..af30ca205f31 100644 --- a/drivers/vfio/pci/vfio_pci_dmabuf.c +++ b/drivers/vfio/pci/vfio_pci_dmabuf.c @@ -9,6 +9,17 @@ MODULE_IMPORT_NS("DMA_BUF"); +enum vfio_pci_dma_buf_status { + /* + * A buffer can move freely between OK/accessible and revoked + * states (for example, a device reset will temporarily revoke + * it). It can also be permanently revoked. 
+ */ + VFIO_PCI_DMABUF_OK = 0, + VFIO_PCI_DMABUF_TEMP_REVOKED = 1, + VFIO_PCI_DMABUF_PERM_REVOKED = 2, +}; + struct vfio_pci_dma_buf { struct dma_buf *dmabuf; struct vfio_pci_core_device *vdev; @@ -17,9 +28,11 @@ struct vfio_pci_dma_buf { struct dma_buf_phys_vec *phys_vec; struct p2pdma_provider *provider; u32 nr_ranges; - u8 revoked : 1; + enum vfio_pci_dma_buf_status status; }; +static int vfio_pci_dma_buf_revoke(struct dma_buf *dmabuf); + static int vfio_pci_dma_buf_pin(struct dma_buf_attachment *attachment) { return -EOPNOTSUPP; @@ -38,7 +51,7 @@ static int vfio_pci_dma_buf_attach(struct dma_buf *dmabuf, if (!attachment->peer2peer) return -EOPNOTSUPP; - if (priv->revoked) + if (priv->status != VFIO_PCI_DMABUF_OK) return -ENODEV; return 0; @@ -52,7 +65,7 @@ vfio_pci_dma_buf_map(struct dma_buf_attachment *attachment, dma_resv_assert_held(priv->dmabuf->resv); - if (priv->revoked) + if (priv->status != VFIO_PCI_DMABUF_OK) return ERR_PTR(-ENODEV); return dma_buf_phys_vec_to_sgt(attachment, priv->provider, @@ -205,7 +218,7 @@ static vm_fault_t vfio_pci_dma_buf_mmap_huge_fault(struct vm_fault *vmf, * revocation/unmap and status change occurs * whilst holding memory_lock. */ - if (priv->revoked) + if (priv->status != VFIO_PCI_DMABUF_OK) ret = VM_FAULT_SIGBUS; else ret = vfio_pci_vmf_insert_pfn(vdev, vmf, pfn, order); @@ -246,7 +259,7 @@ static bool vfio_pci_dma_buf_is_mappable(struct dma_buf *dmabuf) * on: for example, users should not be mmap()ing a buffer * that's being moved [by a user-triggered activity]. 
*/ - if (priv->revoked) + if (priv->status != VFIO_PCI_DMABUF_OK) return false; return true; @@ -296,6 +309,7 @@ static const struct dma_buf_ops vfio_pci_dmabuf_ops = { .unmap_dma_buf = vfio_pci_dma_buf_unmap, .release = vfio_pci_dma_buf_release, .mmap = vfio_pci_dma_buf_mmap, + .revoke = vfio_pci_dma_buf_revoke, }; /* @@ -320,7 +334,7 @@ int vfio_pci_dma_buf_iommufd_map(struct dma_buf_attachment *attachment, return -EOPNOTSUPP; priv = attachment->dmabuf->priv; - if (priv->revoked) + if (priv->status != VFIO_PCI_DMABUF_OK) return -ENODEV; /* More than one range to iommufd will require proper DMABUF support */ @@ -506,7 +520,8 @@ int vfio_pci_core_feature_dma_buf(struct vfio_pci_core_device *vdev, u32 flags, INIT_LIST_HEAD(&priv->dmabufs_elm); down_write(&vdev->memory_lock); dma_resv_lock(priv->dmabuf->resv, NULL); - priv->revoked = !__vfio_pci_memory_enabled(vdev); + priv->status = __vfio_pci_memory_enabled(vdev) ? VFIO_PCI_DMABUF_OK : + VFIO_PCI_DMABUF_TEMP_REVOKED; list_add_tail(&priv->dmabufs_elm, &vdev->dmabufs); dma_resv_unlock(priv->dmabuf->resv); up_write(&vdev->memory_lock); @@ -541,7 +556,7 @@ void vfio_pci_dma_buf_move(struct vfio_pci_core_device *vdev, bool revoked) lockdep_assert_held_write(&vdev->memory_lock); /* * Holding memory_lock ensures a racing - * vfio_pci_dma_buf_mmap_*_fault() observes priv->revoked + * vfio_pci_dma_buf_mmap_*_fault() observes priv->status * properly. */ @@ -549,9 +564,11 @@ void vfio_pci_dma_buf_move(struct vfio_pci_core_device *vdev, bool revoked) if (!get_file_active(&priv->dmabuf->file)) continue; - if (priv->revoked != revoked) { + if ((priv->status == VFIO_PCI_DMABUF_OK && revoked) || + (priv->status == VFIO_PCI_DMABUF_TEMP_REVOKED && !revoked)) { dma_resv_lock(priv->dmabuf->resv, NULL); - priv->revoked = revoked; + priv->status = revoked ? 
VFIO_PCI_DMABUF_TEMP_REVOKED : + VFIO_PCI_DMABUF_OK; dma_buf_move_notify(priv->dmabuf); dma_resv_unlock(priv->dmabuf->resv); @@ -580,7 +597,7 @@ void vfio_pci_dma_buf_cleanup(struct vfio_pci_core_device *vdev) dma_resv_lock(priv->dmabuf->resv, NULL); list_del_init(&priv->dmabufs_elm); priv->vdev = NULL; - priv->revoked = true; + priv->status = VFIO_PCI_DMABUF_PERM_REVOKED; dma_buf_move_notify(priv->dmabuf); dma_resv_unlock(priv->dmabuf->resv); unmap_mapping_range(priv->dmabuf->file->f_mapping, @@ -590,3 +607,28 @@ void vfio_pci_dma_buf_cleanup(struct vfio_pci_core_device *vdev) } up_write(&vdev->memory_lock); } + +static int vfio_pci_dma_buf_revoke(struct dma_buf *dmabuf) +{ + struct vfio_pci_dma_buf *priv = dmabuf->priv; + struct vfio_pci_core_device *vdev; + + vdev = READ_ONCE(priv->vdev); + + if (!vdev) + return -ENODEV; + + scoped_guard(rwsem_read, &vdev->memory_lock) { + if (priv->status == VFIO_PCI_DMABUF_PERM_REVOKED) + return -EBADFD; + + dma_resv_lock(priv->dmabuf->resv, NULL); + priv->status = VFIO_PCI_DMABUF_PERM_REVOKED; + dma_buf_move_notify(priv->dmabuf); + dma_resv_unlock(priv->dmabuf->resv); + + unmap_mapping_range(priv->dmabuf->file->f_mapping, + 0, priv->size, 1); + } + return 0; +} -- 2.47.3