vfio_pci_core_setup_barmap() is used in a couple of paths
(vfio_pci_bar_rw(), mmap()) to ensure BARs are mapped before access, and
these paths could execute concurrently.  Concurrent execution of
vfio_pci_core_setup_barmap() could lead to some callers getting -EBUSY,
which would be treated as fatal.

Introduce a new vfio_pci_core_lock_setup_barmap() function, which takes
the vdev->memory_lock for write across BAR initialization.  Current
in-kernel use moves to this.  The existing (exported!)
vfio_pci_core_setup_barmap() keeps its 'unlocked' behaviour.

The new function is exported as well: it is called from the
nvgrace-gpu and virtio-vfio-pci variant drivers, which may be built as
separate modules.

Fixes: 7f5764e179c6 ("vfio: use vfio_pci_core_setup_barmap to map bar in mmap")
Fixes: 0d77ed3589ac0 ("vfio/pci: Pull BAR mapping setup from read-write path")
Signed-off-by: Matt Evans
---
 drivers/vfio/pci/nvgrace-gpu/main.c |  2 +-
 drivers/vfio/pci/vfio_pci_core.c    |  2 +-
 drivers/vfio/pci/vfio_pci_dmabuf.c  |  2 +-
 drivers/vfio/pci/vfio_pci_rdwr.c    | 45 ++++++++++++++++++++++++++-----
 drivers/vfio/pci/virtio/legacy_io.c |  2 +-
 include/linux/vfio_pci_core.h       |  1 +
 6 files changed, 46 insertions(+), 8 deletions(-)

diff --git a/drivers/vfio/pci/nvgrace-gpu/main.c b/drivers/vfio/pci/nvgrace-gpu/main.c
index fa056b69f899..c1df437754f9 100644
--- a/drivers/vfio/pci/nvgrace-gpu/main.c
+++ b/drivers/vfio/pci/nvgrace-gpu/main.c
@@ -189,7 +189,7 @@ static int nvgrace_gpu_open_device(struct vfio_device *core_vdev)
 	 * register reads on first fault before establishing any GPU
 	 * memory mapping.
 	 */
-	ret = vfio_pci_core_setup_barmap(vdev, 0);
+	ret = vfio_pci_core_lock_setup_barmap(vdev, 0);
 	if (ret)
 		goto error_exit;
 
diff --git a/drivers/vfio/pci/vfio_pci_core.c b/drivers/vfio/pci/vfio_pci_core.c
index 3f8d093aacf8..4e9091e5fcc2 100644
--- a/drivers/vfio/pci/vfio_pci_core.c
+++ b/drivers/vfio/pci/vfio_pci_core.c
@@ -1764,7 +1764,7 @@ int vfio_pci_core_mmap(struct vfio_device *core_vdev, struct vm_area_struct *vma
 	 * Even though we don't make use of the barmap for the mmap,
 	 * we need to request the region and the barmap tracks that.
 	 */
-	ret = vfio_pci_core_setup_barmap(vdev, index);
+	ret = vfio_pci_core_lock_setup_barmap(vdev, index);
 	if (ret)
 		return ret;
 
diff --git a/drivers/vfio/pci/vfio_pci_dmabuf.c b/drivers/vfio/pci/vfio_pci_dmabuf.c
index fefe7cf4256b..281ba7d69567 100644
--- a/drivers/vfio/pci/vfio_pci_dmabuf.c
+++ b/drivers/vfio/pci/vfio_pci_dmabuf.c
@@ -277,7 +277,7 @@ int vfio_pci_core_feature_dma_buf(struct vfio_pci_core_device *vdev, u32 flags,
 	 * were requested before returning DMABUFs that reference
 	 * them.  Barmap setup does this:
 	 */
-	ret = vfio_pci_core_setup_barmap(vdev, get_dma_buf.region_index);
+	ret = vfio_pci_core_lock_setup_barmap(vdev, get_dma_buf.region_index);
 	if (ret)
 		goto err_free_phys;
 
diff --git a/drivers/vfio/pci/vfio_pci_rdwr.c b/drivers/vfio/pci/vfio_pci_rdwr.c
index 4251ee03e146..11e155acf8ef 100644
--- a/drivers/vfio/pci/vfio_pci_rdwr.c
+++ b/drivers/vfio/pci/vfio_pci_rdwr.c
@@ -198,15 +198,12 @@ ssize_t vfio_pci_core_do_io_rw(struct vfio_pci_core_device *vdev, bool test_mem,
 }
 EXPORT_SYMBOL_GPL(vfio_pci_core_do_io_rw);
 
-int vfio_pci_core_setup_barmap(struct vfio_pci_core_device *vdev, int bar)
+static int __vfio_pci_core_iomap_barmap(struct vfio_pci_core_device *vdev, int bar)
 {
 	struct pci_dev *pdev = vdev->pdev;
 	int ret;
 	void __iomem *io;
 
-	if (vdev->barmap[bar])
-		return 0;
-
 	ret = pci_request_selected_regions(pdev, 1 << bar, "vfio");
 	if (ret)
 		return ret;
@@ -221,6 +218,46 @@ int vfio_pci_core_setup_barmap(struct vfio_pci_core_device *vdev, int bar)
 
 	return 0;
 }
+
+/*
+ * As vfio_pci_core_setup_barmap(), but serialized against concurrent
+ * callers by taking vdev->memory_lock for write around the one-time
+ * BAR initialization.
+ */
+int vfio_pci_core_lock_setup_barmap(struct vfio_pci_core_device *vdev, int bar)
+{
+	int ret;
+
+	lockdep_assert_not_held(&vdev->memory_lock);
+
+	if (likely(READ_ONCE(vdev->barmap[bar])))
+		return 0;
+
+	down_write(&vdev->memory_lock);
+	if (unlikely(READ_ONCE(vdev->barmap[bar]))) {
+		up_write(&vdev->memory_lock);
+		return 0;
+	}
+
+	ret = __vfio_pci_core_iomap_barmap(vdev, bar);
+	up_write(&vdev->memory_lock);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(vfio_pci_core_lock_setup_barmap);
+
+int vfio_pci_core_setup_barmap(struct vfio_pci_core_device *vdev, int bar)
+{
+	/*
+	 * An external caller must prevent concurrent calls of this,
+	 * including via other VFIO-internal paths (for example, by
+	 * holding vdev->memory_lock).
+	 */
+	if (vdev->barmap[bar])
+		return 0;
+
+	return __vfio_pci_core_iomap_barmap(vdev, bar);
+}
 EXPORT_SYMBOL_GPL(vfio_pci_core_setup_barmap);
 
 ssize_t vfio_pci_bar_rw(struct vfio_pci_core_device *vdev, char __user *buf,
@@ -274,7 +311,7 @@ ssize_t vfio_pci_bar_rw(struct vfio_pci_core_device *vdev, char __user *buf,
 		 */
 		max_width = VFIO_PCI_IO_WIDTH_4;
 	} else {
-		int ret = vfio_pci_core_setup_barmap(vdev, bar);
+		int ret = vfio_pci_core_lock_setup_barmap(vdev, bar);
 		if (ret) {
 			done = ret;
 			goto out;
@@ -452,7 +489,7 @@ int vfio_pci_ioeventfd(struct vfio_pci_core_device *vdev, loff_t offset,
 	if (count == 8)
 		return -EINVAL;
 
-	ret = vfio_pci_core_setup_barmap(vdev, bar);
+	ret = vfio_pci_core_lock_setup_barmap(vdev, bar);
 	if (ret)
 		return ret;
 
diff --git a/drivers/vfio/pci/virtio/legacy_io.c b/drivers/vfio/pci/virtio/legacy_io.c
index 1ed349a55629..c77064e3f5c4 100644
--- a/drivers/vfio/pci/virtio/legacy_io.c
+++ b/drivers/vfio/pci/virtio/legacy_io.c
@@ -305,7 +305,7 @@ static int virtiovf_set_notify_addr(struct virtiovf_pci_core_device *virtvdev)
 	 * Setup the BAR where the 'notify' exists to be used by vfio as well
 	 * This will let us mmap it only once and use it when needed.
 	 */
-	ret = vfio_pci_core_setup_barmap(core_device,
+	ret = vfio_pci_core_lock_setup_barmap(core_device,
 					 virtvdev->notify_bar);
 	if (ret)
 		return ret;
diff --git a/include/linux/vfio_pci_core.h b/include/linux/vfio_pci_core.h
index 2ebba746c18f..2ea4e773c121 100644
--- a/include/linux/vfio_pci_core.h
+++ b/include/linux/vfio_pci_core.h
@@ -189,6 +189,7 @@ int vfio_pci_core_enable(struct vfio_pci_core_device *vdev);
 void vfio_pci_core_disable(struct vfio_pci_core_device *vdev);
 void vfio_pci_core_finish_enable(struct vfio_pci_core_device *vdev);
 int vfio_pci_core_setup_barmap(struct vfio_pci_core_device *vdev, int bar);
+int vfio_pci_core_lock_setup_barmap(struct vfio_pci_core_device *vdev, int bar);
 pci_ers_result_t vfio_pci_core_aer_err_detected(struct pci_dev *pdev,
 						pci_channel_state_t state);
 ssize_t vfio_pci_core_do_io_rw(struct vfio_pci_core_device *vdev, bool test_mem,
-- 
2.47.3