Since we could request to add more regions after initialization. We would need locking to avoid racing with readers and cause UAF. use RCU for read-write synchronization. And region_lock mutex is used to synchronize the write section. Changing the value of num_regions is done under the mutex. Since the num_regions can only increase, using READ_ONCE and WRITE_ONCE should be enough to make sure we have a valid value. On the write section, synchronize_rcu() is run before incrementing num_regions. Doing that makes sure read sections are passed before increasing num_regions to avoid causing out-of-bound access. Signed-off-by: Mahmoud Adam --- drivers/vfio/pci/vfio_pci_core.c | 59 +++++++++++++++++++++++--------- drivers/vfio/pci/vfio_pci_igd.c | 16 ++++++--- include/linux/vfio_pci_core.h | 1 + 3 files changed, 55 insertions(+), 21 deletions(-) diff --git a/drivers/vfio/pci/vfio_pci_core.c b/drivers/vfio/pci/vfio_pci_core.c index 6629490c0e46f..78e18bfd973e5 100644 --- a/drivers/vfio/pci/vfio_pci_core.c +++ b/drivers/vfio/pci/vfio_pci_core.c @@ -882,7 +882,8 @@ static int msix_mmappable_cap(struct vfio_pci_core_device *vdev, } /* - * Registers a new region to vfio_pci_core_device. + * Registers a new region to vfio_pci_core_device. region_lock should + * be held when multiple registers could happen. * Returns region index on success or a negative errno. */ int vfio_pci_core_register_dev_region(struct vfio_pci_core_device *vdev, @@ -890,15 +891,20 @@ int vfio_pci_core_register_dev_region(struct vfio_pci_core_device *vdev, const struct vfio_pci_regops *ops, size_t size, u32 flags, void *data) { - int num_regions = vdev->num_regions; struct vfio_pci_region *region, *old_region; + int num_regions; + + mutex_lock(&vdev->region_lock); + num_regions = READ_ONCE(vdev->num_regions); region = kmalloc((num_regions + 1) * sizeof(*region), GFP_KERNEL_ACCOUNT); if (!region) return -ENOMEM; - old_region = vdev->region; + old_region = + rcu_dereference_protected(vdev->region, + lockdep_is_held(&vdev->region_lock)); if (old_region) memcpy(region, old_region, num_regions * sizeof(*region)); @@ -909,8 +915,10 @@ int vfio_pci_core_register_dev_region(struct vfio_pci_core_device *vdev, region[num_regions].flags = flags; region[num_regions].data = data; - vdev->region = region; - vdev->num_regions++; + rcu_assign_pointer(vdev->region, region); + synchronize_rcu(); + WRITE_ONCE(vdev->num_regions, READ_ONCE(vdev->num_regions) + 1); + mutex_unlock(&vdev->region_lock); kfree(old_region); return num_regions; } @@ -968,7 +976,7 @@ static int vfio_pci_ioctl_get_info(struct vfio_pci_core_device *vdev, if (vdev->reset_works) info.flags |= VFIO_DEVICE_FLAGS_RESET; - info.num_regions = VFIO_PCI_NUM_REGIONS + vdev->num_regions; + info.num_regions = VFIO_PCI_NUM_REGIONS + READ_ONCE(vdev->num_regions); info.num_irqs = VFIO_PCI_NUM_IRQS; ret = vfio_pci_info_zdev_add_caps(vdev, &caps); @@ -1094,13 +1102,16 @@ static int vfio_pci_ioctl_get_region_info(struct vfio_pci_core_device *vdev, .header.version = 1 }; - if (info.index >= VFIO_PCI_NUM_REGIONS + vdev->num_regions) + if (info.index >= VFIO_PCI_NUM_REGIONS + + READ_ONCE(vdev->num_regions)) return -EINVAL; - info.index = array_index_nospec( - info.index, VFIO_PCI_NUM_REGIONS + vdev->num_regions); + info.index = array_index_nospec(info.index, + VFIO_PCI_NUM_REGIONS + + READ_ONCE(vdev->num_regions)); i = info.index - VFIO_PCI_NUM_REGIONS; - region = &vdev->region[i]; + rcu_read_lock(); + region = &rcu_dereference(vdev->region)[i]; info.offset = VFIO_PCI_INDEX_TO_OFFSET(info.index); info.size = region->size; @@ -1111,15 +1122,20 @@ static int vfio_pci_ioctl_get_region_info(struct vfio_pci_core_device *vdev, ret = vfio_info_add_capability(&caps, &cap_type.header, sizeof(cap_type)); - if (ret) + if (ret) { + rcu_read_unlock(); return ret; + } if (region->ops->add_capability) { ret = region->ops->add_capability( vdev, region, &caps); - if (ret) + if (ret) { + rcu_read_unlock(); return ret; + } } + rcu_read_unlock(); } } @@ -1536,7 +1552,7 @@ static ssize_t vfio_pci_rw(struct vfio_pci_core_device *vdev, char __user *buf, unsigned int index = VFIO_PCI_OFFSET_TO_INDEX(*ppos); int ret; - if (index >= VFIO_PCI_NUM_REGIONS + vdev->num_regions) + if (index >= VFIO_PCI_NUM_REGIONS + READ_ONCE(vdev->num_regions)) return -EINVAL; ret = pm_runtime_resume_and_get(&vdev->pdev->dev); @@ -1568,8 +1584,11 @@ static ssize_t vfio_pci_rw(struct vfio_pci_core_device *vdev, char __user *buf, default: index -= VFIO_PCI_NUM_REGIONS; - ret = vdev->region[index].ops->rw(vdev, buf, - count, ppos, iswrite); + rcu_read_lock(); + ret = rcu_dereference(vdev->region)[index].ops->rw(vdev, buf, + count, ppos, + iswrite); + rcu_read_unlock(); break; } @@ -1726,7 +1745,7 @@ int vfio_pci_core_mmap(struct vfio_device *core_vdev, struct vm_area_struct *vma index = vma->vm_pgoff >> (VFIO_PCI_OFFSET_SHIFT - PAGE_SHIFT); - if (index >= VFIO_PCI_NUM_REGIONS + vdev->num_regions) + if (index >= VFIO_PCI_NUM_REGIONS + READ_ONCE(vdev->num_regions)) return -EINVAL; if (vma->vm_end < vma->vm_start) return -EINVAL; @@ -1734,12 +1753,16 @@ int vfio_pci_core_mmap(struct vfio_device *core_vdev, struct vm_area_struct *vma return -EINVAL; if (index >= VFIO_PCI_NUM_REGIONS) { int regnum = index - VFIO_PCI_NUM_REGIONS; - struct vfio_pci_region *region = vdev->region + regnum; + struct vfio_pci_region *region; + + rcu_read_lock(); + region = rcu_dereference(vdev->region) + regnum; ret = -EINVAL; if (region->ops && region->ops->mmap && (region->flags & VFIO_REGION_INFO_FLAG_MMAP)) ret = region->ops->mmap(vdev, region, vma); + rcu_read_unlock(); return ret; } if (index >= VFIO_PCI_ROM_REGION_INDEX) @@ -2107,6 +2130,7 @@ int vfio_pci_core_init_dev(struct vfio_device *core_vdev) INIT_LIST_HEAD(&vdev->sriov_pfs_item); init_rwsem(&vdev->memory_lock); xa_init(&vdev->ctx); + mutex_init(&vdev->region_lock); return 0; } @@ -2119,6 +2143,7 @@ void vfio_pci_core_release_dev(struct vfio_device *core_vdev) mutex_destroy(&vdev->igate); mutex_destroy(&vdev->ioeventfds_lock); + mutex_destroy(&vdev->region_lock); kfree(vdev->region); kfree(vdev->pm_save); } diff --git a/drivers/vfio/pci/vfio_pci_igd.c b/drivers/vfio/pci/vfio_pci_igd.c index 93ddef48e4e4c..1f7e9e82ac08c 100644 --- a/drivers/vfio/pci/vfio_pci_igd.c +++ b/drivers/vfio/pci/vfio_pci_igd.c @@ -71,13 +71,17 @@ static ssize_t vfio_pci_igd_rw(struct vfio_pci_core_device *vdev, struct vfio_pci_region *region; struct igd_opregion_vbt *opregionvbt; - region = &vdev->region[i]; + rcu_read_lock(); + region = &rcu_dereference(vdev->region)[i]; opregionvbt = region->data; - if (pos >= region->size || iswrite) + if (pos >= region->size || iswrite) { + rcu_read_unlock(); return -EINVAL; + } count = min_t(size_t, count, region->size - pos); + rcu_read_unlock(); remaining = count; /* Copy until OpRegion version */ @@ -293,13 +297,17 @@ static ssize_t vfio_pci_igd_cfg_rw(struct vfio_pci_core_device *vdev, struct vfio_pci_region *region; struct pci_dev *pdev; - region = &vdev->region[i]; + rcu_read_lock(); + region = &rcu_dereference(vdev->region)[i]; pdev = region->data; - if (pos >= region->size || iswrite) + if (pos >= region->size || iswrite) { + rcu_read_unlock(); return -EINVAL; + } size = count = min(count, (size_t)(region->size - pos)); + rcu_read_unlock(); if ((pos & 1) && size) { u8 val; diff --git a/include/linux/vfio_pci_core.h b/include/linux/vfio_pci_core.h index f541044e42a2a..e106e58f297e9 100644 --- a/include/linux/vfio_pci_core.h +++ b/include/linux/vfio_pci_core.h @@ -63,6 +63,7 @@ struct vfio_pci_core_device { int irq_type; int num_regions; struct vfio_pci_region *region; + struct mutex region_lock; u8 msi_qmax; u8 msix_bar; u16 msix_size; -- 2.47.3 Amazon Web Services Development Center Germany GmbH Tamara-Danz-Str. 13 10243 Berlin Geschaeftsfuehrung: Christian Schlaeger Eingetragen am Amtsgericht Charlottenburg unter HRB 257764 B Sitz: Berlin Ust-ID: DE 365 538 597