add mmap maple tree for vfio_device_file, this allows vfio devices to create per mmap request options. the vfio device needs to insert/allocate the region range offset & size and make it accessible for the user, probably when the user is calling DEVICE_GET_REGION_INFO, and then vfio uses the maple_tree to find the entry (vfio_mmap) needed for mmap op, this adds the vfio_mmap_init & vfio_mmap_free for initialization and freeing the entry, the freeing is done through the free callback in the vfio_mmap_ops, which vfio_devices should implement if they are allocating an entry. Signed-off-by: Mahmoud Adam --- I didn't find a situation where we would need to use ref counting for now, so I didn't implement it, I think most cases are already handled by file ref counting, but maybe I'm overlooking something here. drivers/vfio/vfio.h | 1 + drivers/vfio/vfio_main.c | 29 +++++++++++++++++++++++++++++ include/linux/vfio.h | 17 +++++++++++++++++ 3 files changed, 47 insertions(+) diff --git a/drivers/vfio/vfio.h b/drivers/vfio/vfio.h index 50128da18bcaf..3f0cf2dd41116 100644 --- a/drivers/vfio/vfio.h +++ b/drivers/vfio/vfio.h @@ -19,6 +19,7 @@ struct vfio_container; struct vfio_device_file { struct vfio_device *device; struct vfio_group *group; + struct maple_tree mmap_mt; u8 access_granted; u32 devid; /* only valid when iommufd is valid */ diff --git a/drivers/vfio/vfio_main.c b/drivers/vfio/vfio_main.c index 1fd261efc582d..4c4af4de60d12 100644 --- a/drivers/vfio/vfio_main.c +++ b/drivers/vfio/vfio_main.c @@ -39,6 +39,7 @@ #include #include #include +#include #include "vfio.h" #define DRIVER_VERSION "0.3" @@ -498,6 +499,7 @@ vfio_allocate_device_file(struct vfio_device *device) df->device = device; spin_lock_init(&df->kvm_ref_lock); + mt_init_flags(&df->mmap_mt, MT_FLAGS_ALLOC_RANGE); return df; } @@ -622,6 +624,25 @@ static inline void vfio_device_pm_runtime_put(struct vfio_device *device) pm_runtime_put(dev); } +void vfio_mmap_init(struct vfio_device *vdev, struct vfio_mmap 
*vmmap, + u32 region_flags, u64 offset, u64 size, + struct vfio_mmap_ops *ops) +{ + vmmap->owner = vdev; + vmmap->offset = offset; + vmmap->ops = ops; + vmmap->size = size; + vmmap->region_flags = region_flags; +} +EXPORT_SYMBOL_GPL(vfio_mmap_init); + +void vfio_mmap_free(struct vfio_mmap *vmmap) +{ + if (vmmap->ops && vmmap->ops->free) + vmmap->ops->free(vmmap); +} +EXPORT_SYMBOL_GPL(vfio_mmap_free); + /* * VFIO Device fd */ @@ -629,14 +650,22 @@ static int vfio_device_fops_release(struct inode *inode, struct file *filep) { struct vfio_device_file *df = filep->private_data; struct vfio_device *device = df->device; + struct vfio_mmap *vmmap; + unsigned long index = 0; if (df->group) vfio_df_group_close(df); else vfio_df_unbind_iommufd(df); + mt_for_each(&df->mmap_mt, vmmap, index, ULONG_MAX) { + mtree_erase(&df->mmap_mt, index); + vfio_mmap_free(vmmap); + } + vfio_device_put_registration(device); + mtree_destroy(&df->mmap_mt); kfree(df); return 0; diff --git a/include/linux/vfio.h b/include/linux/vfio.h index 707b00772ce1f..6e0aca05aa406 100644 --- a/include/linux/vfio.h +++ b/include/linux/vfio.h @@ -80,6 +80,19 @@ struct vfio_device { #endif }; +struct vfio_mmap { + struct vfio_device *owner; + u64 offset; + u64 size; + u32 region_flags; + struct vfio_mmap_ops *ops; +}; + +struct vfio_mmap_ops { + void (*free)(struct vfio_mmap *vmmap); +}; + + /** * struct vfio_device_ops - VFIO bus driver device callbacks * @@ -338,6 +351,10 @@ int vfio_pin_pages(struct vfio_device *device, dma_addr_t iova, void vfio_unpin_pages(struct vfio_device *device, dma_addr_t iova, int npage); int vfio_dma_rw(struct vfio_device *device, dma_addr_t iova, void *data, size_t len, bool write); +void vfio_mmap_init(struct vfio_device *vdev, struct vfio_mmap *vmmap, + u32 region_flags, u64 offset, u64 size, + struct vfio_mmap_ops *ops); +void vfio_mmap_free(struct vfio_mmap *vmmap); /* * Sub-module helpers -- 2.47.3 Amazon Web Services Development Center Germany GmbH Tamara-Danz-Str. 
13 10243 Berlin Geschaeftsfuehrung: Christian Schlaeger, Jonathan Weiss Eingetragen am Amtsgericht Charlottenburg unter HRB 257764 B Sitz: Berlin Ust-ID: DE 365 538 597 Add new transitional operations for ioctl & mmap that allow using the new mmap maple tree. Since these operations are used extensively, it's better to transition to new temporary ops; after onboarding all the users to the new ops, we will drop the old legacy ops prototypes and replace them with the new ones. Having new ops allows us to enforce the vmmap existence check when the mmap op is called, and makes the migration more stable and reviewable. ioctl needs access to the whole mt to add/query entries when needed; this allows inserting a new range in the mt, for example when DEVICE_GET_REGION_INFO is called by the user, and it also enables us to add other uapi to change mmap attrs in a certain range. When mmapping, there must be a vmmap entry for that offset; otherwise -EINVAL is returned. Signed-off-by: Mahmoud Adam --- These names are only used during the migration period, which is why I used 2 as a suffix; maybe _vmmap or similar could be used instead.
drivers/vfio/vfio_main.c | 12 ++++++++++++ include/linux/vfio.h | 4 ++++ 2 files changed, 16 insertions(+) diff --git a/drivers/vfio/vfio_main.c b/drivers/vfio/vfio_main.c index 4c4af4de60d12..3275ff56eef47 100644 --- a/drivers/vfio/vfio_main.c +++ b/drivers/vfio/vfio_main.c @@ -1324,6 +1324,10 @@ static long vfio_device_fops_unl_ioctl(struct file *filep, break; default: + if (device->ops->ioctl2) { + ret = device->ops->ioctl2(device, cmd, arg, &df->mmap_mt); + break; + } if (unlikely(!device->ops->ioctl)) ret = -EINVAL; else @@ -1372,11 +1376,19 @@ static int vfio_device_fops_mmap(struct file *filep, struct vm_area_struct *vma) { struct vfio_device_file *df = filep->private_data; struct vfio_device *device = df->device; + struct vfio_mmap *vmmap; /* Paired with smp_store_release() following vfio_df_open() */ if (!smp_load_acquire(&df->access_granted)) return -EINVAL; + if (device->ops->mmap2) { + vmmap = mtree_load(&df->mmap_mt, (vma->vm_pgoff << PAGE_SHIFT)); + if (!vmmap) + return -EINVAL; + return device->ops->mmap2(device, vma, vmmap); + } + if (unlikely(!device->ops->mmap)) return -EINVAL; diff --git a/include/linux/vfio.h b/include/linux/vfio.h index 6e0aca05aa406..836ef72a38104 100644 --- a/include/linux/vfio.h +++ b/include/linux/vfio.h @@ -142,7 +142,11 @@ struct vfio_device_ops { size_t count, loff_t *size); long (*ioctl)(struct vfio_device *vdev, unsigned int cmd, unsigned long arg); + long (*ioctl2)(struct vfio_device *vdev, unsigned int cmd, + unsigned long arg, struct maple_tree *mmap_mt); int (*mmap)(struct vfio_device *vdev, struct vm_area_struct *vma); + int (*mmap2)(struct vfio_device *vdev, struct vm_area_struct *vma, + struct vfio_mmap *vmmap); void (*request)(struct vfio_device *vdev, unsigned int count); int (*match)(struct vfio_device *vdev, char *buf); void (*dma_unmap)(struct vfio_device *vdev, u64 iova, u64 length); -- 2.47.3 Amazon Web Services Development Center Germany GmbH Tamara-Danz-Str. 
13 10243 Berlin Geschaeftsfuehrung: Christian Schlaeger, Jonathan Weiss Eingetragen am Amtsgericht Charlottenburg unter HRB 257764 B Sitz: Berlin Ust-ID: DE 365 538 597 rename vm operations from mmap to vm to avoid confusion with the vfio mmap naming. mainly because we will reuse the name vfio_pci_mmap_ops for the following patches. Signed-off-by: Mahmoud Adam --- drivers/vfio/pci/vfio_pci_core.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/vfio/pci/vfio_pci_core.c b/drivers/vfio/pci/vfio_pci_core.c index 6328c3a05bcdd..9a22969607bfe 100644 --- a/drivers/vfio/pci/vfio_pci_core.c +++ b/drivers/vfio/pci/vfio_pci_core.c @@ -1641,7 +1641,7 @@ static unsigned long vma_to_pfn(struct vm_area_struct *vma) return (pci_resource_start(vdev->pdev, index) >> PAGE_SHIFT) + pgoff; } -static vm_fault_t vfio_pci_mmap_huge_fault(struct vm_fault *vmf, +static vm_fault_t vfio_pci_vm_huge_fault(struct vm_fault *vmf, unsigned int order) { struct vm_area_struct *vma = vmf->vma; @@ -1696,15 +1696,15 @@ static vm_fault_t vfio_pci_mmap_huge_fault(struct vm_fault *vmf, return ret; } -static vm_fault_t vfio_pci_mmap_page_fault(struct vm_fault *vmf) +static vm_fault_t vfio_pci_vm_page_fault(struct vm_fault *vmf) { - return vfio_pci_mmap_huge_fault(vmf, 0); + return vfio_pci_vm_huge_fault(vmf, 0); } -static const struct vm_operations_struct vfio_pci_mmap_ops = { - .fault = vfio_pci_mmap_page_fault, +static const struct vm_operations_struct vfio_pci_vm_ops = { + .fault = vfio_pci_vm_page_fault, #ifdef CONFIG_ARCH_SUPPORTS_HUGE_PFNMAP - .huge_fault = vfio_pci_mmap_huge_fault, + .huge_fault = vfio_pci_vm_huge_fault, #endif }; @@ -1792,7 +1792,7 @@ int vfio_pci_core_mmap(struct vfio_device *core_vdev, struct vm_area_struct *vma */ vm_flags_set(vma, VM_ALLOW_ANY_UNCACHED | VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP); - vma->vm_ops = &vfio_pci_mmap_ops; + vma->vm_ops = &vfio_pci_vm_ops; return 0; } -- 2.47.3 Amazon Web Services Development Center 
Germany GmbH Tamara-Danz-Str. 13 10243 Berlin Geschaeftsfuehrung: Christian Schlaeger, Jonathan Weiss Eingetragen am Amtsgericht Charlottenburg unter HRB 257764 B Sitz: Berlin Ust-ID: DE 365 538 597 all switch cases are calculating the offset similarly, and they are not used until the end of the function, just calculate the offset once at the end, which makes it simpler to change the offset in one spot in the following patch. Signed-off-by: Mahmoud Adam --- drivers/vfio/pci/vfio_pci_core.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/drivers/vfio/pci/vfio_pci_core.c b/drivers/vfio/pci/vfio_pci_core.c index 9a22969607bfe..467466a0b619f 100644 --- a/drivers/vfio/pci/vfio_pci_core.c +++ b/drivers/vfio/pci/vfio_pci_core.c @@ -1014,13 +1014,11 @@ static int vfio_pci_ioctl_get_region_info(struct vfio_pci_core_device *vdev, switch (info.index) { case VFIO_PCI_CONFIG_REGION_INDEX: - info.offset = VFIO_PCI_INDEX_TO_OFFSET(info.index); info.size = pdev->cfg_size; info.flags = VFIO_REGION_INFO_FLAG_READ | VFIO_REGION_INFO_FLAG_WRITE; break; case VFIO_PCI_BAR0_REGION_INDEX ... 
VFIO_PCI_BAR5_REGION_INDEX: - info.offset = VFIO_PCI_INDEX_TO_OFFSET(info.index); info.size = pci_resource_len(pdev, info.index); if (!info.size) { info.flags = 0; @@ -1044,7 +1042,6 @@ static int vfio_pci_ioctl_get_region_info(struct vfio_pci_core_device *vdev, size_t size; u16 cmd; - info.offset = VFIO_PCI_INDEX_TO_OFFSET(info.index); info.flags = 0; info.size = 0; @@ -1074,7 +1071,6 @@ static int vfio_pci_ioctl_get_region_info(struct vfio_pci_core_device *vdev, if (!vdev->has_vga) return -EINVAL; - info.offset = VFIO_PCI_INDEX_TO_OFFSET(info.index); info.size = 0xc0000; info.flags = VFIO_REGION_INFO_FLAG_READ | VFIO_REGION_INFO_FLAG_WRITE; @@ -1093,7 +1089,6 @@ static int vfio_pci_ioctl_get_region_info(struct vfio_pci_core_device *vdev, i = info.index - VFIO_PCI_NUM_REGIONS; - info.offset = VFIO_PCI_INDEX_TO_OFFSET(info.index); info.size = vdev->region[i].size; info.flags = vdev->region[i].flags; @@ -1131,6 +1126,7 @@ static int vfio_pci_ioctl_get_region_info(struct vfio_pci_core_device *vdev, kfree(caps.buf); } + info.offset = VFIO_PCI_INDEX_TO_OFFSET(info.index); return copy_to_user(arg, &info, minsz) ? -EFAULT : 0; } -- 2.47.3 Amazon Web Services Development Center Germany GmbH Tamara-Danz-Str. 13 10243 Berlin Geschaeftsfuehrung: Christian Schlaeger, Jonathan Weiss Eingetragen am Amtsgericht Charlottenburg unter HRB 257764 B Sitz: Berlin Ust-ID: DE 365 538 597 support the new vmmap solution to vfio-pci-core, this adds the vfio_pci_mmap struct. the core already keeps the offset and size of the region, extend it with bar_index. 
Add an alloc helper function for vfio_pci, which allocates a vmmap and inserts it into the mt. For the transition period, mtree_insert_range is used with the same offset calculation as the legacy solution, so that we don't break VFIO_PCI_OFFSET_TO_INDEX usages. Eventually, after all the vfio_pci devices are migrated to the new ops, these macros will be replaced with mtree_load or similar, and maple tree range allocation could then be used instead of direct insertions. Signed-off-by: Mahmoud Adam --- drivers/vfio/pci/vfio_pci_core.c | 44 ++++++++++++++++++++++++++++++++ include/linux/vfio_pci_core.h | 10 ++++++++ 2 files changed, 54 insertions(+) diff --git a/drivers/vfio/pci/vfio_pci_core.c b/drivers/vfio/pci/vfio_pci_core.c index 467466a0b619f..7a431a03bd850 100644 --- a/drivers/vfio/pci/vfio_pci_core.c +++ b/drivers/vfio/pci/vfio_pci_core.c @@ -882,6 +882,50 @@ static int msix_mmappable_cap(struct vfio_pci_core_device *vdev, return vfio_info_add_capability(caps, &header, sizeof(header)); } +static void vfio_pci_mmap_free(struct vfio_mmap *core_vmmap) +{ + struct vfio_pci_mmap *vmmap = container_of(core_vmmap, + struct vfio_pci_mmap, + core); + kfree(vmmap); +} + +static struct vfio_mmap_ops vfio_pci_mmap_ops = { + .free = vfio_pci_mmap_free, +}; + +int vfio_pci_mmap_alloc(struct vfio_pci_core_device *vdev, + struct maple_tree *mmap_mt, u32 region_flags, + size_t bar_size, unsigned int bar_index, + unsigned long *offset) +{ + struct vfio_pci_mmap *vmmap; + int ret; + unsigned long alloc_size; + vmmap = kzalloc(sizeof(*vmmap), GFP_KERNEL); + if (!vmmap) + return -ENOMEM; + + alloc_size = PAGE_ALIGN(bar_size); + /* keep the offset aligned to the current usage for now, so we + * don't break VFIO_PCI_OFFSET_TO_INDEX */ + *offset = VFIO_PCI_INDEX_TO_OFFSET(bar_index); + vmmap->bar_index = bar_index; + vfio_mmap_init(&vdev->vdev, &vmmap->core, region_flags, + *offset, alloc_size, &vfio_pci_mmap_ops); + ret = mtree_insert_range(mmap_mt, *offset, + *offset + alloc_size - 1, + 
&vmmap->core, GFP_KERNEL); + if (ret) { + vfio_mmap_free(&vmmap->core); + /* for now if it exists reuse it */ + if (ret != -EEXIST) + return ret; + } + return 0; +} +EXPORT_SYMBOL(vfio_pci_mmap_alloc); + int vfio_pci_core_register_dev_region(struct vfio_pci_core_device *vdev, unsigned int type, unsigned int subtype, const struct vfio_pci_regops *ops, diff --git a/include/linux/vfio_pci_core.h b/include/linux/vfio_pci_core.h index fbb472dd99b36..532d2914a9c2e 100644 --- a/include/linux/vfio_pci_core.h +++ b/include/linux/vfio_pci_core.h @@ -49,6 +49,11 @@ struct vfio_pci_region { u32 flags; }; +struct vfio_pci_mmap { + struct vfio_mmap core; + unsigned int bar_index; +}; + struct vfio_pci_core_device { struct vfio_device vdev; struct pci_dev *pdev; @@ -137,6 +142,11 @@ bool vfio_pci_core_range_intersect_range(loff_t buf_start, size_t buf_cnt, loff_t *buf_offset, size_t *intersect_count, size_t *register_offset); +int vfio_pci_mmap_alloc(struct vfio_pci_core_device *vdev, + struct maple_tree *mmap_mt, u32 region_flags, + size_t bar_size, unsigned int bar_index, + unsigned long *offset); + #define VFIO_IOWRITE_DECLARATION(size) \ int vfio_pci_core_iowrite##size(struct vfio_pci_core_device *vdev, \ bool test_mem, u##size val, void __iomem *io); -- 2.47.3 Amazon Web Services Development Center Germany GmbH Tamara-Danz-Str. 13 10243 Berlin Geschaeftsfuehrung: Christian Schlaeger, Jonathan Weiss Eingetragen am Amtsgericht Charlottenburg unter HRB 257764 B Sitz: Berlin Ust-ID: DE 365 538 597 This use the new mt to create the mmap offset entries when user calls get_region_info, and return the vmmap range offset, and when the user use the same region range for mmaping, we have access on the vmmap entry, where we could know which bar from the bar_index, and later change mmaping attributes like WC. 
On top of that, since we use the mt range offset, eventually we will not need the legacy VFIO_PCI_INDEX_TO_OFFSET, which means a more dynamic offset calculation that removes the limitations of the legacy system. To avoid duplicating the functions for mmap & get_region_info, this creates a common function for each that uses mmap_mt/vmmap if not NULL; ioctl2 overrides only VFIO_DEVICE_GET_REGION_INFO with the new function. Signed-off-by: Mahmoud Adam --- This follows the same temporary suffix "2"; again, it is only for the migration period, and the other functions will eventually be dropped and replaced. drivers/vfio/pci/vfio_pci_core.c | 72 +++++++++++++++++++++++++++++--- include/linux/vfio_pci_core.h | 4 ++ 2 files changed, 71 insertions(+), 5 deletions(-) diff --git a/drivers/vfio/pci/vfio_pci_core.c b/drivers/vfio/pci/vfio_pci_core.c index 7a431a03bd850..8418d98ac66ce 100644 --- a/drivers/vfio/pci/vfio_pci_core.c +++ b/drivers/vfio/pci/vfio_pci_core.c @@ -1041,8 +1041,10 @@ static int vfio_pci_ioctl_get_info(struct vfio_pci_core_device *vdev, return copy_to_user(arg, &info, minsz) ? -EFAULT : 0; } -static int vfio_pci_ioctl_get_region_info(struct vfio_pci_core_device *vdev, - struct vfio_region_info __user *arg) + +static int _vfio_pci_ioctl_get_region_info(struct vfio_pci_core_device *vdev, + struct maple_tree *mmap_mt, + struct vfio_region_info __user *arg) { unsigned long minsz = offsetofend(struct vfio_region_info, offset); struct pci_dev *pdev = vdev->pdev; @@ -1170,10 +1172,32 @@ static int vfio_pci_ioctl_get_region_info(struct vfio_pci_core_device *vdev, kfree(caps.buf); } - info.offset = VFIO_PCI_INDEX_TO_OFFSET(info.index); + if (mmap_mt) { + ret = vfio_pci_mmap_alloc(vdev, mmap_mt, + info.flags, info.size, info.index, + (unsigned long *) &info.offset); + if (ret) + return ret; + } else { + info.offset = VFIO_PCI_INDEX_TO_OFFSET(info.index); + } + return copy_to_user(arg, &info, minsz) ? 
-EFAULT : 0; } +static int vfio_pci_ioctl_get_region_info(struct vfio_pci_core_device *vdev, + struct vfio_region_info __user *arg) +{ + return _vfio_pci_ioctl_get_region_info(vdev, NULL, arg); +} + +static int vfio_pci_ioctl_get_region_info2(struct vfio_pci_core_device *vdev, + struct maple_tree *mmap_mt, + struct vfio_region_info __user *arg) +{ + return _vfio_pci_ioctl_get_region_info(vdev, mmap_mt, arg); +} + static int vfio_pci_ioctl_get_irq_info(struct vfio_pci_core_device *vdev, struct vfio_irq_info __user *arg) { @@ -1514,6 +1538,23 @@ long vfio_pci_core_ioctl(struct vfio_device *core_vdev, unsigned int cmd, } EXPORT_SYMBOL_GPL(vfio_pci_core_ioctl); + +long vfio_pci_core_ioctl2(struct vfio_device *core_vdev, unsigned int cmd, + unsigned long arg, struct maple_tree *mmap_mt) +{ + struct vfio_pci_core_device *vdev = + container_of(core_vdev, struct vfio_pci_core_device, vdev); + void __user *uarg = (void __user *)arg; + + switch (cmd) { + case VFIO_DEVICE_GET_REGION_INFO: + return vfio_pci_ioctl_get_region_info2(vdev, mmap_mt, uarg); + default: + return vfio_pci_core_ioctl(core_vdev, cmd, arg); + } +} +EXPORT_SYMBOL_GPL(vfio_pci_core_ioctl2); + static int vfio_pci_core_feature_token(struct vfio_device *device, u32 flags, uuid_t __user *arg, size_t argsz) { @@ -1748,16 +1789,24 @@ static const struct vm_operations_struct vfio_pci_vm_ops = { #endif }; -int vfio_pci_core_mmap(struct vfio_device *core_vdev, struct vm_area_struct *vma) +static int _vfio_pci_core_mmap(struct vfio_device *core_vdev, + struct vm_area_struct *vma, + struct vfio_mmap *core_vmmap) { struct vfio_pci_core_device *vdev = container_of(core_vdev, struct vfio_pci_core_device, vdev); + struct vfio_pci_mmap *vmmap = NULL; struct pci_dev *pdev = vdev->pdev; unsigned int index; u64 phys_len, req_len, pgoff, req_start; int ret; - index = vma->vm_pgoff >> (VFIO_PCI_OFFSET_SHIFT - PAGE_SHIFT); + if (core_vmmap) { + vmmap = container_of(core_vmmap, struct vfio_pci_mmap, core); + index = 
vmmap->bar_index; + } else { + index = vma->vm_pgoff >> (VFIO_PCI_OFFSET_SHIFT - PAGE_SHIFT); + } if (index >= VFIO_PCI_NUM_REGIONS + vdev->num_regions) return -EINVAL; @@ -1836,8 +1885,21 @@ int vfio_pci_core_mmap(struct vfio_device *core_vdev, struct vm_area_struct *vma return 0; } + +int vfio_pci_core_mmap(struct vfio_device *core_vdev, struct vm_area_struct *vma) +{ + return _vfio_pci_core_mmap(core_vdev, vma, NULL); +} EXPORT_SYMBOL_GPL(vfio_pci_core_mmap); +int vfio_pci_core_mmap2(struct vfio_device *core_vdev, + struct vm_area_struct *vma, + struct vfio_mmap *core_vmmap) +{ + return _vfio_pci_core_mmap(core_vdev, vma, core_vmmap); +} +EXPORT_SYMBOL_GPL(vfio_pci_core_mmap2); + void vfio_pci_core_request(struct vfio_device *core_vdev, unsigned int count) { struct vfio_pci_core_device *vdev = diff --git a/include/linux/vfio_pci_core.h b/include/linux/vfio_pci_core.h index 532d2914a9c2e..cb52b92340451 100644 --- a/include/linux/vfio_pci_core.h +++ b/include/linux/vfio_pci_core.h @@ -118,6 +118,8 @@ int vfio_pci_core_sriov_configure(struct vfio_pci_core_device *vdev, int nr_virtfn); long vfio_pci_core_ioctl(struct vfio_device *core_vdev, unsigned int cmd, unsigned long arg); +long vfio_pci_core_ioctl2(struct vfio_device *core_vdev, unsigned int cmd, + unsigned long arg, struct maple_tree *mmap_attrs_mt); int vfio_pci_core_ioctl_feature(struct vfio_device *device, u32 flags, void __user *arg, size_t argsz); ssize_t vfio_pci_core_read(struct vfio_device *core_vdev, char __user *buf, @@ -125,6 +127,8 @@ ssize_t vfio_pci_core_read(struct vfio_device *core_vdev, char __user *buf, ssize_t vfio_pci_core_write(struct vfio_device *core_vdev, const char __user *buf, size_t count, loff_t *ppos); int vfio_pci_core_mmap(struct vfio_device *core_vdev, struct vm_area_struct *vma); +int vfio_pci_core_mmap2(struct vfio_device *core_vdev, struct vm_area_struct *vma, + struct vfio_mmap *core_vmmap); void vfio_pci_core_request(struct vfio_device *core_vdev, unsigned int count); int 
vfio_pci_core_match(struct vfio_device *core_vdev, char *buf); int vfio_pci_core_enable(struct vfio_pci_core_device *vdev); -- 2.47.3 Amazon Web Services Development Center Germany GmbH Tamara-Danz-Str. 13 10243 Berlin Geschaeftsfuehrung: Christian Schlaeger, Jonathan Weiss Eingetragen am Amtsgericht Charlottenburg unter HRB 257764 B Sitz: Berlin Ust-ID: DE 365 538 597 Changing the vfio-pci vfio ops to use the new ioctl and mmap ops is enough to migrate it, and it gives an initial example of the migration of vfio-pci devices. Signed-off-by: Mahmoud Adam --- drivers/vfio/pci/vfio_pci.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/vfio/pci/vfio_pci.c b/drivers/vfio/pci/vfio_pci.c index 5ba39f7623bb7..73d3eded7f95d 100644 --- a/drivers/vfio/pci/vfio_pci.c +++ b/drivers/vfio/pci/vfio_pci.c @@ -131,11 +131,11 @@ static const struct vfio_device_ops vfio_pci_ops = { .release = vfio_pci_core_release_dev, .open_device = vfio_pci_open_device, .close_device = vfio_pci_core_close_device, - .ioctl = vfio_pci_core_ioctl, + .ioctl2 = vfio_pci_core_ioctl2, .device_feature = vfio_pci_core_ioctl_feature, .read = vfio_pci_core_read, .write = vfio_pci_core_write, - .mmap = vfio_pci_core_mmap, + .mmap2 = vfio_pci_core_mmap2, .request = vfio_pci_core_request, .match = vfio_pci_core_match, .bind_iommufd = vfio_iommufd_physical_bind, -- 2.47.3 Amazon Web Services Development Center Germany GmbH Tamara-Danz-Str. 13 10243 Berlin Geschaeftsfuehrung: Christian Schlaeger, Jonathan Weiss Eingetragen am Amtsgericht Charlottenburg unter HRB 257764 B Sitz: Berlin Ust-ID: DE 365 538 597 This uapi allows setting mmap attributes for a specified region offset. The region offset is expected to be the value returned by VFIO_DEVICE_GET_REGION_INFO or similar, where the vmmap mt entry was created. Start with the write_combine attribute, which the user can use to request that mmap use WC. 
vfio devices are expected to load the vmmap entry from the mt, do the needed region-specific checks, and set the attributes accordingly. Signed-off-by: Mahmoud Adam --- drivers/vfio/vfio_main.c | 1 + include/linux/vfio.h | 1 + include/uapi/linux/vfio.h | 19 +++++++++++++++++++ 3 files changed, 21 insertions(+) diff --git a/drivers/vfio/vfio_main.c b/drivers/vfio/vfio_main.c index 3275ff56eef47..58c3cf12a5317 100644 --- a/drivers/vfio/vfio_main.c +++ b/drivers/vfio/vfio_main.c @@ -633,6 +633,7 @@ void vfio_mmap_init(struct vfio_device *vdev, struct vfio_mmap *vmmap, vmmap->ops = ops; vmmap->size = size; vmmap->region_flags = region_flags; + vmmap->attrs = 0; } EXPORT_SYMBOL_GPL(vfio_mmap_init); diff --git a/include/linux/vfio.h b/include/linux/vfio.h index 836ef72a38104..5885df1729183 100644 --- a/include/linux/vfio.h +++ b/include/linux/vfio.h @@ -85,6 +85,7 @@ struct vfio_mmap { u64 offset; u64 size; u32 region_flags; + u32 attrs; struct vfio_mmap_ops *ops; }; diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h index 5764f315137f9..2e3fa90eef5a3 100644 --- a/include/uapi/linux/vfio.h +++ b/include/uapi/linux/vfio.h @@ -1831,6 +1831,25 @@ struct vfio_iommu_spapr_tce_remove { }; #define VFIO_IOMMU_SPAPR_TCE_REMOVE _IO(VFIO_TYPE, VFIO_BASE + 20) +/** + * VFIO_DEVICE_SET_MMAP_ATTRS - _IOW(VFIO_TYPE, VFIO_BASE + 21, struct vfio_mmap_attrs) + * + * Set memory mapping attributes for a specified region offset before + * calling mmap, it expects that the offset used was fetched by + * calling VFIO_DEVICE_GET_REGION_INFO. + * + * Attributes supported: + * - VFIO_MMAP_ATTR_WRITE_COMBINE: use write-combine when requested to mmap this offset. + * + * Return: 0 on success, -errno on failure. 
+ */ +struct vfio_mmap_attrs { + __u64 offset; /* Region offset */ + __u32 attrs; +#define VFIO_MMAP_ATTR_WRITE_COMBINE (1 << 0) +}; +#define VFIO_DEVICE_SET_MMAP_ATTRS _IO(VFIO_TYPE, VFIO_BASE + 21) + /* ***************************************************************** */ #endif /* _UAPIVFIO_H */ -- 2.47.3 Amazon Web Services Development Center Germany GmbH Tamara-Danz-Str. 13 10243 Berlin Geschaeftsfuehrung: Christian Schlaeger, Jonathan Weiss Eingetragen am Amtsgericht Charlottenburg unter HRB 257764 B Sitz: Berlin Ust-ID: DE 365 538 597 Now we established the required dependencies to support WC through the new vmmap in vfio_pci, this implements the new uapi & checks if the WC attr is set while mmaping, then calls pgprot_writecombine. Signed-off-by: Mahmoud Adam --- drivers/vfio/pci/vfio_pci_core.c | 31 ++++++++++++++++++++++++++++++- 1 file changed, 30 insertions(+), 1 deletion(-) diff --git a/drivers/vfio/pci/vfio_pci_core.c b/drivers/vfio/pci/vfio_pci_core.c index 8418d98ac66ce..461440700af75 100644 --- a/drivers/vfio/pci/vfio_pci_core.c +++ b/drivers/vfio/pci/vfio_pci_core.c @@ -1198,6 +1198,30 @@ static int vfio_pci_ioctl_get_region_info2(struct vfio_pci_core_device *vdev, return _vfio_pci_ioctl_get_region_info(vdev, mmap_mt, arg); } +static int vfio_pci_ioctl_set_mmap_attrs(struct vfio_pci_core_device *vdev, + struct maple_tree *mmap_mt, + struct vfio_irq_info __user *arg) +{ + struct vfio_mmap_attrs vmmap_attrs; + struct vfio_pci_mmap *vmmap; + + if (copy_from_user(&vmmap_attrs, arg, sizeof(vmmap_attrs))) + return -EFAULT; + + if (vmmap_attrs.attrs & ~VFIO_MMAP_ATTR_WRITE_COMBINE) + return -EINVAL; + + vmmap = mtree_load(mmap_mt, vmmap_attrs.offset); + if (!vmmap) + return -EINVAL; + + if (!(vmmap->core.region_flags & VFIO_REGION_INFO_FLAG_MMAP)) + return -EINVAL; + + vmmap->core.attrs = vmmap_attrs.attrs; + return 0; +} + static int vfio_pci_ioctl_get_irq_info(struct vfio_pci_core_device *vdev, struct vfio_irq_info __user *arg) { @@ -1549,6 +1573,8 @@ 
long vfio_pci_core_ioctl2(struct vfio_device *core_vdev, unsigned int cmd, switch (cmd) { case VFIO_DEVICE_GET_REGION_INFO: return vfio_pci_ioctl_get_region_info2(vdev, mmap_mt, uarg); + case VFIO_DEVICE_SET_MMAP_ATTRS: + return vfio_pci_ioctl_set_mmap_attrs(vdev, mmap_mt, uarg); default: return vfio_pci_core_ioctl(core_vdev, cmd, arg); } @@ -1855,7 +1881,10 @@ static int _vfio_pci_core_mmap(struct vfio_device *core_vdev, } vma->vm_private_data = vdev; - vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); + if (vmmap && vmmap->core.attrs & VFIO_MMAP_ATTR_WRITE_COMBINE) + vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot); + else + vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); vma->vm_page_prot = pgprot_decrypted(vma->vm_page_prot); /* -- 2.47.3 Amazon Web Services Development Center Germany GmbH Tamara-Danz-Str. 13 10243 Berlin Geschaeftsfuehrung: Christian Schlaeger, Jonathan Weiss Eingetragen am Amtsgericht Charlottenburg unter HRB 257764 B Sitz: Berlin Ust-ID: DE 365 538 597