From: Ankit Agrawal

To make use of the huge pfnmap support and to support the zap/remap
sequence, a fault/huge_fault ops based mapping mechanism needs to be
implemented.

Currently the nvgrace-gpu module relies on remap_pfn_range() to do the
mapping during VM bootup. Replace it to instead rely on the fault op
and use vmf_insert_pfn() to set up the mapping.

Signed-off-by: Ankit Agrawal
---
 drivers/vfio/pci/nvgrace-gpu/main.c | 50 ++++++++++++++++++-----------
 1 file changed, 31 insertions(+), 19 deletions(-)

diff --git a/drivers/vfio/pci/nvgrace-gpu/main.c b/drivers/vfio/pci/nvgrace-gpu/main.c
index e346392b72f6..ecfecd0916c9 100644
--- a/drivers/vfio/pci/nvgrace-gpu/main.c
+++ b/drivers/vfio/pci/nvgrace-gpu/main.c
@@ -130,6 +130,33 @@ static void nvgrace_gpu_close_device(struct vfio_device *core_vdev)
 	vfio_pci_core_close_device(core_vdev);
 }
 
+static vm_fault_t nvgrace_gpu_vfio_pci_fault(struct vm_fault *vmf)
+{
+	struct vm_area_struct *vma = vmf->vma;
+	struct nvgrace_gpu_pci_core_device *nvdev = vma->vm_private_data;
+	int index = vma->vm_pgoff >> (VFIO_PCI_OFFSET_SHIFT - PAGE_SHIFT);
+	vm_fault_t ret = VM_FAULT_SIGBUS;
+	struct mem_region *memregion;
+	unsigned long pgoff, pfn;
+
+	memregion = nvgrace_gpu_memregion(index, nvdev);
+	if (!memregion)
+		return ret;
+
+	pgoff = (vmf->address - vma->vm_start) >> PAGE_SHIFT;
+	pfn = PHYS_PFN(memregion->memphys) + pgoff;
+
+	down_read(&nvdev->core_device.memory_lock);
+	ret = vmf_insert_pfn(vmf->vma, vmf->address, pfn);
+	up_read(&nvdev->core_device.memory_lock);
+
+	return ret;
+}
+
+static const struct vm_operations_struct nvgrace_gpu_vfio_pci_mmap_ops = {
+	.fault = nvgrace_gpu_vfio_pci_fault,
+};
+
 static int nvgrace_gpu_mmap(struct vfio_device *core_vdev,
 			    struct vm_area_struct *vma)
 {
@@ -137,10 +164,8 @@ static int nvgrace_gpu_mmap(struct vfio_device *core_vdev,
 		container_of(core_vdev, struct nvgrace_gpu_pci_core_device,
 			     core_device.vdev);
 	struct mem_region *memregion;
-	unsigned long start_pfn;
 	u64 req_len, pgoff, end;
 	unsigned int index;
-	int ret = 0;
 
 	index = vma->vm_pgoff >> (VFIO_PCI_OFFSET_SHIFT - PAGE_SHIFT);
 
@@ -157,7 +182,6 @@ static int nvgrace_gpu_mmap(struct vfio_device *core_vdev,
 		((1U << (VFIO_PCI_OFFSET_SHIFT - PAGE_SHIFT)) - 1);
 
 	if (check_sub_overflow(vma->vm_end, vma->vm_start, &req_len) ||
-	    check_add_overflow(PHYS_PFN(memregion->memphys), pgoff, &start_pfn) ||
 	    check_add_overflow(PFN_PHYS(pgoff), req_len, &end))
 		return -EOVERFLOW;
 
@@ -168,6 +192,8 @@ static int nvgrace_gpu_mmap(struct vfio_device *core_vdev,
 	if (end > memregion->memlength)
 		return -EINVAL;
 
+	vm_flags_set(vma, VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP);
+
 	/*
 	 * The carved out region of the device memory needs the NORMAL_NC
 	 * property. Communicate as such to the hypervisor.
@@ -184,23 +210,9 @@ static int nvgrace_gpu_mmap(struct vfio_device *core_vdev,
 		vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot);
 	}
 
-	/*
-	 * Perform a PFN map to the memory and back the device BAR by the
-	 * GPU memory.
-	 *
-	 * The available GPU memory size may not be power-of-2 aligned. The
-	 * remainder is only backed by vfio_device_ops read/write handlers.
-	 *
-	 * During device reset, the GPU is safely disconnected to the CPU
-	 * and access to the BAR will be immediately returned preventing
-	 * machine check.
-	 */
-	ret = remap_pfn_range(vma, vma->vm_start, start_pfn,
-			      req_len, vma->vm_page_prot);
-	if (ret)
-		return ret;
 
-	vma->vm_pgoff = start_pfn;
+	vma->vm_ops = &nvgrace_gpu_vfio_pci_mmap_ops;
+	vma->vm_private_data = nvdev;
 
 	return 0;
 }
-- 
2.34.1
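
Not part of the patch: a minimal userspace sketch of how the new path is
exercised. It assumes an already-opened VFIO device fd and that the
cacheable GPU memory region is exposed at BAR4 (both are assumptions
made only for illustration). With this change mmap() merely installs the
vm_ops and vm_private_data; the first CPU access to the mapping is what
invokes nvgrace_gpu_vfio_pci_fault() and vmf_insert_pfn().

/*
 * Illustrative only. "device_fd" and the BAR4 region index are assumed.
 */
#include <linux/vfio.h>
#include <stdint.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <sys/mman.h>

static int touch_gpu_mem(int device_fd)
{
	struct vfio_region_info info = {
		.argsz = sizeof(info),
		.index = VFIO_PCI_BAR4_REGION_INDEX,	/* assumed region */
	};
	volatile uint8_t *p;

	if (ioctl(device_fd, VFIO_DEVICE_GET_REGION_INFO, &info))
		return -1;

	/* mmap() no longer pre-populates PTEs with this patch applied. */
	p = mmap(NULL, info.size, PROT_READ | PROT_WRITE, MAP_SHARED,
		 device_fd, info.offset);
	if (p == MAP_FAILED)
		return -1;

	/* First touch faults; the driver inserts the PFN lazily. */
	p[0] = 0xaa;
	printf("read back 0x%x\n", (unsigned int)p[0]);

	munmap((void *)p, info.size);
	return 0;
}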