We need the ability to split PFN remap between updating the VMA and
performing the actual remap, in order to do away with the legacy
f_op->mmap hook.

To do so, update the PFN remap code to provide shared logic, and also make
remap_pfn_range_notrack() static, as its one user, io_mapping_map_user(),
was removed in commit 9a4f90e24661 ("mm: remove mm/io-mapping.c").

Then, introduce remap_pfn_range_prepare(), which accepts VMA descriptor and
PFN parameters, and remap_pfn_range_complete(), which accepts the same
parameters as remap_pfn_range().

remap_pfn_range_prepare() will set the CoW vma->vm_pgoff if necessary, so
it must be supplied with a correct PFN to do so. If the caller must hold
locks to be able to do this, those locks should be held across the
operation, and mmap_abort() should be provided to release the lock should
an error arise.

While we're here, also clean up the duplicated #ifdef __HAVE_PFNMAP_TRACKING
checks, folding them into a single #ifdef/#else block.

We would prefer to define these functions in mm/internal.h; however, we
will do the same for io_remap*(), and those have arch defines that require
access to the remap functions.

Signed-off-by: Lorenzo Stoakes
---
 include/linux/mm.h |  25 +++++++--
 mm/memory.c        | 128 ++++++++++++++++++++++++++++-----------------
 2 files changed, 102 insertions(+), 51 deletions(-)

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 0b97589aec6d..0e256823799d 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -489,6 +489,21 @@ extern unsigned int kobjsize(const void *objp);
  */
 #define VM_SPECIAL (VM_IO | VM_DONTEXPAND | VM_PFNMAP | VM_MIXEDMAP)
 
+/*
+ * Physically remapped pages are special. Tell the
+ * rest of the world about it:
+ *   VM_IO tells people not to look at these pages
+ *	(accesses can have side effects).
+ *   VM_PFNMAP tells the core MM that the base pages are just
+ *	raw PFN mappings, and do not have a "struct page" associated
+ *	with them.
+ *   VM_DONTEXPAND
+ *	Disable vma merging and expanding with mremap().
+ *   VM_DONTDUMP
+ *	Omit vma from core dump, even when VM_IO turned off.
+ */
+#define VM_REMAP_FLAGS (VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP)
+
 /* This mask prevents VMA from being scanned with khugepaged */
 #define VM_NO_KHUGEPAGED (VM_SPECIAL | VM_HUGETLB)
 
@@ -3623,10 +3638,12 @@ unsigned long change_prot_numa(struct vm_area_struct *vma,
 struct vm_area_struct *find_extend_vma_locked(struct mm_struct *,
 		unsigned long addr);
-int remap_pfn_range(struct vm_area_struct *, unsigned long addr,
-		unsigned long pfn, unsigned long size, pgprot_t);
-int remap_pfn_range_notrack(struct vm_area_struct *vma, unsigned long addr,
-		unsigned long pfn, unsigned long size, pgprot_t prot);
+int remap_pfn_range(struct vm_area_struct *vma, unsigned long addr,
+		unsigned long pfn, unsigned long size, pgprot_t pgprot);
+void remap_pfn_range_prepare(struct vm_area_desc *desc, unsigned long pfn);
+int remap_pfn_range_complete(struct vm_area_struct *vma, unsigned long addr,
+		unsigned long pfn, unsigned long size, pgprot_t pgprot);
+
 int vm_insert_page(struct vm_area_struct *, unsigned long addr, struct page *);
 int vm_insert_pages(struct vm_area_struct *vma, unsigned long addr,
 			struct page **pages, unsigned long *num);
diff --git a/mm/memory.c b/mm/memory.c
index 3e0404bd57a0..5c4d5261996d 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -2903,8 +2903,27 @@ static inline int remap_p4d_range(struct mm_struct *mm, pgd_t *pgd,
 	return 0;
 }
 
+static int get_remap_pgoff(vm_flags_t vm_flags, unsigned long addr,
+		unsigned long end, unsigned long vm_start, unsigned long vm_end,
+		unsigned long pfn, pgoff_t *vm_pgoff_p)
+{
+	/*
+	 * There's a horrible special case to handle copy-on-write
+	 * behaviour that some programs depend on. We mark the "original"
+	 * un-COW'ed pages by matching them up with "vma->vm_pgoff".
+	 * See vm_normal_page() for details.
+	 */
+	if (is_cow_mapping(vm_flags)) {
+		if (addr != vm_start || end != vm_end)
+			return -EINVAL;
+		*vm_pgoff_p = pfn;
+	}
+
+	return 0;
+}
+
 static int remap_pfn_range_internal(struct vm_area_struct *vma, unsigned long addr,
-		unsigned long pfn, unsigned long size, pgprot_t prot)
+		unsigned long pfn, unsigned long size, pgprot_t prot, bool set_vma)
 {
 	pgd_t *pgd;
 	unsigned long next;
@@ -2915,32 +2934,17 @@ static int remap_pfn_range_internal(struct vm_area_struct *vma, unsigned long ad
 	if (WARN_ON_ONCE(!PAGE_ALIGNED(addr)))
 		return -EINVAL;
 
-	/*
-	 * Physically remapped pages are special. Tell the
-	 * rest of the world about it:
-	 *   VM_IO tells people not to look at these pages
-	 *	(accesses can have side effects).
-	 *   VM_PFNMAP tells the core MM that the base pages are just
-	 *	raw PFN mappings, and do not have a "struct page" associated
-	 *	with them.
-	 *   VM_DONTEXPAND
-	 *	Disable vma merging and expanding with mremap().
-	 *   VM_DONTDUMP
-	 *	Omit vma from core dump, even when VM_IO turned off.
-	 *
-	 * There's a horrible special case to handle copy-on-write
-	 * behaviour that some programs depend on. We mark the "original"
-	 * un-COW'ed pages by matching them up with "vma->vm_pgoff".
-	 * See vm_normal_page() for details.
-	 */
-	if (is_cow_mapping(vma->vm_flags)) {
-		if (addr != vma->vm_start || end != vma->vm_end)
-			return -EINVAL;
-		vma->vm_pgoff = pfn;
+	if (set_vma) {
+		err = get_remap_pgoff(vma->vm_flags, addr, end,
+				      vma->vm_start, vma->vm_end,
+				      pfn, &vma->vm_pgoff);
+		if (err)
+			return err;
+		vm_flags_set(vma, VM_REMAP_FLAGS);
+	} else {
+		VM_WARN_ON_ONCE((vma->vm_flags & VM_REMAP_FLAGS) != VM_REMAP_FLAGS);
 	}
 
-	vm_flags_set(vma, VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP);
-
 	BUG_ON(addr >= end);
 	pfn -= addr >> PAGE_SHIFT;
 	pgd = pgd_offset(mm, addr);
@@ -2960,11 +2964,10 @@ static int remap_pfn_range_internal(struct vm_area_struct *vma, unsigned long ad
  * Variant of remap_pfn_range that does not call track_pfn_remap. The caller
  * must have pre-validated the caching bits of the pgprot_t.
  */
-int remap_pfn_range_notrack(struct vm_area_struct *vma, unsigned long addr,
-		unsigned long pfn, unsigned long size, pgprot_t prot)
+static int remap_pfn_range_notrack(struct vm_area_struct *vma, unsigned long addr,
+		unsigned long pfn, unsigned long size, pgprot_t prot, bool set_vma)
 {
-	int error = remap_pfn_range_internal(vma, addr, pfn, size, prot);
-
+	int error = remap_pfn_range_internal(vma, addr, pfn, size, prot, set_vma);
 	if (!error)
 		return 0;
 
@@ -2977,6 +2980,18 @@ int remap_pfn_range_notrack(struct vm_area_struct *vma, unsigned long addr,
 	return error;
 }
 
+void remap_pfn_range_prepare(struct vm_area_desc *desc, unsigned long pfn)
+{
+	/*
+	 * We set addr=VMA start, end=VMA end here, so this won't fail, but we
+	 * check it again on complete and will fail there if specified addr is
+	 * invalid.
+	 */
+	get_remap_pgoff(desc->vm_flags, desc->start, desc->end,
+			desc->start, desc->end, pfn, &desc->pgoff);
+	desc->vm_flags |= VM_REMAP_FLAGS;
+}
+
 #ifdef __HAVE_PFNMAP_TRACKING
 static inline struct pfnmap_track_ctx *pfnmap_track_ctx_alloc(unsigned long pfn,
 		unsigned long size, pgprot_t *prot)
@@ -3005,23 +3020,9 @@ void pfnmap_track_ctx_release(struct kref *ref)
 	pfnmap_untrack(ctx->pfn, ctx->size);
 	kfree(ctx);
 }
-#endif /* __HAVE_PFNMAP_TRACKING */
 
-/**
- * remap_pfn_range - remap kernel memory to userspace
- * @vma: user vma to map to
- * @addr: target page aligned user address to start at
- * @pfn: page frame number of kernel physical memory address
- * @size: size of mapping area
- * @prot: page protection flags for this mapping
- *
- * Note: this is only safe if the mm semaphore is held when called.
- *
- * Return: %0 on success, negative error code otherwise.
- */
-#ifdef __HAVE_PFNMAP_TRACKING
-int remap_pfn_range(struct vm_area_struct *vma, unsigned long addr,
-		unsigned long pfn, unsigned long size, pgprot_t prot)
+static int remap_pfn_range_track(struct vm_area_struct *vma, unsigned long addr,
+		unsigned long pfn, unsigned long size, pgprot_t prot, bool set_vma)
 {
 	struct pfnmap_track_ctx *ctx = NULL;
 	int err;
@@ -3047,7 +3048,7 @@ int remap_pfn_range(struct vm_area_struct *vma, unsigned long addr,
 		return -EINVAL;
 	}
 
-	err = remap_pfn_range_notrack(vma, addr, pfn, size, prot);
+	err = remap_pfn_range_notrack(vma, addr, pfn, size, prot, set_vma);
 	if (ctx) {
 		if (err)
 			kref_put(&ctx->kref, pfnmap_track_ctx_release);
@@ -3057,11 +3058,44 @@ int remap_pfn_range(struct vm_area_struct *vma, unsigned long addr,
 	return err;
 }
 
+/**
+ * remap_pfn_range - remap kernel memory to userspace
+ * @vma: user vma to map to
+ * @addr: target page aligned user address to start at
+ * @pfn: page frame number of kernel physical memory address
+ * @size: size of mapping area
+ * @prot: page protection flags for this mapping
+ *
+ * Note: this is only safe if the mm semaphore is held when called.
+ *
+ * Return: %0 on success, negative error code otherwise.
+ */
+int remap_pfn_range(struct vm_area_struct *vma, unsigned long addr,
+		unsigned long pfn, unsigned long size, pgprot_t prot)
+{
+	return remap_pfn_range_track(vma, addr, pfn, size, prot,
+			/* set_vma = */true);
+}
+
+int remap_pfn_range_complete(struct vm_area_struct *vma, unsigned long addr,
+		unsigned long pfn, unsigned long size, pgprot_t prot)
+{
+	/* With set_vma = false, the VMA will not be modified. */
+	return remap_pfn_range_track(vma, addr, pfn, size, prot,
+			/* set_vma = */false);
+}
 #else
 int remap_pfn_range(struct vm_area_struct *vma, unsigned long addr,
 		unsigned long pfn, unsigned long size, pgprot_t prot)
 {
-	return remap_pfn_range_notrack(vma, addr, pfn, size, prot);
+	return remap_pfn_range_notrack(vma, addr, pfn, size, prot, /* set_vma = */true);
+}
+
+int remap_pfn_range_complete(struct vm_area_struct *vma, unsigned long addr,
+		unsigned long pfn, unsigned long size, pgprot_t prot)
+{
+	return remap_pfn_range_notrack(vma, addr, pfn, size, prot,
+			/* set_vma = */false);
 }
 #endif
 EXPORT_SYMBOL(remap_pfn_range);
-- 
2.51.0
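
For reviewers following along, a rough sketch of how a driver might consume the split API
once the legacy f_op->mmap hook is gone: call remap_pfn_range_prepare() from the
descriptor-based prepare hook (no page tables touched yet), then remap_pfn_range_complete()
once the VMA exists. The hook names and signatures, the my_dev structure and the way the
PFN is derived below are illustrative assumptions only, not something this patch defines:

static int my_dev_mmap_prepare(struct vm_area_desc *desc)
{
	/* my_dev, base_paddr and the desc->file layout are hypothetical. */
	struct my_dev *dev = desc->file->private_data;
	unsigned long pfn = dev->base_paddr >> PAGE_SHIFT;

	/* Only sets desc->pgoff (CoW case) and desc->vm_flags; no remap yet. */
	remap_pfn_range_prepare(desc, pfn);
	return 0;
}

static int my_dev_mmap_complete(struct file *file, struct vm_area_struct *vma)
{
	struct my_dev *dev = file->private_data;
	unsigned long pfn = dev->base_paddr >> PAGE_SHIFT;

	/*
	 * The VMA flags were already applied via the descriptor, so this
	 * call only validates and populates the page tables
	 * (set_vma == false internally).
	 */
	return remap_pfn_range_complete(vma, vma->vm_start, pfn,
					vma->vm_end - vma->vm_start,
					vma->vm_page_prot);
}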