Update the mem char driver (backing /dev/mem and /dev/zero) to use the
f_op->mmap_prepare hook rather than the deprecated f_op->mmap.

The /dev/zero implementation has a highly unusual and rather concerning
characteristic in that it converts MAP_PRIVATE mmap() mappings to
anonymous ones when they are, in fact, not.

The new f_op->mmap_prepare() can support this, but rather than
introducing a helper function to perform this hack (and risk introducing
other users), simply set desc->vm_ops to NULL here and add a comment
describing what's going on.

We also introduce shmem_zero_setup_desc() to allow for the shared
mapping case via an f_op->mmap_prepare() hook, and generalise the code
shared between this and shmem_zero_setup().

We also use desc->action.error_hook to filter the remap error to
-EAGAIN to keep behaviour consistent.

Signed-off-by: Lorenzo Stoakes
---
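Notes (not for commit): for reviewers unfamiliar with the hook, below is
a minimal sketch of the mmap_prepare pattern this patch applies. The
foo_* names are hypothetical; struct vm_area_desc, vma_desc_size(),
mmap_action_remap() and desc->action.error_hook are the interfaces the
diff actually uses.

static const struct vm_operations_struct foo_vm_ops; /* hypothetical */

/* Keep the old behaviour of collapsing any remap failure to -EAGAIN. */
static int foo_filter_error(int err)
{
	return -EAGAIN;
}

static int foo_mmap_prepare(struct vm_area_desc *desc)
{
	const size_t size = vma_desc_size(desc);

	/* ... validate desc->pgoff and size here, returning -errno ... */

	desc->vm_ops = &foo_vm_ops;

	/*
	 * Describe the PFN remap rather than performing it; the mmap core
	 * carries it out once the VMA is established.
	 */
	mmap_action_remap(&desc->action, desc->start, desc->pgoff, size,
			  desc->page_prot);
	desc->action.error_hook = foo_filter_error;

	return 0;
}

The point being that, unlike f_op->mmap, the hook only describes the
mapping; the remap (and any error filtering) is performed by the mmap
core after the VMA is established.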
 drivers/char/mem.c       | 75 ++++++++++++++++++++++------------------
 include/linux/shmem_fs.h |  3 +-
 mm/shmem.c               | 40 ++++++++++++++++-----
 3 files changed, 76 insertions(+), 42 deletions(-)

diff --git a/drivers/char/mem.c b/drivers/char/mem.c
index 34b815901b20..23194788ee41 100644
--- a/drivers/char/mem.c
+++ b/drivers/char/mem.c
@@ -304,13 +304,13 @@ static unsigned zero_mmap_capabilities(struct file *file)
 }
 
 /* can't do an in-place private mapping if there's no MMU */
-static inline int private_mapping_ok(struct vm_area_struct *vma)
+static inline int private_mapping_ok(struct vm_area_desc *desc)
 {
-	return is_nommu_shared_mapping(vma->vm_flags);
+	return is_nommu_shared_mapping(desc->vm_flags);
 }
 #else
 
-static inline int private_mapping_ok(struct vm_area_struct *vma)
+static inline int private_mapping_ok(struct vm_area_desc *desc)
 {
 	return 1;
 }
@@ -322,46 +322,50 @@ static const struct vm_operations_struct mmap_mem_ops = {
 #endif
 };
 
-static int mmap_mem(struct file *file, struct vm_area_struct *vma)
+static int mmap_filter_error(int err)
 {
-	size_t size = vma->vm_end - vma->vm_start;
-	phys_addr_t offset = (phys_addr_t)vma->vm_pgoff << PAGE_SHIFT;
+	return -EAGAIN;
+}
+
+static int mmap_mem_prepare(struct vm_area_desc *desc)
+{
+	struct file *file = desc->file;
+	const size_t size = vma_desc_size(desc);
+	const phys_addr_t offset = (phys_addr_t)desc->pgoff << PAGE_SHIFT;
 
 	/* Does it even fit in phys_addr_t? */
-	if (offset >> PAGE_SHIFT != vma->vm_pgoff)
+	if (offset >> PAGE_SHIFT != desc->pgoff)
 		return -EINVAL;
 
 	/* It's illegal to wrap around the end of the physical address space. */
 	if (offset + (phys_addr_t)size - 1 < offset)
 		return -EINVAL;
 
-	if (!valid_mmap_phys_addr_range(vma->vm_pgoff, size))
+	if (!valid_mmap_phys_addr_range(desc->pgoff, size))
 		return -EINVAL;
 
-	if (!private_mapping_ok(vma))
+	if (!private_mapping_ok(desc))
 		return -ENOSYS;
 
-	if (!range_is_allowed(vma->vm_pgoff, size))
+	if (!range_is_allowed(desc->pgoff, size))
 		return -EPERM;
 
-	if (!phys_mem_access_prot_allowed(file, vma->vm_pgoff, size,
-						&vma->vm_page_prot))
+	if (!phys_mem_access_prot_allowed(file, desc->pgoff, size,
+					  &desc->page_prot))
 		return -EINVAL;
 
-	vma->vm_page_prot = phys_mem_access_prot(file, vma->vm_pgoff,
-						 size,
-						 vma->vm_page_prot);
+	desc->page_prot = phys_mem_access_prot(file, desc->pgoff,
+					       size,
+					       desc->page_prot);
 
-	vma->vm_ops = &mmap_mem_ops;
+	desc->vm_ops = &mmap_mem_ops;
 
 	/* Remap-pfn-range will mark the range VM_IO */
-	if (remap_pfn_range(vma,
-			    vma->vm_start,
-			    vma->vm_pgoff,
-			    size,
-			    vma->vm_page_prot)) {
-		return -EAGAIN;
-	}
+	mmap_action_remap(&desc->action, desc->start, desc->pgoff, size,
+			  desc->page_prot);
+	/* We filter remap errors to -EAGAIN to keep behaviour consistent. */
+	desc->action.error_hook = mmap_filter_error;
+
 	return 0;
 }
 
@@ -501,14 +505,18 @@ static ssize_t read_zero(struct file *file, char __user *buf,
 	return cleared;
 }
 
-static int mmap_zero(struct file *file, struct vm_area_struct *vma)
+static int mmap_prepare_zero(struct vm_area_desc *desc)
 {
#ifndef CONFIG_MMU
 	return -ENOSYS;
 #endif
-	if (vma->vm_flags & VM_SHARED)
-		return shmem_zero_setup(vma);
-	vma_set_anonymous(vma);
+	if (desc->vm_flags & VM_SHARED)
+		return shmem_zero_setup_desc(desc);
+	/*
+	 * This is a highly unusual situation where we mark a MAP_PRIVATE
+	 * mapping of /dev/zero anonymous, despite it not actually being one.
+	 */
+	desc->vm_ops = NULL;
 	return 0;
 }
 
@@ -526,10 +534,11 @@ static unsigned long get_unmapped_area_zero(struct file *file,
 {
 	if (flags & MAP_SHARED) {
 		/*
-		 * mmap_zero() will call shmem_zero_setup() to create a file,
-		 * so use shmem's get_unmapped_area in case it can be huge;
-		 * and pass NULL for file as in mmap.c's get_unmapped_area(),
-		 * so as not to confuse shmem with our handle on "/dev/zero".
+		 * mmap_prepare_zero() will call shmem_zero_setup_desc() to
+		 * create a file, so use shmem's get_unmapped_area in case it
+		 * can be huge; and pass NULL for file as in mmap.c's
+		 * get_unmapped_area(), so as not to confuse shmem with our
+		 * handle on "/dev/zero".
 		 */
 		return shmem_get_unmapped_area(NULL, addr, len, pgoff, flags);
 	}
@@ -632,7 +641,7 @@ static const struct file_operations __maybe_unused mem_fops = {
 	.llseek		= memory_lseek,
 	.read		= read_mem,
 	.write		= write_mem,
-	.mmap		= mmap_mem,
+	.mmap_prepare	= mmap_mem_prepare,
 	.open		= open_mem,
 #ifndef CONFIG_MMU
 	.get_unmapped_area = get_unmapped_area_mem,
@@ -668,7 +677,7 @@ static const struct file_operations zero_fops = {
 	.write_iter	= write_iter_zero,
 	.splice_read	= copy_splice_read,
 	.splice_write	= splice_write_zero,
-	.mmap		= mmap_zero,
+	.mmap_prepare	= mmap_prepare_zero,
 	.get_unmapped_area = get_unmapped_area_zero,
 #ifndef CONFIG_MMU
 	.mmap_capabilities = zero_mmap_capabilities,
diff --git a/include/linux/shmem_fs.h b/include/linux/shmem_fs.h
index 0e47465ef0fd..5b368f9549d6 100644
--- a/include/linux/shmem_fs.h
+++ b/include/linux/shmem_fs.h
@@ -94,7 +94,8 @@ extern struct file *shmem_kernel_file_setup(const char *name, loff_t size,
 					    unsigned long flags);
 extern struct file *shmem_file_setup_with_mnt(struct vfsmount *mnt,
 		const char *name, loff_t size, unsigned long flags);
-extern int shmem_zero_setup(struct vm_area_struct *);
+int shmem_zero_setup(struct vm_area_struct *vma);
+int shmem_zero_setup_desc(struct vm_area_desc *desc);
 extern unsigned long shmem_get_unmapped_area(struct file *, unsigned long addr,
 		unsigned long len, unsigned long pgoff, unsigned long flags);
 extern int shmem_lock(struct file *file, int lock, struct ucounts *ucounts);
diff --git a/mm/shmem.c b/mm/shmem.c
index 990e33c6a776..cb6ff00eb4cb 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -5893,14 +5893,9 @@ struct file *shmem_file_setup_with_mnt(struct vfsmount *mnt, const char *name,
 }
 EXPORT_SYMBOL_GPL(shmem_file_setup_with_mnt);
 
-/**
- * shmem_zero_setup - setup a shared anonymous mapping
- * @vma: the vma to be mmapped is prepared by do_mmap
- */
-int shmem_zero_setup(struct vm_area_struct *vma)
+static struct file *__shmem_zero_setup(unsigned long start, unsigned long end, vm_flags_t vm_flags)
 {
-	struct file *file;
-	loff_t size = vma->vm_end - vma->vm_start;
+	loff_t size = end - start;
 
 	/*
 	 * Cloning a new file under mmap_lock leads to a lock ordering conflict
@@ -5908,7 +5903,17 @@
 	 * accessible to the user through its mapping, use S_PRIVATE flag to
 	 * bypass file security, in the same way as shmem_kernel_file_setup().
 	 */
-	file = shmem_kernel_file_setup("dev/zero", size, vma->vm_flags);
+	return shmem_kernel_file_setup("dev/zero", size, vm_flags);
+}
+
+/**
+ * shmem_zero_setup - setup a shared anonymous mapping
+ * @vma: the vma to be mmapped is prepared by do_mmap
+ */
+int shmem_zero_setup(struct vm_area_struct *vma)
+{
+	struct file *file = __shmem_zero_setup(vma->vm_start, vma->vm_end, vma->vm_flags);
+
 	if (IS_ERR(file))
 		return PTR_ERR(file);
@@ -5920,6 +5925,25 @@
 	return 0;
 }
 
+/**
+ * shmem_zero_setup_desc - same as shmem_zero_setup(), but takes a VMA
+ * descriptor for convenience.
+ * @desc: Describes the VMA to set up.
+ * Returns: 0 on success, or an error.
+ */
+int shmem_zero_setup_desc(struct vm_area_desc *desc)
+{
+	struct file *file = __shmem_zero_setup(desc->start, desc->end, desc->vm_flags);
+
+	if (IS_ERR(file))
+		return PTR_ERR(file);
+
+	desc->vm_file = file;
+	desc->vm_ops = &shmem_anon_vm_ops;
+
+	return 0;
+}
+
 /**
  * shmem_read_folio_gfp - read into page cache, using specified page allocation flags.
  * @mapping: the folio's address_space
-- 
2.51.0