From: Nikita Kalyazin

Move the MM-generic parts of guest_memfd from KVM to MM.  This allows
other hypervisors to reuse the guestmem code and enables a userfaultfd
implementation for guest_memfd [1].  Previously this was not possible
because KVM (and with it the guest_memfd code) may be built as a module.

Based on a patch by Elliot Berman [2].

[1] https://lore.kernel.org/kvm/20250404154352.23078-1-kalyazin@amazon.com
[2] https://lore.kernel.org/kvm/20241122-guestmem-library-v5-2-450e92951a15@quicinc.com

Signed-off-by: Nikita Kalyazin
---
 MAINTAINERS              |   2 +
 include/linux/guestmem.h |  46 +++++
 mm/Kconfig               |   3 +
 mm/Makefile              |   1 +
 mm/guestmem.c            | 380 +++++++++++++++++++++++++++++++++++++++
 virt/kvm/Kconfig         |   1 +
 virt/kvm/guest_memfd.c   | 303 ++++---------------------------
 7 files changed, 465 insertions(+), 271 deletions(-)
 create mode 100644 include/linux/guestmem.h
 create mode 100644 mm/guestmem.c

diff --git a/MAINTAINERS b/MAINTAINERS
index fed6cd812d79..c468c4847ffd 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -15956,6 +15956,7 @@ W:	http://www.linux-mm.org
 T:	git git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
 T:	quilt git://git.kernel.org/pub/scm/linux/kernel/git/akpm/25-new
 F:	mm/
+F:	mm/guestmem.c
 F:	tools/mm/
 
 MEMORY MANAGEMENT - CORE
@@ -15973,6 +15974,7 @@ W:	http://www.linux-mm.org
 T:	git git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
 F:	include/linux/gfp.h
 F:	include/linux/gfp_types.h
+F:	include/linux/guestmem.h
 F:	include/linux/highmem.h
 F:	include/linux/memory.h
 F:	include/linux/mm.h
diff --git a/include/linux/guestmem.h b/include/linux/guestmem.h
new file mode 100644
index 000000000000..2a173261d32b
--- /dev/null
+++ b/include/linux/guestmem.h
@@ -0,0 +1,46 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _LINUX_GUESTMEM_H
+#define _LINUX_GUESTMEM_H
+
+#include
+
+struct address_space;
+struct list_head;
+struct inode;
+
+/**
+ * struct guestmem_ops - Hypervisor-specific maintenance operations
+ * @release_folio - Try to bring the folio back to being fully owned by Linux,
+ *		    for instance because the folio is about to be freed [optional]
+ * @invalidate_begin - start invalidating mappings between start and end offsets
+ * @invalidate_end - paired with ->invalidate_begin() [optional]
+ * @supports_mmap - return true if the inode supports mmap [optional]
+ */
+struct guestmem_ops {
+	bool (*release_folio)(struct address_space *mapping,
+			      struct folio *folio);
+	void (*invalidate_begin)(struct list_head *entry, pgoff_t start,
+				 pgoff_t end);
+	void (*invalidate_end)(struct list_head *entry, pgoff_t start,
+			       pgoff_t end);
+	bool (*supports_mmap)(struct inode *inode);
+};
+
+int guestmem_attach_mapping(struct address_space *mapping,
+			    const struct guestmem_ops *const ops,
+			    struct list_head *data);
+void guestmem_detach_mapping(struct address_space *mapping,
+			     struct list_head *data);
+
+struct folio *guestmem_grab_folio(struct address_space *mapping, pgoff_t index);
+
+int guestmem_punch_hole(struct address_space *mapping, loff_t offset,
+			loff_t len);
+int guestmem_allocate(struct address_space *mapping, loff_t offset, loff_t len);
+
+bool guestmem_test_no_direct_map(struct inode *inode);
+void guestmem_mark_prepared(struct folio *folio);
+int guestmem_mmap(struct file *file, struct vm_area_struct *vma);
+bool guestmem_vma_is_guestmem(struct vm_area_struct *vma);
+
+#endif
diff --git a/mm/Kconfig b/mm/Kconfig
index e443fe8cd6cf..a3705099601f 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -1254,6 +1254,9 @@ config SECRETMEM
 	  memory areas visible only in the context of the owning process and
 	  not mapped to other processes and other kernel page tables.
 
+config GUESTMEM
+	bool
+
 config ANON_VMA_NAME
 	bool "Anonymous VMA name support"
 	depends on PROC_FS && ADVISE_SYSCALLS && MMU
diff --git a/mm/Makefile b/mm/Makefile
index ef54aa615d9d..c92892acd819 100644
--- a/mm/Makefile
+++ b/mm/Makefile
@@ -138,6 +138,7 @@ obj-$(CONFIG_PERCPU_STATS) += percpu-stats.o
 obj-$(CONFIG_ZONE_DEVICE) += memremap.o
 obj-$(CONFIG_HMM_MIRROR) += hmm.o
 obj-$(CONFIG_MEMFD_CREATE) += memfd.o
+obj-$(CONFIG_GUESTMEM) += guestmem.o
 obj-$(CONFIG_MAPPING_DIRTY_HELPERS) += mapping_dirty_helpers.o
 obj-$(CONFIG_PTDUMP) += ptdump.o
 obj-$(CONFIG_PAGE_REPORTING) += page_reporting.o
diff --git a/mm/guestmem.c b/mm/guestmem.c
new file mode 100644
index 000000000000..110087aff7e8
--- /dev/null
+++ b/mm/guestmem.c
@@ -0,0 +1,380 @@
+// SPDX-License-Identifier: GPL-2.0
+#include
+#include
+#include
+#include
+#include
+#include
+
+struct guestmem {
+	const struct guestmem_ops *ops;
+};
+
+static inline bool __guestmem_release_folio(struct address_space *mapping,
+					    struct folio *folio)
+{
+	struct guestmem *gmem = mapping->i_private_data;
+
+	if (gmem->ops->release_folio) {
+		if (!gmem->ops->release_folio(mapping, folio))
+			return false;
+	}
+
+	return true;
+}
+
+static inline void
+__guestmem_invalidate_begin(struct address_space *const mapping, pgoff_t start,
+			    pgoff_t end)
+{
+	struct guestmem *gmem = mapping->i_private_data;
+	struct list_head *entry;
+
+	list_for_each(entry, &mapping->i_private_list)
+		gmem->ops->invalidate_begin(entry, start, end);
+}
+
+static inline void
+__guestmem_invalidate_end(struct address_space *const mapping, pgoff_t start,
+			  pgoff_t end)
+{
+	struct guestmem *gmem = mapping->i_private_data;
+	struct list_head *entry;
+
+	if (gmem->ops->invalidate_end) {
+		list_for_each(entry, &mapping->i_private_list)
+			gmem->ops->invalidate_end(entry, start, end);
+	}
+}
+
+static int guestmem_write_begin(const struct kiocb *kiocb,
+				struct address_space *mapping,
+				loff_t pos, unsigned int len,
+				struct folio **foliop,
+				void **fsdata)
+{
+	struct file *file = kiocb->ki_filp;
+	pgoff_t index = pos >> PAGE_SHIFT;
+	struct folio *folio;
+
+	if (!PAGE_ALIGNED(pos) || len != PAGE_SIZE)
+		return -EINVAL;
+
+	if (pos + len > i_size_read(file_inode(file)))
+		return -EINVAL;
+
+	folio = guestmem_grab_folio(file_inode(file)->i_mapping, index);
+	if (IS_ERR(folio))
+		return -EFAULT;
+
+	if (WARN_ON_ONCE(folio_test_large(folio))) {
+		folio_unlock(folio);
+		folio_put(folio);
+		return -EFAULT;
+	}
+
+	if (folio_test_uptodate(folio)) {
+		folio_unlock(folio);
+		folio_put(folio);
+		return -ENOSPC;
+	}
+
+	*foliop = folio;
+	return 0;
+}
+
+static int guestmem_write_end(const struct kiocb *kiocb,
+			      struct address_space *mapping,
+			      loff_t pos, unsigned int len, unsigned int copied,
+			      struct folio *folio, void *fsdata)
+{
+	if (copied) {
+		if (copied < len) {
+			unsigned int from = pos & (PAGE_SIZE - 1);
+
+			folio_zero_range(folio, from + copied, len - copied);
+		}
+		guestmem_mark_prepared(folio);
+	}
+
+	folio_unlock(folio);
+	folio_put(folio);
+
+	return copied;
+}
+
+static void guestmem_free_folio(struct address_space *mapping,
+				struct folio *folio)
+{
+	WARN_ON_ONCE(!__guestmem_release_folio(mapping, folio));
+}
+
+static int guestmem_error_folio(struct address_space *mapping,
+				struct folio *folio)
+{
+	pgoff_t start, end;
+
+	filemap_invalidate_lock_shared(mapping);
+
+	start = folio->index;
+	end = start + folio_nr_pages(folio);
+
+	__guestmem_invalidate_begin(mapping, start, end);
+
+	/*
+	 * Do not truncate the range; what action is taken in response to the
+	 * error is userspace's decision (assuming the architecture supports
+	 * gracefully handling memory errors).  If/when the guest attempts to
+	 * access a poisoned page, kvm_gmem_get_pfn() will return -EHWPOISON,
+	 * at which point KVM can either terminate the VM or propagate the
+	 * error to userspace.
+	 */
+
+	__guestmem_invalidate_end(mapping, start, end);
+
+	filemap_invalidate_unlock_shared(mapping);
+	return MF_FAILED;
+}
+
+static int guestmem_migrate_folio(struct address_space *mapping,
+				  struct folio *dst, struct folio *src,
+				  enum migrate_mode mode)
+{
+	WARN_ON_ONCE(1);
+	return -EINVAL;
+}
+
+static const struct address_space_operations guestmem_aops = {
+	.dirty_folio = noop_dirty_folio,
+	.write_begin = guestmem_write_begin,
+	.write_end = guestmem_write_end,
+	.free_folio = guestmem_free_folio,
+	.error_remove_folio = guestmem_error_folio,
+	.migrate_folio = guestmem_migrate_folio,
+};
+
+int guestmem_attach_mapping(struct address_space *mapping,
+			    const struct guestmem_ops *const ops,
+			    struct list_head *data)
+{
+	struct guestmem *gmem;
+
+	if (mapping->a_ops == &guestmem_aops) {
+		gmem = mapping->i_private_data;
+		if (gmem->ops != ops)
+			return -EINVAL;
+
+		goto add;
+	}
+
+	gmem = kzalloc(sizeof(*gmem), GFP_KERNEL);
+	if (!gmem)
+		return -ENOMEM;
+
+	gmem->ops = ops;
+
+	mapping->a_ops = &guestmem_aops;
+	mapping->i_private_data = gmem;
+
+	mapping_set_gfp_mask(mapping, GFP_HIGHUSER);
+	mapping_set_inaccessible(mapping);
+	/* Unmovable mappings are supposed to be marked unevictable as well. */
+	WARN_ON_ONCE(!mapping_unevictable(mapping));
+
+add:
+	list_add(data, &mapping->i_private_list);
+	return 0;
+}
+EXPORT_SYMBOL_GPL(guestmem_attach_mapping);
+
+void guestmem_detach_mapping(struct address_space *mapping,
+			     struct list_head *data)
+{
+	list_del(data);
+
+	if (list_empty(&mapping->i_private_list)) {
+		/*
+		 * Ensures we call ->free_folio() for any allocated folios.
+		 * Any folios allocated after this point are assumed not to be
+		 * accessed by the guest, so we don't need to worry about
+		 * guestmem ops not being called on them.
+		 */
+		truncate_inode_pages(mapping, 0);
+
+		kfree(mapping->i_private_data);
+		mapping->i_private_data = NULL;
+		mapping->a_ops = &empty_aops;
+	}
+}
+EXPORT_SYMBOL_GPL(guestmem_detach_mapping);
+
+struct folio *guestmem_grab_folio(struct address_space *mapping, pgoff_t index)
+{
+	/* TODO: Support huge pages. */
+	return filemap_grab_folio(mapping, index);
+}
+EXPORT_SYMBOL_GPL(guestmem_grab_folio);
+
+int guestmem_punch_hole(struct address_space *mapping, loff_t offset,
+			loff_t len)
+{
+	pgoff_t start = offset >> PAGE_SHIFT;
+	pgoff_t end = (offset + len) >> PAGE_SHIFT;
+
+	filemap_invalidate_lock(mapping);
+	__guestmem_invalidate_begin(mapping, start, end);
+
+	truncate_inode_pages_range(mapping, offset, offset + len - 1);
+
+	__guestmem_invalidate_end(mapping, start, end);
+	filemap_invalidate_unlock(mapping);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(guestmem_punch_hole);
+
+int guestmem_allocate(struct address_space *mapping, loff_t offset, loff_t len)
+{
+	pgoff_t start, index, end;
+	int r;
+
+	/* Dedicated guest is immutable by default. */
+	if (offset + len > i_size_read(mapping->host))
+		return -EINVAL;
+
+	filemap_invalidate_lock_shared(mapping);
+
+	start = offset >> PAGE_SHIFT;
+	end = (offset + len) >> PAGE_SHIFT;
+
+	r = 0;
+	for (index = start; index < end; ) {
+		struct folio *folio;
+
+		if (signal_pending(current)) {
+			r = -EINTR;
+			break;
+		}
+
+		folio = guestmem_grab_folio(mapping, index);
+		if (IS_ERR(folio)) {
+			r = PTR_ERR(folio);
+			break;
+		}
+
+		index = folio_next_index(folio);
+
+		folio_unlock(folio);
+		folio_put(folio);
+
+		/* 64-bit only, wrapping the index should be impossible. */
+		if (WARN_ON_ONCE(!index))
+			break;
+
+		cond_resched();
+	}
+
+	filemap_invalidate_unlock_shared(mapping);
+
+	return r;
+}
+EXPORT_SYMBOL_GPL(guestmem_allocate);
+
+bool guestmem_test_no_direct_map(struct inode *inode)
+{
+	return mapping_no_direct_map(inode->i_mapping);
+}
+EXPORT_SYMBOL_GPL(guestmem_test_no_direct_map);
+
+void guestmem_mark_prepared(struct folio *folio)
+{
+	struct inode *inode = folio_inode(folio);
+
+	if (guestmem_test_no_direct_map(inode))
+		set_direct_map_valid_noflush(folio_page(folio, 0), folio_nr_pages(folio), false);
+
+	folio_mark_uptodate(folio);
+}
+EXPORT_SYMBOL_GPL(guestmem_mark_prepared);
+
+static vm_fault_t guestmem_fault_user_mapping(struct vm_fault *vmf)
+{
+	struct inode *inode = file_inode(vmf->vma->vm_file);
+	struct folio *folio;
+	vm_fault_t ret = VM_FAULT_LOCKED;
+
+	if (((loff_t)vmf->pgoff << PAGE_SHIFT) >= i_size_read(inode))
+		return VM_FAULT_SIGBUS;
+
+	folio = guestmem_grab_folio(inode->i_mapping, vmf->pgoff);
+	if (IS_ERR(folio)) {
+		int err = PTR_ERR(folio);
+
+		if (err == -EAGAIN)
+			return VM_FAULT_RETRY;
+
+		return vmf_error(err);
+	}
+
+	if (WARN_ON_ONCE(folio_test_large(folio))) {
+		ret = VM_FAULT_SIGBUS;
+		goto out_folio;
+	}
+
+	if (!folio_test_uptodate(folio)) {
+		clear_highpage(folio_page(folio, 0));
+		guestmem_mark_prepared(folio);
+	}
+
+	if (userfaultfd_minor(vmf->vma)) {
+		folio_unlock(folio);
+		/* Drop the reference taken by guestmem_grab_folio(). */
+		folio_put(folio);
+		return handle_userfault(vmf, VM_UFFD_MINOR);
+	}
+
+	vmf->page = folio_file_page(folio, vmf->pgoff);
+
+out_folio:
+	if (ret != VM_FAULT_LOCKED) {
+		folio_unlock(folio);
+		folio_put(folio);
+	}
+
+	return ret;
+}
+
+static const struct vm_operations_struct guestmem_vm_ops = {
+	.fault = guestmem_fault_user_mapping,
+};
+
+int guestmem_mmap(struct file *file, struct vm_area_struct *vma)
+{
+	struct address_space *mapping = file_inode(file)->i_mapping;
+	struct guestmem *gmem = mapping->i_private_data;
+
+	if (!gmem->ops->supports_mmap || !gmem->ops->supports_mmap(file_inode(file)))
+		return -ENODEV;
+
+	if ((vma->vm_flags & (VM_SHARED | VM_MAYSHARE)) !=
+	    (VM_SHARED | VM_MAYSHARE)) {
+		return -EINVAL;
+	}
+
+	vma->vm_ops = &guestmem_vm_ops;
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(guestmem_mmap);
+
+bool guestmem_vma_is_guestmem(struct vm_area_struct *vma)
+{
+	struct inode *inode;
+
+	if (!vma->vm_file)
+		return false;
+
+	inode = file_inode(vma->vm_file);
+	if (!inode || !inode->i_mapping || !inode->i_mapping->i_private_data)
+		return false;
+
+	return inode->i_mapping->a_ops == &guestmem_aops;
+}
diff --git a/virt/kvm/Kconfig b/virt/kvm/Kconfig
index 1b7d5be0b6c4..41e26ad33c1b 100644
--- a/virt/kvm/Kconfig
+++ b/virt/kvm/Kconfig
@@ -114,6 +114,7 @@ config KVM_GENERIC_MEMORY_ATTRIBUTES
 
 config KVM_GUEST_MEMFD
 	select XARRAY_MULTI
+	select GUESTMEM
 	bool
 
 config HAVE_KVM_ARCH_GMEM_PREPARE
diff --git a/virt/kvm/guest_memfd.c b/virt/kvm/guest_memfd.c
index 6989362c056c..15ab13bf6d40 100644
--- a/virt/kvm/guest_memfd.c
+++ b/virt/kvm/guest_memfd.c
@@ -1,6 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0
 #include
 #include
+#include
 #include
 #include
 #include
@@ -43,26 +44,6 @@ static int __kvm_gmem_prepare_folio(struct kvm *kvm, struct kvm_memory_slot *slo
 	return 0;
 }
 
-static bool kvm_gmem_test_no_direct_map(struct inode *inode)
-{
-	return ((unsigned long) inode->i_private) & GUEST_MEMFD_FLAG_NO_DIRECT_MAP;
-}
-
-static inline int kvm_gmem_mark_prepared(struct folio *folio)
-{
-	struct inode *inode = folio_inode(folio);
-	int r = 0;
-
-	if (kvm_gmem_test_no_direct_map(inode))
-		r = set_direct_map_valid_noflush(folio_page(folio, 0), folio_nr_pages(folio),
-						 false);
-
-	if (!r)
-		folio_mark_uptodate(folio);
-
-	return r;
-}
-
 /*
  * Process @folio, which contains @gfn, so that the guest can use it.
  * The folio must be locked and the gfn must be contained in @slot.
@@ -98,7 +79,7 @@ static int kvm_gmem_prepare_folio(struct kvm *kvm, struct kvm_memory_slot *slot,
 	index = ALIGN_DOWN(index, 1 << folio_order(folio));
 	r = __kvm_gmem_prepare_folio(kvm, slot, index, folio);
 	if (!r)
-		r = kvm_gmem_mark_prepared(folio);
+		guestmem_mark_prepared(folio);
 
 	return r;
 }
@@ -114,8 +95,7 @@ static int kvm_gmem_prepare_folio(struct kvm *kvm, struct kvm_memory_slot *slot,
  */
 static struct folio *kvm_gmem_get_folio(struct inode *inode, pgoff_t index)
 {
-	/* TODO: Support huge pages. */
-	return filemap_grab_folio(inode->i_mapping, index);
+	return guestmem_grab_folio(inode->i_mapping, index);
 }
 
 static void kvm_gmem_invalidate_begin(struct kvm_gmem *gmem, pgoff_t start,
@@ -167,79 +147,6 @@ static void kvm_gmem_invalidate_end(struct kvm_gmem *gmem, pgoff_t start,
 	}
 }
 
-static long kvm_gmem_punch_hole(struct inode *inode, loff_t offset, loff_t len)
-{
-	struct list_head *gmem_list = &inode->i_mapping->i_private_list;
-	pgoff_t start = offset >> PAGE_SHIFT;
-	pgoff_t end = (offset + len) >> PAGE_SHIFT;
-	struct kvm_gmem *gmem;
-
-	/*
-	 * Bindings must be stable across invalidation to ensure the start+end
-	 * are balanced.
-	 */
-	filemap_invalidate_lock(inode->i_mapping);
-
-	list_for_each_entry(gmem, gmem_list, entry)
-		kvm_gmem_invalidate_begin(gmem, start, end);
-
-	truncate_inode_pages_range(inode->i_mapping, offset, offset + len - 1);
-
-	list_for_each_entry(gmem, gmem_list, entry)
-		kvm_gmem_invalidate_end(gmem, start, end);
-
-	filemap_invalidate_unlock(inode->i_mapping);
-
-	return 0;
-}
-
-static long kvm_gmem_allocate(struct inode *inode, loff_t offset, loff_t len)
-{
-	struct address_space *mapping = inode->i_mapping;
-	pgoff_t start, index, end;
-	int r;
-
-	/* Dedicated guest is immutable by default. */
-	if (offset + len > i_size_read(inode))
-		return -EINVAL;
-
-	filemap_invalidate_lock_shared(mapping);
-
-	start = offset >> PAGE_SHIFT;
-	end = (offset + len) >> PAGE_SHIFT;
-
-	r = 0;
-	for (index = start; index < end; ) {
-		struct folio *folio;
-
-		if (signal_pending(current)) {
-			r = -EINTR;
-			break;
-		}
-
-		folio = kvm_gmem_get_folio(inode, index);
-		if (IS_ERR(folio)) {
-			r = PTR_ERR(folio);
-			break;
-		}
-
-		index = folio_next_index(folio);
-
-		folio_unlock(folio);
-		folio_put(folio);
-
-		/* 64-bit only, wrapping the index should be impossible. */
-		if (WARN_ON_ONCE(!index))
-			break;
-
-		cond_resched();
-	}
-
-	filemap_invalidate_unlock_shared(mapping);
-
-	return r;
-}
-
 static long kvm_gmem_fallocate(struct file *file, int mode, loff_t offset,
 			       loff_t len)
 {
@@ -255,9 +162,9 @@ static long kvm_gmem_fallocate(struct file *file, int mode, loff_t offset,
 		return -EINVAL;
 
 	if (mode & FALLOC_FL_PUNCH_HOLE)
-		ret = kvm_gmem_punch_hole(file_inode(file), offset, len);
+		ret = guestmem_punch_hole(file_inode(file)->i_mapping, offset, len);
 	else
-		ret = kvm_gmem_allocate(file_inode(file), offset, len);
+		ret = guestmem_allocate(file_inode(file)->i_mapping, offset, len);
 
 	if (!ret)
 		file_modified(file);
@@ -299,7 +206,7 @@ static int kvm_gmem_release(struct inode *inode, struct file *file)
 	kvm_gmem_invalidate_begin(gmem, 0, -1ul);
 	kvm_gmem_invalidate_end(gmem, 0, -1ul);
 
-	list_del(&gmem->entry);
+	guestmem_detach_mapping(inode->i_mapping, &gmem->entry);
 
 	filemap_invalidate_unlock(inode->i_mapping);
 
@@ -335,74 +242,8 @@ static bool kvm_gmem_supports_mmap(struct inode *inode)
 	return flags & GUEST_MEMFD_FLAG_MMAP;
 }
 
-static vm_fault_t kvm_gmem_fault_user_mapping(struct vm_fault *vmf)
-{
-	struct inode *inode = file_inode(vmf->vma->vm_file);
-	struct folio *folio;
-	vm_fault_t ret = VM_FAULT_LOCKED;
-
-	if (((loff_t)vmf->pgoff << PAGE_SHIFT) >= i_size_read(inode))
-		return VM_FAULT_SIGBUS;
-
-	folio = kvm_gmem_get_folio(inode, vmf->pgoff);
-	if (IS_ERR(folio)) {
-		int err = PTR_ERR(folio);
-
-		if (err == -EAGAIN)
-			return VM_FAULT_RETRY;
-
-		return vmf_error(err);
-	}
-
-	if (WARN_ON_ONCE(folio_test_large(folio))) {
-		ret = VM_FAULT_SIGBUS;
-		goto out_folio;
-	}
-
-	if (!folio_test_uptodate(folio)) {
-		int err = 0;
-
-		clear_highpage(folio_page(folio, 0));
-		err = kvm_gmem_mark_prepared(folio);
-
-		if (err) {
-			ret = vmf_error(err);
-			goto out_folio;
-		}
-	}
-
-	vmf->page = folio_file_page(folio, vmf->pgoff);
-
-out_folio:
-	if (ret != VM_FAULT_LOCKED) {
-		folio_unlock(folio);
-		folio_put(folio);
-	}
-
-	return ret;
-}
-
-static const struct vm_operations_struct kvm_gmem_vm_ops = {
-	.fault = kvm_gmem_fault_user_mapping,
-};
-
-static int kvm_gmem_mmap(struct file *file, struct vm_area_struct *vma)
-{
-	if (!kvm_gmem_supports_mmap(file_inode(file)))
-		return -ENODEV;
-
-	if ((vma->vm_flags & (VM_SHARED | VM_MAYSHARE)) !=
-	    (VM_SHARED | VM_MAYSHARE)) {
-		return -EINVAL;
-	}
-
-	vma->vm_ops = &kvm_gmem_vm_ops;
-
-	return 0;
-}
-
 static struct file_operations kvm_gmem_fops = {
-	.mmap		= kvm_gmem_mmap,
+	.mmap		= guestmem_mmap,
 	.llseek		= default_llseek,
 	.write_iter	= generic_perform_write,
 	.open		= generic_file_open,
@@ -415,104 +256,24 @@ void kvm_gmem_init(struct module *module)
 	kvm_gmem_fops.owner = module;
 }
 
-static int kvm_kmem_gmem_write_begin(const struct kiocb *kiocb,
-				     struct address_space *mapping,
-				     loff_t pos, unsigned int len,
-				     struct folio **foliop,
-				     void **fsdata)
-{
-	struct file *file = kiocb->ki_filp;
-	pgoff_t index = pos >> PAGE_SHIFT;
-	struct folio *folio;
-
-	if (!PAGE_ALIGNED(pos) || len != PAGE_SIZE)
-		return -EINVAL;
-
-	if (pos + len > i_size_read(file_inode(file)))
-		return -EINVAL;
-
-	folio = kvm_gmem_get_folio(file_inode(file), index);
-	if (IS_ERR(folio))
-		return -EFAULT;
-
-	if (WARN_ON_ONCE(folio_test_large(folio))) {
-		folio_unlock(folio);
-		folio_put(folio);
-		return -EFAULT;
-	}
-
-	if (folio_test_uptodate(folio)) {
-		folio_unlock(folio);
-		folio_put(folio);
-		return -ENOSPC;
-	}
-
-	*foliop = folio;
-	return 0;
-}
-
-static int kvm_kmem_gmem_write_end(const struct kiocb *kiocb,
-				   struct address_space *mapping,
-				   loff_t pos, unsigned int len,
-				   unsigned int copied,
-				   struct folio *folio, void *fsdata)
+static void kvm_guestmem_invalidate_begin(struct list_head *entry, pgoff_t start,
+					  pgoff_t end)
 {
-	if (copied) {
-		if (copied < len) {
-			unsigned int from = pos & (PAGE_SIZE - 1);
-
-			folio_zero_range(folio, from + copied, len - copied);
-		}
-		kvm_gmem_mark_prepared(folio);
-	}
-
-	folio_unlock(folio);
-	folio_put(folio);
-
-	return copied;
-}
+	struct kvm_gmem *gmem = container_of(entry, struct kvm_gmem, entry);
 
-static int kvm_gmem_migrate_folio(struct address_space *mapping,
-				  struct folio *dst, struct folio *src,
-				  enum migrate_mode mode)
-{
-	WARN_ON_ONCE(1);
-	return -EINVAL;
+	kvm_gmem_invalidate_begin(gmem, start, end);
 }
 
-static int kvm_gmem_error_folio(struct address_space *mapping, struct folio *folio)
+static void kvm_guestmem_invalidate_end(struct list_head *entry, pgoff_t start,
+					pgoff_t end)
 {
-	struct list_head *gmem_list = &mapping->i_private_list;
-	struct kvm_gmem *gmem;
-	pgoff_t start, end;
-
-	filemap_invalidate_lock_shared(mapping);
-
-	start = folio->index;
-	end = start + folio_nr_pages(folio);
-
-	list_for_each_entry(gmem, gmem_list, entry)
-		kvm_gmem_invalidate_begin(gmem, start, end);
+	struct kvm_gmem *gmem = container_of(entry, struct kvm_gmem, entry);
 
-	/*
-	 * Do not truncate the range, what action is taken in response to the
-	 * error is userspace's decision (assuming the architecture supports
-	 * gracefully handling memory errors).  If/when the guest attempts to
-	 * access a poisoned page, kvm_gmem_get_pfn() will return -EHWPOISON,
-	 * at which point KVM can either terminate the VM or propagate the
-	 * error to userspace.
-	 */
-
-	list_for_each_entry(gmem, gmem_list, entry)
-		kvm_gmem_invalidate_end(gmem, start, end);
-
-	filemap_invalidate_unlock_shared(mapping);
-
-	return MF_DELAYED;
+	kvm_gmem_invalidate_end(gmem, start, end);
 }
 
-static void kvm_gmem_free_folio(struct address_space *mapping,
-				struct folio *folio)
+static bool kvm_gmem_release_folio(struct address_space *mapping,
+				   struct folio *folio)
 {
 	struct page *page = folio_page(folio, 0);
 	kvm_pfn_t pfn = page_to_pfn(page);
@@ -525,19 +286,19 @@ static void kvm_gmem_free_folio(struct address_space *mapping,
 	 * happened in set_direct_map_invalid_noflush() in kvm_gmem_mark_prepared().
 	 * Thus set_direct_map_valid_noflush() here only updates prot bits.
 	 */
-	if (kvm_gmem_test_no_direct_map(mapping->host))
+	if (guestmem_test_no_direct_map(mapping->host))
 		set_direct_map_valid_noflush(page, folio_nr_pages(folio), true);
 
 	kvm_arch_gmem_invalidate(pfn, pfn + (1ul << order));
+
+	return true;
 }
 
-static const struct address_space_operations kvm_gmem_aops = {
-	.dirty_folio = noop_dirty_folio,
-	.write_begin = kvm_kmem_gmem_write_begin,
-	.write_end = kvm_kmem_gmem_write_end,
-	.migrate_folio = kvm_gmem_migrate_folio,
-	.error_remove_folio = kvm_gmem_error_folio,
-	.free_folio = kvm_gmem_free_folio,
+static const struct guestmem_ops kvm_guestmem_ops = {
+	.invalidate_begin = kvm_guestmem_invalidate_begin,
+	.invalidate_end = kvm_guestmem_invalidate_end,
+	.release_folio = kvm_gmem_release_folio,
+	.supports_mmap = kvm_gmem_supports_mmap,
 };
 
 static int kvm_gmem_setattr(struct mnt_idmap *idmap, struct dentry *dentry,
@@ -587,13 +348,12 @@ static int __kvm_gmem_create(struct kvm *kvm, loff_t size, u64 flags)
 
 	inode->i_private = (void *)(unsigned long)flags;
 	inode->i_op = &kvm_gmem_iops;
-	inode->i_mapping->a_ops = &kvm_gmem_aops;
 	inode->i_mode |= S_IFREG;
 	inode->i_size = size;
-	mapping_set_gfp_mask(inode->i_mapping, GFP_HIGHUSER);
-	mapping_set_inaccessible(inode->i_mapping);
-	/* Unmovable mappings are supposed to be marked unevictable as well. */
-	WARN_ON_ONCE(!mapping_unevictable(inode->i_mapping));
+	err = guestmem_attach_mapping(inode->i_mapping, &kvm_guestmem_ops,
+				      &gmem->entry);
+	if (err)
+		goto err_putfile;
 
 	if (flags & GUEST_MEMFD_FLAG_NO_DIRECT_MAP)
 		mapping_set_no_direct_map(inode->i_mapping);
@@ -601,11 +361,12 @@ static int __kvm_gmem_create(struct kvm *kvm, loff_t size, u64 flags)
 	kvm_get_kvm(kvm);
 	gmem->kvm = kvm;
 	xa_init(&gmem->bindings);
-	list_add(&gmem->entry, &inode->i_mapping->i_private_list);
 
 	fd_install(fd, file);
 	return fd;
 
+err_putfile:
+	fput(file);
 err_gmem:
 	kfree(gmem);
 err_fd:
@@ -869,7 +630,7 @@ long kvm_gmem_populate(struct kvm *kvm, gfn_t start_gfn, void __user *src, long
 		p = src ? src + i * PAGE_SIZE : NULL;
 		ret = post_populate(kvm, gfn, pfn, p, max_order, opaque);
 		if (!ret)
-			ret = kvm_gmem_mark_prepared(folio);
+			guestmem_mark_prepared(folio);
 
 put_folio_and_exit:
 		folio_put(folio);
-- 
2.50.1
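
For illustration only (not part of the patch): a minimal sketch of how a
non-KVM user could adopt the new library, assuming nothing beyond what
include/linux/guestmem.h above declares.  All mydrv_* names are hypothetical
and the hypervisor-specific invalidation work is left as comments.

	#include <linux/container_of.h>
	#include <linux/fs.h>
	#include <linux/guestmem.h>
	#include <linux/list.h>
	#include <linux/pagemap.h>

	struct mydrv_gmem {
		struct list_head entry;	/* handed to guestmem_attach_mapping() */
		/* hypervisor-private state would live here */
	};

	static void mydrv_invalidate_begin(struct list_head *entry, pgoff_t start,
					   pgoff_t end)
	{
		struct mydrv_gmem *gmem = container_of(entry, struct mydrv_gmem, entry);

		/* Zap second-stage mappings covering [start, end) here. */
		(void)gmem;
	}

	static void mydrv_invalidate_end(struct list_head *entry, pgoff_t start,
					 pgoff_t end)
	{
		/* Paired with ->invalidate_begin(); often a no-op. */
	}

	static bool mydrv_supports_mmap(struct inode *inode)
	{
		return true;	/* allow guestmem_mmap() on this inode */
	}

	static const struct guestmem_ops mydrv_guestmem_ops = {
		.invalidate_begin	= mydrv_invalidate_begin,
		.invalidate_end		= mydrv_invalidate_end,
		.supports_mmap		= mydrv_supports_mmap,
		/* .release_folio is optional and omitted here. */
	};

	/* Bind an inode's mapping to guestmem; mirrors the KVM conversion above. */
	static int mydrv_gmem_init(struct inode *inode, struct mydrv_gmem *gmem)
	{
		/* guestmem_attach_mapping() expects the mapping to already be unevictable. */
		return guestmem_attach_mapping(inode->i_mapping, &mydrv_guestmem_ops,
					       &gmem->entry);
	}

	/* Tear-down path, e.g. from the file's ->release() handler. */
	static void mydrv_gmem_exit(struct inode *inode, struct mydrv_gmem *gmem)
	{
		guestmem_detach_mapping(inode->i_mapping, &gmem->entry);
	}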
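
Also for illustration only (not part of the patch): a userspace sketch of
populating one guest_memfd page through the write path kept by this series
(guestmem_write_begin()/guestmem_write_end()).  Per that code, the write must
be page-aligned and exactly one page long, and a page that is already up to
date is rejected with ENOSPC.  populate_page() is a made-up helper name.

	#include <errno.h>
	#include <sys/types.h>
	#include <unistd.h>

	/* Returns 0 on success, -EEXIST if the page was already populated. */
	static int populate_page(int gmem_fd, off_t offset, const void *buf,
				 size_t page_size)
	{
		ssize_t ret = pwrite(gmem_fd, buf, page_size, offset);

		if (ret == (ssize_t)page_size)
			return 0;		/* page written and marked prepared */
		if (ret < 0 && errno == ENOSPC)
			return -EEXIST;		/* folio was already up to date */
		return ret < 0 ? -errno : -EIO;
	}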