From: Nikita Kalyazin UserfaultFD support in guestmem enables use cases like restoring a guest_memfd-backed VM from a memory snapshot in Firecracker [1], where an external process is responsible for supplying the content of the guest memory, or live migration of guest_memfd-backed VMs. [1] https://github.com/firecracker-microvm/firecracker/blob/main/docs/snapshotting/handling-page-faults-on-snapshot-resume.md Signed-off-by: Nikita Kalyazin --- Documentation/admin-guide/mm/userfaultfd.rst | 4 +++- fs/userfaultfd.c | 3 ++- include/linux/userfaultfd_k.h | 8 +++++--- include/uapi/linux/userfaultfd.h | 8 +++++++- mm/userfaultfd.c | 14 +++++++++++--- 5 files changed, 28 insertions(+), 9 deletions(-) diff --git a/Documentation/admin-guide/mm/userfaultfd.rst b/Documentation/admin-guide/mm/userfaultfd.rst index e5cc8848dcb3..ca8c5954ffdb 100644 --- a/Documentation/admin-guide/mm/userfaultfd.rst +++ b/Documentation/admin-guide/mm/userfaultfd.rst @@ -111,7 +111,9 @@ events, except page fault notifications, may be generated: - ``UFFD_FEATURE_MINOR_HUGETLBFS`` indicates that the kernel supports ``UFFDIO_REGISTER_MODE_MINOR`` registration for hugetlbfs virtual memory areas. ``UFFD_FEATURE_MINOR_SHMEM`` is the analogous feature indicating - support for shmem virtual memory areas. + support for shmem virtual memory areas. ``UFFD_FEATURE_MINOR_GUESTMEM`` + is the analogous feature indicating support for guestmem-backed memory + areas. - ``UFFD_FEATURE_MOVE`` indicates that the kernel supports moving an existing page contents from userspace. 
diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c index 54c6cc7fe9c6..e4e80f1072a6 100644 --- a/fs/userfaultfd.c +++ b/fs/userfaultfd.c @@ -1978,7 +1978,8 @@ static int userfaultfd_api(struct userfaultfd_ctx *ctx, uffdio_api.features = UFFD_API_FEATURES; #ifndef CONFIG_HAVE_ARCH_USERFAULTFD_MINOR uffdio_api.features &= - ~(UFFD_FEATURE_MINOR_HUGETLBFS | UFFD_FEATURE_MINOR_SHMEM); + ~(UFFD_FEATURE_MINOR_HUGETLBFS | UFFD_FEATURE_MINOR_SHMEM | + UFFD_FEATURE_MINOR_GUESTMEM); #endif #ifndef CONFIG_HAVE_ARCH_USERFAULTFD_WP uffdio_api.features &= ~UFFD_FEATURE_PAGEFAULT_FLAG_WP; diff --git a/include/linux/userfaultfd_k.h b/include/linux/userfaultfd_k.h index c0e716aec26a..37bd4e71b611 100644 --- a/include/linux/userfaultfd_k.h +++ b/include/linux/userfaultfd_k.h @@ -14,6 +14,7 @@ #include <linux/userfaultfd.h> /* linux/include/uapi/linux/userfaultfd.h */ #include <linux/fcntl.h> +#include <linux/guestmem.h> #include <linux/mm.h> #include <linux/swap.h> #include <linux/swapops.h> @@ -218,7 +219,8 @@ static inline bool vma_can_userfault(struct vm_area_struct *vma, return false; if ((vm_flags & VM_UFFD_MINOR) && - (!is_vm_hugetlb_page(vma) && !vma_is_shmem(vma))) + (!is_vm_hugetlb_page(vma) && !vma_is_shmem(vma) && + !guestmem_vma_is_guestmem(vma))) return false; /* @@ -238,9 +240,9 @@ static inline bool vma_can_userfault(struct vm_area_struct *vma, return false; #endif - /* By default, allow any of anon|shmem|hugetlb */ + /* By default, allow any of anon|shmem|hugetlb|guestmem */ return vma_is_anonymous(vma) || is_vm_hugetlb_page(vma) || - vma_is_shmem(vma); + vma_is_shmem(vma) || guestmem_vma_is_guestmem(vma); } static inline bool vma_has_uffd_without_event_remap(struct vm_area_struct *vma) diff --git a/include/uapi/linux/userfaultfd.h b/include/uapi/linux/userfaultfd.h index 2841e4ea8f2c..0fe9fbd29772 100644 --- a/include/uapi/linux/userfaultfd.h +++ b/include/uapi/linux/userfaultfd.h @@ -42,7 +42,8 @@ UFFD_FEATURE_WP_UNPOPULATED | \ UFFD_FEATURE_POISON | \ UFFD_FEATURE_WP_ASYNC | \ - UFFD_FEATURE_MOVE) + UFFD_FEATURE_MOVE | \ + UFFD_FEATURE_MINOR_GUESTMEM) #define 
UFFD_API_IOCTLS \ ((__u64)1 << _UFFDIO_REGISTER | \ (__u64)1 << _UFFDIO_UNREGISTER | \ @@ -230,6 +231,10 @@ struct uffdio_api { * * UFFD_FEATURE_MOVE indicates that the kernel supports moving an * existing page contents from userspace. + * + * UFFD_FEATURE_MINOR_GUESTMEM indicates the same support as + * UFFD_FEATURE_MINOR_HUGETLBFS, but for guestmem-backed pages + * instead. */ #define UFFD_FEATURE_PAGEFAULT_FLAG_WP (1<<0) #define UFFD_FEATURE_EVENT_FORK (1<<1) @@ -248,6 +253,7 @@ struct uffdio_api { #define UFFD_FEATURE_POISON (1<<14) #define UFFD_FEATURE_WP_ASYNC (1<<15) #define UFFD_FEATURE_MOVE (1<<16) +#define UFFD_FEATURE_MINOR_GUESTMEM (1<<17) __u64 features; __u64 ioctls; diff --git a/mm/userfaultfd.c b/mm/userfaultfd.c index 45e6290e2e8b..304e5d7dbb70 100644 --- a/mm/userfaultfd.c +++ b/mm/userfaultfd.c @@ -388,7 +388,14 @@ static int mfill_atomic_pte_continue(pmd_t *dst_pmd, struct page *page; int ret; - ret = shmem_get_folio(inode, pgoff, 0, &folio, SGP_NOALLOC); + if (guestmem_vma_is_guestmem(dst_vma)) { + ret = 0; + folio = guestmem_grab_folio(inode->i_mapping, pgoff); + if (IS_ERR(folio)) + ret = PTR_ERR(folio); + } else { + ret = shmem_get_folio(inode, pgoff, 0, &folio, SGP_NOALLOC); + } /* Our caller expects us to return -EFAULT if we failed to find folio */ if (ret == -ENOENT) ret = -EFAULT; @@ -766,9 +773,10 @@ static __always_inline ssize_t mfill_atomic(struct userfaultfd_ctx *ctx, return mfill_atomic_hugetlb(ctx, dst_vma, dst_start, src_start, len, flags); - if (!vma_is_anonymous(dst_vma) && !vma_is_shmem(dst_vma)) + if (!vma_is_anonymous(dst_vma) && !vma_is_shmem(dst_vma) + && !guestmem_vma_is_guestmem(dst_vma)) goto out_unlock; - if (!vma_is_shmem(dst_vma) && + if (!vma_is_shmem(dst_vma) && !guestmem_vma_is_guestmem(dst_vma) && uffd_flags_mode_is(flags, MFILL_ATOMIC_CONTINUE)) goto out_unlock; -- 2.50.1