Extend kvm_syscalls.h to wrap madvise() to assert success. This will be used in the next patch. Signed-off-by: Ackerley Tng --- tools/testing/selftests/kvm/include/kvm_syscalls.h | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/testing/selftests/kvm/include/kvm_syscalls.h b/tools/testing/selftests/kvm/include/kvm_syscalls.h index d4e613162bba9..843c9904c46f6 100644 --- a/tools/testing/selftests/kvm/include/kvm_syscalls.h +++ b/tools/testing/selftests/kvm/include/kvm_syscalls.h @@ -77,5 +77,6 @@ __KVM_SYSCALL_DEFINE(munmap, 2, void *, mem, size_t, size); __KVM_SYSCALL_DEFINE(close, 1, int, fd); __KVM_SYSCALL_DEFINE(fallocate, 4, int, fd, int, mode, loff_t, offset, loff_t, len); __KVM_SYSCALL_DEFINE(ftruncate, 2, unsigned int, fd, off_t, length); +__KVM_SYSCALL_DEFINE(madvise, 3, void *, addr, size_t, length, int, advice); #endif /* SELFTEST_KVM_SYSCALLS_H */ -- 2.53.0.345.g96ddfc5eaa-goog guest_memfd only supports PAGE_SIZE pages, and khugepaged or MADV_COLLAPSE collapsing pages may result in private memory regions being mapped into host page tables. Add test to verify that MADV_COLLAPSE fails on guest_memfd folios, and any subsequent usage of guest_memfd memory faults in PAGE_SIZE folios. Running this test should not result in any memory failure logs or kernel WARNings. This selftest was added as a result of a syzbot-reported issue where khugepaged operating on guest_memfd memory with MADV_HUGEPAGE caused the collapse of folios, which then subsequently resulted in a WARNing. Link: https://syzkaller.appspot.com/bug?extid=33a04338019ac7e43a44 Suggested-by: David Hildenbrand Signed-off-by: Ackerley Tng --- .../testing/selftests/kvm/guest_memfd_test.c | 70 ++++++++++++++++++- 1 file changed, 67 insertions(+), 3 deletions(-) diff --git a/tools/testing/selftests/kvm/guest_memfd_test.c b/tools/testing/selftests/kvm/guest_memfd_test.c index 618c937f3c90f..0edbc7cf6c1ad 100644 --- a/tools/testing/selftests/kvm/guest_memfd_test.c +++ b/tools/testing/selftests/kvm/guest_memfd_test.c @@ -171,6 +171,64 @@ static void test_numa_allocation(int fd, size_t total_size) kvm_munmap(mem, total_size); } +static void test_collapse(int fd, uint64_t flags) +{ + const size_t pmd_size = get_trans_hugepagesz(); + void *reserved_addr; + void *aligned_addr; + char *mem; + off_t i; + + /* + * To even reach the point where the guest_memfd folios will + * get collapsed, both the userspace address and the offset + * within the guest_memfd have to be aligned to pmd_size. + * + * To achieve that alignment, reserve virtual address space + * with regular mmap, then use MAP_FIXED to allocate memory + * from a pmd_size-aligned offset (0) at a known, available + * virtual address. + */ + reserved_addr = kvm_mmap(pmd_size * 2, PROT_NONE, + MAP_PRIVATE | MAP_ANONYMOUS, -1); + aligned_addr = align_ptr_up(reserved_addr, pmd_size); + + mem = mmap(aligned_addr, pmd_size, PROT_READ | PROT_WRITE, + MAP_FIXED | MAP_SHARED, fd, 0); + TEST_ASSERT(IS_ALIGNED((u64)mem, pmd_size), + "Userspace address must be aligned to PMD size."); + + /* + * Use reads to populate page table to avoid setting dirty + * flag on page. + */ + for (i = 0; i < pmd_size; i += getpagesize()) + READ_ONCE(mem[i]); + + /* + * Advising the use of huge pages in guest_memfd should be + * fine... + */ + kvm_madvise(mem, pmd_size, MADV_HUGEPAGE); + + /* + * ... but collapsing folios must not be supported to avoid + * mapping beyond shared ranges into host userspace page + * tables. + */ + TEST_ASSERT_EQ(madvise(mem, pmd_size, MADV_COLLAPSE), -1); + TEST_ASSERT_EQ(errno, EINVAL); + + /* + * Removing from host page tables and re-faulting should be + * fine; should not end up faulting in a collapsed/huge folio. + */ + kvm_madvise(mem, pmd_size, MADV_DONTNEED); + READ_ONCE(mem[0]); + + kvm_munmap(reserved_addr, pmd_size * 2); +} + static void test_fault_sigbus(int fd, size_t accessible_size, size_t map_size) { const char val = 0xaa; @@ -350,14 +408,17 @@ static void test_guest_memfd_flags(struct kvm_vm *vm) } } -#define gmem_test(__test, __vm, __flags) \ +#define __gmem_test(__test, __vm, __flags, __gmem_size) \ do { \ - int fd = vm_create_guest_memfd(__vm, page_size * 4, __flags); \ + int fd = vm_create_guest_memfd(__vm, __gmem_size, __flags); \ \ - test_##__test(fd, page_size * 4); \ + test_##__test(fd, __gmem_size); \ close(fd); \ } while (0) +#define gmem_test(__test, __vm, __flags) \ + __gmem_test(__test, __vm, __flags, page_size * 4) + static void __test_guest_memfd(struct kvm_vm *vm, uint64_t flags) { test_create_guest_memfd_multiple(vm); @@ -367,9 +428,12 @@ static void __test_guest_memfd(struct kvm_vm *vm, uint64_t flags) if (flags & GUEST_MEMFD_FLAG_MMAP) { if (flags & GUEST_MEMFD_FLAG_INIT_SHARED) { + size_t pmd_size = get_trans_hugepagesz(); + gmem_test(mmap_supported, vm, flags); gmem_test(fault_overflow, vm, flags); gmem_test(numa_allocation, vm, flags); + __gmem_test(collapse, vm, flags, pmd_size); } else { gmem_test(fault_private, vm, flags); } -- 2.53.0.345.g96ddfc5eaa-goog