From: Nikita Kalyazin write syscall populates guest_memfd with user-supplied data in a generic way, ie no vendor-specific preparation is performed. This is supposed to be used in non-CoCo setups where guest memory is not hardware-encrypted. The following behaviour is implemented: - only page-aligned count and offset are allowed - if the memory is already allocated, the call will successfully populate it - if the memory is not allocated, the call will both allocate and populate - if the memory is already populated, the call will not repopulate it Signed-off-by: Nikita Kalyazin --- virt/kvm/guest_memfd.c | 64 +++++++++++++++++++++++++++++++++++++++++- 1 file changed, 63 insertions(+), 1 deletion(-) diff --git a/virt/kvm/guest_memfd.c b/virt/kvm/guest_memfd.c index 08a6bc7d25b6..a2e86ec13e4b 100644 --- a/virt/kvm/guest_memfd.c +++ b/virt/kvm/guest_memfd.c @@ -379,7 +379,9 @@ static int kvm_gmem_mmap(struct file *file, struct vm_area_struct *vma) } static struct file_operations kvm_gmem_fops = { - .mmap = kvm_gmem_mmap, + .mmap = kvm_gmem_mmap, + .llseek = default_llseek, + .write_iter = generic_perform_write, .open = generic_file_open, .release = kvm_gmem_release, .fallocate = kvm_gmem_fallocate, @@ -390,6 +392,63 @@ void kvm_gmem_init(struct module *module) kvm_gmem_fops.owner = module; } +static int kvm_kmem_gmem_write_begin(const struct kiocb *kiocb, + struct address_space *mapping, + loff_t pos, unsigned int len, + struct folio **foliop, + void **fsdata) +{ + struct file *file = kiocb->ki_filp; + pgoff_t index = pos >> PAGE_SHIFT; + struct folio *folio; + + if (!PAGE_ALIGNED(pos) || len != PAGE_SIZE) + return -EINVAL; + + if (pos + len > i_size_read(file_inode(file))) + return -EINVAL; + + folio = kvm_gmem_get_folio(file_inode(file), index); + if (IS_ERR(folio)) + return -EFAULT; + + if (WARN_ON_ONCE(folio_test_large(folio))) { + folio_unlock(folio); + folio_put(folio); + return -EFAULT; + } + + if (folio_test_uptodate(folio)) { + folio_unlock(folio); + folio_put(folio); + return -ENOSPC; + } + + *foliop = folio; + return 0; +} + +static int kvm_kmem_gmem_write_end(const struct kiocb *kiocb, + struct address_space *mapping, + loff_t pos, unsigned int len, + unsigned int copied, + struct folio *folio, void *fsdata) +{ + if (copied) { + if (copied < len) { + unsigned int from = pos & (PAGE_SIZE - 1); + + folio_zero_range(folio, from + copied, len - copied); + } + kvm_gmem_mark_prepared(folio); + } + + folio_unlock(folio); + folio_put(folio); + + return copied; +} + static int kvm_gmem_migrate_folio(struct address_space *mapping, struct folio *dst, struct folio *src, enum migrate_mode mode) @@ -442,6 +501,8 @@ static void kvm_gmem_free_folio(struct folio *folio) static const struct address_space_operations kvm_gmem_aops = { .dirty_folio = noop_dirty_folio, + .write_begin = kvm_kmem_gmem_write_begin, + .write_end = kvm_kmem_gmem_write_end, .migrate_folio = kvm_gmem_migrate_folio, .error_remove_folio = kvm_gmem_error_folio, #ifdef CONFIG_HAVE_KVM_ARCH_GMEM_INVALIDATE @@ -489,6 +550,7 @@ static int __kvm_gmem_create(struct kvm *kvm, loff_t size, u64 flags) } file->f_flags |= O_LARGEFILE; + file->f_mode |= FMODE_LSEEK | FMODE_PWRITE; inode = file->f_inode; WARN_ON(file->f_mapping != inode->i_mapping); -- 2.50.1 From: Nikita Kalyazin This is to reflect that the write syscall is now implemented for guest_memfd. Signed-off-by: Nikita Kalyazin --- .../testing/selftests/kvm/guest_memfd_test.c | 86 +++++++++++++++++-- 1 file changed, 80 insertions(+), 6 deletions(-) diff --git a/tools/testing/selftests/kvm/guest_memfd_test.c b/tools/testing/selftests/kvm/guest_memfd_test.c index b3ca6737f304..1236e31f5041 100644 --- a/tools/testing/selftests/kvm/guest_memfd_test.c +++ b/tools/testing/selftests/kvm/guest_memfd_test.c @@ -24,18 +24,91 @@ #include "test_util.h" #include "ucall_common.h" -static void test_file_read_write(int fd) +static void test_file_read(int fd) { char buf[64]; TEST_ASSERT(read(fd, buf, sizeof(buf)) < 0, "read on a guest_mem fd should fail"); - TEST_ASSERT(write(fd, buf, sizeof(buf)) < 0, - "write on a guest_mem fd should fail"); TEST_ASSERT(pread(fd, buf, sizeof(buf), 0) < 0, "pread on a guest_mem fd should fail"); - TEST_ASSERT(pwrite(fd, buf, sizeof(buf), 0) < 0, - "pwrite on a guest_mem fd should fail"); +} + +static void test_file_write(int fd, size_t total_size) +{ + size_t page_size = getpagesize(); + void *buf = NULL; + int ret; + + ret = posix_memalign(&buf, page_size, total_size); + TEST_ASSERT_EQ(ret, 0); + + /* Check arguments correctness checks work as expected */ + + ret = pwrite(fd, buf, page_size - 1, 0); + TEST_ASSERT(ret == -1, "write unaligned count on a guest_mem fd should fail"); + TEST_ASSERT_EQ(errno, EINVAL); + + ret = pwrite(fd, buf, page_size, 1); + TEST_ASSERT(ret == -1, "write unaligned offset on a guest_mem fd should fail"); + TEST_ASSERT_EQ(errno, EINVAL); + + ret = pwrite(fd, buf, page_size, total_size); + TEST_ASSERT(ret == -1, "writing past the file size on a guest_mem fd should fail"); + TEST_ASSERT_EQ(errno, EINVAL); + + ret = pwrite(fd, NULL, page_size, 0); + TEST_ASSERT(ret == -1, "supplying a NULL buffer when writing a guest_mem fd should fail"); + TEST_ASSERT_EQ(errno, EFAULT); + + /* Check double population is not allowed */ + + ret = pwrite(fd, buf, page_size, 0); + TEST_ASSERT(ret == page_size, "page-aligned write on a guest_mem fd should succeed"); + + ret = pwrite(fd, buf, page_size, 0); + TEST_ASSERT(ret == -1, "write on already populated guest_mem fd should fail"); + TEST_ASSERT_EQ(errno, ENOSPC); + + ret = fallocate(fd, FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE, 0, page_size); + TEST_ASSERT(!ret, "fallocate(PUNCH_HOLE) should succeed"); + + /* Check population is allowed again after punching a hole */ + + ret = pwrite(fd, buf, page_size, 0); + TEST_ASSERT(ret == page_size, + "page-aligned write on a punched guest_mem fd should succeed"); + + ret = fallocate(fd, FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE, 0, page_size); + TEST_ASSERT(!ret, "fallocate(PUNCH_HOLE) should succeed"); + + /* Check population of already allocated memory is allowed */ + + ret = fallocate(fd, FALLOC_FL_KEEP_SIZE, 0, page_size); + TEST_ASSERT(!ret, "fallocate with aligned offset and size should succeed"); + + ret = pwrite(fd, buf, page_size, 0); + TEST_ASSERT(ret == page_size, "write on a preallocated guest_mem fd should succeed"); + + ret = fallocate(fd, FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE, 0, page_size); + TEST_ASSERT(!ret, "fallocate(PUNCH_HOLE) should succeed"); + + /* Check population works until an already populated page is encountered */ + + ret = pwrite(fd, buf, total_size, 0); + TEST_ASSERT(ret == total_size, "page-aligned write on a guest_mem fd should succeed"); + + ret = fallocate(fd, FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE, 0, page_size); + TEST_ASSERT(!ret, "fallocate(PUNCH_HOLE) should succeed"); + + ret = pwrite(fd, buf, total_size, 0); + TEST_ASSERT(ret == page_size, "write on a guest_mem fd should not overwrite data"); + + ret = fallocate(fd, FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE, 0, total_size); + TEST_ASSERT(!ret, "fallocate(PUNCH_HOLE) should succeed"); + + + free(buf); } static void test_mmap_supported(int fd, size_t page_size, size_t total_size) @@ -281,7 +354,8 @@ static void test_guest_memfd(unsigned long vm_type) fd = vm_create_guest_memfd(vm, total_size, flags); - test_file_read_write(fd); + test_file_read(fd); + test_file_write(fd, total_size); if (flags & GUEST_MEMFD_FLAG_MMAP) { test_mmap_supported(fd, page_size, total_size); -- 2.50.1