From: Nikita Kalyazin

The write syscall populates guest_memfd with user-supplied data in a
generic way, i.e. no vendor-specific preparation is performed.  If the
request is not page-aligned, the remaining bytes are initialised to 0.

write() is only supported for non-CoCo setups where guest memory is not
hardware-encrypted.

Signed-off-by: Nikita Kalyazin
---
 virt/kvm/guest_memfd.c | 48 ++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 48 insertions(+)

diff --git a/virt/kvm/guest_memfd.c b/virt/kvm/guest_memfd.c
index 94bafd6c558c..f4e218049afa 100644
--- a/virt/kvm/guest_memfd.c
+++ b/virt/kvm/guest_memfd.c
@@ -380,6 +380,8 @@ static int kvm_gmem_mmap(struct file *file, struct vm_area_struct *vma)
 
 static struct file_operations kvm_gmem_fops = {
 	.mmap		= kvm_gmem_mmap,
+	.llseek		= default_llseek,
+	.write_iter	= generic_perform_write,
 	.open		= generic_file_open,
 	.release	= kvm_gmem_release,
 	.fallocate	= kvm_gmem_fallocate,
@@ -390,6 +392,49 @@ void kvm_gmem_init(struct module *module)
 	kvm_gmem_fops.owner = module;
 }
 
+/* Return, via @foliop, the locked folio backing @pos for a buffered write. */
+static int kvm_kmem_gmem_write_begin(const struct kiocb *kiocb,
+				     struct address_space *mapping,
+				     loff_t pos, unsigned int len,
+				     struct folio **foliop, void **fsdata)
+{
+	struct inode *inode = file_inode(kiocb->ki_filp);
+	struct folio *folio;
+
+	if (!kvm_gmem_supports_mmap(inode))
+		return -ENODEV;
+
+	if (pos + len > i_size_read(inode))
+		return -EINVAL;
+
+	folio = kvm_gmem_get_folio(inode, pos >> PAGE_SHIFT);
+	if (IS_ERR(folio))
+		return PTR_ERR(folio);
+
+	*foliop = folio;
+	return 0;
+}
+
+/* Zero what a short copy left unwritten, then unlock and release the folio. */
+static int kvm_kmem_gmem_write_end(const struct kiocb *kiocb,
+				   struct address_space *mapping,
+				   loff_t pos, unsigned int len,
+				   unsigned int copied,
+				   struct folio *folio, void *fsdata)
+{
+	if (copied && copied < len) {
+		/* Byte offset of @pos within the folio. */
+		size_t from = offset_in_folio(folio, pos);
+
+		folio_zero_range(folio, from + copied, len - copied);
+	}
+
+	folio_unlock(folio);
+	folio_put(folio);
+
+	return copied;
+}
+
 static int kvm_gmem_migrate_folio(struct address_space *mapping,
 				  struct folio *dst, struct folio *src,
 				  enum migrate_mode mode)
@@ -442,6 +487,8 @@ static void kvm_gmem_free_folio(struct folio *folio)
 
 static const struct address_space_operations kvm_gmem_aops = {
 	.dirty_folio = noop_dirty_folio,
+	.write_begin = kvm_kmem_gmem_write_begin,
+	.write_end = kvm_kmem_gmem_write_end,
 	.migrate_folio	= kvm_gmem_migrate_folio,
 	.error_remove_folio = kvm_gmem_error_folio,
 #ifdef CONFIG_HAVE_KVM_ARCH_GMEM_INVALIDATE
@@ -489,6 +536,7 @@ static int __kvm_gmem_create(struct kvm *kvm, loff_t size, u64 flags)
 	}
 
 	file->f_flags |= O_LARGEFILE;
+	file->f_mode |= FMODE_LSEEK | FMODE_PWRITE;
 
 	inode = file->f_inode;
 	WARN_ON(file->f_mapping != inode->i_mapping);
-- 
2.50.1

From: Nikita Kalyazin

Update the guest_memfd selftest to reflect that the write syscall is now
implemented for guest_memfd: writes must succeed on files created with
GUEST_MEMFD_FLAG_MMAP and fail with ENODEV otherwise.

Signed-off-by: Nikita Kalyazin
---
 .../testing/selftests/kvm/guest_memfd_test.c  | 51 ++++++++++++++++---
 1 file changed, 45 insertions(+), 6 deletions(-)

diff --git a/tools/testing/selftests/kvm/guest_memfd_test.c b/tools/testing/selftests/kvm/guest_memfd_test.c
index b3ca6737f304..be1f78542d64 100644
--- a/tools/testing/selftests/kvm/guest_memfd_test.c
+++ b/tools/testing/selftests/kvm/guest_memfd_test.c
@@ -24,18 +24,55 @@
 #include "test_util.h"
 #include "ucall_common.h"
 
-static void test_file_read_write(int fd)
+static void test_file_read(int fd)
 {
 	char buf[64];
 
 	TEST_ASSERT(read(fd, buf, sizeof(buf)) < 0,
 		    "read on a guest_mem fd should fail");
-	TEST_ASSERT(write(fd, buf, sizeof(buf)) < 0,
-		    "write on a guest_mem fd should fail");
 	TEST_ASSERT(pread(fd, buf, sizeof(buf), 0) < 0,
 		    "pread on a guest_mem fd should fail");
-	TEST_ASSERT(pwrite(fd, buf, sizeof(buf), 0) < 0,
-		    "pwrite on a guest_mem fd should fail");
+}
+
+static void test_write_supported(int fd, size_t total_size)
+{
+	size_t page_size = getpagesize();
+	/* Only one page is ever written; zero it so the payload is defined. */
+	void *buf = calloc(1, page_size);
+	int ret;
+
+	TEST_ASSERT(buf, "calloc() of the write buffer should succeed");
+
+	ret = pwrite(fd, buf, page_size, total_size);
+	TEST_ASSERT(ret == -1, "writing past the file size on a guest_mem fd should fail");
+	TEST_ASSERT_EQ(errno, EINVAL);
+
+	ret = pwrite(fd, buf, page_size, 0);
+	TEST_ASSERT(ret == page_size, "write on a guest_mem fd should succeed");
+
+	ret = fallocate(fd, FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE, 0, page_size);
+	TEST_ASSERT(!ret, "fallocate(PUNCH_HOLE) should succeed");
+
+	free(buf);
+}
+
+static void test_write_not_supported(int fd, size_t total_size)
+{
+	size_t page_size = getpagesize();
+	/* Zeroed so that the rejected write is still handed defined data. */
+	void *buf = calloc(1, page_size);
+	int ret;
+
+	TEST_ASSERT(buf, "calloc() of the write buffer should succeed");
+
+	ret = pwrite(fd, buf, page_size, 0);
+	TEST_ASSERT(ret == -1, "write on guest_mem fd should fail");
+	TEST_ASSERT_EQ(errno, ENODEV);
+
+	ret = fallocate(fd, FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE, 0, page_size);
+	TEST_ASSERT(!ret, "fallocate(PUNCH_HOLE) should succeed");
+
+	free(buf);
 }
 
 static void test_mmap_supported(int fd, size_t page_size, size_t total_size)
@@ -281,12 +318,14 @@ static void test_guest_memfd(unsigned long vm_type)
 
 	fd = vm_create_guest_memfd(vm, total_size, flags);
 
-	test_file_read_write(fd);
+	test_file_read(fd);
 
 	if (flags & GUEST_MEMFD_FLAG_MMAP) {
+		test_write_supported(fd, total_size);
 		test_mmap_supported(fd, page_size, total_size);
 		test_fault_overflow(fd, page_size, total_size);
 	} else {
+		test_write_not_supported(fd, total_size);
 		test_mmap_not_supported(fd, page_size, total_size);
 	}
 
-- 
2.50.1