Add a gmem "pre-zap" hook to allow arch code to take action before a
shared<=>private conversion, and just as importantly, to let arch code
reject/fail a conversion, e.g. if the conversion requires new page tables
and KVM hits an OOM situation.

The new hook will be used by TDX to split hugepages as necessary to avoid
overzapping PTEs, which for all intents and purposes corrupts guest data
for TDX VMs (memory is wiped when private PTEs are removed).

TODO: Wire this up to the convert path, not the PUNCH_HOLE path, once
in-place conversion is supported.

Signed-off-by: Sean Christopherson
---
 arch/x86/kvm/Kconfig       |  1 +
 arch/x86/kvm/mmu/tdp_mmu.c |  8 ++++++
 include/linux/kvm_host.h   |  5 ++++
 virt/kvm/Kconfig           |  4 +++
 virt/kvm/guest_memfd.c     | 50 ++++++++++++++++++++++++++++++++++++--
 5 files changed, 66 insertions(+), 2 deletions(-)

diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig
index d916bd766c94..5f8d8daf4289 100644
--- a/arch/x86/kvm/Kconfig
+++ b/arch/x86/kvm/Kconfig
@@ -138,6 +138,7 @@ config KVM_INTEL_TDX
 	depends on INTEL_TDX_HOST
 	select KVM_GENERIC_MEMORY_ATTRIBUTES
 	select HAVE_KVM_ARCH_GMEM_POPULATE
+	select HAVE_KVM_ARCH_GMEM_CONVERT
 	help
 	  Provides support for launching Intel Trust Domain Extensions (TDX)
 	  confidential VMs on Intel processors.
diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c
index 0cdc6782e508..c46ebdacdb50 100644
--- a/arch/x86/kvm/mmu/tdp_mmu.c
+++ b/arch/x86/kvm/mmu/tdp_mmu.c
@@ -1630,6 +1630,14 @@ int kvm_tdp_mmu_split_huge_pages(struct kvm_vcpu *vcpu, gfn_t start, gfn_t end,
 }
 EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_tdp_mmu_split_huge_pages);
 
+#ifdef CONFIG_HAVE_KVM_ARCH_GMEM_CONVERT
+int kvm_arch_gmem_convert(struct kvm *kvm, gfn_t start, gfn_t end,
+			  bool to_private)
+{
+	return 0;
+}
+#endif /* CONFIG_HAVE_KVM_ARCH_GMEM_CONVERT */
+
 static bool tdp_mmu_need_write_protect(struct kvm *kvm, struct kvm_mmu_page *sp)
 {
 	/*
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 782f4d670793..c0bafff274b6 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -2588,6 +2588,11 @@ long kvm_gmem_populate(struct kvm *kvm, gfn_t gfn, void __user *src, long npages
 		       kvm_gmem_populate_cb post_populate, void *opaque);
 #endif
 
+#ifdef CONFIG_HAVE_KVM_ARCH_GMEM_CONVERT
+int kvm_arch_gmem_convert(struct kvm *kvm, gfn_t start, gfn_t end,
+			  bool to_private);
+#endif
+
 #ifdef CONFIG_HAVE_KVM_ARCH_GMEM_INVALIDATE
 void kvm_arch_gmem_invalidate(kvm_pfn_t start, kvm_pfn_t end);
 #endif
diff --git a/virt/kvm/Kconfig b/virt/kvm/Kconfig
index 267c7369c765..05d69eaa50ae 100644
--- a/virt/kvm/Kconfig
+++ b/virt/kvm/Kconfig
@@ -125,3 +125,7 @@ config HAVE_KVM_ARCH_GMEM_INVALIDATE
 config HAVE_KVM_ARCH_GMEM_POPULATE
 	bool
 	depends on KVM_GUEST_MEMFD
+
+config HAVE_KVM_ARCH_GMEM_CONVERT
+	bool
+	depends on KVM_GUEST_MEMFD
\ No newline at end of file
diff --git a/virt/kvm/guest_memfd.c b/virt/kvm/guest_memfd.c
index 51dbb309188f..b01f333a5e95 100644
--- a/virt/kvm/guest_memfd.c
+++ b/virt/kvm/guest_memfd.c
@@ -164,6 +164,46 @@ static struct folio *kvm_gmem_get_folio(struct inode *inode, pgoff_t index)
 	return folio;
 }
 
+#ifdef CONFIG_HAVE_KVM_ARCH_GMEM_CONVERT
+static int __kvm_gmem_convert(struct gmem_file *f, pgoff_t start, pgoff_t end,
+			      bool to_private)
+{
+	struct kvm_memory_slot *slot;
+	unsigned long index;
+	int r;
+
+	xa_for_each_range(&f->bindings, index, slot, start, end - 1) {
+		r = kvm_arch_gmem_convert(f->kvm,
+					  kvm_gmem_get_start_gfn(slot, start),
+					  kvm_gmem_get_end_gfn(slot, end),
+					  to_private);
+		if (r)
+			return r;
+	}
+	return 0;
+}
+
+static int kvm_gmem_convert(struct inode *inode, pgoff_t start, pgoff_t end,
+			    bool to_private)
+{
+	struct gmem_file *f;
+	int r;
+
+	kvm_gmem_for_each_file(f, inode->i_mapping) {
+		r = __kvm_gmem_convert(f, start, end, to_private);
+		if (r)
+			return r;
+	}
+	return 0;
+}
+#else
+static int kvm_gmem_convert(struct inode *inode, pgoff_t start, pgoff_t end,
+			    bool to_private)
+{
+	return 0;
+}
+#endif
+
 static enum kvm_gfn_range_filter kvm_gmem_get_invalidate_filter(struct inode *inode)
 {
 	if (GMEM_I(inode)->flags & GUEST_MEMFD_FLAG_INIT_SHARED)
@@ -244,6 +284,7 @@ static long kvm_gmem_punch_hole(struct inode *inode, loff_t offset, loff_t len)
 {
 	pgoff_t start = offset >> PAGE_SHIFT;
 	pgoff_t end = (offset + len) >> PAGE_SHIFT;
+	int r;
 
 	/*
 	 * Bindings must be stable across invalidation to ensure the start+end
@@ -253,13 +294,18 @@ static long kvm_gmem_punch_hole(struct inode *inode, loff_t offset, loff_t len)
 
 	kvm_gmem_invalidate_begin(inode, start, end);
 
-	truncate_inode_pages_range(inode->i_mapping, offset, offset + len - 1);
+	/*
+	 * For demonstration purposes, pretend this is a private=>shared conversion.
+	 */
+	r = kvm_gmem_convert(inode, start, end, false);
+	if (!r)
+		truncate_inode_pages_range(inode->i_mapping, offset, offset + len - 1);
 
 	kvm_gmem_invalidate_end(inode, start, end);
 
 	filemap_invalidate_unlock(inode->i_mapping);
 
-	return 0;
+	return r;
 }
 
 static long kvm_gmem_allocate(struct inode *inode, loff_t offset, loff_t len)
-- 
2.53.0.rc1.217.geba53bf80e-goog
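
P.S. For reference, a rough sketch of the shape a TDX implementation of the
pre-zap hook could eventually take, purely illustrative and not part of this
patch.  tdx_split_private_huge_pages() is a hypothetical helper standing in
for whatever hugepage-split path the eventual TDX series uses; the only thing
the sketch relies on is the contract established here: return 0 to allow the
conversion, or a negative errno (e.g. -ENOMEM if page table allocation fails)
to reject it before any PTEs are zapped.

int kvm_arch_gmem_convert(struct kvm *kvm, gfn_t start, gfn_t end,
			  bool to_private)
{
	int r;

	/*
	 * Illustrative sketch only.  Splitting is attempted only when
	 * converting away from private, on the assumption that only removal
	 * of private PTEs wipes guest memory; zapping shared PTEs is benign.
	 */
	if (to_private)
		return 0;

	/*
	 * Split private hugepages overlapping [start, end) *before* the
	 * conversion zaps SPTEs, so only the converted GFNs lose their
	 * private mappings.  tdx_split_private_huge_pages() is hypothetical;
	 * splitting may need new page tables and thus can fail with -ENOMEM,
	 * which rejects the conversion cleanly.
	 */
	r = tdx_split_private_huge_pages(kvm, start, end);
	if (r)
		return r;

	return 0;
}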