Move mmu_external_spt_cache behind x86 ops.

In the mirror/external MMU concept, the KVM MMU manages a non-active EPT
tree for private memory (the mirror). The actual active EPT tree that the
private memory is protected with lives inside the TDX module. Whenever the
mirror EPT is changed, KVM needs to call out to one of a set of x86 ops
that implement the various update operations with TDX-specific SEAMCALLs
and other tricks. These implementations operate on the TDX S-EPT (the
external).

In reality these external operations are designed narrowly with respect to
TDX particulars. On the surface, the TDX-specific things happening to
fulfill these update operations are mostly hidden from the MMU, but there
is one particular area of interest where some details leak through. The
S-EPT needs pages to use for the S-EPT page tables. These page tables need
to be allocated before taking the mmu lock, like all the rest. So the KVM
MMU pre-allocates pages for TDX to use for the S-EPT in the same place
where it pre-allocates the other page tables. It's not too bad and fits
nicely with the others.

However, Dynamic PAMT will need even more pages for the same operations.
Further, these pages will need to be handed to the arch/x86 side, which
uses them for DPAMT updates, which is hard for the existing KVM-based
cache. The details living in core MMU code start to add up.

So in preparation to make it more complicated, move the external page
table cache into TDX code by putting it behind some x86 ops. Have one for
topping up and one for allocation. Don't go so far as to try to hide the
existence of external page tables completely from the generic MMU, as they
are currently stored in their mirror struct kvm_mmu_page and it's quite
handy.

To plumb the memory cache operations through tdx.c, export some of the
functions temporarily. This will be removed in future changes.

Acked-by: Kiryl Shutsemau
Signed-off-by: Rick Edgecombe
---
v4:
 - Add Kiryl ack
 - Log typo (Binbin)
 - Add pages arg to topup_external_fault_cache() (Yan)
 - After more consideration, create free_external_fault_cache() as
   suggested by Yan

v3:
 - New patch
---
 arch/x86/include/asm/kvm-x86-ops.h |  3 +++
 arch/x86/include/asm/kvm_host.h    | 14 +++++++++-----
 arch/x86/kvm/mmu/mmu.c             |  6 +++---
 arch/x86/kvm/mmu/mmu_internal.h    |  2 +-
 arch/x86/kvm/vmx/tdx.c             | 24 ++++++++++++++++++++++++
 arch/x86/kvm/vmx/tdx.h             |  2 ++
 virt/kvm/kvm_main.c                |  3 +++
 7 files changed, 45 insertions(+), 9 deletions(-)

diff --git a/arch/x86/include/asm/kvm-x86-ops.h b/arch/x86/include/asm/kvm-x86-ops.h
index de709fb5bd76..58c5c9b082ca 100644
--- a/arch/x86/include/asm/kvm-x86-ops.h
+++ b/arch/x86/include/asm/kvm-x86-ops.h
@@ -98,6 +98,9 @@ KVM_X86_OP_OPTIONAL(link_external_spt)
 KVM_X86_OP_OPTIONAL(set_external_spte)
 KVM_X86_OP_OPTIONAL(free_external_spt)
 KVM_X86_OP_OPTIONAL(remove_external_spte)
+KVM_X86_OP_OPTIONAL(alloc_external_fault_cache)
+KVM_X86_OP_OPTIONAL(topup_external_fault_cache)
+KVM_X86_OP_OPTIONAL(free_external_fault_cache)
 KVM_X86_OP(has_wbinvd_exit)
 KVM_X86_OP(get_l2_tsc_offset)
 KVM_X86_OP(get_l2_tsc_multiplier)
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 8d8cc059fed6..dde94b84610c 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -855,11 +855,6 @@ struct kvm_vcpu_arch {
 	struct kvm_mmu_memory_cache mmu_shadow_page_cache;
 	struct kvm_mmu_memory_cache mmu_shadowed_info_cache;
 	struct kvm_mmu_memory_cache mmu_page_header_cache;
-	/*
-	 * This cache is to allocate external page table. E.g. private EPT used
-	 * by the TDX module.
-	 */
-	struct kvm_mmu_memory_cache mmu_external_spt_cache;
 
 	/*
 	 * QEMU userspace and the guest each have their own FPU state.
@@ -1856,6 +1851,15 @@ struct kvm_x86_ops {
 	void (*remove_external_spte)(struct kvm *kvm, gfn_t gfn, enum pg_level level,
 				     u64 mirror_spte);
 
+	/* Allocate a page from the external page fault cache. */
+	void *(*alloc_external_fault_cache)(struct kvm_vcpu *vcpu);
+
+	/* Top up extra pages needed for faulting in external page tables. */
+	int (*topup_external_fault_cache)(struct kvm_vcpu *vcpu, unsigned int cnt);
+
+	/* Free the external page fault cache. */
+	void (*free_external_fault_cache)(struct kvm_vcpu *vcpu);
+
 	bool (*has_wbinvd_exit)(void);
 
 	u64 (*get_l2_tsc_offset)(struct kvm_vcpu *vcpu);
diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index 3cfabfbdd843..3b1b91fd37dd 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -601,8 +601,7 @@ static int mmu_topup_memory_caches(struct kvm_vcpu *vcpu, bool maybe_indirect)
 	if (r)
 		return r;
 	if (kvm_has_mirrored_tdp(vcpu->kvm)) {
-		r = kvm_mmu_topup_memory_cache(&vcpu->arch.mmu_external_spt_cache,
-					       PT64_ROOT_MAX_LEVEL);
+		r = kvm_x86_call(topup_external_fault_cache)(vcpu, PT64_ROOT_MAX_LEVEL);
 		if (r)
 			return r;
 	}
@@ -625,8 +624,9 @@ static void mmu_free_memory_caches(struct kvm_vcpu *vcpu)
 	kvm_mmu_free_memory_cache(&vcpu->arch.mmu_pte_list_desc_cache);
 	kvm_mmu_free_memory_cache(&vcpu->arch.mmu_shadow_page_cache);
 	kvm_mmu_free_memory_cache(&vcpu->arch.mmu_shadowed_info_cache);
-	kvm_mmu_free_memory_cache(&vcpu->arch.mmu_external_spt_cache);
 	kvm_mmu_free_memory_cache(&vcpu->arch.mmu_page_header_cache);
+	if (kvm_has_mirrored_tdp(vcpu->kvm))
+		kvm_x86_call(free_external_fault_cache)(vcpu);
 }
 
 static void mmu_free_pte_list_desc(struct pte_list_desc *pte_list_desc)
diff --git a/arch/x86/kvm/mmu/mmu_internal.h b/arch/x86/kvm/mmu/mmu_internal.h
index 73cdcbccc89e..12234ee468ce 100644
--- a/arch/x86/kvm/mmu/mmu_internal.h
+++ b/arch/x86/kvm/mmu/mmu_internal.h
@@ -165,7 +165,7 @@ static inline void kvm_mmu_alloc_external_spt(struct kvm_vcpu *vcpu, struct kvm_
 	 * Therefore, KVM does not need to initialize or access external_spt.
 	 * KVM only interacts with sp->spt for private EPT operations.
	 */
-	sp->external_spt = kvm_mmu_memory_cache_alloc(&vcpu->arch.mmu_external_spt_cache);
+	sp->external_spt = kvm_x86_call(alloc_external_fault_cache)(vcpu);
 }
 
 static inline gfn_t kvm_gfn_root_bits(const struct kvm *kvm, const struct kvm_mmu_page *root)
diff --git a/arch/x86/kvm/vmx/tdx.c b/arch/x86/kvm/vmx/tdx.c
index b6d7f4b5f40f..260bb0e6eb44 100644
--- a/arch/x86/kvm/vmx/tdx.c
+++ b/arch/x86/kvm/vmx/tdx.c
@@ -1642,6 +1642,27 @@ static int tdx_mem_page_add(struct kvm *kvm, gfn_t gfn, enum pg_level level,
 	return 0;
 }
 
+static void *tdx_alloc_external_fault_cache(struct kvm_vcpu *vcpu)
+{
+	struct vcpu_tdx *tdx = to_tdx(vcpu);
+
+	return kvm_mmu_memory_cache_alloc(&tdx->mmu_external_spt_cache);
+}
+
+static int tdx_topup_external_fault_cache(struct kvm_vcpu *vcpu, unsigned int cnt)
+{
+	struct vcpu_tdx *tdx = to_tdx(vcpu);
+
+	return kvm_mmu_topup_memory_cache(&tdx->mmu_external_spt_cache, cnt);
+}
+
+static void tdx_free_external_fault_cache(struct kvm_vcpu *vcpu)
+{
+	struct vcpu_tdx *tdx = to_tdx(vcpu);
+
+	kvm_mmu_free_memory_cache(&tdx->mmu_external_spt_cache);
+}
+
 static int tdx_mem_page_aug(struct kvm *kvm, gfn_t gfn, enum pg_level level,
 			    kvm_pfn_t pfn)
 {
@@ -3602,4 +3623,7 @@ void __init tdx_hardware_setup(void)
 	vt_x86_ops.free_external_spt = tdx_sept_free_private_spt;
 	vt_x86_ops.remove_external_spte = tdx_sept_remove_private_spte;
 	vt_x86_ops.protected_apic_has_interrupt = tdx_protected_apic_has_interrupt;
+	vt_x86_ops.alloc_external_fault_cache = tdx_alloc_external_fault_cache;
+	vt_x86_ops.topup_external_fault_cache = tdx_topup_external_fault_cache;
+	vt_x86_ops.free_external_fault_cache = tdx_free_external_fault_cache;
 }
diff --git a/arch/x86/kvm/vmx/tdx.h b/arch/x86/kvm/vmx/tdx.h
index ce2720a028ad..1eefa1b0df5e 100644
--- a/arch/x86/kvm/vmx/tdx.h
+++ b/arch/x86/kvm/vmx/tdx.h
@@ -73,6 +73,8 @@ struct vcpu_tdx {
 
 	u64 map_gpa_next;
 	u64 map_gpa_end;
+
+	struct kvm_mmu_memory_cache mmu_external_spt_cache;
 };
 
 void tdh_vp_rd_failed(struct vcpu_tdx *tdx, char *uclass, u32 field, u64 err);
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 9eca084bdcbe..cff24b950baa 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -404,6 +404,7 @@ int kvm_mmu_topup_memory_cache(struct kvm_mmu_memory_cache *mc, int min)
 {
 	return __kvm_mmu_topup_memory_cache(mc, KVM_ARCH_NR_OBJS_PER_MEMORY_CACHE, min);
 }
+EXPORT_SYMBOL_GPL(kvm_mmu_topup_memory_cache);
 
 int kvm_mmu_memory_cache_nr_free_objects(struct kvm_mmu_memory_cache *mc)
 {
@@ -424,6 +425,7 @@ void kvm_mmu_free_memory_cache(struct kvm_mmu_memory_cache *mc)
 	mc->objects = NULL;
 	mc->capacity = 0;
 }
+EXPORT_SYMBOL_GPL(kvm_mmu_free_memory_cache);
 
 void *kvm_mmu_memory_cache_alloc(struct kvm_mmu_memory_cache *mc)
 {
@@ -436,6 +438,7 @@ void *kvm_mmu_memory_cache_alloc(struct kvm_mmu_memory_cache *mc)
 	BUG_ON(!p);
 	return p;
 }
+EXPORT_SYMBOL_GPL(kvm_mmu_memory_cache_alloc);
 
 #endif
 
 static void kvm_vcpu_init(struct kvm_vcpu *vcpu, struct kvm *kvm, unsigned id)
-- 
2.51.2
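
For reference, a minimal sketch of the pre-allocation contract the new ops
preserve on the fault path: the top-up happens before mmu_lock is taken, so
the under-lock allocation cannot fail or sleep. It uses only interfaces
visible in this patch; example_external_fault() itself is hypothetical and
not part of the patch.

static int example_external_fault(struct kvm_vcpu *vcpu)
{
	void *external_spt;
	int r;

	/* Outside mmu_lock: filling the TDX-side cache may allocate/sleep. */
	r = kvm_x86_call(topup_external_fault_cache)(vcpu, PT64_ROOT_MAX_LEVEL);
	if (r)
		return r;

	write_lock(&vcpu->kvm->mmu_lock);

	/*
	 * Under mmu_lock: draws from the cache topped up above, so it cannot
	 * fail here (kvm_mmu_memory_cache_alloc() BUG()s rather than return
	 * NULL).
	 */
	external_spt = kvm_x86_call(alloc_external_fault_cache)(vcpu);

	/* ... store external_spt in the mirror kvm_mmu_page, link it, etc. ... */

	write_unlock(&vcpu->kvm->mmu_lock);
	return 0;
}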