Implement the split_external_spt hook to enable huge page splitting for TDX when kvm->mmu_lock is held for writing. Invoke tdh_mem_range_block() and tdh_mem_track(), kick off vCPUs, and then invoke tdh_mem_page_demote(), in that order. All operations are performed under kvm->mmu_lock held for writing, similar to those in page removal. Even with kvm->mmu_lock held for writing, tdh_mem_page_demote() may still contend with tdh_vp_enter() and potentially with the guest's S-EPT entry operations. Therefore, if the first tdh_mem_page_demote() attempt fails with a busy error, kick off other vCPUs and prevent tdh_vp_enter() from being called on them, so that the second attempt is expected to succeed. Use KVM_BUG_ON() for any other unexpected errors. Signed-off-by: Xiaoyao Li Signed-off-by: Isaku Yamahata Signed-off-by: Yan Zhao --- RFC v2: - Split out the code to handle the error TDX_INTERRUPTED_RESTARTABLE. - Rebased to 6.16.0-rc6 (the way TDX hooks are defined has changed). RFC v1: - Split patch for exclusive mmu_lock only. - Invoke tdx_sept_zap_private_spte() and tdx_track() for splitting. - Handled busy error of tdh_mem_page_demote() by kicking off vCPUs. 
--- arch/x86/kvm/vmx/tdx.c | 45 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 45 insertions(+) diff --git a/arch/x86/kvm/vmx/tdx.c b/arch/x86/kvm/vmx/tdx.c index 376287a2ddf4..8a60ba5b6595 100644 --- a/arch/x86/kvm/vmx/tdx.c +++ b/arch/x86/kvm/vmx/tdx.c @@ -1915,6 +1915,50 @@ static int tdx_sept_free_private_spt(struct kvm *kvm, gfn_t gfn, return 0; } +static int tdx_spte_demote_private_spte(struct kvm *kvm, gfn_t gfn, + enum pg_level level, struct page *page) +{ + int tdx_level = pg_level_to_tdx_sept_level(level); + struct kvm_tdx *kvm_tdx = to_kvm_tdx(kvm); + gpa_t gpa = gfn_to_gpa(gfn); + u64 err, entry, level_state; + + err = tdh_mem_page_demote(&kvm_tdx->td, gpa, tdx_level, page, + &entry, &level_state); + + if (unlikely(tdx_operand_busy(err))) { + tdx_no_vcpus_enter_start(kvm); + err = tdh_mem_page_demote(&kvm_tdx->td, gpa, tdx_level, page, + &entry, &level_state); + tdx_no_vcpus_enter_stop(kvm); + } + + if (KVM_BUG_ON(err, kvm)) { + pr_tdx_error_2(TDH_MEM_PAGE_DEMOTE, err, entry, level_state); + return -EIO; + } + return 0; +} + +static int tdx_sept_split_private_spt(struct kvm *kvm, gfn_t gfn, enum pg_level level, + void *private_spt) +{ + struct page *page = virt_to_page(private_spt); + int ret; + + if (KVM_BUG_ON(to_kvm_tdx(kvm)->state != TD_STATE_RUNNABLE || + level != PG_LEVEL_2M, kvm)) + return -EINVAL; + + ret = tdx_sept_zap_private_spte(kvm, gfn, level, page); + if (ret <= 0) + return ret; + + tdx_track(kvm); + + return tdx_spte_demote_private_spte(kvm, gfn, level, page); +} + static int tdx_sept_remove_private_spte(struct kvm *kvm, gfn_t gfn, enum pg_level level, kvm_pfn_t pfn) { @@ -3668,5 +3712,6 @@ void __init tdx_hardware_setup(void) vt_x86_ops.set_external_spte = tdx_sept_set_private_spte; vt_x86_ops.free_external_spt = tdx_sept_free_private_spt; vt_x86_ops.remove_external_spte = tdx_sept_remove_private_spte; + vt_x86_ops.split_external_spt = tdx_sept_split_private_spt; vt_x86_ops.protected_apic_has_interrupt = 
tdx_protected_apic_has_interrupt; } -- 2.43.2