Remove the completely unnecessary assumptions that memory mapped into a
TDX guest is backed by refcounted struct page memory. TDH_MEM_PAGE_ADD
and TDH_MEM_PAGE_AUG are glorified writes to PTEs; they have no business
placing requirements on how KVM and guest_memfd manage memory.

Rip out the misguided struct page assumptions/constraints before hugepage
support is added for S-EPT, e.g. so the kernel doesn't pick up even worse
assumptions like "a hugepage must be contained in a single folio".

TODO (before merge): Replace "u64 pfn" with something type-safe.

Signed-off-by: Sean Christopherson
---
 arch/x86/include/asm/tdx.h  | 25 ++++++---------
 arch/x86/kvm/vmx/tdx.c      | 33 ++++++++++---------
 arch/x86/virt/vmx/tdx/tdx.c | 63 +++++++++++++++++++------------------
 3 files changed, 59 insertions(+), 62 deletions(-)

diff --git a/arch/x86/include/asm/tdx.h b/arch/x86/include/asm/tdx.h
index 56bdfbce4289..1f57f7721286 100644
--- a/arch/x86/include/asm/tdx.h
+++ b/arch/x86/include/asm/tdx.h
@@ -154,10 +154,10 @@ static inline void tdx_init_pamt_cache(struct tdx_pamt_cache *cache)
 void tdx_free_pamt_cache(struct tdx_pamt_cache *cache);
 int tdx_topup_pamt_cache(struct tdx_pamt_cache *cache, unsigned long npages);
 
-int tdx_pamt_get(struct page *page, struct tdx_pamt_cache *cache);
-void tdx_pamt_put(struct page *page);
+int tdx_pamt_get(u64 pfn, struct tdx_pamt_cache *cache);
+void tdx_pamt_put(u64 pfn);
 
-void tdx_quirk_reset_page(struct page *page);
+void tdx_quirk_reset_page(u64 pfn);
 
 int tdx_guest_keyid_alloc(void);
 u32 tdx_get_nr_guest_keyids(void);
@@ -206,23 +206,18 @@ struct tdx_vp {
 	struct page **tdcx_pages;
 };
 
-static inline u64 mk_keyed_paddr(u16 hkid, struct page *page)
+static inline u64 mk_keyed_paddr(u16 hkid, u64 pfn)
 {
-	u64 ret;
-
-	ret = page_to_phys(page);
-	/* KeyID bits are just above the physical address bits: */
-	ret |= (u64)hkid << boot_cpu_data.x86_phys_bits;
-
-	return ret;
+	/* KeyID bits are just above the physical address bits. */
+	return PFN_PHYS(pfn) | ((u64)hkid << boot_cpu_data.x86_phys_bits);
 }
 
 u64 tdh_vp_enter(struct tdx_vp *vp, struct tdx_module_args *args);
 u64 tdh_mng_addcx(struct tdx_td *td, struct page *tdcs_page);
-u64 tdh_mem_page_add(struct tdx_td *td, u64 gpa, struct page *page, struct page *source, u64 *ext_err1, u64 *ext_err2);
+u64 tdh_mem_page_add(struct tdx_td *td, u64 gpa, u64 pfn, struct page *source, u64 *ext_err1, u64 *ext_err2);
 u64 tdh_mem_sept_add(struct tdx_td *td, u64 gpa, enum pg_level level, struct page *page, u64 *ext_err1, u64 *ext_err2);
 u64 tdh_vp_addcx(struct tdx_vp *vp, struct page *tdcx_page);
-u64 tdh_mem_page_aug(struct tdx_td *td, u64 gpa, enum pg_level level, struct page *page, u64 *ext_err1, u64 *ext_err2);
+u64 tdh_mem_page_aug(struct tdx_td *td, u64 gpa, enum pg_level level, u64 pfn, u64 *ext_err1, u64 *ext_err2);
 u64 tdh_mem_range_block(struct tdx_td *td, u64 gpa, enum pg_level level, u64 *ext_err1, u64 *ext_err2);
 u64 tdh_mng_key_config(struct tdx_td *td);
 u64 tdh_mng_create(struct tdx_td *td, u16 hkid);
@@ -237,12 +232,12 @@ u64 tdh_mng_init(struct tdx_td *td, u64 td_params, u64 *extended_err);
 u64 tdh_vp_init(struct tdx_vp *vp, u64 initial_rcx, u32 x2apicid);
 u64 tdh_vp_rd(struct tdx_vp *vp, u64 field, u64 *data);
 u64 tdh_vp_wr(struct tdx_vp *vp, u64 field, u64 data, u64 mask);
-u64 tdh_phymem_page_reclaim(struct page *page, u64 *tdx_pt, u64 *tdx_owner, u64 *tdx_size);
+u64 tdh_phymem_page_reclaim(u64 pfn, u64 *tdx_pt, u64 *tdx_owner, u64 *tdx_size);
 u64 tdh_mem_track(struct tdx_td *tdr);
 u64 tdh_mem_page_remove(struct tdx_td *td, u64 gpa, enum pg_level level, u64 *ext_err1, u64 *ext_err2);
 u64 tdh_phymem_cache_wb(bool resume);
 u64 tdh_phymem_page_wbinvd_tdr(struct tdx_td *td);
-u64 tdh_phymem_page_wbinvd_hkid(u64 hkid, struct page *page);
+u64 tdh_phymem_page_wbinvd_hkid(u64 hkid, u64 pfn);
 #else
 static inline void tdx_init(void) { }
 static inline int tdx_cpu_enable(void) { return -ENODEV; }
diff --git a/arch/x86/kvm/vmx/tdx.c b/arch/x86/kvm/vmx/tdx.c
index d74a2547e512..4ac312376ac9 100644
--- a/arch/x86/kvm/vmx/tdx.c
+++ b/arch/x86/kvm/vmx/tdx.c
@@ -318,11 +318,11 @@ static inline void tdx_disassociate_vp(struct kvm_vcpu *vcpu)
 })
 
 /* TDH.PHYMEM.PAGE.RECLAIM is allowed only when destroying the TD. */
-static int __tdx_reclaim_page(struct page *page)
+static int __tdx_reclaim_page(kvm_pfn_t pfn)
 {
 	u64 err, rcx, rdx, r8;
 
-	err = tdh_phymem_page_reclaim(page, &rcx, &rdx, &r8);
+	err = tdh_phymem_page_reclaim(pfn, &rcx, &rdx, &r8);
 
 	/*
 	 * No need to check for TDX_OPERAND_BUSY; all TD pages are freed
@@ -337,11 +337,12 @@ static int __tdx_reclaim_page(struct page *page)
 
 static int tdx_reclaim_page(struct page *page)
 {
+	kvm_pfn_t pfn = page_to_pfn(page);
 	int r;
 
-	r = __tdx_reclaim_page(page);
+	r = __tdx_reclaim_page(pfn);
 	if (!r)
-		tdx_quirk_reset_page(page);
+		tdx_quirk_reset_page(pfn);
 	return r;
 }
 
@@ -583,7 +584,7 @@ static void tdx_reclaim_td_control_pages(struct kvm *kvm)
 	if (!kvm_tdx->td.tdr_page)
 		return;
 
-	if (__tdx_reclaim_page(kvm_tdx->td.tdr_page))
+	if (__tdx_reclaim_page(page_to_pfn(kvm_tdx->td.tdr_page)))
 		return;
 
 	/*
@@ -595,7 +596,7 @@ static void tdx_reclaim_td_control_pages(struct kvm *kvm)
 	if (TDX_BUG_ON(err, TDH_PHYMEM_PAGE_WBINVD, kvm))
 		return;
 
-	tdx_quirk_reset_page(kvm_tdx->td.tdr_page);
+	tdx_quirk_reset_page(page_to_pfn(kvm_tdx->td.tdr_page));
 
 	__tdx_free_control_page(kvm_tdx->td.tdr_page);
 	kvm_tdx->td.tdr_page = NULL;
@@ -1640,8 +1641,8 @@ static int tdx_mem_page_add(struct kvm *kvm, gfn_t gfn, enum pg_level level,
 	    KVM_BUG_ON(!kvm_tdx->page_add_src, kvm))
 		return -EIO;
 
-	err = tdh_mem_page_add(&kvm_tdx->td, gpa, pfn_to_page(pfn),
-			       kvm_tdx->page_add_src, &entry, &level_state);
+	err = tdh_mem_page_add(&kvm_tdx->td, gpa, pfn, kvm_tdx->page_add_src,
+			       &entry, &level_state);
 	if (unlikely(IS_TDX_OPERAND_BUSY(err)))
 		return -EBUSY;
 
@@ -1655,12 +1656,11 @@ static int tdx_mem_page_aug(struct kvm *kvm, gfn_t gfn, enum pg_level level,
 			    kvm_pfn_t pfn)
 {
 	struct kvm_tdx *kvm_tdx = to_kvm_tdx(kvm);
-	struct page *page = pfn_to_page(pfn);
 	gpa_t gpa = gfn_to_gpa(gfn);
 	u64 entry, level_state;
 	u64 err;
 
-	err = tdh_mem_page_aug(&kvm_tdx->td, gpa, level, page, &entry, &level_state);
+	err = tdh_mem_page_aug(&kvm_tdx->td, gpa, level, pfn, &entry, &level_state);
 	if (unlikely(IS_TDX_OPERAND_BUSY(err)))
 		return -EBUSY;
 
@@ -1712,7 +1712,6 @@ static int tdx_sept_set_private_spte(struct kvm *kvm, gfn_t gfn,
 	struct kvm_tdx *kvm_tdx = to_kvm_tdx(kvm);
 	kvm_pfn_t pfn = spte_to_pfn(mirror_spte);
 	struct vcpu_tdx *tdx = to_tdx(vcpu);
-	struct page *page = pfn_to_page(pfn);
 	int ret;
 
 	if (KVM_BUG_ON(!vcpu, kvm))
@@ -1730,7 +1729,7 @@ static int tdx_sept_set_private_spte(struct kvm *kvm, gfn_t gfn,
 
 	WARN_ON_ONCE((mirror_spte & VMX_EPT_RWX_MASK) != VMX_EPT_RWX_MASK);
 
-	ret = tdx_pamt_get(page, &tdx->pamt_cache);
+	ret = tdx_pamt_get(pfn, &tdx->pamt_cache);
 	if (ret)
 		return ret;
 
@@ -1752,7 +1751,7 @@ static int tdx_sept_set_private_spte(struct kvm *kvm, gfn_t gfn,
 
 	ret = tdx_mem_page_add(kvm, gfn, level, pfn);
 	if (ret)
-		tdx_pamt_put(page);
+		tdx_pamt_put(pfn);
 
 	return ret;
 }
@@ -1828,8 +1827,8 @@ static void tdx_sept_reclaim_private_sp(struct kvm *kvm, gfn_t gfn,
 static void tdx_sept_remove_private_spte(struct kvm *kvm, gfn_t gfn,
 					 enum pg_level level, u64 mirror_spte)
 {
-	struct page *page = pfn_to_page(spte_to_pfn(mirror_spte));
 	struct kvm_tdx *kvm_tdx = to_kvm_tdx(kvm);
+	kvm_pfn_t pfn = spte_to_pfn(mirror_spte);
 	gpa_t gpa = gfn_to_gpa(gfn);
 	u64 err, entry, level_state;
 
@@ -1868,12 +1867,12 @@ static void tdx_sept_remove_private_spte(struct kvm *kvm, gfn_t gfn,
 	if (TDX_BUG_ON_2(err, TDH_MEM_PAGE_REMOVE, entry, level_state, kvm))
 		return;
 
-	err = tdh_phymem_page_wbinvd_hkid((u16)kvm_tdx->hkid, page);
+	err = tdh_phymem_page_wbinvd_hkid((u16)kvm_tdx->hkid, pfn);
 	if (TDX_BUG_ON(err, TDH_PHYMEM_PAGE_WBINVD, kvm))
 		return;
 
-	tdx_quirk_reset_page(page);
-	tdx_pamt_put(page);
+	tdx_quirk_reset_page(pfn);
+	tdx_pamt_put(pfn);
 }
 
 void tdx_deliver_interrupt(struct kvm_lapic *apic, int delivery_mode,
diff --git a/arch/x86/virt/vmx/tdx/tdx.c b/arch/x86/virt/vmx/tdx/tdx.c
index 90407493bb45..85c31ed9b9d1 100644
--- a/arch/x86/virt/vmx/tdx/tdx.c
+++ b/arch/x86/virt/vmx/tdx/tdx.c
@@ -799,9 +799,9 @@ static void tdx_quirk_reset_paddr(unsigned long base, unsigned long size)
 	mb();
 }
 
-void tdx_quirk_reset_page(struct page *page)
+void tdx_quirk_reset_page(u64 pfn)
 {
-	tdx_quirk_reset_paddr(page_to_phys(page), PAGE_SIZE);
+	tdx_quirk_reset_paddr(PFN_PHYS(pfn), PAGE_SIZE);
 }
 EXPORT_SYMBOL_FOR_KVM(tdx_quirk_reset_page);
 
@@ -1665,6 +1665,11 @@ static void tdx_clflush_page(struct page *page)
 	clflush_cache_range(page_to_virt(page), PAGE_SIZE);
 }
 
+static void tdx_clflush_pfn(u64 pfn)
+{
+	clflush_cache_range(__va(PFN_PHYS(pfn)), PAGE_SIZE);
+}
+
 static int pg_level_to_tdx_sept_level(enum pg_level level)
 {
 	WARN_ON_ONCE(level == PG_LEVEL_NONE);
@@ -1691,17 +1696,17 @@ u64 tdh_mng_addcx(struct tdx_td *td, struct page *tdcs_page)
 }
 EXPORT_SYMBOL_FOR_KVM(tdh_mng_addcx);
 
-u64 tdh_mem_page_add(struct tdx_td *td, u64 gpa, struct page *page, struct page *source, u64 *ext_err1, u64 *ext_err2)
+u64 tdh_mem_page_add(struct tdx_td *td, u64 gpa, u64 pfn, struct page *source, u64 *ext_err1, u64 *ext_err2)
 {
 	struct tdx_module_args args = {
 		.rcx = gpa,
 		.rdx = tdx_tdr_pa(td),
-		.r8 = page_to_phys(page),
+		.r8 = PFN_PHYS(pfn),
 		.r9 = page_to_phys(source),
 	};
 	u64 ret;
 
-	tdx_clflush_page(page);
+	tdx_clflush_pfn(pfn);
 	ret = seamcall_ret(TDH_MEM_PAGE_ADD, &args);
 
 	*ext_err1 = args.rcx;
@@ -1743,17 +1748,17 @@ u64 tdh_vp_addcx(struct tdx_vp *vp, struct page *tdcx_page)
 }
 EXPORT_SYMBOL_FOR_KVM(tdh_vp_addcx);
 
-u64 tdh_mem_page_aug(struct tdx_td *td, u64 gpa, enum pg_level level,
-		     struct page *page, u64 *ext_err1, u64 *ext_err2)
+u64 tdh_mem_page_aug(struct tdx_td *td, u64 gpa, enum pg_level level, u64 pfn,
+		     u64 *ext_err1, u64 *ext_err2)
 {
 	struct tdx_module_args args = {
 		.rcx = gpa | pg_level_to_tdx_sept_level(level),
 		.rdx = tdx_tdr_pa(td),
-		.r8 = page_to_phys(page),
+		.r8 = PFN_PHYS(pfn),
 	};
 	u64 ret;
 
-	tdx_clflush_page(page);
+	tdx_clflush_pfn(pfn);
 	ret = seamcall_ret(TDH_MEM_PAGE_AUG, &args);
 
 	*ext_err1 = args.rcx;
@@ -1997,10 +2002,10 @@ EXPORT_SYMBOL_FOR_KVM(tdh_vp_init);
  * So despite the names, they must be interpted specially as described by the spec. Return
  * them only for error reporting purposes.
  */
-u64 tdh_phymem_page_reclaim(struct page *page, u64 *tdx_pt, u64 *tdx_owner, u64 *tdx_size)
+u64 tdh_phymem_page_reclaim(u64 pfn, u64 *tdx_pt, u64 *tdx_owner, u64 *tdx_size)
 {
 	struct tdx_module_args args = {
-		.rcx = page_to_phys(page),
+		.rcx = PFN_PHYS(pfn),
 	};
 	u64 ret;
 
@@ -2056,17 +2061,17 @@ u64 tdh_phymem_page_wbinvd_tdr(struct tdx_td *td)
 {
 	struct tdx_module_args args = {};
 
-	args.rcx = mk_keyed_paddr(tdx_global_keyid, td->tdr_page);
+	args.rcx = mk_keyed_paddr(tdx_global_keyid, page_to_pfn(td->tdr_page));
 
 	return seamcall(TDH_PHYMEM_PAGE_WBINVD, &args);
 }
 EXPORT_SYMBOL_FOR_KVM(tdh_phymem_page_wbinvd_tdr);
 
-u64 tdh_phymem_page_wbinvd_hkid(u64 hkid, struct page *page)
+u64 tdh_phymem_page_wbinvd_hkid(u64 hkid, u64 pfn)
 {
 	struct tdx_module_args args = {};
 
-	args.rcx = mk_keyed_paddr(hkid, page);
+	args.rcx = mk_keyed_paddr(hkid, pfn);
 
 	return seamcall(TDH_PHYMEM_PAGE_WBINVD, &args);
 }
@@ -2136,11 +2141,9 @@ static void free_pamt_array(u64 *pa_array)
  * Calculate the arg needed for operating on the DPAMT backing for
  * a given 4KB page.
  */
-static u64 pamt_2mb_arg(struct page *page)
+static u64 pamt_2mb_arg(u64 pfn)
 {
-	unsigned long hpa_2mb = ALIGN_DOWN(page_to_phys(page), PMD_SIZE);
-
-	return hpa_2mb | TDX_PS_2M;
+	return ALIGN_DOWN(PFN_PHYS(pfn), PMD_SIZE) | TDX_PS_2M;
 }
 
 /*
@@ -2149,10 +2152,10 @@ static u64 pamt_2mb_arg(struct page *page)
  * error. In the case of TDX module error, the return code is stored
  * in tdx_err.
  */
-static u64 tdh_phymem_pamt_add(struct page *page, u64 *pamt_pa_array)
+static u64 tdh_phymem_pamt_add(u64 pfn, u64 *pamt_pa_array)
 {
 	struct tdx_module_args args = {
-		.rcx = pamt_2mb_arg(page)
+		.rcx = pamt_2mb_arg(pfn)
 	};
 
 	dpamt_copy_to_regs(&args, rdx, pamt_pa_array);
@@ -2161,10 +2164,10 @@ static u64 tdh_phymem_pamt_add(struct page *page, u64 *pamt_pa_array)
 }
 
 /* Remove PAMT backing for the given page. */
-static u64 tdh_phymem_pamt_remove(struct page *page, u64 *pamt_pa_array)
+static u64 tdh_phymem_pamt_remove(u64 pfn, u64 *pamt_pa_array)
 {
 	struct tdx_module_args args = {
-		.rcx = pamt_2mb_arg(page),
+		.rcx = pamt_2mb_arg(pfn),
 	};
 	u64 ret;
 
@@ -2180,7 +2183,7 @@
 static DEFINE_SPINLOCK(pamt_lock);
 
 /* Bump PAMT refcount for the given page and allocate PAMT memory if needed */
-int tdx_pamt_get(struct page *page, struct tdx_pamt_cache *cache)
+int tdx_pamt_get(u64 pfn, struct tdx_pamt_cache *cache)
 {
 	u64 pamt_pa_array[MAX_NR_DPAMT_ARGS];
 	atomic_t *pamt_refcount;
@@ -2190,7 +2193,7 @@ int tdx_pamt_get(struct page *page, struct tdx_pamt_cache *cache)
 	if (!tdx_supports_dynamic_pamt(&tdx_sysinfo))
 		return 0;
 
-	pamt_refcount = tdx_find_pamt_refcount(page_to_pfn(page));
+	pamt_refcount = tdx_find_pamt_refcount(pfn);
 
 	/*
 	 * If the pamt page is already added (i.e. refcount >= 1),
@@ -2214,7 +2217,7 @@ int tdx_pamt_get(struct page *page, struct tdx_pamt_cache *cache)
 	}
 
 	/* Try to add the pamt page and take the refcount 0->1. */
-	tdx_status = tdh_phymem_pamt_add(page, pamt_pa_array);
+	tdx_status = tdh_phymem_pamt_add(pfn, pamt_pa_array);
 	if (IS_TDX_SUCCESS(tdx_status)) {
 		/*
 		 * The refcount is zero, and this locked path is the only way to
@@ -2257,7 +2260,7 @@ EXPORT_SYMBOL_FOR_KVM(tdx_pamt_get);
  * Drop PAMT refcount for the given page and free PAMT memory if it is no
  * longer needed.
  */
-void tdx_pamt_put(struct page *page)
+void tdx_pamt_put(u64 pfn)
 {
 	u64 pamt_pa_array[MAX_NR_DPAMT_ARGS];
 	atomic_t *pamt_refcount;
@@ -2266,7 +2269,7 @@ void tdx_pamt_put(struct page *page)
 	if (!tdx_supports_dynamic_pamt(&tdx_sysinfo))
 		return;
 
-	pamt_refcount = tdx_find_pamt_refcount(page_to_pfn(page));
+	pamt_refcount = tdx_find_pamt_refcount(pfn);
 
 	/*
	 * If the there are more than 1 references on the pamt page,
@@ -2285,7 +2288,7 @@ void tdx_pamt_put(struct page *page)
 		return;
 
 	/* Try to remove the pamt page and take the refcount 1->0. */
-	tdx_status = tdh_phymem_pamt_remove(page, pamt_pa_array);
+	tdx_status = tdh_phymem_pamt_remove(pfn, pamt_pa_array);
 
 	/*
 	 * Don't free pamt_pa_array as it could hold garbage when
@@ -2357,7 +2360,7 @@ struct page *__tdx_alloc_control_page(gfp_t gfp)
 	if (!page)
 		return NULL;
 
-	if (tdx_pamt_get(page, NULL)) {
+	if (tdx_pamt_get(page_to_pfn(page), NULL)) {
 		__free_page(page);
 		return NULL;
 	}
@@ -2375,7 +2378,7 @@ void __tdx_free_control_page(struct page *page)
 	if (!page)
 		return;
 
-	tdx_pamt_put(page);
+	tdx_pamt_put(page_to_pfn(page));
 	__free_page(page);
 }
 EXPORT_SYMBOL_FOR_KVM(__tdx_free_control_page);
-- 
2.53.0.rc1.217.geba53bf80e-goog
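
P.S. for reviewers: below is a minimal, self-contained userspace sketch of the
mk_keyed_paddr() bit layout the patch switches to, in case anyone wants to
sanity-check the arithmetic outside the kernel. It is illustrative only and not
part of the patch; PAGE_SHIFT of 12 and the 46 physical address bits are
assumed example values (the kernel reads the real width from
boot_cpu_data.x86_phys_bits at runtime).

#include <stdio.h>
#include <stdint.h>

#define PAGE_SHIFT	12

/*
 * Mirrors the patched helper: the KeyID bits sit immediately above the
 * physical address bits, so the keyed physical address is simply
 * PFN_PHYS(pfn) OR'd with the HKID shifted up by the phys-bits width.
 */
static uint64_t mk_keyed_paddr(uint16_t hkid, uint64_t pfn,
			       unsigned int phys_bits)
{
	return (pfn << PAGE_SHIFT) | ((uint64_t)hkid << phys_bits);
}

int main(void)
{
	/* pfn 0x123456 with HKID 3 on an assumed 46-phys-bit CPU. */
	printf("keyed paddr = %#llx\n",
	       (unsigned long long)mk_keyed_paddr(3, 0x123456, 46));
	return 0;
}

With those assumed values the result is 0xc00123456000: the low bits are
PFN_PHYS(pfn), and the HKID of 3 lands in bits 47:46, just above the physical
address.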