is_access_allowed(), and is_executable_pte() within it, are effectively a
special version of permission_fault() that only supports a subset of roles.
In particular, it does not handle SMEP, SMAP or PKE.

Replace its implementation with a modified version of permission_fault();
the new version will support SMEP (and hence AMD GMET) for free as soon as
update_spte_permission_bitmask() stops hardcoding cr4_smep == false.

This prepares for a possible future where TDP entries could have XS!=XU,
for example as part of implementing Hyper-V VSM natively inside KVM.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/mmu/mmu.c     | 18 ++++++++++++---
 arch/x86/kvm/mmu/spte.h    | 46 +++++++++++++++++++++-----------------
 arch/x86/kvm/mmu/tdp_mmu.c |  3 ++-
 3 files changed, 42 insertions(+), 25 deletions(-)

diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index ddda1f1be686..0ec8c9dc2c33 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -3670,6 +3670,7 @@ static u64 *fast_pf_get_last_sptep(struct kvm_vcpu *vcpu, gpa_t gpa, u64 *spte)
  */
 static int fast_page_fault(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault)
 {
+	struct kvm_mmu *mmu;
 	struct kvm_mmu_page *sp;
 	int ret = RET_PF_INVALID;
 	u64 spte;
@@ -3679,6 +3680,7 @@ static int fast_page_fault(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault)
 	if (!page_fault_can_be_fast(vcpu->kvm, fault))
 		return ret;
 
+	mmu = vcpu->arch.mmu;
 	walk_shadow_page_lockless_begin(vcpu);
 
 	do {
@@ -3714,7 +3716,7 @@ static int fast_page_fault(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault)
 		 * Need not check the access of upper level table entries since
 		 * they are always ACC_ALL.
 		 */
-		if (is_access_allowed(fault, spte)) {
+		if (!spte_permission_fault(mmu, spte, fault)) {
 			ret = RET_PF_SPURIOUS;
 			break;
 		}
@@ -3737,7 +3739,7 @@ static int fast_page_fault(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault)
 		 * that were write-protected for dirty-logging or access
 		 * tracking are handled here.  Don't bother checking if the
 		 * SPTE is writable to prioritize running with A/D bits enabled.
-		 * The is_access_allowed() check above handles the common case
+		 * The spte_permission_fault() check above handles the common case
 		 * of the fault being spurious, and the SPTE is known to be
 		 * shadow-present, i.e. except for access tracking restoration
 		 * making the new SPTE writable, the check is wasteful.
@@ -3762,7 +3764,7 @@ static int fast_page_fault(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault)
 		/* Verify that the fault can be handled in the fast path */
 		if (new_spte == spte ||
-		    !is_access_allowed(fault, new_spte))
+		    spte_permission_fault(mmu, new_spte, fault))
 			break;
 
 		/*
@@ -5675,6 +5677,12 @@ static void update_permission_bitmask(struct kvm_pagewalk *w, bool tdp, bool ept
 				      is_cr0_wp(w), is_efer_nx(w));
 }
 
+static void update_spte_permission_bitmask(struct kvm_mmu *mmu, bool tdp, bool ept)
+{
+	__update_permission_bitmask(&mmu->fmt, tdp, ept,
+				    mmu->root_role.cr4_smep, false, true, true);
+}
+
 /*
  * PKU is an additional mechanism by which the paging controls access to
  * user-mode addresses based on the value in the PKRU register.  Protection
@@ -5884,6 +5892,7 @@ static void init_kvm_tdp_mmu(struct kvm_vcpu *vcpu,
 	context->page_fault = kvm_tdp_page_fault;
 	context->sync_spte = NULL;
 
+	update_spte_permission_bitmask(context, true, shadow_xs_mask);
 	reset_tdp_shadow_zero_bits_mask(context);
 }
 
@@ -5902,6 +5911,7 @@ static void shadow_mmu_init_context(struct kvm_vcpu *vcpu, struct kvm_mmu *conte
 	else
 		paging32_init_context(context);
 
+	update_spte_permission_bitmask(context, context == &vcpu->arch.guest_mmu, false);
 	reset_shadow_zero_bits_mask(vcpu, context);
 }
 
@@ -6030,6 +6040,8 @@ void kvm_init_shadow_ept_mmu(struct kvm_vcpu *vcpu, bool execonly,
 	update_permission_bitmask(tdp_walk, true, true);
 	tdp_walk->fmt.pkru_mask = 0;
 	reset_rsvds_bits_mask_ept(vcpu, execonly, huge_page_level);
+
+	update_spte_permission_bitmask(context, true, true);
 	reset_ept_shadow_zero_bits_mask(context, execonly);
 }
 
diff --git a/arch/x86/kvm/mmu/spte.h b/arch/x86/kvm/mmu/spte.h
index 918533e61b98..9bddfa0e02b9 100644
--- a/arch/x86/kvm/mmu/spte.h
+++ b/arch/x86/kvm/mmu/spte.h
@@ -357,17 +357,6 @@ static inline bool is_last_spte(u64 pte, int level)
 	return (level == PG_LEVEL_4K) || is_large_pte(pte);
 }
 
-static inline bool is_executable_pte(u64 spte)
-{
-	/*
-	 * For now, return true if either the XS or XU bit is set
-	 * This function is only used for fast_page_fault,
-	 * which never processes shadow EPT, and regular page
-	 * tables always have XS==XU.
-	 */
-	return (spte & (shadow_xs_mask | shadow_xu_mask | shadow_nx_mask)) != shadow_nx_mask;
-}
-
 static inline kvm_pfn_t spte_to_pfn(u64 pte)
 {
 	return (pte & SPTE_BASE_ADDR_MASK) >> PAGE_SHIFT;
@@ -496,20 +485,35 @@ static inline bool is_mmu_writable_spte(u64 spte)
 }
 
 /*
- * Returns true if the access indicated by @fault is allowed by the existing
- * SPTE protections.  Note, the caller is responsible for checking that the
- * SPTE is a shadow-present, leaf SPTE (either before or after).
+ * Returns true if the access indicated by @fault is forbidden by the existing
+ * SPTE protections.
  */
-static inline bool is_access_allowed(struct kvm_page_fault *fault, u64 spte)
+static inline bool spte_permission_fault(struct kvm_mmu *mmu, u64 spte,
+					 struct kvm_page_fault *fault)
 {
-	if (fault->exec)
-		return is_executable_pte(spte);
+	unsigned int pfec = fault->error_code;
+	int index = pfec >> 1;
+	int pte_access;
 
-	if (fault->write)
-		return is_writable_pte(spte);
+	if (!is_shadow_present_pte(spte))
+		return true;
 
-	/* Fault was on Read access */
-	return spte & PT_PRESENT_MASK;
+	BUILD_BUG_ON(PT_PRESENT_MASK != ACC_READ_MASK);
+	BUILD_BUG_ON(PT_WRITABLE_MASK != ACC_WRITE_MASK);
+	BUILD_BUG_ON(VMX_EPT_READABLE_MASK != ACC_READ_MASK);
+	BUILD_BUG_ON(VMX_EPT_WRITABLE_MASK != ACC_WRITE_MASK);
+
+	/* strip nested paging fault error codes */
+	pte_access = spte & (PT_PRESENT_MASK | PT_WRITABLE_MASK);
+	if (shadow_nx_mask) {
+		pte_access |= spte & shadow_user_mask ? ACC_USER_MASK : 0;
+		pte_access |= spte & shadow_nx_mask ? 0 : ACC_EXEC_MASK;
+	} else {
+		pte_access |= spte & shadow_xs_mask ? ACC_EXEC_MASK : 0;
+		pte_access |= spte & shadow_xu_mask ? ACC_USER_EXEC_MASK : 0;
+	}
+
+	return (mmu->fmt.permissions[index] >> pte_access) & 1;
 }
 
 /*
diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c
index 5a2f8ce9a32b..839a8e416510 100644
--- a/arch/x86/kvm/mmu/tdp_mmu.c
+++ b/arch/x86/kvm/mmu/tdp_mmu.c
@@ -1169,6 +1169,7 @@ static int tdp_mmu_map_handle_target_level(struct kvm_vcpu *vcpu,
 					   struct kvm_page_fault *fault,
 					   struct tdp_iter *iter)
 {
+	struct kvm_mmu *mmu = vcpu->arch.mmu;
 	struct kvm_mmu_page *sp = sptep_to_sp(rcu_dereference(iter->sptep));
 	u64 new_spte;
 	int ret = RET_PF_FIXED;
@@ -1178,7 +1179,7 @@ static int tdp_mmu_map_handle_target_level(struct kvm_vcpu *vcpu,
 		return RET_PF_RETRY;
 
 	if (is_shadow_present_pte(iter->old_spte) &&
-	    (fault->prefetch || is_access_allowed(fault, iter->old_spte)) &&
+	    (fault->prefetch || !spte_permission_fault(mmu, iter->old_spte, fault)) &&
 	    is_last_spte(iter->old_spte, iter->level)) {
 		WARN_ON_ONCE(fault->pfn != spte_to_pfn(iter->old_spte));
 		return RET_PF_SPURIOUS;
-- 
2.52.0
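
For readers not familiar with KVM's permission-bitmask scheme, the sketch
below shows the table-lookup idea that spte_permission_fault() reuses from
permission_fault(): a per-MMU table is precomputed once, indexed by
(error code >> 1), and each entry holds one fault/no-fault bit per possible
combination of access bits, so the fast path reduces to a shift and an AND.
This is a minimal, self-contained user-space sketch under simplified
assumptions; the bit values, the table size and the build_permissions() /
permission_fault() helpers here are stand-ins for illustration, not the
kernel's actual definitions.

/*
 * Illustrative only: these bit values and helpers are simplified
 * stand-ins, not the kernel's definitions.
 */
#include <stdint.h>
#include <stdio.h>

#define ACC_EXEC_MASK   1u              /* execute allowed */
#define ACC_WRITE_MASK  2u              /* write allowed   */
#define ACC_READ_MASK   4u              /* read allowed    */

#define PFERR_WRITE_BIT 1               /* error code: write access */
#define PFERR_FETCH_BIT 4               /* error code: instruction fetch */

/*
 * One byte per (error code >> 1) index; bit N is set if an SPTE whose
 * access bits equal N would fault for that kind of access.  The table is
 * filled once, the way update_spte_permission_bitmask() does per MMU.
 */
static uint8_t permissions[16];

static void build_permissions(void)
{
	for (unsigned int index = 0; index < 16; index++) {
		unsigned int pfec = index << 1;
		for (unsigned int pte_access = 0; pte_access < 8; pte_access++) {
			int fault = !(pte_access & ACC_READ_MASK);

			if ((pfec & (1u << PFERR_WRITE_BIT)) &&
			    !(pte_access & ACC_WRITE_MASK))
				fault = 1;      /* write to a read-only SPTE */
			if ((pfec & (1u << PFERR_FETCH_BIT)) &&
			    !(pte_access & ACC_EXEC_MASK))
				fault = 1;      /* fetch from a no-exec SPTE */

			permissions[index] |= (uint8_t)fault << pte_access;
		}
	}
}

/* The fast-path check: one table lookup, one shift, one AND. */
static int permission_fault(unsigned int pfec, unsigned int pte_access)
{
	return (permissions[pfec >> 1] >> pte_access) & 1;
}

int main(void)
{
	/* Read-only, executable mapping: reads and fetches OK, writes fault. */
	unsigned int ro_x = ACC_READ_MASK | ACC_EXEC_MASK;

	build_permissions();
	printf("read:  %d\n", permission_fault(0, ro_x));                      /* 0 */
	printf("write: %d\n", permission_fault(1u << PFERR_WRITE_BIT, ro_x));  /* 1 */
	printf("fetch: %d\n", permission_fault(1u << PFERR_FETCH_BIT, ro_x));  /* 0 */
	return 0;
}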