Remove one level of indirection, and prepare for using the permission bitmask machinery for shadow pages as well. Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/kvm_host.h | 38 +++++------ arch/x86/kvm/mmu/mmu.c | 116 ++++++++++++++++---------------- arch/x86/kvm/mmu/paging_tmpl.h | 8 +-- arch/x86/kvm/mmu/spte.c | 4 +- arch/x86/kvm/mmu/spte.h | 18 ++--- arch/x86/kvm/vmx/vmx.c | 2 +- 6 files changed, 91 insertions(+), 95 deletions(-) diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 8191f20b87a7..c015d0e492ed 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -447,9 +447,24 @@ struct kvm_pio_request { #define PT64_ROOT_MAX_LEVEL 5 -struct rsvd_bits_validate { +struct kvm_page_format { u64 rsvd_bits_mask[2][PT64_ROOT_MAX_LEVEL]; u64 bad_mt_xwr; + + /* + * The pkru_mask indicates if protection key checks are needed. It + * consists of 16 domains indexed by page fault error code bits [4:1], + * with PFEC.RSVD replaced by ACC_USER_MASK from the page tables. + * Each domain has 2 bits which are ANDed with AD and WD from PKRU. + */ + u32 pkru_mask; + + /* + * Bitmap; bit set = permission fault + * Array index: page fault error code [4:1] + * Bit index: pte permissions in ACC_* format + */ + u16 permissions[16]; }; struct kvm_mmu_root_info { @@ -476,25 +491,6 @@ struct kvm_page_fault; * and 2-level 32-bit). The kvm_pagewalk structure abstracts the details of the * current mmu mode. */ -struct kvm_page_format { - struct rsvd_bits_validate guest_rsvd_check; - - /* - * The pkru_mask indicates if protection key checks are needed. It - * consists of 16 domains indexed by page fault error code bits [4:1], - * with PFEC.RSVD replaced by ACC_USER_MASK from the page tables. - * Each domain has 2 bits which are ANDed with AD and WD from PKRU. 
- */ - u32 pkru_mask; - - /* - * Bitmap; bit set = permission fault - * Array index: page fault error code [4:1] - * Bit index: pte permissions in ACC_* format - */ - u16 permissions[16]; -}; - struct kvm_pagewalk { unsigned long (*get_guest_pgd)(struct kvm_vcpu *vcpu); u64 (*get_pdptr)(struct kvm_vcpu *vcpu, int index); @@ -529,7 +525,7 @@ struct kvm_mmu { * bits include not only hardware reserved bits but also * the bits spte never used. */ - struct rsvd_bits_validate shadow_zero_check; + struct kvm_page_format fmt; }; enum pmc_type { diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c index ac2abd86a7c6..58a98bae75e6 100644 --- a/arch/x86/kvm/mmu/mmu.c +++ b/arch/x86/kvm/mmu/mmu.c @@ -4422,7 +4422,7 @@ static int get_sptes_lockless(struct kvm_vcpu *vcpu, u64 addr, u64 *sptes, static bool get_mmio_spte(struct kvm_vcpu *vcpu, u64 addr, u64 *sptep) { u64 sptes[PT64_ROOT_MAX_LEVEL + 1]; - struct rsvd_bits_validate *rsvd_check; + struct kvm_page_format *rsvd_check; int root, leaf, level; bool reserved = false; @@ -4443,7 +4443,7 @@ static bool get_mmio_spte(struct kvm_vcpu *vcpu, u64 addr, u64 *sptep) if (!is_shadow_present_pte(sptes[leaf])) leaf++; - rsvd_check = &vcpu->arch.mmu->shadow_zero_check; + rsvd_check = &vcpu->arch.mmu->fmt; for (level = root; level >= leaf; level--) reserved |= is_rsvd_spte(rsvd_check, sptes[level], level); @@ -5298,7 +5298,7 @@ static bool sync_mmio_spte(struct kvm_vcpu *vcpu, u64 *sptep, gfn_t gfn, #include "paging_tmpl.h" #undef PTTYPE -static void __reset_rsvds_bits_mask(struct rsvd_bits_validate *rsvd_check, +static void __reset_rsvds_bits_mask(struct kvm_page_format *fmt, u64 pa_bits_rsvd, int level, bool nx, bool gbpages, bool pse, bool amd) { @@ -5306,7 +5306,7 @@ static void __reset_rsvds_bits_mask(struct rsvd_bits_validate *rsvd_check, u64 nonleaf_bit8_rsvd = 0; u64 high_bits_rsvd; - rsvd_check->bad_mt_xwr = 0; + fmt->bad_mt_xwr = 0; if (!gbpages) gbpages_bit_rsvd = rsvd_bits(7, 7); @@ -5330,59 +5330,59 @@ static void 
__reset_rsvds_bits_mask(struct rsvd_bits_validate *rsvd_check, switch (level) { case PT32_ROOT_LEVEL: /* no rsvd bits for 2 level 4K page table entries */ - rsvd_check->rsvd_bits_mask[0][1] = 0; - rsvd_check->rsvd_bits_mask[0][0] = 0; - rsvd_check->rsvd_bits_mask[1][0] = - rsvd_check->rsvd_bits_mask[0][0]; + fmt->rsvd_bits_mask[0][1] = 0; + fmt->rsvd_bits_mask[0][0] = 0; + fmt->rsvd_bits_mask[1][0] = + fmt->rsvd_bits_mask[0][0]; if (!pse) { - rsvd_check->rsvd_bits_mask[1][1] = 0; + fmt->rsvd_bits_mask[1][1] = 0; break; } if (is_cpuid_PSE36()) /* 36bits PSE 4MB page */ - rsvd_check->rsvd_bits_mask[1][1] = rsvd_bits(17, 21); + fmt->rsvd_bits_mask[1][1] = rsvd_bits(17, 21); else /* 32 bits PSE 4MB page */ - rsvd_check->rsvd_bits_mask[1][1] = rsvd_bits(13, 21); + fmt->rsvd_bits_mask[1][1] = rsvd_bits(13, 21); break; case PT32E_ROOT_LEVEL: - rsvd_check->rsvd_bits_mask[0][2] = rsvd_bits(63, 63) | + fmt->rsvd_bits_mask[0][2] = rsvd_bits(63, 63) | high_bits_rsvd | rsvd_bits(5, 8) | rsvd_bits(1, 2); /* PDPTE */ - rsvd_check->rsvd_bits_mask[0][1] = high_bits_rsvd; /* PDE */ - rsvd_check->rsvd_bits_mask[0][0] = high_bits_rsvd; /* PTE */ - rsvd_check->rsvd_bits_mask[1][1] = high_bits_rsvd | + fmt->rsvd_bits_mask[0][1] = high_bits_rsvd; /* PDE */ + fmt->rsvd_bits_mask[0][0] = high_bits_rsvd; /* PTE */ + fmt->rsvd_bits_mask[1][1] = high_bits_rsvd | rsvd_bits(13, 20); /* large page */ - rsvd_check->rsvd_bits_mask[1][0] = - rsvd_check->rsvd_bits_mask[0][0]; + fmt->rsvd_bits_mask[1][0] = + fmt->rsvd_bits_mask[0][0]; break; case PT64_ROOT_5LEVEL: - rsvd_check->rsvd_bits_mask[0][4] = high_bits_rsvd | + fmt->rsvd_bits_mask[0][4] = high_bits_rsvd | nonleaf_bit8_rsvd | rsvd_bits(7, 7); - rsvd_check->rsvd_bits_mask[1][4] = - rsvd_check->rsvd_bits_mask[0][4]; + fmt->rsvd_bits_mask[1][4] = + fmt->rsvd_bits_mask[0][4]; fallthrough; case PT64_ROOT_4LEVEL: - rsvd_check->rsvd_bits_mask[0][3] = high_bits_rsvd | + fmt->rsvd_bits_mask[0][3] = high_bits_rsvd | nonleaf_bit8_rsvd | rsvd_bits(7, 7); 
- rsvd_check->rsvd_bits_mask[0][2] = high_bits_rsvd | + fmt->rsvd_bits_mask[0][2] = high_bits_rsvd | gbpages_bit_rsvd; - rsvd_check->rsvd_bits_mask[0][1] = high_bits_rsvd; - rsvd_check->rsvd_bits_mask[0][0] = high_bits_rsvd; - rsvd_check->rsvd_bits_mask[1][3] = - rsvd_check->rsvd_bits_mask[0][3]; - rsvd_check->rsvd_bits_mask[1][2] = high_bits_rsvd | + fmt->rsvd_bits_mask[0][1] = high_bits_rsvd; + fmt->rsvd_bits_mask[0][0] = high_bits_rsvd; + fmt->rsvd_bits_mask[1][3] = + fmt->rsvd_bits_mask[0][3]; + fmt->rsvd_bits_mask[1][2] = high_bits_rsvd | gbpages_bit_rsvd | rsvd_bits(13, 29); - rsvd_check->rsvd_bits_mask[1][1] = high_bits_rsvd | + fmt->rsvd_bits_mask[1][1] = high_bits_rsvd | rsvd_bits(13, 20); /* large page */ - rsvd_check->rsvd_bits_mask[1][0] = - rsvd_check->rsvd_bits_mask[0][0]; + fmt->rsvd_bits_mask[1][0] = + fmt->rsvd_bits_mask[0][0]; break; } } @@ -5390,7 +5390,7 @@ static void __reset_rsvds_bits_mask(struct rsvd_bits_validate *rsvd_check, static void reset_guest_rsvds_bits_mask(struct kvm_vcpu *vcpu, struct kvm_pagewalk *w) { - __reset_rsvds_bits_mask(&w->fmt.guest_rsvd_check, + __reset_rsvds_bits_mask(&w->fmt, vcpu->arch.reserved_gpa_bits, w->cpu_role.base.level, is_efer_nx(w), guest_cpu_cap_has(vcpu, X86_FEATURE_GBPAGES), @@ -5398,7 +5398,7 @@ static void reset_guest_rsvds_bits_mask(struct kvm_vcpu *vcpu, guest_cpuid_is_amd_compatible(vcpu)); } -static void __reset_rsvds_bits_mask_ept(struct rsvd_bits_validate *rsvd_check, +static void __reset_rsvds_bits_mask_ept(struct kvm_page_format *fmt, u64 pa_bits_rsvd, bool execonly, int huge_page_level) { @@ -5411,18 +5411,18 @@ static void __reset_rsvds_bits_mask_ept(struct rsvd_bits_validate *rsvd_check, if (huge_page_level < PG_LEVEL_2M) large_2m_rsvd = rsvd_bits(7, 7); - rsvd_check->rsvd_bits_mask[0][4] = high_bits_rsvd | rsvd_bits(3, 7); - rsvd_check->rsvd_bits_mask[0][3] = high_bits_rsvd | rsvd_bits(3, 7); - rsvd_check->rsvd_bits_mask[0][2] = high_bits_rsvd | rsvd_bits(3, 6) | large_1g_rsvd; - 
rsvd_check->rsvd_bits_mask[0][1] = high_bits_rsvd | rsvd_bits(3, 6) | large_2m_rsvd; - rsvd_check->rsvd_bits_mask[0][0] = high_bits_rsvd; + fmt->rsvd_bits_mask[0][4] = high_bits_rsvd | rsvd_bits(3, 7); + fmt->rsvd_bits_mask[0][3] = high_bits_rsvd | rsvd_bits(3, 7); + fmt->rsvd_bits_mask[0][2] = high_bits_rsvd | rsvd_bits(3, 6) | large_1g_rsvd; + fmt->rsvd_bits_mask[0][1] = high_bits_rsvd | rsvd_bits(3, 6) | large_2m_rsvd; + fmt->rsvd_bits_mask[0][0] = high_bits_rsvd; /* large page */ - rsvd_check->rsvd_bits_mask[1][4] = rsvd_check->rsvd_bits_mask[0][4]; - rsvd_check->rsvd_bits_mask[1][3] = rsvd_check->rsvd_bits_mask[0][3]; - rsvd_check->rsvd_bits_mask[1][2] = high_bits_rsvd | rsvd_bits(12, 29) | large_1g_rsvd; - rsvd_check->rsvd_bits_mask[1][1] = high_bits_rsvd | rsvd_bits(12, 20) | large_2m_rsvd; - rsvd_check->rsvd_bits_mask[1][0] = rsvd_check->rsvd_bits_mask[0][0]; + fmt->rsvd_bits_mask[1][4] = fmt->rsvd_bits_mask[0][4]; + fmt->rsvd_bits_mask[1][3] = fmt->rsvd_bits_mask[0][3]; + fmt->rsvd_bits_mask[1][2] = high_bits_rsvd | rsvd_bits(12, 29) | large_1g_rsvd; + fmt->rsvd_bits_mask[1][1] = high_bits_rsvd | rsvd_bits(12, 20) | large_2m_rsvd; + fmt->rsvd_bits_mask[1][0] = fmt->rsvd_bits_mask[0][0]; bad_mt_xwr = 0xFFull << (2 * 8); /* bits 3..5 must not be 2 */ bad_mt_xwr |= 0xFFull << (3 * 8); /* bits 3..5 must not be 3 */ @@ -5433,13 +5433,13 @@ static void __reset_rsvds_bits_mask_ept(struct rsvd_bits_validate *rsvd_check, /* bits 0..2 must not be 100 unless VMX capabilities allow it */ bad_mt_xwr |= REPEAT_BYTE(1ull << 4); } - rsvd_check->bad_mt_xwr = bad_mt_xwr; + fmt->bad_mt_xwr = bad_mt_xwr; } static void reset_rsvds_bits_mask_ept(struct kvm_vcpu *vcpu, bool execonly, int huge_page_level) { - __reset_rsvds_bits_mask_ept(&vcpu->arch.tdp_walk.fmt.guest_rsvd_check, + __reset_rsvds_bits_mask_ept(&vcpu->arch.tdp_walk.fmt, vcpu->arch.reserved_gpa_bits, execonly, huge_page_level); } @@ -5461,13 +5461,13 @@ static void reset_shadow_zero_bits_mask(struct kvm_vcpu *vcpu, 
bool is_amd = true; /* KVM doesn't use 2-level page tables for the shadow MMU. */ bool is_pse = false; - struct rsvd_bits_validate *shadow_zero_check; + struct kvm_page_format *fmt; int i; WARN_ON_ONCE(context->root_role.level < PT32E_ROOT_LEVEL); - shadow_zero_check = &context->shadow_zero_check; - __reset_rsvds_bits_mask(shadow_zero_check, reserved_hpa_bits(), + fmt = &context->fmt; + __reset_rsvds_bits_mask(fmt, reserved_hpa_bits(), context->root_role.level, context->root_role.efer_nx, guest_cpu_cap_has(vcpu, X86_FEATURE_GBPAGES), @@ -5483,10 +5483,10 @@ static void reset_shadow_zero_bits_mask(struct kvm_vcpu *vcpu, * Bits in shadow_me_mask but not in shadow_me_value are * not allowed to be set. */ - shadow_zero_check->rsvd_bits_mask[0][i] |= shadow_me_mask; - shadow_zero_check->rsvd_bits_mask[1][i] |= shadow_me_mask; - shadow_zero_check->rsvd_bits_mask[0][i] &= ~shadow_me_value; - shadow_zero_check->rsvd_bits_mask[1][i] &= ~shadow_me_value; + fmt->rsvd_bits_mask[0][i] |= shadow_me_mask; + fmt->rsvd_bits_mask[1][i] |= shadow_me_mask; + fmt->rsvd_bits_mask[0][i] &= ~shadow_me_value; + fmt->rsvd_bits_mask[1][i] &= ~shadow_me_value; } } @@ -5503,18 +5503,18 @@ static inline bool boot_cpu_is_amd(void) */ static void reset_tdp_shadow_zero_bits_mask(struct kvm_mmu *context) { - struct rsvd_bits_validate *shadow_zero_check; + struct kvm_page_format *fmt; int i; - shadow_zero_check = &context->shadow_zero_check; + fmt = &context->fmt; if (boot_cpu_is_amd()) - __reset_rsvds_bits_mask(shadow_zero_check, reserved_hpa_bits(), + __reset_rsvds_bits_mask(fmt, reserved_hpa_bits(), context->root_role.level, true, boot_cpu_has(X86_FEATURE_GBPAGES), false, true); else - __reset_rsvds_bits_mask_ept(shadow_zero_check, + __reset_rsvds_bits_mask_ept(fmt, reserved_hpa_bits(), false, max_huge_page_level); @@ -5522,8 +5522,8 @@ static void reset_tdp_shadow_zero_bits_mask(struct kvm_mmu *context) return; for (i = context->root_role.level; --i >= 0;) { - 
shadow_zero_check->rsvd_bits_mask[0][i] &= ~shadow_me_mask; - shadow_zero_check->rsvd_bits_mask[1][i] &= ~shadow_me_mask; + fmt->rsvd_bits_mask[0][i] &= ~shadow_me_mask; + fmt->rsvd_bits_mask[1][i] &= ~shadow_me_mask; } } @@ -5534,7 +5534,7 @@ static void reset_tdp_shadow_zero_bits_mask(struct kvm_mmu *context) static void reset_ept_shadow_zero_bits_mask(struct kvm_mmu *context, bool execonly) { - __reset_rsvds_bits_mask_ept(&context->shadow_zero_check, + __reset_rsvds_bits_mask_ept(&context->fmt, reserved_hpa_bits(), execonly, max_huge_page_level); } diff --git a/arch/x86/kvm/mmu/paging_tmpl.h b/arch/x86/kvm/mmu/paging_tmpl.h index c9e2e7a41a4b..e86d5b9a4d6c 100644 --- a/arch/x86/kvm/mmu/paging_tmpl.h +++ b/arch/x86/kvm/mmu/paging_tmpl.h @@ -138,19 +138,19 @@ static inline int FNAME(is_present_gpte)(struct kvm_pagewalk *w, #endif } -static bool FNAME(is_bad_mt_xwr)(struct rsvd_bits_validate *rsvd_check, u64 gpte) +static bool FNAME(is_bad_mt_xwr)(struct kvm_page_format *fmt, u64 gpte) { #if PTTYPE != PTTYPE_EPT return false; #else - return __is_bad_mt_xwr(rsvd_check, gpte); + return __is_bad_mt_xwr(fmt, gpte); #endif } static bool FNAME(is_rsvd_bits_set)(struct kvm_page_format *fmt, u64 gpte, int level) { - return __is_rsvd_bits_set(&fmt->guest_rsvd_check, gpte, level) || - FNAME(is_bad_mt_xwr)(&fmt->guest_rsvd_check, gpte); + return __is_rsvd_bits_set(fmt, gpte, level) || + FNAME(is_bad_mt_xwr)(fmt, gpte); } static bool FNAME(prefetch_invalid_gpte)(struct kvm_vcpu *vcpu, diff --git a/arch/x86/kvm/mmu/spte.c b/arch/x86/kvm/mmu/spte.c index d2f5f7dd8fe1..bdf72a98c19c 100644 --- a/arch/x86/kvm/mmu/spte.c +++ b/arch/x86/kvm/mmu/spte.c @@ -280,9 +280,9 @@ bool make_spte(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp, if (prefetch && !synchronizing) spte = mark_spte_for_access_track(spte); - WARN_ONCE(is_rsvd_spte(&vcpu->arch.mmu->shadow_zero_check, spte, level), + WARN_ONCE(is_rsvd_spte(&vcpu->arch.mmu->fmt, spte, level), "spte = 0x%llx, level = %d, rsvd bits = 
0x%llx", spte, level, - get_rsvd_bits(&vcpu->arch.mmu->shadow_zero_check, spte, level)); + get_rsvd_bits(&vcpu->arch.mmu->fmt, spte, level)); /* * Mark the memslot dirty *after* modifying it for access tracking. diff --git a/arch/x86/kvm/mmu/spte.h b/arch/x86/kvm/mmu/spte.h index 13eea94dd212..918533e61b98 100644 --- a/arch/x86/kvm/mmu/spte.h +++ b/arch/x86/kvm/mmu/spte.h @@ -378,33 +378,33 @@ static inline bool is_accessed_spte(u64 spte) return spte & shadow_accessed_mask; } -static inline u64 get_rsvd_bits(struct rsvd_bits_validate *rsvd_check, u64 pte, +static inline u64 get_rsvd_bits(struct kvm_page_format *fmt, u64 pte, int level) { int bit7 = (pte >> 7) & 1; - return rsvd_check->rsvd_bits_mask[bit7][level-1]; + return fmt->rsvd_bits_mask[bit7][level-1]; } -static inline bool __is_rsvd_bits_set(struct rsvd_bits_validate *rsvd_check, +static inline bool __is_rsvd_bits_set(struct kvm_page_format *fmt, u64 pte, int level) { - return pte & get_rsvd_bits(rsvd_check, pte, level); + return pte & get_rsvd_bits(fmt, pte, level); } -static inline bool __is_bad_mt_xwr(struct rsvd_bits_validate *rsvd_check, +static inline bool __is_bad_mt_xwr(struct kvm_page_format *fmt, u64 pte) { if (pte & VMX_EPT_USER_EXECUTABLE_MASK) pte |= VMX_EPT_EXECUTABLE_MASK; - return rsvd_check->bad_mt_xwr & BIT_ULL(pte & 0x3f); + return fmt->bad_mt_xwr & BIT_ULL(pte & 0x3f); } -static __always_inline bool is_rsvd_spte(struct rsvd_bits_validate *rsvd_check, +static __always_inline bool is_rsvd_spte(struct kvm_page_format *fmt, u64 spte, int level) { - return __is_bad_mt_xwr(rsvd_check, spte) || - __is_rsvd_bits_set(rsvd_check, spte, level); + return __is_bad_mt_xwr(fmt, spte) || + __is_rsvd_bits_set(fmt, spte, level); } /* diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index 0717dcd2d37d..76a9ec1b2380 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -8703,7 +8703,7 @@ __init int vmx_hardware_setup(void) /* * Setup shadow_me_value/shadow_me_mask to include MKTME 
KeyID - * bits to shadow_zero_check. + * bits in the MMU's struct kvm_page_format. */ vmx_setup_me_spte_mask(); -- 2.52.0