Extend __vmx_handle_ept_violation() to understand mmu_has_mbec() and the
difference between user mode and kernel mode fetches. Add the synthetic
page fault bit PFERR_USER_FETCH_MASK, which the EPT violation handler sets
in error_code to signal to permission_fault() that the violation was a
user mode instruction fetch.

Extend permission_fault() to route the mmu_has_mbec() case to a dedicated
helper, mbec_permission_fault(), since with MBEC a simple shift of the
precomputed permission bitmap is no longer enough to decide whether the
access faults.

Signed-off-by: Jon Kohler
---
 arch/x86/include/asm/kvm_host.h |  8 +++-
 arch/x86/kvm/mmu.h              |  7 +++-
 arch/x86/kvm/mmu/mmu.c          | 66 +++++++++++++++++++++++++++++++++
 arch/x86/kvm/mmu/spte.h         | 14 ++++---
 arch/x86/kvm/vmx/common.h       | 22 ++++++-----
 5 files changed, 100 insertions(+), 17 deletions(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 66afcff43ec5..99381c55fceb 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -286,7 +286,13 @@ enum x86_intercept_stage;
  * when the guest was accessing private memory.
  */
 #define PFERR_PRIVATE_ACCESS	BIT_ULL(49)
-#define PFERR_SYNTHETIC_MASK	(PFERR_IMPLICIT_ACCESS | PFERR_PRIVATE_ACCESS)
+/*
+ * USER_FETCH_MASK is a KVM-defined flag used to indicate user fetches when
+ * translating EPT violations for Intel MBEC.
+ */
+#define PFERR_USER_FETCH_MASK	BIT_ULL(50)
+#define PFERR_SYNTHETIC_MASK	(PFERR_IMPLICIT_ACCESS | PFERR_PRIVATE_ACCESS | \
+				 PFERR_USER_FETCH_MASK)
 
 /* apic attention bits */
 #define KVM_APIC_CHECK_VAPIC	0
diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h
index 558a15ff82e6..d7bf679183f7 100644
--- a/arch/x86/kvm/mmu.h
+++ b/arch/x86/kvm/mmu.h
@@ -95,6 +95,8 @@ int kvm_handle_page_fault(struct kvm_vcpu *vcpu, u64 error_code,
 			  u64 fault_address, char *insn, int insn_len);
 void __kvm_mmu_refresh_passthrough_bits(struct kvm_vcpu *vcpu,
 					struct kvm_mmu *mmu);
+bool mbec_permission_fault(struct kvm_vcpu *vcpu, unsigned int pte_access,
+			   unsigned int pfec);
 
 int kvm_mmu_load(struct kvm_vcpu *vcpu);
 void kvm_mmu_unload(struct kvm_vcpu *vcpu);
@@ -216,7 +218,10 @@ static inline u8 permission_fault(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
 
 		kvm_mmu_refresh_passthrough_bits(vcpu, mmu);
 
-	fault = (mmu->permissions[index] >> pte_access) & 1;
+	if (mmu_has_mbec(vcpu))
+		fault = mbec_permission_fault(vcpu, pte_access, pfec);
+	else
+		fault = (mmu->permissions[index] >> pte_access) & 1;
 
 	WARN_ON_ONCE(pfec & (PFERR_PK_MASK | PFERR_SS_MASK | PFERR_RSVD_MASK));
 	if (unlikely(mmu->pkru_mask)) {
diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index b0eb8d4c5ef2..673f2cebc36c 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -5664,6 +5664,72 @@ void __kvm_mmu_refresh_passthrough_bits(struct kvm_vcpu *vcpu,
 	reset_guest_paging_metadata(vcpu, mmu);
 }
 
+/*
+ * Check permissions for MBEC-enabled EPT accesses.
+ * Handles all permission checks with MBEC awareness (UX/KX distinction).
+ *
+ * Returns true if the access should fault, false otherwise.
+ */
+bool mbec_permission_fault(struct kvm_vcpu *vcpu, unsigned int pte_access,
+			   unsigned int pfec)
+{
+	bool has_ux = pte_access & ACC_USER_EXEC_MASK;
+	bool has_kx = pte_access & ACC_EXEC_MASK;
+	bool write_fault = false;
+	bool fetch_fault = false;
+	bool read_fault = false;
+
+	/*
+	 * Fault conditions:
+	 * - Write fault: pfec has WRITE_MASK set but pte_access lacks
+	 *   WRITE permission
+	 * - Fetch fault: pfec has FETCH_MASK set but pte_access lacks the
+	 *   matching execute permission. For MBEC, check both the guest
+	 *   PTE U/S bits and the CPL; the two are additive:
+	 *   * If neither UX nor KX is set:
+	 *     always fault (no execute permission at all)
+	 *   * User fetch (guest PTE user OR CPL > 0):
+	 *     requires UX permission (has_ux)
+	 *   * Kernel fetch (guest PTE supervisor AND CPL == 0):
+	 *     requires KX permission (has_kx)
+	 * - Read fault: pfec has USER_MASK set (read access in EPT
+	 *   context) but pte_access lacks read permission
+	 *
+	 * Note: in EPT context, PFERR_USER_MASK indicates a read access,
+	 * not a user-mode access. This differs from regular paging,
+	 * where PFERR_USER_MASK means user mode (CPL == 3). Likewise,
+	 * ACC_USER_MASK in EPT context maps to VMX_EPT_READABLE_MASK
+	 * (bit 0), the readable permission.
+	 */
+
+	/* Check write permission independently */
+	if (pfec & PFERR_WRITE_MASK)
+		write_fault = !(pte_access & ACC_WRITE_MASK);
+
+	/* Check fetch permission independently */
+	if (pfec & PFERR_FETCH_MASK) {
+		/*
+		 * For MBEC, check execute permissions. A fetch faults if:
+		 * - a user fetch (guest PTE user OR CPL > 0) lacks UX permission
+		 * - a kernel fetch (guest PTE supervisor AND CPL == 0) lacks KX permission
+		 */
+		bool is_user_fetch = (pfec & PFERR_USER_FETCH_MASK) ||
+				     (kvm_x86_call(get_cpl)(vcpu) > 0);
+
+		/*
+		 * A user-mode fetch requires user-execute permission (UX).
+		 * A kernel-mode fetch requires kernel-execute permission (KX).
+		 */
+		fetch_fault = is_user_fetch ? !has_ux : !has_kx;
+	}
+
+	/* Check read permission: PFERR_USER_MASK indicates read in EPT */
+	if (pfec & PFERR_USER_MASK)
+		read_fault = !(pte_access & ACC_USER_MASK);
+
+	return write_fault || fetch_fault || read_fault;
+}
+
 static inline int kvm_mmu_get_tdp_level(struct kvm_vcpu *vcpu)
 {
 	int maxpa;
diff --git a/arch/x86/kvm/mmu/spte.h b/arch/x86/kvm/mmu/spte.h
index 74fb1fe60d89..cb94f039898d 100644
--- a/arch/x86/kvm/mmu/spte.h
+++ b/arch/x86/kvm/mmu/spte.h
@@ -383,14 +383,18 @@ static inline bool is_executable_pte_fault(u64 spte,
 	 */
 	if (WARN_ON_ONCE(!shadow_x_mask))
 		return fault->user || !(spte & shadow_user_mask);
-
 	/*
-	 * For TDP MMU, the fault->user bit indicates a read access,
-	 * not the guest's CPL. For execute faults, check both execute
-	 * permissions since we don't know the actual CPL.
+	 * For the TDP MMU, fault->user indicates a read access, not the
+	 * CPL. For execute faults, we don't know the CPL here, so we
+	 * can't definitively check permissions. Being optimistic and
+	 * accepting any execute permission can lead to infinite fault
+	 * loops if the wrong type of execute permission is present
+	 * (e.g. UX only for a kernel fetch). The safe approach is to be
+	 * pessimistic and return false, forcing the fault down the slow
+	 * path, which can do a full permission check.
 	 */
 	if (fault->is_tdp)
-		return spte & (shadow_x_mask | shadow_ux_mask);
+		return false;
 
 	return spte & (fault->user ? shadow_ux_mask : shadow_x_mask);
 }
diff --git a/arch/x86/kvm/vmx/common.h b/arch/x86/kvm/vmx/common.h
index adf925500b9e..96bdca78696d 100644
--- a/arch/x86/kvm/vmx/common.h
+++ b/arch/x86/kvm/vmx/common.h
@@ -83,6 +83,7 @@ static inline bool vt_is_tdx_private_gpa(struct kvm *kvm, gpa_t gpa)
 static inline int __vmx_handle_ept_violation(struct kvm_vcpu *vcpu, gpa_t gpa,
 					     unsigned long exit_qualification)
 {
+	unsigned long rwx_mask;
 	u64 error_code;
 
 	/* Is it a read fault? */
@@ -92,16 +93,17 @@ static inline int __vmx_handle_ept_violation(struct kvm_vcpu *vcpu, gpa_t gpa,
 	error_code |= (exit_qualification & EPT_VIOLATION_ACC_WRITE)
 		      ? PFERR_WRITE_MASK : 0;
 	/* Is it a fetch fault? */
-	error_code |= (exit_qualification & EPT_VIOLATION_ACC_INSTR)
-		      ? PFERR_FETCH_MASK : 0;
-	/*
-	 * ept page table entry is present?
-	 * note: unconditionally clear USER_EXEC until mode-based
-	 * execute control is implemented
-	 */
-	error_code |= (exit_qualification &
-		       (EPT_VIOLATION_PROT_MASK & ~EPT_VIOLATION_PROT_USER_EXEC))
-		      ? PFERR_PRESENT_MASK : 0;
+	if (exit_qualification & EPT_VIOLATION_ACC_INSTR) {
+		error_code |= PFERR_FETCH_MASK;
+		if (mmu_has_mbec(vcpu) &&
+		    exit_qualification & EPT_VIOLATION_PROT_USER_EXEC)
+			error_code |= PFERR_USER_FETCH_MASK;
+	}
+	/* ept page table entry is present? */
+	rwx_mask = EPT_VIOLATION_PROT_MASK;
+	if (mmu_has_mbec(vcpu))
+		rwx_mask |= EPT_VIOLATION_PROT_USER_EXEC;
+	error_code |= (exit_qualification & rwx_mask) ? PFERR_PRESENT_MASK : 0;
 
 	if (exit_qualification & EPT_VIOLATION_GVA_IS_VALID)
 		error_code |= (exit_qualification & EPT_VIOLATION_GVA_TRANSLATED) ?
-- 
2.43.0
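
For reference, the UX/KX decision implemented by mbec_permission_fault() can
be modelled in a few lines of standalone C. The sketch below is illustrative
only: the ACC_*/PFEC_* constants are local stand-ins for the KVM-internal
ACC_* and PFERR_* definitions, the CPL check is folded into the pfec value,
and none of it is part of the patch itself.

/* Stand-alone model of the MBEC permission check; not kernel code. */
#include <stdbool.h>
#include <stdio.h>

#define ACC_READ   (1u << 0)   /* stands in for ACC_USER_MASK (read in EPT) */
#define ACC_WRITE  (1u << 1)   /* stands in for ACC_WRITE_MASK */
#define ACC_KX     (1u << 2)   /* stands in for ACC_EXEC_MASK (kernel exec) */
#define ACC_UX     (1u << 3)   /* stands in for ACC_USER_EXEC_MASK (user exec) */

#define PFEC_WRITE      (1u << 0)   /* stands in for PFERR_WRITE_MASK */
#define PFEC_READ       (1u << 1)   /* stands in for PFERR_USER_MASK (read in EPT) */
#define PFEC_FETCH      (1u << 2)   /* stands in for PFERR_FETCH_MASK */
#define PFEC_USER_FETCH (1u << 3)   /* stands in for PFERR_USER_FETCH_MASK */

/* Same decision table as mbec_permission_fault(), with CPL folded into pfec. */
static bool mbec_faults(unsigned int pte_access, unsigned int pfec)
{
	bool fault = false;

	if (pfec & PFEC_WRITE)
		fault |= !(pte_access & ACC_WRITE);

	if (pfec & PFEC_FETCH) {
		bool user_fetch = pfec & PFEC_USER_FETCH;

		/* User fetches need UX, kernel fetches need KX. */
		fault |= user_fetch ? !(pte_access & ACC_UX)
				    : !(pte_access & ACC_KX);
	}

	if (pfec & PFEC_READ)
		fault |= !(pte_access & ACC_READ);

	return fault;
}

int main(void)
{
	/* A page that is readable and user-executable, but not kernel-executable. */
	unsigned int access = ACC_READ | ACC_UX;

	printf("user fetch:   %s\n",
	       mbec_faults(access, PFEC_FETCH | PFEC_USER_FETCH) ? "fault" : "ok");
	printf("kernel fetch: %s\n",
	       mbec_faults(access, PFEC_FETCH) ? "fault" : "ok");
	printf("write:        %s\n",
	       mbec_faults(access, PFEC_WRITE) ? "fault" : "ok");
	return 0;
}

Running this prints "ok" for the user fetch and "fault" for the kernel fetch
and the write, matching the table in the comment above mbec_permission_fault().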