Handle Machine Checks (#MC) that happen on VM-Enter (VMX or TDX) outside
of KVM's fastpath so that as much host state as possible is re-loaded
before invoking the kernel's #MC handler.  The only requirement is that
KVM invokes the #MC handler before enabling IRQs (and even that could
_probably_ be relaxed to handling #MCs before enabling preemption).

Waiting to handle #MCs until "more" host state is loaded hardens KVM
against flaws in the #MC handler, which has historically been quite
brittle.  E.g. prior to commit 5567d11c21a1 ("x86/mce: Send #MC singal
from task work"), the #MC code could trigger a schedule() with IRQs and
preemption disabled.  That led to a KVM hack-a-fix in commit
1811d979c716 ("x86/kvm: move kvm_load/put_guest_xcr0 into atomic
context").

Signed-off-by: Sean Christopherson <seanjc@google.com>
---
 arch/x86/kvm/vmx/main.c | 13 ++++++++++++-
 arch/x86/kvm/vmx/tdx.c  |  3 ---
 arch/x86/kvm/vmx/vmx.c  |  3 ---
 3 files changed, 12 insertions(+), 7 deletions(-)

diff --git a/arch/x86/kvm/vmx/main.c b/arch/x86/kvm/vmx/main.c
index 0eb2773b2ae2..1beaec5b9727 100644
--- a/arch/x86/kvm/vmx/main.c
+++ b/arch/x86/kvm/vmx/main.c
@@ -608,6 +608,17 @@ static void vt_load_mmu_pgd(struct kvm_vcpu *vcpu, hpa_t root_hpa,
 	vmx_load_mmu_pgd(vcpu, root_hpa, pgd_level);
 }
 
+static void vt_handle_exit_irqoff(struct kvm_vcpu *vcpu)
+{
+	if (unlikely((u16)vmx_get_exit_reason(vcpu).basic == EXIT_REASON_MCE_DURING_VMENTRY))
+		kvm_machine_check();
+
+	if (is_td_vcpu(vcpu))
+		return;
+
+	return vmx_handle_exit_irqoff(vcpu);
+}
+
 static void vt_set_interrupt_shadow(struct kvm_vcpu *vcpu, int mask)
 {
 	if (is_td_vcpu(vcpu))
@@ -969,7 +980,7 @@ struct kvm_x86_ops vt_x86_ops __initdata = {
 	.load_mmu_pgd = vt_op(load_mmu_pgd),
 
 	.check_intercept = vmx_check_intercept,
-	.handle_exit_irqoff = vmx_handle_exit_irqoff,
+	.handle_exit_irqoff = vt_op(handle_exit_irqoff),
 
 	.update_cpu_dirty_logging = vt_op(update_cpu_dirty_logging),
 
diff --git a/arch/x86/kvm/vmx/tdx.c b/arch/x86/kvm/vmx/tdx.c
index 326db9b9c567..a2f6ba3268d1 100644
--- a/arch/x86/kvm/vmx/tdx.c
+++ b/arch/x86/kvm/vmx/tdx.c
@@ -1069,9 +1069,6 @@ fastpath_t tdx_vcpu_run(struct kvm_vcpu *vcpu, u64 run_flags)
 	if (unlikely((tdx->vp_enter_ret & TDX_SW_ERROR) == TDX_SW_ERROR))
 		return EXIT_FASTPATH_NONE;
 
-	if (unlikely(vmx_get_exit_reason(vcpu).basic == EXIT_REASON_MCE_DURING_VMENTRY))
-		kvm_machine_check();
-
 	trace_kvm_exit(vcpu, KVM_ISA_VMX);
 
 	if (unlikely(tdx_failed_vmentry(vcpu)))
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 1021d3b65ea0..123dae8cf46b 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -7527,9 +7527,6 @@ fastpath_t vmx_vcpu_run(struct kvm_vcpu *vcpu, u64 run_flags)
 	if (unlikely(vmx->fail))
 		return EXIT_FASTPATH_NONE;
 
-	if (unlikely((u16)vmx_get_exit_reason(vcpu).basic == EXIT_REASON_MCE_DURING_VMENTRY))
-		kvm_machine_check();
-
 	trace_kvm_exit(vcpu, KVM_ISA_VMX);
 
 	if (unlikely(vmx_get_exit_reason(vcpu).failed_vmentry))
-- 
2.51.1.930.gacf6e81ea2-goog