Move the VMXON setup from the KVM initialization path to the CPU startup phase to guarantee that hardware virtualization is enabled early and without interruption. As a result, KVM, often loaded as a kernel module, no longer needs to worry about whether or not VMXON has been executed on a CPU (e.g., CPU offline events or system reboots while KVM is loading). Signed-off-by: Xin Li (Intel) --- arch/x86/include/asm/processor.h | 1 + arch/x86/include/asm/vmx.h | 5 ++ arch/x86/kernel/cpu/common.c | 91 ++++++++++++++++++++++++ arch/x86/kvm/vmx/vmcs.h | 5 -- arch/x86/kvm/vmx/vmx.c | 117 ++----------------------------- arch/x86/power/cpu.c | 7 +- 6 files changed, 107 insertions(+), 119 deletions(-) diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index bde58f6510ac..59660428f46d 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -230,6 +230,7 @@ void init_cpu_devs(void); void get_cpu_vendor(struct cpuinfo_x86 *c); extern void early_cpu_init(void); extern void identify_secondary_cpu(unsigned int cpu); +extern void cpu_enable_virtualization(void); extern void print_cpu_info(struct cpuinfo_x86 *); void print_cpu_msr(struct cpuinfo_x86 *); diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h index cca7d6641287..736d04c1b2fc 100644 --- a/arch/x86/include/asm/vmx.h +++ b/arch/x86/include/asm/vmx.h @@ -20,6 +20,11 @@ #include #include +struct vmcs_hdr { + u32 revision_id:31; + u32 shadow_vmcs:1; +}; + #define VMCS_CONTROL_BIT(x) BIT(VMX_FEATURE_##x & 0x1f) /* diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 34a054181c4d..e36877b5a240 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -72,6 +72,7 @@ #include #include #include +#include #include "cpu.h" @@ -1923,6 +1924,84 @@ static void generic_identify(struct cpuinfo_x86 *c) #endif } +static bool is_vmx_supported(void) +{ + int cpu = raw_smp_processor_id(); + + if (!(cpuid_ecx(1) & (1 << (X86_FEATURE_VMX & 31)))) { + /* May not be an Intel CPU */ + pr_info("VMX not supported by CPU%d\n", cpu); + return false; + } + + if (!this_cpu_has(X86_FEATURE_MSR_IA32_FEAT_CTL) || + !this_cpu_has(X86_FEATURE_VMX)) { + pr_err("VMX not enabled (by BIOS) in MSR_IA32_FEAT_CTL on CPU%d\n", cpu); + return false; + } + + return true; +} + +/* IA-32 SDM Vol 3B: VMCS size is never greater than 4kB. */ +union vmxon_vmcs { + struct vmcs_hdr hdr; + char data[PAGE_SIZE]; +}; + +static DEFINE_PER_CPU_PAGE_ALIGNED(union vmxon_vmcs, vmxon_vmcs); + +/* + * Executed during the CPU startup phase to execute VMXON to enable VMX. This + * ensures that KVM, often loaded as a kernel module, no longer needs to worry + * about whether or not VMXON has been executed on a CPU (e.g., CPU offline + * events or system reboots while KVM is loading). + * + * VMXON is not expected to fault, but fault handling is kept as a precaution + * against any unexpected code paths that might trigger it and can be removed + * later if unnecessary. + */ +void cpu_enable_virtualization(void) +{ + u64 vmxon_pointer = __pa(this_cpu_ptr(&vmxon_vmcs)); + int cpu = raw_smp_processor_id(); + u64 basic_msr; + + if (!is_vmx_supported()) + return; + + if (cr4_read_shadow() & X86_CR4_VMXE) { + pr_err("VMX already enabled on CPU%d\n", cpu); + return; + } + + memset(this_cpu_ptr(&vmxon_vmcs), 0, PAGE_SIZE); + + /* + * Even though not explicitly documented by TLFS, VMXArea passed as + * VMXON argument should still be marked with revision_id reported by + * physical CPU. 
+ */ + rdmsrq(MSR_IA32_VMX_BASIC, basic_msr); + this_cpu_ptr(&vmxon_vmcs)->hdr.revision_id = vmx_basic_vmcs_revision_id(basic_msr); + + intel_pt_handle_vmx(1); + + cr4_set_bits(X86_CR4_VMXE); + + asm goto("1: vmxon %[vmxon_pointer]\n\t" + _ASM_EXTABLE(1b, %l[fault]) + : : [vmxon_pointer] "m"(vmxon_pointer) + : : fault); + + return; + +fault: + pr_err("VMXON faulted on CPU%d\n", cpu); + cr4_clear_bits(X86_CR4_VMXE); + intel_pt_handle_vmx(0); +} + /* * This does the hard work of actually picking apart the CPU stuff... */ @@ -2120,6 +2199,12 @@ void identify_secondary_cpu(unsigned int cpu) tsx_ap_init(); c->initialized = true; + + /* + * Enable AP virtualization immediately after initializing the per-CPU + * cpuinfo_x86 structure, ensuring that this_cpu_has() operates correctly. + */ + cpu_enable_virtualization(); } void print_cpu_info(struct cpuinfo_x86 *c) @@ -2551,6 +2636,12 @@ void __init arch_cpu_finalize_init(void) *c = boot_cpu_data; c->initialized = true; + /* + * Enable BSP virtualization right after the BSP cpuinfo_x86 structure + * is initialized to ensure this_cpu_has() works as expected. + */ + cpu_enable_virtualization(); + alternative_instructions(); if (IS_ENABLED(CONFIG_X86_64)) { diff --git a/arch/x86/kvm/vmx/vmcs.h b/arch/x86/kvm/vmx/vmcs.h index b25625314658..da5631924432 100644 --- a/arch/x86/kvm/vmx/vmcs.h +++ b/arch/x86/kvm/vmx/vmcs.h @@ -13,11 +13,6 @@ #define ROL16(val, n) ((u16)(((u16)(val) << (n)) | ((u16)(val) >> (16 - (n))))) -struct vmcs_hdr { - u32 revision_id:31; - u32 shadow_vmcs:1; -}; - struct vmcs { struct vmcs_hdr hdr; u32 abort; diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index aa157fe5b7b3..f6742df0c4ff 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -468,7 +468,6 @@ noinline void invept_error(unsigned long ext, u64 eptp) vmx_insn_failed("invept failed: ext=0x%lx eptp=%llx\n", ext, eptp); } -static DEFINE_PER_CPU(struct vmcs *, vmxarea); DEFINE_PER_CPU(struct vmcs *, current_vmcs); /* * We maintain a per-CPU linked-list of VMCS loaded on that CPU. This is needed @@ -2736,43 +2735,14 @@ static int setup_vmcs_config(struct vmcs_config *vmcs_conf, return 0; } -static bool __kvm_is_vmx_supported(void) -{ - int cpu = smp_processor_id(); - - if (!(cpuid_ecx(1) & feature_bit(VMX))) { - pr_err("VMX not supported by CPU %d\n", cpu); - return false; - } - - if (!this_cpu_has(X86_FEATURE_MSR_IA32_FEAT_CTL) || - !this_cpu_has(X86_FEATURE_VMX)) { - pr_err("VMX not enabled (by BIOS) in MSR_IA32_FEAT_CTL on CPU %d\n", cpu); - return false; - } - - return true; -} - -static bool kvm_is_vmx_supported(void) -{ - bool supported; - - migrate_disable(); - supported = __kvm_is_vmx_supported(); - migrate_enable(); - - return supported; -} - int vmx_check_processor_compat(void) { int cpu = raw_smp_processor_id(); struct vmcs_config vmcs_conf; struct vmx_capability vmx_cap; - if (!__kvm_is_vmx_supported()) - return -EIO; + if (!(cr4_read_shadow() & X86_CR4_VMXE)) + return -EOPNOTSUPP; if (setup_vmcs_config(&vmcs_conf, &vmx_cap) < 0) { pr_err("Failed to setup VMCS config on CPU %d\n", cpu); @@ -2787,34 +2757,12 @@ int vmx_check_processor_compat(void) return 0; } -static int kvm_cpu_vmxon(u64 vmxon_pointer) -{ - u64 msr; - - cr4_set_bits(X86_CR4_VMXE); - - asm goto("1: vmxon %[vmxon_pointer]\n\t" - _ASM_EXTABLE(1b, %l[fault]) - : : [vmxon_pointer] "m"(vmxon_pointer) - : : fault); - return 0; - -fault: - WARN_ONCE(1, "VMXON faulted, MSR_IA32_FEAT_CTL (0x3a) = 0x%llx\n", - rdmsrq_safe(MSR_IA32_FEAT_CTL, &msr) ? 
0xdeadbeef : msr); - cr4_clear_bits(X86_CR4_VMXE); - - return -EFAULT; -} - int vmx_enable_virtualization_cpu(void) { int cpu = raw_smp_processor_id(); - u64 phys_addr = __pa(per_cpu(vmxarea, cpu)); - int r; - if (cr4_read_shadow() & X86_CR4_VMXE) - return -EBUSY; + if (!(cr4_read_shadow() & X86_CR4_VMXE)) + return -EOPNOTSUPP; /* * This can happen if we hot-added a CPU but failed to allocate @@ -2823,14 +2771,6 @@ int vmx_enable_virtualization_cpu(void) if (kvm_is_using_evmcs() && !hv_get_vp_assist_page(cpu)) return -EFAULT; - intel_pt_handle_vmx(1); - - r = kvm_cpu_vmxon(phys_addr); - if (r) { - intel_pt_handle_vmx(0); - return r; - } - return 0; } @@ -2931,47 +2871,6 @@ int alloc_loaded_vmcs(struct loaded_vmcs *loaded_vmcs) return -ENOMEM; } -static void free_kvm_area(void) -{ - int cpu; - - for_each_possible_cpu(cpu) { - free_vmcs(per_cpu(vmxarea, cpu)); - per_cpu(vmxarea, cpu) = NULL; - } -} - -static __init int alloc_kvm_area(void) -{ - int cpu; - - for_each_possible_cpu(cpu) { - struct vmcs *vmcs; - - vmcs = alloc_vmcs_cpu(false, cpu, GFP_KERNEL); - if (!vmcs) { - free_kvm_area(); - return -ENOMEM; - } - - /* - * When eVMCS is enabled, alloc_vmcs_cpu() sets - * vmcs->revision_id to KVM_EVMCS_VERSION instead of - * revision_id reported by MSR_IA32_VMX_BASIC. - * - * However, even though not explicitly documented by - * TLFS, VMXArea passed as VMXON argument should - * still be marked with revision_id reported by - * physical CPU. - */ - if (kvm_is_using_evmcs()) - vmcs->hdr.revision_id = vmx_basic_vmcs_revision_id(vmcs_config.basic); - - per_cpu(vmxarea, cpu) = vmcs; - } - return 0; -} - static void fix_pmode_seg(struct kvm_vcpu *vcpu, int seg, struct kvm_segment *save) { @@ -8204,8 +8103,6 @@ void vmx_hardware_unsetup(void) if (nested) nested_vmx_hardware_unsetup(); - - free_kvm_area(); } void vmx_vm_destroy(struct kvm *kvm) @@ -8499,10 +8396,6 @@ __init int vmx_hardware_setup(void) vmx_set_cpu_caps(); - r = alloc_kvm_area(); - if (r && nested) - nested_vmx_hardware_unsetup(); - kvm_set_posted_intr_wakeup_handler(pi_wakeup_handler); /* @@ -8554,7 +8447,7 @@ int __init vmx_init(void) KVM_SANITY_CHECK_VM_STRUCT_SIZE(kvm_vmx); - if (!kvm_is_vmx_supported()) + if (!(cr4_read_shadow() & X86_CR4_VMXE)) return -EOPNOTSUPP; /* diff --git a/arch/x86/power/cpu.c b/arch/x86/power/cpu.c index 916441f5e85c..0eec314b79c2 100644 --- a/arch/x86/power/cpu.c +++ b/arch/x86/power/cpu.c @@ -206,11 +206,11 @@ static void notrace __restore_processor_state(struct saved_context *ctxt) /* cr4 was introduced in the Pentium CPU */ #ifdef CONFIG_X86_32 if (ctxt->cr4) - __write_cr4(ctxt->cr4); + __write_cr4(ctxt->cr4 & ~X86_CR4_VMXE); #else /* CONFIG X86_64 */ wrmsrq(MSR_EFER, ctxt->efer); - __write_cr4(ctxt->cr4); + __write_cr4(ctxt->cr4 & ~X86_CR4_VMXE); #endif write_cr3(ctxt->cr3); write_cr2(ctxt->cr2); @@ -291,6 +291,9 @@ static void notrace __restore_processor_state(struct saved_context *ctxt) * because some of the MSRs are "emulated" in microcode. */ msr_restore_context(ctxt); + + if (ctxt->cr4 & X86_CR4_VMXE) + cpu_enable_virtualization(); } /* Needed by apm.c */ -- 2.51.0 Relocate VMXOFF from the KVM module unload path to the CPU shutdown phase. This simplifies proper virtualization cleanup during system shutdown, CPU hotplug (online/offline cycles), and suspend-to-disk (S4) transitions. Since INIT interrupts are blocked during VMX operation, VMXOFF must run just before a CPU shuts down to allow it to be brought back online later. As a result, VMX instructions are no longer expected to fault. 
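Purely for illustration (this snippet is not part of the patch; the wrapper function and its name are made up for this commit message), the ordering the change relies on in a CPU-offline style path can be sketched as follows, using the helpers that do exist in this series:

	#include <linux/lockdep.h>
	#include <asm/apic.h>		/* apic_soft_disable() */
	#include <asm/processor.h>	/* cpu_disable_virtualization(), added earlier in this series */

	/*
	 * Sketch of the intended ordering: quiesce IPIs first, then execute
	 * VMXOFF, and only afterwards park the CPU, so that a later INIT/SIPI
	 * can bring it back online.
	 */
	static int cpu_offline_ordering_sketch(void)
	{
		lockdep_assert_irqs_disabled();
		apic_soft_disable();		/* no further IPIs reach this CPU */
		cpu_disable_virtualization();	/* VMXOFF, clear CR4.VMXE */
		return 0;			/* caller goes on to park the CPU */
	}
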
Signed-off-by: Xin Li (Intel) --- arch/x86/include/asm/processor.h | 1 + arch/x86/kernel/cpu/common.c | 37 ++++++++++++++++++++++++++++++++ arch/x86/kernel/crash.c | 4 ++++ arch/x86/kernel/process.c | 3 +++ arch/x86/kernel/reboot.c | 11 ++++++---- arch/x86/kernel/smp.c | 5 +++++ arch/x86/kernel/smpboot.c | 6 ++++++ arch/x86/kvm/vmx/vmx.c | 30 -------------------------- arch/x86/power/cpu.c | 3 +++ 9 files changed, 66 insertions(+), 34 deletions(-) diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 59660428f46d..0bfd4eb1e9e2 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -231,6 +231,7 @@ void get_cpu_vendor(struct cpuinfo_x86 *c); extern void early_cpu_init(void); extern void identify_secondary_cpu(unsigned int cpu); extern void cpu_enable_virtualization(void); +extern void cpu_disable_virtualization(void); extern void print_cpu_info(struct cpuinfo_x86 *); void print_cpu_msr(struct cpuinfo_x86 *); diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index e36877b5a240..39b9be9a2fb1 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -2002,6 +2002,43 @@ void cpu_enable_virtualization(void) intel_pt_handle_vmx(0); } +/* + * Because INIT interrupts are blocked during VMX operation, this function + * must be called just before a CPU shuts down to ensure it can be brought + * back online later. + * + * Consequently, VMX instructions are no longer expected to fault. + * + * Although VMXOFF should not fault, fault handling is retained as a + * precaution against any unexpected code paths that might trigger it and + * can be removed later if unnecessary. + */ +void cpu_disable_virtualization(void) +{ + int cpu = raw_smp_processor_id(); + + if (!is_vmx_supported()) + return; + + if (!(cr4_read_shadow() & X86_CR4_VMXE)) { + pr_err("VMX not enabled or already disabled on CPU%d\n", cpu); + return; + } + + asm goto("1: vmxoff\n\t" + _ASM_EXTABLE(1b, %l[fault]) + ::: "cc", "memory" : fault); + +exit: + cr4_clear_bits(X86_CR4_VMXE); + intel_pt_handle_vmx(0); + return; + +fault: + pr_err("VMXOFF faulted on CPU%d\n", cpu); + goto exit; +} + /* * This does the hard work of actually picking apart the CPU stuff... */ diff --git a/arch/x86/kernel/crash.c b/arch/x86/kernel/crash.c index c6b12bed173d..772c6d350b50 100644 --- a/arch/x86/kernel/crash.c +++ b/arch/x86/kernel/crash.c @@ -111,6 +111,7 @@ void native_machine_crash_shutdown(struct pt_regs *regs) crash_smp_send_stop(); + /* Kept to VMCLEAR loaded VMCSs */ cpu_emergency_disable_virtualization(); /* @@ -141,6 +142,9 @@ void native_machine_crash_shutdown(struct pt_regs *regs) x86_platform.guest.enc_kexec_finish(); crash_save_cpu(regs, smp_processor_id()); + + /* Disable virtualization on the last running CPU, usually the BSP */ + cpu_disable_virtualization(); } #if defined(CONFIG_KEXEC_FILE) || defined(CONFIG_CRASH_HOTPLUG) diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 1b7960cf6eb0..a0f6397b81ab 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -827,6 +827,9 @@ void __noreturn stop_this_cpu(void *dummy) disable_local_APIC(); mcheck_cpu_clear(c); + /* Disable virtualization, usually this is an AP */ + cpu_disable_virtualization(); + /* * Use wbinvd on processors that support SME. 
This provides support * for performing a successful kexec when going from SME inactive diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c index 964f6b0a3d68..7433e634018f 100644 --- a/arch/x86/kernel/reboot.c +++ b/arch/x86/kernel/reboot.c @@ -764,6 +764,9 @@ void native_machine_shutdown(void) if (kexec_in_progress) x86_platform.guest.enc_kexec_finish(); + + /* Disable virtualization on the last running CPU, usually the BSP */ + cpu_disable_virtualization(); } static void __machine_emergency_restart(int emergency) @@ -873,14 +876,14 @@ static int crash_nmi_callback(unsigned int val, struct pt_regs *regs) if (shootdown_callback) shootdown_callback(cpu, regs); - /* - * Prepare the CPU for reboot _after_ invoking the callback so that the - * callback can safely use virtualization instructions, e.g. VMCLEAR. - */ + /* Kept to VMCLEAR loaded VMCSs */ cpu_emergency_disable_virtualization(); atomic_dec(&waiting_for_crash_ipi); + /* Disable virtualization, usually this is an AP */ + cpu_disable_virtualization(); + if (smp_ops.stop_this_cpu) { smp_ops.stop_this_cpu(); BUG(); diff --git a/arch/x86/kernel/smp.c b/arch/x86/kernel/smp.c index b014e6d229f9..eb6a389ba1a9 100644 --- a/arch/x86/kernel/smp.c +++ b/arch/x86/kernel/smp.c @@ -124,7 +124,9 @@ static int smp_stop_nmi_callback(unsigned int val, struct pt_regs *regs) if (raw_smp_processor_id() == atomic_read(&stopping_cpu)) return NMI_HANDLED; + /* Kept to VMCLEAR loaded VMCSs */ cpu_emergency_disable_virtualization(); + stop_this_cpu(NULL); return NMI_HANDLED; @@ -136,7 +138,10 @@ static int smp_stop_nmi_callback(unsigned int val, struct pt_regs *regs) DEFINE_IDTENTRY_SYSVEC(sysvec_reboot) { apic_eoi(); + + /* Kept to VMCLEAR loaded VMCSs */ cpu_emergency_disable_virtualization(); + stop_this_cpu(NULL); } diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 33e166f6ab12..fe3b04f33b3f 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -1229,6 +1229,12 @@ int native_cpu_disable(void) */ apic_soft_disable(); + /* + * IPIs have been disabled as mentioned above, so virtualization + * can now be safely shut down. + */ + cpu_disable_virtualization(); + return 0; } diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index f6742df0c4ff..26af0a8ae08f 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -674,29 +674,6 @@ static int vmx_set_guest_uret_msr(struct vcpu_vmx *vmx, return ret; } -/* - * Disable VMX and clear CR4.VMXE (even if VMXOFF faults) - * - * Note, VMXOFF causes a #UD if the CPU is !post-VMXON, but it's impossible to - * atomically track post-VMXON state, e.g. this may be called in NMI context. - * Eat all faults as all other faults on VMXOFF faults are mode related, i.e. - * faults are guaranteed to be due to the !post-VMXON check unless the CPU is - * magically in RM, VM86, compat mode, or at CPL>0. 
- */ -static int kvm_cpu_vmxoff(void) -{ - asm goto("1: vmxoff\n\t" - _ASM_EXTABLE(1b, %l[fault]) - ::: "cc", "memory" : fault); - - cr4_clear_bits(X86_CR4_VMXE); - return 0; - -fault: - cr4_clear_bits(X86_CR4_VMXE); - return -EIO; -} - void vmx_emergency_disable_virtualization_cpu(void) { int cpu = raw_smp_processor_id(); @@ -719,8 +696,6 @@ void vmx_emergency_disable_virtualization_cpu(void) if (v->shadow_vmcs) vmcs_clear(v->shadow_vmcs); } - - kvm_cpu_vmxoff(); } static void __loaded_vmcs_clear(void *arg) @@ -2788,12 +2763,7 @@ void vmx_disable_virtualization_cpu(void) { vmclear_local_loaded_vmcss(); - if (kvm_cpu_vmxoff()) - kvm_spurious_fault(); - hv_reset_evmcs(); - - intel_pt_handle_vmx(0); } struct vmcs *alloc_vmcs_cpu(bool shadow, int cpu, gfp_t flags) diff --git a/arch/x86/power/cpu.c b/arch/x86/power/cpu.c index 0eec314b79c2..d2c865fdb069 100644 --- a/arch/x86/power/cpu.c +++ b/arch/x86/power/cpu.c @@ -129,6 +129,9 @@ static void __save_processor_state(struct saved_context *ctxt) ctxt->misc_enable_saved = !rdmsrq_safe(MSR_IA32_MISC_ENABLE, &ctxt->misc_enable); msr_save_context(ctxt); + + /* Now CR4 is saved, disable VMX and clear CR4.VMXE */ + cpu_disable_virtualization(); } /* Needed by apm.c */ -- 2.51.0 Relocate the VMCLEAR of VMCSs from KVM to cpu_disable_virtualization() in x86. This eliminates the need to call cpu_emergency_disable_virtualization() before cpu_disable_virtualization() and prepares for removing the emergency reboot callback that calls into KVM from the CPU reboot path. Signed-off-by: Xin Li (Intel) --- arch/x86/include/asm/processor.h | 1 + arch/x86/kernel/cpu/common.c | 34 ++++++++++++++++++++++++++++++++ arch/x86/kernel/crash.c | 3 --- arch/x86/kernel/reboot.c | 7 +++---- arch/x86/kernel/smp.c | 6 ------ arch/x86/kvm/vmx/vmcs.h | 5 ++++- arch/x86/kvm/vmx/vmx.c | 34 +++----------------------------- 7 files changed, 45 insertions(+), 45 deletions(-) diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 0bfd4eb1e9e2..d8a28c57248d 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -230,6 +230,7 @@ void init_cpu_devs(void); void get_cpu_vendor(struct cpuinfo_x86 *c); extern void early_cpu_init(void); extern void identify_secondary_cpu(unsigned int cpu); +extern struct list_head* get_loaded_vmcss_on_cpu(int cpu); extern void cpu_enable_virtualization(void); extern void cpu_disable_virtualization(void); extern void print_cpu_info(struct cpuinfo_x86 *); diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 39b9be9a2fb1..73abacf57ed4 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -1950,6 +1950,18 @@ union vmxon_vmcs { }; static DEFINE_PER_CPU_PAGE_ALIGNED(union vmxon_vmcs, vmxon_vmcs); +/* + * We maintain a per-CPU linked-list of VMCS loaded on that CPU. This is needed + * when a CPU is brought down, and we need to VMCLEAR all VMCSs loaded on it. + */ +static DEFINE_PER_CPU(struct list_head, loaded_vmcss_on_cpu); + +/* Export an accessor rather than the raw data */ +struct list_head* get_loaded_vmcss_on_cpu(int cpu) +{ + return &per_cpu(loaded_vmcss_on_cpu, cpu); +} +EXPORT_SYMBOL_GPL(get_loaded_vmcss_on_cpu); /* * Executed during the CPU startup phase to execute VMXON to enable VMX. 
This @@ -1975,6 +1987,8 @@ void cpu_enable_virtualization(void) return; } + INIT_LIST_HEAD(get_loaded_vmcss_on_cpu(cpu)); + memset(this_cpu_ptr(&vmxon_vmcs), 0, PAGE_SIZE); /* @@ -2002,6 +2016,18 @@ void cpu_enable_virtualization(void) intel_pt_handle_vmx(0); } +static __always_inline void vmclear(void *p) +{ + u64 pa = __pa(p); + asm volatile ("vmclear %0" : : "m"(pa) : "cc"); +} + +struct loaded_vmcs_basic { + struct list_head loaded_vmcss_on_cpu_link; + struct vmcs_hdr *vmcs; + struct vmcs_hdr *shadow_vmcs; +}; + /* * Because INIT interrupts are blocked during VMX operation, this function * must be called just before a CPU shuts down to ensure it can be brought @@ -2016,6 +2042,7 @@ void cpu_enable_virtualization(void) void cpu_disable_virtualization(void) { int cpu = raw_smp_processor_id(); + struct loaded_vmcs_basic *v; if (!is_vmx_supported()) return; @@ -2025,6 +2052,13 @@ void cpu_disable_virtualization(void) return; } + list_for_each_entry(v, get_loaded_vmcss_on_cpu(cpu), + loaded_vmcss_on_cpu_link) { + vmclear(v->vmcs); + if (v->shadow_vmcs) + vmclear(v->shadow_vmcs); + } + asm goto("1: vmxoff\n\t" _ASM_EXTABLE(1b, %l[fault]) ::: "cc", "memory" : fault); diff --git a/arch/x86/kernel/crash.c b/arch/x86/kernel/crash.c index 772c6d350b50..e5b374587be2 100644 --- a/arch/x86/kernel/crash.c +++ b/arch/x86/kernel/crash.c @@ -111,9 +111,6 @@ void native_machine_crash_shutdown(struct pt_regs *regs) crash_smp_send_stop(); - /* Kept to VMCLEAR loaded VMCSs */ - cpu_emergency_disable_virtualization(); - /* * Disable Intel PT to stop its logging */ diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c index 7433e634018f..d8c3e2d8481f 100644 --- a/arch/x86/kernel/reboot.c +++ b/arch/x86/kernel/reboot.c @@ -633,7 +633,7 @@ static void native_machine_emergency_restart(void) unsigned short mode; if (reboot_emergency) - emergency_reboot_disable_virtualization(); + nmi_shootdown_cpus_on_restart(); tboot_shutdown(TB_SHUTDOWN_REBOOT); @@ -876,9 +876,6 @@ static int crash_nmi_callback(unsigned int val, struct pt_regs *regs) if (shootdown_callback) shootdown_callback(cpu, regs); - /* Kept to VMCLEAR loaded VMCSs */ - cpu_emergency_disable_virtualization(); - atomic_dec(&waiting_for_crash_ipi); /* Disable virtualization, usually this is an AP */ @@ -955,6 +952,8 @@ void nmi_shootdown_cpus(nmi_shootdown_cb callback) static inline void nmi_shootdown_cpus_on_restart(void) { + local_irq_disable(); + if (!crash_ipi_issued) nmi_shootdown_cpus(NULL); } diff --git a/arch/x86/kernel/smp.c b/arch/x86/kernel/smp.c index eb6a389ba1a9..b4f50c88e7e2 100644 --- a/arch/x86/kernel/smp.c +++ b/arch/x86/kernel/smp.c @@ -124,9 +124,6 @@ static int smp_stop_nmi_callback(unsigned int val, struct pt_regs *regs) if (raw_smp_processor_id() == atomic_read(&stopping_cpu)) return NMI_HANDLED; - /* Kept to VMCLEAR loaded VMCSs */ - cpu_emergency_disable_virtualization(); - stop_this_cpu(NULL); return NMI_HANDLED; @@ -139,9 +136,6 @@ DEFINE_IDTENTRY_SYSVEC(sysvec_reboot) { apic_eoi(); - /* Kept to VMCLEAR loaded VMCSs */ - cpu_emergency_disable_virtualization(); - stop_this_cpu(NULL); } diff --git a/arch/x86/kvm/vmx/vmcs.h b/arch/x86/kvm/vmx/vmcs.h index da5631924432..10cbfd567dec 100644 --- a/arch/x86/kvm/vmx/vmcs.h +++ b/arch/x86/kvm/vmx/vmcs.h @@ -52,8 +52,12 @@ struct vmcs_controls_shadow { * Track a VMCS that may be loaded on a certain CPU. If it is (cpu!=-1), also * remember whether it was VMLAUNCHed, and maintain a linked list of all VMCSs * loaded on this CPU (so we can clear them if the CPU goes down). 
+ * + * Note, the first three members must be a list_head and two pointers, please + * refer to struct loaded_vmcs_basic defined in arch/x86/kernel/cpu/common.c. */ struct loaded_vmcs { + struct list_head loaded_vmcss_on_cpu_link; struct vmcs *vmcs; struct vmcs *shadow_vmcs; int cpu; @@ -65,7 +69,6 @@ struct loaded_vmcs { ktime_t entry_time; s64 vnmi_blocked_time; unsigned long *msr_bitmap; - struct list_head loaded_vmcss_on_cpu_link; struct vmcs_host_state host_state; struct vmcs_controls_shadow controls_shadow; }; diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index 26af0a8ae08f..b033288e645a 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -469,11 +469,6 @@ noinline void invept_error(unsigned long ext, u64 eptp) } DEFINE_PER_CPU(struct vmcs *, current_vmcs); -/* - * We maintain a per-CPU linked-list of VMCS loaded on that CPU. This is needed - * when a CPU is brought down, and we need to VMCLEAR all VMCSs loaded on it. - */ -static DEFINE_PER_CPU(struct list_head, loaded_vmcss_on_cpu); static DECLARE_BITMAP(vmx_vpid_bitmap, VMX_NR_VPIDS); static DEFINE_SPINLOCK(vmx_vpid_lock); @@ -676,26 +671,6 @@ static int vmx_set_guest_uret_msr(struct vcpu_vmx *vmx, void vmx_emergency_disable_virtualization_cpu(void) { - int cpu = raw_smp_processor_id(); - struct loaded_vmcs *v; - - kvm_rebooting = true; - - /* - * Note, CR4.VMXE can be _cleared_ in NMI context, but it can only be - * set in task context. If this races with VMX is disabled by an NMI, - * VMCLEAR and VMXOFF may #UD, but KVM will eat those faults due to - * kvm_rebooting set. - */ - if (!(__read_cr4() & X86_CR4_VMXE)) - return; - - list_for_each_entry(v, &per_cpu(loaded_vmcss_on_cpu, cpu), - loaded_vmcss_on_cpu_link) { - vmcs_clear(v->vmcs); - if (v->shadow_vmcs) - vmcs_clear(v->shadow_vmcs); - } } static void __loaded_vmcs_clear(void *arg) @@ -1388,7 +1363,7 @@ void vmx_vcpu_load_vmcs(struct kvm_vcpu *vcpu, int cpu) smp_rmb(); list_add(&vmx->loaded_vmcs->loaded_vmcss_on_cpu_link, - &per_cpu(loaded_vmcss_on_cpu, cpu)); + get_loaded_vmcss_on_cpu(cpu)); local_irq_enable(); } @@ -2754,7 +2729,7 @@ static void vmclear_local_loaded_vmcss(void) int cpu = raw_smp_processor_id(); struct loaded_vmcs *v, *n; - list_for_each_entry_safe(v, n, &per_cpu(loaded_vmcss_on_cpu, cpu), + list_for_each_entry_safe(v, n, get_loaded_vmcss_on_cpu(cpu), loaded_vmcss_on_cpu_link) __loaded_vmcs_clear(v); } @@ -8441,11 +8416,8 @@ int __init vmx_init(void) if (r) goto err_l1d_flush; - for_each_possible_cpu(cpu) { - INIT_LIST_HEAD(&per_cpu(loaded_vmcss_on_cpu, cpu)); - + for_each_possible_cpu(cpu) pi_init_cpu(cpu); - } vmx_check_vmcs12_offsets(); -- 2.51.0 Remove emergency_reboot_disable_virtualization() now that virtualization is disabled after the CPU shuts down its local APIC and just before it powers off. Also remove kvm_arch_{enable,disable}_virtualization() as they are no longer needed. 
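As an illustration of the simplification this enables (the function below is a throwaway sketch for this commit message, not code added anywhere), a reboot or crash path that previously had to go through the RCU-protected KVM callback can now call the common helper directly before parking the CPU:

	#include <linux/irqflags.h>	/* native_halt() via asm/irqflags.h */
	#include <linux/lockdep.h>
	#include <asm/processor.h>	/* cpu_disable_virtualization() */

	/*
	 * Sketch only: cpu_disable_virtualization() VMCLEARs the VMCSs loaded
	 * on this CPU and executes VMXOFF, so INIT is recognized again and
	 * firmware can reclaim the CPU after the halt.
	 */
	static void emergency_park_cpu_sketch(void)
	{
		lockdep_assert_irqs_disabled();
		cpu_disable_virtualization();
		for (;;)
			native_halt();
	}
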
Signed-off-by: Xin Li (Intel) --- arch/x86/include/asm/kvm_host.h | 1 - arch/x86/include/asm/reboot.h | 11 ----- arch/x86/kernel/reboot.c | 72 --------------------------------- arch/x86/kvm/svm/svm.c | 8 ---- arch/x86/kvm/vmx/main.c | 1 - arch/x86/kvm/vmx/vmx.c | 4 -- arch/x86/kvm/x86.c | 10 ----- include/linux/kvm_host.h | 8 ---- virt/kvm/kvm_main.c | 14 ------- 9 files changed, 129 deletions(-) diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index f19a76d3ca0e..131cd3dfae35 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -1697,7 +1697,6 @@ struct kvm_x86_ops { int (*enable_virtualization_cpu)(void); void (*disable_virtualization_cpu)(void); - cpu_emergency_virt_cb *emergency_disable_virtualization_cpu; void (*hardware_unsetup)(void); bool (*has_emulated_msr)(struct kvm *kvm, u32 index); diff --git a/arch/x86/include/asm/reboot.h b/arch/x86/include/asm/reboot.h index ecd58ea9a837..a671a1145906 100644 --- a/arch/x86/include/asm/reboot.h +++ b/arch/x86/include/asm/reboot.h @@ -25,17 +25,6 @@ void __noreturn machine_real_restart(unsigned int type); #define MRR_BIOS 0 #define MRR_APM 1 -typedef void (cpu_emergency_virt_cb)(void); -#if IS_ENABLED(CONFIG_KVM_X86) -void cpu_emergency_register_virt_callback(cpu_emergency_virt_cb *callback); -void cpu_emergency_unregister_virt_callback(cpu_emergency_virt_cb *callback); -void cpu_emergency_disable_virtualization(void); -#else -static inline void cpu_emergency_register_virt_callback(cpu_emergency_virt_cb *callback) {} -static inline void cpu_emergency_unregister_virt_callback(cpu_emergency_virt_cb *callback) {} -static inline void cpu_emergency_disable_virtualization(void) {} -#endif /* CONFIG_KVM_X86 */ - typedef void (*nmi_shootdown_cb)(int, struct pt_regs*); void nmi_shootdown_cpus(nmi_shootdown_cb callback); void run_crash_ipi_callback(struct pt_regs *regs); diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c index d8c3e2d8481f..0916dd0ca86f 100644 --- a/arch/x86/kernel/reboot.c +++ b/arch/x86/kernel/reboot.c @@ -530,78 +530,6 @@ static inline void kb_wait(void) static inline void nmi_shootdown_cpus_on_restart(void); -#if IS_ENABLED(CONFIG_KVM_X86) -/* RCU-protected callback to disable virtualization prior to reboot. */ -static cpu_emergency_virt_cb __rcu *cpu_emergency_virt_callback; - -void cpu_emergency_register_virt_callback(cpu_emergency_virt_cb *callback) -{ - if (WARN_ON_ONCE(rcu_access_pointer(cpu_emergency_virt_callback))) - return; - - rcu_assign_pointer(cpu_emergency_virt_callback, callback); -} -EXPORT_SYMBOL_GPL(cpu_emergency_register_virt_callback); - -void cpu_emergency_unregister_virt_callback(cpu_emergency_virt_cb *callback) -{ - if (WARN_ON_ONCE(rcu_access_pointer(cpu_emergency_virt_callback) != callback)) - return; - - rcu_assign_pointer(cpu_emergency_virt_callback, NULL); - synchronize_rcu(); -} -EXPORT_SYMBOL_GPL(cpu_emergency_unregister_virt_callback); - -/* - * Disable virtualization, i.e. VMX or SVM, to ensure INIT is recognized during - * reboot. VMX blocks INIT if the CPU is post-VMXON, and SVM blocks INIT if - * GIF=0, i.e. if the crash occurred between CLGI and STGI. - */ -void cpu_emergency_disable_virtualization(void) -{ - cpu_emergency_virt_cb *callback; - - /* - * IRQs must be disabled as KVM enables virtualization in hardware via - * function call IPIs, i.e. IRQs need to be disabled to guarantee - * virtualization stays disabled. 
- */ - lockdep_assert_irqs_disabled(); - - rcu_read_lock(); - callback = rcu_dereference(cpu_emergency_virt_callback); - if (callback) - callback(); - rcu_read_unlock(); -} - -static void emergency_reboot_disable_virtualization(void) -{ - local_irq_disable(); - - /* - * Disable virtualization on all CPUs before rebooting to avoid hanging - * the system, as VMX and SVM block INIT when running in the host. - * - * We can't take any locks and we may be on an inconsistent state, so - * use NMIs as IPIs to tell the other CPUs to disable VMX/SVM and halt. - * - * Do the NMI shootdown even if virtualization is off on _this_ CPU, as - * other CPUs may have virtualization enabled. - */ - if (rcu_access_pointer(cpu_emergency_virt_callback)) { - /* Safely force _this_ CPU out of VMX/SVM operation. */ - cpu_emergency_disable_virtualization(); - - /* Disable VMX/SVM and halt on other CPUs. */ - nmi_shootdown_cpus_on_restart(); - } -} -#else -static void emergency_reboot_disable_virtualization(void) { } -#endif /* CONFIG_KVM_X86 */ - void __attribute__((weak)) mach_reboot_fixups(void) { } diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index d9931c6c4bc6..795e5961c1d9 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -497,13 +497,6 @@ static inline void kvm_cpu_svm_disable(void) } } -static void svm_emergency_disable_virtualization_cpu(void) -{ - kvm_rebooting = true; - - kvm_cpu_svm_disable(); -} - static void svm_disable_virtualization_cpu(void) { /* Make sure we clean up behind us */ @@ -5050,7 +5043,6 @@ static struct kvm_x86_ops svm_x86_ops __initdata = { .hardware_unsetup = svm_hardware_unsetup, .enable_virtualization_cpu = svm_enable_virtualization_cpu, .disable_virtualization_cpu = svm_disable_virtualization_cpu, - .emergency_disable_virtualization_cpu = svm_emergency_disable_virtualization_cpu, .has_emulated_msr = svm_has_emulated_msr, .vcpu_create = svm_vcpu_create, diff --git a/arch/x86/kvm/vmx/main.c b/arch/x86/kvm/vmx/main.c index dbab1c15b0cd..ce46b80368c9 100644 --- a/arch/x86/kvm/vmx/main.c +++ b/arch/x86/kvm/vmx/main.c @@ -864,7 +864,6 @@ struct kvm_x86_ops vt_x86_ops __initdata = { .enable_virtualization_cpu = vmx_enable_virtualization_cpu, .disable_virtualization_cpu = vt_op(disable_virtualization_cpu), - .emergency_disable_virtualization_cpu = vmx_emergency_disable_virtualization_cpu, .has_emulated_msr = vt_op(has_emulated_msr), diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index b033288e645a..fdb9bc19f037 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -669,10 +669,6 @@ static int vmx_set_guest_uret_msr(struct vcpu_vmx *vmx, return ret; } -void vmx_emergency_disable_virtualization_cpu(void) -{ -} - static void __loaded_vmcs_clear(void *arg) { struct loaded_vmcs *loaded_vmcs = arg; diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 604490b1cb19..8b9f64770684 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -12543,16 +12543,6 @@ void kvm_vcpu_deliver_sipi_vector(struct kvm_vcpu *vcpu, u8 vector) } EXPORT_SYMBOL_GPL(kvm_vcpu_deliver_sipi_vector); -void kvm_arch_enable_virtualization(void) -{ - cpu_emergency_register_virt_callback(kvm_x86_ops.emergency_disable_virtualization_cpu); -} - -void kvm_arch_disable_virtualization(void) -{ - cpu_emergency_unregister_virt_callback(kvm_x86_ops.emergency_disable_virtualization_cpu); -} - int kvm_arch_enable_virtualization_cpu(void) { struct kvm *kvm; diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 15656b7fba6c..151305b33bce 100644 --- 
a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -1603,14 +1603,6 @@ static inline void kvm_create_vcpu_debugfs(struct kvm_vcpu *vcpu) {} #endif #ifdef CONFIG_KVM_GENERIC_HARDWARE_ENABLING -/* - * kvm_arch_{enable,disable}_virtualization() are called on one CPU, under - * kvm_usage_lock, immediately after/before 0=>1 and 1=>0 transitions of - * kvm_usage_count, i.e. at the beginning of the generic hardware enabling - * sequence, and at the end of the generic hardware disabling sequence. - */ -void kvm_arch_enable_virtualization(void); -void kvm_arch_disable_virtualization(void); /* * kvm_arch_{enable,disable}_virtualization_cpu() are called on "every" CPU to * do the actual twiddling of hardware bits. The hooks are called on all diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 6c07dd423458..6e86c6a45a71 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -5566,16 +5566,6 @@ static DEFINE_PER_CPU(bool, virtualization_enabled); static DEFINE_MUTEX(kvm_usage_lock); static int kvm_usage_count; -__weak void kvm_arch_enable_virtualization(void) -{ - -} - -__weak void kvm_arch_disable_virtualization(void) -{ - -} - static int kvm_enable_virtualization_cpu(void) { if (__this_cpu_read(virtualization_enabled)) @@ -5675,8 +5665,6 @@ int kvm_enable_virtualization(void) if (kvm_usage_count++) return 0; - kvm_arch_enable_virtualization(); - r = cpuhp_setup_state(CPUHP_AP_KVM_ONLINE, "kvm/cpu:online", kvm_online_cpu, kvm_offline_cpu); if (r) @@ -5707,7 +5695,6 @@ int kvm_enable_virtualization(void) unregister_syscore_ops(&kvm_syscore_ops); cpuhp_remove_state(CPUHP_AP_KVM_ONLINE); err_cpuhp: - kvm_arch_disable_virtualization(); --kvm_usage_count; return r; } @@ -5722,7 +5709,6 @@ void kvm_disable_virtualization(void) unregister_syscore_ops(&kvm_syscore_ops); cpuhp_remove_state(CPUHP_AP_KVM_ONLINE); - kvm_arch_disable_virtualization(); } EXPORT_SYMBOL_GPL(kvm_disable_virtualization); -- 2.51.0 Drop kvm_rebooting and all related uses. Virtualization is now disabled immediately before a CPU shuts down, eliminating any chance of executing virtualization instructions during reboot. Signed-off-by: Xin Li (Intel) --- arch/x86/kvm/svm/vmenter.S | 42 ++++++++++++-------------------------- arch/x86/kvm/vmx/tdx.c | 4 +--- arch/x86/kvm/vmx/vmenter.S | 2 -- arch/x86/kvm/x86.c | 8 ++------ include/linux/kvm_host.h | 1 - virt/kvm/kvm_main.c | 15 ++------------ 6 files changed, 18 insertions(+), 54 deletions(-) diff --git a/arch/x86/kvm/svm/vmenter.S b/arch/x86/kvm/svm/vmenter.S index 235c4af6b692..d530f62679b9 100644 --- a/arch/x86/kvm/svm/vmenter.S +++ b/arch/x86/kvm/svm/vmenter.S @@ -145,7 +145,6 @@ SYM_FUNC_START(__svm_vcpu_run) */ mov SVM_vmcb01_pa(%_ASM_DI), %_ASM_AX 1: vmload %_ASM_AX -2: /* Get svm->current_vmcb->pa into RAX. */ mov SVM_current_vmcb(%_ASM_DI), %_ASM_AX @@ -173,8 +172,8 @@ SYM_FUNC_START(__svm_vcpu_run) VM_CLEAR_CPU_BUFFERS /* Enter guest mode */ -3: vmrun %_ASM_AX -4: +2: vmrun %_ASM_AX + /* Pop @svm to RAX while it's the only available register. */ pop %_ASM_AX @@ -200,13 +199,11 @@ SYM_FUNC_START(__svm_vcpu_run) mov %_ASM_AX, %_ASM_DI mov SVM_vmcb01_pa(%_ASM_DI), %_ASM_AX -5: vmsave %_ASM_AX -6: +3: vmsave %_ASM_AX /* Restores GSBASE among other things, allowing access to percpu data. */ pop %_ASM_AX -7: vmload %_ASM_AX -8: +4: vmload %_ASM_AX /* IMPORTANT: Stuff the RSB immediately after VM-Exit, before RET! 
*/ FILL_RETURN_BUFFER %_ASM_AX, RSB_CLEAR_LOOPS, X86_FEATURE_RSB_VMEXIT @@ -269,23 +266,12 @@ SYM_FUNC_START(__svm_vcpu_run) RESTORE_GUEST_SPEC_CTRL_BODY RESTORE_HOST_SPEC_CTRL_BODY (%_ASM_SP) -10: cmpb $0, _ASM_RIP(kvm_rebooting) - jne 2b - ud2 -30: cmpb $0, _ASM_RIP(kvm_rebooting) - jne 4b - ud2 -50: cmpb $0, _ASM_RIP(kvm_rebooting) - jne 6b - ud2 -70: cmpb $0, _ASM_RIP(kvm_rebooting) - jne 8b - ud2 - - _ASM_EXTABLE(1b, 10b) - _ASM_EXTABLE(3b, 30b) - _ASM_EXTABLE(5b, 50b) - _ASM_EXTABLE(7b, 70b) +5: ud2 + + _ASM_EXTABLE(1b, 5b) + _ASM_EXTABLE(2b, 5b) + _ASM_EXTABLE(3b, 5b) + _ASM_EXTABLE(4b, 5b) SYM_FUNC_END(__svm_vcpu_run) @@ -343,7 +329,7 @@ SYM_FUNC_START(__svm_sev_es_vcpu_run) /* Enter guest mode */ 1: vmrun %rax -2: + /* IMPORTANT: Stuff the RSB immediately after VM-Exit, before RET! */ FILL_RETURN_BUFFER %rax, RSB_CLEAR_LOOPS, X86_FEATURE_RSB_VMEXIT @@ -365,11 +351,9 @@ SYM_FUNC_START(__svm_sev_es_vcpu_run) RESTORE_GUEST_SPEC_CTRL_BODY RESTORE_HOST_SPEC_CTRL_BODY %sil -3: cmpb $0, kvm_rebooting(%rip) - jne 2b - ud2 +2: ud2 - _ASM_EXTABLE(1b, 3b) + _ASM_EXTABLE(1b, 2b) SYM_FUNC_END(__svm_sev_es_vcpu_run) #endif /* CONFIG_KVM_AMD_SEV */ diff --git a/arch/x86/kvm/vmx/tdx.c b/arch/x86/kvm/vmx/tdx.c index 66744f5768c8..cfe5f8b63973 100644 --- a/arch/x86/kvm/vmx/tdx.c +++ b/arch/x86/kvm/vmx/tdx.c @@ -2052,10 +2052,8 @@ int tdx_handle_exit(struct kvm_vcpu *vcpu, fastpath_t fastpath) * Handle TDX SW errors, including TDX_SEAMCALL_UD, TDX_SEAMCALL_GP and * TDX_SEAMCALL_VMFAILINVALID. */ - if (unlikely((vp_enter_ret & TDX_SW_ERROR) == TDX_SW_ERROR)) { - KVM_BUG_ON(!kvm_rebooting, vcpu->kvm); + if (unlikely((vp_enter_ret & TDX_SW_ERROR) == TDX_SW_ERROR)) goto unhandled_exit; - } if (unlikely(tdx_failed_vmentry(vcpu))) { /* diff --git a/arch/x86/kvm/vmx/vmenter.S b/arch/x86/kvm/vmx/vmenter.S index 0a6cf5bff2aa..3457b5e1f856 100644 --- a/arch/x86/kvm/vmx/vmenter.S +++ b/arch/x86/kvm/vmx/vmenter.S @@ -293,8 +293,6 @@ SYM_INNER_LABEL_ALIGN(vmx_vmexit, SYM_L_GLOBAL) RET .Lfixup: - cmpb $0, _ASM_RIP(kvm_rebooting) - jne .Lvmfail ud2 .Lvmfail: /* VM-Fail: set return value to 1 */ diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 8b9f64770684..1abc4550fd76 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -687,15 +687,11 @@ static void drop_user_return_notifiers(void) /* * Handle a fault on a hardware virtualization (VMX or SVM) instruction. - * - * Hardware virtualization extension instructions may fault if a reboot turns - * off virtualization while processes are running. Usually after catching the - * fault we just panic; during reboot instead the instruction is ignored. */ noinstr void kvm_spurious_fault(void) { - /* Fault while not rebooting. We want the trace. */ - BUG_ON(!kvm_rebooting); + /* We want the trace. 
*/ + BUG_ON(true); } EXPORT_SYMBOL_GPL(kvm_spurious_fault); diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 151305b33bce..2d9c306db4f0 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -2276,7 +2276,6 @@ static inline bool kvm_check_request(int req, struct kvm_vcpu *vcpu) #ifdef CONFIG_KVM_GENERIC_HARDWARE_ENABLING extern bool enable_virt_at_load; -extern bool kvm_rebooting; #endif extern unsigned int halt_poll_ns; diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 6e86c6a45a71..0037761c1a51 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -5559,9 +5559,6 @@ bool enable_virt_at_load = true; module_param(enable_virt_at_load, bool, 0444); EXPORT_SYMBOL_GPL(enable_virt_at_load); -__visible bool kvm_rebooting; -EXPORT_SYMBOL_GPL(kvm_rebooting); - static DEFINE_PER_CPU(bool, virtualization_enabled); static DEFINE_MUTEX(kvm_usage_lock); static int kvm_usage_count; @@ -5610,18 +5607,10 @@ static int kvm_offline_cpu(unsigned int cpu) static void kvm_shutdown(void) { /* - * Disable hardware virtualization and set kvm_rebooting to indicate - * that KVM has asynchronously disabled hardware virtualization, i.e. - * that relevant errors and exceptions aren't entirely unexpected. - * Some flavors of hardware virtualization need to be disabled before - * transferring control to firmware (to perform shutdown/reboot), e.g. - * on x86, virtualization can block INIT interrupts, which are used by - * firmware to pull APs back under firmware control. Note, this path - * is used for both shutdown and reboot scenarios, i.e. neither name is - * 100% comprehensive. + * Note, this path is used for both shutdown and reboot scenarios, i.e. + * neither name is 100% comprehensive. */ pr_info("kvm: exiting hardware virtualization\n"); - kvm_rebooting = true; on_each_cpu(kvm_disable_virtualization_cpu, NULL, 1); } -- 2.51.0
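
As a quick way to sanity-check the end result of the series (not part of any patch above; the initcall below is a throwaway, built-in-only sketch), one can confirm that CR4.VMXE is already set on every online CPU that supports VMX by the time late initcalls run:

	#include <linux/init.h>
	#include <linux/printk.h>
	#include <linux/smp.h>
	#include <asm/processor-flags.h>	/* X86_CR4_VMXE */
	#include <asm/tlbflush.h>		/* cr4_read_shadow() */

	static void report_vmxe(void *unused)
	{
		pr_info("CPU%d: CR4.VMXE=%d\n", raw_smp_processor_id(),
			!!(cr4_read_shadow() & X86_CR4_VMXE));
	}

	static int __init vmxe_report_init(void)
	{
		/* Run on each online CPU and wait for completion. */
		on_each_cpu(report_vmxe, NULL, 1);
		return 0;
	}
	late_initcall(vmxe_report_init);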