Move the VMXON setup from the KVM initialization path to the CPU startup
phase to guarantee that hardware virtualization is enabled early and
without interruption.  As a result, KVM, often loaded as a kernel module,
no longer needs to track whether VMXON has been executed on a given CPU
(e.g., across CPU offline events or system reboots while KVM is loaded).

Signed-off-by: Xin Li (Intel)
---
 arch/x86/include/asm/processor.h |   1 +
 arch/x86/include/asm/vmx.h       |   5 ++
 arch/x86/kernel/cpu/common.c     |  91 ++++++++++++++++++++++++
 arch/x86/kvm/vmx/vmcs.h          |   5 --
 arch/x86/kvm/vmx/vmx.c           | 117 ++-----------------------------
 arch/x86/power/cpu.c             |   7 +-
 6 files changed, 107 insertions(+), 119 deletions(-)

diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index bde58f6510ac..59660428f46d 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -230,6 +230,7 @@ void init_cpu_devs(void);
 void get_cpu_vendor(struct cpuinfo_x86 *c);
 extern void early_cpu_init(void);
 extern void identify_secondary_cpu(unsigned int cpu);
+extern void cpu_enable_virtualization(void);
 extern void print_cpu_info(struct cpuinfo_x86 *);
 void print_cpu_msr(struct cpuinfo_x86 *);
 
diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h
index cca7d6641287..736d04c1b2fc 100644
--- a/arch/x86/include/asm/vmx.h
+++ b/arch/x86/include/asm/vmx.h
@@ -20,6 +20,11 @@
 #include
 #include
 
+struct vmcs_hdr {
+	u32 revision_id:31;
+	u32 shadow_vmcs:1;
+};
+
 #define VMCS_CONTROL_BIT(x)	BIT(VMX_FEATURE_##x & 0x1f)
 
 /*
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 34a054181c4d..e36877b5a240 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -72,6 +72,7 @@
 #include
 #include
 #include
+#include
 
 #include "cpu.h"
 
@@ -1923,6 +1924,84 @@ static void generic_identify(struct cpuinfo_x86 *c)
 #endif
 }
 
+static bool is_vmx_supported(void)
+{
+	int cpu = raw_smp_processor_id();
+
+	if (!(cpuid_ecx(1) & (1 << (X86_FEATURE_VMX & 31)))) {
+		/* May not be an Intel CPU */
+		pr_info("VMX not supported by CPU%d\n", cpu);
+		return false;
+	}
+
+	if (!this_cpu_has(X86_FEATURE_MSR_IA32_FEAT_CTL) ||
+	    !this_cpu_has(X86_FEATURE_VMX)) {
+		pr_err("VMX not enabled (by BIOS) in MSR_IA32_FEAT_CTL on CPU%d\n", cpu);
+		return false;
+	}
+
+	return true;
+}
+
+/* IA-32 SDM Vol 3B: VMCS size is never greater than 4kB. */
+union vmxon_vmcs {
+	struct vmcs_hdr hdr;
+	char data[PAGE_SIZE];
+};
+
+static DEFINE_PER_CPU_PAGE_ALIGNED(union vmxon_vmcs, vmxon_vmcs);
+
+/*
+ * Called during the CPU startup phase to execute VMXON and enable VMX.  As a
+ * result, KVM, often loaded as a kernel module, no longer needs to track
+ * whether VMXON has been executed on a CPU (e.g., across CPU offline events
+ * or system reboots while KVM is loaded).
+ *
+ * VMXON is not expected to fault, but fault handling is kept as a precaution
+ * against any unexpected code paths that might trigger it and can be removed
+ * later if it proves unnecessary.
+ */
+void cpu_enable_virtualization(void)
+{
+	u64 vmxon_pointer = __pa(this_cpu_ptr(&vmxon_vmcs));
+	int cpu = raw_smp_processor_id();
+	u64 basic_msr;
+
+	if (!is_vmx_supported())
+		return;
+
+	if (cr4_read_shadow() & X86_CR4_VMXE) {
+		pr_err("VMX already enabled on CPU%d\n", cpu);
+		return;
+	}
+
+	memset(this_cpu_ptr(&vmxon_vmcs), 0, PAGE_SIZE);
+
+	/*
+	 * Even though not explicitly documented by the TLFS, the VMXON region
+	 * must still be marked with the VMCS revision_id reported by the
+	 * physical CPU.
+	 */
+	rdmsrq(MSR_IA32_VMX_BASIC, basic_msr);
+	this_cpu_ptr(&vmxon_vmcs)->hdr.revision_id = vmx_basic_vmcs_revision_id(basic_msr);
+
+	intel_pt_handle_vmx(1);
+
+	cr4_set_bits(X86_CR4_VMXE);
+
+	asm goto("1: vmxon %[vmxon_pointer]\n\t"
+		 _ASM_EXTABLE(1b, %l[fault])
+		 : : [vmxon_pointer] "m"(vmxon_pointer)
+		 : : fault);
+
+	return;
+
+fault:
+	pr_err("VMXON faulted on CPU%d\n", cpu);
+	cr4_clear_bits(X86_CR4_VMXE);
+	intel_pt_handle_vmx(0);
+}
+
 /*
  * This does the hard work of actually picking apart the CPU stuff...
  */
@@ -2120,6 +2199,12 @@ void identify_secondary_cpu(unsigned int cpu)
 	tsx_ap_init();
 
 	c->initialized = true;
+
+	/*
+	 * Enable virtualization on the AP immediately after its cpuinfo_x86
+	 * structure is initialized, so that this_cpu_has() works correctly.
+	 */
+	cpu_enable_virtualization();
 }
 
 void print_cpu_info(struct cpuinfo_x86 *c)
@@ -2551,6 +2636,12 @@ void __init arch_cpu_finalize_init(void)
 	*c = boot_cpu_data;
 	c->initialized = true;
 
+	/*
+	 * Enable virtualization on the BSP right after its cpuinfo_x86
+	 * structure is initialized, so that this_cpu_has() works correctly.
+	 */
+	cpu_enable_virtualization();
+
 	alternative_instructions();
 
 	if (IS_ENABLED(CONFIG_X86_64)) {
diff --git a/arch/x86/kvm/vmx/vmcs.h b/arch/x86/kvm/vmx/vmcs.h
index b25625314658..da5631924432 100644
--- a/arch/x86/kvm/vmx/vmcs.h
+++ b/arch/x86/kvm/vmx/vmcs.h
@@ -13,11 +13,6 @@
 
 #define ROL16(val, n) ((u16)(((u16)(val) << (n)) | ((u16)(val) >> (16 - (n)))))
 
-struct vmcs_hdr {
-	u32 revision_id:31;
-	u32 shadow_vmcs:1;
-};
-
 struct vmcs {
 	struct vmcs_hdr hdr;
 	u32 abort;
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index aa157fe5b7b3..f6742df0c4ff 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -468,7 +468,6 @@ noinline void invept_error(unsigned long ext, u64 eptp)
 	vmx_insn_failed("invept failed: ext=0x%lx eptp=%llx\n", ext, eptp);
 }
 
-static DEFINE_PER_CPU(struct vmcs *, vmxarea);
 DEFINE_PER_CPU(struct vmcs *, current_vmcs);
 /*
  * We maintain a per-CPU linked-list of VMCS loaded on that CPU. This is needed
@@ -2736,43 +2735,14 @@ static int setup_vmcs_config(struct vmcs_config *vmcs_conf,
 	return 0;
 }
 
-static bool __kvm_is_vmx_supported(void)
-{
-	int cpu = smp_processor_id();
-
-	if (!(cpuid_ecx(1) & feature_bit(VMX))) {
-		pr_err("VMX not supported by CPU %d\n", cpu);
-		return false;
-	}
-
-	if (!this_cpu_has(X86_FEATURE_MSR_IA32_FEAT_CTL) ||
-	    !this_cpu_has(X86_FEATURE_VMX)) {
-		pr_err("VMX not enabled (by BIOS) in MSR_IA32_FEAT_CTL on CPU %d\n", cpu);
-		return false;
-	}
-
-	return true;
-}
-
-static bool kvm_is_vmx_supported(void)
-{
-	bool supported;
-
-	migrate_disable();
-	supported = __kvm_is_vmx_supported();
-	migrate_enable();
-
-	return supported;
-}
-
 int vmx_check_processor_compat(void)
 {
 	int cpu = raw_smp_processor_id();
 	struct vmcs_config vmcs_conf;
 	struct vmx_capability vmx_cap;
 
-	if (!__kvm_is_vmx_supported())
-		return -EIO;
+	if (!(cr4_read_shadow() & X86_CR4_VMXE))
+		return -EOPNOTSUPP;
 
 	if (setup_vmcs_config(&vmcs_conf, &vmx_cap) < 0) {
 		pr_err("Failed to setup VMCS config on CPU %d\n", cpu);
@@ -2787,34 +2757,12 @@ int vmx_check_processor_compat(void)
 	return 0;
 }
 
-static int kvm_cpu_vmxon(u64 vmxon_pointer)
-{
-	u64 msr;
-
-	cr4_set_bits(X86_CR4_VMXE);
-
-	asm goto("1: vmxon %[vmxon_pointer]\n\t"
-		  _ASM_EXTABLE(1b, %l[fault])
-		  : : [vmxon_pointer] "m"(vmxon_pointer)
-		  : : fault);
-	return 0;
-
-fault:
-	WARN_ONCE(1, "VMXON faulted, MSR_IA32_FEAT_CTL (0x3a) = 0x%llx\n",
-		  rdmsrq_safe(MSR_IA32_FEAT_CTL, &msr) ? 0xdeadbeef : msr);
-	cr4_clear_bits(X86_CR4_VMXE);
-
-	return -EFAULT;
-}
-
 int vmx_enable_virtualization_cpu(void)
 {
 	int cpu = raw_smp_processor_id();
-	u64 phys_addr = __pa(per_cpu(vmxarea, cpu));
-	int r;
 
-	if (cr4_read_shadow() & X86_CR4_VMXE)
-		return -EBUSY;
+	if (!(cr4_read_shadow() & X86_CR4_VMXE))
+		return -EOPNOTSUPP;
 
 	/*
 	 * This can happen if we hot-added a CPU but failed to allocate
@@ -2823,14 +2771,6 @@ int vmx_enable_virtualization_cpu(void)
 	if (kvm_is_using_evmcs() && !hv_get_vp_assist_page(cpu))
 		return -EFAULT;
 
-	intel_pt_handle_vmx(1);
-
-	r = kvm_cpu_vmxon(phys_addr);
-	if (r) {
-		intel_pt_handle_vmx(0);
-		return r;
-	}
-
 	return 0;
 }
 
@@ -2931,47 +2871,6 @@ int alloc_loaded_vmcs(struct loaded_vmcs *loaded_vmcs)
 	return -ENOMEM;
 }
 
-static void free_kvm_area(void)
-{
-	int cpu;
-
-	for_each_possible_cpu(cpu) {
-		free_vmcs(per_cpu(vmxarea, cpu));
-		per_cpu(vmxarea, cpu) = NULL;
-	}
-}
-
-static __init int alloc_kvm_area(void)
-{
-	int cpu;
-
-	for_each_possible_cpu(cpu) {
-		struct vmcs *vmcs;
-
-		vmcs = alloc_vmcs_cpu(false, cpu, GFP_KERNEL);
-		if (!vmcs) {
-			free_kvm_area();
-			return -ENOMEM;
-		}
-
-		/*
-		 * When eVMCS is enabled, alloc_vmcs_cpu() sets
-		 * vmcs->revision_id to KVM_EVMCS_VERSION instead of
-		 * revision_id reported by MSR_IA32_VMX_BASIC.
-		 *
-		 * However, even though not explicitly documented by
-		 * TLFS, VMXArea passed as VMXON argument should
-		 * still be marked with revision_id reported by
-		 * physical CPU.
-		 */
-		if (kvm_is_using_evmcs())
-			vmcs->hdr.revision_id = vmx_basic_vmcs_revision_id(vmcs_config.basic);
-
-		per_cpu(vmxarea, cpu) = vmcs;
-	}
-	return 0;
-}
-
 static void fix_pmode_seg(struct kvm_vcpu *vcpu, int seg,
 			  struct kvm_segment *save)
 {
@@ -8204,8 +8103,6 @@ void vmx_hardware_unsetup(void)
 
 	if (nested)
 		nested_vmx_hardware_unsetup();
-
-	free_kvm_area();
 }
 
 void vmx_vm_destroy(struct kvm *kvm)
@@ -8499,10 +8396,6 @@ __init int vmx_hardware_setup(void)
 
 	vmx_set_cpu_caps();
 
-	r = alloc_kvm_area();
-	if (r && nested)
-		nested_vmx_hardware_unsetup();
-
 	kvm_set_posted_intr_wakeup_handler(pi_wakeup_handler);
 
 	/*
@@ -8554,7 +8447,7 @@ int __init vmx_init(void)
 
 	KVM_SANITY_CHECK_VM_STRUCT_SIZE(kvm_vmx);
 
-	if (!kvm_is_vmx_supported())
+	if (!(cr4_read_shadow() & X86_CR4_VMXE))
 		return -EOPNOTSUPP;
 
 	/*
diff --git a/arch/x86/power/cpu.c b/arch/x86/power/cpu.c
index 916441f5e85c..0eec314b79c2 100644
--- a/arch/x86/power/cpu.c
+++ b/arch/x86/power/cpu.c
@@ -206,11 +206,11 @@ static void notrace __restore_processor_state(struct saved_context *ctxt)
 	/* cr4 was introduced in the Pentium CPU */
 #ifdef CONFIG_X86_32
 	if (ctxt->cr4)
-		__write_cr4(ctxt->cr4);
+		__write_cr4(ctxt->cr4 & ~X86_CR4_VMXE);
 #else
 /* CONFIG X86_64 */
 	wrmsrq(MSR_EFER, ctxt->efer);
-	__write_cr4(ctxt->cr4);
+	__write_cr4(ctxt->cr4 & ~X86_CR4_VMXE);
 #endif
 	write_cr3(ctxt->cr3);
 	write_cr2(ctxt->cr2);
@@ -291,6 +291,9 @@ static void notrace __restore_processor_state(struct saved_context *ctxt)
 	 * because some of the MSRs are "emulated" in microcode.
 	 */
 	msr_restore_context(ctxt);
+
+	if (ctxt->cr4 & X86_CR4_VMXE)
+		cpu_enable_virtualization();
 }
 
 /* Needed by apm.c */
-- 
2.51.0
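
Reviewer note (not part of the patch; safe to ignore when applying): with VMXON
owned by the CPU startup path, code that previously performed its own VMXON only
has to confirm that CR4.VMXE is already set on the current CPU, which is exactly
what the new checks in vmx_init() and vmx_check_processor_compat() do.  A minimal
sketch of such a check follows; the helper name is illustrative and is not
introduced by this series.

#include <linux/errno.h>
#include <asm/processor-flags.h>	/* X86_CR4_VMXE */
#include <asm/tlbflush.h>		/* cr4_read_shadow() */

/*
 * Illustrative helper (hypothetical name): report whether the CPU startup
 * code has already executed VMXON on this CPU, mirroring the CR4.VMXE
 * checks this patch adds to vmx_init() and vmx_check_processor_compat().
 */
static int example_vmx_enabled_on_this_cpu(void)
{
	if (!(cr4_read_shadow() & X86_CR4_VMXE))
		return -EOPNOTSUPP;

	return 0;
}

As with the kvm_is_vmx_supported() path this patch removes, the result is only
meaningful while the caller stays on the same CPU, e.g. under migrate_disable()
or from a per-CPU context such as the compat check.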