Allocate the root VMCS (misleadingly called "vmxarea" and "kvm_area" in KVM) for each possible CPU during early boot CPU bringup, before early TDX initialization, so that TDX can eventually do VMXON on-demand (to make SEAMCALLs) without needing to load kvm-intel.ko. Allocate the pages early on, i.e. instead of trying to do so on-demand, to avoid having to juggle allocation failures at runtime. Opportunistically rename the per-CPU pointers to better reflect the role of the VMCS. Use Intel's "root VMCS" terminology, e.g. from various VMCS patents[1][2] and older SDMs, not the more opaque "VMXON region" used in recent versions of the SDM. While it's possible the VMCS passed to VMXON no longer serves as _the_ root VMCS on modern CPUs, it is still in effect a "root mode VMCS", as described in the patents. Link: https://patentimages.storage.googleapis.com/c7/e4/32/d7a7def5580667/WO2013101191A1.pdf [1] Link: https://patentimages.storage.googleapis.com/13/f6/8d/1361fab8c33373/US20080163205A1.pdf [2] Signed-off-by: Sean Christopherson --- arch/x86/include/asm/virt.h | 13 ++++++- arch/x86/kernel/cpu/common.c | 2 + arch/x86/kvm/vmx/vmx.c | 58 ++--------------------------- arch/x86/virt/hw.c | 71 ++++++++++++++++++++++++++++++++++++ 4 files changed, 89 insertions(+), 55 deletions(-) diff --git a/arch/x86/include/asm/virt.h b/arch/x86/include/asm/virt.h index 131b9bf9ef3c..0da6db4f5b0c 100644 --- a/arch/x86/include/asm/virt.h +++ b/arch/x86/include/asm/virt.h @@ -2,10 +2,21 @@ #ifndef _ASM_X86_VIRT_H #define _ASM_X86_VIRT_H -#include +#include + +#include #if IS_ENABLED(CONFIG_KVM_X86) extern bool virt_rebooting; + +void __init x86_virt_init(void); + +#if IS_ENABLED(CONFIG_KVM_INTEL) +DECLARE_PER_CPU(struct vmcs *, root_vmcs); +#endif + +#else +static __always_inline void x86_virt_init(void) {} #endif #endif /* _ASM_X86_VIRT_H */ diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index e7ab22fce3b5..dda9e41292db 100644 --- a/arch/x86/kernel/cpu/common.c +++ 
b/arch/x86/kernel/cpu/common.c @@ -71,6 +71,7 @@ #include #include #include +#include #include #include @@ -2143,6 +2144,7 @@ static __init void identify_boot_cpu(void) cpu_detect_tlb(&boot_cpu_data); setup_cr_pinning(); + x86_virt_init(); tsx_init(); tdx_init(); lkgs_init(); diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index fc6e3b620866..abd4830f71d8 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -580,7 +580,6 @@ noinline void invept_error(unsigned long ext, u64 eptp) vmx_insn_failed("invept failed: ext=0x%lx eptp=%llx\n", ext, eptp); } -static DEFINE_PER_CPU(struct vmcs *, vmxarea); DEFINE_PER_CPU(struct vmcs *, current_vmcs); /* * We maintain a per-CPU linked-list of VMCS loaded on that CPU. This is needed @@ -2934,6 +2933,9 @@ static bool __kvm_is_vmx_supported(void) return false; } + if (!per_cpu(root_vmcs, cpu)) + return false; + return true; } @@ -3008,7 +3010,7 @@ static int kvm_cpu_vmxon(u64 vmxon_pointer) int vmx_enable_virtualization_cpu(void) { int cpu = raw_smp_processor_id(); - u64 phys_addr = __pa(per_cpu(vmxarea, cpu)); + u64 phys_addr = __pa(per_cpu(root_vmcs, cpu)); int r; if (cr4_read_shadow() & X86_CR4_VMXE) @@ -3129,47 +3131,6 @@ int alloc_loaded_vmcs(struct loaded_vmcs *loaded_vmcs) return -ENOMEM; } -static void free_kvm_area(void) -{ - int cpu; - - for_each_possible_cpu(cpu) { - free_vmcs(per_cpu(vmxarea, cpu)); - per_cpu(vmxarea, cpu) = NULL; - } -} - -static __init int alloc_kvm_area(void) -{ - int cpu; - - for_each_possible_cpu(cpu) { - struct vmcs *vmcs; - - vmcs = alloc_vmcs_cpu(false, cpu, GFP_KERNEL); - if (!vmcs) { - free_kvm_area(); - return -ENOMEM; - } - - /* - * When eVMCS is enabled, alloc_vmcs_cpu() sets - * vmcs->revision_id to KVM_EVMCS_VERSION instead of - * revision_id reported by MSR_IA32_VMX_BASIC. - * - * However, even though not explicitly documented by - * TLFS, VMXArea passed as VMXON argument should - * still be marked with revision_id reported by - * physical CPU. 
- */ - if (kvm_is_using_evmcs()) - vmcs->hdr.revision_id = vmx_basic_vmcs_revision_id(vmcs_config.basic); - - per_cpu(vmxarea, cpu) = vmcs; - } - return 0; -} - static void fix_pmode_seg(struct kvm_vcpu *vcpu, int seg, struct kvm_segment *save) { @@ -8566,8 +8527,6 @@ void vmx_hardware_unsetup(void) if (nested) nested_vmx_hardware_unsetup(); - - free_kvm_area(); } void vmx_vm_destroy(struct kvm *kvm) @@ -8870,10 +8829,6 @@ __init int vmx_hardware_setup(void) return r; } - r = alloc_kvm_area(); - if (r) - goto err_kvm_area; - kvm_set_posted_intr_wakeup_handler(pi_wakeup_handler); /* @@ -8900,11 +8855,6 @@ __init int vmx_hardware_setup(void) kvm_caps.inapplicable_quirks &= ~KVM_X86_QUIRK_IGNORE_GUEST_PAT; return 0; - -err_kvm_area: - if (nested) - nested_vmx_hardware_unsetup(); - return r; } void vmx_exit(void) diff --git a/arch/x86/virt/hw.c b/arch/x86/virt/hw.c index df3dc18d19b4..56972f594d90 100644 --- a/arch/x86/virt/hw.c +++ b/arch/x86/virt/hw.c @@ -1,7 +1,78 @@ // SPDX-License-Identifier: GPL-2.0-only +#include +#include +#include #include +#include +#include +#include +#include #include +#include __visible bool virt_rebooting; EXPORT_SYMBOL_FOR_KVM(virt_rebooting); + +#if IS_ENABLED(CONFIG_KVM_INTEL) +DEFINE_PER_CPU(struct vmcs *, root_vmcs); +EXPORT_PER_CPU_SYMBOL(root_vmcs); + +static __init void x86_vmx_exit(void) +{ + int cpu; + + for_each_possible_cpu(cpu) { + free_page((unsigned long)per_cpu(root_vmcs, cpu)); + per_cpu(root_vmcs, cpu) = NULL; + } +} + +static __init int x86_vmx_init(void) +{ + u64 basic_msr; + u32 rev_id; + int cpu; + + if (!cpu_feature_enabled(X86_FEATURE_VMX)) + return -EOPNOTSUPP; + + rdmsrq(MSR_IA32_VMX_BASIC, basic_msr); + + /* IA-32 SDM Vol 3B: VMCS size is never greater than 4kB. 
*/ + if (WARN_ON_ONCE(vmx_basic_vmcs_size(basic_msr) > PAGE_SIZE)) + return -EIO; + + /* + * Even if eVMCS is enabled (or will be enabled?), and even though not + * explicitly documented by TLFS, the root VMCS passed to VMXON should + * still be marked with the revision_id reported by the physical CPU. + */ + rev_id = vmx_basic_vmcs_revision_id(basic_msr); + + for_each_possible_cpu(cpu) { + int node = cpu_to_node(cpu); + struct page *page; + struct vmcs *vmcs; + + page = __alloc_pages_node(node, GFP_KERNEL | __GFP_ZERO, 0); + if (!page) { + x86_vmx_exit(); + return -ENOMEM; + } + + vmcs = page_address(page); + vmcs->hdr.revision_id = rev_id; + per_cpu(root_vmcs, cpu) = vmcs; + } + + return 0; +} +#else +static __init int x86_vmx_init(void) { return -EOPNOTSUPP; } +#endif + +void __init x86_virt_init(void) +{ + x86_vmx_init(); +} -- 2.53.0.310.g728cabbaf7-goog