Explicitly clear exit_code_hi in the VMCB when synthesizing "normal" nested VM-Exits, as the full exit code is a 64-bit value (spoiler alert), and all exit codes for non-failing VMRUN use only bits 31:0. Cc: Jim Mattson Cc: Yosry Ahmed Cc: stable@vger.kernel.org Signed-off-by: Sean Christopherson --- arch/x86/kvm/svm/svm.c | 2 ++ arch/x86/kvm/svm/svm.h | 7 ++++--- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index fc42bcdbb520..7ea034ee6b6c 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -2433,6 +2433,7 @@ static bool check_selective_cr0_intercepted(struct kvm_vcpu *vcpu, if (cr0 ^ val) { svm->vmcb->control.exit_code = SVM_EXIT_CR0_SEL_WRITE; + svm->vmcb->control.exit_code_hi = 0; ret = (nested_svm_exit_handled(svm) == NESTED_EXIT_DONE); } @@ -4608,6 +4609,7 @@ static int svm_check_intercept(struct kvm_vcpu *vcpu, if (static_cpu_has(X86_FEATURE_NRIPS)) vmcb->control.next_rip = info->next_rip; vmcb->control.exit_code = icpt_info.exit_code; + vmcb->control.exit_code_hi = 0; vmexit = nested_svm_exit_handled(svm); ret = (vmexit == NESTED_EXIT_DONE) ? X86EMUL_INTERCEPTED diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h index c2acaa49ee1c..253a8dca412c 100644 --- a/arch/x86/kvm/svm/svm.h +++ b/arch/x86/kvm/svm/svm.h @@ -763,9 +763,10 @@ int nested_svm_vmexit(struct vcpu_svm *svm); static inline int nested_svm_simple_vmexit(struct vcpu_svm *svm, u32 exit_code) { - svm->vmcb->control.exit_code = exit_code; - svm->vmcb->control.exit_info_1 = 0; - svm->vmcb->control.exit_info_2 = 0; + svm->vmcb->control.exit_code = exit_code; + svm->vmcb->control.exit_code_hi = 0; + svm->vmcb->control.exit_info_1 = 0; + svm->vmcb->control.exit_info_2 = 0; return nested_svm_vmexit(svm); } -- 2.52.0.rc1.455.g30608eb744-goog Set exit_code_hi to -1u as a temporary band-aid to fix a long-standing (effectively since KVM's inception) bug where KVM treats the exit code as a 32-bit value, when in reality it's a 64-bit value. Per the APM, offset 0x70 is a single 64-bit value: 070h 63:0 EXITCODE And a sane reading of the error values defined in "Table C-1. SVM Intercept Codes" is that negative values use the full 64 bits: –1 VMEXIT_INVALID Invalid guest state in VMCB. –2 VMEXIT_BUSYBUSY bit was set in the VMSA –3 VMEXIT_IDLE_REQUIREDThe sibling thread is not in an idle state -4 VMEXIT_INVALID_PMC Invalid PMC state And that interpretation is confirmed by testing on Milan and Turin (by setting bits in CR0[63:32] to generate VMEXIT_INVALID on VMRUN). Furthermore, Xen has treated exitcode as a 64-bit value since HVM support was adding in 2006 (see Xen commit d1bd157fbc ("Big merge the HVM full-virtualisation abstractions.")). Cc: Jim Mattson Cc: Yosry Ahmed Cc: stable@vger.kernel.org Signed-off-by: Sean Christopherson --- arch/x86/kvm/svm/nested.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c index c81005b24522..ba0f11c68372 100644 --- a/arch/x86/kvm/svm/nested.c +++ b/arch/x86/kvm/svm/nested.c @@ -985,7 +985,7 @@ int nested_svm_vmrun(struct kvm_vcpu *vcpu) if (!nested_vmcb_check_save(vcpu) || !nested_vmcb_check_controls(vcpu)) { vmcb12->control.exit_code = SVM_EXIT_ERR; - vmcb12->control.exit_code_hi = 0; + vmcb12->control.exit_code_hi = -1u; vmcb12->control.exit_info_1 = 0; vmcb12->control.exit_info_2 = 0; goto out; @@ -1018,7 +1018,7 @@ int nested_svm_vmrun(struct kvm_vcpu *vcpu) svm->soft_int_injected = false; svm->vmcb->control.exit_code = SVM_EXIT_ERR; - svm->vmcb->control.exit_code_hi = 0; + svm->vmcb->control.exit_code_hi = -1u; svm->vmcb->control.exit_info_1 = 0; svm->vmcb->control.exit_info_2 = 0; -- 2.52.0.rc1.455.g30608eb744-goog Add a helper to detect VMRUN failures so that KVM can guard against its own long-standing bug, where KVM neglects to set exitcode[63:32] when synthesizing a nested VMFAIL_INVALID VM-Exit. This will allow fixing KVM's mess of treating exitcode as two separate 32-bit values without breaking KVM-on-KVM when running on an older, unfixed KVM. Cc: Jim Mattson Cc: Yosry Ahmed Signed-off-by: Sean Christopherson --- arch/x86/kvm/svm/nested.c | 16 +++++++--------- arch/x86/kvm/svm/svm.c | 4 ++-- arch/x86/kvm/svm/svm.h | 5 +++++ 3 files changed, 14 insertions(+), 11 deletions(-) diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c index ba0f11c68372..8070e20ed5a7 100644 --- a/arch/x86/kvm/svm/nested.c +++ b/arch/x86/kvm/svm/nested.c @@ -1134,7 +1134,7 @@ int nested_svm_vmexit(struct vcpu_svm *svm) vmcb12->control.exit_info_1 = vmcb02->control.exit_info_1; vmcb12->control.exit_info_2 = vmcb02->control.exit_info_2; - if (vmcb12->control.exit_code != SVM_EXIT_ERR) + if (svm_is_vmrun_failure(vmcb12->control.exit_code)) nested_save_pending_event_to_vmcb12(svm, vmcb12); if (guest_cpu_cap_has(vcpu, X86_FEATURE_NRIPS)) @@ -1425,6 +1425,9 @@ static int nested_svm_intercept(struct vcpu_svm *svm) u32 exit_code = svm->vmcb->control.exit_code; int vmexit = NESTED_EXIT_HOST; + if (svm_is_vmrun_failure(exit_code)) + return NESTED_EXIT_DONE; + switch (exit_code) { case SVM_EXIT_MSR: vmexit = nested_svm_exit_handled_msr(svm); @@ -1432,7 +1435,7 @@ static int nested_svm_intercept(struct vcpu_svm *svm) case SVM_EXIT_IOIO: vmexit = nested_svm_intercept_ioio(svm); break; - case SVM_EXIT_EXCP_BASE ... SVM_EXIT_EXCP_BASE + 0x1f: { + case SVM_EXIT_EXCP_BASE ... SVM_EXIT_EXCP_BASE + 0x1f: /* * Host-intercepted exceptions have been checked already in * nested_svm_exit_special. There is nothing to do here, @@ -1440,15 +1443,10 @@ static int nested_svm_intercept(struct vcpu_svm *svm) */ vmexit = NESTED_EXIT_DONE; break; - } - case SVM_EXIT_ERR: { - vmexit = NESTED_EXIT_DONE; - break; - } - default: { + default: if (vmcb12_is_intercept(&svm->nested.ctl, exit_code)) vmexit = NESTED_EXIT_DONE; - } + break; } return vmexit; diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index 7ea034ee6b6c..52b759408853 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -3530,7 +3530,7 @@ static int svm_handle_exit(struct kvm_vcpu *vcpu, fastpath_t exit_fastpath) return 1; } - if (svm->vmcb->control.exit_code == SVM_EXIT_ERR) { + if (svm_is_vmrun_failure(svm->vmcb->control.exit_code)) { kvm_run->exit_reason = KVM_EXIT_FAIL_ENTRY; kvm_run->fail_entry.hardware_entry_failure_reason = svm->vmcb->control.exit_code; @@ -4302,7 +4302,7 @@ static __no_kcsan fastpath_t svm_vcpu_run(struct kvm_vcpu *vcpu, u64 run_flags) /* Track VMRUNs that have made past consistency checking */ if (svm->nested.nested_run_pending && - svm->vmcb->control.exit_code != SVM_EXIT_ERR) + !svm_is_vmrun_failure(svm->vmcb->control.exit_code)) ++vcpu->stat.nested_run; svm->nested.nested_run_pending = 0; diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h index 253a8dca412c..6b35925e3a33 100644 --- a/arch/x86/kvm/svm/svm.h +++ b/arch/x86/kvm/svm/svm.h @@ -426,6 +426,11 @@ static __always_inline struct vcpu_svm *to_svm(struct kvm_vcpu *vcpu) return container_of(vcpu, struct vcpu_svm, vcpu); } +static inline bool svm_is_vmrun_failure(u64 exit_code) +{ + return (u32)exit_code == (u32)SVM_EXIT_ERR; +} + /* * Only the PDPTRs are loaded on demand into the shadow MMU. All other * fields are synchronized on VM-Exit, because accessing the VMCB is cheap. -- 2.52.0.rc1.455.g30608eb744-goog Fold svm_check_exit_valid() and svm_handle_invalid_exit() into their sole caller, svm_invoke_exit_handler(), as having tiny single-use helpers makes the code unncessarily difficult to follow. This will also allow for additional cleanups in svm_invoke_exit_handler(). No functional change intended. Suggested-by: Yosry Ahmed Signed-off-by: Sean Christopherson --- arch/x86/kvm/svm/svm.c | 25 ++++++++++--------------- 1 file changed, 10 insertions(+), 15 deletions(-) diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index 52b759408853..638a67ef0c37 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -3433,23 +3433,13 @@ static void dump_vmcb(struct kvm_vcpu *vcpu) sev_free_decrypted_vmsa(vcpu, save); } -static bool svm_check_exit_valid(u64 exit_code) -{ - return (exit_code < ARRAY_SIZE(svm_exit_handlers) && - svm_exit_handlers[exit_code]); -} - -static int svm_handle_invalid_exit(struct kvm_vcpu *vcpu, u64 exit_code) -{ - dump_vmcb(vcpu); - kvm_prepare_unexpected_reason_exit(vcpu, exit_code); - return 0; -} - int svm_invoke_exit_handler(struct kvm_vcpu *vcpu, u64 exit_code) { - if (!svm_check_exit_valid(exit_code)) - return svm_handle_invalid_exit(vcpu, exit_code); + if (exit_code >= ARRAY_SIZE(svm_exit_handlers)) + goto unexpected_vmexit; + + if (!svm_exit_handlers[exit_code]) + goto unexpected_vmexit; #ifdef CONFIG_MITIGATION_RETPOLINE if (exit_code == SVM_EXIT_MSR) @@ -3468,6 +3458,11 @@ int svm_invoke_exit_handler(struct kvm_vcpu *vcpu, u64 exit_code) #endif #endif return svm_exit_handlers[exit_code](vcpu); + +unexpected_vmexit: + dump_vmcb(vcpu); + kvm_prepare_unexpected_reason_exit(vcpu, exit_code); + return 0; } static void svm_get_exit_info(struct kvm_vcpu *vcpu, u32 *reason, -- 2.52.0.rc1.455.g30608eb744-goog Check for an unexpected/unhandled VM-Exit after the manual RETPOLINE=y handling. The entire point of the RETPOLINE checks is to optimize for common VM-Exits, i.e. checking for the rare case of an unsupported VM-Exit is counter-productive. This also aligns SVM and VMX exit handling. No functional change intended. Signed-off-by: Sean Christopherson --- arch/x86/kvm/svm/svm.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index 638a67ef0c37..202a4d8088a2 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -3435,12 +3435,6 @@ static void dump_vmcb(struct kvm_vcpu *vcpu) int svm_invoke_exit_handler(struct kvm_vcpu *vcpu, u64 exit_code) { - if (exit_code >= ARRAY_SIZE(svm_exit_handlers)) - goto unexpected_vmexit; - - if (!svm_exit_handlers[exit_code]) - goto unexpected_vmexit; - #ifdef CONFIG_MITIGATION_RETPOLINE if (exit_code == SVM_EXIT_MSR) return msr_interception(vcpu); @@ -3457,6 +3451,12 @@ int svm_invoke_exit_handler(struct kvm_vcpu *vcpu, u64 exit_code) return sev_handle_vmgexit(vcpu); #endif #endif + if (exit_code >= ARRAY_SIZE(svm_exit_handlers)) + goto unexpected_vmexit; + + if (!svm_exit_handlers[exit_code]) + goto unexpected_vmexit; + return svm_exit_handlers[exit_code](vcpu); unexpected_vmexit: -- 2.52.0.rc1.455.g30608eb744-goog Explicitly filter out 64-bit exit codes when invoking exit handlers, as svm_exit_handlers[] will never be sized with entries that use bits 63:32. Processing the non-failing exit code as a 32-bit value will allow tracking exit_code as a single 64-bit value (which it is, architecturally). This will also allow hardening KVM against Spectre-like attacks without needing to do silly things to avoid build failures on 32-bit kernels (array_index_nospec() rightly asserts that the index fits in an "unsigned long"). Omit the check when running as a VM, as KVM has historically failed to set bits 63:32 appropriately when synthesizing VM-Exits, i.e. KVM could get false positives when running as a VM on an older, broken KVM/kernel. From a functional perspective, omitting the check is "fine", as any unwanted collision between e.g. VMEXIT_INVALID and a 32-bit exit code will be fatal to KVM-on-KVM regardless of what KVM-as-L1 does. Signed-off-by: Sean Christopherson --- arch/x86/kvm/svm/svm.c | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index 202a4d8088a2..3b05476296d0 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -3433,8 +3433,22 @@ static void dump_vmcb(struct kvm_vcpu *vcpu) sev_free_decrypted_vmsa(vcpu, save); } -int svm_invoke_exit_handler(struct kvm_vcpu *vcpu, u64 exit_code) +int svm_invoke_exit_handler(struct kvm_vcpu *vcpu, u64 __exit_code) { + u32 exit_code = __exit_code; + + /* + * SVM uses negative values, i.e. 64-bit values, to indicate that VMRUN + * failed. Report all such errors to userspace (note, VMEXIT_INVALID, + * a.k.a. SVM_EXIT_ERR, is special cased by svm_handle_exit()). Skip + * the check when running as a VM, as KVM has historically left garbage + * in bits 63:32, i.e. running KVM-on-KVM would hit false positives if + * the underlying kernel is buggy. + */ + if (!cpu_feature_enabled(X86_FEATURE_HYPERVISOR) && + (u64)exit_code != __exit_code) + goto unexpected_vmexit; + #ifdef CONFIG_MITIGATION_RETPOLINE if (exit_code == SVM_EXIT_MSR) return msr_interception(vcpu); @@ -3461,7 +3475,7 @@ int svm_invoke_exit_handler(struct kvm_vcpu *vcpu, u64 exit_code) unexpected_vmexit: dump_vmcb(vcpu); - kvm_prepare_unexpected_reason_exit(vcpu, exit_code); + kvm_prepare_unexpected_reason_exit(vcpu, __exit_code); return 0; } -- 2.52.0.rc1.455.g30608eb744-goog Fix KVM's long-standing buggy handling of SVM's exit_code as a 32-bit value. Per the APM and Xen commit d1bd157fbc ("Big merge the HVM full-virtualisation abstractions.") (which is arguably more trustworthy than KVM), offset 0x70 is a single 64-bit value: 070h 63:0 EXITCODE Track exit_code as a single u64 to prevent reintroducing bugs where KVM neglects to correctly set bits 63:32. Fixes: 6aa8b732ca01 ("[PATCH] kvm: userspace interface") Cc: Jim Mattson Cc: Yosry Ahmed Signed-off-by: Sean Christopherson --- arch/x86/include/asm/svm.h | 3 +-- arch/x86/include/uapi/asm/svm.h | 32 ++++++++++++++--------------- arch/x86/kvm/svm/hyperv.c | 1 - arch/x86/kvm/svm/nested.c | 13 +++--------- arch/x86/kvm/svm/sev.c | 36 +++++++++++---------------------- arch/x86/kvm/svm/svm.c | 7 ++----- arch/x86/kvm/svm/svm.h | 4 +--- arch/x86/kvm/trace.h | 2 +- include/hyperv/hvgdk.h | 2 +- 9 files changed, 37 insertions(+), 63 deletions(-) diff --git a/arch/x86/include/asm/svm.h b/arch/x86/include/asm/svm.h index e69b6d0dedcf..66b22cffedfc 100644 --- a/arch/x86/include/asm/svm.h +++ b/arch/x86/include/asm/svm.h @@ -136,8 +136,7 @@ struct __attribute__ ((__packed__)) vmcb_control_area { u32 int_vector; u32 int_state; u8 reserved_3[4]; - u32 exit_code; - u32 exit_code_hi; + u64 exit_code; u64 exit_info_1; u64 exit_info_2; u32 exit_int_info; diff --git a/arch/x86/include/uapi/asm/svm.h b/arch/x86/include/uapi/asm/svm.h index 650e3256ea7d..010a45c9f614 100644 --- a/arch/x86/include/uapi/asm/svm.h +++ b/arch/x86/include/uapi/asm/svm.h @@ -103,38 +103,38 @@ #define SVM_EXIT_VMGEXIT 0x403 /* SEV-ES software-defined VMGEXIT events */ -#define SVM_VMGEXIT_MMIO_READ 0x80000001 -#define SVM_VMGEXIT_MMIO_WRITE 0x80000002 -#define SVM_VMGEXIT_NMI_COMPLETE 0x80000003 -#define SVM_VMGEXIT_AP_HLT_LOOP 0x80000004 -#define SVM_VMGEXIT_AP_JUMP_TABLE 0x80000005 +#define SVM_VMGEXIT_MMIO_READ 0x80000001ull +#define SVM_VMGEXIT_MMIO_WRITE 0x80000002ull +#define SVM_VMGEXIT_NMI_COMPLETE 0x80000003ull +#define SVM_VMGEXIT_AP_HLT_LOOP 0x80000004ull +#define SVM_VMGEXIT_AP_JUMP_TABLE 0x80000005ull #define SVM_VMGEXIT_SET_AP_JUMP_TABLE 0 #define SVM_VMGEXIT_GET_AP_JUMP_TABLE 1 -#define SVM_VMGEXIT_PSC 0x80000010 -#define SVM_VMGEXIT_GUEST_REQUEST 0x80000011 -#define SVM_VMGEXIT_EXT_GUEST_REQUEST 0x80000012 -#define SVM_VMGEXIT_AP_CREATION 0x80000013 +#define SVM_VMGEXIT_PSC 0x80000010ull +#define SVM_VMGEXIT_GUEST_REQUEST 0x80000011ull +#define SVM_VMGEXIT_EXT_GUEST_REQUEST 0x80000012ull +#define SVM_VMGEXIT_AP_CREATION 0x80000013ull #define SVM_VMGEXIT_AP_CREATE_ON_INIT 0 #define SVM_VMGEXIT_AP_CREATE 1 #define SVM_VMGEXIT_AP_DESTROY 2 -#define SVM_VMGEXIT_SNP_RUN_VMPL 0x80000018 -#define SVM_VMGEXIT_SAVIC 0x8000001a +#define SVM_VMGEXIT_SNP_RUN_VMPL 0x80000018ull +#define SVM_VMGEXIT_SAVIC 0x8000001aull #define SVM_VMGEXIT_SAVIC_REGISTER_GPA 0 #define SVM_VMGEXIT_SAVIC_UNREGISTER_GPA 1 #define SVM_VMGEXIT_SAVIC_SELF_GPA ~0ULL -#define SVM_VMGEXIT_HV_FEATURES 0x8000fffd -#define SVM_VMGEXIT_TERM_REQUEST 0x8000fffe +#define SVM_VMGEXIT_HV_FEATURES 0x8000fffdull +#define SVM_VMGEXIT_TERM_REQUEST 0x8000fffeull #define SVM_VMGEXIT_TERM_REASON(reason_set, reason_code) \ /* SW_EXITINFO1[3:0] */ \ (((((u64)reason_set) & 0xf)) | \ /* SW_EXITINFO1[11:4] */ \ ((((u64)reason_code) & 0xff) << 4)) -#define SVM_VMGEXIT_UNSUPPORTED_EVENT 0x8000ffff +#define SVM_VMGEXIT_UNSUPPORTED_EVENT 0x8000ffffull /* Exit code reserved for hypervisor/software use */ -#define SVM_EXIT_SW 0xf0000000 +#define SVM_EXIT_SW 0xf0000000ull -#define SVM_EXIT_ERR -1 +#define SVM_EXIT_ERR -1ull #define SVM_EXIT_REASONS \ { SVM_EXIT_READ_CR0, "read_cr0" }, \ diff --git a/arch/x86/kvm/svm/hyperv.c b/arch/x86/kvm/svm/hyperv.c index 088f6429b24c..3ec580d687f5 100644 --- a/arch/x86/kvm/svm/hyperv.c +++ b/arch/x86/kvm/svm/hyperv.c @@ -11,7 +11,6 @@ void svm_hv_inject_synthetic_vmexit_post_tlb_flush(struct kvm_vcpu *vcpu) struct vcpu_svm *svm = to_svm(vcpu); svm->vmcb->control.exit_code = HV_SVM_EXITCODE_ENL; - svm->vmcb->control.exit_code_hi = 0; svm->vmcb->control.exit_info_1 = HV_SVM_ENL_EXITCODE_TRAP_AFTER_FLUSH; svm->vmcb->control.exit_info_2 = 0; nested_svm_vmexit(svm); diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c index 8070e20ed5a7..89120245cd22 100644 --- a/arch/x86/kvm/svm/nested.c +++ b/arch/x86/kvm/svm/nested.c @@ -45,7 +45,6 @@ static void nested_svm_inject_npf_exit(struct kvm_vcpu *vcpu, * correctly fill in the high bits of exit_info_1. */ vmcb->control.exit_code = SVM_EXIT_NPF; - vmcb->control.exit_code_hi = 0; vmcb->control.exit_info_1 = (1ULL << 32); vmcb->control.exit_info_2 = fault->address; } @@ -421,7 +420,6 @@ void __nested_copy_vmcb_control_to_cache(struct kvm_vcpu *vcpu, to->int_vector = from->int_vector; to->int_state = from->int_state; to->exit_code = from->exit_code; - to->exit_code_hi = from->exit_code_hi; to->exit_info_1 = from->exit_info_1; to->exit_info_2 = from->exit_info_2; to->exit_int_info = from->exit_int_info; @@ -727,8 +725,8 @@ static void nested_vmcb02_prepare_control(struct vcpu_svm *svm, enter_guest_mode(vcpu); /* - * Filled at exit: exit_code, exit_code_hi, exit_info_1, exit_info_2, - * exit_int_info, exit_int_info_err, next_rip, insn_len, insn_bytes. + * Filled at exit: exit_code, exit_info_1, exit_info_2, exit_int_info, + * exit_int_info_err, next_rip, insn_len, insn_bytes. */ if (guest_cpu_cap_has(vcpu, X86_FEATURE_VGIF) && @@ -985,7 +983,6 @@ int nested_svm_vmrun(struct kvm_vcpu *vcpu) if (!nested_vmcb_check_save(vcpu) || !nested_vmcb_check_controls(vcpu)) { vmcb12->control.exit_code = SVM_EXIT_ERR; - vmcb12->control.exit_code_hi = -1u; vmcb12->control.exit_info_1 = 0; vmcb12->control.exit_info_2 = 0; goto out; @@ -1018,7 +1015,6 @@ int nested_svm_vmrun(struct kvm_vcpu *vcpu) svm->soft_int_injected = false; svm->vmcb->control.exit_code = SVM_EXIT_ERR; - svm->vmcb->control.exit_code_hi = -1u; svm->vmcb->control.exit_info_1 = 0; svm->vmcb->control.exit_info_2 = 0; @@ -1130,7 +1126,6 @@ int nested_svm_vmexit(struct vcpu_svm *svm) vmcb12->control.int_state = vmcb02->control.int_state; vmcb12->control.exit_code = vmcb02->control.exit_code; - vmcb12->control.exit_code_hi = vmcb02->control.exit_code_hi; vmcb12->control.exit_info_1 = vmcb02->control.exit_info_1; vmcb12->control.exit_info_2 = vmcb02->control.exit_info_2; @@ -1422,7 +1417,7 @@ static int nested_svm_intercept_ioio(struct vcpu_svm *svm) static int nested_svm_intercept(struct vcpu_svm *svm) { - u32 exit_code = svm->vmcb->control.exit_code; + u64 exit_code = svm->vmcb->control.exit_code; int vmexit = NESTED_EXIT_HOST; if (svm_is_vmrun_failure(exit_code)) @@ -1494,7 +1489,6 @@ static void nested_svm_inject_exception_vmexit(struct kvm_vcpu *vcpu) struct vmcb *vmcb = svm->vmcb; vmcb->control.exit_code = SVM_EXIT_EXCP_BASE + ex->vector; - vmcb->control.exit_code_hi = 0; if (ex->has_error_code) vmcb->control.exit_info_1 = ex->error_code; @@ -1669,7 +1663,6 @@ static void nested_copy_vmcb_cache_to_control(struct vmcb_control_area *dst, dst->int_vector = from->int_vector; dst->int_state = from->int_state; dst->exit_code = from->exit_code; - dst->exit_code_hi = from->exit_code_hi; dst->exit_info_1 = from->exit_info_1; dst->exit_info_2 = from->exit_info_2; dst->exit_int_info = from->exit_int_info; diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c index 0835c664fbfd..5aedd07194aa 100644 --- a/arch/x86/kvm/svm/sev.c +++ b/arch/x86/kvm/svm/sev.c @@ -3264,11 +3264,6 @@ void sev_free_vcpu(struct kvm_vcpu *vcpu) kvfree(svm->sev_es.ghcb_sa); } -static u64 kvm_get_cached_sw_exit_code(struct vmcb_control_area *control) -{ - return (((u64)control->exit_code_hi) << 32) | control->exit_code; -} - static void dump_ghcb(struct vcpu_svm *svm) { struct vmcb_control_area *control = &svm->vmcb->control; @@ -3290,7 +3285,7 @@ static void dump_ghcb(struct vcpu_svm *svm) */ pr_err("GHCB (GPA=%016llx) snapshot:\n", svm->vmcb->control.ghcb_gpa); pr_err("%-20s%016llx is_valid: %u\n", "sw_exit_code", - kvm_get_cached_sw_exit_code(control), kvm_ghcb_sw_exit_code_is_valid(svm)); + control->exit_code, kvm_ghcb_sw_exit_code_is_valid(svm)); pr_err("%-20s%016llx is_valid: %u\n", "sw_exit_info_1", control->exit_info_1, kvm_ghcb_sw_exit_info_1_is_valid(svm)); pr_err("%-20s%016llx is_valid: %u\n", "sw_exit_info_2", @@ -3324,7 +3319,6 @@ static void sev_es_sync_from_ghcb(struct vcpu_svm *svm) struct vmcb_control_area *control = &svm->vmcb->control; struct kvm_vcpu *vcpu = &svm->vcpu; struct ghcb *ghcb = svm->sev_es.ghcb; - u64 exit_code; /* * The GHCB protocol so far allows for the following data @@ -3358,9 +3352,7 @@ static void sev_es_sync_from_ghcb(struct vcpu_svm *svm) __kvm_emulate_msr_write(vcpu, MSR_IA32_XSS, kvm_ghcb_get_xss(svm)); /* Copy the GHCB exit information into the VMCB fields */ - exit_code = kvm_ghcb_get_sw_exit_code(svm); - control->exit_code = lower_32_bits(exit_code); - control->exit_code_hi = upper_32_bits(exit_code); + control->exit_code = kvm_ghcb_get_sw_exit_code(svm); control->exit_info_1 = kvm_ghcb_get_sw_exit_info_1(svm); control->exit_info_2 = kvm_ghcb_get_sw_exit_info_2(svm); svm->sev_es.sw_scratch = kvm_ghcb_get_sw_scratch_if_valid(svm); @@ -3373,15 +3365,8 @@ static int sev_es_validate_vmgexit(struct vcpu_svm *svm) { struct vmcb_control_area *control = &svm->vmcb->control; struct kvm_vcpu *vcpu = &svm->vcpu; - u64 exit_code; u64 reason; - /* - * Retrieve the exit code now even though it may not be marked valid - * as it could help with debugging. - */ - exit_code = kvm_get_cached_sw_exit_code(control); - /* Only GHCB Usage code 0 is supported */ if (svm->sev_es.ghcb->ghcb_usage) { reason = GHCB_ERR_INVALID_USAGE; @@ -3395,7 +3380,7 @@ static int sev_es_validate_vmgexit(struct vcpu_svm *svm) !kvm_ghcb_sw_exit_info_2_is_valid(svm)) goto vmgexit_err; - switch (exit_code) { + switch (control->exit_code) { case SVM_EXIT_READ_DR7: break; case SVM_EXIT_WRITE_DR7: @@ -3496,15 +3481,19 @@ static int sev_es_validate_vmgexit(struct vcpu_svm *svm) return 0; vmgexit_err: + /* + * Print the exit code even though it may not be marked valid as it + * could help with debugging. + */ if (reason == GHCB_ERR_INVALID_USAGE) { vcpu_unimpl(vcpu, "vmgexit: ghcb usage %#x is not valid\n", svm->sev_es.ghcb->ghcb_usage); } else if (reason == GHCB_ERR_INVALID_EVENT) { vcpu_unimpl(vcpu, "vmgexit: exit code %#llx is not valid\n", - exit_code); + control->exit_code); } else { vcpu_unimpl(vcpu, "vmgexit: exit code %#llx input is not valid\n", - exit_code); + control->exit_code); dump_ghcb(svm); } @@ -4343,7 +4332,7 @@ int sev_handle_vmgexit(struct kvm_vcpu *vcpu) { struct vcpu_svm *svm = to_svm(vcpu); struct vmcb_control_area *control = &svm->vmcb->control; - u64 ghcb_gpa, exit_code; + u64 ghcb_gpa; int ret; /* Validate the GHCB */ @@ -4385,8 +4374,7 @@ int sev_handle_vmgexit(struct kvm_vcpu *vcpu) svm_vmgexit_success(svm, 0); - exit_code = kvm_get_cached_sw_exit_code(control); - switch (exit_code) { + switch (control->exit_code) { case SVM_VMGEXIT_MMIO_READ: ret = setup_vmgexit_scratch(svm, true, control->exit_info_2); if (ret) @@ -4478,7 +4466,7 @@ int sev_handle_vmgexit(struct kvm_vcpu *vcpu) ret = -EINVAL; break; default: - ret = svm_invoke_exit_handler(vcpu, exit_code); + ret = svm_invoke_exit_handler(vcpu, control->exit_code); } return ret; diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index 3b05476296d0..85bc99f93275 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -2433,7 +2433,6 @@ static bool check_selective_cr0_intercepted(struct kvm_vcpu *vcpu, if (cr0 ^ val) { svm->vmcb->control.exit_code = SVM_EXIT_CR0_SEL_WRITE; - svm->vmcb->control.exit_code_hi = 0; ret = (nested_svm_exit_handled(svm) == NESTED_EXIT_DONE); } @@ -3265,7 +3264,7 @@ static void dump_vmcb(struct kvm_vcpu *vcpu) pr_err("%-20s%08x\n", "int_ctl:", control->int_ctl); pr_err("%-20s%08x\n", "int_vector:", control->int_vector); pr_err("%-20s%08x\n", "int_state:", control->int_state); - pr_err("%-20s%08x\n", "exit_code:", control->exit_code); + pr_err("%-20s%016llx\n", "exit_code:", control->exit_code); pr_err("%-20s%016llx\n", "exit_info1:", control->exit_info_1); pr_err("%-20s%016llx\n", "exit_info2:", control->exit_info_2); pr_err("%-20s%08x\n", "exit_int_info:", control->exit_int_info); @@ -3515,7 +3514,6 @@ static int svm_handle_exit(struct kvm_vcpu *vcpu, fastpath_t exit_fastpath) { struct vcpu_svm *svm = to_svm(vcpu); struct kvm_run *kvm_run = vcpu->run; - u32 exit_code = svm->vmcb->control.exit_code; /* SEV-ES guests must use the CR write traps to track CR registers. */ if (!sev_es_guest(vcpu->kvm)) { @@ -3551,7 +3549,7 @@ static int svm_handle_exit(struct kvm_vcpu *vcpu, fastpath_t exit_fastpath) if (exit_fastpath != EXIT_FASTPATH_NONE) return 1; - return svm_invoke_exit_handler(vcpu, exit_code); + return svm_invoke_exit_handler(vcpu, svm->vmcb->control.exit_code); } static int pre_svm_run(struct kvm_vcpu *vcpu) @@ -4618,7 +4616,6 @@ static int svm_check_intercept(struct kvm_vcpu *vcpu, if (static_cpu_has(X86_FEATURE_NRIPS)) vmcb->control.next_rip = info->next_rip; vmcb->control.exit_code = icpt_info.exit_code; - vmcb->control.exit_code_hi = 0; vmexit = nested_svm_exit_handled(svm); ret = (vmexit == NESTED_EXIT_DONE) ? X86EMUL_INTERCEPTED diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h index 6b35925e3a33..31ee4f65dcc2 100644 --- a/arch/x86/kvm/svm/svm.h +++ b/arch/x86/kvm/svm/svm.h @@ -162,8 +162,7 @@ struct vmcb_ctrl_area_cached { u32 int_ctl; u32 int_vector; u32 int_state; - u32 exit_code; - u32 exit_code_hi; + u64 exit_code; u64 exit_info_1; u64 exit_info_2; u32 exit_int_info; @@ -769,7 +768,6 @@ int nested_svm_vmexit(struct vcpu_svm *svm); static inline int nested_svm_simple_vmexit(struct vcpu_svm *svm, u32 exit_code) { svm->vmcb->control.exit_code = exit_code; - svm->vmcb->control.exit_code_hi = 0; svm->vmcb->control.exit_info_1 = 0; svm->vmcb->control.exit_info_2 = 0; return nested_svm_vmexit(svm); diff --git a/arch/x86/kvm/trace.h b/arch/x86/kvm/trace.h index e79bc9cb7162..4c7a5cd10990 100644 --- a/arch/x86/kvm/trace.h +++ b/arch/x86/kvm/trace.h @@ -781,7 +781,7 @@ TRACE_EVENT_KVM_EXIT(kvm_nested_vmexit); * Tracepoint for #VMEXIT reinjected to the guest */ TRACE_EVENT(kvm_nested_vmexit_inject, - TP_PROTO(__u32 exit_code, + TP_PROTO(__u64 exit_code, __u64 exit_info1, __u64 exit_info2, __u32 exit_int_info, __u32 exit_int_info_err, __u32 isa), TP_ARGS(exit_code, exit_info1, exit_info2, diff --git a/include/hyperv/hvgdk.h b/include/hyperv/hvgdk.h index dd6d4939ea29..56b695873a72 100644 --- a/include/hyperv/hvgdk.h +++ b/include/hyperv/hvgdk.h @@ -281,7 +281,7 @@ struct hv_vmcb_enlightenments { #define HV_VMCB_NESTED_ENLIGHTENMENTS 31 /* Synthetic VM-Exit */ -#define HV_SVM_EXITCODE_ENL 0xf0000000 +#define HV_SVM_EXITCODE_ENL 0xf0000000u #define HV_SVM_ENL_EXITCODE_TRAP_AFTER_FLUSH (1) /* VM_PARTITION_ASSIST_PAGE */ -- 2.52.0.rc1.455.g30608eb744-goog Limit KVM's incorrect check for VMXEXIT_INVALID, a.k.a. SVM_EXIT_ERR, to running as a VM, as detected by X86_FEATURE_HYPERVISOR. The exit_code and all failure codes, e.g. VMXEXIT_INVALID, are 64-bit values, and so checking only bits 31:0 could result in false positives when running on non-broken hardware, e.g. in the extremely unlikely scenario exit code 0xffffffffull is ever generated by hardware. Keep the 32-bit check to play nice with running on broken KVM (for years, KVM has not set bits 63:32 when synthesizing nested SVM VM-Exits). Signed-off-by: Sean Christopherson --- arch/x86/kvm/svm/svm.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h index 31ee4f65dcc2..801dcfc64d0b 100644 --- a/arch/x86/kvm/svm/svm.h +++ b/arch/x86/kvm/svm/svm.h @@ -427,7 +427,10 @@ static __always_inline struct vcpu_svm *to_svm(struct kvm_vcpu *vcpu) static inline bool svm_is_vmrun_failure(u64 exit_code) { - return (u32)exit_code == (u32)SVM_EXIT_ERR; + if (cpu_feature_enabled(X86_FEATURE_HYPERVISOR)) + return (u32)exit_code == (u32)SVM_EXIT_ERR; + + return exit_code == SVM_EXIT_ERR; } /* -- 2.52.0.rc1.455.g30608eb744-goog Explicitly clamp the exit code used to index KVM's exit handlers to guard against Spectre-like attacks, mainly to provide consistency between VMX and SVM (VMX was given the same treatment by commit c926f2f7230b ("KVM: x86: Protect exit_reason from being used in Spectre-v1/L1TF attacks"). For normal VMs, it's _extremely_ unlikely the exit code could be used to exploit a speculation vulnerability, as the exit code is set by hardware and unexpected/unknown exit codes should be quite well bounded (as is/was the case with VMX). But with SEV-ES+, the exit code is guest-controlled as it comes from the GHCB, not from hardware, i.e. an attack from the guest is at least somewhat plausible. Irrespective of SEV-ES+, hardening KVM is easy and inexpensive, and such an attack is theoretically possible. Signed-off-by: Sean Christopherson --- arch/x86/kvm/svm/svm.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index 85bc99f93275..308c70b6924e 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -3467,6 +3467,7 @@ int svm_invoke_exit_handler(struct kvm_vcpu *vcpu, u64 __exit_code) if (exit_code >= ARRAY_SIZE(svm_exit_handlers)) goto unexpected_vmexit; + exit_code = array_index_nospec(exit_code, ARRAY_SIZE(svm_exit_handlers)); if (!svm_exit_handlers[exit_code]) goto unexpected_vmexit; -- 2.52.0.rc1.455.g30608eb744-goog