Extend mediated PMU support for Intel CPUs without support for saving PERF_GLOBAL_CTRL into the guest VMCS field on VM-Exit, e.g. for Skylake and its derivatives, as well as Icelake. While supporting CPUs without VM_EXIT_SAVE_IA32_PERF_GLOBAL_CTRL isn't completely trivial, it's not that complex either. And not supporting such CPUs would mean not supporting 7+ years of Intel CPUs released in the past 10 years. On VM-Exit, immediately propagate the saved PERF_GLOBAL_CTRL to the VMCS as well as KVM's software cache so that KVM doesn't need to add full EXREG tracking of PERF_GLOBAL_CTRL. In practice, the vast majority of VM-Exits won't trigger software writes to guest PERF_GLOBAL_CTRL, so deferring the VMWRITE to the next VM-Enter would only delay the inevitable without batching/avoiding VMWRITEs. Note! Take care to refresh VM_EXIT_MSR_STORE_COUNT on nested VM-Exit, as it's unfortunately possible that KVM could recalculate MSR intercepts while L2 is active, e.g. if userspace loads nested state and _then_ sets PERF_CAPABILITIES. Eating the VMWRITE on every nested VM-Exit is unfortunate, but that's a pre-existing problem and can/should be solved separately, e.g. modifying the number of auto-load entries while L2 is active is also uncommon on modern CPUs. Signed-off-by: Sean Christopherson --- arch/x86/kvm/vmx/nested.c | 6 ++++- arch/x86/kvm/vmx/pmu_intel.c | 7 ----- arch/x86/kvm/vmx/vmx.c | 52 ++++++++++++++++++++++++++++++++---- 3 files changed, 52 insertions(+), 13 deletions(-) diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c index 614b789ecf16..1ee1edc8419d 100644 --- a/arch/x86/kvm/vmx/nested.c +++ b/arch/x86/kvm/vmx/nested.c @@ -5142,7 +5142,11 @@ void __nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 vm_exit_reason, kvm_nested_vmexit_handle_ibrs(vcpu); - /* Update any VMCS fields that might have changed while L2 ran */ + /* + * Update any VMCS fields that might have changed while vmcs02 was the + * active VMCS. 
The tracking is per-vCPU, not per-VMCS. + */ + vmcs_write32(VM_EXIT_MSR_STORE_COUNT, vmx->msr_autostore.nr); vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, vmx->msr_autoload.host.nr); vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, vmx->msr_autoload.guest.nr); vmcs_write64(TSC_OFFSET, vcpu->arch.tsc_offset); diff --git a/arch/x86/kvm/vmx/pmu_intel.c b/arch/x86/kvm/vmx/pmu_intel.c index 55249fa4db95..27eb76e6b6a0 100644 --- a/arch/x86/kvm/vmx/pmu_intel.c +++ b/arch/x86/kvm/vmx/pmu_intel.c @@ -777,13 +777,6 @@ static bool intel_pmu_is_mediated_pmu_supported(struct x86_pmu_capability *host_ if (WARN_ON_ONCE(!cpu_has_load_perf_global_ctrl())) return false; - /* - * KVM doesn't yet support mediated PMU on CPUs without support for - * saving PERF_GLOBAL_CTRL via a dedicated VMCS field. - */ - if (!cpu_has_save_perf_global_ctrl()) - return false; - return true; } diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index 6a17cb90eaf4..ba1262c3e3ff 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -1204,6 +1204,17 @@ static bool update_transition_efer(struct vcpu_vmx *vmx) return true; } +static void vmx_add_autostore_msr(struct vcpu_vmx *vmx, u32 msr) +{ + vmx_add_auto_msr(&vmx->msr_autostore, msr, 0, VM_EXIT_MSR_STORE_COUNT, + vmx->vcpu.kvm); +} + +static void vmx_remove_autostore_msr(struct vcpu_vmx *vmx, u32 msr) +{ + vmx_remove_auto_msr(&vmx->msr_autostore, msr, VM_EXIT_MSR_STORE_COUNT); +} + #ifdef CONFIG_X86_32 /* * On 32-bit kernels, VM exits still load the FS and GS bases from the @@ -4225,6 +4236,8 @@ void pt_update_intercept_for_msr(struct kvm_vcpu *vcpu) static void vmx_recalc_pmu_msr_intercepts(struct kvm_vcpu *vcpu) { + u64 vm_exit_controls_bits = VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL | + VM_EXIT_SAVE_IA32_PERF_GLOBAL_CTRL; bool has_mediated_pmu = kvm_vcpu_has_mediated_pmu(vcpu); struct kvm_pmu *pmu = vcpu_to_pmu(vcpu); struct vcpu_vmx *vmx = to_vmx(vcpu); @@ -4234,12 +4247,19 @@ static void vmx_recalc_pmu_msr_intercepts(struct kvm_vcpu *vcpu) if 
(!enable_mediated_pmu) return; + if (!cpu_has_save_perf_global_ctrl()) { + vm_exit_controls_bits &= ~VM_EXIT_SAVE_IA32_PERF_GLOBAL_CTRL; + + if (has_mediated_pmu) + vmx_add_autostore_msr(vmx, MSR_CORE_PERF_GLOBAL_CTRL); + else + vmx_remove_autostore_msr(vmx, MSR_CORE_PERF_GLOBAL_CTRL); + } + vm_entry_controls_changebit(vmx, VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL, has_mediated_pmu); - vm_exit_controls_changebit(vmx, VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL | - VM_EXIT_SAVE_IA32_PERF_GLOBAL_CTRL, - has_mediated_pmu); + vm_exit_controls_changebit(vmx, vm_exit_controls_bits, has_mediated_pmu); for (i = 0; i < pmu->nr_arch_gp_counters; i++) { vmx_set_intercept_for_msr(vcpu, MSR_IA32_PERFCTR0 + i, @@ -7346,6 +7366,29 @@ static void atomic_switch_perf_msrs(struct vcpu_vmx *vmx) msrs[i].host); } +static void vmx_refresh_guest_perf_global_control(struct kvm_vcpu *vcpu) +{ + struct kvm_pmu *pmu = vcpu_to_pmu(vcpu); + struct vcpu_vmx *vmx = to_vmx(vcpu); + + if (msr_write_intercepted(vmx, MSR_CORE_PERF_GLOBAL_CTRL)) + return; + + if (!cpu_has_save_perf_global_ctrl()) { + int slot = vmx_find_loadstore_msr_slot(&vmx->msr_autostore, + MSR_CORE_PERF_GLOBAL_CTRL); + + if (WARN_ON_ONCE(slot < 0)) + return; + + pmu->global_ctrl = vmx->msr_autostore.val[slot].value; + vmcs_write64(GUEST_IA32_PERF_GLOBAL_CTRL, pmu->global_ctrl); + return; + } + + pmu->global_ctrl = vmcs_read64(GUEST_IA32_PERF_GLOBAL_CTRL); +} + static void vmx_update_hv_timer(struct kvm_vcpu *vcpu, bool force_immediate_exit) { struct vcpu_vmx *vmx = to_vmx(vcpu); @@ -7631,8 +7674,7 @@ fastpath_t vmx_vcpu_run(struct kvm_vcpu *vcpu, u64 run_flags) vmx->loaded_vmcs->launched = 1; - if (!msr_write_intercepted(vmx, MSR_CORE_PERF_GLOBAL_CTRL)) - vcpu_to_pmu(vcpu)->global_ctrl = vmcs_read64(GUEST_IA32_PERF_GLOBAL_CTRL); + vmx_refresh_guest_perf_global_control(vcpu); vmx_recover_nmi_blocking(vmx); vmx_complete_interrupts(vmx); -- 2.52.0.223.gf5cc29aaa4-goog