Extract the computation of eventsel_hw from amd_pmu_set_msr() into a separate helper function, amd_pmu_set_eventsel_hw(). No functional change intended. Signed-off-by: Jim Mattson --- arch/x86/kvm/svm/pmu.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/arch/x86/kvm/svm/pmu.c b/arch/x86/kvm/svm/pmu.c index 7aa298eeb072..d9ca633f9f49 100644 --- a/arch/x86/kvm/svm/pmu.c +++ b/arch/x86/kvm/svm/pmu.c @@ -147,6 +147,12 @@ static int amd_pmu_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) return 1; } +static void amd_pmu_set_eventsel_hw(struct kvm_pmc *pmc) +{ + pmc->eventsel_hw = (pmc->eventsel & ~AMD64_EVENTSEL_HOSTONLY) | + AMD64_EVENTSEL_GUESTONLY; +} + static int amd_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) { struct kvm_pmu *pmu = vcpu_to_pmu(vcpu); @@ -166,8 +172,7 @@ static int amd_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) data &= ~pmu->reserved_bits; if (data != pmc->eventsel) { pmc->eventsel = data; - pmc->eventsel_hw = (data & ~AMD64_EVENTSEL_HOSTONLY) | - AMD64_EVENTSEL_GUESTONLY; + amd_pmu_set_eventsel_hw(pmc); kvm_pmu_request_counter_reprogram(pmc); } return 0; -- 2.53.0.rc2.204.g2597b5adb4-goog Update amd_pmu_set_eventsel_hw() to clear the event selector's hardware enable bit when the PMC should not count based on the guest's Host-Only and Guest-Only event selector bits and the current vCPU state. 
Signed-off-by: Jim Mattson --- arch/x86/include/asm/perf_event.h | 2 ++ arch/x86/kvm/svm/pmu.c | 18 ++++++++++++++++++ 2 files changed, 20 insertions(+) diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h index 0d9af4135e0a..4dfe12053c09 100644 --- a/arch/x86/include/asm/perf_event.h +++ b/arch/x86/include/asm/perf_event.h @@ -58,6 +58,8 @@ #define AMD64_EVENTSEL_INT_CORE_ENABLE (1ULL << 36) #define AMD64_EVENTSEL_GUESTONLY (1ULL << 40) #define AMD64_EVENTSEL_HOSTONLY (1ULL << 41) +#define AMD64_EVENTSEL_HOST_GUEST_MASK \ + (AMD64_EVENTSEL_HOSTONLY | AMD64_EVENTSEL_GUESTONLY) #define AMD64_EVENTSEL_INT_CORE_SEL_SHIFT 37 #define AMD64_EVENTSEL_INT_CORE_SEL_MASK \ diff --git a/arch/x86/kvm/svm/pmu.c b/arch/x86/kvm/svm/pmu.c index d9ca633f9f49..8d451110a94d 100644 --- a/arch/x86/kvm/svm/pmu.c +++ b/arch/x86/kvm/svm/pmu.c @@ -149,8 +149,26 @@ static int amd_pmu_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) static void amd_pmu_set_eventsel_hw(struct kvm_pmc *pmc) { + struct kvm_vcpu *vcpu = pmc->vcpu; + u64 host_guest_bits; + pmc->eventsel_hw = (pmc->eventsel & ~AMD64_EVENTSEL_HOSTONLY) | AMD64_EVENTSEL_GUESTONLY; + + if (!(pmc->eventsel & ARCH_PERFMON_EVENTSEL_ENABLE)) + return; + + if (!(vcpu->arch.efer & EFER_SVME)) + return; + + host_guest_bits = pmc->eventsel & AMD64_EVENTSEL_HOST_GUEST_MASK; + if (!host_guest_bits || host_guest_bits == AMD64_EVENTSEL_HOST_GUEST_MASK) + return; + + if (!!(host_guest_bits & AMD64_EVENTSEL_GUESTONLY) == is_guest_mode(vcpu)) + return; + + pmc->eventsel_hw &= ~ARCH_PERFMON_EVENTSEL_ENABLE; } static int amd_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) -- 2.53.0.rc2.204.g2597b5adb4-goog Add amd_pmu_refresh_host_guest_eventsel_hw() to recalculate eventsel_hw for all PMCs based on the current vCPU state. 
This is needed because Host-Only and Guest-Only counters must be enabled/disabled at: - SVME changes: When EFER.SVME is modified, counters with Guest-Only bits need their hardware enable state updated. - Nested transitions: When entering or leaving guest mode, Host-Only counters should be disabled/enabled and Guest-Only counters should be enabled/disabled accordingly. Add a nested_transition() callback to kvm_x86_ops and call it from enter_guest_mode() and leave_guest_mode() to ensure the PMU state stays synchronized with guest mode transitions. Signed-off-by: Jim Mattson --- arch/x86/include/asm/kvm-x86-ops.h | 1 + arch/x86/include/asm/kvm_host.h | 2 ++ arch/x86/kvm/kvm_cache_regs.h | 2 ++ arch/x86/kvm/svm/pmu.c | 12 ++++++++++++ arch/x86/kvm/svm/svm.c | 3 +++ arch/x86/kvm/svm/svm.h | 5 +++++ arch/x86/kvm/x86.c | 1 + 7 files changed, 26 insertions(+) diff --git a/arch/x86/include/asm/kvm-x86-ops.h b/arch/x86/include/asm/kvm-x86-ops.h index de709fb5bd76..62ac8ecd26e9 100644 --- a/arch/x86/include/asm/kvm-x86-ops.h +++ b/arch/x86/include/asm/kvm-x86-ops.h @@ -108,6 +108,7 @@ KVM_X86_OP(get_entry_info) KVM_X86_OP(check_intercept) KVM_X86_OP(handle_exit_irqoff) KVM_X86_OP_OPTIONAL(update_cpu_dirty_logging) +KVM_X86_OP_OPTIONAL(nested_transition) KVM_X86_OP_OPTIONAL(vcpu_blocking) KVM_X86_OP_OPTIONAL(vcpu_unblocking) KVM_X86_OP_OPTIONAL(pi_update_irte) diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index ff07c45e3c73..8dbc5c731859 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -1901,6 +1901,8 @@ struct kvm_x86_ops { void (*update_cpu_dirty_logging)(struct kvm_vcpu *vcpu); + void (*nested_transition)(struct kvm_vcpu *vcpu); + const struct kvm_x86_nested_ops *nested_ops; void (*vcpu_blocking)(struct kvm_vcpu *vcpu); diff --git a/arch/x86/kvm/kvm_cache_regs.h b/arch/x86/kvm/kvm_cache_regs.h index 8ddb01191d6f..14e2cbab8312 100644 --- a/arch/x86/kvm/kvm_cache_regs.h +++ b/arch/x86/kvm/kvm_cache_regs.h 
@@ -227,6 +227,7 @@ static inline void enter_guest_mode(struct kvm_vcpu *vcpu) { vcpu->arch.hflags |= HF_GUEST_MASK; vcpu->stat.guest_mode = 1; + kvm_x86_call(nested_transition)(vcpu); } static inline void leave_guest_mode(struct kvm_vcpu *vcpu) @@ -239,6 +240,7 @@ static inline void leave_guest_mode(struct kvm_vcpu *vcpu) } vcpu->stat.guest_mode = 0; + kvm_x86_call(nested_transition)(vcpu); } static inline bool is_guest_mode(struct kvm_vcpu *vcpu) diff --git a/arch/x86/kvm/svm/pmu.c b/arch/x86/kvm/svm/pmu.c index 8d451110a94d..e2a849fc7daa 100644 --- a/arch/x86/kvm/svm/pmu.c +++ b/arch/x86/kvm/svm/pmu.c @@ -171,6 +171,18 @@ static void amd_pmu_set_eventsel_hw(struct kvm_pmc *pmc) pmc->eventsel_hw &= ~ARCH_PERFMON_EVENTSEL_ENABLE; } +void amd_pmu_refresh_host_guest_eventsel_hw(struct kvm_vcpu *vcpu) +{ + struct kvm_pmu *pmu = vcpu_to_pmu(vcpu); + int i; + + if (pmu->reserved_bits & AMD64_EVENTSEL_HOST_GUEST_MASK) + return; + + for (i = 0; i < pmu->nr_arch_gp_counters; i++) + amd_pmu_set_eventsel_hw(&pmu->gp_counters[i]); +} + static int amd_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) { struct kvm_pmu *pmu = vcpu_to_pmu(vcpu); diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index 5f0136dbdde6..5753388542cf 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -244,6 +244,8 @@ int svm_set_efer(struct kvm_vcpu *vcpu, u64 efer) if (svm_gp_erratum_intercept && !sev_guest(vcpu->kvm)) set_exception_intercept(svm, GP_VECTOR); } + + amd_pmu_refresh_host_guest_eventsel_hw(vcpu); } svm->vmcb->save.efer = efer | EFER_SVME; @@ -5222,6 +5224,7 @@ struct kvm_x86_ops svm_x86_ops __initdata = { .check_intercept = svm_check_intercept, .handle_exit_irqoff = svm_handle_exit_irqoff, + .nested_transition = amd_pmu_refresh_host_guest_eventsel_hw, .nested_ops = &svm_nested_ops, diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h index ebd7b36b1ceb..c31ef7c46d58 100644 --- a/arch/x86/kvm/svm/svm.h +++ b/arch/x86/kvm/svm/svm.h @@ -864,6 
+864,11 @@ void sev_vcpu_deliver_sipi_vector(struct kvm_vcpu *vcpu, u8 vector); void sev_es_prepare_switch_to_guest(struct vcpu_svm *svm, struct sev_es_save_area *hostsa); void sev_es_unmap_ghcb(struct vcpu_svm *svm); + +/* pmu.c */ +void amd_pmu_refresh_host_guest_eventsel_hw(struct kvm_vcpu *vcpu); + + #ifdef CONFIG_KVM_AMD_SEV int sev_mem_enc_ioctl(struct kvm *kvm, void __user *argp); int sev_mem_enc_register_region(struct kvm *kvm, diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index db3f393192d9..01ccbaa5b2e6 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -150,6 +150,7 @@ struct kvm_x86_ops kvm_x86_ops __read_mostly; #include <asm/kvm-x86-ops.h> EXPORT_STATIC_CALL_GPL(kvm_x86_get_cs_db_l_bits); EXPORT_STATIC_CALL_GPL(kvm_x86_cache_reg); +EXPORT_STATIC_CALL_GPL(kvm_x86_nested_transition); static bool __read_mostly ignore_msrs = 0; module_param(ignore_msrs, bool, 0644); -- 2.53.0.rc2.204.g2597b5adb4-goog If the vCPU advertises SVM and uses the mediated PMU, allow the guest to set the Host-Only and Guest-Only bits in the event selector MSRs. Signed-off-by: Jim Mattson --- arch/x86/kvm/svm/pmu.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/x86/kvm/svm/pmu.c b/arch/x86/kvm/svm/pmu.c index e2a849fc7daa..7de7d8d00427 100644 --- a/arch/x86/kvm/svm/pmu.c +++ b/arch/x86/kvm/svm/pmu.c @@ -243,6 +243,9 @@ static void amd_pmu_refresh(struct kvm_vcpu *vcpu) pmu->counter_bitmask[KVM_PMC_GP] = BIT_ULL(48) - 1; pmu->reserved_bits = 0xfffffff000280000ull; + if (guest_cpu_cap_has(vcpu, X86_FEATURE_SVM) && + kvm_vcpu_has_mediated_pmu(vcpu)) + pmu->reserved_bits &= ~AMD64_EVENTSEL_HOST_GUEST_MASK; pmu->raw_event_mask = AMD64_RAW_EVENT_MASK; /* not applicable to AMD; but clean them to prevent any fall out */ pmu->counter_bitmask[KVM_PMC_FIXED] = 0; -- 2.53.0.rc2.204.g2597b5adb4-goog Add a selftest to verify KVM correctly virtualizes the AMD PMU Host-Only (bit 41) and Guest-Only (bit 40) event selector bits across all relevant SVM state transitions. 
The test programs 4 PMCs simultaneously with all combinations of the Host-Only and Guest-Only bits, then verifies correct counting behavior: 1. SVME=0: all counters count (Host-Only/Guest-Only bits ignored) 2. Set SVME=1: Host-Only and neither/both count; Guest-Only stops 3. VMRUN to L2: Guest-Only and neither/both count; Host-Only stops 4. VMEXIT to L1: Host-Only and neither/both count; Guest-Only stops 5. Clear SVME=0: all counters count (bits ignored again) Signed-off-by: Jim Mattson --- tools/testing/selftests/kvm/Makefile.kvm | 1 + tools/testing/selftests/kvm/include/x86/pmu.h | 6 + .../selftests/kvm/include/x86/processor.h | 2 + .../kvm/x86/svm_pmu_host_guest_test.c | 199 ++++++++++++++++++ 4 files changed, 208 insertions(+) create mode 100644 tools/testing/selftests/kvm/x86/svm_pmu_host_guest_test.c diff --git a/tools/testing/selftests/kvm/Makefile.kvm b/tools/testing/selftests/kvm/Makefile.kvm index 58eee0474db6..f20ddd58ee81 100644 --- a/tools/testing/selftests/kvm/Makefile.kvm +++ b/tools/testing/selftests/kvm/Makefile.kvm @@ -112,6 +112,7 @@ TEST_GEN_PROGS_x86 += x86/svm_vmcall_test TEST_GEN_PROGS_x86 += x86/svm_int_ctl_test TEST_GEN_PROGS_x86 += x86/svm_nested_shutdown_test TEST_GEN_PROGS_x86 += x86/svm_nested_soft_inject_test +TEST_GEN_PROGS_x86 += x86/svm_pmu_host_guest_test TEST_GEN_PROGS_x86 += x86/tsc_scaling_sync TEST_GEN_PROGS_x86 += x86/sync_regs_test TEST_GEN_PROGS_x86 += x86/ucna_injection_test diff --git a/tools/testing/selftests/kvm/include/x86/pmu.h b/tools/testing/selftests/kvm/include/x86/pmu.h index 72575eadb63a..af9b279c78df 100644 --- a/tools/testing/selftests/kvm/include/x86/pmu.h +++ b/tools/testing/selftests/kvm/include/x86/pmu.h @@ -38,6 +38,12 @@ #define ARCH_PERFMON_EVENTSEL_INV BIT_ULL(23) #define ARCH_PERFMON_EVENTSEL_CMASK GENMASK_ULL(31, 24) +/* + * These are AMD-specific bits. + */ +#define AMD64_EVENTSEL_GUESTONLY BIT_ULL(40) +#define AMD64_EVENTSEL_HOSTONLY BIT_ULL(41) + /* RDPMC control flags, Intel only. 
*/ #define INTEL_RDPMC_METRICS BIT_ULL(29) #define INTEL_RDPMC_FIXED BIT_ULL(30) diff --git a/tools/testing/selftests/kvm/include/x86/processor.h b/tools/testing/selftests/kvm/include/x86/processor.h index 4ebae4269e68..10ee2d4db1e3 100644 --- a/tools/testing/selftests/kvm/include/x86/processor.h +++ b/tools/testing/selftests/kvm/include/x86/processor.h @@ -19,6 +19,8 @@ #include "kvm_util.h" #include "ucall_common.h" +#define __stack_aligned__ __aligned(16) + extern bool host_cpu_is_intel; extern bool host_cpu_is_amd; extern uint64_t guest_tsc_khz; diff --git a/tools/testing/selftests/kvm/x86/svm_pmu_host_guest_test.c b/tools/testing/selftests/kvm/x86/svm_pmu_host_guest_test.c new file mode 100644 index 000000000000..a08c03a40d4f --- /dev/null +++ b/tools/testing/selftests/kvm/x86/svm_pmu_host_guest_test.c @@ -0,0 +1,199 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * KVM nested SVM PMU Host-Only/Guest-Only test + * + * Copyright (C) 2026, Google LLC. + * + * Test that KVM correctly virtualizes the AMD PMU Host-Only (bit 41) and + * Guest-Only (bit 40) event selector bits across all SVM state + * transitions. + * + * Programs 4 PMCs simultaneously with all combinations of Host-Only and + * Guest-Only bits, then verifies correct counting behavior through: + * 1. SVME=0: all counters count (Host-Only/Guest-Only bits ignored) + * 2. Set SVME=1: Host-Only and neither/both count; Guest-Only stops + * 3. VMRUN to L2: Guest-Only and neither/both count; Host-Only stops + * 4. VMEXIT to L1: Host-Only and neither/both count; Guest-Only stops + * 5. 
Clear SVME=0: all counters count (bits ignored again) + */ +#include +#include +#include +#include + +#include "test_util.h" +#include "kvm_util.h" +#include "processor.h" +#include "svm_util.h" +#include "pmu.h" + +#define L2_GUEST_STACK_SIZE 255 + +#define EVENTSEL_RETIRED_INSNS (ARCH_PERFMON_EVENTSEL_OS | \ + ARCH_PERFMON_EVENTSEL_USR | \ + ARCH_PERFMON_EVENTSEL_ENABLE | \ + AMD_ZEN_INSTRUCTIONS_RETIRED) + +/* PMC configurations: index corresponds to Host-Only | Guest-Only bits */ +#define PMC_NEITHER 0 /* Neither bit set */ +#define PMC_GUESTONLY 1 /* Guest-Only bit set */ +#define PMC_HOSTONLY 2 /* Host-Only bit set */ +#define PMC_BOTH 3 /* Both bits set */ +#define NR_PMCS 4 + +/* Bitmasks for which PMCs should be counting in each state */ +#define COUNTS_ALL (BIT(PMC_NEITHER) | BIT(PMC_GUESTONLY) | \ + BIT(PMC_HOSTONLY) | BIT(PMC_BOTH)) +#define COUNTS_L1 (BIT(PMC_NEITHER) | BIT(PMC_HOSTONLY) | BIT(PMC_BOTH)) +#define COUNTS_L2 (BIT(PMC_NEITHER) | BIT(PMC_GUESTONLY) | BIT(PMC_BOTH)) + +#define LOOP_INSNS 1000 + +static __always_inline void run_instruction_loop(void) +{ + unsigned int i; + + for (i = 0; i < LOOP_INSNS; i++) + __asm__ __volatile__("nop"); +} + +static __always_inline void read_counters(uint64_t *counts) +{ + int i; + + for (i = 0; i < NR_PMCS; i++) + counts[i] = rdmsr(MSR_F15H_PERF_CTR + 2 * i); +} + +static __always_inline void run_and_measure(uint64_t *deltas) +{ + uint64_t before[NR_PMCS], after[NR_PMCS]; + int i; + + read_counters(before); + run_instruction_loop(); + read_counters(after); + + for (i = 0; i < NR_PMCS; i++) + deltas[i] = after[i] - before[i]; +} + +static void assert_pmc_counts(uint64_t *deltas, unsigned int expected_counting) +{ + int i; + + for (i = 0; i < NR_PMCS; i++) { + if (expected_counting & BIT(i)) + GUEST_ASSERT_NE(deltas[i], 0); + else + GUEST_ASSERT_EQ(deltas[i], 0); + } +} + +struct test_data { + uint64_t l2_deltas[NR_PMCS]; + bool l2_done; +}; + +static struct test_data *test_data; + +static void 
l2_guest_code(void) +{ + run_and_measure(test_data->l2_deltas); + test_data->l2_done = true; + vmmcall(); +} + +static void l1_guest_code(struct svm_test_data *svm, struct test_data *data) +{ + unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE] __stack_aligned__; + struct vmcb *vmcb = svm->vmcb; + uint64_t deltas[NR_PMCS]; + uint64_t eventsel; + int i; + + test_data = data; + + /* Program 4 PMCs with all combinations of Host-Only/Guest-Only bits */ + for (i = 0; i < NR_PMCS; i++) { + eventsel = EVENTSEL_RETIRED_INSNS; + if (i & PMC_GUESTONLY) + eventsel |= AMD64_EVENTSEL_GUESTONLY; + if (i & PMC_HOSTONLY) + eventsel |= AMD64_EVENTSEL_HOSTONLY; + wrmsr(MSR_F15H_PERF_CTL + 2 * i, eventsel); + wrmsr(MSR_F15H_PERF_CTR + 2 * i, 0); + } + + /* Step 1: SVME=0 - Host-Only/Guest-Only bits ignored; all count */ + wrmsr(MSR_EFER, rdmsr(MSR_EFER) & ~EFER_SVME); + run_and_measure(deltas); + assert_pmc_counts(deltas, COUNTS_ALL); + + /* Step 2: Set SVME=1 - In L1 "host mode"; Guest-Only stops */ + wrmsr(MSR_EFER, rdmsr(MSR_EFER) | EFER_SVME); + run_and_measure(deltas); + assert_pmc_counts(deltas, COUNTS_L1); + + /* Step 3: VMRUN to L2 - In "guest mode"; Host-Only stops */ + generic_svm_setup(svm, l2_guest_code, + &l2_guest_stack[L2_GUEST_STACK_SIZE]); + vmcb->control.intercept &= ~(1ULL << INTERCEPT_MSR_PROT); + + run_guest(vmcb, svm->vmcb_gpa); + + GUEST_ASSERT_EQ(vmcb->control.exit_code, SVM_EXIT_VMMCALL); + GUEST_ASSERT(data->l2_done); + assert_pmc_counts(data->l2_deltas, COUNTS_L2); + + /* Step 4: After VMEXIT to L1 - Back in "host mode"; Guest-Only stops */ + run_and_measure(deltas); + assert_pmc_counts(deltas, COUNTS_L1); + + /* Step 5: Clear SVME - Host-Only/Guest-Only bits ignored; all count */ + wrmsr(MSR_EFER, rdmsr(MSR_EFER) & ~EFER_SVME); + run_and_measure(deltas); + assert_pmc_counts(deltas, COUNTS_ALL); + + GUEST_DONE(); +} + +int main(int argc, char *argv[]) +{ + vm_vaddr_t svm_gva, data_gva; + struct test_data *data_hva; + struct kvm_vcpu *vcpu; + struct kvm_vm 
*vm; + struct ucall uc; + + TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_SVM)); + TEST_REQUIRE(kvm_is_pmu_enabled()); + TEST_REQUIRE(get_kvm_amd_param_bool("enable_mediated_pmu")); + TEST_REQUIRE(host_cpu_is_amd && kvm_cpu_family() >= 0x17); + + vm = vm_create_with_one_vcpu(&vcpu, l1_guest_code); + + vcpu_alloc_svm(vm, &svm_gva); + + data_gva = vm_vaddr_alloc_page(vm); + data_hva = addr_gva2hva(vm, data_gva); + memset(data_hva, 0, sizeof(*data_hva)); + + vcpu_args_set(vcpu, 2, svm_gva, data_gva); + + vcpu_run(vcpu); + TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO); + + switch (get_ucall(vcpu, &uc)) { + case UCALL_ABORT: + REPORT_GUEST_ASSERT(uc); + break; + case UCALL_DONE: + break; + default: + TEST_FAIL("Unknown ucall %lu", uc.cmd); + } + + kvm_vm_free(vm); + return 0; +} -- 2.53.0.rc2.204.g2597b5adb4-goog