From: David Woodhouse This mostly reverts commit a5b32718081e ("KVM: x86: Remove unnecessary caching of KVM's PV CPUID base"). Sure, caching state which might change has certain risks, but KVM already does cache the CPUID contents, and the whole point of calling kvm_apply_cpuid_pv_features_quirk() from kvm_vcpu_after_set_cpuid() is to cache the contents of that leaf too, so that guest_pv_has() can access them quickly. An upcoming commit is going to want to use vcpu->arch.kvm_cpuid from kvm_cpuid() at runtime too, so put it back. Signed-off-by: David Woodhouse --- arch/x86/include/asm/kvm_host.h | 1 + arch/x86/kvm/cpuid.c | 16 +++++++++++----- 2 files changed, 12 insertions(+), 5 deletions(-) diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index f19a76d3ca0e..50febd333f5f 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -897,6 +897,7 @@ struct kvm_vcpu_arch { int cpuid_nent; struct kvm_cpuid_entry2 *cpuid_entries; + struct kvm_hypervisor_cpuid kvm_cpuid; bool cpuid_dynamic_bits_dirty; bool is_amd_compatible; diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index e2836a255b16..bcce3a75c3f2 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c @@ -178,7 +178,12 @@ static int kvm_cpuid_check_equal(struct kvm_vcpu *vcpu, struct kvm_cpuid_entry2 /* * Apply runtime CPUID updates to the incoming CPUID entries to avoid - * false positives due mismatches on KVM-owned feature flags. + * false positives due mismatches on KVM-owned feature flags. Note, + * runtime CPUID updates may consume other CPUID-driven vCPU state, + * e.g. KVM or Xen CPUID bases. Updating runtime state before full + * CPUID processing is functionally correct only because any change in + * CPUID is disallowed, i.e. using stale data is ok because the below + * checks will reject the change. * * Note! @e2 and @nent track the _old_ CPUID entries! 
*/ @@ -231,14 +236,14 @@ static struct kvm_hypervisor_cpuid kvm_get_hypervisor_cpuid(struct kvm_vcpu *vcp static u32 kvm_apply_cpuid_pv_features_quirk(struct kvm_vcpu *vcpu) { - struct kvm_hypervisor_cpuid kvm_cpuid; struct kvm_cpuid_entry2 *best; + u32 features_leaf = vcpu->arch.kvm_cpuid.base | KVM_CPUID_FEATURES; - kvm_cpuid = kvm_get_hypervisor_cpuid(vcpu, KVM_SIGNATURE); - if (!kvm_cpuid.base) + if (!vcpu->arch.kvm_cpuid.base || + vcpu->arch.kvm_cpuid.limit < features_leaf) return 0; - best = kvm_find_cpuid_entry(vcpu, kvm_cpuid.base | KVM_CPUID_FEATURES); + best = kvm_find_cpuid_entry(vcpu, features_leaf); if (!best) return 0; @@ -541,6 +546,7 @@ static int kvm_set_cpuid(struct kvm_vcpu *vcpu, struct kvm_cpuid_entry2 *e2, if (r) goto err; + vcpu->arch.kvm_cpuid = kvm_get_hypervisor_cpuid(vcpu, KVM_SIGNATURE); #ifdef CONFIG_KVM_XEN vcpu->arch.xen.cpuid = kvm_get_hypervisor_cpuid(vcpu, XEN_SIGNATURE); #endif -- 2.49.0 From: David Woodhouse In https://lkml.org/lkml/2008/10/1/246 a proposal was made for generic CPUID leaves, of which only 0x40000010 was defined, to contain the TSC and local APIC frequencies. The proposal from VMware was mostly shot down in flames, *but* XNU does unconditionally assume that this leaf contains the frequency information, if it's present on any hypervisor: https://github.com/apple/darwin-xnu/blob/main/osfmk/i386/cpuid.c So does FreeBSD: https://github.com/freebsd/freebsd-src/commit/4a432614f68 So at this point it would be daft for a hypervisor to expose 0x40000010 for any *other* content. KVM might as well adopt it, and fill in the accurate TSC frequency just as it does for the Xen TSC leaf. 
Signed-off-by: David Woodhouse --- arch/x86/include/uapi/asm/kvm_para.h | 11 +++++++++++ arch/x86/kvm/cpuid.c | 7 +++++++ 2 files changed, 18 insertions(+) diff --git a/arch/x86/include/uapi/asm/kvm_para.h b/arch/x86/include/uapi/asm/kvm_para.h index a1efa7907a0b..1597c4a2a24a 100644 --- a/arch/x86/include/uapi/asm/kvm_para.h +++ b/arch/x86/include/uapi/asm/kvm_para.h @@ -44,6 +44,17 @@ */ #define KVM_FEATURE_CLOCKSOURCE_STABLE_BIT 24 + +/* + * Proposed by VMware in https://lkml.org/lkml/2008/10/1/246 the timing + * information leaf provides the TSC and local APIC timer frequencies: + * + * # EAX: (Virtual) TSC frequency in kHz. + * # EBX: (Virtual) Bus (local apic timer) frequency in kHz. + * # ECX, EDX: RESERVED (reserved fields are set to zero). + */ +#define KVM_CPUID_TIMING_INFO 0x40000010 + #define MSR_KVM_WALL_CLOCK 0x11 #define MSR_KVM_SYSTEM_TIME 0x12 diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index bcce3a75c3f2..1bd69d9c86b7 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c @@ -2029,6 +2029,13 @@ bool kvm_cpuid(struct kvm_vcpu *vcpu, u32 *eax, u32 *ebx, } else if (index == 2) { *eax = vcpu->arch.hw_tsc_khz; } + } else if (vcpu->arch.kvm_cpuid.base && + function <= vcpu->arch.kvm_cpuid.limit && + function == (vcpu->arch.kvm_cpuid.base | KVM_CPUID_TIMING_INFO)) { + if (kvm_check_request(KVM_REQ_CLOCK_UPDATE, vcpu)) + kvm_guest_time_update(vcpu); + + *eax = vcpu->arch.hw_tsc_khz; } } else { *eax = *ebx = *ecx = *edx = 0; -- 2.49.0 From: David Woodhouse In https://lkml.org/lkml/2008/10/1/246 a proposal was made for generic CPUID conventions across hypervisors. It was mostly shot down in flames, but the leaf at 0x40000010 containing timing information didn't die. It's used by XNU and FreeBSD guests under all hypervisors¹² to determine the TSC frequency, and also exposed by the EC2 Nitro hypervisor (as well as, presumably, VMware). FreeBSD's Bhyve is probably just about to start exposing it too. 
Use it under KVM to obtain the TSC frequency more accurately, instead of reverse-calculating the frequency from the mul/shift values in the KVM clock. Before: [ 0.000020] tsc: Detected 2900.014 MHz processor After: [ 0.000020] tsc: Detected 2900.015 MHz processor $ cpuid -1 -l 0x40000010 CPU: hypervisor generic timing information (0x40000010): TSC frequency (Hz) = 2900015 bus frequency (Hz) = 1000000 ¹ https://github.com/apple/darwin-xnu/blob/main/osfmk/i386/cpuid.c ² https://github.com/freebsd/freebsd-src/commit/4a432614f68 Signed-off-by: David Woodhouse --- arch/x86/include/asm/kvm_para.h | 1 + arch/x86/kernel/kvm.c | 10 ++++++++++ arch/x86/kernel/kvmclock.c | 7 ++++++- 3 files changed, 17 insertions(+), 1 deletion(-) diff --git a/arch/x86/include/asm/kvm_para.h b/arch/x86/include/asm/kvm_para.h index 57bc74e112f2..d53927103cab 100644 --- a/arch/x86/include/asm/kvm_para.h +++ b/arch/x86/include/asm/kvm_para.h @@ -121,6 +121,7 @@ static inline long kvm_sev_hypercall3(unsigned int nr, unsigned long p1, void kvmclock_init(void); void kvmclock_disable(void); bool kvm_para_available(void); +unsigned int kvm_para_tsc_khz(void); unsigned int kvm_arch_para_features(void); unsigned int kvm_arch_para_hints(void); void kvm_async_pf_task_wait_schedule(u32 token); diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c index 8ae750cde0c6..1a80f4e5c854 100644 --- a/arch/x86/kernel/kvm.c +++ b/arch/x86/kernel/kvm.c @@ -896,6 +896,16 @@ bool kvm_para_available(void) } EXPORT_SYMBOL_GPL(kvm_para_available); +unsigned int kvm_para_tsc_khz(void) +{ + u32 base = kvm_cpuid_base(); + + if (cpuid_eax(base) >= (base | KVM_CPUID_TIMING_INFO)) + return cpuid_eax(base | KVM_CPUID_TIMING_INFO); + + return 0; +} + unsigned int kvm_arch_para_features(void) { return cpuid_eax(kvm_cpuid_base() | KVM_CPUID_FEATURES); diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c index ca0a49eeac4a..0908450ebac9 100644 --- a/arch/x86/kernel/kvmclock.c +++ b/arch/x86/kernel/kvmclock.c @@ 
-117,7 +117,12 @@ static inline void kvm_sched_clock_init(bool stable) static unsigned long kvm_get_tsc_khz(void) { setup_force_cpu_cap(X86_FEATURE_TSC_KNOWN_FREQ); - return pvclock_tsc_khz(this_cpu_pvti()); + + /* + * If KVM advertises the frequency directly in CPUID, use that + * instead of reverse-calculating it from the KVM clock data. + */ + return kvm_para_tsc_khz() ? : pvclock_tsc_khz(this_cpu_pvti()); } static void __init kvm_get_preset_lpj(void) -- 2.49.0