Modern AMD processors expose four additional extended LVT registers in the extended APIC register space, which can be used for additional interrupt sources such as instruction-based sampling and others. To support this, introduce two new vCPU-based IOCTLs: KVM_GET_EXT_LAPIC and KVM_SET_EXT_LAPIC. These IOCTLs work similarly to KVM_GET_LAPIC and KVM_SET_LAPIC, but operate on the APIC page including the extended APIC register space located at APIC offsets 400h-530h. These IOCTLs are intended for use when extended APIC support is enabled in the guest. They allow saving and restoring the full APIC page, including the extended registers. To support this, `struct kvm_ext_lapic_state` uses a flexible array member rather than a hardcoded size, improving forward compatibility. Documentation for the new IOCTLs has also been added. For more details on the extended APIC space, refer to the AMD64 Architecture Programmer’s Manual, Volume 2, Section 16.4.5: Extended Interrupts. https://bugzilla.kernel.org/attachment.cgi?id=306250 Signed-off-by: Manali Shukla --- Documentation/virt/kvm/api.rst | 23 ++++++++++++++++++++ arch/x86/include/uapi/asm/kvm.h | 5 +++++ arch/x86/kvm/lapic.c | 12 ++++++----- arch/x86/kvm/lapic.h | 6 ++++-- arch/x86/kvm/x86.c | 37 ++++++++++++++++++++++++--------- include/uapi/linux/kvm.h | 10 +++++++++ 6 files changed, 76 insertions(+), 17 deletions(-) diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst index 6aa40ee05a4a..0653718a4f04 100644 --- a/Documentation/virt/kvm/api.rst +++ b/Documentation/virt/kvm/api.rst @@ -2048,6 +2048,18 @@ error. Reads the Local APIC registers and copies them into the input argument. The data format and layout are the same as documented in the architecture manual. +:: + + #define KVM_APIC_EXT_REG_SIZE 0x540 + struct kvm_ext_lapic_state { + __DECLARE_FLEX_ARRAY(__u8, regs); + }; + +Applications should use the KVM_GET_EXT_LAPIC ioctl if extended APIC is +enabled. 
KVM_GET_EXT_LAPIC reads the Local APIC registers, including the extended +APIC register space located at offsets 400h-530h, and copies them into the input +argument. + If KVM_X2APIC_API_USE_32BIT_IDS feature of KVM_CAP_X2APIC_API is enabled, then the format of APIC_ID register depends on the APIC mode (reported by MSR_IA32_APICBASE) of its VCPU. x2APIC stores APIC ID in @@ -2079,6 +2091,17 @@ always uses xAPIC format. Copies the input argument into the Local APIC registers. The data format and layout are the same as documented in the architecture manual. +:: + + #define KVM_APIC_EXT_REG_SIZE 0x540 + struct kvm_ext_lapic_state { + __DECLARE_FLEX_ARRAY(__u8, regs); + }; + +Applications should use the KVM_SET_EXT_LAPIC ioctl if extended APIC is enabled. +KVM_SET_EXT_LAPIC copies the input argument, including the extended APIC register +space, into the Local APIC and extended APIC registers. + The format of the APIC ID register (bytes 32-35 of struct kvm_lapic_state's regs field) depends on the state of the KVM_CAP_X2APIC_API capability. See the note in KVM_GET_LAPIC. 
diff --git a/arch/x86/include/uapi/asm/kvm.h b/arch/x86/include/uapi/asm/kvm.h index 0f15d683817d..d26e1e1bf856 100644 --- a/arch/x86/include/uapi/asm/kvm.h +++ b/arch/x86/include/uapi/asm/kvm.h @@ -124,6 +124,11 @@ struct kvm_lapic_state { char regs[KVM_APIC_REG_SIZE]; }; +#define KVM_APIC_EXT_REG_SIZE 0x540 +struct kvm_ext_lapic_state { + __DECLARE_FLEX_ARRAY(__u8, regs); +}; + struct kvm_segment { __u64 base; __u32 limit; diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index f92e3f53ee75..8bf7e0d33da9 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -3058,7 +3058,7 @@ void kvm_apic_ack_interrupt(struct kvm_vcpu *vcpu, int vector) EXPORT_SYMBOL_GPL(kvm_apic_ack_interrupt); static int kvm_apic_state_fixup(struct kvm_vcpu *vcpu, - struct kvm_lapic_state *s, bool set) + struct kvm_ext_lapic_state *s, bool set) { if (apic_x2apic_mode(vcpu->arch.apic)) { u32 x2apic_id = kvm_x2apic_id(vcpu->arch.apic); @@ -3109,9 +3109,10 @@ static int kvm_apic_state_fixup(struct kvm_vcpu *vcpu, return 0; } -int kvm_apic_get_state(struct kvm_vcpu *vcpu, struct kvm_lapic_state *s) +int kvm_apic_get_state(struct kvm_vcpu *vcpu, struct kvm_ext_lapic_state *s, + unsigned int size) { - memcpy(s->regs, vcpu->arch.apic->regs, sizeof(*s)); + memcpy(s->regs, vcpu->arch.apic->regs, size); /* * Get calculated timer current count for remaining timer period (if @@ -3122,7 +3123,8 @@ int kvm_apic_get_state(struct kvm_vcpu *vcpu, struct kvm_lapic_state *s) return kvm_apic_state_fixup(vcpu, s, false); } -int kvm_apic_set_state(struct kvm_vcpu *vcpu, struct kvm_lapic_state *s) +int kvm_apic_set_state(struct kvm_vcpu *vcpu, struct kvm_ext_lapic_state *s, + unsigned int size) { struct kvm_lapic *apic = vcpu->arch.apic; int r; @@ -3137,7 +3139,7 @@ int kvm_apic_set_state(struct kvm_vcpu *vcpu, struct kvm_lapic_state *s) kvm_recalculate_apic_map(vcpu->kvm); return r; } - memcpy(vcpu->arch.apic->regs, s->regs, sizeof(*s)); + memcpy(vcpu->arch.apic->regs, s->regs, size); 
atomic_set_release(&apic->vcpu->kvm->arch.apic_map_dirty, DIRTY); kvm_recalculate_apic_map(vcpu->kvm); diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h index a07f8524d04a..b411de5f33a3 100644 --- a/arch/x86/kvm/lapic.h +++ b/arch/x86/kvm/lapic.h @@ -122,9 +122,11 @@ bool kvm_irq_delivery_to_apic_fast(struct kvm *kvm, struct kvm_lapic *src, void kvm_apic_send_ipi(struct kvm_lapic *apic, u32 icr_low, u32 icr_high); int kvm_apic_set_base(struct kvm_vcpu *vcpu, u64 value, bool host_initiated); -int kvm_apic_get_state(struct kvm_vcpu *vcpu, struct kvm_lapic_state *s); -int kvm_apic_set_state(struct kvm_vcpu *vcpu, struct kvm_lapic_state *s); void kvm_apic_update_hwapic_isr(struct kvm_vcpu *vcpu); +int kvm_apic_get_state(struct kvm_vcpu *vcpu, struct kvm_ext_lapic_state *s, + unsigned int size); +int kvm_apic_set_state(struct kvm_vcpu *vcpu, struct kvm_ext_lapic_state *s, + unsigned int size); int kvm_lapic_find_highest_irr(struct kvm_vcpu *vcpu); u64 kvm_get_lapic_tscdeadline_msr(struct kvm_vcpu *vcpu); diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index e612a34779d7..b249e4c74063 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -5131,25 +5131,25 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) } static int kvm_vcpu_ioctl_get_lapic(struct kvm_vcpu *vcpu, - struct kvm_lapic_state *s) + struct kvm_ext_lapic_state *s, unsigned int size) { if (vcpu->arch.apic->guest_apic_protected) return -EINVAL; kvm_x86_call(sync_pir_to_irr)(vcpu); - return kvm_apic_get_state(vcpu, s); + return kvm_apic_get_state(vcpu, s, size); } static int kvm_vcpu_ioctl_set_lapic(struct kvm_vcpu *vcpu, - struct kvm_lapic_state *s) + struct kvm_ext_lapic_state *s, unsigned int size) { int r; if (vcpu->arch.apic->guest_apic_protected) return -EINVAL; - r = kvm_apic_set_state(vcpu, s); + r = kvm_apic_set_state(vcpu, s, size); if (r) return r; update_cr8_intercept(vcpu); @@ -5872,10 +5872,11 @@ long kvm_arch_vcpu_ioctl(struct file *filp, { struct kvm_vcpu *vcpu = 
filp->private_data; void __user *argp = (void __user *)arg; + unsigned long size; int r; union { struct kvm_sregs2 *sregs2; - struct kvm_lapic_state *lapic; + struct kvm_ext_lapic_state *lapic; struct kvm_xsave *xsave; struct kvm_xcrs *xcrs; void *buffer; @@ -5885,35 +5886,51 @@ long kvm_arch_vcpu_ioctl(struct file *filp, u.buffer = NULL; switch (ioctl) { + case KVM_GET_EXT_LAPIC: case KVM_GET_LAPIC: { r = -EINVAL; if (!lapic_in_kernel(vcpu)) goto out; - u.lapic = kzalloc(sizeof(struct kvm_lapic_state), GFP_KERNEL); + + if (ioctl == KVM_GET_EXT_LAPIC) + size = struct_size(u.lapic, regs, KVM_APIC_EXT_REG_SIZE); + else + size = sizeof(struct kvm_lapic_state); + + u.lapic = kzalloc(size, GFP_KERNEL); r = -ENOMEM; if (!u.lapic) goto out; - r = kvm_vcpu_ioctl_get_lapic(vcpu, u.lapic); + r = kvm_vcpu_ioctl_get_lapic(vcpu, u.lapic, size); if (r) goto out; + r = -EFAULT; - if (copy_to_user(argp, u.lapic, sizeof(struct kvm_lapic_state))) + if (copy_to_user(argp, u.lapic, size)) goto out; + r = 0; break; } + case KVM_SET_EXT_LAPIC: case KVM_SET_LAPIC: { r = -EINVAL; if (!lapic_in_kernel(vcpu)) goto out; - u.lapic = memdup_user(argp, sizeof(*u.lapic)); + + if (ioctl == KVM_SET_EXT_LAPIC) + size = struct_size(u.lapic, regs, KVM_APIC_EXT_REG_SIZE); + else + size = sizeof(struct kvm_lapic_state); + u.lapic = memdup_user(argp, size); + if (IS_ERR(u.lapic)) { r = PTR_ERR(u.lapic); goto out_nofree; } - r = kvm_vcpu_ioctl_set_lapic(vcpu, u.lapic); + r = kvm_vcpu_ioctl_set_lapic(vcpu, u.lapic, size); break; } case KVM_INTERRUPT: { diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index f0f0d49d2544..e72e536e82bc 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -1318,6 +1318,16 @@ struct kvm_vfio_spapr_tce { #define KVM_SET_FPU _IOW(KVMIO, 0x8d, struct kvm_fpu) #define KVM_GET_LAPIC _IOR(KVMIO, 0x8e, struct kvm_lapic_state) #define KVM_SET_LAPIC _IOW(KVMIO, 0x8f, struct kvm_lapic_state) +/* + * Added to save/restore local APIC registers with 
extended APIC (extapic) + * register space. + * + * QEMU emulates extapic logic only when KVM enables extapic functionality via + * a KVM capability. If QEMU sets extapic registers but KVM does not + * set the extapic capability, QEMU falls back to KVM_GET_LAPIC and KVM_SET_LAPIC. + */ +#define KVM_GET_EXT_LAPIC _IOR(KVMIO, 0x8e, struct kvm_ext_lapic_state) +#define KVM_SET_EXT_LAPIC _IOW(KVMIO, 0x8f, struct kvm_ext_lapic_state) #define KVM_SET_CPUID2 _IOW(KVMIO, 0x90, struct kvm_cpuid2) #define KVM_GET_CPUID2 _IOWR(KVMIO, 0x91, struct kvm_cpuid2) /* Available with KVM_CAP_VAPIC */ -- 2.43.0