Extract the suppress EOI broadcast (Directed EOI) logic into helper functions and move the check from kvm_ioapic_update_eoi_one() to kvm_ioapic_update_eoi() (required for a later patch). Prepare kvm_ioapic_send_eoi() to honor Suppress EOI Broadcast in split IRQCHIP mode. Introduce two helper functions: - kvm_lapic_advertise_suppress_eoi_broadcast(): determines whether KVM should advertise Suppress EOI Broadcast support to the guest - kvm_lapic_respect_suppress_eoi_broadcast(): determines whether KVM should honor the guest's request to suppress EOI broadcasts This refactoring prepares for I/O APIC version 0x20 support and userspace control of suppress EOI broadcast behavior. Signed-off-by: Khushit Shah --- arch/x86/kvm/ioapic.c | 12 +++++++--- arch/x86/kvm/lapic.c | 53 ++++++++++++++++++++++++++++++++++++------- arch/x86/kvm/lapic.h | 3 +++ 3 files changed, 57 insertions(+), 11 deletions(-) diff --git a/arch/x86/kvm/ioapic.c b/arch/x86/kvm/ioapic.c index 2c2783296aed..6bf8d110aece 100644 --- a/arch/x86/kvm/ioapic.c +++ b/arch/x86/kvm/ioapic.c @@ -545,7 +545,6 @@ static void kvm_ioapic_update_eoi_one(struct kvm_vcpu *vcpu, int trigger_mode, int pin) { - struct kvm_lapic *apic = vcpu->arch.apic; union kvm_ioapic_redirect_entry *ent = &ioapic->redirtbl[pin]; /* @@ -560,8 +559,7 @@ static void kvm_ioapic_update_eoi_one(struct kvm_vcpu *vcpu, kvm_notify_acked_irq(ioapic->kvm, KVM_IRQCHIP_IOAPIC, pin); spin_lock(&ioapic->lock); - if (trigger_mode != IOAPIC_LEVEL_TRIG || - kvm_lapic_get_reg(apic, APIC_SPIV) & APIC_SPIV_DIRECTED_EOI) + if (trigger_mode != IOAPIC_LEVEL_TRIG) return; ASSERT(ent->fields.trig_mode == IOAPIC_LEVEL_TRIG); @@ -591,10 +589,16 @@ static void kvm_ioapic_update_eoi_one(struct kvm_vcpu *vcpu, void kvm_ioapic_update_eoi(struct kvm_vcpu *vcpu, int vector, int trigger_mode) { int i; + struct kvm_lapic *apic = vcpu->arch.apic; struct kvm_ioapic *ioapic = vcpu->kvm->arch.vioapic; spin_lock(&ioapic->lock); rtc_irq_eoi(ioapic, vcpu, vector); + + if 
((kvm_lapic_get_reg(apic, APIC_SPIV) & APIC_SPIV_DIRECTED_EOI) && + kvm_lapic_respect_suppress_eoi_broadcast(ioapic->kvm)) + goto out; + for (i = 0; i < IOAPIC_NUM_PINS; i++) { union kvm_ioapic_redirect_entry *ent = &ioapic->redirtbl[i]; @@ -602,6 +606,8 @@ void kvm_ioapic_update_eoi(struct kvm_vcpu *vcpu, int vector, int trigger_mode) continue; kvm_ioapic_update_eoi_one(vcpu, ioapic, trigger_mode, i); } + +out: spin_unlock(&ioapic->lock); } diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index 0ae7f913d782..2c24fd8d815f 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -105,6 +105,39 @@ bool kvm_apic_pending_eoi(struct kvm_vcpu *vcpu, int vector) apic_test_vector(vector, apic->regs + APIC_IRR); } +bool kvm_lapic_advertise_suppress_eoi_broadcast(struct kvm *kvm) +{ + /* + * The default in-kernel I/O APIC emulates the 82093AA and does not + * implement an EOI register. Some guests (e.g. Windows with the + * Hyper-V role enabled) disable LAPIC EOI broadcast without checking + * the I/O APIC version, which can cause level-triggered interrupts to + * never be EOI'd. + * + * To avoid this, KVM must not advertise Suppress EOI Broadcast support + * when using the default in-kernel I/O APIC. + * + * Historically, in split IRQCHIP mode, KVM always advertised Suppress + * EOI Broadcast support but did not actually suppress EOIs, resulting + * in quirky behavior. + */ + return !ioapic_in_kernel(kvm); +} + +bool kvm_lapic_respect_suppress_eoi_broadcast(struct kvm *kvm) +{ + /* + * Returns true if KVM should honor the guest's request to suppress EOI + * broadcasts, i.e. actually implement Suppress EOI Broadcast. + * + * Historically, in split IRQCHIP mode, KVM ignored the suppress EOI + * broadcast bit set by the guest and broadcasts EOIs to the userspace + * I/O APIC. For In-kernel I/O APIC, the support itself is not + * advertised, but if bit was set by the guest, it was respected. 
+ */ + return ioapic_in_kernel(kvm); +} + __read_mostly DEFINE_STATIC_KEY_FALSE(kvm_has_noapic_vcpu); EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_has_noapic_vcpu); @@ -554,15 +587,9 @@ void kvm_apic_set_version(struct kvm_vcpu *vcpu) v = APIC_VERSION | ((apic->nr_lvt_entries - 1) << 16); - /* - * KVM emulates 82093AA datasheet (with in-kernel IOAPIC implementation) - * which doesn't have EOI register; Some buggy OSes (e.g. Windows with - * Hyper-V role) disable EOI broadcast in lapic not checking for IOAPIC - * version first and level-triggered interrupts never get EOIed in - * IOAPIC. - */ + if (guest_cpu_cap_has(vcpu, X86_FEATURE_X2APIC) && - !ioapic_in_kernel(vcpu->kvm)) + kvm_lapic_advertise_suppress_eoi_broadcast(vcpu->kvm)) v |= APIC_LVR_DIRECTED_EOI; kvm_lapic_set_reg(apic, APIC_LVR, v); } @@ -1517,6 +1544,16 @@ static void kvm_ioapic_send_eoi(struct kvm_lapic *apic, int vector) /* Request a KVM exit to inform the userspace IOAPIC. */ if (irqchip_split(apic->vcpu->kvm)) { + /* + * Don't exit to userspace if the guest has enabled Directed + * EOI, a.k.a. Suppress EOI Broadcasts, in which case the local + * APIC doesn't broadcast EOIs (the guest must EOI the target + * I/O APIC(s) directly). 
+ */ + if ((kvm_lapic_get_reg(apic, APIC_SPIV) & APIC_SPIV_DIRECTED_EOI) && + kvm_lapic_respect_suppress_eoi_broadcast(apic->vcpu->kvm)) + return; + apic->vcpu->arch.pending_ioapic_eoi = vector; kvm_make_request(KVM_REQ_IOAPIC_EOI_EXIT, apic->vcpu); return; diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h index 282b9b7da98c..fe2db0f1d190 100644 --- a/arch/x86/kvm/lapic.h +++ b/arch/x86/kvm/lapic.h @@ -231,6 +231,9 @@ static inline int kvm_lapic_latched_init(struct kvm_vcpu *vcpu) bool kvm_apic_pending_eoi(struct kvm_vcpu *vcpu, int vector); +bool kvm_lapic_advertise_suppress_eoi_broadcast(struct kvm *kvm); +bool kvm_lapic_respect_suppress_eoi_broadcast(struct kvm *kvm); + void kvm_wait_lapic_expire(struct kvm_vcpu *vcpu); void kvm_bitmap_or_dest_vcpus(struct kvm *kvm, struct kvm_lapic_irq *irq, -- 2.39.3 From: David Woodhouse Introduce support for I/O APIC version 0x20, which includes the EOI Register (EOIR) for directed EOI. The EOI register allows guests to perform EOIs to individual I/O APICs instead of relying on broadcast EOIs from the local APIC. When Suppress EOI Broadcast (SEOIB) capability is advertised to the guest, guests that enable it will EOI individual I/O APICs by writing to their EOI register instead of relying on broadcast EOIs from the LAPIC. Hence, when SEOIB is advertised (so that guests can use it if they choose), use I/O APIC version 0x20 to provide the EOI register. This prepares for a userspace API that will allow explicit control of SEOIB support, providing a consistent interface for both in-kernel and split IRQCHIP mode. Add a tracepoint (kvm_ioapic_directed_eoi) to track directed EOIs for debugging and observability. 
Signed-off-by: David Woodhouse Signed-off-by: Khushit Shah --- arch/x86/kvm/ioapic.c | 31 +++++++++++++++++++++++++++++-- arch/x86/kvm/ioapic.h | 19 +++++++++++-------- arch/x86/kvm/trace.h | 17 +++++++++++++++++ 3 files changed, 57 insertions(+), 10 deletions(-) diff --git a/arch/x86/kvm/ioapic.c b/arch/x86/kvm/ioapic.c index 6bf8d110aece..eea1eb7845c4 100644 --- a/arch/x86/kvm/ioapic.c +++ b/arch/x86/kvm/ioapic.c @@ -48,8 +48,11 @@ static unsigned long ioapic_read_indirect(struct kvm_ioapic *ioapic) switch (ioapic->ioregsel) { case IOAPIC_REG_VERSION: - result = ((((IOAPIC_NUM_PINS - 1) & 0xff) << 16) - | (IOAPIC_VERSION_ID & 0xff)); + if (kvm_lapic_advertise_suppress_eoi_broadcast(ioapic->kvm)) + result = IOAPIC_VERSION_ID_EOIR; + else + result = IOAPIC_VERSION_ID; + result |= ((IOAPIC_NUM_PINS - 1) & 0xff) << 16; break; case IOAPIC_REG_APIC_ID: @@ -57,6 +60,10 @@ static unsigned long ioapic_read_indirect(struct kvm_ioapic *ioapic) result = ((ioapic->id & 0xf) << 24); break; + case IOAPIC_REG_BOOT_CONFIG: + result = 0x01; /* Processor bus */ + break; + default: { u32 redir_index = (ioapic->ioregsel - 0x10) >> 1; @@ -701,6 +708,26 @@ static int ioapic_mmio_write(struct kvm_vcpu *vcpu, struct kvm_io_device *this, ioapic_write_indirect(ioapic, data); break; + case IOAPIC_REG_EOIR: + /* + * The EOI register is supported (and version 0x20 advertised) + * when userspace explicitly enables suppress EOI broadcast. 
+ */ + if (kvm_lapic_advertise_suppress_eoi_broadcast(vcpu->kvm)) { + u8 vector = data & 0xff; + int i; + + trace_kvm_ioapic_directed_eoi(vcpu, vector); + rtc_irq_eoi(ioapic, vcpu, vector); + for (i = 0; i < IOAPIC_NUM_PINS; i++) { + union kvm_ioapic_redirect_entry *ent = &ioapic->redirtbl[i]; + + if (ent->fields.vector != vector) + continue; + kvm_ioapic_update_eoi_one(vcpu, ioapic, ent->fields.trig_mode, i); + } + } + break; default: break; } diff --git a/arch/x86/kvm/ioapic.h b/arch/x86/kvm/ioapic.h index bf28dbc11ff6..f219577f738c 100644 --- a/arch/x86/kvm/ioapic.h +++ b/arch/x86/kvm/ioapic.h @@ -11,7 +11,8 @@ struct kvm_vcpu; #define IOAPIC_NUM_PINS KVM_IOAPIC_NUM_PINS #define MAX_NR_RESERVED_IOAPIC_PINS KVM_MAX_IRQ_ROUTES -#define IOAPIC_VERSION_ID 0x11 /* IOAPIC version */ +#define IOAPIC_VERSION_ID 0x11 /* Default IOAPIC version */ +#define IOAPIC_VERSION_ID_EOIR 0x20 /* IOAPIC version with EOIR support */ #define IOAPIC_EDGE_TRIG 0 #define IOAPIC_LEVEL_TRIG 1 @@ -19,13 +20,15 @@ struct kvm_vcpu; #define IOAPIC_MEM_LENGTH 0x100 /* Direct registers. */ -#define IOAPIC_REG_SELECT 0x00 -#define IOAPIC_REG_WINDOW 0x10 - -/* Indirect registers. */ -#define IOAPIC_REG_APIC_ID 0x00 /* x86 IOAPIC only */ -#define IOAPIC_REG_VERSION 0x01 -#define IOAPIC_REG_ARB_ID 0x02 /* x86 IOAPIC only */ +#define IOAPIC_REG_SELECT 0x00 +#define IOAPIC_REG_WINDOW 0x10 +#define IOAPIC_REG_EOIR 0x40 /* version 0x20+ only */ + +/* INDIRECT registers. */ +#define IOAPIC_REG_APIC_ID 0x00 /* x86 IOAPIC only */ +#define IOAPIC_REG_VERSION 0x01 +#define IOAPIC_REG_ARB_ID 0x02 /* x86 IOAPIC only */ +#define IOAPIC_REG_BOOT_CONFIG 0x03 /* x86 IOAPIC only */ /*ioapic delivery mode*/ #define IOAPIC_FIXED 0x0 diff --git a/arch/x86/kvm/trace.h b/arch/x86/kvm/trace.h index e79bc9cb7162..6902758353a9 100644 --- a/arch/x86/kvm/trace.h +++ b/arch/x86/kvm/trace.h @@ -315,6 +315,23 @@ TRACE_EVENT(kvm_ioapic_delayed_eoi_inj, (__entry->e & (1<<15)) ? "level" : "edge", (__entry->e & (1<<16)) ? 
"|masked" : "") ); + +TRACE_EVENT(kvm_ioapic_directed_eoi, + TP_PROTO(struct kvm_vcpu *vcpu, u8 vector), + TP_ARGS(vcpu, vector), + + TP_STRUCT__entry( + __field( __u32, apicid ) + __field( __u8, vector ) + ), + + TP_fast_assign( + __entry->apicid = vcpu->vcpu_id; + __entry->vector = vector; + ), + + TP_printk("apicid %x vector %u", __entry->apicid, __entry->vector) +); #endif TRACE_EVENT(kvm_msi_set_irq, -- 2.39.3 Add two flags for KVM_CAP_X2APIC_API to allow userspace to control support for Suppress EOI Broadcasts, which KVM completely mishandles. When x2APIC support was first added, KVM incorrectly advertised and "enabled" Suppress EOI Broadcast, without fully supporting the I/O APIC side of the equation, i.e. without adding directed EOI to KVM's in-kernel I/O APIC. That flaw was carried over to split IRQCHIP support, i.e. KVM advertised support for Suppress EOI Broadcasts irrespective of whether or not the userspace I/O APIC implementation supported directed EOIs. Even worse, KVM didn't actually suppress EOI broadcasts, i.e. userspace VMMs without support for directed EOI came to rely on the "spurious" broadcasts. KVM "fixed" the in-kernel I/O APIC implementation by completely disabling support for Suppress EOI Broadcasts in commit 0bcc3fb95b97 ("KVM: lapic: stop advertising DIRECTED_EOI when in-kernel IOAPIC is in use"), but didn't do anything to remedy userspace I/O APIC implementations. KVM's bogus handling of Suppress EOI Broadcast is problematic when the guest relies on interrupts being masked in the I/O APIC until well after the initial local APIC EOI. E.g. Windows with Credential Guard enabled handles interrupts in the following order: 1. Interrupt for L2 arrives. 2. L1 APIC EOIs the interrupt. 3. L1 resumes L2 and injects the interrupt. 4. L2 EOIs after servicing. 5. L1 performs the I/O APIC EOI. Because KVM EOIs the I/O APIC at step #2, the guest can get an interrupt storm, e.g. 
if the IRQ line is still asserted and userspace reacts to the EOI by re-injecting the IRQ, because the guest doesn't de-assert the line until step #4, and doesn't expect the interrupt to be re-enabled until step #5. Unfortunately, simply "fixing" the bug isn't an option, as KVM has no way of knowing if the userspace I/O APIC supports directed EOIs, i.e. suppressing EOI broadcasts would result in interrupts being stuck masked in the userspace I/O APIC due to step #5 being ignored by userspace. And fully disabling support for Suppress EOI Broadcast is also undesirable, as picking up the fix would require a guest reboot, *and* more importantly would change the virtual CPU model exposed to the guest without any buy-in from userspace. Add KVM_X2APIC_ENABLE_SUPPRESS_EOI_BROADCAST and KVM_X2APIC_DISABLE_SUPPRESS_EOI_BROADCAST flags to allow userspace to explicitly enable or disable support for Suppress EOI Broadcasts. This gives userspace control over the virtual CPU model exposed to the guest, as KVM should never have enabled support for Suppress EOI Broadcast without userspace opt-in. Not setting either flag will result in legacy quirky behavior for backward compatibility. When KVM_X2APIC_ENABLE_SUPPRESS_EOI_BROADCAST is set and using in-kernel IRQCHIP mode, KVM will use I/O APIC version 0x20, which includes support for the EOI Register. Note, Suppress EOI Broadcasts is defined only in Intel's SDM, not in AMD's APM. But the bit is writable on some AMD CPUs, e.g. Turin, and KVM's ABI is to support Directed EOI (KVM's name) irrespective of guest CPU vendor. 
Fixes: 7543a635aa09 ("KVM: x86: Add KVM exit for IOAPIC EOIs") Closes: https://lore.kernel.org/kvm/7D497EF1-607D-4D37-98E7-DAF95F099342@nutanix.com Cc: stable@vger.kernel.org Suggested-by: David Woodhouse Co-developed-by: Sean Christopherson Signed-off-by: Sean Christopherson Signed-off-by: Khushit Shah --- Documentation/virt/kvm/api.rst | 28 +++++++++++++-- arch/x86/include/asm/kvm_host.h | 7 ++++ arch/x86/include/uapi/asm/kvm.h | 6 ++-- arch/x86/kvm/lapic.c | 64 ++++++++++++++++++++++----------- arch/x86/kvm/x86.c | 15 ++++++-- 5 files changed, 93 insertions(+), 27 deletions(-) diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst index 57061fa29e6a..ad15ca519afc 100644 --- a/Documentation/virt/kvm/api.rst +++ b/Documentation/virt/kvm/api.rst @@ -7800,8 +7800,10 @@ Will return -EBUSY if a VCPU has already been created. Valid feature flags in args[0] are:: - #define KVM_X2APIC_API_USE_32BIT_IDS (1ULL << 0) - #define KVM_X2APIC_API_DISABLE_BROADCAST_QUIRK (1ULL << 1) + #define KVM_X2APIC_API_USE_32BIT_IDS (1ULL << 0) + #define KVM_X2APIC_API_DISABLE_BROADCAST_QUIRK (1ULL << 1) + #define KVM_X2APIC_ENABLE_SUPPRESS_EOI_BROADCAST (1ULL << 2) + #define KVM_X2APIC_DISABLE_SUPPRESS_EOI_BROADCAST (1ULL << 3) Enabling KVM_X2APIC_API_USE_32BIT_IDS changes the behavior of KVM_SET_GSI_ROUTING, KVM_SIGNAL_MSI, KVM_SET_LAPIC, and KVM_GET_LAPIC, @@ -7814,6 +7816,28 @@ as a broadcast even in x2APIC mode in order to support physical x2APIC without interrupt remapping. This is undesirable in logical mode, where 0xff represents CPUs 0-7 in cluster 0. +Setting KVM_X2APIC_ENABLE_SUPPRESS_EOI_BROADCAST instructs KVM to enable +Suppress EOI Broadcasts. KVM will advertise support for Suppress EOI +Broadcast to the guest and suppress LAPIC EOI broadcasts when the guest +sets the Suppress EOI Broadcast bit in the SPIV register. 
When using +in-kernel IRQCHIP mode, enabling this capability will cause KVM to use +I/O APIC version 0x20, which includes support for the EOI Register for +directed EOI. + +Setting KVM_X2APIC_DISABLE_SUPPRESS_EOI_BROADCAST disables support for +Suppress EOI Broadcasts entirely, i.e. instructs KVM to NOT advertise +support to the guest. + +Modern VMMs should set either KVM_X2APIC_ENABLE_SUPPRESS_EOI_BROADCAST +or KVM_X2APIC_DISABLE_SUPPRESS_EOI_BROADCAST. If neither is set, legacy quirky +behavior will be used by KVM: in split IRQCHIP mode, KVM will advertise +support for Suppress EOI Broadcasts but not actually suppress EOI +broadcasts; for in-kernel IRQCHIP mode, KVM will not advertise support for +Suppress EOI Broadcasts. + +Setting both KVM_X2APIC_ENABLE_SUPPRESS_EOI_BROADCAST and +KVM_X2APIC_DISABLE_SUPPRESS_EOI_BROADCAST will fail with an EINVAL error. + 7.8 KVM_CAP_S390_USER_INSTR0 ---------------------------- diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 48598d017d6f..4a6d94dc7a2a 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -1229,6 +1229,12 @@ enum kvm_irqchip_mode { KVM_IRQCHIP_SPLIT, /* created with KVM_CAP_SPLIT_IRQCHIP */ }; +enum kvm_suppress_eoi_broadcast_mode { + KVM_SUPPRESS_EOI_BROADCAST_QUIRKED, /* Legacy behavior */ + KVM_SUPPRESS_EOI_BROADCAST_ENABLED, /* Enable Suppress EOI broadcast */ + KVM_SUPPRESS_EOI_BROADCAST_DISABLED /* Disable Suppress EOI broadcast */ +}; + struct kvm_x86_msr_filter { u8 count; bool default_allow:1; @@ -1480,6 +1486,7 @@ struct kvm_arch { bool x2apic_format; bool x2apic_broadcast_quirk_disabled; + enum kvm_suppress_eoi_broadcast_mode suppress_eoi_broadcast_mode; bool has_mapped_host_mmio; bool guest_can_read_msr_platform_info; diff --git a/arch/x86/include/uapi/asm/kvm.h b/arch/x86/include/uapi/asm/kvm.h index d420c9c066d4..d30241429fa8 100644 --- a/arch/x86/include/uapi/asm/kvm.h +++ b/arch/x86/include/uapi/asm/kvm.h @@ -913,8 +913,10 @@ struct 
kvm_sev_snp_launch_finish { __u64 pad1[4]; }; -#define KVM_X2APIC_API_USE_32BIT_IDS (1ULL << 0) -#define KVM_X2APIC_API_DISABLE_BROADCAST_QUIRK (1ULL << 1) +#define KVM_X2APIC_API_USE_32BIT_IDS (_BITULL(0)) +#define KVM_X2APIC_API_DISABLE_BROADCAST_QUIRK (_BITULL(1)) +#define KVM_X2APIC_ENABLE_SUPPRESS_EOI_BROADCAST (_BITULL(2)) +#define KVM_X2APIC_DISABLE_SUPPRESS_EOI_BROADCAST (_BITULL(3)) struct kvm_hyperv_eventfd { __u32 conn_id; diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index 2c24fd8d815f..36a5af218802 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -107,21 +107,31 @@ bool kvm_apic_pending_eoi(struct kvm_vcpu *vcpu, int vector) bool kvm_lapic_advertise_suppress_eoi_broadcast(struct kvm *kvm) { - /* - * The default in-kernel I/O APIC emulates the 82093AA and does not - * implement an EOI register. Some guests (e.g. Windows with the - * Hyper-V role enabled) disable LAPIC EOI broadcast without checking - * the I/O APIC version, which can cause level-triggered interrupts to - * never be EOI'd. - * - * To avoid this, KVM must not advertise Suppress EOI Broadcast support - * when using the default in-kernel I/O APIC. - * - * Historically, in split IRQCHIP mode, KVM always advertised Suppress - * EOI Broadcast support but did not actually suppress EOIs, resulting - * in quirky behavior. - */ - return !ioapic_in_kernel(kvm); + switch (kvm->arch.suppress_eoi_broadcast_mode) { + case KVM_SUPPRESS_EOI_BROADCAST_ENABLED: + return true; + case KVM_SUPPRESS_EOI_BROADCAST_DISABLED: + return false; + case KVM_SUPPRESS_EOI_BROADCAST_QUIRKED: + /* + * The default in-kernel I/O APIC emulates the 82093AA and does not + * implement an EOI register. Some guests (e.g. Windows with the + * Hyper-V role enabled) disable LAPIC EOI broadcast without + * checking the I/O APIC version, which can cause level-triggered + * interrupts to never be EOI'd. 
+ * + * To avoid this, KVM must not advertise Suppress EOI Broadcast + * support when using the default in-kernel I/O APIC. + * + * Historically, in split IRQCHIP mode, KVM always advertised + * Suppress EOI Broadcast support but did not actually suppress + * EOIs, resulting in quirky behavior. + */ + return !ioapic_in_kernel(kvm); + default: + WARN_ON_ONCE(1); + return false; + } } bool kvm_lapic_respect_suppress_eoi_broadcast(struct kvm *kvm) @@ -129,13 +139,25 @@ bool kvm_lapic_respect_suppress_eoi_broadcast(struct kvm *kvm) /* * Returns true if KVM should honor the guest's request to suppress EOI * broadcasts, i.e. actually implement Suppress EOI Broadcast. - * - * Historically, in split IRQCHIP mode, KVM ignored the suppress EOI - * broadcast bit set by the guest and broadcasts EOIs to the userspace - * I/O APIC. For In-kernel I/O APIC, the support itself is not - * advertised, but if bit was set by the guest, it was respected. */ - return ioapic_in_kernel(kvm); + switch (kvm->arch.suppress_eoi_broadcast_mode) { + case KVM_SUPPRESS_EOI_BROADCAST_ENABLED: + return true; + case KVM_SUPPRESS_EOI_BROADCAST_DISABLED: + return false; + case KVM_SUPPRESS_EOI_BROADCAST_QUIRKED: + /* + * Historically, in split IRQCHIP mode, KVM ignored the suppress + * EOI broadcast bit set by the guest and broadcast EOIs to the + * userspace I/O APIC. For the in-kernel I/O APIC, the support itself + * is not advertised, but if the bit was set by the guest, it was + * respected. 
+ */ + return ioapic_in_kernel(kvm); + default: + WARN_ON_ONCE(1); + return false; + } } __read_mostly DEFINE_STATIC_KEY_FALSE(kvm_has_noapic_vcpu); diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index c9c2aa6f4705..5d56b0384dcc 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -121,8 +121,10 @@ static u64 __read_mostly efer_reserved_bits = ~((u64)EFER_SCE); #define KVM_CAP_PMU_VALID_MASK KVM_PMU_CAP_DISABLE -#define KVM_X2APIC_API_VALID_FLAGS (KVM_X2APIC_API_USE_32BIT_IDS | \ - KVM_X2APIC_API_DISABLE_BROADCAST_QUIRK) +#define KVM_X2APIC_API_VALID_FLAGS (KVM_X2APIC_API_USE_32BIT_IDS | \ + KVM_X2APIC_API_DISABLE_BROADCAST_QUIRK | \ + KVM_X2APIC_ENABLE_SUPPRESS_EOI_BROADCAST | \ + KVM_X2APIC_DISABLE_SUPPRESS_EOI_BROADCAST) static void update_cr8_intercept(struct kvm_vcpu *vcpu); static void process_nmi(struct kvm_vcpu *vcpu); @@ -6778,11 +6780,20 @@ int kvm_vm_ioctl_enable_cap(struct kvm *kvm, if (cap->args[0] & ~KVM_X2APIC_API_VALID_FLAGS) break; + if ((cap->args[0] & KVM_X2APIC_ENABLE_SUPPRESS_EOI_BROADCAST) && + (cap->args[0] & KVM_X2APIC_DISABLE_SUPPRESS_EOI_BROADCAST)) + break; + if (cap->args[0] & KVM_X2APIC_API_USE_32BIT_IDS) kvm->arch.x2apic_format = true; if (cap->args[0] & KVM_X2APIC_API_DISABLE_BROADCAST_QUIRK) kvm->arch.x2apic_broadcast_quirk_disabled = true; + if (cap->args[0] & KVM_X2APIC_ENABLE_SUPPRESS_EOI_BROADCAST) + kvm->arch.suppress_eoi_broadcast_mode = KVM_SUPPRESS_EOI_BROADCAST_ENABLED; + if (cap->args[0] & KVM_X2APIC_DISABLE_SUPPRESS_EOI_BROADCAST) + kvm->arch.suppress_eoi_broadcast_mode = KVM_SUPPRESS_EOI_BROADCAST_DISABLED; + r = 0; break; case KVM_CAP_X86_DISABLE_EXITS: -- 2.39.3