Intel VMX records deferred debug exception causes in the VMCS field GUEST_PENDING_DBG_EXCEPTIONS (B0-B3, enabled breakpoint, BS, RTM). This state is used when debug exceptions are suppressed (e.g. by MOV SS / STI interruptibility) and later become deliverable. See Intel SDM Vol. 3C, 27.3.1.5 "Checks on Guest Non-Register State" and Intel SDM Vol. 3C, 27.7.3 "Delivery of Pending Debug Exceptions after VM Entry". KVM may vector a #DB exception after a VM-exit and/or instruction emulation. In particular, after a MOV SS that encounters a data breakpoint (and thus suppresses delivery for one instruction), the following instruction may cause a VM-exit and be emulated (e.g. CPUID), or it may be intercepted directly (e.g. ICEBP/INT1). In these flows, VMX retains the deferred breakpoint cause in GUEST_PENDING_DBG_EXCEPTIONS while KVM generates a #DB for single-step (BS). Prior to this change, KVM did not merge those pending causes into the #DB payload, resulting in the guest's DR6 missing B0-B3 even though bare metal reports the combined reasons (e.g. BS+B0). Fix this by merging the pending debug causes from GUEST_PENDING_DBG_EXCEPTIONS into the #DB payload when vectoring the #DB exception, so that the guest always observes all accumulated reasons in DR6. The merging is done in the kvm_deliver_exception_payload() function to cover all injection paths, including those where the payload may be consumed immediately by kvm_multiple_exception(). 
Reported-by: Nick Peterson Signed-off-by: Aidan Khoury --- arch/x86/include/asm/kvm-x86-ops.h | 1 + arch/x86/include/asm/kvm_host.h | 1 + arch/x86/kvm/vmx/main.c | 9 +++++++++ arch/x86/kvm/vmx/vmx.c | 16 +++++++++++----- arch/x86/kvm/vmx/x86_ops.h | 1 + arch/x86/kvm/x86.c | 12 ++++++++++++ 6 files changed, 35 insertions(+), 5 deletions(-) diff --git a/arch/x86/include/asm/kvm-x86-ops.h b/arch/x86/include/asm/kvm-x86-ops.h index fdf178443f85..82fddf2fe61b 100644 --- a/arch/x86/include/asm/kvm-x86-ops.h +++ b/arch/x86/include/asm/kvm-x86-ops.h @@ -50,6 +50,7 @@ KVM_X86_OP(get_gdt) KVM_X86_OP(set_gdt) KVM_X86_OP(sync_dirty_debug_regs) KVM_X86_OP(set_dr7) +KVM_X86_OP_OPTIONAL_RET0(get_pending_dbg_exceptions) KVM_X86_OP(cache_reg) KVM_X86_OP(get_rflags) KVM_X86_OP(set_rflags) diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index b74ae7183f3a..d4d0aa0a3a4a 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -1768,6 +1768,7 @@ struct kvm_x86_ops { void (*set_gdt)(struct kvm_vcpu *vcpu, struct desc_ptr *dt); void (*sync_dirty_debug_regs)(struct kvm_vcpu *vcpu); void (*set_dr7)(struct kvm_vcpu *vcpu, unsigned long value); + unsigned long (*get_pending_dbg_exceptions)(struct kvm_vcpu *vcpu); void (*cache_reg)(struct kvm_vcpu *vcpu, enum kvm_reg reg); unsigned long (*get_rflags)(struct kvm_vcpu *vcpu); void (*set_rflags)(struct kvm_vcpu *vcpu, unsigned long rflags); diff --git a/arch/x86/kvm/vmx/main.c b/arch/x86/kvm/vmx/main.c index 0eb2773b2ae2..1cd30f8e3625 100644 --- a/arch/x86/kvm/vmx/main.c +++ b/arch/x86/kvm/vmx/main.c @@ -465,6 +465,14 @@ static void vt_set_dr7(struct kvm_vcpu *vcpu, unsigned long val) vmx_set_dr7(vcpu, val); } +static unsigned long vt_get_pending_dbg_exceptions(struct kvm_vcpu *vcpu) +{ + if (WARN_ON_ONCE(is_td_vcpu(vcpu))) + return 0; + + return vmx_get_pending_dbg_exceptions(vcpu); +} + static void vt_sync_dirty_debug_regs(struct kvm_vcpu *vcpu) { /* @@ -907,6 +915,7 @@ struct 
kvm_x86_ops vt_x86_ops __initdata = { .get_gdt = vt_op(get_gdt), .set_gdt = vt_op(set_gdt), .set_dr7 = vt_op(set_dr7), + .get_pending_dbg_exceptions = vt_op(get_pending_dbg_exceptions), .sync_dirty_debug_regs = vt_op(sync_dirty_debug_regs), .cache_reg = vt_op(cache_reg), .get_rflags = vt_op(get_rflags), diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index 91b6f2f3edc2..1b2e274fe317 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -5300,13 +5300,13 @@ static int handle_exception_nmi(struct kvm_vcpu *vcpu) * have already expired. Note, the CPU sets/clears BS * as appropriate for all other VM-Exits types. */ + if ((vmx_get_rflags(vcpu) & X86_EFLAGS_TF) && + (vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & + (GUEST_INTR_STATE_STI | GUEST_INTR_STATE_MOV_SS))) + vmcs_writel(GUEST_PENDING_DBG_EXCEPTIONS, + vmcs_readl(GUEST_PENDING_DBG_EXCEPTIONS) | DR6_BS); if (is_icebp(intr_info)) WARN_ON(!skip_emulated_instruction(vcpu)); - else if ((vmx_get_rflags(vcpu) & X86_EFLAGS_TF) && - (vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & - (GUEST_INTR_STATE_STI | GUEST_INTR_STATE_MOV_SS))) - vmcs_writel(GUEST_PENDING_DBG_EXCEPTIONS, - vmcs_readl(GUEST_PENDING_DBG_EXCEPTIONS) | DR6_BS); kvm_queue_exception_p(vcpu, DB_VECTOR, dr6); return 1; @@ -5613,6 +5613,12 @@ void vmx_set_dr7(struct kvm_vcpu *vcpu, unsigned long val) vmcs_writel(GUEST_DR7, val); } +unsigned long vmx_get_pending_dbg_exceptions(struct kvm_vcpu *vcpu) +{ + return vmcs_readl(GUEST_PENDING_DBG_EXCEPTIONS) & + (DR6_RTM | DR6_BS | BIT(12) /*Enabled breakpoint*/ | DR_TRAP_BITS); +} + static int handle_tpr_below_threshold(struct kvm_vcpu *vcpu) { kvm_apic_update_ppr(vcpu); diff --git a/arch/x86/kvm/vmx/x86_ops.h b/arch/x86/kvm/vmx/x86_ops.h index 9697368d65b3..365682799d05 100644 --- a/arch/x86/kvm/vmx/x86_ops.h +++ b/arch/x86/kvm/vmx/x86_ops.h @@ -75,6 +75,7 @@ void vmx_get_gdt(struct kvm_vcpu *vcpu, struct desc_ptr *dt); void vmx_set_gdt(struct kvm_vcpu *vcpu, struct desc_ptr *dt); void 
vmx_set_dr6(struct kvm_vcpu *vcpu, unsigned long val); void vmx_set_dr7(struct kvm_vcpu *vcpu, unsigned long val); +unsigned long vmx_get_pending_dbg_exceptions(struct kvm_vcpu *vcpu); void vmx_sync_dirty_debug_regs(struct kvm_vcpu *vcpu); void vmx_cache_reg(struct kvm_vcpu *vcpu, enum kvm_reg reg); unsigned long vmx_get_rflags(struct kvm_vcpu *vcpu); diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 19d2d6d9e64a..c889dffe4e59 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -765,11 +765,23 @@ static int exception_type(int vector) void kvm_deliver_exception_payload(struct kvm_vcpu *vcpu, struct kvm_queued_exception *ex) { + unsigned long pending_dbg; + if (!ex->has_payload) return; switch (ex->vector) { case DB_VECTOR: + /* + * VMX records deferred debug causes (B0-B3, enabled breakpoint, + * BS, RTM) in the vmcs.PENDING_DBG_EXCEPTIONS field. Merge any + * pending causes into the exception payload so the guest may + * see all accumulated reasons in DR6 when the #DB is vectored. + */ + pending_dbg = kvm_x86_call(get_pending_dbg_exceptions)(vcpu); + if (pending_dbg) + ex->payload |= pending_dbg; + /* * "Certain debug exceptions may clear bit 0-3. The * remaining contents of the DR6 register are never -- 2.43.0