From: Wanpeng Li

Introduce IPI tracking infrastructure for directed yield optimization.

Add a per-vCPU IPI tracking context in kvm_vcpu_arch with
last_ipi_sender/receiver to track IPI communication pairs, a pending_ipi
flag to indicate that an IPI response is awaited, and an ipi_time_ns
monotonic timestamp for recency validation.

Add module parameters ipi_tracking_enabled (global toggle, default true)
and ipi_window_ns (recency window, default 50ms).

Add core helper functions: kvm_track_ipi_communication() to record
sender/receiver pairs, kvm_vcpu_is_ipi_receiver() to validate a recent
IPI relationship, and kvm_vcpu_clear/reset_ipi_context() for lifecycle
management.

Use lockless READ_ONCE/WRITE_ONCE accesses for minimal overhead. The
short time window prevents stale IPI information from affecting
throughput workloads. The infrastructure is inert until integrated with
interrupt delivery in subsequent patches.

Signed-off-by: Wanpeng Li
---
 arch/x86/include/asm/kvm_host.h |  8 ++++
 arch/x86/kvm/lapic.c            | 65 +++++++++++++++++++++++++++++++++
 arch/x86/kvm/x86.c              |  6 +++
 arch/x86/kvm/x86.h              |  4 ++
 include/linux/kvm_host.h        |  1 +
 virt/kvm/kvm_main.c             |  5 +++
 6 files changed, 89 insertions(+)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 48598d017d6f..b5bdc115ff45 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1052,6 +1052,14 @@ struct kvm_vcpu_arch {
 	int pending_external_vector;
 	int highest_stale_pending_ioapic_eoi;
 
+	/* IPI tracking for directed yield (x86 only) */
+	struct {
+		int last_ipi_sender;	/* vCPU ID of last IPI sender */
+		int last_ipi_receiver;	/* vCPU ID of last IPI receiver */
+		bool pending_ipi;	/* Pending IPI response */
+		u64 ipi_time_ns;	/* Monotonic ns when IPI was sent */
+	} ipi_context;
+
 	/* be preempted when it's in kernel-mode(cpl=0) */
 	bool preempted_in_kernel;
 
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 0ae7f913d782..98ec2b18b02c 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -75,6 +75,12 @@ module_param(lapic_timer_advance, bool, 0444);
 /* step-by-step approximation to mitigate fluctuation */
 #define LAPIC_TIMER_ADVANCE_ADJUST_STEP 8
 
+/* IPI tracking window and runtime toggle (runtime-adjustable) */
+static bool ipi_tracking_enabled = true;
+static unsigned long ipi_window_ns = 50 * NSEC_PER_MSEC;
+module_param(ipi_tracking_enabled, bool, 0644);
+module_param(ipi_window_ns, ulong, 0644);
+
 static bool __read_mostly vector_hashing_enabled = true;
 module_param_named(vector_hashing, vector_hashing_enabled, bool, 0444);
 
@@ -1113,6 +1119,65 @@ static int kvm_apic_compare_prio(struct kvm_vcpu *vcpu1, struct kvm_vcpu *vcpu2)
 	return vcpu1->arch.apic_arb_prio - vcpu2->arch.apic_arb_prio;
 }
 
+/*
+ * Track IPI communication for directed yield when a unique receiver exists.
+ * This only writes sender/receiver context and timestamp; it ignores self-IPIs.
+ */
+void kvm_track_ipi_communication(struct kvm_vcpu *sender, struct kvm_vcpu *receiver)
+{
+	if (!sender || !receiver || sender == receiver)
+		return;
+	if (unlikely(!READ_ONCE(ipi_tracking_enabled)))
+		return;
+
+	WRITE_ONCE(sender->arch.ipi_context.last_ipi_receiver, receiver->vcpu_idx);
+	WRITE_ONCE(sender->arch.ipi_context.pending_ipi, true);
+	WRITE_ONCE(sender->arch.ipi_context.ipi_time_ns, ktime_get_mono_fast_ns());
+
+	WRITE_ONCE(receiver->arch.ipi_context.last_ipi_sender, sender->vcpu_idx);
+}
+
+/*
+ * Check if 'receiver' is the recent IPI target of 'sender'.
+ *
+ * Rationale:
+ * - Use a short window to avoid stale IPI information inflating boost
+ *   priority on throughput-sensitive workloads.
+ */
+bool kvm_vcpu_is_ipi_receiver(struct kvm_vcpu *sender, struct kvm_vcpu *receiver)
+{
+	u64 then, now;
+
+	if (unlikely(!READ_ONCE(ipi_tracking_enabled)))
+		return false;
+
+	then = READ_ONCE(sender->arch.ipi_context.ipi_time_ns);
+	now = ktime_get_mono_fast_ns();
+	if (READ_ONCE(sender->arch.ipi_context.pending_ipi) &&
+	    READ_ONCE(sender->arch.ipi_context.last_ipi_receiver) ==
+			receiver->vcpu_idx &&
+	    now - then <= ipi_window_ns)
+		return true;
+
+	return false;
+}
+
+void kvm_vcpu_clear_ipi_context(struct kvm_vcpu *vcpu)
+{
+	WRITE_ONCE(vcpu->arch.ipi_context.pending_ipi, false);
+	WRITE_ONCE(vcpu->arch.ipi_context.last_ipi_sender, -1);
+	WRITE_ONCE(vcpu->arch.ipi_context.last_ipi_receiver, -1);
+}
+
+/*
+ * Reset helper: clear ipi_context and zero ipi_time_ns for hard reset paths.
+ */
+void kvm_vcpu_reset_ipi_context(struct kvm_vcpu *vcpu)
+{
+	kvm_vcpu_clear_ipi_context(vcpu);
+	WRITE_ONCE(vcpu->arch.ipi_context.ipi_time_ns, 0);
+}
+
 /* Return true if the interrupt can be handled by using *bitmap as index mask
  * for valid destinations in *dst array.
  * Return false if kvm_apic_map_get_dest_lapic did nothing useful.
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index b4b5d2d09634..649e016c131f 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -12708,6 +12708,8 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
 		goto free_guest_fpu;
 
 	kvm_xen_init_vcpu(vcpu);
+	/* Initialize IPI tracking */
+	kvm_vcpu_reset_ipi_context(vcpu);
 	vcpu_load(vcpu);
 	kvm_vcpu_after_set_cpuid(vcpu);
 	kvm_set_tsc_khz(vcpu, vcpu->kvm->arch.default_tsc_khz);
@@ -12781,6 +12783,8 @@ void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
 	kvm_mmu_destroy(vcpu);
 	srcu_read_unlock(&vcpu->kvm->srcu, idx);
 	free_page((unsigned long)vcpu->arch.pio_data);
+	/* Reset IPI tracking context */
+	kvm_vcpu_reset_ipi_context(vcpu);
 	kvfree(vcpu->arch.cpuid_entries);
 }
 
@@ -12846,6 +12850,8 @@ void kvm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
 		kvm_leave_nested(vcpu);
 
 	kvm_lapic_reset(vcpu, init_event);
+	/* Clear IPI tracking context on reset */
+	kvm_vcpu_clear_ipi_context(vcpu);
 
 	WARN_ON_ONCE(is_guest_mode(vcpu) || is_smm(vcpu));
 	vcpu->arch.hflags = 0;
diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h
index f3dc77f006f9..86a10c653eac 100644
--- a/arch/x86/kvm/x86.h
+++ b/arch/x86/kvm/x86.h
@@ -451,6 +451,10 @@ fastpath_t handle_fastpath_wrmsr(struct kvm_vcpu *vcpu);
 fastpath_t handle_fastpath_wrmsr_imm(struct kvm_vcpu *vcpu, u32 msr, int reg);
 fastpath_t handle_fastpath_hlt(struct kvm_vcpu *vcpu);
 fastpath_t handle_fastpath_invd(struct kvm_vcpu *vcpu);
+void kvm_track_ipi_communication(struct kvm_vcpu *sender,
+				 struct kvm_vcpu *receiver);
+void kvm_vcpu_clear_ipi_context(struct kvm_vcpu *vcpu);
+void kvm_vcpu_reset_ipi_context(struct kvm_vcpu *vcpu);
 
 extern struct kvm_caps kvm_caps;
 extern struct kvm_host_values kvm_host;
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 5bd76cf394fa..5ae8327fdf21 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -1532,6 +1532,7 @@ static inline void kvm_vcpu_kick(struct kvm_vcpu *vcpu)
 }
 #endif
 
+bool kvm_vcpu_is_ipi_receiver(struct kvm_vcpu *sender, struct kvm_vcpu *receiver);
 int kvm_vcpu_yield_to(struct kvm_vcpu *target);
 void kvm_vcpu_on_spin(struct kvm_vcpu *vcpu, bool yield_to_kernel_mode);
 
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index cde1eddbaa91..495e769c7ddf 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -3963,6 +3963,11 @@ bool __weak kvm_arch_dy_has_pending_interrupt(struct kvm_vcpu *vcpu)
 	return false;
 }
 
+bool __weak kvm_vcpu_is_ipi_receiver(struct kvm_vcpu *sender, struct kvm_vcpu *receiver)
+{
+	return false;
+}
+
 void kvm_vcpu_on_spin(struct kvm_vcpu *me, bool yield_to_kernel_mode)
 {
 	int nr_vcpus, start, i, idx, yielded;
-- 
2.43.0
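
[Editorial note, for illustration only; not part of this patch.]

Since the infrastructure above is inert until a later patch wires it into
interrupt delivery and directed yield, a minimal sketch of one plausible
consumer may help. The helper name kvm_dy_prefer_ipi_target() and its
placement in the kvm_vcpu_on_spin() candidate loop are assumptions about
the follow-up integration, not code from this series; only
kvm_vcpu_is_ipi_receiver() and kvm_vcpu_clear_ipi_context() come from the
patch itself:

/*
 * Hypothetical directed-yield consumer (sketch): when 'me' exits on a
 * pause/spin and scans for a yield candidate, prefer a vCPU that is the
 * recent IPI target of 'me', since it likely must run before 'me' can
 * make progress.
 */
static bool kvm_dy_prefer_ipi_target(struct kvm_vcpu *me,
				     struct kvm_vcpu *candidate)
{
	/*
	 * True only while the sender's IPI is still pending and within
	 * ipi_window_ns, per kvm_vcpu_is_ipi_receiver() above.
	 */
	return kvm_vcpu_is_ipi_receiver(me, candidate);
}

On the receiver side, the interrupt-delivery integration would be expected
to call kvm_vcpu_clear_ipi_context() once the IPI has been delivered, so a
consumed IPI stops biasing yield decisions after the fact.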