From: Wanpeng Li Add foundational infrastructure for tracking IPI sender/receiver relationships to improve directed yield candidate selection. Introduce per-vCPU ipi_context structure containing: - last_ipi_receiver: vCPU index that received the last IPI from this vCPU - ipi_time_ns: timestamp of the last IPI send - pending_ipi: flag indicating an unacknowledged IPI - last_ipi_sender: vCPU index that sent an IPI to this vCPU Add module parameters for runtime control: - ipi_tracking_enabled (default: true): master switch for IPI tracking - ipi_window_ns (default: 50ms): recency window for IPI validity Implement helper functions: - kvm_track_ipi_communication(): record the sender/receiver pair when a unicast IPI is delivered - kvm_vcpu_is_ipi_receiver(): determine if a vCPU is a recent IPI target - kvm_vcpu_clear_ipi_context()/kvm_vcpu_reset_ipi_context(): clear tracking state The infrastructure is inert until integrated with interrupt delivery in subsequent patches. v1 -> v2: - Improve documentation for module parameters explaining the 50ms window rationale - Add kvm_vcpu_is_ipi_receiver() declaration to x86.h header - Add weak function annotation comment in kvm_host.h Signed-off-by: Wanpeng Li --- arch/x86/include/asm/kvm_host.h | 12 ++++++ arch/x86/kvm/lapic.c | 76 +++++++++++++++++++++++++++++++++ arch/x86/kvm/x86.c | 3 ++ arch/x86/kvm/x86.h | 8 ++++ include/linux/kvm_host.h | 3 ++ virt/kvm/kvm_main.c | 6 +++ 6 files changed, 108 insertions(+) diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 5a3bfa293e8b..2464c310f0a2 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -1052,6 +1052,18 @@ struct kvm_vcpu_arch { int pending_external_vector; int highest_stale_pending_ioapic_eoi; + /* + * IPI tracking for directed yield optimization. + * Records sender/receiver relationships when IPIs are delivered + * to enable IPI-aware vCPU scheduling decisions. 
+ */ + struct { + int last_ipi_sender; /* vCPU index of last IPI sender */ + int last_ipi_receiver; /* vCPU index of last IPI receiver */ + bool pending_ipi; /* Awaiting IPI response */ + u64 ipi_time_ns; /* Timestamp when IPI was sent */ + } ipi_context; + /* be preempted when it's in kernel-mode(cpl=0) */ bool preempted_in_kernel; diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index 1597dd0b0cc6..23f247a3b127 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -75,6 +75,19 @@ module_param(lapic_timer_advance, bool, 0444); /* step-by-step approximation to mitigate fluctuation */ #define LAPIC_TIMER_ADVANCE_ADJUST_STEP 8 +/* + * IPI tracking for directed yield optimization. + * - ipi_tracking_enabled: global toggle (default on) + * - ipi_window_ns: recency window for IPI validity (default 50ms) + * The 50ms window is chosen to be long enough to capture IPI response + * patterns while short enough to avoid stale information affecting + * scheduling decisions in throughput-sensitive workloads. + */ +static bool ipi_tracking_enabled = true; +static unsigned long ipi_window_ns = 50 * NSEC_PER_MSEC; +module_param(ipi_tracking_enabled, bool, 0644); +module_param(ipi_window_ns, ulong, 0644); + static bool __read_mostly vector_hashing_enabled = true; module_param_named(vector_hashing, vector_hashing_enabled, bool, 0444); @@ -1113,6 +1126,69 @@ static int kvm_apic_compare_prio(struct kvm_vcpu *vcpu1, struct kvm_vcpu *vcpu2) return vcpu1->arch.apic_arb_prio - vcpu2->arch.apic_arb_prio; } +/* + * Track IPI communication for directed yield optimization. + * Records sender/receiver relationship when a unicast IPI is delivered. + * Only tracks when a unique receiver exists; ignores self-IPI. 
+ */ +void kvm_track_ipi_communication(struct kvm_vcpu *sender, struct kvm_vcpu *receiver) +{ + if (!sender || !receiver || sender == receiver) + return; + if (unlikely(!READ_ONCE(ipi_tracking_enabled))) + return; + + WRITE_ONCE(sender->arch.ipi_context.last_ipi_receiver, receiver->vcpu_idx); + WRITE_ONCE(sender->arch.ipi_context.pending_ipi, true); + WRITE_ONCE(sender->arch.ipi_context.ipi_time_ns, ktime_get_mono_fast_ns()); + + WRITE_ONCE(receiver->arch.ipi_context.last_ipi_sender, sender->vcpu_idx); +} + +/* + * Check if 'receiver' is the recent IPI target of 'sender'. + * + * Rationale: + * - Use a short window to avoid stale IPI inflating boost priority + * on throughput-sensitive workloads. + */ +bool kvm_vcpu_is_ipi_receiver(struct kvm_vcpu *sender, struct kvm_vcpu *receiver) +{ + u64 then, now; + + if (unlikely(!READ_ONCE(ipi_tracking_enabled))) + return false; + + then = READ_ONCE(sender->arch.ipi_context.ipi_time_ns); + now = ktime_get_mono_fast_ns(); + if (READ_ONCE(sender->arch.ipi_context.pending_ipi) && + READ_ONCE(sender->arch.ipi_context.last_ipi_receiver) == + receiver->vcpu_idx && + now - then <= ipi_window_ns) + return true; + + return false; +} + +/* + * Clear IPI context for a vCPU (e.g., on EOI or reset). + */ +void kvm_vcpu_clear_ipi_context(struct kvm_vcpu *vcpu) +{ + WRITE_ONCE(vcpu->arch.ipi_context.pending_ipi, false); + WRITE_ONCE(vcpu->arch.ipi_context.last_ipi_sender, -1); + WRITE_ONCE(vcpu->arch.ipi_context.last_ipi_receiver, -1); +} + +/* + * Reset IPI context completely (e.g., on vCPU creation/destruction). + */ +void kvm_vcpu_reset_ipi_context(struct kvm_vcpu *vcpu) +{ + kvm_vcpu_clear_ipi_context(vcpu); + WRITE_ONCE(vcpu->arch.ipi_context.ipi_time_ns, 0); +} + /* Return true if the interrupt can be handled by using *bitmap as index mask * for valid destinations in *dst array. * Return false if kvm_apic_map_get_dest_lapic did nothing useful. 
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 0c6d899d53dd..d4c401ef04ca 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -12728,6 +12728,7 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu) goto free_guest_fpu; kvm_xen_init_vcpu(vcpu); + kvm_vcpu_reset_ipi_context(vcpu); vcpu_load(vcpu); kvm_vcpu_after_set_cpuid(vcpu); kvm_set_tsc_khz(vcpu, vcpu->kvm->arch.default_tsc_khz); @@ -12795,6 +12796,7 @@ void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu) kvm_mmu_destroy(vcpu); srcu_read_unlock(&vcpu->kvm->srcu, idx); free_page((unsigned long)vcpu->arch.pio_data); + kvm_vcpu_reset_ipi_context(vcpu); kvfree(vcpu->arch.cpuid_entries); } @@ -12871,6 +12873,7 @@ void kvm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event) kvm_leave_nested(vcpu); kvm_lapic_reset(vcpu, init_event); + kvm_vcpu_clear_ipi_context(vcpu); WARN_ON_ONCE(is_guest_mode(vcpu) || is_smm(vcpu)); vcpu->arch.hflags = 0; diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h index fdab0ad49098..cfc24fb207e0 100644 --- a/arch/x86/kvm/x86.h +++ b/arch/x86/kvm/x86.h @@ -466,6 +466,14 @@ fastpath_t handle_fastpath_wrmsr_imm(struct kvm_vcpu *vcpu, u32 msr, int reg); fastpath_t handle_fastpath_hlt(struct kvm_vcpu *vcpu); fastpath_t handle_fastpath_invd(struct kvm_vcpu *vcpu); +/* IPI tracking helpers for directed yield */ +void kvm_track_ipi_communication(struct kvm_vcpu *sender, + struct kvm_vcpu *receiver); +bool kvm_vcpu_is_ipi_receiver(struct kvm_vcpu *sender, + struct kvm_vcpu *receiver); +void kvm_vcpu_clear_ipi_context(struct kvm_vcpu *vcpu); +void kvm_vcpu_reset_ipi_context(struct kvm_vcpu *vcpu); + extern struct kvm_caps kvm_caps; extern struct kvm_host_values kvm_host; diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index d93f75b05ae2..f42315d341b3 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -1535,6 +1535,9 @@ static inline void kvm_vcpu_kick(struct kvm_vcpu *vcpu) int kvm_vcpu_yield_to(struct kvm_vcpu *target); void kvm_vcpu_on_spin(struct 
kvm_vcpu *vcpu, bool yield_to_kernel_mode); +/* Weak function, overridden by arch/x86/kvm for IPI-aware directed yield */ +bool kvm_vcpu_is_ipi_receiver(struct kvm_vcpu *sender, struct kvm_vcpu *receiver); + void kvm_flush_remote_tlbs(struct kvm *kvm); void kvm_flush_remote_tlbs_range(struct kvm *kvm, gfn_t gfn, u64 nr_pages); void kvm_flush_remote_tlbs_memslot(struct kvm *kvm, diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 5fcd401a5897..ff771a872c6d 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -3964,6 +3964,12 @@ bool __weak kvm_arch_dy_has_pending_interrupt(struct kvm_vcpu *vcpu) return false; } +bool __weak kvm_vcpu_is_ipi_receiver(struct kvm_vcpu *sender, + struct kvm_vcpu *receiver) +{ + return false; +} + void kvm_vcpu_on_spin(struct kvm_vcpu *me, bool yield_to_kernel_mode) { int nr_vcpus, start, i, idx, yielded; -- 2.43.0