On PREEMPT_RT, kvm_xen_set_evtchn_fast() acquires a sleeping lock (gpc->lock) from hard IRQ context (xen_timer_callback), triggering: BUG: sleeping function called from invalid context at kernel/locking/spinlock_rt.c:48 in_atomic(): 0, irqs_disabled(): 0, non_block: 0, pid: 0, name: swapper/5 preempt_count: 10100, expected: 0 RCU nest depth: 0, expected: 0 4 locks held by swapper/5/0: INFO: lockdep is turned off. irq event stamp: 1766 hardirqs last enabled at (1765): [] tick_nohz_idle_got_tick+0x84/0x90 hardirqs last disabled at (1766): [] sysvec_apic_timer_interrupt+0x11/0xd0 softirqs last enabled at (0): [] copy_process+0x1586/0x58b0 softirqs last disabled at (0): [<0000000000000000>] 0x0 Preempt disabled at: [] sysvec_apic_timer_interrupt+0x7c/0xd0 CPU: 5 UID: 0 PID: 0 Comm: swapper/5 Not tainted 6.13.0-rc1-syzkaller-00026-g2d5404caa8c7 #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 09/13/2024 Call Trace: __dump_stack lib/dump_stack.c:94 [inline] dump_stack_lvl+0x116/0x1f0 lib/dump_stack.c:120 __might_resched+0x30d/0x8f0 kernel/sched/core.c:10318 rt_spin_lock+0x70/0x130 kernel/locking/spinlock_rt.c:48 kvm_xen_set_evtchn_fast+0x20b/0xa40 arch/x86/kvm/xen.c:1820 xen_timer_callback+0x91/0x1a0 arch/x86/kvm/xen.c:142 __run_hrtimer kernel/time/hrtimer.c:1739 [inline] __hrtimer_run_queues+0x20b/0xa00 kernel/time/hrtimer.c:1803 The Xen timer uses HRTIMER_MODE_ABS_HARD for latency-sensitive event delivery (see commit 77c9b9dea4fb ("KVM: x86/xen: Use fast path for Xen timer delivery")). On PREEMPT_RT, hard IRQ hrtimers execute in hard IRQ context where sleeping locks cannot be acquired. Use irq_work to defer event injection out of the hrtimer callback to a context where sleeping locks are permitted on PREEMPT_RT. Note that because HRTIMER_MODE_ABS_HARD callbacks run in hard IRQ context on non-RT kernels as well, injection is deferred there too; on x86 the irq_work is raised via a self-IPI and executes almost immediately, so timer delivery latency stays close to the original fast path while the lock context violation on RT is avoided. The approach follows the existing pvclock_irq_work pattern in arch/x86/kvm/x86.c.
Tested on PREEMPT_RT kernel (CONFIG_PREEMPT_RT=y) with the syzbot C reproducer - no crash observed after 30+ minutes of continuous execution. Also tested on non-RT kernel (CONFIG_PREEMPT_RT=n) to verify no regression in the fast path. Reported-by: syzbot+919877893c9d28162dc2@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=919877893c9d28162dc2 Fixes: 77c9b9dea4fb ("KVM: x86/xen: Use fast path for Xen timer delivery") Signed-off-by: shaikh.kamal --- arch/x86/include/asm/kvm_host.h | 1 + arch/x86/kvm/xen.c | 31 +++++++++++++++++++++++++++++++ 2 files changed, 32 insertions(+) diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 5a3bfa293e8b..533b45289d53 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -746,6 +746,7 @@ struct kvm_vcpu_xen { u64 timer_expires; /* In guest epoch */ atomic_t timer_pending; struct hrtimer timer; + struct irq_work timer_inject_irqwork; int poll_evtchn; struct timer_list poll_timer; struct kvm_hypervisor_cpuid cpuid; diff --git a/arch/x86/kvm/xen.c b/arch/x86/kvm/xen.c index d6b2a665b499..01fa7b165355 100644 --- a/arch/x86/kvm/xen.c +++ b/arch/x86/kvm/xen.c @@ -122,6 +122,24 @@ void kvm_xen_inject_timer_irqs(struct kvm_vcpu *vcpu) } } +static void xen_timer_inject_irqwork(struct irq_work *work) +{ + struct kvm_vcpu_xen *xen = container_of(work, struct kvm_vcpu_xen, + timer_inject_irqwork); + struct kvm_vcpu *vcpu = container_of(xen, struct kvm_vcpu, arch.xen); + struct kvm_xen_evtchn e; + int rc; + + e.vcpu_id = vcpu->vcpu_id; + e.vcpu_idx = vcpu->vcpu_idx; + e.port = vcpu->arch.xen.timer_virq; + e.priority = KVM_IRQ_ROUTING_XEN_EVTCHN_PRIO_2LEVEL; + + rc = kvm_xen_set_evtchn_fast(&e, vcpu->kvm); + if (rc != -EWOULDBLOCK) + vcpu->arch.xen.timer_expires = 0; +} + static enum hrtimer_restart xen_timer_callback(struct hrtimer *timer) { struct kvm_vcpu *vcpu = container_of(timer, struct kvm_vcpu, @@ -132,6 +150,17 @@ static enum hrtimer_restart 
xen_timer_callback(struct hrtimer *timer) if (atomic_read(&vcpu->arch.xen.timer_pending)) return HRTIMER_NORESTART; + /* + * HRTIMER_MODE_ABS_HARD callbacks run in hard IRQ context on RT + * and non-RT alike. On PREEMPT_RT, kvm_xen_set_evtchn_fast() cannot + * acquire sleeping locks (gpc->lock) here, so defer to irq_work, + * which runs preemptibly on RT (and via self-IPI on non-RT). + */ + if (in_hardirq()) { + irq_work_queue(&vcpu->arch.xen.timer_inject_irqwork); + return HRTIMER_NORESTART; + } + e.vcpu_id = vcpu->vcpu_id; e.vcpu_idx = vcpu->vcpu_idx; e.port = vcpu->arch.xen.timer_virq; @@ -2303,6 +2332,8 @@ void kvm_xen_init_vcpu(struct kvm_vcpu *vcpu) timer_setup(&vcpu->arch.xen.poll_timer, cancel_evtchn_poll, 0); hrtimer_setup(&vcpu->arch.xen.timer, xen_timer_callback, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_HARD); + init_irq_work(&vcpu->arch.xen.timer_inject_irqwork, + xen_timer_inject_irqwork); kvm_gpc_init(&vcpu->arch.xen.runstate_cache, vcpu->kvm); kvm_gpc_init(&vcpu->arch.xen.runstate2_cache, vcpu->kvm); -- 2.43.0