After the commit aaf12a7b4323 ("KVM: Rename and move CPUHP_AP_KVM_STARTING to ONLINE section"), KVM's hotplug callbacks have been moved into the ONLINE section, where IRQs and preemption are enabled according to the documentation. However, if IRQs are not guaranteed to be disabled, it could theoretically be a bug, because virtualization_enabled may be stale (with respect to the actual state of the hardware) when read from IRQ context, making the callback potentially reentrant. Therefore, disable IRQs in kvm_online_cpu() and kvm_offline_cpu() to ensure that all paths for kvm_enable_virtualization_cpu() and kvm_disable_virtualization_cpu() are in an IRQ-disabled state. Suggested-by: Sean Christopherson Signed-off-by: Hou Wenlong --- virt/kvm/kvm_main.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 18f29ef93543..cf8dddeed37e 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -5580,6 +5580,8 @@ __weak void kvm_arch_disable_virtualization(void) static int kvm_enable_virtualization_cpu(void) { + lockdep_assert_irqs_disabled(); + if (__this_cpu_read(virtualization_enabled)) return 0; @@ -5595,6 +5597,8 @@ static int kvm_enable_virtualization_cpu(void) static int kvm_online_cpu(unsigned int cpu) { + guard(irqsave)(); + /* * Abort the CPU online process if hardware virtualization cannot * be enabled. Otherwise running VMs would encounter unrecoverable @@ -5605,6 +5609,8 @@ static int kvm_online_cpu(unsigned int cpu) static void kvm_disable_virtualization_cpu(void *ign) { + lockdep_assert_irqs_disabled(); + if (!__this_cpu_read(virtualization_enabled)) return; @@ -5615,6 +5621,8 @@ static void kvm_disable_virtualization_cpu(void *ign) static int kvm_offline_cpu(unsigned int cpu) { + guard(irqsave)(); + kvm_disable_virtualization_cpu(NULL); return 0; } @@ -5648,7 +5656,6 @@ static int kvm_suspend(void) * dropped all locks (userspace tasks are frozen via a fake signal). */ lockdep_assert_not_held(&kvm_usage_lock); - lockdep_assert_irqs_disabled(); kvm_disable_virtualization_cpu(NULL); return 0; @@ -5657,7 +5664,6 @@ static int kvm_suspend(void) static void kvm_resume(void) { lockdep_assert_not_held(&kvm_usage_lock); - lockdep_assert_irqs_disabled(); WARN_ON_ONCE(kvm_enable_virtualization_cpu()); } base-commit: a6ad54137af92535cfe32e19e5f3bc1bb7dbd383 -- 2.31.1 The commit a377ac1cd9d7b ("x86/entry: Move user return notifier out of loop") moved fire_user_return_notifiers() into the section with IRQs disabled, and it somewhat inadvertantly fixed the underlying issue that was papered over by commit 1650b4ebc99d ("KVM: Disable irq while unregistering user notifier"). Therefore, the comments and code are outdated. Aslo assert that IRQs are disabled in kvm_on_user_return(), as both fire_user_return_notifiers() and kvm_arch_disable_virtualization_cpu() are now in IRQs disabled state. Signed-off-by: Hou Wenlong --- arch/x86/kvm/x86.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 33fba801b205..84fc30a99be1 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -568,18 +568,18 @@ static void kvm_on_user_return(struct user_return_notifier *urn) struct kvm_user_return_msrs *msrs = container_of(urn, struct kvm_user_return_msrs, urn); struct kvm_user_return_msr_values *values; - unsigned long flags; /* - * Disabling irqs at this point since the following code could be - * interrupted and executed through kvm_arch_disable_virtualization_cpu() + * Assert that IRQs are disabled. KVM disables virtualization via IPI + * callback on reboot, and this code isn't safe for re-entrancy, e.g. + * receiving the IRQ after checking "registered" would lead to double + * deletion of KVM's notifier. */ - local_irq_save(flags); - if (msrs->registered) { - msrs->registered = false; - user_return_notifier_unregister(urn); - } - local_irq_restore(flags); + lockdep_assert_irqs_disabled(); + + msrs->registered = false; + user_return_notifier_unregister(urn); + for (slot = 0; slot < kvm_nr_uret_msrs; ++slot) { values = &msrs->values[slot]; if (values->host != values->curr) { -- 2.31.1