HVA-based gfn_to_pfn caches are not necessarily backed by a KVM memslot. When an MMU notifier invalidation targets such an HVA, KVM's global mmu_invalidate_seq is not guaranteed to change, because that sequence is only advanced through the memslot-based invalidation path. This matters during hva_to_pfn_retry(). The refresh path temporarily marks the cache invalid and drops gpc->lock while resolving the HVA and creating a kernel mapping. If an overlapping HVA invalidation completes in that window, the notifier may observe gpc->valid == false and therefore leave no state behind for the in-progress refresh. For an HVA outside all memslots, the refresh cannot rely on mmu_invalidate_seq to detect the event either, so it can go on to publish a pfn and kernel mapping that have already been invalidated. To close this window, add a per-cache HVA invalidation sequence. Bump the sequence whenever the HVA of an active HVA-based cache falls within an MMU notifier range, regardless of the cache's current valid state. Snapshot the sequence before dropping gpc->lock in hva_to_pfn_retry(), and retry the refresh if it changes before the new mapping is published.
Reported-by: syzbot+0948c82180d475ad24e2@syzkaller.appspotmail.com Closes: https://lore.kernel.org/all/6a0c5f2c.a00a0220.2c7954.0000.GAE@google.com/ Fixes: b9220d32799a ("KVM: x86/xen: allow shared_info to be mapped by fixed HVA") Signed-off-by: Jeongjun Park --- include/linux/kvm_types.h | 1 + virt/kvm/pfncache.c | 42 ++++++++++++++++++++++++++++++++------- 2 files changed, 36 insertions(+), 7 deletions(-) diff --git a/include/linux/kvm_types.h b/include/linux/kvm_types.h index a568d8e6f4e8..ff3b8aa73561 100644 --- a/include/linux/kvm_types.h +++ b/include/linux/kvm_types.h @@ -85,6 +85,7 @@ struct gfn_to_pfn_cache { u64 generation; gpa_t gpa; unsigned long uhva; + unsigned long hva_invalidate_seq; struct kvm_memory_slot *memslot; struct kvm *kvm; struct list_head list; diff --git a/virt/kvm/pfncache.c b/virt/kvm/pfncache.c index 728d2c1b488a..296b06482ebc 100644 --- a/virt/kvm/pfncache.c +++ b/virt/kvm/pfncache.c @@ -19,6 +19,24 @@ #include "kvm_mm.h" +static inline bool gpc_uhva_in_range(struct gfn_to_pfn_cache *gpc, + unsigned long start, unsigned long end) +{ + return gpc->uhva >= start && gpc->uhva < end; +} + +static inline bool gpc_should_invalidate(struct gfn_to_pfn_cache *gpc, + unsigned long start, unsigned long end) +{ + if (!gpc_uhva_in_range(gpc, start, end)) + return false; + + if (kvm_gpc_is_hva_active(gpc)) + return true; + + return gpc->valid && !is_error_noslot_pfn(gpc->pfn); +} + /* * MMU notifier 'invalidate_range_start' hook. 
*/ @@ -32,8 +50,7 @@ void gfn_to_pfn_cache_invalidate_start(struct kvm *kvm, unsigned long start, read_lock_irq(&gpc->lock); /* Only a single page so no need to care about length */ - if (gpc->valid && !is_error_noslot_pfn(gpc->pfn) && - gpc->uhva >= start && gpc->uhva < end) { + if (gpc_should_invalidate(gpc, start, end)) { read_unlock_irq(&gpc->lock); /* @@ -45,9 +62,11 @@ void gfn_to_pfn_cache_invalidate_start(struct kvm *kvm, unsigned long start, */ write_lock_irq(&gpc->lock); - if (gpc->valid && !is_error_noslot_pfn(gpc->pfn) && - gpc->uhva >= start && gpc->uhva < end) + if (gpc_should_invalidate(gpc, start, end)) { + if (kvm_gpc_is_hva_active(gpc)) + gpc->hva_invalidate_seq++; gpc->valid = false; + } write_unlock_irq(&gpc->lock); continue; } @@ -124,8 +143,11 @@ static void gpc_unmap(kvm_pfn_t pfn, void *khva) #endif } -static inline bool mmu_notifier_retry_cache(struct kvm *kvm, unsigned long mmu_seq) +static inline bool mmu_notifier_retry_cache(struct gfn_to_pfn_cache *gpc, + unsigned long mmu_seq, unsigned long hva_seq) { + struct kvm *kvm = gpc->kvm; + /* * mn_active_invalidate_count acts for all intents and purposes * like mmu_invalidate_in_progress here; but the latter cannot @@ -149,7 +171,10 @@ static inline bool mmu_notifier_retry_cache(struct kvm *kvm, unsigned long mmu_s * new (incremented) value of mmu_invalidate_seq is observed. 
*/ smp_rmb(); - return kvm->mmu_invalidate_seq != mmu_seq; + if (kvm->mmu_invalidate_seq != mmu_seq) + return true; + + return gpc->hva_invalidate_seq != hva_seq; } static kvm_pfn_t hva_to_pfn_retry(struct gfn_to_pfn_cache *gpc) @@ -159,6 +184,7 @@ static kvm_pfn_t hva_to_pfn_retry(struct gfn_to_pfn_cache *gpc) kvm_pfn_t new_pfn = KVM_PFN_ERR_FAULT; void *new_khva = NULL; unsigned long mmu_seq; + unsigned long hva_seq; struct page *page; struct kvm_follow_pfn kfp = { @@ -182,6 +208,7 @@ static kvm_pfn_t hva_to_pfn_retry(struct gfn_to_pfn_cache *gpc) do { mmu_seq = gpc->kvm->mmu_invalidate_seq; + hva_seq = gpc->hva_invalidate_seq; smp_rmb(); write_unlock_irq(&gpc->lock); @@ -232,7 +259,7 @@ static kvm_pfn_t hva_to_pfn_retry(struct gfn_to_pfn_cache *gpc) * attempting to refresh. */ WARN_ON_ONCE(gpc->valid); - } while (mmu_notifier_retry_cache(gpc->kvm, mmu_seq)); + } while (mmu_notifier_retry_cache(gpc, mmu_seq, hva_seq)); gpc->valid = true; gpc->pfn = new_pfn; @@ -391,6 +418,7 @@ void kvm_gpc_init(struct gfn_to_pfn_cache *gpc, struct kvm *kvm) gpc->pfn = KVM_PFN_ERR_FAULT; gpc->gpa = INVALID_GPA; gpc->uhva = KVM_HVA_ERR_BAD; + gpc->hva_invalidate_seq = 0; gpc->active = gpc->valid = false; } --