In some cases (e.g. under extreme memory pressure on the host), attempting to shadow memory will result in the same memory being unshadowed, causing a loop. Add a PGSTE bit to distinguish between shadowed memory and shadowed DAT tables, fix the unshadowing logic in _gmap_ptep_xchg() to prevent unnecessary unshadowing and perform better checks. Also fix the unshadowing logic in _gmap_crstep_xchg_atomic() which did not unshadow properly when the large page would become unprotected. Opportunistically add a check in gmap_protect_rmap() to make sure it won't be called with level == TABLE_TYPE_PAGE_TABLE. Signed-off-by: Claudio Imbrenda Fixes: a2c17f9270cc ("KVM: s390: New gmap code") --- arch/s390/kvm/dat.c | 1 + arch/s390/kvm/dat.h | 3 ++- arch/s390/kvm/gaccess.c | 1 + arch/s390/kvm/gmap.c | 3 ++- arch/s390/kvm/gmap.h | 22 +++++++++++++++++++--- 5 files changed, 25 insertions(+), 5 deletions(-) diff --git a/arch/s390/kvm/dat.c b/arch/s390/kvm/dat.c index 7b8d70fe406d..4a41c0247ffa 100644 --- a/arch/s390/kvm/dat.c +++ b/arch/s390/kvm/dat.c @@ -267,6 +267,7 @@ static int dat_split_ste(struct kvm_s390_mmu_cache *mc, union pmd *pmdp, gfn_t g /* No need to take locks as the page table is not installed yet. 
*/ pgste_init.prefix_notif = old.s.fc1.prefix_notif; pgste_init.vsie_notif = old.s.fc1.vsie_notif; + pgste_init.vsie_gmem = old.s.fc1.vsie_notif; pgste_init.pcl = uses_skeys && init.h.i; dat_init_pgstes(pt, pgste_init.val); } else { diff --git a/arch/s390/kvm/dat.h b/arch/s390/kvm/dat.h index 8f8278c44879..873e13ac5a27 100644 --- a/arch/s390/kvm/dat.h +++ b/arch/s390/kvm/dat.h @@ -145,7 +145,8 @@ union pgste { unsigned long cmma_d : 1; /* Dirty flag for CMMA bits */ unsigned long prefix_notif : 1; /* Guest prefix invalidation notification */ unsigned long vsie_notif : 1; /* Referenced in a shadow table */ - unsigned long : 5; + unsigned long vsie_gmem : 1; /* Contains nested guest memory */ + unsigned long : 4; unsigned long : 8; }; struct { diff --git a/arch/s390/kvm/gaccess.c b/arch/s390/kvm/gaccess.c index b07accd19618..4f8d5592c9a9 100644 --- a/arch/s390/kvm/gaccess.c +++ b/arch/s390/kvm/gaccess.c @@ -1445,6 +1445,7 @@ static int _do_shadow_pte(struct gmap *sg, gpa_t raddr, union pte *ptep_h, union } else { pgste = _gmap_ptep_xchg(sg->parent, ptep_h, newpte, pgste, f->gfn, false); pgste.vsie_notif = 1; + pgste.vsie_gmem = 1; } pgste_set_unlock(ptep_h, pgste); if (rc) diff --git a/arch/s390/kvm/gmap.c b/arch/s390/kvm/gmap.c index 10c98c8cc1d8..8cff0cf5ce24 100644 --- a/arch/s390/kvm/gmap.c +++ b/arch/s390/kvm/gmap.c @@ -1031,7 +1031,8 @@ int gmap_protect_rmap(struct kvm_s390_mmu_cache *mc, struct gmap *sg, gfn_t p_gf union pte pte; int flags, rc; - KVM_BUG_ON(!is_shadow(sg), sg->kvm); + if (KVM_BUG_ON(!is_shadow(sg) || level <= TABLE_TYPE_PAGE_TABLE, sg->kvm)) + return -EINVAL; lockdep_assert_held(&sg->parent->children_lock); flags = DAT_WALK_SPLIT_ALLOC | (uses_skeys(sg->parent) ? 
DAT_WALK_USES_SKEYS : 0); diff --git a/arch/s390/kvm/gmap.h b/arch/s390/kvm/gmap.h index 96ee1395a592..e490f2995a26 100644 --- a/arch/s390/kvm/gmap.h +++ b/arch/s390/kvm/gmap.h @@ -167,6 +167,15 @@ static inline bool gmap_unmap_prefix(struct gmap *gmap, gfn_t gfn, gfn_t end) return _gmap_unmap_prefix(gmap, gfn, end, false); } +static inline bool pte_needs_unshadow(union pte oldpte, union pte newpte, union pgste pgste) +{ + if (!pgste.vsie_notif) + return false; + if (pgste.vsie_gmem) + return (oldpte.h.p != newpte.h.p) || newpte.h.i; + return !newpte.h.p || !newpte.s.pr; +} + static inline union pgste _gmap_ptep_xchg(struct gmap *gmap, union pte *ptep, union pte newpte, union pgste pgste, gfn_t gfn, bool needs_lock) { @@ -180,8 +189,9 @@ static inline union pgste _gmap_ptep_xchg(struct gmap *gmap, union pte *ptep, un pgste.prefix_notif = 0; gmap_unmap_prefix(gmap, gfn, gfn + 1); } - if (pgste.vsie_notif && (ptep->h.p != newpte.h.p || newpte.h.i)) { + if (pte_needs_unshadow(*ptep, newpte, pgste)) { pgste.vsie_notif = 0; + pgste.vsie_gmem = 0; if (needs_lock) gmap_handle_vsie_unshadow_event(gmap, gfn); else @@ -198,6 +208,13 @@ static inline union pgste gmap_ptep_xchg(struct gmap *gmap, union pte *ptep, uni return _gmap_ptep_xchg(gmap, ptep, newpte, pgste, gfn, true); } +static inline bool crste_needs_unshadow(union crste oldcrste, union crste newcrste) +{ + if (!oldcrste.s.fc1.vsie_notif) + return false; + return (newcrste.h.p != oldcrste.h.p) || newcrste.h.i || !newcrste.s.fc1.vsie_notif; +} + static inline bool __must_check _gmap_crstep_xchg_atomic(struct gmap *gmap, union crste *crstep, union crste oldcrste, union crste newcrste, gfn_t gfn, bool needs_lock) @@ -216,8 +233,7 @@ static inline bool __must_check _gmap_crstep_xchg_atomic(struct gmap *gmap, unio newcrste.s.fc1.prefix_notif = 0; gmap_unmap_prefix(gmap, gfn, gfn + align); } - if (crste_leaf(oldcrste) && oldcrste.s.fc1.vsie_notif && - (newcrste.h.p || newcrste.h.i || !newcrste.s.fc1.vsie_notif)) { + if 
(crste_leaf(oldcrste) && crste_needs_unshadow(oldcrste, newcrste)) { newcrste.s.fc1.vsie_notif = 0; if (needs_lock) gmap_handle_vsie_unshadow_event(gmap, gfn); -- 2.54.0