Add page table management functions to be used for KVM guest (gmap) page tables. This patch adds functions to handle memslot creation and destruction, additional per-pagetable data stored in the PGSTEs, mapping physical addresses into the gmap, and marking address ranges as prefix. Signed-off-by: Claudio Imbrenda --- arch/s390/kvm/dat.c | 227 ++++++++++++++++++++++++++++++++++++++++++++ arch/s390/kvm/dat.h | 35 +++++++ 2 files changed, 262 insertions(+) diff --git a/arch/s390/kvm/dat.c b/arch/s390/kvm/dat.c index f626e8c37770..4249400a9d21 100644 --- a/arch/s390/kvm/dat.c +++ b/arch/s390/kvm/dat.c @@ -772,3 +772,230 @@ long dat_reset_skeys(union asce asce, gfn_t start) return _dat_walk_gfn_range(start, asce_end(asce), asce, &ops, DAT_WALK_IGN_HOLES, NULL); } + +static long _dat_slot_pte(union pte *ptep, gfn_t gfn, gfn_t next, struct dat_walk *walk) +{ + union crste dummy = { .val = (unsigned long)walk->priv }; + union pte new_pte, pte = READ_ONCE(*ptep); + + new_pte = _PTE_TOK(dummy.tok.type, dummy.tok.par); + + /* Table entry already in the desired state */ + if (pte.val == new_pte.val) + return 0; + + dat_ptep_xchg(ptep, new_pte, gfn, walk->asce, false); + return 0; +} + +static long _dat_slot_crste(union crste *crstep, gfn_t gfn, gfn_t next, struct dat_walk *walk) +{ + union crste new_crste, crste = READ_ONCE(*crstep); + + new_crste.val = (unsigned long)walk->priv; + new_crste.h.tt = crste.h.tt; + + /* Table entry already in the desired state */ + if (crste.val == new_crste.val) + return 0; + + /* This table entry needs to be updated */ + if (walk->start <= gfn && walk->end >= next) { + dat_crstep_xchg_atomic(crstep, crste, new_crste, gfn, walk->asce); + /* A lower level table was present, needs to be freed */ + if (!crste.h.fc && !crste.h.i) + dat_free_level(dereference_crste(crste), true); + return 0; + } + + /* A lower level table is present, things will handled there */ + if (!crste.h.fc && !crste.h.i) + return 0; + /* Split (install a lower level table), 
and handle things there */ + return dat_split_crste(crstep, gfn, walk->asce); +} + +static const struct dat_walk_ops dat_slot_ops = { + .pte_entry = _dat_slot_pte, + .crste_ops = { _dat_slot_crste, _dat_slot_crste, _dat_slot_crste, _dat_slot_crste, }, +}; + +int dat_set_slot(union asce asce, gfn_t start, gfn_t end, u16 type, u16 param) +{ + unsigned long token = _CRSTE_TOK(0, type, param).val; + + return _dat_walk_gfn_range(start, end, asce, &dat_slot_ops, + DAT_WALK_IGN_HOLES | DAT_WALK_ANY, (void *)token); +} + +unsigned long dat_get_ptval(struct page_table *table, struct ptval_param param) +{ + union pgste *pgstes = table->pgstes + param.offset; + struct page *page = virt_to_page(table); + unsigned long res = 0; + + lock_page(page); + switch (param.len) { + case 3: + res = pgstes->val16; + pgstes++; + fallthrough; + case 2: + res = res << 16 | pgstes->val16; + pgstes++; + fallthrough; + case 1: + res = res << 16 | pgstes->val16; + pgstes++; + fallthrough; + case 0: + res = res << 16 | pgstes->val16; + break; + } + unlock_page(page); + + return res; +} + +void dat_set_ptval(struct page_table *table, struct ptval_param param, unsigned long val) +{ + union pgste *pgstes = table->pgstes + param.offset; + struct page *page = virt_to_page(table); + + lock_page(page); + switch (param.len) { + case 3: + pgstes->val16 = val >> 48; + pgstes++; + fallthrough; + case 2: + pgstes->val16 = val >> 32; + pgstes++; + fallthrough; + case 1: + pgstes->val16 = val >> 16; + pgstes++; + fallthrough; + case 0: + pgstes->val16 = val; + break; + } + unlock_page(page); +} + +static long _dat_test_young_pte(union pte *ptep, gfn_t start, gfn_t end, struct dat_walk *walk) +{ + return ptep->s.y; +} + +static long _dat_test_young_crste(union crste *crstep, gfn_t start, gfn_t end, + struct dat_walk *walk) +{ + return crstep->h.fc && crstep->s.fc1.y; +} + +static const struct dat_walk_ops test_age_ops = { + .pte_entry = _dat_test_young_pte, + .pmd_entry = _dat_test_young_crste, + .pud_entry = 
_dat_test_young_crste, +}; + +/** + * dat_test_age_gfn() - check whether any page in a range of guest frames is young + * @asce: the ASCE of the guest address space to walk + * @start: the first gfn of the range to test + * @end: one past the last gfn of the range to test + * Context: called by KVM common code with the kvm mmu write lock held + * Return: true if any page in the given range is young, otherwise false. + */ +bool dat_test_age_gfn(union asce asce, gfn_t start, gfn_t end) +{ + return _dat_walk_gfn_range(start, end, asce, &test_age_ops, 0, NULL) > 0; +} + +int dat_link(kvm_pfn_t pfn, gfn_t gfn, union asce asce, int level, bool w, bool d, bool s, bool sk) +{ + union crste oldval, newval; + union pte newpte, oldpte; + union crste *crstep; + union pgste pgste; + union pte *ptep; + int rc = 0; + + rc = dat_entry_walk(gfn, asce, DAT_WALK_ALLOC_CONTINUE, level, &crstep, &ptep); + if (rc) + return rc == -EINVAL ? rc : -EAGAIN; + + if (WARN_ON_ONCE(unlikely(get_level(crstep, ptep) > level))) + return -EINVAL; + + if (ptep) { + pgste = pgste_get_lock(ptep); + oldpte = *ptep; + newpte = _pte(pfn, w, d | oldpte.s.d, s); + newpte.s.sd = oldpte.s.sd; + oldpte.s.sd = 0; + if (oldpte.val == _PTE_EMPTY.val || oldpte.h.pfra == pfn) + pgste = __dat_ptep_xchg(ptep, pgste, newpte, gfn, asce, sk); + else + rc = -EAGAIN; + pgste_set_unlock(ptep, pgste); + } else { + oldval = READ_ONCE(*crstep); + newval = _crste_fc1(pfn, oldval.h.tt, w, d | oldval.s.fc1.d); + newval.s.fc1.sd = oldval.s.fc1.sd; + if (oldval.val != _CRSTE_EMPTY(oldval.h.tt).val && + crste_origin_large(oldval) != crste_origin_large(newval)) + return -EAGAIN; + if (!dat_crstep_xchg_atomic(crstep, oldval, newval, gfn, asce)) + return -EAGAIN; + } + + return rc; +} + +static long dat_set_pn_crste(union crste *crstep, gfn_t gfn, gfn_t next, struct dat_walk *walk) +{ + union crste crste = READ_ONCE(*crstep); + int *n = walk->priv; + + if (!crste.h.fc || crste.h.i || crste.h.p) + return 0; + + *n = 2; + if (crste.s.fc1.prefix_notif) + return 0; + crste.s.fc1.prefix_notif = 1; + dat_crstep_xchg(crstep, crste, gfn, 
walk->asce); + return 0; +} + +static long dat_set_pn_pte(union pte *ptep, gfn_t gfn, gfn_t next, struct dat_walk *walk) +{ + int *n = walk->priv; + union pgste pgste; + + pgste = pgste_get_lock(ptep); + if (!ptep->h.i && !ptep->h.p) { + pgste.prefix_notif = 1; + *n += 1; + } + pgste_set_unlock(ptep, pgste); + return 0; +} + +int dat_set_prefix_notif_bit(union asce asce, gfn_t gfn) +{ + static const struct dat_walk_ops ops = { + .pte_entry = dat_set_pn_pte, + .pmd_entry = dat_set_pn_crste, + .pud_entry = dat_set_pn_crste, + }; + + int n = 0; + + _dat_walk_gfn_range(gfn, gfn + 2, asce, &ops, DAT_WALK_IGN_HOLES, &n); + if (n != 2) + return -EAGAIN; + return 0; +} diff --git a/arch/s390/kvm/dat.h b/arch/s390/kvm/dat.h index 40f5c1371ef3..b695eae5d763 100644 --- a/arch/s390/kvm/dat.h +++ b/arch/s390/kvm/dat.h @@ -374,6 +374,11 @@ struct dat_walk { void *priv; }; +struct ptval_param { + unsigned char offset : 6; + unsigned char len : 2; +}; + static inline union pte _pte(kvm_pfn_t pfn, bool w, bool d, bool s) { union pte res = { .val = PFN_PHYS(pfn) }; @@ -413,6 +418,18 @@ static inline union crste _crste_fc1(kvm_pfn_t pfn, int tt, bool w, bool d) return res; } +/* + * 0 1 2 3 4 5 6 7 + * +-------+-------+-------+-------+-------+-------+-------+-------+ + * 0 | | PGT_ADDR | + * 8 | VMADDR |SPLTCNT| | + * 16 | | + * 24 | | + */ +#define MKPTVAL(o, l) ((struct ptval_param) { .offset = (o), .len = ((l) + 1) / 2 - 1}) +#define PTVAL_PGT_ADDR MKPTVAL(4, 8) +#define PTVAL_VMADDR MKPTVAL(8, 6) + union pgste __must_check __dat_ptep_xchg(union pte *ptep, union pgste pgste, union pte new, gfn_t gfn, union asce asce, bool has_skeys); bool dat_crstep_xchg_atomic(union crste *crstep, union crste old, union crste new, gfn_t gfn, @@ -434,6 +451,14 @@ int dat_cond_set_storage_key(union asce asce, gfn_t gfn, union skey skey, union int dat_reset_reference_bit(union asce asce, gfn_t gfn); long dat_reset_skeys(union asce asce, gfn_t start); +unsigned long dat_get_ptval(struct page_table 
*table, struct ptval_param param); +void dat_set_ptval(struct page_table *table, struct ptval_param param, unsigned long val); + +int dat_set_slot(union asce asce, gfn_t start, gfn_t end, u16 type, u16 param); +int dat_set_prefix_notif_bit(union asce asce, gfn_t gfn); +bool dat_test_age_gfn(union asce asce, gfn_t start, gfn_t end); +int dat_link(kvm_pfn_t pfn, gfn_t gfn, union asce asce, int level, bool w, bool d, bool s, bool sk); + static inline struct crst_table *crste_table_start(union crste *crstep) { return (struct crst_table *)ALIGN_DOWN((unsigned long)crstep, _CRST_TABLE_SIZE); @@ -778,4 +803,14 @@ static inline int get_level(union crste *crstep, union pte *ptep) return ptep ? LEVEL_PTE : crstep->h.tt; } +static inline int dat_delete_slot(union asce asce, gfn_t start, unsigned long npages) +{ + return dat_set_slot(asce, start, start + npages, _DAT_TOKEN_PIC, PGM_ADDRESSING); +} + +static inline int dat_create_slot(union asce asce, gfn_t start, unsigned long npages) +{ + return dat_set_slot(asce, start, start + npages, _DAT_TOKEN_NONE, 0); +} + #endif /* __KVM_S390_DAT_H */ -- 2.51.0