Add Svnapot-aware implementations of clear_full_ptes() and
get_and_clear_full_ptes() so full PTE batches can be cleared without
losing the required unfold semantics for NAPOT mappings.

Signed-off-by: Yunhui Cui
---
 arch/riscv/include/asm/pgtable.h |  93 +++++++++++++++++++++++++++++
 arch/riscv/mm/contpte.c          | 103 ++++++++++++++++++++++++++++++++
 2 files changed, 196 insertions(+)

diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h
index 722483d4df37f..3e6516b5a4587 100644
--- a/arch/riscv/include/asm/pgtable.h
+++ b/arch/riscv/include/asm/pgtable.h
@@ -764,6 +764,58 @@ __ptep_get_and_clear(struct mm_struct *mm, unsigned long address, pte_t *ptep)
 
 #define __ptep_get_and_clear	__ptep_get_and_clear
 
+/*
+ * Run __ptep_get_and_clear() on @nr consecutive PTEs starting at @ptep and
+ * discard the returned values. @nr must be at least 1: the count is only
+ * tested after the first entry has been processed. @full is not used.
+ */
+static inline void __clear_full_ptes(struct mm_struct *mm, unsigned long addr,
+				     pte_t *ptep, unsigned int nr, int full)
+{
+	(void)full;
+
+	for (;;) {
+		__ptep_get_and_clear(mm, addr, ptep);
+		if (--nr == 0)
+			break;
+		ptep++;
+		addr += PAGE_SIZE;
+	}
+}
+
+#define __clear_full_ptes	__clear_full_ptes
+
+/*
+ * Run __ptep_get_and_clear() on @nr consecutive PTEs starting at @ptep and
+ * return the first result, marked dirty and/or young if any later entry was.
+ * @nr must be at least 1; @full is not used.
+ */
+static inline pte_t __get_and_clear_full_ptes(struct mm_struct *mm,
+					      unsigned long addr,
+					      pte_t *ptep,
+					      unsigned int nr,
+					      int full)
+{
+	pte_t pte, tmp_pte;
+
+	(void)full;
+
+	pte = __ptep_get_and_clear(mm, addr, ptep);
+	while (--nr) {
+		ptep++;
+		addr += PAGE_SIZE;
+		tmp_pte = __ptep_get_and_clear(mm, addr, ptep);
+		if (pte_dirty(tmp_pte))
+			pte = pte_mkdirty(pte);
+		if (pte_young(tmp_pte))
+			pte = pte_mkyoung(pte);
+	}
+
+	return pte;
+}
+
+#define __get_and_clear_full_ptes	__get_and_clear_full_ptes
+
 static inline void __ptep_set_wrprotect(struct mm_struct *mm,
 					unsigned long address, pte_t *ptep)
 {
@@ -831,6 +883,11 @@ pte_t napotpte_ptep_get(pte_t *ptep, pte_t orig_pte);
 pte_t napotpte_ptep_get_lockless(pte_t *ptep);
 void napotpte_set_ptes(struct mm_struct *mm, unsigned long addr,
 		       pte_t *ptep, pte_t pte, unsigned int nr);
+void napotpte_clear_full_ptes(struct mm_struct *mm, unsigned long addr,
+			      pte_t *ptep, unsigned int nr, int full);
+pte_t napotpte_get_and_clear_full_ptes(struct mm_struct *mm,
+				       unsigned long addr, pte_t *ptep,
+				       unsigned int nr, int full);
 void napotpte_clear_young_dirty_ptes(struct vm_area_struct *vma,
 				     unsigned long addr, pte_t *ptep,
 				     unsigned int nr, cydp_t flags);
@@ -933,6 +990,40 @@ static inline void clear_young_dirty_ptes(struct vm_area_struct *vma,
 	napotpte_clear_young_dirty_ptes(vma, addr, ptep, nr, flags);
 }
 
+/*
+ * nr == 1 is handled inline: napotpte_try_unfold() is applied to the entry
+ * before it is cleared. Larger batches go to napotpte_clear_full_ptes().
+ */
+#define clear_full_ptes clear_full_ptes
+static inline void clear_full_ptes(struct mm_struct *mm, unsigned long addr,
+				   pte_t *ptep, unsigned int nr, int full)
+{
+	if (likely(nr == 1)) {
+		napotpte_try_unfold(mm, addr, ptep, __ptep_get(ptep));
+		__clear_full_ptes(mm, addr, ptep, nr, full);
+		return;
+	}
+
+	napotpte_clear_full_ptes(mm, addr, ptep, nr, full);
+}
+
+/*
+ * Same split as clear_full_ptes(), but the value produced by the clearing
+ * helper is returned to the caller.
+ */
+#define get_and_clear_full_ptes get_and_clear_full_ptes
+static inline pte_t get_and_clear_full_ptes(struct mm_struct *mm,
+					    unsigned long addr, pte_t *ptep,
+					    unsigned int nr, int full)
+{
+	if (likely(nr == 1)) {
+		napotpte_try_unfold(mm, addr, ptep, __ptep_get(ptep));
+		return __get_and_clear_full_ptes(mm, addr, ptep, nr, full);
+	}
+
+	return napotpte_get_and_clear_full_ptes(mm, addr, ptep, nr, full);
+}
+
 #define __HAVE_ARCH_PTEP_SET_WRPROTECT
 static inline void ptep_set_wrprotect(struct mm_struct *mm,
 				      unsigned long address, pte_t *ptep)
@@ -989,6 +1080,8 @@ napotpte_ptep_clear_flush_young(struct vm_area_struct *vma,
 #define ptep_get_lockless		__ptep_get_lockless
 #define ptep_get_and_clear		__ptep_get_and_clear
 #define clear_young_dirty_ptes		__clear_young_dirty_ptes
+#define clear_full_ptes			__clear_full_ptes
+#define get_and_clear_full_ptes		__get_and_clear_full_ptes
 #define __HAVE_ARCH_PTEP_SET_WRPROTECT
 #define ptep_set_wrprotect		__ptep_set_wrprotect
 #define __HAVE_ARCH_PTEP_CLEAR_YOUNG_FLUSH
diff --git a/arch/riscv/mm/contpte.c b/arch/riscv/mm/contpte.c
index f73af7d9b099a..77c2a4dbd3dda 100644
--- a/arch/riscv/mm/contpte.c
+++ b/arch/riscv/mm/contpte.c
@@ -107,6 +107,40 @@ __napot_ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep
 	return pte;
 }
 
+/* Clear @nr (>= 1) consecutive PTEs via __napot_ptep_get_and_clear(). */
+static void __napot_clear_full_ptes(struct mm_struct *mm, unsigned long addr,
+				    pte_t *ptep, unsigned int nr)
+{
+	for (;;) {
+		__napot_ptep_get_and_clear(mm, addr, ptep);
+		if (--nr == 0)
+			break;
+		ptep++;
+		addr += PAGE_SIZE;
+	}
+}
+
+/* Likewise, returning the first PTE with all dirty/young bits merged in. */
+static pte_t __napot_get_and_clear_full_ptes(struct mm_struct *mm,
+					     unsigned long addr, pte_t *ptep,
+					     unsigned int nr)
+{
+	pte_t pte, tmp_pte;
+
+	pte = __napot_ptep_get_and_clear(mm, addr, ptep);
+	while (--nr) {
+		ptep++;
+		addr += PAGE_SIZE;
+		tmp_pte = __napot_ptep_get_and_clear(mm, addr, ptep);
+		if (pte_dirty(tmp_pte))
+			pte = pte_mkdirty(pte);
+		if (pte_young(tmp_pte))
+			pte = pte_mkyoung(pte);
+	}
+
+	return pte;
+}
+
 static void napotpte_convert(struct mm_struct *mm, unsigned long addr,
 			     pte_t *ptep, pte_t target)
 {
@@ -202,6 +236,38 @@ void __napotpte_try_fold(struct mm_struct *mm, unsigned long addr,
 }
 EXPORT_SYMBOL(__napotpte_try_fold);
 
+/*
+ * Walk @nr PTEs starting at @ptep. For each entry that pte_present_napot()
+ * accepts, call __napotpte_try_unfold() and skip ahead to
+ * napot_align_addr(addr) + napotpte_size(); any other entry advances by one.
+ */
+static void napotpte_try_unfold_range(struct mm_struct *mm,
+				      unsigned long addr, pte_t *ptep,
+				      unsigned int nr)
+{
+	unsigned long next;
+	pte_t pte;
+	unsigned int chunk;
+
+	while (nr) {
+		pte = READ_ONCE(*ptep);
+		if (pte_present_napot(pte)) {
+			__napotpte_try_unfold(mm, addr, ptep, pte);
+			next = napot_align_addr(addr) + napotpte_size();
+			chunk = (next - addr) >> PAGE_SHIFT;
+		} else {
+			chunk = 1;
+		}
+
+		if (chunk > nr)
+			chunk = nr;
+
+		ptep += chunk;
+		addr += chunk * PAGE_SIZE;
+		nr -= chunk;
+	}
+}
+
 void __napotpte_try_unfold(struct mm_struct *mm, unsigned long addr,
 			   pte_t *ptep, pte_t pte)
 {
@@ -349,6 +415,43 @@ void napotpte_set_ptes(struct mm_struct *mm, unsigned long addr,
 }
 EXPORT_SYMBOL(napotpte_set_ptes);
 
+void napotpte_clear_full_ptes(struct mm_struct *mm, unsigned long addr,
+			      pte_t *ptep, unsigned int nr, int full)
+{
+	(void)full;
+
+	if (!napot_hw_supported() || !mm_is_user(mm)) {
+		__napot_clear_full_ptes(mm, addr, ptep, nr);
+		return;
+	}
+
+	/*
+	 * Unlike arm64 contpte, a Svnapot PTE block stores identical
+	 * napot-encoded entries across the whole block rather than per-page
+	 * PFNs. Batch zap paths must therefore unfold the whole covered range
+	 * so the core MM later sees ordinary per-page PTEs for rmap/rss/tlb
+	 * batching.
+	 */
+	napotpte_try_unfold_range(mm, addr, ptep, nr);
+	__napot_clear_full_ptes(mm, addr, ptep, nr);
+}
+EXPORT_SYMBOL(napotpte_clear_full_ptes);
+
+pte_t napotpte_get_and_clear_full_ptes(struct mm_struct *mm,
+				       unsigned long addr, pte_t *ptep,
+				       unsigned int nr, int full)
+{
+	(void)full;
+
+	if (!napot_hw_supported() || !mm_is_user(mm))
+		return __napot_get_and_clear_full_ptes(mm, addr, ptep, nr);
+
+	napotpte_try_unfold_range(mm, addr, ptep, nr);
+
+	return __napot_get_and_clear_full_ptes(mm, addr, ptep, nr);
+}
+EXPORT_SYMBOL(napotpte_get_and_clear_full_ptes);
+
 void napotpte_clear_young_dirty_ptes(struct vm_area_struct *vma,
 				     unsigned long addr, pte_t *ptep,
 				     unsigned int nr, cydp_t flags)
-- 
2.39.5