Lazy MMU mode is assumed to be context-independent, in the sense that it does not need any additional information while operating. However, the s390 architecture benefits from knowing the exact page table entries being modified. Introduce lazy_mmu_mode_enable_with_ptes(), which is provided with the process address space and the page table being operated on. This information is required to enable s390-specific optimizations. The function takes parameters that are typically passed to page-table level walkers, which implies that the span of PTE entries never crosses a page table boundary. Architectures that do not require such information simply do not need to define the arch_enter_lazy_mmu_mode_with_ptes() callback. Reviewed-by: Kevin Brodsky Acked-by: David Hildenbrand (Arm) Signed-off-by: Alexander Gordeev --- fs/proc/task_mmu.c | 2 +- include/linux/pgtable.h | 46 +++++++++++++++++++++++++++++++++++++++++ mm/madvise.c | 8 +++---- mm/memory.c | 8 +++---- mm/mprotect.c | 2 +- mm/mremap.c | 2 +- mm/vmalloc.c | 6 +++--- 7 files changed, 60 insertions(+), 14 deletions(-) diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index 751b9ba160fb..a02a83c390b9 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c @@ -2752,7 +2752,7 @@ static int pagemap_scan_pmd_entry(pmd_t *pmd, unsigned long start, return 0; } - lazy_mmu_mode_enable(); + lazy_mmu_mode_enable_with_ptes(vma->vm_mm, start, end, start_pte); if ((p->arg.flags & PM_SCAN_WP_MATCHING) && !p->vec_out) { /* Fast path for performing exclusive WP */ diff --git a/include/linux/pgtable.h b/include/linux/pgtable.h index cdd68ed3ae1a..6e582b9e58f3 100644 --- a/include/linux/pgtable.h +++ b/include/linux/pgtable.h @@ -271,6 +271,50 @@ static inline void lazy_mmu_mode_enable(void) arch_enter_lazy_mmu_mode(); } +#ifndef arch_enter_lazy_mmu_mode_with_ptes +static inline void arch_enter_lazy_mmu_mode_with_ptes(struct mm_struct *mm, + unsigned long addr, unsigned long end, pte_t *ptep) +{ + arch_enter_lazy_mmu_mode(); +} 
+#endif + +/** + * lazy_mmu_mode_enable_with_ptes() - Enable the lazy MMU mode with a speedup hint. + * @mm: Address space the pages are mapped into. + * @addr: Start address of the range. + * @end: End address of the range. + * @ptep: Page table pointer for the first entry. + * + * Enters a new lazy MMU mode section; if the mode was not already enabled, + * enables it and calls arch_enter_lazy_mmu_mode_with_ptes(). + * + * PTEs that fall within the specified range might observe update speedups. + * The PTEs must belong to the specified address space and be in the same PMD. + * + * There are no requirements on the order or range completeness of PTE + * updates for the specified range. + * + * Must be paired with a call to lazy_mmu_mode_disable(). + * + * Has no effect if called: + * - While paused - see lazy_mmu_mode_pause() + * - In interrupt context + */ +static inline void lazy_mmu_mode_enable_with_ptes(struct mm_struct *mm, + unsigned long addr, unsigned long end, pte_t *ptep) +{ + struct lazy_mmu_state *state = &current->lazy_mmu_state; + + if (in_interrupt() || state->pause_count > 0) + return; + + VM_WARN_ON_ONCE(state->enable_count == U8_MAX); + + if (state->enable_count++ == 0) + arch_enter_lazy_mmu_mode_with_ptes(mm, addr, end, ptep); +} + /** * lazy_mmu_mode_disable() - Disable the lazy MMU mode. 
* @@ -353,6 +397,8 @@ static inline void lazy_mmu_mode_resume(void) } #else static inline void lazy_mmu_mode_enable(void) {} +static inline void lazy_mmu_mode_enable_with_ptes(struct mm_struct *mm, + unsigned long addr, unsigned long end, pte_t *ptep) {} static inline void lazy_mmu_mode_disable(void) {} static inline void lazy_mmu_mode_pause(void) {} static inline void lazy_mmu_mode_resume(void) {} diff --git a/mm/madvise.c b/mm/madvise.c index 69708e953cf5..de39703c26a1 100644 --- a/mm/madvise.c +++ b/mm/madvise.c @@ -453,7 +453,7 @@ static int madvise_cold_or_pageout_pte_range(pmd_t *pmd, if (!start_pte) return 0; flush_tlb_batched_pending(mm); - lazy_mmu_mode_enable(); + lazy_mmu_mode_enable_with_ptes(mm, addr, end, start_pte); for (; addr < end; pte += nr, addr += nr * PAGE_SIZE) { nr = 1; ptent = ptep_get(pte); @@ -508,7 +508,7 @@ static int madvise_cold_or_pageout_pte_range(pmd_t *pmd, if (!start_pte) break; flush_tlb_batched_pending(mm); - lazy_mmu_mode_enable(); + lazy_mmu_mode_enable_with_ptes(mm, addr, end, start_pte); if (!err) nr = 0; continue; @@ -675,7 +675,7 @@ static int madvise_free_pte_range(pmd_t *pmd, unsigned long addr, if (!start_pte) return 0; flush_tlb_batched_pending(mm); - lazy_mmu_mode_enable(); + lazy_mmu_mode_enable_with_ptes(mm, addr, end, start_pte); for (; addr != end; pte += nr, addr += PAGE_SIZE * nr) { nr = 1; ptent = ptep_get(pte); @@ -735,7 +735,7 @@ static int madvise_free_pte_range(pmd_t *pmd, unsigned long addr, if (!start_pte) break; flush_tlb_batched_pending(mm); - lazy_mmu_mode_enable(); + lazy_mmu_mode_enable_with_ptes(mm, addr, end, pte); if (!err) nr = 0; continue; diff --git a/mm/memory.c b/mm/memory.c index 86a973119bd4..e4487564b166 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -1272,7 +1272,7 @@ copy_pte_range(struct vm_area_struct *dst_vma, struct vm_area_struct *src_vma, spin_lock_nested(src_ptl, SINGLE_DEPTH_NESTING); orig_src_pte = src_pte; orig_dst_pte = dst_pte; - lazy_mmu_mode_enable(); + 
lazy_mmu_mode_enable_with_ptes(src_mm, addr, end, src_pte); do { nr = 1; @@ -1922,7 +1922,7 @@ static unsigned long zap_pte_range(struct mmu_gather *tlb, return addr; flush_tlb_batched_pending(mm); - lazy_mmu_mode_enable(); + lazy_mmu_mode_enable_with_ptes(mm, addr, end, start_pte); do { bool any_skipped = false; @@ -2919,7 +2919,7 @@ static int remap_pte_range(struct mm_struct *mm, pmd_t *pmd, mapped_pte = pte = pte_alloc_map_lock(mm, pmd, addr, &ptl); if (!pte) return -ENOMEM; - lazy_mmu_mode_enable(); + lazy_mmu_mode_enable_with_ptes(mm, addr, end, mapped_pte); do { BUG_ON(!pte_none(ptep_get(pte))); if (!pfn_modify_allowed(pfn, prot)) { @@ -3330,7 +3330,7 @@ static int apply_to_pte_range(struct mm_struct *mm, pmd_t *pmd, return -EINVAL; } - lazy_mmu_mode_enable(); + lazy_mmu_mode_enable_with_ptes(mm, addr, end, mapped_pte); if (fn) { do { diff --git a/mm/mprotect.c b/mm/mprotect.c index 9cbf932b028c..3fc26418e837 100644 --- a/mm/mprotect.c +++ b/mm/mprotect.c @@ -337,7 +337,7 @@ static long change_pte_range(struct mmu_gather *tlb, is_private_single_threaded = vma_is_single_threaded_private(vma); flush_tlb_batched_pending(vma->vm_mm); - lazy_mmu_mode_enable(); + lazy_mmu_mode_enable_with_ptes(vma->vm_mm, addr, end, pte); do { nr_ptes = 1; oldpte = ptep_get(pte); diff --git a/mm/mremap.c b/mm/mremap.c index e9c8b1d05832..0dfe3de39ccc 100644 --- a/mm/mremap.c +++ b/mm/mremap.c @@ -260,7 +260,7 @@ static int move_ptes(struct pagetable_move_control *pmc, if (new_ptl != old_ptl) spin_lock_nested(new_ptl, SINGLE_DEPTH_NESTING); flush_tlb_batched_pending(vma->vm_mm); - lazy_mmu_mode_enable(); + lazy_mmu_mode_enable_with_ptes(mm, old_addr, old_end, old_ptep); for (; old_addr < old_end; old_ptep += nr_ptes, old_addr += nr_ptes * PAGE_SIZE, new_ptep += nr_ptes, new_addr += nr_ptes * PAGE_SIZE) { diff --git a/mm/vmalloc.c b/mm/vmalloc.c index bb6ae08d18f5..11c9c78072ae 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -108,7 +108,7 @@ static int vmap_pte_range(pmd_t *pmd, 
unsigned long addr, unsigned long end, if (!pte) return -ENOMEM; - lazy_mmu_mode_enable(); + lazy_mmu_mode_enable_with_ptes(&init_mm, addr, end, pte); do { if (unlikely(!pte_none(ptep_get(pte)))) { @@ -371,7 +371,7 @@ static void vunmap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end, unsigned long size = PAGE_SIZE; pte = pte_offset_kernel(pmd, addr); - lazy_mmu_mode_enable(); + lazy_mmu_mode_enable_with_ptes(&init_mm, addr, end, pte); do { #ifdef CONFIG_HUGETLB_PAGE @@ -538,7 +538,7 @@ static int vmap_pages_pte_range(pmd_t *pmd, unsigned long addr, if (!pte) return -ENOMEM; - lazy_mmu_mode_enable(); + lazy_mmu_mode_enable_with_ptes(&init_mm, addr, end, pte); do { struct page *page = pages[*nr]; -- 2.53.0