Some platforms can customize the PTE PMD entry soft-dirty bit making it unavailable even if the architecture provides the resource. Add an API which architectures can define their specific implementations to detect if soft-dirty bit is available on which device the kernel is running. This patch is removing "ifdef CONFIG_MEM_SOFT_DIRTY" in favor of pgtable_supports_soft_dirty() checks that defaults to IS_ENABLED(CONFIG_MEM_SOFT_DIRTY), if not overridden by the architecture, no change in behavior is expected. We make sure to never set VM_SOFTDIRTY if !pgtable_supports_soft_dirty(), so we will never run into VM_SOFTDIRTY checks. Acked-by: David Hildenbrand Signed-off-by: Chunyan Zhang --- fs/proc/task_mmu.c | 15 ++++++--------- include/linux/mm.h | 3 +++ include/linux/pgtable.h | 12 ++++++++++++ mm/debug_vm_pgtable.c | 10 +++++----- mm/huge_memory.c | 13 +++++++------ mm/internal.h | 2 +- mm/mmap.c | 6 ++++-- mm/mremap.c | 13 +++++++------ mm/userfaultfd.c | 10 ++++------ mm/vma.c | 6 ++++-- mm/vma_exec.c | 5 ++++- 11 files changed, 57 insertions(+), 38 deletions(-) diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index ced01cf3c5ab..18c55e21bd16 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c @@ -1582,8 +1582,6 @@ struct clear_refs_private { enum clear_refs_types type; }; -#ifdef CONFIG_MEM_SOFT_DIRTY - static inline bool pte_is_pinned(struct vm_area_struct *vma, unsigned long addr, pte_t pte) { struct folio *folio; @@ -1603,6 +1601,8 @@ static inline bool pte_is_pinned(struct vm_area_struct *vma, unsigned long addr, static inline void clear_soft_dirty(struct vm_area_struct *vma, unsigned long addr, pte_t *pte) { + if (!pgtable_supports_soft_dirty()) + return; /* * The soft-dirty tracker uses #PF-s to catch writes * to pages, so write-protect the pte as well. See the @@ -1625,19 +1625,16 @@ static inline void clear_soft_dirty(struct vm_area_struct *vma, set_pte_at(vma->vm_mm, addr, pte, ptent); } } -#else -static inline void clear_soft_dirty(struct vm_area_struct *vma, - unsigned long addr, pte_t *pte) -{ -} -#endif -#if defined(CONFIG_MEM_SOFT_DIRTY) && defined(CONFIG_TRANSPARENT_HUGEPAGE) +#if defined(CONFIG_TRANSPARENT_HUGEPAGE) static inline void clear_soft_dirty_pmd(struct vm_area_struct *vma, unsigned long addr, pmd_t *pmdp) { pmd_t old, pmd = *pmdp; + if (!pgtable_supports_soft_dirty()) + return; + if (pmd_present(pmd)) { /* See comment in change_huge_pmd() */ old = pmdp_invalidate(vma, addr, pmdp); diff --git a/include/linux/mm.h b/include/linux/mm.h index 2daa97048974..f9da2ec5b05e 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -798,6 +798,7 @@ static inline void vma_init(struct vm_area_struct *vma, struct mm_struct *mm) static inline void vm_flags_init(struct vm_area_struct *vma, vm_flags_t flags) { + VM_WARN_ON_ONCE(!pgtable_supports_soft_dirty() && (flags & VM_SOFTDIRTY)); ACCESS_PRIVATE(vma, __vm_flags) = flags; } @@ -816,6 +817,7 @@ static inline void vm_flags_reset(struct vm_area_struct *vma, static inline void vm_flags_reset_once(struct vm_area_struct *vma, vm_flags_t flags) { + VM_WARN_ON_ONCE(!pgtable_supports_soft_dirty() && (flags & VM_SOFTDIRTY)); vma_assert_write_locked(vma); WRITE_ONCE(ACCESS_PRIVATE(vma, __vm_flags), flags); } @@ -823,6 +825,7 @@ static inline void vm_flags_reset_once(struct vm_area_struct *vma, static inline void vm_flags_set(struct vm_area_struct *vma, vm_flags_t flags) { + VM_WARN_ON_ONCE(!pgtable_supports_soft_dirty() && (flags & VM_SOFTDIRTY)); vma_start_write(vma); ACCESS_PRIVATE(vma, __vm_flags) |= flags; } diff --git a/include/linux/pgtable.h b/include/linux/pgtable.h index 32e8457ad535..b13b6f42be3c 100644 --- a/include/linux/pgtable.h +++ b/include/linux/pgtable.h @@ -1553,6 +1553,18 @@ static inline pgprot_t pgprot_modify(pgprot_t oldprot, pgprot_t newprot) #define arch_start_context_switch(prev) do {} while (0) #endif +/* + * Some platforms can customize the PTE soft-dirty bit making it unavailable + * even if the architecture provides the resource. + * Adding this API allows architectures to add their own checks for the + * devices on which the kernel is running. + * Note: When overriding it, please make sure the CONFIG_MEM_SOFT_DIRTY + * is part of this macro. + */ +#ifndef pgtable_supports_soft_dirty +#define pgtable_supports_soft_dirty() IS_ENABLED(CONFIG_MEM_SOFT_DIRTY) +#endif + #ifdef CONFIG_HAVE_ARCH_SOFT_DIRTY #ifndef CONFIG_ARCH_ENABLE_THP_MIGRATION static inline pmd_t pmd_swp_mksoft_dirty(pmd_t pmd) diff --git a/mm/debug_vm_pgtable.c b/mm/debug_vm_pgtable.c index 830107b6dd08..6a5b226bda28 100644 --- a/mm/debug_vm_pgtable.c +++ b/mm/debug_vm_pgtable.c @@ -690,7 +690,7 @@ static void __init pte_soft_dirty_tests(struct pgtable_debug_args *args) { pte_t pte = pfn_pte(args->fixed_pte_pfn, args->page_prot); - if (!IS_ENABLED(CONFIG_MEM_SOFT_DIRTY)) + if (!pgtable_supports_soft_dirty()) return; pr_debug("Validating PTE soft dirty\n"); @@ -702,7 +702,7 @@ static void __init pte_swap_soft_dirty_tests(struct pgtable_debug_args *args) { pte_t pte; - if (!IS_ENABLED(CONFIG_MEM_SOFT_DIRTY)) + if (!pgtable_supports_soft_dirty()) return; pr_debug("Validating PTE swap soft dirty\n"); @@ -718,7 +718,7 @@ static void __init pmd_soft_dirty_tests(struct pgtable_debug_args *args) { pmd_t pmd; - if (!IS_ENABLED(CONFIG_MEM_SOFT_DIRTY)) + if (!pgtable_supports_soft_dirty()) return; if (!has_transparent_hugepage()) @@ -734,8 +734,8 @@ static void __init pmd_swap_soft_dirty_tests(struct pgtable_debug_args *args) { pmd_t pmd; - if (!IS_ENABLED(CONFIG_MEM_SOFT_DIRTY) || - !IS_ENABLED(CONFIG_ARCH_ENABLE_THP_MIGRATION)) + if (!pgtable_supports_soft_dirty() || + !IS_ENABLED(CONFIG_ARCH_ENABLE_THP_MIGRATION)) return; if (!has_transparent_hugepage()) diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 5acca24bbabb..85dca384375e 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -2263,12 +2263,13 @@ static inline int pmd_move_must_withdraw(spinlock_t *new_pmd_ptl, static pmd_t move_soft_dirty_pmd(pmd_t pmd) { -#ifdef CONFIG_MEM_SOFT_DIRTY - if (unlikely(is_pmd_migration_entry(pmd))) - pmd = pmd_swp_mksoft_dirty(pmd); - else if (pmd_present(pmd)) - pmd = pmd_mksoft_dirty(pmd); -#endif + if (pgtable_supports_soft_dirty()) { + if (unlikely(is_pmd_migration_entry(pmd))) + pmd = pmd_swp_mksoft_dirty(pmd); + else if (pmd_present(pmd)) + pmd = pmd_mksoft_dirty(pmd); + } + return pmd; } diff --git a/mm/internal.h b/mm/internal.h index 085e34f84bae..25fbed57f8a2 100644 --- a/mm/internal.h +++ b/mm/internal.h @@ -1530,7 +1530,7 @@ static inline bool vma_soft_dirty_enabled(struct vm_area_struct *vma) * VM_SOFTDIRTY is defined as 0x0, then !(vm_flags & VM_SOFTDIRTY) * will be constantly true. */ - if (!IS_ENABLED(CONFIG_MEM_SOFT_DIRTY)) + if (!pgtable_supports_soft_dirty()) return false; /* diff --git a/mm/mmap.c b/mm/mmap.c index 5fd3b80fda1d..4b779b090000 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -1451,8 +1451,10 @@ static struct vm_area_struct *__install_special_mapping( return ERR_PTR(-ENOMEM); vma_set_range(vma, addr, addr + len, 0); - vm_flags_init(vma, (vm_flags | mm->def_flags | - VM_DONTEXPAND | VM_SOFTDIRTY) & ~VM_LOCKED_MASK); + vm_flags |= mm->def_flags | VM_DONTEXPAND; + if (pgtable_supports_soft_dirty()) + vm_flags |= VM_SOFTDIRTY; + vm_flags_init(vma, vm_flags & ~VM_LOCKED_MASK); vma->vm_page_prot = vm_get_page_prot(vma->vm_flags); vma->vm_ops = ops; diff --git a/mm/mremap.c b/mm/mremap.c index 35de0a7b910e..35a135cd149a 100644 --- a/mm/mremap.c +++ b/mm/mremap.c @@ -162,12 +162,13 @@ static pte_t move_soft_dirty_pte(pte_t pte) * Set soft dirty bit so we can notice * in userspace the ptes were moved. */ -#ifdef CONFIG_MEM_SOFT_DIRTY - if (pte_present(pte)) - pte = pte_mksoft_dirty(pte); - else if (is_swap_pte(pte)) - pte = pte_swp_mksoft_dirty(pte); -#endif + if (pgtable_supports_soft_dirty()) { + if (pte_present(pte)) + pte = pte_mksoft_dirty(pte); + else if (is_swap_pte(pte)) + pte = pte_swp_mksoft_dirty(pte); + } + return pte; } diff --git a/mm/userfaultfd.c b/mm/userfaultfd.c index af61b95c89e4..ea8ce18483fe 100644 --- a/mm/userfaultfd.c +++ b/mm/userfaultfd.c @@ -1116,9 +1116,8 @@ static long move_present_ptes(struct mm_struct *mm, orig_dst_pte = folio_mk_pte(src_folio, dst_vma->vm_page_prot); /* Set soft dirty bit so userspace can notice the pte was moved */ -#ifdef CONFIG_MEM_SOFT_DIRTY - orig_dst_pte = pte_mksoft_dirty(orig_dst_pte); -#endif + if (pgtable_supports_soft_dirty()) + orig_dst_pte = pte_mksoft_dirty(orig_dst_pte); if (pte_dirty(orig_src_pte)) orig_dst_pte = pte_mkdirty(orig_dst_pte); orig_dst_pte = pte_mkwrite(orig_dst_pte, dst_vma); @@ -1205,9 +1204,8 @@ static int move_swap_pte(struct mm_struct *mm, struct vm_area_struct *dst_vma, } orig_src_pte = ptep_get_and_clear(mm, src_addr, src_pte); -#ifdef CONFIG_MEM_SOFT_DIRTY - orig_src_pte = pte_swp_mksoft_dirty(orig_src_pte); -#endif + if (pgtable_supports_soft_dirty()) + orig_src_pte = pte_swp_mksoft_dirty(orig_src_pte); set_pte_at(mm, dst_addr, dst_pte, orig_src_pte); double_pt_unlock(dst_ptl, src_ptl); diff --git a/mm/vma.c b/mm/vma.c index a1ec405bda25..e30d3b23e8c7 100644 --- a/mm/vma.c +++ b/mm/vma.c @@ -2568,7 +2568,8 @@ static void __mmap_complete(struct mmap_state *map, struct vm_area_struct *vma) * then new mapped in-place (which must be aimed as * a completely new data area). */ - vm_flags_set(vma, VM_SOFTDIRTY); + if (pgtable_supports_soft_dirty()) + vm_flags_set(vma, VM_SOFTDIRTY); vma_set_page_prot(vma); } @@ -2843,7 +2844,8 @@ int do_brk_flags(struct vma_iterator *vmi, struct vm_area_struct *vma, mm->data_vm += len >> PAGE_SHIFT; if (vm_flags & VM_LOCKED) mm->locked_vm += (len >> PAGE_SHIFT); - vm_flags_set(vma, VM_SOFTDIRTY); + if (pgtable_supports_soft_dirty()) + vm_flags_set(vma, VM_SOFTDIRTY); return 0; mas_store_fail: diff --git a/mm/vma_exec.c b/mm/vma_exec.c index 922ee51747a6..a822fb73f4e2 100644 --- a/mm/vma_exec.c +++ b/mm/vma_exec.c @@ -107,6 +107,7 @@ int relocate_vma_down(struct vm_area_struct *vma, unsigned long shift) int create_init_stack_vma(struct mm_struct *mm, struct vm_area_struct **vmap, unsigned long *top_mem_p) { + unsigned long flags = VM_STACK_FLAGS | VM_STACK_INCOMPLETE_SETUP; int err; struct vm_area_struct *vma = vm_area_alloc(mm); @@ -137,7 +138,9 @@ int create_init_stack_vma(struct mm_struct *mm, struct vm_area_struct **vmap, BUILD_BUG_ON(VM_STACK_FLAGS & VM_STACK_INCOMPLETE_SETUP); vma->vm_end = STACK_TOP_MAX; vma->vm_start = vma->vm_end - PAGE_SIZE; - vm_flags_init(vma, VM_SOFTDIRTY | VM_STACK_FLAGS | VM_STACK_INCOMPLETE_SETUP); + if (pgtable_supports_soft_dirty()) + flags |= VM_SOFTDIRTY; + vm_flags_init(vma, flags); vma->vm_page_prot = vm_get_page_prot(vma->vm_flags); err = insert_vm_struct(mm, vma); -- 2.34.1