__folio_set_anon() and folio_move_anon_rmap() decide whether to set FOLIO_MAPPING_ANON_VMA_LAZY. __folio_try_dup_anon_rmap() and hugetlb_try_dup_anon_rmap() upgrade the folio to FOLIO_MAPPING_ANON during fork() when required. rmap_walk_anon() detects ANON_VMA_LAZY upgrades and retries the walk to ensure the mapping is handled correctly. remove_rmap() needs no special handling since folio_mapped() is checked before use. Signed-off-by: tao --- include/linux/rmap.h | 38 ++++++++++++++++++++++++++++++++++++++ mm/rmap.c | 21 ++++++++++++++++++++- 2 files changed, 58 insertions(+), 1 deletion(-) diff --git a/include/linux/rmap.h b/include/linux/rmap.h index 59244481a8c1..9b1970698204 100644 --- a/include/linux/rmap.h +++ b/include/linux/rmap.h @@ -392,6 +392,14 @@ static __always_inline void __folio_rmap_sanity_checks(const struct folio *folio unsigned long mapping = (unsigned long)folio->mapping; struct anon_vma *anon_vma; + if (folio_test_anon_vma_lazy(folio)) { + struct vm_area_struct *root_vma = + (void *)(mapping - FOLIO_MAPPING_ANON_VMA_LAZY); + + VM_WARN_ON_FOLIO(!rcuref_read(&root_vma->vm_rcuref), folio); + return; + } + anon_vma = (void *)(mapping - FOLIO_MAPPING_ANON); VM_WARN_ON_FOLIO(atomic_read(&anon_vma->refcount) == 0, folio); } @@ -431,6 +439,31 @@ void hugetlb_add_anon_rmap(struct folio *, struct vm_area_struct *, void hugetlb_add_new_anon_rmap(struct folio *, struct vm_area_struct *, unsigned long address); +/** + * folio_upgrade_anon_vma_lazy - upgrade folio->mapping from ANON_VMA_LAZY to + * an anon_vma + * @folio: The folio to upgrade + * @vma: The VMA the folio currently belongs to + * + * Upgrade folio->mapping from ANON_VMA_LAZY to an anon_vma. + * This transition is strictly one-way and never reverts back to a lazy + * mapping. + * + * Called during fork() while holding the mmap lock and the VMA write lock, + * but without taking the folio lock. Concurrent readers may briefly observe + * the old lazy mapping. 
Migration relies on folio_trylock_get_anon_rmap() + * to ensure atomicity, while other rmap operations remain unaffected. + */ +static inline void folio_upgrade_anon_vma_lazy(struct folio *folio, + struct vm_area_struct *vma) +{ + unsigned long anon_tree = (unsigned long)vma->anon_vma; + + VM_BUG_ON_VMA(!anon_tree || !IS_ALIGNED(anon_tree, sizeof(long)), vma); + anon_tree = anon_tree + FOLIO_MAPPING_ANON; + WRITE_ONCE(folio->mapping, (struct address_space *)anon_tree); +} + /* See folio_try_dup_anon_rmap_*() */ static inline int hugetlb_try_dup_anon_rmap(struct folio *folio, struct vm_area_struct *vma) @@ -438,6 +471,9 @@ static inline int hugetlb_try_dup_anon_rmap(struct folio *folio, VM_WARN_ON_FOLIO(!folio_test_hugetlb(folio), folio); VM_WARN_ON_FOLIO(!folio_test_anon(folio), folio); + if (folio_test_anon_vma_lazy(folio)) + folio_upgrade_anon_vma_lazy(folio, vma); + if (PageAnonExclusive(&folio->page)) { if (unlikely(folio_needs_cow_for_dma(vma, folio))) return -EBUSY; @@ -573,6 +609,8 @@ static __always_inline int __folio_try_dup_anon_rmap(struct folio *folio, int i; VM_WARN_ON_FOLIO(!folio_test_anon(folio), folio); + if (folio_test_anon_vma_lazy(folio)) + folio_upgrade_anon_vma_lazy(folio, src_vma); __folio_rmap_sanity_checks(folio, page, nr_pages, level); /* diff --git a/mm/rmap.c b/mm/rmap.c index 46876b3dbfbc..e14509b47412 100644 --- a/mm/rmap.c +++ b/mm/rmap.c @@ -2002,6 +2002,16 @@ void folio_move_anon_rmap(struct folio *folio, struct vm_area_struct *vma) void *anon_vma = vma_anon_vma(vma); VM_BUG_ON_FOLIO(!folio_test_locked(folio), folio); + + if (!anon_vma) { + const struct vm_area_struct *root_vma = vma_anon_vma_lazy_root(vma); + + VM_BUG_ON_VMA(!root_vma, vma); + root_vma = (void *)root_vma + FOLIO_MAPPING_ANON_VMA_LAZY; + WRITE_ONCE(folio->mapping, (struct address_space *)root_vma); + return; + } + VM_BUG_ON_VMA(!anon_vma, vma); anon_vma += FOLIO_MAPPING_ANON; @@ -2023,7 +2033,16 @@ void folio_move_anon_rmap(struct folio *folio, struct vm_area_struct 
*vma) static void __folio_set_anon(struct folio *folio, struct vm_area_struct *vma, unsigned long address, bool exclusive) { - struct anon_vma *anon_vma = vma_anon_vma(vma); + anon_vma_tree_t anon_tree = vma->anon_vma; + const struct vm_area_struct *root_vma = vma_anon_vma_lazy_root(vma); + struct anon_vma *anon_vma = anon_vma_tree_anon_vma(anon_tree); + + if (root_vma && (anon_vma_tree_is_vma(anon_tree) || exclusive)) { + root_vma = (void *)root_vma + FOLIO_MAPPING_ANON_VMA_LAZY; + WRITE_ONCE(folio->mapping, (struct address_space *)root_vma); + folio->index = linear_page_index(vma, address); + return; + } BUG_ON(!anon_vma); -- 2.17.1