Guarantee that rmap_walk() is called on locked folios so that threads changing folio->mapping and folio->index for non-KSM anon folios can serialize on fine-grained folio lock rather than anon_vma lock. Other folio types are already always locked before rmap_walk(). This is in preparation for removing anon_vma write-lock from UFFDIO_MOVE. CC: David Hildenbrand CC: Lorenzo Stoakes CC: Harry Yoo CC: Peter Xu CC: Suren Baghdasaryan CC: Barry Song CC: SeongJae Park Signed-off-by: Lokesh Gidra --- mm/damon/ops-common.c | 16 ++++------------ mm/memory-failure.c | 3 +++ mm/page_idle.c | 8 ++------ mm/rmap.c | 42 ++++++++++++------------------------------ 4 files changed, 21 insertions(+), 48 deletions(-) diff --git a/mm/damon/ops-common.c b/mm/damon/ops-common.c index 998c5180a603..f61d6dde13dc 100644 --- a/mm/damon/ops-common.c +++ b/mm/damon/ops-common.c @@ -162,21 +162,17 @@ void damon_folio_mkold(struct folio *folio) .rmap_one = damon_folio_mkold_one, .anon_lock = folio_lock_anon_vma_read, }; - bool need_lock; if (!folio_mapped(folio) || !folio_raw_mapping(folio)) { folio_set_idle(folio); return; } - need_lock = !folio_test_anon(folio) || folio_test_ksm(folio); - if (need_lock && !folio_trylock(folio)) + if (!folio_trylock(folio)) return; rmap_walk(folio, &rwc); - - if (need_lock) - folio_unlock(folio); + folio_unlock(folio); } @@ -228,7 +224,6 @@ bool damon_folio_young(struct folio *folio) .rmap_one = damon_folio_young_one, .anon_lock = folio_lock_anon_vma_read, }; - bool need_lock; if (!folio_mapped(folio) || !folio_raw_mapping(folio)) { if (folio_test_idle(folio)) @@ -237,14 +232,11 @@ bool damon_folio_young(struct folio *folio) return true; } - need_lock = !folio_test_anon(folio) || folio_test_ksm(folio); - if (need_lock && !folio_trylock(folio)) + if (!folio_trylock(folio)) return false; rmap_walk(folio, &rwc); - - if (need_lock) - folio_unlock(folio); + folio_unlock(folio); return accessed; } diff --git a/mm/memory-failure.c b/mm/memory-failure.c index a24806bb8e82..f698df156bf8 100644 --- a/mm/memory-failure.c +++ b/mm/memory-failure.c @@ -2143,7 +2143,10 @@ static void kill_procs_now(struct page *p, unsigned long pfn, int flags, { LIST_HEAD(tokill); + folio_lock(folio); collect_procs(folio, p, &tokill, flags & MF_ACTION_REQUIRED); + folio_unlock(folio); + kill_procs(&tokill, true, pfn, flags); } diff --git a/mm/page_idle.c b/mm/page_idle.c index a82b340dc204..9bf573d22e87 100644 --- a/mm/page_idle.c +++ b/mm/page_idle.c @@ -101,19 +101,15 @@ static void page_idle_clear_pte_refs(struct folio *folio) .rmap_one = page_idle_clear_pte_refs_one, .anon_lock = folio_lock_anon_vma_read, }; - bool need_lock; if (!folio_mapped(folio) || !folio_raw_mapping(folio)) return; - need_lock = !folio_test_anon(folio) || folio_test_ksm(folio); - if (need_lock && !folio_trylock(folio)) + if (!folio_trylock(folio)) return; rmap_walk(folio, &rwc); - - if (need_lock) - folio_unlock(folio); + folio_unlock(folio); } static ssize_t page_idle_bitmap_read(struct file *file, struct kobject *kobj, diff --git a/mm/rmap.c b/mm/rmap.c index 34333ae3bd80..90584f5da379 100644 --- a/mm/rmap.c +++ b/mm/rmap.c @@ -489,17 +489,15 @@ void __init anon_vma_init(void) * if there is a mapcount, we can dereference the anon_vma after observing * those. * - * NOTE: the caller should normally hold folio lock when calling this. If - * not, the caller needs to double check the anon_vma didn't change after - * taking the anon_vma lock for either read or write (UFFDIO_MOVE can modify it - * concurrently without folio lock protection). See folio_lock_anon_vma_read() - * which has already covered that, and comment above remap_pages(). + * NOTE: the caller should hold folio lock when calling this. */ struct anon_vma *folio_get_anon_vma(const struct folio *folio) { struct anon_vma *anon_vma = NULL; unsigned long anon_mapping; + VM_WARN_ON_FOLIO(!folio_test_locked(folio), folio); + rcu_read_lock(); anon_mapping = (unsigned long)READ_ONCE(folio->mapping); if ((anon_mapping & FOLIO_MAPPING_FLAGS) != FOLIO_MAPPING_ANON) @@ -546,7 +544,8 @@ struct anon_vma *folio_lock_anon_vma_read(const struct folio *folio, struct anon_vma *root_anon_vma; unsigned long anon_mapping; -retry: + VM_WARN_ON_FOLIO(!folio_test_locked(folio), folio); + rcu_read_lock(); anon_mapping = (unsigned long)READ_ONCE(folio->mapping); if ((anon_mapping & FOLIO_MAPPING_FLAGS) != FOLIO_MAPPING_ANON) @@ -557,17 +556,6 @@ struct anon_vma *folio_lock_anon_vma_read(const struct folio *folio, anon_vma = (struct anon_vma *) (anon_mapping - FOLIO_MAPPING_ANON); root_anon_vma = READ_ONCE(anon_vma->root); if (down_read_trylock(&root_anon_vma->rwsem)) { - /* - * folio_move_anon_rmap() might have changed the anon_vma as we - * might not hold the folio lock here. - */ - if (unlikely((unsigned long)READ_ONCE(folio->mapping) != - anon_mapping)) { - up_read(&root_anon_vma->rwsem); - rcu_read_unlock(); - goto retry; - } - /* * If the folio is still mapped, then this anon_vma is still * its anon_vma, and holding the mutex ensures that it will @@ -602,18 +590,6 @@ struct anon_vma *folio_lock_anon_vma_read(const struct folio *folio, rcu_read_unlock(); anon_vma_lock_read(anon_vma); - /* - * folio_move_anon_rmap() might have changed the anon_vma as we might - * not hold the folio lock here. - */ - if (unlikely((unsigned long)READ_ONCE(folio->mapping) != - anon_mapping)) { - anon_vma_unlock_read(anon_vma); - put_anon_vma(anon_vma); - anon_vma = NULL; - goto retry; - } - if (atomic_dec_and_test(&anon_vma->refcount)) { /* * Oops, we held the last refcount, release the lock @@ -1005,7 +981,7 @@ int folio_referenced(struct folio *folio, int is_locked, if (!folio_raw_mapping(folio)) return 0; - if (!is_locked && (!folio_test_anon(folio) || folio_test_ksm(folio))) { + if (!is_locked) { we_locked = folio_trylock(folio); if (!we_locked) return 1; @@ -2815,6 +2791,12 @@ static void rmap_walk_anon(struct folio *folio, pgoff_t pgoff_start, pgoff_end; struct anon_vma_chain *avc; + /* + * The folio lock ensures that folio->mapping can't be changed under us + * to an anon_vma with different root. + */ + VM_WARN_ON_FOLIO(!folio_test_locked(folio), folio); + if (locked) { anon_vma = folio_anon_vma(folio); /* anon_vma disappear under us? */ -- 2.51.0.384.g4c02a37b29-goog