Unmapped was added as a parameter to __folio_split() and related call
sites to support splitting folios that are already in the midst of a
migration. This special case arose for device private folio migration,
since during migration the source and destination can disagree on the
folio size. Introduce split_unmapped_folio_to_order() to handle this
special case, which in turn removes the special casing the unmapped
parameter introduced into __folio_split().

Cc: Andrew Morton
Cc: David Hildenbrand
Cc: Zi Yan
Cc: Joshua Hahn
Cc: Rakie Kim
Cc: Byungchul Park
Cc: Gregory Price
Cc: Ying Huang
Cc: Alistair Popple
Cc: Oscar Salvador
Cc: Lorenzo Stoakes
Cc: Baolin Wang
Cc: "Liam R. Howlett"
Cc: Nico Pache
Cc: Ryan Roberts
Cc: Dev Jain
Cc: Barry Song
Cc: Lyude Paul
Cc: Danilo Krummrich
Cc: David Airlie
Cc: Simona Vetter
Cc: Ralph Campbell
Cc: Mika Penttilä
Cc: Matthew Brost
Cc: Francois Dugast
Suggested-by: Zi Yan
Signed-off-by: Balbir Singh
---
 include/linux/huge_mm.h |   5 +-
 mm/huge_memory.c        | 128 +++++++++++++++++++++++++++++++++------
 mm/migrate_device.c     |   3 +-
 3 files changed, 113 insertions(+), 23 deletions(-)

diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
index e2e91aa1a042..9155e683c08a 100644
--- a/include/linux/huge_mm.h
+++ b/include/linux/huge_mm.h
@@ -371,7 +371,8 @@ enum split_type {
 bool can_split_folio(struct folio *folio, int caller_pins, int *pextra_pins);
 int __split_huge_page_to_list_to_order(struct page *page, struct list_head *list,
-		unsigned int new_order, bool unmapped);
+		unsigned int new_order);
+int split_unmapped_folio_to_order(struct folio *folio, unsigned int new_order);
 int min_order_for_split(struct folio *folio);
 int split_folio_to_list(struct folio *folio, struct list_head *list);
 bool folio_split_supported(struct folio *folio, unsigned int new_order,
@@ -382,7 +383,7 @@ int folio_split(struct folio *folio, unsigned int new_order, struct page *page,
 static inline int split_huge_page_to_list_to_order(struct page *page,
 		struct list_head *list, unsigned int new_order)
 {
-	return __split_huge_page_to_list_to_order(page, list, new_order, false);
+	return __split_huge_page_to_list_to_order(page, list, new_order);
 }
 static inline int split_huge_page_to_order(struct page *page, unsigned int new_order)
 {
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 0184cd915f44..942bd8410c54 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -3747,7 +3747,6 @@ bool folio_split_supported(struct folio *folio, unsigned int new_order,
  * @lock_at: a page within @folio to be left locked to caller
  * @list: after-split folios will be put on it if non NULL
  * @split_type: perform uniform split or not (non-uniform split)
- * @unmapped: The pages are already unmapped, they are migration entries.
  *
  * It calls __split_unmapped_folio() to perform uniform and non-uniform split.
  * It is in charge of checking whether the split is supported or not and
@@ -3763,7 +3762,7 @@ bool folio_split_supported(struct folio *folio, unsigned int new_order,
  */
 static int __folio_split(struct folio *folio, unsigned int new_order,
 		struct page *split_at, struct page *lock_at,
-		struct list_head *list, enum split_type split_type, bool unmapped)
+		struct list_head *list, enum split_type split_type)
 {
 	struct deferred_split *ds_queue;
 	XA_STATE(xas, &folio->mapping->i_pages, folio->index);
@@ -3809,14 +3808,12 @@ static int __folio_split(struct folio *folio, unsigned int new_order,
 		 * is taken to serialise against parallel split or collapse
 		 * operations.
 		 */
-		if (!unmapped) {
-			anon_vma = folio_get_anon_vma(folio);
-			if (!anon_vma) {
-				ret = -EBUSY;
-				goto out;
-			}
-			anon_vma_lock_write(anon_vma);
+		anon_vma = folio_get_anon_vma(folio);
+		if (!anon_vma) {
+			ret = -EBUSY;
+			goto out;
 		}
+		anon_vma_lock_write(anon_vma);
 		mapping = NULL;
 	} else {
 		unsigned int min_order;
@@ -3882,8 +3879,7 @@ static int __folio_split(struct folio *folio, unsigned int new_order,
 		goto out_unlock;
 	}
 
-	if (!unmapped)
-		unmap_folio(folio);
+	unmap_folio(folio);
 
 	/* block interrupt reentry in xa_lock and spinlock */
 	local_irq_disable();
@@ -3976,8 +3972,7 @@ static int __folio_split(struct folio *folio, unsigned int new_order,
 			expected_refs = folio_expected_ref_count(new_folio) + 1;
 			folio_ref_unfreeze(new_folio, expected_refs);
 
-			if (!unmapped)
-				lru_add_split_folio(folio, new_folio, lruvec, list);
+			lru_add_split_folio(folio, new_folio, lruvec, list);
 
 			/*
 			 * Anonymous folio with swap cache.
@@ -4033,9 +4028,6 @@ static int __folio_split(struct folio *folio, unsigned int new_order,
 
 	local_irq_enable();
 
-	if (unmapped)
-		return ret;
-
 	if (nr_shmem_dropped)
 		shmem_uncharge(mapping->host, nr_shmem_dropped);
 
@@ -4079,6 +4071,104 @@ static int __folio_split(struct folio *folio, unsigned int new_order,
 	return ret;
 }
 
+/*
+ * This function is a helper for splitting folios that have already been
+ * unmapped. The use case is that the device or the CPU can refuse to migrate
+ * THP pages in the middle of migration, due to allocation issues on either side.
+ *
+ * The high level code is copied from __folio_split(). Since the pages are
+ * anonymous and already isolated from the LRU, the code has been simplified
+ * to avoid burdening __folio_split() with an "unmapped" special case.
+ *
+ * None of the split folios are unlocked.
+ */
+int split_unmapped_folio_to_order(struct folio *folio, unsigned int new_order)
+{
+	int extra_pins;
+	int ret = 0;
+	struct folio *new_folio, *next;
+	struct folio *end_folio = folio_next(folio);
+	struct deferred_split *ds_queue;
+	int old_order = folio_order(folio);
+
+	VM_WARN_ON_FOLIO(folio_mapped(folio), folio);
+	VM_WARN_ON_ONCE_FOLIO(!folio_test_locked(folio), folio);
+	VM_WARN_ON_ONCE_FOLIO(!folio_test_large(folio), folio);
+
+	if (!can_split_folio(folio, 1, &extra_pins)) {
+		ret = -EAGAIN;
+		goto err;
+	}
+
+	local_irq_disable();
+	/* Prevent deferred_split_scan() touching ->_refcount */
+	ds_queue = folio_split_queue_lock(folio);
+	if (folio_ref_freeze(folio, 1 + extra_pins)) {
+		int expected_refs;
+		struct swap_cluster_info *ci = NULL;
+
+		if (old_order > 1) {
+			if (!list_empty(&folio->_deferred_list)) {
+				ds_queue->split_queue_len--;
+				/*
+				 * Reinitialize page_deferred_list after
+				 * removing the page from the split_queue,
+				 * otherwise a subsequent split will see list
+				 * corruption when checking the
+				 * page_deferred_list.
+				 */
+				list_del_init(&folio->_deferred_list);
+			}
+			if (folio_test_partially_mapped(folio)) {
+				folio_clear_partially_mapped(folio);
+				mod_mthp_stat(old_order,
					MTHP_STAT_NR_ANON_PARTIALLY_MAPPED, -1);
+			}
+		}
+		split_queue_unlock(ds_queue);
+
+		if (folio_test_swapcache(folio))
+			ci = swap_cluster_get_and_lock(folio);
+
+		ret = __split_unmapped_folio(folio, new_order, &folio->page,
+				NULL, NULL, SPLIT_TYPE_UNIFORM);
+
+		/*
+		 * Unfreeze after-split folios
+		 */
+		for (new_folio = folio_next(folio); new_folio != end_folio;
+		     new_folio = next) {
+			next = folio_next(new_folio);
+
+			zone_device_private_split_cb(folio, new_folio);
+
+			expected_refs = folio_expected_ref_count(new_folio) + 1;
+			folio_ref_unfreeze(new_folio, expected_refs);
+			if (ci)
+				__swap_cache_replace_folio(ci, folio, new_folio);
+		}
+
+		zone_device_private_split_cb(folio, NULL);
+		/*
+		 * Unfreeze @folio only after all page cache entries, which
+		 * used to point to it, have been updated with new folios.
+		 * Otherwise, a parallel folio_try_get() can grab @folio
+		 * and its caller can see stale page cache entries.
+		 */
+		expected_refs = folio_expected_ref_count(folio) + 1;
+		folio_ref_unfreeze(folio, expected_refs);
+
+		if (ci)
+			swap_cluster_unlock(ci);
+	} else {
+		split_queue_unlock(ds_queue);
+		ret = -EAGAIN;
+	}
+	local_irq_enable();
+err:
+	return ret;
+}
+
 /*
  * This function splits a large folio into smaller folios of order @new_order.
  * @page can point to any page of the large folio to split. The split operation
@@ -4127,12 +4217,12 @@ static int __folio_split(struct folio *folio, unsigned int new_order,
  * with the folio. Splitting to order 0 is compatible with all folios.
  */
 int __split_huge_page_to_list_to_order(struct page *page, struct list_head *list,
-		unsigned int new_order, bool unmapped)
+		unsigned int new_order)
 {
 	struct folio *folio = page_folio(page);
 
 	return __folio_split(folio, new_order, &folio->page, page, list,
-			SPLIT_TYPE_UNIFORM, unmapped);
+			SPLIT_TYPE_UNIFORM);
 }
 
 /**
@@ -4163,7 +4253,7 @@ int folio_split(struct folio *folio, unsigned int new_order,
 		struct page *split_at, struct list_head *list)
 {
 	return __folio_split(folio, new_order, split_at, &folio->page, list,
-			SPLIT_TYPE_NON_UNIFORM, false);
+			SPLIT_TYPE_NON_UNIFORM);
 }
 
 int min_order_for_split(struct folio *folio)
diff --git a/mm/migrate_device.c b/mm/migrate_device.c
index c50abbd32f21..1abe71b0e77e 100644
--- a/mm/migrate_device.c
+++ b/mm/migrate_device.c
@@ -918,8 +918,7 @@ static int migrate_vma_split_unmapped_folio(struct migrate_vma *migrate,
 
 	folio_get(folio);
 	split_huge_pmd_address(migrate->vma, addr, true);
-	ret = __split_huge_page_to_list_to_order(folio_page(folio, 0), NULL,
-						 0, true);
+	ret = split_unmapped_folio_to_order(folio, 0);
 	if (ret)
 		return ret;
 	migrate->src[idx] &= ~MIGRATE_PFN_COMPOUND;
-- 
2.51.1
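
P.S. For reviewers, a minimal sketch of the intended call pattern for
split_unmapped_folio_to_order(), under the assumptions the helper relies
on (folio locked, large, fully unmapped, off the LRU). The wrapper name
demote_unmapped_thp() is hypothetical and not part of this series:

static int demote_unmapped_thp(struct folio *folio)
{
	int ret;

	/*
	 * Caller contract, mirroring migrate_vma_split_unmapped_folio():
	 * the folio is locked, large, off the LRU, and its mappings have
	 * already been replaced with migration entries.
	 */
	VM_WARN_ON_ONCE_FOLIO(!folio_test_locked(folio), folio);
	VM_WARN_ON_ONCE_FOLIO(!folio_test_large(folio), folio);
	VM_WARN_ON_FOLIO(folio_mapped(folio), folio);

	/* Split all the way down to order-0 folios. */
	ret = split_unmapped_folio_to_order(folio, 0);
	if (ret)	/* e.g. -EAGAIN when extra pins are found */
		return ret;

	/* All after-split folios remain locked; unlocking is the caller's job. */
	return 0;
}

Unlike split_huge_page_to_list_to_order(), no anon_vma lock is taken, no
unmap_folio() is performed, and the after-split folios are not added back
to an LRU list.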