Folio split clears PG_has_hwpoisoned, but the flag should be preserved in
after-split folios that contain pages with the PG_hwpoisoned flag set when the
folio is split to >0 order folios. Scan all pages in a to-be-split folio to
determine which after-split folios need the flag.

An alternative is to change PG_has_hwpoisoned to PG_maybe_hwpoisoned to avoid
the scan and set it on all after-split folios, but the resulting false
positives have an undesirable negative impact. To remove the false positives,
callers of folio_test_has_hwpoisoned() and folio_contain_hwpoisoned_page()
would need to do the scan themselves. That would be a hassle for current and
future callers and more costly than doing the scan in the split code. More
details are discussed in [1].

It is OK that the current implementation does not do this, because the memory
failure code always tries to split folios to order-0; if a folio cannot be
split to order-0, the memory failure code either gives a warning or does not
perform the split at all.

Link: https://lore.kernel.org/all/CAHbLzkoOZm0PXxE9qwtF4gKR=cpRXrSrJ9V9Pm2DJexs985q4g@mail.gmail.com/ [1]
Signed-off-by: Zi Yan
---
 mm/huge_memory.c | 28 +++++++++++++++++++++++++---
 1 file changed, 25 insertions(+), 3 deletions(-)

diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index fc65ec3393d2..f3896c1f130f 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -3455,6 +3455,17 @@ bool can_split_folio(struct folio *folio, int caller_pins, int *pextra_pins)
 					caller_pins;
 }
 
+static bool page_range_has_hwpoisoned(struct page *first_page, long nr_pages)
+{
+	long i;
+
+	for (i = 0; i < nr_pages; i++)
+		if (PageHWPoison(first_page + i))
+			return true;
+
+	return false;
+}
+
 /*
  * It splits @folio into @new_order folios and copies the @folio metadata to
  * all the resulting folios.
@@ -3462,22 +3473,32 @@ bool can_split_folio(struct folio *folio, int caller_pins, int *pextra_pins)
 static void __split_folio_to_order(struct folio *folio, int old_order,
 		int new_order)
 {
+	/* Scan poisoned pages when splitting a poisoned folio to large folios */
+	bool check_poisoned_pages = folio_test_has_hwpoisoned(folio) &&
+				    new_order != 0;
 	long new_nr_pages = 1 << new_order;
 	long nr_pages = 1 << old_order;
 	long i;
 
+	folio_clear_has_hwpoisoned(folio);
+
+	/* Check the first new_nr_pages since the loop below skips them */
+	if (check_poisoned_pages &&
+	    page_range_has_hwpoisoned(folio_page(folio, 0), new_nr_pages))
+		folio_set_has_hwpoisoned(folio);
 	/*
 	 * Skip the first new_nr_pages, since the new folio from them have all
 	 * the flags from the original folio.
 	 */
 	for (i = new_nr_pages; i < nr_pages; i += new_nr_pages) {
 		struct page *new_head = &folio->page + i;
-
 		/*
 		 * Careful: new_folio is not a "real" folio before we cleared PageTail.
 		 * Don't pass it around before clear_compound_head().
 		 */
 		struct folio *new_folio = (struct folio *)new_head;
+		bool poisoned_new_folio = check_poisoned_pages &&
+			page_range_has_hwpoisoned(new_head, new_nr_pages);
 
 		VM_BUG_ON_PAGE(atomic_read(&new_folio->_mapcount) != -1, new_head);
 
@@ -3514,6 +3535,9 @@ static void __split_folio_to_order(struct folio *folio, int old_order,
 			 (1L << PG_dirty) |
 			 LRU_GEN_MASK | LRU_REFS_MASK));
 
+		if (poisoned_new_folio)
+			folio_set_has_hwpoisoned(new_folio);
+
 		new_folio->mapping = folio->mapping;
 		new_folio->index = folio->index + i;
 
@@ -3600,8 +3624,6 @@ static int __split_unmapped_folio(struct folio *folio, int new_order,
 	int start_order = uniform_split ? new_order : old_order - 1;
 	int split_order;
 
-	folio_clear_has_hwpoisoned(folio);
-
 	/*
 	 * split to new_order one order at a time.
 	 * For uniform split, folio is split to new_order directly.
-- 
2.51.0


When the caller does not supply a list to split_huge_page_to_list_to_order(),
use split_huge_page_to_order() instead.

Signed-off-by: Zi Yan
---
 include/linux/huge_mm.h | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
index 7698b3542c4f..34f8d8453bf3 100644
--- a/include/linux/huge_mm.h
+++ b/include/linux/huge_mm.h
@@ -381,6 +381,10 @@ static inline int split_huge_page_to_list_to_order(struct page *page, struct lis
 {
 	return __split_huge_page_to_list_to_order(page, list, new_order, false);
 }
+static inline int split_huge_page_to_order(struct page *page, unsigned int new_order)
+{
+	return split_huge_page_to_list_to_order(page, NULL, new_order);
+}
 
 /*
  * try_folio_split_to_order - try to split a @folio at @page to @new_order using
  * non uniform split.
  * @folio: folio to be split
  * @page: split to @new_order at the given page
  * @new_order: the target split order
@@ -400,8 +404,7 @@ static inline int try_folio_split_to_order(struct folio *folio,
 		struct page *page, unsigned int new_order)
 {
 	if (!non_uniform_split_supported(folio, new_order, /* warns= */ false))
-		return split_huge_page_to_list_to_order(&folio->page, NULL,
-				new_order);
+		return split_huge_page_to_order(&folio->page, new_order);
 	return folio_split(folio, new_order, page, NULL);
 }
 static inline int split_huge_page(struct page *page)
@@ -590,6 +593,11 @@ split_huge_page_to_list_to_order(struct page *page, struct list_head *list,
 	VM_WARN_ON_ONCE_PAGE(1, page);
 	return -EINVAL;
 }
+static inline int split_huge_page_to_order(struct page *page, unsigned int new_order)
+{
+	VM_WARN_ON_ONCE_PAGE(1, page);
+	return -EINVAL;
+}
 static inline int split_huge_page(struct page *page)
 {
 	VM_WARN_ON_ONCE_PAGE(1, page);
-- 
2.51.0


Large block size (LBS) folios cannot be split to order-0 folios, only down to
min_order_for_folio(). The current code fails the split outright, which is not
optimal. Split the folio to min_order_for_folio() instead, so that after the
split only the folio containing the poisoned page becomes unusable.

For soft offline, do not split the large folio if its min_order_for_folio() is
not 0, since the folio is still accessible from userspace and a premature
split might lead to a performance loss.

Suggested-by: Jane Chu
Signed-off-by: Zi Yan
Reviewed-by: Luis Chamberlain
---
 mm/memory-failure.c | 30 ++++++++++++++++++++++++++----
 1 file changed, 26 insertions(+), 4 deletions(-)

diff --git a/mm/memory-failure.c b/mm/memory-failure.c
index f698df156bf8..40687b7aa8be 100644
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -1656,12 +1656,13 @@ static int identify_page_state(unsigned long pfn, struct page *p,
  * there is still more to do, hence the page refcount we took earlier
  * is still needed.
  */
-static int try_to_split_thp_page(struct page *page, bool release)
+static int try_to_split_thp_page(struct page *page, unsigned int new_order,
+				 bool release)
 {
 	int ret;
 
 	lock_page(page);
-	ret = split_huge_page(page);
+	ret = split_huge_page_to_order(page, new_order);
 	unlock_page(page);
 
 	if (ret && release)
@@ -2280,6 +2281,9 @@ int memory_failure(unsigned long pfn, int flags)
 		folio_unlock(folio);
 
 		if (folio_test_large(folio)) {
+			int new_order = min_order_for_split(folio);
+			int err;
+
 			/*
 			 * The flag must be set after the refcount is bumped
 			 * otherwise it may race with THP split.
@@ -2294,7 +2298,15 @@ int memory_failure(unsigned long pfn, int flags)
 			 * page is a valid handlable page.
 			 */
 			folio_set_has_hwpoisoned(folio);
-			if (try_to_split_thp_page(p, false) < 0) {
+			err = try_to_split_thp_page(p, new_order, /* release= */ false);
+			/*
+			 * If the folio cannot be split to order-0, kill the process,
+			 * but split the folio anyway to minimize the amount of unusable
+			 * pages.
+			 */
+			if (err || new_order) {
+				/* get folio again in case the original one is split */
+				folio = page_folio(p);
 				res = -EHWPOISON;
 				kill_procs_now(p, pfn, flags, folio);
 				put_page(p);
@@ -2621,7 +2633,17 @@ static int soft_offline_in_use_page(struct page *page)
 	};
 
 	if (!huge && folio_test_large(folio)) {
-		if (try_to_split_thp_page(page, true)) {
+		int new_order = min_order_for_split(folio);
+
+		/*
+		 * If new_order (target split order) is not 0, do not split the
+		 * folio at all to retain the still accessible large folio.
+		 * NOTE: if minimizing the number of soft offline pages is
+		 * preferred, split it to non-zero new_order like it is done in
+		 * memory_failure().
+		 */
+		if (new_order || try_to_split_thp_page(page, /* new_order= */ 0,
+						       /* release= */ true)) {
 			pr_info("%#lx: thp split failed\n", pfn);
 			return -EBUSY;
 		}
-- 
2.51.0


try_folio_split_to_order(), folio_split(), __folio_split(), and
__split_unmapped_folio() do not use the correct kernel-doc comment format.
Fix them.

Signed-off-by: Zi Yan
Reviewed-by: Lorenzo Stoakes
---
 include/linux/huge_mm.h | 10 ++++++----
 mm/huge_memory.c        | 27 +++++++++++++++------------
 2 files changed, 21 insertions(+), 16 deletions(-)

diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
index 34f8d8453bf3..cbb2243f8e56 100644
--- a/include/linux/huge_mm.h
+++ b/include/linux/huge_mm.h
@@ -386,9 +386,9 @@ static inline int split_huge_page_to_order(struct page *page, unsigned int new_o
 	return split_huge_page_to_list_to_order(page, NULL, new_order);
 }
 
-/*
- * try_folio_split_to_order - try to split a @folio at @page to @new_order using
- * non uniform split.
+/**
+ * try_folio_split_to_order() - try to split a @folio at @page to @new_order
+ * using non uniform split.
  * @folio: folio to be split
  * @page: split to @new_order at the given page
  * @new_order: the target split order
@@ -398,7 +398,7 @@ static inline int split_huge_page_to_order(struct page *page, unsigned int new_o
  * folios are put back to LRU list. Use min_order_for_split() to get the lower
  * bound of @new_order.
  *
- * Return: 0: split is successful, otherwise split failed.
+ * Return: 0 - split is successful, otherwise split failed.
  */
 static inline int try_folio_split_to_order(struct folio *folio,
 		struct page *page, unsigned int new_order)
@@ -486,6 +486,8 @@ static inline spinlock_t *pud_trans_huge_lock(pud_t *pud,
 /**
  * folio_test_pmd_mappable - Can we map this folio with a PMD?
  * @folio: The folio to test
+ *
+ * Return: true - @folio can be mapped, false - @folio cannot be mapped.
  */
 static inline bool folio_test_pmd_mappable(struct folio *folio)
 {
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index f3896c1f130f..38094d24fb14 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -3576,8 +3576,9 @@ static void __split_folio_to_order(struct folio *folio, int old_order,
 	ClearPageCompound(&folio->page);
 }
 
-/*
- * It splits an unmapped @folio to lower order smaller folios in two ways.
+/**
+ * __split_unmapped_folio() - splits an unmapped @folio to lower order folios in
+ * two ways: uniform split or non-uniform split.
  * @folio: the to-be-split folio
  * @new_order: the smallest order of the after split folios (since buddy
  * allocator like split generates folios with orders from @folio's
@@ -3612,8 +3613,8 @@ static void __split_folio_to_order(struct folio *folio, int old_order,
  * folio containing @page. The caller needs to unlock and/or free after-split
  * folios if necessary.
  *
- * For !uniform_split, when -ENOMEM is returned, the original folio might be
- * split. The caller needs to check the input folio.
+ * Return: 0 - successful, <0 - failed (if -ENOMEM is returned, @folio might be
+ * split but not to @new_order, the caller needs to check)
  */
 static int __split_unmapped_folio(struct folio *folio, int new_order,
 		struct page *split_at, struct xa_state *xas,
@@ -3732,8 +3733,8 @@ bool uniform_split_supported(struct folio *folio, unsigned int new_order,
 	return true;
 }
 
-/*
- * __folio_split: split a folio at @split_at to a @new_order folio
+/**
+ * __folio_split() - split a folio at @split_at to a @new_order folio
  * @folio: folio to split
  * @new_order: the order of the new folio
  * @split_at: a page within the new folio
@@ -3751,7 +3752,7 @@ bool uniform_split_supported(struct folio *folio, unsigned int new_order,
  * 1. for uniform split, @lock_at points to one of @folio's subpages;
  * 2. for buddy allocator like (non-uniform) split, @lock_at points to @folio.
  *
- * return: 0: successful, <0 failed (if -ENOMEM is returned, @folio might be
+ * Return: 0 - successful, <0 - failed (if -ENOMEM is returned, @folio might be
  * split but not to @new_order, the caller needs to check)
  */
 static int __folio_split(struct folio *folio, unsigned int new_order,
@@ -4140,14 +4141,13 @@ int __split_huge_page_to_list_to_order(struct page *page, struct list_head *list
 			unmapped);
 }
 
-/*
- * folio_split: split a folio at @split_at to a @new_order folio
+/**
+ * folio_split() - split a folio at @split_at to a @new_order folio
  * @folio: folio to split
  * @new_order: the order of the new folio
  * @split_at: a page within the new folio
- *
- * return: 0: successful, <0 failed (if -ENOMEM is returned, @folio might be
- * split but not to @new_order, the caller needs to check)
+ * @list: after-split folios are added to @list if not null, otherwise to LRU
+ *        list
  *
  * It has the same prerequisites and returns as
 * split_huge_page_to_list_to_order().
@@ -4161,6 +4161,9 @@ int __split_huge_page_to_list_to_order(struct page *page, struct list_head *list
  * [order-4, {order-3}, order-3, order-5, order-6, order-7, order-8].
  *
  * After split, folio is left locked for caller.
+ *
+ * Return: 0 - successful, <0 - failed (if -ENOMEM is returned, @folio might be
+ * split but not to @new_order, the caller needs to check)
  */
 int folio_split(struct folio *folio, unsigned int new_order,
 		struct page *split_at, struct list_head *list)
-- 
2.51.0
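
Editor's note: for readers who want to see the flag-propagation rule from the
first patch in isolation, here is a small user-space model of the per-range
scan. It is only an illustrative sketch, not kernel code; the names and the
orders used (order-4 folio split into order-2 pieces) are stand-ins, and the
kernel helpers (PageHWPoison(), folio_set_has_hwpoisoned(), ...) are replaced
by a plain bool array. A piece keeps the "has_hwpoisoned" mark only if one of
its pages is poisoned.

#include <stdbool.h>
#include <stdio.h>

/* Model of page_range_has_hwpoisoned(): any poisoned page in [start, start+nr)? */
static bool range_has_poison(const bool *poisoned, long start, long nr)
{
	for (long i = 0; i < nr; i++)
		if (poisoned[start + i])
			return true;
	return false;
}

int main(void)
{
	enum { OLD_ORDER = 4, NEW_ORDER = 2 };
	const long nr_pages = 1L << OLD_ORDER;		/* 16 pages in the original "folio" */
	const long new_nr_pages = 1L << NEW_ORDER;	/* 4 pages per after-split "folio" */
	bool poisoned[1L << OLD_ORDER] = { false };

	poisoned[6] = true;	/* pretend page 6 took a memory error */

	/* Only the piece covering pages 4..7 should report has_hwpoisoned. */
	for (long i = 0; i < nr_pages; i += new_nr_pages)
		printf("after-split folio covering pages %2ld..%2ld: has_hwpoisoned=%d\n",
		       i, i + new_nr_pages - 1,
		       range_has_poison(poisoned, i, new_nr_pages));
	return 0;
}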