folio_split_supported(), used in try_folio_split_to_order(), requires
folio->mapping to be non-NULL, but the current try_folio_split_to_order()
does not check it. There is no issue in the current code, since
try_folio_split_to_order() is only used in truncate_inode_partial_folio(),
where folio->mapping is not NULL. To prevent future misuse, move the
folio->mapping NULL check (i.e., the folio is truncated) into
folio_split_supported().

Since a NULL folio->mapping means -EBUSY and folio_split_supported() ==
false means -EINVAL, change the folio_split_supported() return type from
bool to int and return error numbers accordingly. Rename
folio_split_supported() to folio_check_splittable() to match the return
type change.

While at it, move the is_huge_zero_folio() and folio_test_writeback()
checks into folio_check_splittable() and add kernel-doc.

Signed-off-by: Zi Yan
---
 include/linux/huge_mm.h | 10 ++++--
 mm/huge_memory.c        | 74 +++++++++++++++++++++++++----------------
 2 files changed, 53 insertions(+), 31 deletions(-)

diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
index 1d439de1ca2c..97686fb46e30 100644
--- a/include/linux/huge_mm.h
+++ b/include/linux/huge_mm.h
@@ -375,8 +375,8 @@ int __split_huge_page_to_list_to_order(struct page *page, struct list_head *list
 int folio_split_unmapped(struct folio *folio, unsigned int new_order);
 int min_order_for_split(struct folio *folio);
 int split_folio_to_list(struct folio *folio, struct list_head *list);
-bool folio_split_supported(struct folio *folio, unsigned int new_order,
-		enum split_type split_type, bool warns);
+int folio_check_splittable(struct folio *folio, unsigned int new_order,
+		enum split_type split_type, bool warns);
 int folio_split(struct folio *folio, unsigned int new_order, struct page *page,
 		struct list_head *list);
@@ -407,7 +407,11 @@ static inline int split_huge_page_to_order(struct page *page, unsigned int new_o
 static inline int try_folio_split_to_order(struct folio *folio,
 		struct page *page, unsigned int new_order)
 {
-	if (!folio_split_supported(folio, new_order, SPLIT_TYPE_NON_UNIFORM, /* warns= */ false))
+	int ret;
+
+	ret = folio_check_splittable(folio, new_order, SPLIT_TYPE_NON_UNIFORM,
+				     /* warns= */ false);
+	if (ret)
 		return split_huge_page_to_order(&folio->page, new_order);
 	return folio_split(folio, new_order, page, NULL);
 }
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 041b554c7115..c1f1055165dd 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -3688,15 +3688,43 @@ static int __split_unmapped_folio(struct folio *folio, int new_order,
 	return 0;
 }
 
-bool folio_split_supported(struct folio *folio, unsigned int new_order,
-		enum split_type split_type, bool warns)
+/**
+ * folio_check_splittable() - check if a folio can be split to a given order
+ * @folio: folio to be split
+ * @new_order: the smallest order of the after-split folios (since a buddy
+ *             allocator-like split generates folios with orders from @folio's
+ *             order - 1 to new_order).
+ * @split_type: uniform or non-uniform split
+ * @warns: whether to warn when a check fails
+ *
+ * folio_check_splittable() checks if @folio can be split to @new_order using
+ * the @split_type method. The truncated folio check must come first.
+ *
+ * Context: folio must be locked.
+ *
+ * Return: 0 if @folio can be split to @new_order, otherwise an error number
+ * is returned.
+ */
+int folio_check_splittable(struct folio *folio, unsigned int new_order,
+		enum split_type split_type, bool warns)
 {
+	VM_WARN_ON_FOLIO(!folio_test_locked(folio), folio);
+	/*
+	 * Folios that just got truncated cannot get split. Signal to the
+	 * caller that there was a race.
+	 *
+	 * TODO: this will also currently refuse shmem folios that are in the
+	 * swapcache.
+	 */
+	if (!folio_test_anon(folio) && !folio->mapping)
+		return -EBUSY;
+
 	if (folio_test_anon(folio)) {
 		/* order-1 is not supported for anonymous THP. */
 		VM_WARN_ONCE(warns && new_order == 1,
 			     "Cannot split to order-1 folio");
 		if (new_order == 1)
-			return false;
+			return -EINVAL;
 	} else if (split_type == SPLIT_TYPE_NON_UNIFORM || new_order) {
 		if (IS_ENABLED(CONFIG_READ_ONLY_THP_FOR_FS) &&
 		    !mapping_large_folio_support(folio->mapping)) {
@@ -3719,7 +3747,7 @@ bool folio_split_supported(struct folio *folio, unsigned int new_order,
 			 */
 			VM_WARN_ONCE(warns,
 				     "Cannot split file folio to non-0 order");
-			return false;
+			return -EINVAL;
 		}
 	}
@@ -3734,10 +3762,18 @@ bool folio_split_supported(struct folio *folio, unsigned int new_order,
 	if ((split_type == SPLIT_TYPE_NON_UNIFORM || new_order) &&
 	    folio_test_swapcache(folio)) {
 		VM_WARN_ONCE(warns,
 			     "Cannot split swapcache folio to non-0 order");
-		return false;
+		return -EINVAL;
 	}
 
-	return true;
+	if (is_huge_zero_folio(folio)) {
+		pr_warn_ratelimited("Called split_huge_page for huge zero page\n");
+		return -EINVAL;
+	}
+
+	if (folio_test_writeback(folio))
+		return -EBUSY;
+
+	return 0;
 }
@@ -3922,7 +3958,6 @@ static int __folio_split(struct folio *folio, unsigned int new_order,
 	int remap_flags = 0;
 	int extra_pins, ret;
 	pgoff_t end = 0;
-	bool is_hzp;
 
 	VM_WARN_ON_ONCE_FOLIO(!folio_test_locked(folio), folio);
 	VM_WARN_ON_ONCE_FOLIO(!folio_test_large(folio), folio);
@@ -3930,30 +3965,13 @@ static int __folio_split(struct folio *folio, unsigned int new_order,
 	if (folio != page_folio(split_at) || folio != page_folio(lock_at))
 		return -EINVAL;
 
-	/*
-	 * Folios that just got truncated cannot get split. Signal to the
-	 * caller that there was a race.
-	 *
-	 * TODO: this will also currently refuse shmem folios that are in the
-	 * swapcache.
-	 */
-	if (!is_anon && !folio->mapping)
-		return -EBUSY;
-
 	if (new_order >= old_order)
 		return -EINVAL;
 
-	if (!folio_split_supported(folio, new_order, split_type, /* warn = */ true))
-		return -EINVAL;
-
-	is_hzp = is_huge_zero_folio(folio);
-	if (is_hzp) {
-		pr_warn_ratelimited("Called split_huge_page for huge zero page\n");
-		return -EBUSY;
-	}
-
-	if (folio_test_writeback(folio))
-		return -EBUSY;
+	ret = folio_check_splittable(folio, new_order, split_type,
+				     /* warn = */ true);
+	if (ret)
+		return ret;
 
 	if (is_anon) {
 		/*
-- 
2.51.0

can_split_folio() is just a refcount comparison that makes sure only the
split caller holds an extra pin. Open-code it as
folio_expected_ref_count() != folio_ref_count() - 1. For the extra_pins
value used by folio_ref_freeze(), add folio_cache_references() to
calculate it.
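For illustration (not part of the patch): the open-coded check relies on
folio_expected_ref_count() covering the same references that
can_split_folio() used to subtract, i.e. it is assumed to count the mapping
references plus the pagecache/swapcache references that
folio_cache_references() returns. A rough sketch of the equivalence, with a
made-up helper name:

	/*
	 * Illustrative only. Assumes folio_expected_ref_count(folio) ==
	 * folio_mapcount(folio) + folio_cache_references(folio) for a folio
	 * with no other pins.
	 */
	static inline bool only_split_caller_holds_extra_pin(struct folio *folio)
	{
		/*
		 * Old form, with caller_pins == 1:
		 *   folio_mapcount(folio) ==
		 *           folio_ref_count(folio) - extra_pins - 1
		 * Folding extra_pins into the expected count rearranges it to:
		 */
		return folio_expected_ref_count(folio) == folio_ref_count(folio) - 1;
	}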
Suggested-by: David Hildenbrand (Red Hat)
Signed-off-by: Zi Yan
---
 include/linux/huge_mm.h |  1 -
 mm/huge_memory.c        | 43 ++++++++++++++++-------------------------
 mm/vmscan.c             |  3 ++-
 3 files changed, 19 insertions(+), 28 deletions(-)

diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
index 97686fb46e30..1ecaeccf39c9 100644
--- a/include/linux/huge_mm.h
+++ b/include/linux/huge_mm.h
@@ -369,7 +369,6 @@ enum split_type {
 	SPLIT_TYPE_NON_UNIFORM,
 };
 
-bool can_split_folio(struct folio *folio, int caller_pins, int *pextra_pins);
 int __split_huge_page_to_list_to_order(struct page *page, struct list_head *list,
 		unsigned int new_order);
 int folio_split_unmapped(struct folio *folio, unsigned int new_order);
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index c1f1055165dd..6c821c1c0ac3 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -3455,23 +3455,6 @@ static void lru_add_split_folio(struct folio *folio, struct folio *new_folio,
 	}
 }
 
-/* Racy check whether the huge page can be split */
-bool can_split_folio(struct folio *folio, int caller_pins, int *pextra_pins)
-{
-	int extra_pins;
-
-	/* Additional pins from page cache */
-	if (folio_test_anon(folio))
-		extra_pins = folio_test_swapcache(folio) ?
-				folio_nr_pages(folio) : 0;
-	else
-		extra_pins = folio_nr_pages(folio);
-	if (pextra_pins)
-		*pextra_pins = extra_pins;
-	return folio_mapcount(folio) == folio_ref_count(folio) - extra_pins -
-	       caller_pins;
-}
-
 static bool page_range_has_hwpoisoned(struct page *page, long nr_pages)
 {
 	for (; nr_pages; page++, nr_pages--)
@@ -3776,17 +3759,26 @@ int folio_check_splittable(struct folio *folio, unsigned int new_order,
 	return 0;
 }
 
+/* Number of folio references from the pagecache or the swapcache. */
+static unsigned int folio_cache_references(const struct folio *folio)
+{
+	if (folio_test_anon(folio) && !folio_test_swapcache(folio))
+		return 0;
+	return folio_nr_pages(folio);
+}
+
 static int __folio_freeze_and_split_unmapped(struct folio *folio, unsigned int new_order,
 		struct page *split_at, struct xa_state *xas,
 		struct address_space *mapping, bool do_lru,
 		struct list_head *list, enum split_type split_type,
-		pgoff_t end, int *nr_shmem_dropped, int extra_pins)
+		pgoff_t end, int *nr_shmem_dropped)
 {
 	struct folio *end_folio = folio_next(folio);
 	struct folio *new_folio, *next;
 	int old_order = folio_order(folio);
 	int ret = 0;
 	struct deferred_split *ds_queue;
+	int extra_pins = folio_cache_references(folio);
 
 	VM_WARN_ON_ONCE(!mapping && end);
 	/* Prevent deferred_split_scan() touching ->_refcount */
@@ -3956,7 +3948,7 @@ static int __folio_split(struct folio *folio, unsigned int new_order,
 	struct folio *new_folio, *next;
 	int nr_shmem_dropped = 0;
 	int remap_flags = 0;
-	int extra_pins, ret;
+	int ret;
 	pgoff_t end = 0;
 
 	VM_WARN_ON_ONCE_FOLIO(!folio_test_locked(folio), folio);
@@ -4036,7 +4028,7 @@ static int __folio_split(struct folio *folio, unsigned int new_order,
 	 * Racy check if we can split the page, before unmap_folio() will
 	 * split PMDs
 	 */
-	if (!can_split_folio(folio, 1, &extra_pins)) {
+	if (folio_expected_ref_count(folio) != folio_ref_count(folio) - 1) {
 		ret = -EAGAIN;
 		goto out_unlock;
 	}
@@ -4059,8 +4051,7 @@ static int __folio_split(struct folio *folio, unsigned int new_order,
 	}
 
 	ret = __folio_freeze_and_split_unmapped(folio, new_order, split_at, &xas, mapping,
-						true, list, split_type, end, &nr_shmem_dropped,
-						extra_pins);
+						true, list, split_type, end, &nr_shmem_dropped);
 fail:
 	if (mapping)
 		xas_unlock(&xas);
@@ -4134,20 +4125,20 @@ static int __folio_split(struct folio *folio, unsigned int new_order,
  */
 int folio_split_unmapped(struct folio *folio, unsigned int new_order)
 {
-	int extra_pins, ret = 0;
+	int ret = 0;
 
 	VM_WARN_ON_ONCE_FOLIO(folio_mapped(folio), folio);
 	VM_WARN_ON_ONCE_FOLIO(!folio_test_locked(folio), folio);
 	VM_WARN_ON_ONCE_FOLIO(!folio_test_large(folio), folio);
 	VM_WARN_ON_ONCE_FOLIO(!folio_test_anon(folio), folio);
 
-	if (!can_split_folio(folio, 1, &extra_pins))
+	if (folio_expected_ref_count(folio) != folio_ref_count(folio) - 1)
 		return -EAGAIN;
 
 	local_irq_disable();
 	ret = __folio_freeze_and_split_unmapped(folio, new_order, &folio->page, NULL,
 						NULL, false, NULL, SPLIT_TYPE_UNIFORM,
-						0, NULL, extra_pins);
+						0, NULL);
 	local_irq_enable();
 	return ret;
 }
@@ -4640,7 +4631,7 @@ static int split_huge_pages_pid(int pid, unsigned long vaddr_start,
 		 * can be split or not. So skip the check here.
 		 */
 		if (!folio_test_private(folio) &&
-		    !can_split_folio(folio, 0, NULL))
+		    folio_expected_ref_count(folio) != folio_ref_count(folio))
 			goto next;
 
 		if (!folio_trylock(folio))
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 92980b072121..3b85652a42b9 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -1284,7 +1284,8 @@ static unsigned int shrink_folio_list(struct list_head *folio_list,
 					goto keep_locked;
 				if (folio_test_large(folio)) {
 					/* cannot split folio, skip it */
-					if (!can_split_folio(folio, 1, NULL))
+					if (folio_expected_ref_count(folio) !=
+					    folio_ref_count(folio) - 1)
 						goto activate_locked;
 					/*
 					 * Split partially mapped folios right away.
-- 
2.51.0

min_order_for_split() returns -EBUSY when the folio is truncated and cannot
be split. Since commit 77008e1b2ef7 ("mm/huge_memory: do not change
split_huge_page*() target order silently"), memory_failure() does not
handle that and passes -EBUSY to try_to_split_thp_page() directly.
try_to_split_thp_page() then returns -EINVAL, because -EBUSY becomes
0xfffffff0 when assigned to the unsigned int new_order in __folio_split(),
and such a large new_order is rejected as invalid input. So the code does
not cause an actual bug. soft_offline_in_use_page() also uses
min_order_for_split(), but it always passes 0 as new_order for the split.

Fix it by making min_order_for_split() always return an order. When the
given folio is truncated, namely folio->mapping == NULL, return 0 and let a
subsequent split function handle the situation and return -EBUSY. Add
kernel-doc to min_order_for_split() to clarify its use.
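For illustration (not part of the patch), the implicit conversion described
above can be reproduced in a small userspace program; EBUSY is assumed to
have Linux's usual value of 16:

	#include <errno.h>
	#include <stdio.h>

	int main(void)
	{
		int ret = -EBUSY;		/* what min_order_for_split() returned */
		unsigned int new_order = ret;	/* __folio_split() takes an unsigned order */

		/* Prints new_order = 0xfffffff0, far larger than any valid folio order. */
		printf("new_order = %#x\n", new_order);
		return 0;
	}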
Signed-off-by: Zi Yan
---
 include/linux/huge_mm.h |  6 +++---
 mm/huge_memory.c        | 25 +++++++++++++++++++------
 2 files changed, 22 insertions(+), 9 deletions(-)

diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
index 1ecaeccf39c9..9b3a4e2b0668 100644
--- a/include/linux/huge_mm.h
+++ b/include/linux/huge_mm.h
@@ -372,7 +372,7 @@ enum split_type {
 int __split_huge_page_to_list_to_order(struct page *page, struct list_head *list,
 		unsigned int new_order);
 int folio_split_unmapped(struct folio *folio, unsigned int new_order);
-int min_order_for_split(struct folio *folio);
+unsigned int min_order_for_split(struct folio *folio);
 int split_folio_to_list(struct folio *folio, struct list_head *list);
 int folio_check_splittable(struct folio *folio, unsigned int new_order,
 		enum split_type split_type, bool warns);
@@ -634,10 +634,10 @@ static inline int split_huge_page(struct page *page)
 	return -EINVAL;
 }
 
-static inline int min_order_for_split(struct folio *folio)
+static inline unsigned int min_order_for_split(struct folio *folio)
 {
 	VM_WARN_ON_ONCE_FOLIO(1, folio);
-	return -EINVAL;
+	return 0;
 }
 
 static inline int split_folio_to_list(struct folio *folio, struct list_head *list)
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 6c821c1c0ac3..ebc3ba0907fd 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -4230,16 +4230,29 @@ int folio_split(struct folio *folio, unsigned int new_order,
 						SPLIT_TYPE_NON_UNIFORM);
 }
 
-int min_order_for_split(struct folio *folio)
+/**
+ * min_order_for_split() - get the minimum order @folio can be split to
+ * @folio: folio to split
+ *
+ * min_order_for_split() tells the minimum order @folio can be split to.
+ * If a file-backed folio is truncated, 0 will be returned. Any subsequent
+ * split attempt should get -EBUSY from the split checking code.
+ *
+ * Return: @folio's minimum order for split
+ */
+unsigned int min_order_for_split(struct folio *folio)
 {
 	if (folio_test_anon(folio))
 		return 0;
 
-	if (!folio->mapping) {
-		if (folio_test_pmd_mappable(folio))
-			count_vm_event(THP_SPLIT_PAGE_FAILED);
-		return -EBUSY;
-	}
+	/*
+	 * If the folio got truncated, we don't know the previous mapping and
+	 * consequently the old min order. But it doesn't matter, as any split
+	 * attempt will immediately fail with -EBUSY as the folio cannot get
+	 * split until freed.
+	 */
+	if (!folio->mapping)
+		return 0;
 
 	return mapping_min_folio_order(folio->mapping);
 }
-- 
2.51.0

The "return <error>" statements for the error checks at the beginning of
__folio_split() skip the necessary count_vm_event() and count_mthp_stat()
calls at the end of the function. Fix these by replacing them with
"ret = <error>; goto out;".

Signed-off-by: Zi Yan
---
 mm/huge_memory.c | 14 +++++++++-----
 1 file changed, 9 insertions(+), 5 deletions(-)

diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index ebc3ba0907fd..a42c4f29ce4f 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -3954,16 +3954,20 @@ static int __folio_split(struct folio *folio, unsigned int new_order,
 	VM_WARN_ON_ONCE_FOLIO(!folio_test_locked(folio), folio);
 	VM_WARN_ON_ONCE_FOLIO(!folio_test_large(folio), folio);
 
-	if (folio != page_folio(split_at) || folio != page_folio(lock_at))
-		return -EINVAL;
+	if (folio != page_folio(split_at) || folio != page_folio(lock_at)) {
+		ret = -EINVAL;
+		goto out;
+	}
 
-	if (new_order >= old_order)
-		return -EINVAL;
+	if (new_order >= old_order) {
+		ret = -EINVAL;
+		goto out;
+	}
 
 	ret = folio_check_splittable(folio, new_order, split_type,
 				     /* warn = */ true);
 	if (ret)
-		return ret;
+		goto out;
 
 	if (is_anon) {
 		/*
-- 
2.51.0
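For illustration only (not part of the series), a locked-folio caller of the
reworked interface could distinguish the two failure classes of
folio_check_splittable() roughly as below; the wrapper name is made up:

	static int try_split_locked_folio(struct folio *folio, unsigned int new_order)
	{
		int ret;

		ret = folio_check_splittable(folio, new_order, SPLIT_TYPE_NON_UNIFORM,
					     /* warns= */ false);
		if (ret)
			/*
			 * -EBUSY: truncation race or writeback, worth retrying later.
			 * -EINVAL: this folio/order combination can never be split.
			 */
			return ret;

		return folio_split(folio, new_order, &folio->page, NULL);
	}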