No one will be able to use it, so the related code can be removed in the coming commits. Signed-off-by: Zi Yan --- mm/Kconfig | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/mm/Kconfig b/mm/Kconfig index bd283958d675..408fc7b82233 100644 --- a/mm/Kconfig +++ b/mm/Kconfig @@ -937,17 +937,6 @@ config THP_SWAP For selection by architectures with reasonable THP sizes. -config READ_ONLY_THP_FOR_FS - bool "Read-only THP for filesystems (EXPERIMENTAL)" - depends on TRANSPARENT_HUGEPAGE - - help - Allow khugepaged to put read-only file-backed pages in THP. - - This is marked experimental because it is a new feature. Write - support of file THPs will be developed in the next few release - cycles. - config NO_PAGE_MAPCOUNT bool "No per-page mapcount (EXPERIMENTAL)" help -- 2.43.0 collapse_file() requires FSes supporting large folio with at least PMD_ORDER, so replace the READ_ONLY_THP_FOR_FS check with that. shmem with huge option turned on also sets large folio order on mapping, so the check also applies to shmem. While at it, replace VM_BUG_ON with returning failure values. 
Signed-off-by: Zi Yan --- mm/khugepaged.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/mm/khugepaged.c b/mm/khugepaged.c index d06d84219e1b..45b12ffb1550 100644 --- a/mm/khugepaged.c +++ b/mm/khugepaged.c @@ -1899,8 +1899,11 @@ static enum scan_result collapse_file(struct mm_struct *mm, unsigned long addr, int nr_none = 0; bool is_shmem = shmem_file(file); - VM_BUG_ON(!IS_ENABLED(CONFIG_READ_ONLY_THP_FOR_FS) && !is_shmem); - VM_BUG_ON(start & (HPAGE_PMD_NR - 1)); + /* "huge" shmem sets mapping folio order and passes the check below */ + if (mapping_max_folio_order(mapping) < PMD_ORDER) + return SCAN_FAIL; + if (start & (HPAGE_PMD_NR - 1)) + return SCAN_ADDRESS_RANGE; result = alloc_charge_folio(&new_folio, mm, cc); if (result != SCAN_SUCCEED) -- 2.43.0 They are used by READ_ONLY_THP_FOR_FS to handle writes to FSes without large folio support, so that read-only THPs created in these FSes are not seen by the FSes when the underlying fd becomes writable. Now read-only PMD THPs only appear in a FS with large folio support and the supported orders include PMD_ORDER. Signed-off-by: Zi Yan --- fs/open.c | 27 --------------------------- include/linux/pagemap.h | 29 ----------------------------- mm/filemap.c | 1 - mm/huge_memory.c | 1 - mm/khugepaged.c | 29 ++--------------------------- 5 files changed, 2 insertions(+), 85 deletions(-) diff --git a/fs/open.c b/fs/open.c index 91f1139591ab..cef382d9d8b8 100644 --- a/fs/open.c +++ b/fs/open.c @@ -970,33 +970,6 @@ static int do_dentry_open(struct file *f, if ((f->f_flags & O_DIRECT) && !(f->f_mode & FMODE_CAN_ODIRECT)) return -EINVAL; - /* - * XXX: Huge page cache doesn't support writing yet. Drop all page - * cache for this file before processing writes. - */ - if (f->f_mode & FMODE_WRITE) { - /* - * Depends on full fence from get_write_access() to synchronize - * against collapse_file() regarding i_writecount and nr_thps - * updates. 
Ensures subsequent insertion of THPs into the page - * cache will fail. - */ - if (filemap_nr_thps(inode->i_mapping)) { - struct address_space *mapping = inode->i_mapping; - - filemap_invalidate_lock(inode->i_mapping); - /* - * unmap_mapping_range just need to be called once - * here, because the private pages is not need to be - * unmapped mapping (e.g. data segment of dynamic - * shared libraries here). - */ - unmap_mapping_range(mapping, 0, 0, 0); - truncate_inode_pages(mapping, 0); - filemap_invalidate_unlock(inode->i_mapping); - } - } - return 0; cleanup_all: diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index ec442af3f886..dad3f8846cdc 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -530,35 +530,6 @@ static inline size_t mapping_max_folio_size(const struct address_space *mapping) return PAGE_SIZE << mapping_max_folio_order(mapping); } -static inline int filemap_nr_thps(const struct address_space *mapping) -{ -#ifdef CONFIG_READ_ONLY_THP_FOR_FS - return atomic_read(&mapping->nr_thps); -#else - return 0; -#endif -} - -static inline void filemap_nr_thps_inc(struct address_space *mapping) -{ -#ifdef CONFIG_READ_ONLY_THP_FOR_FS - if (!mapping_large_folio_support(mapping)) - atomic_inc(&mapping->nr_thps); -#else - WARN_ON_ONCE(mapping_large_folio_support(mapping) == 0); -#endif -} - -static inline void filemap_nr_thps_dec(struct address_space *mapping) -{ -#ifdef CONFIG_READ_ONLY_THP_FOR_FS - if (!mapping_large_folio_support(mapping)) - atomic_dec(&mapping->nr_thps); -#else - WARN_ON_ONCE(mapping_large_folio_support(mapping) == 0); -#endif -} - struct address_space *folio_mapping(const struct folio *folio); /** diff --git a/mm/filemap.c b/mm/filemap.c index 2b933a1da9bd..4248e7cdecf3 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -189,7 +189,6 @@ static void filemap_unaccount_folio(struct address_space *mapping, lruvec_stat_mod_folio(folio, NR_SHMEM_THPS, -nr); } else if (folio_test_pmd_mappable(folio)) { 
lruvec_stat_mod_folio(folio, NR_FILE_THPS, -nr); - filemap_nr_thps_dec(mapping); } if (test_bit(AS_KERNEL_FILE, &folio->mapping->flags)) mod_node_page_state(folio_pgdat(folio), diff --git a/mm/huge_memory.c b/mm/huge_memory.c index b2a6060b3c20..c7873dbdc470 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -3833,7 +3833,6 @@ static int __folio_freeze_and_split_unmapped(struct folio *folio, unsigned int n } else { lruvec_stat_mod_folio(folio, NR_FILE_THPS, -nr); - filemap_nr_thps_dec(mapping); } } } diff --git a/mm/khugepaged.c b/mm/khugepaged.c index 45b12ffb1550..8004ab8de6d2 100644 --- a/mm/khugepaged.c +++ b/mm/khugepaged.c @@ -2104,20 +2104,8 @@ static enum scan_result collapse_file(struct mm_struct *mm, unsigned long addr, goto xa_unlocked; } - if (!is_shmem) { - filemap_nr_thps_inc(mapping); - /* - * Paired with the fence in do_dentry_open() -> get_write_access() - * to ensure i_writecount is up to date and the update to nr_thps - * is visible. Ensures the page cache will be truncated if the - * file is opened writable. - */ - smp_mb(); - if (inode_is_open_for_write(mapping->host)) { - result = SCAN_FAIL; - filemap_nr_thps_dec(mapping); - } - } + if (!is_shmem && inode_is_open_for_write(mapping->host)) + result = SCAN_FAIL; xa_locked: xas_unlock_irq(&xas); @@ -2296,19 +2284,6 @@ static enum scan_result collapse_file(struct mm_struct *mm, unsigned long addr, folio_putback_lru(folio); folio_put(folio); } - /* - * Undo the updates of filemap_nr_thps_inc for non-SHMEM - * file only. This undo is not needed unless failure is - * due to SCAN_COPY_MC. - */ - if (!is_shmem && result == SCAN_COPY_MC) { - filemap_nr_thps_dec(mapping); - /* - * Paired with the fence in do_dentry_open() -> get_write_access() - * to ensure the update to nr_thps is visible. - */ - smp_mb(); - } new_folio->mapping = NULL; -- 2.43.0 filemap_nr_thps*() are removed, the related field, address_space->nr_thps, is no longer needed. Remove it. 
Signed-off-by: Zi Yan --- fs/inode.c | 3 --- include/linux/fs.h | 5 ----- 2 files changed, 8 deletions(-) diff --git a/fs/inode.c b/fs/inode.c index cc12b68e021b..16ab0a345419 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -280,9 +280,6 @@ int inode_init_always_gfp(struct super_block *sb, struct inode *inode, gfp_t gfp mapping->flags = 0; mapping->wb_err = 0; atomic_set(&mapping->i_mmap_writable, 0); -#ifdef CONFIG_READ_ONLY_THP_FOR_FS - atomic_set(&mapping->nr_thps, 0); -#endif mapping_set_gfp_mask(mapping, GFP_HIGHUSER_MOVABLE); mapping->i_private_data = NULL; mapping->writeback_index = 0; diff --git a/include/linux/fs.h b/include/linux/fs.h index 0bdccfa70b44..35875696fb4c 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -455,7 +455,6 @@ extern const struct address_space_operations empty_aops; * memory mappings. * @gfp_mask: Memory allocation flags to use for allocating pages. * @i_mmap_writable: Number of VM_SHARED, VM_MAYWRITE mappings. - * @nr_thps: Number of THPs in the pagecache (non-shmem only). * @i_mmap: Tree of private and shared mappings. * @i_mmap_rwsem: Protects @i_mmap and @i_mmap_writable. * @nrpages: Number of page entries, protected by the i_pages lock. @@ -473,10 +472,6 @@ struct address_space { struct rw_semaphore invalidate_lock; gfp_t gfp_mask; atomic_t i_mmap_writable; -#ifdef CONFIG_READ_ONLY_THP_FOR_FS - /* number of thp, only for non-shmem files */ - atomic_t nr_thps; -#endif struct rb_root_cached i_mmap; unsigned long nrpages; pgoff_t writeback_index; -- 2.43.0 Replace it with a check on the max folio order of the file's address space mapping, making sure PMD_ORDER is supported. 
Signed-off-by: Zi Yan --- mm/huge_memory.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/mm/huge_memory.c b/mm/huge_memory.c index c7873dbdc470..1da1467328a3 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -89,9 +89,6 @@ static inline bool file_thp_enabled(struct vm_area_struct *vma) { struct inode *inode; - if (!IS_ENABLED(CONFIG_READ_ONLY_THP_FOR_FS)) - return false; - if (!vma->vm_file) return false; @@ -100,6 +97,9 @@ static inline bool file_thp_enabled(struct vm_area_struct *vma) if (IS_ANON_FILE(inode)) return false; + if (mapping_max_folio_order(inode->i_mapping) < PMD_ORDER) + return false; + return !inode_is_open_for_write(inode) && S_ISREG(inode->i_mode); } -- 2.43.0 Without READ_ONLY_THP_FOR_FS, large file-backed folios cannot be created by a FS without large folio support. The check is no longer needed. Signed-off-by: Zi Yan --- mm/huge_memory.c | 22 ---------------------- 1 file changed, 22 deletions(-) diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 1da1467328a3..30eddcbf86f1 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -3732,28 +3732,6 @@ int folio_check_splittable(struct folio *folio, unsigned int new_order, /* order-1 is not supported for anonymous THP. */ if (new_order == 1) return -EINVAL; - } else if (split_type == SPLIT_TYPE_NON_UNIFORM || new_order) { - if (IS_ENABLED(CONFIG_READ_ONLY_THP_FOR_FS) && - !mapping_large_folio_support(folio->mapping)) { - /* - * We can always split a folio down to a single page - * (new_order == 0) uniformly. - * - * For any other scenario - * a) uniform split targeting a large folio - * (new_order > 0) - * b) any non-uniform split - * we must confirm that the file system supports large - * folios. - * - * Note that we might still have THPs in such - * mappings, which is created from khugepaged when - * CONFIG_READ_ONLY_THP_FOR_FS is enabled. But in that - * case, the mapping does not actually support large - * folios properly. 
- */ - return -EINVAL; - } } /* -- 2.43.0 After READ_ONLY_THP_FOR_FS is removed, FS either supports large folio or not. folio_split() can be used on a FS with large folio support without worrying about getting a THP on a FS without large folio support. Signed-off-by: Zi Yan --- include/linux/huge_mm.h | 25 ++----------------------- mm/truncate.c | 8 ++++---- 2 files changed, 6 insertions(+), 27 deletions(-) diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h index 1258fa37e85b..171de8138e98 100644 --- a/include/linux/huge_mm.h +++ b/include/linux/huge_mm.h @@ -389,27 +389,6 @@ static inline int split_huge_page_to_order(struct page *page, unsigned int new_o return split_huge_page_to_list_to_order(page, NULL, new_order); } -/** - * try_folio_split_to_order() - try to split a @folio at @page to @new_order - * using non uniform split. - * @folio: folio to be split - * @page: split to @new_order at the given page - * @new_order: the target split order - * - * Try to split a @folio at @page using non uniform split to @new_order, if - * non uniform split is not supported, fall back to uniform split. After-split - * folios are put back to LRU list. Use min_order_for_split() to get the lower - * bound of @new_order. - * - * Return: 0 - split is successful, otherwise split failed. 
- */ -static inline int try_folio_split_to_order(struct folio *folio, - struct page *page, unsigned int new_order) -{ - if (folio_check_splittable(folio, new_order, SPLIT_TYPE_NON_UNIFORM)) - return split_huge_page_to_order(&folio->page, new_order); - return folio_split(folio, new_order, page, NULL); -} static inline int split_huge_page(struct page *page) { return split_huge_page_to_list_to_order(page, NULL, 0); @@ -641,8 +620,8 @@ static inline int split_folio_to_list(struct folio *folio, struct list_head *lis return -EINVAL; } -static inline int try_folio_split_to_order(struct folio *folio, - struct page *page, unsigned int new_order) +static inline int folio_split(struct folio *folio, unsigned int new_order, + struct page *page, struct list_head *list) { VM_WARN_ON_ONCE_FOLIO(1, folio); return -EINVAL; diff --git a/mm/truncate.c b/mm/truncate.c index 2931d66c16d0..6973b05ec4b8 100644 --- a/mm/truncate.c +++ b/mm/truncate.c @@ -177,7 +177,7 @@ int truncate_inode_folio(struct address_space *mapping, struct folio *folio) return 0; } -static int try_folio_split_or_unmap(struct folio *folio, struct page *split_at, +static int folio_split_or_unmap(struct folio *folio, struct page *split_at, unsigned long min_order) { enum ttu_flags ttu_flags = @@ -186,7 +186,7 @@ static int try_folio_split_or_unmap(struct folio *folio, struct page *split_at, TTU_IGNORE_MLOCK; int ret; - ret = try_folio_split_to_order(folio, split_at, min_order); + ret = folio_split(folio, min_order, split_at, NULL); /* * If the split fails, unmap the folio, so it will be refaulted @@ -252,7 +252,7 @@ bool truncate_inode_partial_folio(struct folio *folio, loff_t start, loff_t end) min_order = mapping_min_folio_order(folio->mapping); split_at = folio_page(folio, PAGE_ALIGN_DOWN(offset) / PAGE_SIZE); - if (!try_folio_split_or_unmap(folio, split_at, min_order)) { + if (!folio_split_or_unmap(folio, split_at, min_order)) { /* * try to split at offset + length to make sure folios within * the range can be 
dropped, especially to avoid memory waste @@ -279,7 +279,7 @@ bool truncate_inode_partial_folio(struct folio *folio, loff_t start, loff_t end) /* make sure folio2 is large and does not change its mapping */ if (folio_test_large(folio2) && folio2->mapping == folio->mapping) - try_folio_split_or_unmap(folio2, split_at2, min_order); + folio_split_or_unmap(folio2, split_at2, min_order); folio_unlock(folio2); out: -- 2.43.0 READ_ONLY_THP_FOR_FS is no longer present, remove related comment. Signed-off-by: Zi Yan Acked-by: David Sterba --- fs/btrfs/defrag.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/fs/btrfs/defrag.c b/fs/btrfs/defrag.c index 7e2db5d3a4d4..a8d49d9ca981 100644 --- a/fs/btrfs/defrag.c +++ b/fs/btrfs/defrag.c @@ -860,9 +860,6 @@ static struct folio *defrag_prepare_one_folio(struct btrfs_inode *inode, pgoff_t return folio; /* - * Since we can defragment files opened read-only, we can encounter - * transparent huge pages here (see CONFIG_READ_ONLY_THP_FOR_FS). - * * The IO for such large folios is not fully tested, thus return * an error to reject such folios unless it's an experimental build. * -- 2.43.0 Change the requirement to a file system with large folio support and the supported order needs to include PMD_ORDER. 
Signed-off-by: Zi Yan --- tools/testing/selftests/mm/khugepaged.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/mm/khugepaged.c b/tools/testing/selftests/mm/khugepaged.c index 3fe7ef04ac62..bdcdd31beb1e 100644 --- a/tools/testing/selftests/mm/khugepaged.c +++ b/tools/testing/selftests/mm/khugepaged.c @@ -1086,8 +1086,8 @@ static void usage(void) fprintf(stderr, "\t\t: [all|khugepaged|madvise]\n"); fprintf(stderr, "\t\t: [all|anon|file|shmem]\n"); fprintf(stderr, "\n\t\"file,all\" mem_type requires [dir] argument\n"); - fprintf(stderr, "\n\t\"file,all\" mem_type requires kernel built with\n"); - fprintf(stderr, "\tCONFIG_READ_ONLY_THP_FOR_FS=y\n"); + fprintf(stderr, "\n\t\"file,all\" mem_type requires a file system\n"); + fprintf(stderr, "\twith large folio support (order >= PMD order)\n"); fprintf(stderr, "\n\tif [dir] is a (sub)directory of a tmpfs mount, tmpfs must be\n"); fprintf(stderr, "\tmounted with huge=advise option for khugepaged tests to work\n"); fprintf(stderr, "\n\tSupported Options:\n"); -- 2.43.0 Any file system with large folio support and the supported orders include PMD_ORDER can be used. Signed-off-by: Zi Yan --- tools/testing/selftests/mm/guard-regions.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/tools/testing/selftests/mm/guard-regions.c b/tools/testing/selftests/mm/guard-regions.c index 48e8b1539be3..13e77e48b6ef 100644 --- a/tools/testing/selftests/mm/guard-regions.c +++ b/tools/testing/selftests/mm/guard-regions.c @@ -2205,7 +2205,7 @@ TEST_F(guard_regions, collapse) /* * We must close and re-open local-file backed as read-only for - * CONFIG_READ_ONLY_THP_FOR_FS to work. + * MADV_COLLAPSE to work. */ if (variant->backing == LOCAL_FILE_BACKED) { ASSERT_EQ(close(self->fd), 0); @@ -2237,9 +2237,10 @@ TEST_F(guard_regions, collapse) /* * Now collapse the entire region. This should fail in all cases. 
* - * The madvise() call will also fail if CONFIG_READ_ONLY_THP_FOR_FS is - * not set for the local file case, but we can't differentiate whether - * this occurred or if the collapse was rightly rejected. + * The madvise() call will also fail if the file system does not support + * large folio or the supported orders do not include PMD_ORDER for the + * local file case, but we can't differentiate whether this occurred or + * if the collapse was rightly rejected. */ EXPECT_NE(madvise(ptr, size, MADV_COLLAPSE), 0); -- 2.43.0