No one will be able to use it, so the related code can be removed in the coming commits. Signed-off-by: Zi Yan --- mm/Kconfig | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/mm/Kconfig b/mm/Kconfig index bd283958d675..408fc7b82233 100644 --- a/mm/Kconfig +++ b/mm/Kconfig @@ -937,17 +937,6 @@ config THP_SWAP For selection by architectures with reasonable THP sizes. -config READ_ONLY_THP_FOR_FS - bool "Read-only THP for filesystems (EXPERIMENTAL)" - depends on TRANSPARENT_HUGEPAGE - - help - Allow khugepaged to put read-only file-backed pages in THP. - - This is marked experimental because it is a new feature. Write - support of file THPs will be developed in the next few release - cycles. - config NO_PAGE_MAPCOUNT bool "No per-page mapcount (EXPERIMENTAL)" help -- 2.43.0 READ_ONLY_THP_FOR_FS is no longer present, remove corresponding code. Signed-off-by: Zi Yan --- mm/khugepaged.c | 159 +++++++++++------------------------------------- 1 file changed, 34 insertions(+), 125 deletions(-) diff --git a/mm/khugepaged.c b/mm/khugepaged.c index b2ac28ddd480..39f0b8959535 100644 --- a/mm/khugepaged.c +++ b/mm/khugepaged.c @@ -1899,7 +1899,7 @@ static enum scan_result collapse_file(struct mm_struct *mm, unsigned long addr, int nr_none = 0; bool is_shmem = shmem_file(file); - VM_BUG_ON(!IS_ENABLED(CONFIG_READ_ONLY_THP_FOR_FS) && !is_shmem); + VM_WARN_ON_ONCE(!is_shmem); VM_BUG_ON(start & (HPAGE_PMD_NR - 1)); result = alloc_charge_folio(&new_folio, mm, cc); @@ -1909,8 +1909,7 @@ static enum scan_result collapse_file(struct mm_struct *mm, unsigned long addr, mapping_set_update(&xas, mapping); __folio_set_locked(new_folio); - if (is_shmem) - __folio_set_swapbacked(new_folio); + __folio_set_swapbacked(new_folio); new_folio->index = start; new_folio->mapping = mapping; @@ -1935,83 +1934,39 @@ static enum scan_result collapse_file(struct mm_struct *mm, unsigned long addr, folio = xas_load(&xas); VM_BUG_ON(index != xas.xa_index); - if (is_shmem) { - if (!folio) { - /* - 
* Stop if extent has been truncated or - * hole-punched, and is now completely - * empty. - */ - if (index == start) { - if (!xas_next_entry(&xas, end - 1)) { - result = SCAN_TRUNCATED; - goto xa_locked; - } + if (!folio) { + /* + * Stop if extent has been truncated or + * hole-punched, and is now completely + * empty. + */ + if (index == start) { + if (!xas_next_entry(&xas, end - 1)) { + result = SCAN_TRUNCATED; + goto xa_locked; } - nr_none++; - index++; - continue; } + nr_none++; + index++; + continue; + } - if (xa_is_value(folio) || !folio_test_uptodate(folio)) { - xas_unlock_irq(&xas); - /* swap in or instantiate fallocated page */ - if (shmem_get_folio(mapping->host, index, 0, - &folio, SGP_NOALLOC)) { - result = SCAN_FAIL; - goto xa_unlocked; - } - /* drain lru cache to help folio_isolate_lru() */ - lru_add_drain(); - } else if (folio_trylock(folio)) { - folio_get(folio); - xas_unlock_irq(&xas); - } else { - result = SCAN_PAGE_LOCK; - goto xa_locked; - } - } else { /* !is_shmem */ - if (!folio || xa_is_value(folio)) { - xas_unlock_irq(&xas); - page_cache_sync_readahead(mapping, &file->f_ra, - file, index, - end - index); - /* drain lru cache to help folio_isolate_lru() */ - lru_add_drain(); - folio = filemap_lock_folio(mapping, index); - if (IS_ERR(folio)) { - result = SCAN_FAIL; - goto xa_unlocked; - } - } else if (folio_test_dirty(folio)) { - /* - * khugepaged only works on read-only fd, - * so this page is dirty because it hasn't - * been flushed since first write. There - * won't be new dirty pages. - * - * Trigger async flush here and hope the - * writeback is done when khugepaged - * revisits this page. - * - * This is a one-off situation. We are not - * forcing writeback in loop. 
- */ - xas_unlock_irq(&xas); - filemap_flush(mapping); - result = SCAN_PAGE_DIRTY_OR_WRITEBACK; - goto xa_unlocked; - } else if (folio_test_writeback(folio)) { - xas_unlock_irq(&xas); - result = SCAN_PAGE_DIRTY_OR_WRITEBACK; + if (xa_is_value(folio) || !folio_test_uptodate(folio)) { + xas_unlock_irq(&xas); + /* swap in or instantiate fallocated page */ + if (shmem_get_folio(mapping->host, index, 0, + &folio, SGP_NOALLOC)) { + result = SCAN_FAIL; goto xa_unlocked; - } else if (folio_trylock(folio)) { - folio_get(folio); - xas_unlock_irq(&xas); - } else { - result = SCAN_PAGE_LOCK; - goto xa_locked; } + /* drain lru cache to help folio_isolate_lru() */ + lru_add_drain(); + } else if (folio_trylock(folio)) { + folio_get(folio); + xas_unlock_irq(&xas); + } else { + result = SCAN_PAGE_LOCK; + goto xa_locked; } /* @@ -2041,17 +1996,6 @@ static enum scan_result collapse_file(struct mm_struct *mm, unsigned long addr, goto out_unlock; } - if (!is_shmem && (folio_test_dirty(folio) || - folio_test_writeback(folio))) { - /* - * khugepaged only works on read-only fd, so this - * folio is dirty because it hasn't been flushed - * since first write. - */ - result = SCAN_PAGE_DIRTY_OR_WRITEBACK; - goto out_unlock; - } - if (!folio_isolate_lru(folio)) { result = SCAN_DEL_PAGE_LRU; goto out_unlock; @@ -2101,21 +2045,6 @@ static enum scan_result collapse_file(struct mm_struct *mm, unsigned long addr, goto xa_unlocked; } - if (!is_shmem) { - filemap_nr_thps_inc(mapping); - /* - * Paired with the fence in do_dentry_open() -> get_write_access() - * to ensure i_writecount is up to date and the update to nr_thps - * is visible. Ensures the page cache will be truncated if the - * file is opened writable. 
- */ - smp_mb(); - if (inode_is_open_for_write(mapping->host)) { - result = SCAN_FAIL; - filemap_nr_thps_dec(mapping); - } - } - xa_locked: xas_unlock_irq(&xas); xa_unlocked: @@ -2224,12 +2153,8 @@ static enum scan_result collapse_file(struct mm_struct *mm, unsigned long addr, xas_lock_irq(&xas); } - if (is_shmem) { - lruvec_stat_mod_folio(new_folio, NR_SHMEM, HPAGE_PMD_NR); - lruvec_stat_mod_folio(new_folio, NR_SHMEM_THPS, HPAGE_PMD_NR); - } else { - lruvec_stat_mod_folio(new_folio, NR_FILE_THPS, HPAGE_PMD_NR); - } + lruvec_stat_mod_folio(new_folio, NR_SHMEM, HPAGE_PMD_NR); + lruvec_stat_mod_folio(new_folio, NR_SHMEM_THPS, HPAGE_PMD_NR); lruvec_stat_mod_folio(new_folio, NR_FILE_PAGES, HPAGE_PMD_NR); /* @@ -2240,8 +2165,7 @@ static enum scan_result collapse_file(struct mm_struct *mm, unsigned long addr, folio_mark_uptodate(new_folio); folio_ref_add(new_folio, HPAGE_PMD_NR - 1); - if (is_shmem) - folio_mark_dirty(new_folio); + folio_mark_dirty(new_folio); folio_add_lru(new_folio); /* Join all the small entries into a single multi-index entry. */ @@ -2266,9 +2190,7 @@ static enum scan_result collapse_file(struct mm_struct *mm, unsigned long addr, list_del(&folio->lru); lruvec_stat_mod_folio(folio, NR_FILE_PAGES, -folio_nr_pages(folio)); - if (is_shmem) - lruvec_stat_mod_folio(folio, NR_SHMEM, - -folio_nr_pages(folio)); + lruvec_stat_mod_folio(folio, NR_SHMEM, -folio_nr_pages(folio)); folio->mapping = NULL; folio_clear_active(folio); folio_clear_unevictable(folio); @@ -2293,19 +2215,6 @@ static enum scan_result collapse_file(struct mm_struct *mm, unsigned long addr, folio_putback_lru(folio); folio_put(folio); } - /* - * Undo the updates of filemap_nr_thps_inc for non-SHMEM - * file only. This undo is not needed unless failure is - * due to SCAN_COPY_MC. - */ - if (!is_shmem && result == SCAN_COPY_MC) { - filemap_nr_thps_dec(mapping); - /* - * Paired with the fence in do_dentry_open() -> get_write_access() - * to ensure the update to nr_thps is visible. 
- */ - smp_mb(); - } new_folio->mapping = NULL; -- 2.43.0 They were only used by READ_ONLY_THP_FOR_FS and are no longer needed. Signed-off-by: Zi Yan --- include/linux/pagemap.h | 29 ----------------------------- mm/filemap.c | 1 - mm/huge_memory.c | 1 - 3 files changed, 31 deletions(-) diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index ec442af3f886..dad3f8846cdc 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -530,35 +530,6 @@ static inline size_t mapping_max_folio_size(const struct address_space *mapping) return PAGE_SIZE << mapping_max_folio_order(mapping); } -static inline int filemap_nr_thps(const struct address_space *mapping) -{ -#ifdef CONFIG_READ_ONLY_THP_FOR_FS - return atomic_read(&mapping->nr_thps); -#else - return 0; -#endif -} - -static inline void filemap_nr_thps_inc(struct address_space *mapping) -{ -#ifdef CONFIG_READ_ONLY_THP_FOR_FS - if (!mapping_large_folio_support(mapping)) - atomic_inc(&mapping->nr_thps); -#else - WARN_ON_ONCE(mapping_large_folio_support(mapping) == 0); -#endif -} - -static inline void filemap_nr_thps_dec(struct address_space *mapping) -{ -#ifdef CONFIG_READ_ONLY_THP_FOR_FS - if (!mapping_large_folio_support(mapping)) - atomic_dec(&mapping->nr_thps); -#else - WARN_ON_ONCE(mapping_large_folio_support(mapping) == 0); -#endif -} - struct address_space *folio_mapping(const struct folio *folio); /** diff --git a/mm/filemap.c b/mm/filemap.c index 2b933a1da9bd..4248e7cdecf3 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -189,7 +189,6 @@ static void filemap_unaccount_folio(struct address_space *mapping, lruvec_stat_mod_folio(folio, NR_SHMEM_THPS, -nr); } else if (folio_test_pmd_mappable(folio)) { lruvec_stat_mod_folio(folio, NR_FILE_THPS, -nr); - filemap_nr_thps_dec(mapping); } if (test_bit(AS_KERNEL_FILE, &folio->mapping->flags)) mod_node_page_state(folio_pgdat(folio), diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 610a6184e92c..54b3d21e4cbb 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c 
@@ -3832,7 +3832,6 @@ static int __folio_freeze_and_split_unmapped(struct folio *folio, unsigned int n } else { lruvec_stat_mod_folio(folio, NR_FILE_THPS, -nr); - filemap_nr_thps_dec(mapping); } } } -- 2.43.0 It is only used by READ_ONLY_THP_FOR_FS, which no longer exists. Signed-off-by: Zi Yan --- mm/huge_memory.c | 21 --------------------- 1 file changed, 21 deletions(-) diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 54b3d21e4cbb..de4a1e6ce376 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -85,24 +85,6 @@ unsigned long huge_anon_orders_madvise __read_mostly; unsigned long huge_anon_orders_inherit __read_mostly; static bool anon_orders_configured __initdata; -static inline bool file_thp_enabled(struct vm_area_struct *vma) -{ - struct inode *inode; - - if (!IS_ENABLED(CONFIG_READ_ONLY_THP_FOR_FS)) - return false; - - if (!vma->vm_file) - return false; - - inode = file_inode(vma->vm_file); - - if (IS_ANON_FILE(inode)) - return false; - - return !inode_is_open_for_write(inode) && S_ISREG(inode->i_mode); -} - /* If returns true, we are unable to access the VMA's folios. */ static bool vma_is_special_huge(const struct vm_area_struct *vma) { @@ -199,9 +181,6 @@ unsigned long __thp_vma_allowable_orders(struct vm_area_struct *vma, */ if (((in_pf || smaps)) && vma->vm_ops->huge_fault) return orders; - /* Only regular file is valid in collapse path */ - if (((!in_pf || smaps)) && file_thp_enabled(vma)) - return orders; return 0; } -- 2.43.0 Without READ_ONLY_THP_FOR_FS, large file-backed folios cannot be created by a FS without large folio support. The check is no longer needed. Signed-off-by: Zi Yan --- mm/huge_memory.c | 22 ---------------------- 1 file changed, 22 deletions(-) diff --git a/mm/huge_memory.c b/mm/huge_memory.c index de4a1e6ce376..c7680787409c 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -3710,28 +3710,6 @@ int folio_check_splittable(struct folio *folio, unsigned int new_order, /* order-1 is not supported for anonymous THP. 
*/ if (new_order == 1) return -EINVAL; - } else if (split_type == SPLIT_TYPE_NON_UNIFORM || new_order) { - if (IS_ENABLED(CONFIG_READ_ONLY_THP_FOR_FS) && - !mapping_large_folio_support(folio->mapping)) { - /* - * We can always split a folio down to a single page - * (new_order == 0) uniformly. - * - * For any other scenario - * a) uniform split targeting a large folio - * (new_order > 0) - * b) any non-uniform split - * we must confirm that the file system supports large - * folios. - * - * Note that we might still have THPs in such - * mappings, which is created from khugepaged when - * CONFIG_READ_ONLY_THP_FOR_FS is enabled. But in that - * case, the mapping does not actually support large - * folios properly. - */ - return -EINVAL; - } } /* -- 2.43.0 After READ_ONLY_THP_FOR_FS is removed, a filesystem either supports large folios or it does not. folio_split() can be used on a FS with large folio support without worrying about getting a THP on a FS without large folio support. Signed-off-by: Zi Yan --- include/linux/huge_mm.h | 28 ---------------------------- mm/truncate.c | 8 ++++---- 2 files changed, 4 insertions(+), 32 deletions(-) diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h index 1258fa37e85b..77c8f73f8839 100644 --- a/include/linux/huge_mm.h +++ b/include/linux/huge_mm.h @@ -389,27 +389,6 @@ static inline int split_huge_page_to_order(struct page *page, unsigned int new_o return split_huge_page_to_list_to_order(page, NULL, new_order); } -/** - * try_folio_split_to_order() - try to split a @folio at @page to @new_order - * using non uniform split. - * @folio: folio to be split - * @page: split to @new_order at the given page - * @new_order: the target split order - * - * Try to split a @folio at @page using non uniform split to @new_order, if - * non uniform split is not supported, fall back to uniform split. After-split - * folios are put back to LRU list. Use min_order_for_split() to get the lower 
- * - * Return: 0 - split is successful, otherwise split failed. - */ -static inline int try_folio_split_to_order(struct folio *folio, - struct page *page, unsigned int new_order) -{ - if (folio_check_splittable(folio, new_order, SPLIT_TYPE_NON_UNIFORM)) - return split_huge_page_to_order(&folio->page, new_order); - return folio_split(folio, new_order, page, NULL); -} static inline int split_huge_page(struct page *page) { return split_huge_page_to_list_to_order(page, NULL, 0); @@ -641,13 +620,6 @@ static inline int split_folio_to_list(struct folio *folio, struct list_head *lis return -EINVAL; } -static inline int try_folio_split_to_order(struct folio *folio, - struct page *page, unsigned int new_order) -{ - VM_WARN_ON_ONCE_FOLIO(1, folio); - return -EINVAL; -} - static inline void deferred_split_folio(struct folio *folio, bool partially_mapped) {} #define split_huge_pmd(__vma, __pmd, __address) \ do { } while (0) diff --git a/mm/truncate.c b/mm/truncate.c index 2931d66c16d0..6973b05ec4b8 100644 --- a/mm/truncate.c +++ b/mm/truncate.c @@ -177,7 +177,7 @@ int truncate_inode_folio(struct address_space *mapping, struct folio *folio) return 0; } -static int try_folio_split_or_unmap(struct folio *folio, struct page *split_at, +static int folio_split_or_unmap(struct folio *folio, struct page *split_at, unsigned long min_order) { enum ttu_flags ttu_flags = @@ -186,7 +186,7 @@ static int try_folio_split_or_unmap(struct folio *folio, struct page *split_at, TTU_IGNORE_MLOCK; int ret; - ret = try_folio_split_to_order(folio, split_at, min_order); + ret = folio_split(folio, min_order, split_at, NULL); /* * If the split fails, unmap the folio, so it will be refaulted @@ -252,7 +252,7 @@ bool truncate_inode_partial_folio(struct folio *folio, loff_t start, loff_t end) min_order = mapping_min_folio_order(folio->mapping); split_at = folio_page(folio, PAGE_ALIGN_DOWN(offset) / PAGE_SIZE); - if (!try_folio_split_or_unmap(folio, split_at, min_order)) { + if (!folio_split_or_unmap(folio, 
split_at, min_order)) { /* * try to split at offset + length to make sure folios within * the range can be dropped, especially to avoid memory waste @@ -279,7 +279,7 @@ bool truncate_inode_partial_folio(struct folio *folio, loff_t start, loff_t end) /* make sure folio2 is large and does not change its mapping */ if (folio_test_large(folio2) && folio2->mapping == folio->mapping) - try_folio_split_or_unmap(folio2, split_at2, min_order); + folio_split_or_unmap(folio2, split_at2, min_order); folio_unlock(folio2); out: -- 2.43.0 READ_ONLY_THP_FOR_FS is removed, the related field is no longer needed. Signed-off-by: Zi Yan --- fs/inode.c | 3 --- include/linux/fs.h | 5 ----- 2 files changed, 8 deletions(-) diff --git a/fs/inode.c b/fs/inode.c index cc12b68e021b..16ab0a345419 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -280,9 +280,6 @@ int inode_init_always_gfp(struct super_block *sb, struct inode *inode, gfp_t gfp mapping->flags = 0; mapping->wb_err = 0; atomic_set(&mapping->i_mmap_writable, 0); -#ifdef CONFIG_READ_ONLY_THP_FOR_FS - atomic_set(&mapping->nr_thps, 0); -#endif mapping_set_gfp_mask(mapping, GFP_HIGHUSER_MOVABLE); mapping->i_private_data = NULL; mapping->writeback_index = 0; diff --git a/include/linux/fs.h b/include/linux/fs.h index 0bdccfa70b44..35875696fb4c 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -455,7 +455,6 @@ extern const struct address_space_operations empty_aops; * memory mappings. * @gfp_mask: Memory allocation flags to use for allocating pages. * @i_mmap_writable: Number of VM_SHARED, VM_MAYWRITE mappings. - * @nr_thps: Number of THPs in the pagecache (non-shmem only). * @i_mmap: Tree of private and shared mappings. * @i_mmap_rwsem: Protects @i_mmap and @i_mmap_writable. * @nrpages: Number of page entries, protected by the i_pages lock. 
@@ -473,10 +472,6 @@ struct address_space { struct rw_semaphore invalidate_lock; gfp_t gfp_mask; atomic_t i_mmap_writable; -#ifdef CONFIG_READ_ONLY_THP_FOR_FS - /* number of thp, only for non-shmem files */ - atomic_t nr_thps; -#endif struct rb_root_cached i_mmap; unsigned long nrpages; pgoff_t writeback_index; -- 2.43.0 READ_ONLY_THP_FOR_FS is no longer present, delete related code. Signed-off-by: Zi Yan --- fs/open.c | 27 --------------------------- 1 file changed, 27 deletions(-) diff --git a/fs/open.c b/fs/open.c index 91f1139591ab..cef382d9d8b8 100644 --- a/fs/open.c +++ b/fs/open.c @@ -970,33 +970,6 @@ static int do_dentry_open(struct file *f, if ((f->f_flags & O_DIRECT) && !(f->f_mode & FMODE_CAN_ODIRECT)) return -EINVAL; - /* - * XXX: Huge page cache doesn't support writing yet. Drop all page - * cache for this file before processing writes. - */ - if (f->f_mode & FMODE_WRITE) { - /* - * Depends on full fence from get_write_access() to synchronize - * against collapse_file() regarding i_writecount and nr_thps - * updates. Ensures subsequent insertion of THPs into the page - * cache will fail. - */ - if (filemap_nr_thps(inode->i_mapping)) { - struct address_space *mapping = inode->i_mapping; - - filemap_invalidate_lock(inode->i_mapping); - /* - * unmap_mapping_range just need to be called once - * here, because the private pages is not need to be - * unmapped mapping (e.g. data segment of dynamic - * shared libraries here). - */ - unmap_mapping_range(mapping, 0, 0, 0); - truncate_inode_pages(mapping, 0); - filemap_invalidate_unlock(inode->i_mapping); - } - } - return 0; cleanup_all: -- 2.43.0 READ_ONLY_THP_FOR_FS is no longer present, remove related comment. 
Signed-off-by: Zi Yan --- fs/btrfs/defrag.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/fs/btrfs/defrag.c b/fs/btrfs/defrag.c index 7e2db5d3a4d4..a8d49d9ca981 100644 --- a/fs/btrfs/defrag.c +++ b/fs/btrfs/defrag.c @@ -860,9 +860,6 @@ static struct folio *defrag_prepare_one_folio(struct btrfs_inode *inode, pgoff_t return folio; /* - * Since we can defragment files opened read-only, we can encounter - * transparent huge pages here (see CONFIG_READ_ONLY_THP_FOR_FS). - * * The IO for such large folios is not fully tested, thus return * an error to reject such folios unless it's an experimental build. * -- 2.43.0 READ_ONLY_THP_FOR_FS is no longer present, remove related test. Signed-off-by: Zi Yan --- tools/testing/selftests/mm/khugepaged.c | 197 +----------------------- 1 file changed, 3 insertions(+), 194 deletions(-) diff --git a/tools/testing/selftests/mm/khugepaged.c b/tools/testing/selftests/mm/khugepaged.c index 3fe7ef04ac62..842311682833 100644 --- a/tools/testing/selftests/mm/khugepaged.c +++ b/tools/testing/selftests/mm/khugepaged.c @@ -37,7 +37,6 @@ static int anon_order; enum vma_type { VMA_ANON, - VMA_FILE, VMA_SHMEM, }; @@ -49,7 +48,6 @@ struct mem_ops { const char *name; }; -static struct mem_ops *file_ops; static struct mem_ops *anon_ops; static struct mem_ops *shmem_ops; @@ -112,8 +110,6 @@ static void restore_settings(int sig) static void save_settings(void) { printf("Save THP and khugepaged settings..."); - if (file_ops && finfo.type == VMA_FILE) - thp_set_read_ahead_path(finfo.dev_queue_read_ahead_path); thp_save_settings(); success("OK"); @@ -125,89 +121,6 @@ static void save_settings(void) signal(SIGQUIT, restore_settings); } -static void get_finfo(const char *dir) -{ - struct stat path_stat; - struct statfs fs; - char buf[1 << 10]; - char path[PATH_MAX]; - char *str, *end; - - finfo.dir = dir; - stat(finfo.dir, &path_stat); - if (!S_ISDIR(path_stat.st_mode)) { - printf("%s: Not a directory (%s)\n", __func__, finfo.dir); - 
exit(EXIT_FAILURE); - } - if (snprintf(finfo.path, sizeof(finfo.path), "%s/" TEST_FILE, - finfo.dir) >= sizeof(finfo.path)) { - printf("%s: Pathname is too long\n", __func__); - exit(EXIT_FAILURE); - } - if (statfs(finfo.dir, &fs)) { - perror("statfs()"); - exit(EXIT_FAILURE); - } - finfo.type = fs.f_type == TMPFS_MAGIC ? VMA_SHMEM : VMA_FILE; - if (finfo.type == VMA_SHMEM) - return; - - /* Find owning device's queue/read_ahead_kb control */ - if (snprintf(path, sizeof(path), "/sys/dev/block/%d:%d/uevent", - major(path_stat.st_dev), minor(path_stat.st_dev)) - >= sizeof(path)) { - printf("%s: Pathname is too long\n", __func__); - exit(EXIT_FAILURE); - } - if (read_file(path, buf, sizeof(buf)) < 0) { - perror("read_file(read_num)"); - exit(EXIT_FAILURE); - } - if (strstr(buf, "DEVTYPE=disk")) { - /* Found it */ - if (snprintf(finfo.dev_queue_read_ahead_path, - sizeof(finfo.dev_queue_read_ahead_path), - "/sys/dev/block/%d:%d/queue/read_ahead_kb", - major(path_stat.st_dev), minor(path_stat.st_dev)) - >= sizeof(finfo.dev_queue_read_ahead_path)) { - printf("%s: Pathname is too long\n", __func__); - exit(EXIT_FAILURE); - } - return; - } - if (!strstr(buf, "DEVTYPE=partition")) { - printf("%s: Unknown device type: %s\n", __func__, path); - exit(EXIT_FAILURE); - } - /* - * Partition of block device - need to find actual device. - * Using naming convention that devnameN is partition of - * device devname. 
- */ - str = strstr(buf, "DEVNAME="); - if (!str) { - printf("%s: Could not read: %s", __func__, path); - exit(EXIT_FAILURE); - } - str += 8; - end = str; - while (*end) { - if (isdigit(*end)) { - *end = '\0'; - if (snprintf(finfo.dev_queue_read_ahead_path, - sizeof(finfo.dev_queue_read_ahead_path), - "/sys/block/%s/queue/read_ahead_kb", - str) >= sizeof(finfo.dev_queue_read_ahead_path)) { - printf("%s: Pathname is too long\n", __func__); - exit(EXIT_FAILURE); - } - return; - } - ++end; - } - printf("%s: Could not read: %s\n", __func__, path); - exit(EXIT_FAILURE); -} static bool check_swap(void *addr, unsigned long size) { @@ -364,77 +277,6 @@ static bool anon_check_huge(void *addr, int nr_hpages) return check_huge_anon(addr, nr_hpages, hpage_pmd_size); } -static void *file_setup_area(int nr_hpages) -{ - int fd; - void *p; - unsigned long size; - - unlink(finfo.path); /* Cleanup from previous failed tests */ - printf("Creating %s for collapse%s...", finfo.path, - finfo.type == VMA_SHMEM ? 
" (tmpfs)" : ""); - fd = open(finfo.path, O_DSYNC | O_CREAT | O_RDWR | O_TRUNC | O_EXCL, - 777); - if (fd < 0) { - perror("open()"); - exit(EXIT_FAILURE); - } - - size = nr_hpages * hpage_pmd_size; - p = alloc_mapping(nr_hpages); - fill_memory(p, 0, size); - write(fd, p, size); - close(fd); - munmap(p, size); - success("OK"); - - printf("Opening %s read only for collapse...", finfo.path); - finfo.fd = open(finfo.path, O_RDONLY, 777); - if (finfo.fd < 0) { - perror("open()"); - exit(EXIT_FAILURE); - } - p = mmap(BASE_ADDR, size, PROT_READ, - MAP_PRIVATE, finfo.fd, 0); - if (p == MAP_FAILED || p != BASE_ADDR) { - perror("mmap()"); - exit(EXIT_FAILURE); - } - - /* Drop page cache */ - write_file("/proc/sys/vm/drop_caches", "3", 2); - success("OK"); - return p; -} - -static void file_cleanup_area(void *p, unsigned long size) -{ - munmap(p, size); - close(finfo.fd); - unlink(finfo.path); -} - -static void file_fault(void *p, unsigned long start, unsigned long end) -{ - if (madvise(((char *)p) + start, end - start, MADV_POPULATE_READ)) { - perror("madvise(MADV_POPULATE_READ"); - exit(EXIT_FAILURE); - } -} - -static bool file_check_huge(void *addr, int nr_hpages) -{ - switch (finfo.type) { - case VMA_FILE: - return check_huge_file(addr, nr_hpages, hpage_pmd_size); - case VMA_SHMEM: - return check_huge_shmem(addr, nr_hpages, hpage_pmd_size); - default: - exit(EXIT_FAILURE); - return false; - } -} - static void *shmem_setup_area(int nr_hpages) { void *p; @@ -477,14 +319,6 @@ static struct mem_ops __anon_ops = { .name = "anon", }; -static struct mem_ops __file_ops = { - .setup_area = &file_setup_area, - .cleanup_area = &file_cleanup_area, - .fault = &file_fault, - .check_huge = &file_check_huge, - .name = "file", -}; - static struct mem_ops __shmem_ops = { .setup_area = &shmem_setup_area, .cleanup_area = &shmem_cleanup_area, @@ -576,7 +410,7 @@ static void khugepaged_collapse(const char *msg, char *p, int nr_hpages, } /* - * For file and shmem memory, khugepaged only 
retracts pte entries after + * For shmem memory, khugepaged only retracts pte entries after * putting the new hugepage in the page cache. The hugepage must be * subsequently refaulted to install the pmd mapping for the mm. */ @@ -603,7 +437,7 @@ static struct collapse_context __madvise_context = { static bool is_tmpfs(struct mem_ops *ops) { - return ops == &__file_ops && finfo.type == VMA_SHMEM; + return finfo.type == VMA_SHMEM; } static bool is_anon(struct mem_ops *ops) @@ -1084,10 +918,7 @@ static void usage(void) fprintf(stderr, "\nUsage: ./khugepaged [OPTIONS] [dir]\n\n"); fprintf(stderr, "\t\t: :\n"); fprintf(stderr, "\t\t: [all|khugepaged|madvise]\n"); - fprintf(stderr, "\t\t: [all|anon|file|shmem]\n"); - fprintf(stderr, "\n\t\"file,all\" mem_type requires [dir] argument\n"); - fprintf(stderr, "\n\t\"file,all\" mem_type requires kernel built with\n"); - fprintf(stderr, "\tCONFIG_READ_ONLY_THP_FOR_FS=y\n"); + fprintf(stderr, "\t\t: [all|anon|shmem]\n"); fprintf(stderr, "\n\tif [dir] is a (sub)directory of a tmpfs mount, tmpfs must be\n"); fprintf(stderr, "\tmounted with huge=advise option for khugepaged tests to work\n"); fprintf(stderr, "\n\tSupported Options:\n"); @@ -1143,26 +974,16 @@ static void parse_test_type(int argc, char **argv) usage(); if (!strcmp(buf, "all")) { - file_ops = &__file_ops; anon_ops = &__anon_ops; shmem_ops = &__shmem_ops; } else if (!strcmp(buf, "anon")) { anon_ops = &__anon_ops; - } else if (!strcmp(buf, "file")) { - file_ops = &__file_ops; } else if (!strcmp(buf, "shmem")) { shmem_ops = &__shmem_ops; } else { usage(); } - if (!file_ops) - return; - - if (argc != 2) - usage(); - - get_finfo(argv[1]); } int main(int argc, char **argv) @@ -1228,37 +1049,27 @@ int main(int argc, char **argv) } while (0) TEST(collapse_full, khugepaged_context, anon_ops); - TEST(collapse_full, khugepaged_context, file_ops); TEST(collapse_full, khugepaged_context, shmem_ops); TEST(collapse_full, madvise_context, anon_ops); - TEST(collapse_full, 
madvise_context, file_ops); TEST(collapse_full, madvise_context, shmem_ops); TEST(collapse_empty, khugepaged_context, anon_ops); TEST(collapse_empty, madvise_context, anon_ops); TEST(collapse_single_pte_entry, khugepaged_context, anon_ops); - TEST(collapse_single_pte_entry, khugepaged_context, file_ops); TEST(collapse_single_pte_entry, khugepaged_context, shmem_ops); TEST(collapse_single_pte_entry, madvise_context, anon_ops); - TEST(collapse_single_pte_entry, madvise_context, file_ops); TEST(collapse_single_pte_entry, madvise_context, shmem_ops); TEST(collapse_max_ptes_none, khugepaged_context, anon_ops); - TEST(collapse_max_ptes_none, khugepaged_context, file_ops); TEST(collapse_max_ptes_none, madvise_context, anon_ops); - TEST(collapse_max_ptes_none, madvise_context, file_ops); TEST(collapse_single_pte_entry_compound, khugepaged_context, anon_ops); - TEST(collapse_single_pte_entry_compound, khugepaged_context, file_ops); TEST(collapse_single_pte_entry_compound, madvise_context, anon_ops); - TEST(collapse_single_pte_entry_compound, madvise_context, file_ops); TEST(collapse_full_of_compound, khugepaged_context, anon_ops); - TEST(collapse_full_of_compound, khugepaged_context, file_ops); TEST(collapse_full_of_compound, khugepaged_context, shmem_ops); TEST(collapse_full_of_compound, madvise_context, anon_ops); - TEST(collapse_full_of_compound, madvise_context, file_ops); TEST(collapse_full_of_compound, madvise_context, shmem_ops); TEST(collapse_compound_extreme, khugepaged_context, anon_ops); @@ -1280,10 +1091,8 @@ int main(int argc, char **argv) TEST(collapse_max_ptes_shared, madvise_context, anon_ops); TEST(madvise_collapse_existing_thps, madvise_context, anon_ops); - TEST(madvise_collapse_existing_thps, madvise_context, file_ops); TEST(madvise_collapse_existing_thps, madvise_context, shmem_ops); - TEST(madvise_retracted_page_tables, madvise_context, file_ops); TEST(madvise_retracted_page_tables, madvise_context, shmem_ops); restore_settings(0); -- 2.43.0 
READ_ONLY_THP_FOR_FS is no longer present, remove related test. Signed-off-by: Zi Yan --- tools/testing/selftests/mm/guard-regions.c | 148 +-------------------- 1 file changed, 1 insertion(+), 147 deletions(-) diff --git a/tools/testing/selftests/mm/guard-regions.c b/tools/testing/selftests/mm/guard-regions.c index 48e8b1539be3..fc8b2a4e7ba2 100644 --- a/tools/testing/selftests/mm/guard-regions.c +++ b/tools/testing/selftests/mm/guard-regions.c @@ -42,7 +42,6 @@ static sigjmp_buf signal_jmp_buf; enum backing_type { ANON_BACKED, SHMEM_BACKED, - LOCAL_FILE_BACKED, }; FIXTURE(guard_regions) @@ -67,11 +66,6 @@ FIXTURE_VARIANT_ADD(guard_regions, shmem) .backing = SHMEM_BACKED, }; -FIXTURE_VARIANT_ADD(guard_regions, file) -{ - .backing = LOCAL_FILE_BACKED, -}; - static bool is_anon_backed(const FIXTURE_VARIANT(guard_regions) * variant) { switch (variant->backing) { @@ -98,7 +92,6 @@ static void *mmap_(FIXTURE_DATA(guard_regions) * self, offset = 0; break; case SHMEM_BACKED: - case LOCAL_FILE_BACKED: flags |= MAP_SHARED; fd = self->fd; break; @@ -198,18 +191,6 @@ static void teardown_sighandler(void) sigaction(SIGSEGV, &act, NULL); } -static int open_file(const char *prefix, char *path) -{ - int fd; - - snprintf(path, PATH_MAX, "%sguard_regions_test_file_XXXXXX", prefix); - fd = mkstemp(path); - if (fd < 0) - ksft_exit_fail_perror("mkstemp"); - - return fd; -} - /* Establish a varying pattern in a buffer. */ static void set_pattern(char *ptr, size_t num_pages, size_t page_size) { @@ -262,54 +243,6 @@ static bool is_buf_eq(char *buf, size_t size, char chr) return true; } -/* - * Some file systems have issues with merging due to changing merge-sensitive - * parameters in the .mmap callback, and prior to .mmap_prepare being - * implemented everywhere this will now result in an unexpected failure to - * merge (e.g. - overlayfs). 
- * - * Perform a simple test to see if the local file system suffers from this, if - * it does then we can skip test logic that assumes local file system merging is - * sane. - */ -static bool local_fs_has_sane_mmap(FIXTURE_DATA(guard_regions) * self, - const FIXTURE_VARIANT(guard_regions) * variant) -{ - const unsigned long page_size = self->page_size; - char *ptr, *ptr2; - struct procmap_fd procmap; - - if (variant->backing != LOCAL_FILE_BACKED) - return true; - - /* Map 10 pages. */ - ptr = mmap_(self, variant, NULL, 10 * page_size, PROT_READ | PROT_WRITE, 0, 0); - if (ptr == MAP_FAILED) - return false; - /* Unmap the middle. */ - munmap(&ptr[5 * page_size], page_size); - - /* Map again. */ - ptr2 = mmap_(self, variant, &ptr[5 * page_size], page_size, PROT_READ | PROT_WRITE, - MAP_FIXED, 5 * page_size); - - if (ptr2 == MAP_FAILED) - return false; - - /* Now make sure they all merged. */ - if (open_self_procmap(&procmap) != 0) - return false; - if (!find_vma_procmap(&procmap, ptr)) - return false; - if (procmap.query.vma_start != (unsigned long)ptr) - return false; - if (procmap.query.vma_end != (unsigned long)ptr + 10 * page_size) - return false; - close_procmap(&procmap); - - return true; -} - FIXTURE_SETUP(guard_regions) { self->page_size = (unsigned long)sysconf(_SC_PAGESIZE); @@ -318,9 +251,6 @@ FIXTURE_SETUP(guard_regions) switch (variant->backing) { case ANON_BACKED: return; - case LOCAL_FILE_BACKED: - self->fd = open_file("", self->path); - break; case SHMEM_BACKED: self->fd = memfd_create(self->path, 0); break; @@ -1750,55 +1680,6 @@ TEST_F(guard_regions, map_private) ASSERT_EQ(munmap(ptr_private, 10 * page_size), 0); } -/* Test that guard regions established over a read-only mapping function correctly. 
*/ -TEST_F(guard_regions, readonly_file) -{ - const unsigned long page_size = self->page_size; - char *ptr; - int i; - - if (variant->backing != LOCAL_FILE_BACKED) - SKIP(return, "Read-only test specific to file-backed"); - - /* Map shared so we can populate with pattern, populate it, unmap. */ - ptr = mmap_(self, variant, NULL, 10 * page_size, - PROT_READ | PROT_WRITE, 0, 0); - ASSERT_NE(ptr, MAP_FAILED); - set_pattern(ptr, 10, page_size); - ASSERT_EQ(munmap(ptr, 10 * page_size), 0); - /* Close the fd so we can re-open read-only. */ - ASSERT_EQ(close(self->fd), 0); - - /* Re-open read-only. */ - self->fd = open(self->path, O_RDONLY); - ASSERT_NE(self->fd, -1); - /* Re-map read-only. */ - ptr = mmap_(self, variant, NULL, 10 * page_size, PROT_READ, 0, 0); - ASSERT_NE(ptr, MAP_FAILED); - - /* Mark every other page guarded. */ - for (i = 0; i < 10; i += 2) { - char *ptr_pg = &ptr[i * page_size]; - - ASSERT_EQ(madvise(ptr_pg, page_size, MADV_GUARD_INSTALL), 0); - } - - /* Assert that the guard regions are in place.*/ - for (i = 0; i < 10; i++) { - char *ptr_pg = &ptr[i * page_size]; - - ASSERT_EQ(try_read_buf(ptr_pg), i % 2 != 0); - } - - /* Remove guard regions. */ - ASSERT_EQ(madvise(ptr, 10 * page_size, MADV_GUARD_REMOVE), 0); - - /* Ensure the data is as expected. */ - ASSERT_TRUE(check_pattern(ptr, 10, page_size)); - - ASSERT_EQ(munmap(ptr, 10 * page_size), 0); -} - TEST_F(guard_regions, fault_around) { const unsigned long page_size = self->page_size; @@ -2203,17 +2084,6 @@ TEST_F(guard_regions, collapse) if (variant->backing != ANON_BACKED) ASSERT_EQ(ftruncate(self->fd, size), 0); - /* - * We must close and re-open local-file backed as read-only for - * CONFIG_READ_ONLY_THP_FOR_FS to work. 
- */ - if (variant->backing == LOCAL_FILE_BACKED) { - ASSERT_EQ(close(self->fd), 0); - - self->fd = open(self->path, O_RDONLY); - ASSERT_GE(self->fd, 0); - } - ptr = mmap_(self, variant, NULL, size, PROT_READ, 0, 0); ASSERT_NE(ptr, MAP_FAILED); @@ -2234,13 +2104,7 @@ TEST_F(guard_regions, collapse) /* Allow huge page throughout region. */ ASSERT_EQ(madvise(ptr, size, MADV_HUGEPAGE), 0); - /* - * Now collapse the entire region. This should fail in all cases. - * - * The madvise() call will also fail if CONFIG_READ_ONLY_THP_FOR_FS is - * not set for the local file case, but we can't differentiate whether - * this occurred or if the collapse was rightly rejected. - */ + /* Now collapse the entire region. This should fail in all cases. */ EXPECT_NE(madvise(ptr, size, MADV_COLLAPSE), 0); /* @@ -2298,16 +2162,6 @@ TEST_F(guard_regions, smaps) ASSERT_TRUE(check_vmflag_guard(ptr)); ASSERT_TRUE(check_vmflag_guard(&ptr[5 * page_size])); - /* - * If the local file system is unable to merge VMAs due to having - * unusual characteristics, there is no point in asserting merge - * behaviour. - */ - if (!local_fs_has_sane_mmap(self, variant)) { - TH_LOG("local filesystem does not support sane merging skipping merge test"); - return; - } - /* Map a fresh VMA between the two split VMAs. */ ptr2 = mmap_(self, variant, &ptr[4 * page_size], page_size, PROT_READ | PROT_WRITE, MAP_FIXED, 4 * page_size); -- 2.43.0