Introduce shmem_insert_folio(), which transfers an isolated folio
zero-copy into a shmem file's page cache. The folio is charged to
memcg, inserted into the address space, and placed on the anon LRU
for normal reclaim. An optional writeback parameter requests
immediate swap writeback. Higher-order folios are promoted to
compound folios before insertion, enabling THP-sized swap entries
with CONFIG_THP_SWAP=y. On failure the folio is returned to its
original state and the caller retains ownership.
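
A minimal usage sketch (hypothetical caller code, not part of this
patch; 'folio' is assumed to already satisfy the documented
preconditions: isolated, unlocked, unmapped, refcount == 1):

	struct file *f = shmem_file_setup("backup", SZ_1M, VM_NORESERVE);
	int err;

	if (IS_ERR(f))
		return PTR_ERR(f);

	/* Insert an order-2 folio at index 0, with immediate writeback. */
	err = shmem_insert_folio(f, folio, 2, 0, true, GFP_KERNEL);
	if (err)
		return err;	/* We still own the (non-compound) folio. */

	/* shmem now owns the memory; drop our reference. */
	folio_put(folio);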

Assisted-by: GitHub_Copilot:claude-sonnet-4.6
Signed-off-by: Thomas Hellström
---
 include/linux/mm.h       |   1 +
 include/linux/shmem_fs.h |   2 +
 mm/page_alloc.c          |  21 ++++++++
 mm/shmem.c               | 105 +++++++++++++++++++++++++++++++++++++++
 4 files changed, 129 insertions(+)

diff --git a/include/linux/mm.h b/include/linux/mm.h
index af23453e9dbd..e2e7b0c0998b 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1676,6 +1676,7 @@ struct mmu_gather;
 struct inode;
 
 extern void prep_compound_page(struct page *page, unsigned int order);
+extern void undo_compound_page(struct page *page);
 
 static inline unsigned int folio_large_order(const struct folio *folio)
 {
diff --git a/include/linux/shmem_fs.h b/include/linux/shmem_fs.h
index 93a0ba872ebe..2dc9355757fd 100644
--- a/include/linux/shmem_fs.h
+++ b/include/linux/shmem_fs.h
@@ -175,6 +175,8 @@ int shmem_get_folio(struct inode *inode, pgoff_t index, loff_t write_end,
 		    struct folio **foliop, enum sgp_type sgp);
 struct folio *shmem_read_folio_gfp(struct address_space *mapping,
 				   pgoff_t index, gfp_t gfp);
+int shmem_insert_folio(struct file *file, struct folio *folio, unsigned int order,
+		       pgoff_t index, bool writeback, gfp_t folio_gfp);
 
 static inline struct folio *shmem_read_folio(struct address_space *mapping,
 					     pgoff_t index)
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 227d58dc3de6..db82825a3348 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -705,6 +705,27 @@ void prep_compound_page(struct page *page, unsigned int order)
 	prep_compound_head(page, order);
 }
 
+/**
+ * undo_compound_page() - Reverse the effect of prep_compound_page().
+ * @page: The head page of a compound page to demote.
+ *
+ * Returns the pages to non-compound state as if prep_compound_page()
+ * had never been called. split_page() must NOT have been called on
+ * the compound page; tail refcounts must be 0. The caller must ensure
+ * no other users hold references to the compound page.
+ */
+void undo_compound_page(struct page *page)
+{
+	unsigned int i, nr = 1U << compound_order(page);
+
+	page[1].flags.f &= ~PAGE_FLAGS_SECOND;
+	for (i = 1; i < nr; i++) {
+		page[i].mapping = NULL;
+		clear_compound_head(&page[i]);
+	}
+	ClearPageHead(page);
+}
+
 static inline void set_buddy_order(struct page *page, unsigned int order)
 {
 	set_page_private(page, order);
diff --git a/mm/shmem.c b/mm/shmem.c
index 3b5dc21b323c..45e80a74f77c 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -937,6 +937,111 @@ int shmem_add_to_page_cache(struct folio *folio,
 	return 0;
 }
 
+/**
+ * shmem_insert_folio() - Insert an isolated folio into a shmem file.
+ * @file: The shmem file created with shmem_file_setup().
+ * @folio: The folio to insert. Must be isolated (not on LRU), unlocked,
+ *	have exactly one reference (the caller's), have no page-table
+ *	mappings, and have folio->mapping == NULL.
+ * @order: The allocation order of @folio. If @order > 0 and @folio is
+ *	not already a large (compound) folio, it will be promoted to a
+ *	compound folio of this order inside this function. This requires
+ *	the standard post-alloc state: head refcount == 1, tail
+ *	refcounts == 0 (i.e. split_page() must NOT have been called).
+ *	On failure the promotion is reversed and the folio is returned
+ *	to its original non-compound state.
+ * @index: Page-cache index at which to insert. Must be aligned to
+ *	(1 << @order) and within the file's size.
+ * @writeback: If true, attempt immediate writeback to swap after insertion.
+ *	Best-effort; failure is silently ignored.
+ * @folio_gfp: The GFP flags to use for memory-cgroup charging.
+ *
+ * The folio is inserted zero-copy into the shmem page cache and placed on
+ * the anon LRU, where it participates in normal kernel reclaim (written to
+ * swap under memory pressure). Any previous content at @index is discarded.
+ * On success the caller should release their reference with folio_put() and
+ * track the (@file, @index) pair for later recovery via shmem_read_folio()
+ * and release via shmem_truncate_range().
+ *
+ * Return: 0 on success. On failure the folio is returned to its original
+ * state and the caller retains ownership.
+ */
+int shmem_insert_folio(struct file *file, struct folio *folio, unsigned int order,
+		       pgoff_t index, bool writeback, gfp_t folio_gfp)
+{
+	struct address_space *mapping = file->f_mapping;
+	struct inode *inode = mapping->host;
+	bool promoted;
+	long nr_pages;
+	int ret;
+
+	promoted = order > 0 && !folio_test_large(folio);
+	if (promoted)
+		prep_compound_page(&folio->page, order);
+	nr_pages = folio_nr_pages(folio);
+
+	VM_BUG_ON_FOLIO(folio_test_lru(folio), folio);
+	VM_BUG_ON_FOLIO(folio_mapped(folio), folio);
+	VM_BUG_ON_FOLIO(folio_test_swapcache(folio), folio);
+	VM_BUG_ON_FOLIO(folio->mapping, folio);
+	VM_BUG_ON(index != round_down(index, nr_pages));
+
+	folio_lock(folio);
+	__folio_set_swapbacked(folio);
+	folio_mark_uptodate(folio);
+
+	folio_gfp &= GFP_RECLAIM_MASK;
+	ret = mem_cgroup_charge(folio, NULL, folio_gfp);
+	if (ret)
+		goto err_unlock;
+
+	ret = shmem_add_to_page_cache(folio, mapping, index, NULL, folio_gfp);
+	if (ret == -EEXIST) {
+		shmem_truncate_range(inode,
+				     (loff_t)index << PAGE_SHIFT,
+				     ((loff_t)(index + nr_pages) << PAGE_SHIFT) - 1);
+		ret = shmem_add_to_page_cache(folio, mapping, index, NULL,
+					      folio_gfp);
+	}
+	if (ret)
+		goto err_uncharge;
+
+	folio_mark_dirty(folio);
+
+	ret = shmem_inode_acct_blocks(inode, nr_pages);
+	if (ret) {
+		filemap_remove_folio(folio);
+		goto err_uncharge;
+	}
+
+	shmem_recalc_inode(inode, nr_pages, 0);
+
+	if (writeback) {
+		ret = shmem_writeout(folio, NULL, NULL);
+		if (ret == AOP_WRITEPAGE_ACTIVATE) {
+			/* No swap slot available; reclaim will retry. */
+			folio_add_lru(folio);
+			folio_unlock(folio);
+		}
+		/* ret == 0 or ret < 0: folio unlocked by shmem_writeout */
+	} else {
+		folio_add_lru(folio);
+		folio_unlock(folio);
+	}
+
+	return 0;
+
+err_uncharge:
+	mem_cgroup_uncharge(folio);
+err_unlock:
+	__folio_clear_swapbacked(folio);
+	folio_unlock(folio);
+	if (promoted)
+		undo_compound_page(&folio->page);
+	return ret;
+}
+EXPORT_SYMBOL_GPL(shmem_insert_folio);
+
 /*
  * Somewhat like filemap_remove_folio, but substitutes swap for @folio.
  */
--
2.54.0

Add ttm_backup_insert_folio(), a thin wrapper around
shmem_insert_folio() that returns a handle, for use by drivers with
large isolated folios. Replace the alloc+copy ttm_backup_backup_page()
path in ttm_pool_backup() with the zero-copy ttm_backup_insert_folio()
path. On success NR_GPU_ACTIVE is decremented and the caller's
reference is released; shmem takes ownership.
The alloc_gfp argument used for allocating shmem backing pages is no
longer needed. If insertion fails for a higher-order page, it is split
into order-0 pages with ttm_pool_split_for_swap() and the loop retries
each page individually.
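
A sketch of the handle convention (hypothetical caller code, not part
of this patch; 'backup', 'page', 'order', 'i', 'gfp' and 'tt' stand in
for state ttm_pool_backup() actually maintains, and 'new_page'/'err'
are illustrative):

	s64 handle = ttm_backup_insert_folio(backup, page_folio(page),
					     order, false, i, gfp);

	if (handle >= 0) {
		unsigned long j;

		/* shmem owns the folio; handle + j addresses sub-page j. */
		folio_put(page_folio(page));
		for (j = 0; j < (1UL << order); j++)
			tt->pages[i + j] =
				ttm_backup_handle_to_page_ptr(handle + j);
	}

	/* Later: recover one sub-page into a freshly allocated page. */
	err = ttm_backup_copy_page(backup, new_page,
				   ttm_backup_page_ptr_to_handle(tt->pages[i]),
				   true, GFP_KERNEL);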

Assisted-by: GitHub_Copilot:claude-sonnet-4.6
Signed-off-by: Thomas Hellström
---
 drivers/gpu/drm/ttm/ttm_backup.c | 92 ++++++++++++--------------------
 drivers/gpu/drm/ttm/ttm_pool.c   | 67 ++++++++++++++++-------
 include/drm/ttm/ttm_backup.h     | 11 ++--
 3 files changed, 87 insertions(+), 83 deletions(-)

diff --git a/drivers/gpu/drm/ttm/ttm_backup.c b/drivers/gpu/drm/ttm/ttm_backup.c
index 81df4cb5606b..a37e9404b895 100644
--- a/drivers/gpu/drm/ttm/ttm_backup.c
+++ b/drivers/gpu/drm/ttm/ttm_backup.c
@@ -6,7 +6,6 @@
 #include
 #include
-#include
 
 #include
 
 /*
@@ -68,73 +67,50 @@ int ttm_backup_copy_page(struct file *backup, struct page *dst,
 }
 
 /**
- * ttm_backup_backup_page() - Backup a page
+ * ttm_backup_insert_folio() - Zero-copy insert of an isolated folio into backup.
  * @backup: The struct backup pointer to use.
- * @page: The page to back up.
- * @writeback: Whether to perform immediate writeback of the page.
- *	This may have performance implications.
- * @idx: A unique integer for each page and each struct backup.
- *	This allows the backup implementation to avoid managing
- *	its address space separately.
- * @page_gfp: The gfp value used when the page was allocated.
- *	This is used for accounting purposes.
- * @alloc_gfp: The gfp to be used when allocating memory.
+ * @folio: The folio to insert. Must be isolated (not on LRU), unlocked,
+ *	have exactly one reference (the caller's), and have no page-table
+ *	mappings. The folio must not be swapbacked or in the swapcache,
+ *	and folio->private must have been cleared by the caller.
+ * @order: The allocation order of @folio. If @order > 0 and @folio is not
+ *	already a large folio, it is promoted to a compound folio of this
+ *	order (see shmem_insert_folio()). split_page() must NOT have been
+ *	called; tail-page refcounts must be 0.
+ * @writeback: Whether to attempt immediate writeback to swap after insertion.
+ *	Best-effort; failure is silently ignored.
+ * @idx: Page-cache index within @backup. Must be aligned to (1 << @order).
+ * @folio_gfp: The gfp value used when the folio was allocated.
+ *	Used for memory-cgroup charging.
  *
- * Context: If called from reclaim context, the caller needs to
- * assert that the shrinker gfp has __GFP_FS set, to avoid
- * deadlocking on lock_page(). If @writeback is set to true and
- * called from reclaim context, the caller also needs to assert
- * that the shrinker gfp has __GFP_IO set, since without it,
- * we're not allowed to start backup IO.
+ * Context: May be called from reclaim context. If @writeback is true, the
+ * caller must assert that the shrinker gfp has __GFP_IO set.
  *
- * Return: A handle on success. Negative error code on failure.
+ * The folio is transferred zero-copy into the shmem page cache. On success
+ * the caller should release their reference with folio_put() and track the
+ * handle for later recovery via ttm_backup_copy_page() and release via
+ * ttm_backup_drop(). Handles for sub-pages of a compound folio follow
+ * sequentially: handle + j addresses sub-page j.
  *
- * Note: This function could be extended to back up a folio and
- * implementations would then split the folio internally if needed.
- * Drawback is that the caller would then have to keep track of
- * the folio size- and usage.
+ * Return: A positive handle on success. Negative error code on failure;
+ * the folio is returned to its original non-compound state and the
+ * caller retains ownership.
 */
 s64
-ttm_backup_backup_page(struct file *backup, struct page *page,
-		       bool writeback, pgoff_t idx, gfp_t page_gfp,
-		       gfp_t alloc_gfp)
+ttm_backup_insert_folio(struct file *backup, struct folio *folio,
+			unsigned int order, bool writeback, pgoff_t idx,
+			gfp_t folio_gfp)
 {
-	struct address_space *mapping = backup->f_mapping;
-	unsigned long handle = 0;
-	struct folio *to_folio;
 	int ret;
 
-	to_folio = shmem_read_folio_gfp(mapping, idx, alloc_gfp);
-	if (IS_ERR(to_folio))
-		return PTR_ERR(to_folio);
-
-	folio_mark_accessed(to_folio);
-	folio_lock(to_folio);
-	folio_mark_dirty(to_folio);
-	copy_highpage(folio_file_page(to_folio, idx), page);
-	handle = ttm_backup_shmem_idx_to_handle(idx);
-
-	if (writeback && !folio_mapped(to_folio) &&
-	    folio_clear_dirty_for_io(to_folio)) {
-		folio_set_reclaim(to_folio);
-		ret = shmem_writeout(to_folio, NULL, NULL);
-		if (!folio_test_writeback(to_folio))
-			folio_clear_reclaim(to_folio);
-		/*
-		 * If writeout succeeds, it unlocks the folio. errors
-		 * are otherwise dropped, since writeout is only best
-		 * effort here.
-		 */
-		if (ret)
-			folio_unlock(to_folio);
-	} else {
-		folio_unlock(to_folio);
-	}
-
-	folio_put(to_folio);
-
-	return handle;
+	WARN_ON_ONCE(folio_get_private(folio));
+	ret = shmem_insert_folio(backup, folio, order, idx, writeback, folio_gfp);
+	if (ret)
+		return ret;
+
+	return ttm_backup_shmem_idx_to_handle(idx);
 }
+EXPORT_SYMBOL_GPL(ttm_backup_insert_folio);
 
 /**
  * ttm_backup_fini() - Free the struct backup resources after last use.
diff --git a/drivers/gpu/drm/ttm/ttm_pool.c b/drivers/gpu/drm/ttm/ttm_pool.c
index d380a3c7fe40..8ea3a125c465 100644
--- a/drivers/gpu/drm/ttm/ttm_pool.c
+++ b/drivers/gpu/drm/ttm/ttm_pool.c
@@ -487,7 +487,7 @@ static void ttm_pool_split_for_swap(struct ttm_pool *pool, struct page *p)
 /**
  * DOC: Partial backup and restoration of a struct ttm_tt.
  *
- * Swapout using ttm_backup_backup_page() and swapin using
+ * Swapout using ttm_backup_insert_folio() and swapin using
  * ttm_backup_copy_page() may fail.
 * The former most likely due to lack of swap-space or memory, the latter due
 * to lack of memory or because of signal interruption during waits.
@@ -1045,12 +1045,11 @@ long ttm_pool_backup(struct ttm_pool *pool, struct ttm_tt *tt,
 {
 	struct file *backup = tt->backup;
 	struct page *page;
-	unsigned long handle;
-	gfp_t alloc_gfp;
 	gfp_t gfp;
 	int ret = 0;
 	pgoff_t shrunken = 0;
-	pgoff_t i, num_pages;
+	pgoff_t i, num_pages, npages;
+	unsigned long j;
 
 	if (WARN_ON(ttm_tt_is_backed_up(tt)))
 		return -EINVAL;
@@ -1070,7 +1069,8 @@ long ttm_pool_backup(struct ttm_pool *pool, struct ttm_tt *tt,
 		unsigned int order;
 
 		page = tt->pages[i];
-		if (unlikely(!page)) {
+		if (unlikely(!page ||
+			     ttm_backup_page_ptr_is_handle(page))) {
 			num_pages = 1;
 			continue;
 		}
@@ -1098,34 +1098,63 @@
 	else
 		gfp = GFP_HIGHUSER;
 
-	alloc_gfp = GFP_KERNEL | __GFP_HIGH | __GFP_NOWARN | __GFP_RETRY_MAYFAIL;
-
 	num_pages = tt->num_pages;
 
 	/* Pretend doing fault injection by shrinking only half of the pages. */
 	if (IS_ENABLED(CONFIG_FAULT_INJECTION) && should_fail(&backup_fault_inject, 1))
 		num_pages = DIV_ROUND_UP(num_pages, 2);
 
-	for (i = 0; i < num_pages; ++i) {
-		s64 shandle;
+	for (i = 0; i < num_pages; i += npages) {
+		unsigned int order;
+		s64 handle;
 
+		npages = 1;
 		page = tt->pages[i];
 		if (unlikely(!page))
 			continue;
 
-		ttm_pool_split_for_swap(pool, page);
+		/* Already-handled entry from a previous attempt. */
+		if (unlikely(ttm_backup_page_ptr_is_handle(page)))
+			continue;
+
+		order = ttm_pool_page_order(pool, page);
+		npages = 1UL << order;
 
-		shandle = ttm_backup_backup_page(backup, page, flags->writeback, i,
-						 gfp, alloc_gfp);
-		if (shandle < 0) {
-			/* We allow partially shrunken tts */
-			ret = shandle;
+		/*
+		 * If fault injection truncated num_pages mid-compound, skip
+		 * the partial tail rather than inserting it.
+		 */
+		if (unlikely(i + npages > num_pages))
+			break;
+
+		/*
+		 * Transfer this page zero-copy into shmem. page->private
+		 * stores the TTM order; clear it before inserting.
+		 */
+		page->private = 0;
+		handle = ttm_backup_insert_folio(backup, page_folio(page),
+						 order, flags->writeback,
+						 i, gfp);
+		if (unlikely(handle < 0)) {
+			if (order) {
+				page->private = order;
+				ttm_pool_split_for_swap(pool, page);
+				npages = 0;
+				continue;
+			}
+			ret = (int)handle;
 			break;
 		}
-		handle = shandle;
-		tt->pages[i] = ttm_backup_handle_to_page_ptr(handle);
-		__free_pages_gpu_account(page, 0, false);
-		shrunken++;
+
+		/*
+		 * NR_GPU_ACTIVE is node-only; use mod_node_page_state()
+		 * directly after the folio becomes memcg-charged.
+		 */
+		mod_node_page_state(page_pgdat(page), NR_GPU_ACTIVE, -(1 << order));
+		folio_put(page_folio(page));
+		for (j = 0; j < npages; j++)
+			tt->pages[i + j] = ttm_backup_handle_to_page_ptr(handle + j);
+		shrunken += npages;
 	}
 
 	return shrunken ? shrunken : ret;
diff --git a/include/drm/ttm/ttm_backup.h b/include/drm/ttm/ttm_backup.h
index 29b9c855af77..0c2feed0bffb 100644
--- a/include/drm/ttm/ttm_backup.h
+++ b/include/drm/ttm/ttm_backup.h
@@ -13,9 +13,8 @@
 * ttm_backup_handle_to_page_ptr() - Convert handle to struct page pointer
 * @handle: The handle to convert.
 *
- * Converts an opaque handle received from the
- * ttm_backup_backup_page() function to an (invalid)
- * struct page pointer suitable for a struct page array.
+ * Converts an opaque handle received from ttm_backup_insert_folio()
+ * to an (invalid) struct page pointer suitable for a struct page array.
 *
 * Return: An (invalid) struct page pointer.
 */
@@ -59,9 +58,9 @@ int ttm_backup_copy_page(struct file *backup, struct page *dst,
 			 pgoff_t handle, bool intr, gfp_t additional_gfp);
 
 s64
-ttm_backup_backup_page(struct file *backup, struct page *page,
-		       bool writeback, pgoff_t idx, gfp_t page_gfp,
-		       gfp_t alloc_gfp);
+ttm_backup_insert_folio(struct file *backup, struct folio *folio,
+			unsigned int order, bool writeback, pgoff_t idx,
+			gfp_t folio_gfp);
 
 void ttm_backup_fini(struct file *backup);
 
--
2.54.0