Create a slab cache for ptdescs and point to the struct page from the
ptdesc.  Remove all the padding from ptdesc that makes it line up with
struct page.

Signed-off-by: Matthew Wilcox (Oracle)
---
 include/linux/mm.h       |  1 +
 include/linux/mm_types.h | 50 ++++------------------------------------
 mm/internal.h            |  1 +
 mm/memory.c              | 35 ++++++++++++++++++++++++----
 mm/mm_init.c             |  1 +
 5 files changed, 37 insertions(+), 51 deletions(-)

diff --git a/include/linux/mm.h b/include/linux/mm.h
index e60b181da3df..e8bb52061b0c 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -2970,6 +2970,7 @@ static inline struct ptdesc *page_ptdesc(const struct page *page)
  * The high bits are used for information like zone/node/section.
  */
 enum pt_flags {
+	/* Bits 0-3 used for pt_order */
 	PT_reserved		= PG_reserved,
 };
 
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index f5d9e0afe0fa..efdf29b8b478 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -548,38 +548,30 @@ FOLIO_MATCH(compound_head, _head_3);
 /**
  * struct ptdesc -    Memory descriptor for page tables.
  * @pt_flags:         enum pt_flags plus zone/node/section.
+ * @pt_page:          page allocated to store page table entries.
  * @pt_rcu_head:      For freeing page table pages.
  * @pt_list:          List of used page tables. Used for s390 gmap shadow pages
  *                    (which are not linked into the user page tables) and x86
  *                    pgds.
- * @_pt_pad_1:        Padding that aliases with page's compound head.
  * @pmd_huge_pte:     Protected by ptdesc->ptl, used for THPs.
- * @__page_mapping:   Aliases with page->mapping. Unused for page tables.
 * @pt_index:         Used for s390 gmap.
 * @pt_mm:            Used for x86 pgds.
 * @pt_frag_refcount: For fragmented page table tracking. Powerpc only.
 * @pt_share_count:   Used for HugeTLB PMD page table share count.
- * @_pt_pad_2:        Padding to ensure proper alignment.
 * @ptl:              Lock for the page table.
- * @__page_type:      Same as page->page_type. Unused for page tables.
- * @__page_refcount:  Same as page refcount.
- * @pt_memcg_data:    Memcg data. Tracked for page tables here.
 *
 * This struct overlays struct page for now. Do not modify without a good
 * understanding of the issues.
  */
 struct ptdesc {
 	memdesc_flags_t pt_flags;
+	struct page *pt_page;
 
 	union {
 		struct rcu_head pt_rcu_head;
 		struct list_head pt_list;
-		struct {
-			unsigned long _pt_pad_1;
-			pgtable_t pmd_huge_pte;
-		};
+		pgtable_t pmd_huge_pte;
 	};
-	unsigned long __page_mapping;
 
 	union {
 		pgoff_t pt_index;
@@ -591,47 +583,13 @@ struct ptdesc {
 	};
 
 	union {
-		unsigned long _pt_pad_2;
 #if ALLOC_SPLIT_PTLOCKS
 		spinlock_t *ptl;
 #else
 		spinlock_t ptl;
 #endif
 	};
-	unsigned int __page_type;
-	atomic_t __page_refcount;
-#ifdef CONFIG_MEMCG
-	unsigned long pt_memcg_data;
-#endif
-};
-
-#define TABLE_MATCH(pg, pt)						\
-	static_assert(offsetof(struct page, pg) == offsetof(struct ptdesc, pt))
-TABLE_MATCH(flags, pt_flags);
-TABLE_MATCH(compound_head, pt_list);
-TABLE_MATCH(compound_head, _pt_pad_1);
-TABLE_MATCH(mapping, __page_mapping);
-TABLE_MATCH(__folio_index, pt_index);
-TABLE_MATCH(rcu_head, pt_rcu_head);
-TABLE_MATCH(page_type, __page_type);
-TABLE_MATCH(_refcount, __page_refcount);
-#ifdef CONFIG_MEMCG
-TABLE_MATCH(memcg_data, pt_memcg_data);
-#endif
-#undef TABLE_MATCH
-static_assert(sizeof(struct ptdesc) <= sizeof(struct page));
-
-#define ptdesc_page(pt)			(_Generic((pt),			\
-	const struct ptdesc *:		(const struct page *)(pt),	\
-	struct ptdesc *:		(struct page *)(pt)))
-
-#define ptdesc_folio(pt)		(_Generic((pt),			\
-	const struct ptdesc *:		(const struct folio *)(pt),	\
-	struct ptdesc *:		(struct folio *)(pt)))
-
-#define page_ptdesc(p)			(_Generic((p),			\
-	const struct page *:		(const struct ptdesc *)(p),	\
-	struct page *:			(struct ptdesc *)(p)))
+} __aligned(16);
 
 #ifdef CONFIG_HUGETLB_PMD_PAGE_TABLE_SHARING
 static inline void ptdesc_pmd_pts_init(struct ptdesc *ptdesc)
diff --git a/mm/internal.h b/mm/internal.h
index 15d64601289b..d57487ba443d 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -100,6 +100,7 @@ struct pagetable_move_control {
 	unlikely(__ret_warn_once);					\
 })
 
+void __init ptcache_init(void);
 void page_writeback_init(void);
 
 /*
diff --git a/mm/memory.c b/mm/memory.c
index 47eb5834db23..331582bec495 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -7267,10 +7267,17 @@ long copy_folio_from_user(struct folio *dst_folio,
 }
 #endif /* CONFIG_TRANSPARENT_HUGEPAGE || CONFIG_HUGETLBFS */
 
+static struct kmem_cache *ptcache;
+
+void __init ptcache_init(void)
+{
+	ptcache = KMEM_CACHE(ptdesc, 0);
+}
+
 /**
  * pagetable_alloc - Allocate pagetables
  * @gfp: GFP flags
- * @order: desired pagetable order
+ * @order: pagetable order
 *
 * pagetable_alloc allocates memory for page tables as well as a page table
 * descriptor to describe that memory.
@@ -7279,16 +7286,34 @@ long copy_folio_from_user(struct folio *dst_folio,
  */
 struct ptdesc *pagetable_alloc_noprof(gfp_t gfp, unsigned int order)
 {
-	struct page *page = alloc_frozen_pages_noprof(gfp | __GFP_COMP, order);
+	struct page *page;
 	pg_data_t *pgdat;
+	struct ptdesc *ptdesc;
+
+	BUG_ON(!ptcache);
 
-	if (!page)
+	ptdesc = kmem_cache_alloc(ptcache, gfp);
+	if (!ptdesc)
 		return NULL;
 
+	page = alloc_pages_memdesc(gfp, order,
+			memdesc_create(ptdesc, MEMDESC_TYPE_PAGE_TABLE));
+	if (!page) {
+		kmem_cache_free(ptcache, ptdesc);
+		return NULL;
+	}
+
+	VM_BUG_ON_PAGE(memdesc_type(page->memdesc) != MEMDESC_TYPE_PAGE_TABLE, page);
 	pgdat = NODE_DATA(page_to_nid(page));
 	mod_node_page_state(pgdat, NR_PAGETABLE, 1 << order);
 	__SetPageTable(page);
-	return page_ptdesc(page);
+	page->__folio_index = (unsigned long)ptdesc;
+
+	ptdesc->pt_flags = page->flags;
+	ptdesc->pt_flags.f |= order;
+	ptdesc->pt_page = page;
+
+	return ptdesc;
 }
 
 /**
@@ -7302,7 +7327,7 @@ void pagetable_free(struct ptdesc *pt)
 {
 	pg_data_t *pgdat = NODE_DATA(memdesc_nid(pt->pt_flags));
 	struct page *page = ptdesc_page(pt);
-	unsigned int order = compound_order(page);
+	unsigned int order = pt->pt_flags.f & 0xf;
 
 	mod_node_page_state(pgdat, NR_PAGETABLE, -(1L << order));
 	__ClearPageTable(page);
diff --git a/mm/mm_init.c b/mm/mm_init.c
index 3db2dea7db4c..dc6d2f81b692 100644
--- a/mm/mm_init.c
+++ b/mm/mm_init.c
@@ -2712,6 +2712,7 @@ void __init mm_core_init(void)
 	 */
 	page_ext_init_flatmem_late();
 	kmemleak_init();
+	ptcache_init();
 	ptlock_cache_init();
 	pgtable_cache_init();
 	debug_objects_mem_init();
-- 
2.47.2
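
[Editor's note, not part of the patch: a minimal userspace sketch of the two
bookkeeping moves the new allocation path relies on -- the pagetable order is
stashed in bits 0-3 of pt_flags, and the page and its descriptor point at each
other (ptdesc->pt_page one way, page->__folio_index the other). All names
below (demo_page, demo_ptdesc, demo_alloc, demo_free, PT_ORDER_MASK) are
invented for illustration; the real code is pagetable_alloc_noprof() and
pagetable_free() above.]

/* Illustration only -- compile and run in userspace, not kernel code. */
#include <assert.h>
#include <stdio.h>
#include <stdlib.h>

#define PT_ORDER_MASK	0xfUL		/* bits 0-3 carry the order */

struct demo_page {
	unsigned long flags;		/* zone/node/section live in high bits */
	unsigned long folio_index;	/* back-pointer to the descriptor */
};

struct demo_ptdesc {
	unsigned long pt_flags;		/* copy of page->flags plus the order */
	struct demo_page *pt_page;	/* forward pointer to the page */
};

/* Mirrors the alloc path: descriptor first, then the backing pages. */
static struct demo_ptdesc *demo_alloc(unsigned int order)
{
	struct demo_ptdesc *pt = calloc(1, sizeof(*pt));
	struct demo_page *page = calloc(1, sizeof(*page));

	if (!pt || !page) {
		free(pt);
		free(page);
		return NULL;
	}
	page->folio_index = (unsigned long)pt;	/* page -> ptdesc */
	pt->pt_flags = page->flags | order;	/* order in bits 0-3 */
	pt->pt_page = page;			/* ptdesc -> page */
	return pt;
}

/* Mirrors the free path: the order comes back out of pt_flags. */
static void demo_free(struct demo_ptdesc *pt)
{
	unsigned int order = pt->pt_flags & PT_ORDER_MASK;

	printf("freeing %lu page(s)\n", 1UL << order);
	free(pt->pt_page);
	free(pt);
}

int main(void)
{
	struct demo_ptdesc *pt = demo_alloc(2);

	assert(pt && pt->pt_page->folio_index == (unsigned long)pt);
	demo_free(pt);			/* prints "freeing 4 page(s)" */
	return 0;
}

The point of the encoding is visible in the diff itself: pagetable_free() now
reads the order out of pt_flags instead of calling compound_order(), which is
what lets pagetable_alloc_noprof() stop passing __GFP_COMP.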