Architectures frequently only care about the address associated with a
page table. The current ptdesc API forces callers to acquire a ptdesc
in order to work with a page table. Add more APIs to abstract ptdescs
away from architectures that don't need the descriptor.

This patch adds pgtable_alloc_addr() and pgtable_free_addr() to operate
on the underlying addresses associated with page table descriptors,
similar to __get_free_pages() and free_pages(). The allocations are
zeroed since there is no reason to want a page table with stale data.

Suggested-by: Dave Hansen
Signed-off-by: Vishal Moola (Oracle)
---
 include/linux/mm.h |  4 ++++
 mm/memory.c        | 34 ++++++++++++++++++++++++++++++++++
 2 files changed, 38 insertions(+)

diff --git a/include/linux/mm.h b/include/linux/mm.h
index f8a8fd47399c..3f3000567823 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -3419,6 +3419,10 @@ static inline void __pagetable_free(struct ptdesc *pt)
 	__free_pages(page, compound_order(page));
 }
 
+unsigned long pgtable_alloc_addr_noprof(gfp_t gfp, unsigned int order);
+#define pgtable_alloc_addr(...) alloc_hooks(pgtable_alloc_addr_noprof(__VA_ARGS__))
+void pgtable_free_addr(const void *addr);
+
 #ifdef CONFIG_ASYNC_KERNEL_PGTABLE_FREE
 void pagetable_free_kernel(struct ptdesc *pt);
 #else
diff --git a/mm/memory.c b/mm/memory.c
index d6d273eb2189..96c4c4d06aa1 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -7451,6 +7451,40 @@ long copy_folio_from_user(struct folio *dst_folio,
 }
 #endif /* CONFIG_TRANSPARENT_HUGEPAGE || CONFIG_HUGETLBFS */
 
+/**
+ * pgtable_alloc_addr - Allocate pagetables to get an address
+ * @gfp: GFP flags
+ * @order: desired pagetable order
+ *
+ * pgtable_alloc_addr is like pagetable_alloc. This is for callers who
+ * only want a page table's address, not its ptdesc.
+ *
+ * Return: The address associated with the allocated page table, or 0 on
+ * failure.
+ */
+unsigned long pgtable_alloc_addr_noprof(gfp_t gfp, unsigned int order)
+{
+	struct ptdesc *ptdesc = pagetable_alloc_noprof(gfp | __GFP_ZERO, order);
+
+	if (!ptdesc)
+		return 0;
+	return (unsigned long) ptdesc_address(ptdesc);
+}
+
+/**
+ * pgtable_free_addr - Free pagetables by address
+ * @addr: The virtual address from pgtable_alloc_addr()
+ *
+ * This function is for callers who have the address but no ptdesc. If you
+ * have the ptdesc, use pagetable_free() instead.
+ */
+void pgtable_free_addr(const void *addr)
+{
+	struct ptdesc *ptdesc = virt_to_ptdesc(addr);
+
+	pagetable_free(ptdesc);
+}
+
 #if defined(CONFIG_SPLIT_PTE_PTLOCKS) && ALLOC_SPLIT_PTLOCKS
 static struct kmem_cache *page_ptl_cachep;
-- 
2.52.0
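
For architecture code that only needs the address, the intended calling
pattern would look roughly like the sketch below (a hypothetical caller,
not part of this series; the casts mirror the set_memory.c conversions
that follow):

	/* Allocate one zeroed page-table page and get its virtual address. */
	pte_t *pte = (pte_t *)pgtable_alloc_addr(GFP_KERNEL, 0);

	if (!pte)
		return -ENOMEM;

	/* ... populate the table and install it in the paging hierarchy ... */

	/* Free by address; the ptdesc is recovered via virt_to_ptdesc(). */
	pgtable_free_addr(pte);
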
We need all allocation and free sites to use the ptdesc APIs in order
to allocate page tables separately from regular pages. Convert these
pte allocation/free sites to use ptdescs.

Also, rename the *_pte_page() functions to *_pte(). Rename them now to
avoid any confusion later. Eventually these allocations will be backed
by a ptdesc, not a page, but that's not important to callers either.

Signed-off-by: Vishal Moola (Oracle)
---
 arch/x86/mm/pat/set_memory.c | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/arch/x86/mm/pat/set_memory.c b/arch/x86/mm/pat/set_memory.c
index 6c6eb486f7a6..c6c68fbbb046 100644
--- a/arch/x86/mm/pat/set_memory.c
+++ b/arch/x86/mm/pat/set_memory.c
@@ -1400,7 +1400,7 @@ static int collapse_large_pages(unsigned long addr, struct list_head *pgtables)
 	return collapsed;
 }
 
-static bool try_to_free_pte_page(pte_t *pte)
+static bool try_to_free_pte(pte_t *pte)
 {
 	int i;
 
@@ -1408,7 +1408,7 @@ static bool try_to_free_pte_page(pte_t *pte)
 		if (!pte_none(pte[i]))
 			return false;
 
-	free_page((unsigned long)pte);
+	pgtable_free_addr(pte);
 	return true;
 }
 
@@ -1435,7 +1435,7 @@ static bool unmap_pte_range(pmd_t *pmd, unsigned long start, unsigned long end)
 		pte++;
 	}
 
-	if (try_to_free_pte_page((pte_t *)pmd_page_vaddr(*pmd))) {
+	if (try_to_free_pte((pte_t *)pmd_page_vaddr(*pmd))) {
 		pmd_clear(pmd);
 		return true;
 	}
@@ -1537,9 +1537,9 @@ static void unmap_pud_range(p4d_t *p4d, unsigned long start, unsigned long end)
 	 */
 }
 
-static int alloc_pte_page(pmd_t *pmd)
+static int alloc_pte(pmd_t *pmd)
 {
-	pte_t *pte = (pte_t *)get_zeroed_page(GFP_KERNEL);
+	pte_t *pte = (pte_t *) pgtable_alloc_addr(GFP_KERNEL, 0);
 	if (!pte)
 		return -1;
 
@@ -1600,7 +1600,7 @@ static long populate_pmd(struct cpa_data *cpa,
 		 */
 		pmd = pmd_offset(pud, start);
 		if (pmd_none(*pmd))
-			if (alloc_pte_page(pmd))
+			if (alloc_pte(pmd))
 				return -1;
 
 		populate_pte(cpa, start, pre_end, cur_pages, pmd, pgprot);
@@ -1641,7 +1641,7 @@ static long populate_pmd(struct cpa_data *cpa,
 	if (start < end) {
 		pmd = pmd_offset(pud, start);
 		if (pmd_none(*pmd))
-			if (alloc_pte_page(pmd))
+			if (alloc_pte(pmd))
 				return -1;
 
 		populate_pte(cpa, start, end, num_pages - cur_pages,
-- 
2.52.0

We need all allocation and free sites to use the ptdesc APIs in order
to allocate page tables separately from regular pages. Convert these
pmd allocation/free sites to use ptdescs. populate_pgd() also allocates
pagetables that may later be freed by try_to_free_pmd_page(), so
allocate ptdescs there as well.

Also, rename the *_pmd_page() functions to *_pmd(). Rename them now to
avoid any confusion later. Eventually these allocations will be backed
by a ptdesc, not a page, but that's not important to callers either.

Signed-off-by: Vishal Moola (Oracle)
---
 arch/x86/mm/pat/set_memory.c | 22 +++++++++++-----------
 1 file changed, 11 insertions(+), 11 deletions(-)

diff --git a/arch/x86/mm/pat/set_memory.c b/arch/x86/mm/pat/set_memory.c
index c6c68fbbb046..dfe05cdf460c 100644
--- a/arch/x86/mm/pat/set_memory.c
+++ b/arch/x86/mm/pat/set_memory.c
@@ -1412,7 +1412,7 @@ static bool try_to_free_pte(pte_t *pte)
 	return true;
 }
 
-static bool try_to_free_pmd_page(pmd_t *pmd)
+static bool try_to_free_pmd(pmd_t *pmd)
 {
 	int i;
 
@@ -1420,7 +1420,7 @@ static bool try_to_free_pmd_page(pmd_t *pmd)
 		if (!pmd_none(pmd[i]))
 			return false;
 
-	free_page((unsigned long)pmd);
+	pgtable_free_addr(pmd);
 	return true;
 }
 
@@ -1446,7 +1446,7 @@ static void __unmap_pmd_range(pud_t *pud, pmd_t *pmd,
 			      unsigned long start, unsigned long end)
 {
 	if (unmap_pte_range(pmd, start, end))
-		if (try_to_free_pmd_page(pud_pgtable(*pud)))
+		if (try_to_free_pmd(pud_pgtable(*pud)))
 			pud_clear(pud);
 }
 
@@ -1490,7 +1490,7 @@ static void unmap_pmd_range(pud_t *pud, unsigned long start, unsigned long end)
 	 * Try again to free the PMD page if haven't succeeded above.
 	 */
 	if (!pud_none(*pud))
-		if (try_to_free_pmd_page(pud_pgtable(*pud)))
+		if (try_to_free_pmd(pud_pgtable(*pud)))
 			pud_clear(pud);
 }
 
@@ -1547,9 +1547,9 @@ static int alloc_pte(pmd_t *pmd)
 	return 0;
 }
 
-static int alloc_pmd_page(pud_t *pud)
+static int alloc_pmd(pud_t *pud)
 {
-	pmd_t *pmd = (pmd_t *)get_zeroed_page(GFP_KERNEL);
+	pmd_t *pmd = (pmd_t *) pgtable_alloc_addr(GFP_KERNEL, 0);
 	if (!pmd)
 		return -1;
 
@@ -1622,7 +1622,7 @@ static long populate_pmd(struct cpa_data *cpa,
 	 * We cannot use a 1G page so allocate a PMD page if needed.
 	 */
 	if (pud_none(*pud))
-		if (alloc_pmd_page(pud))
+		if (alloc_pmd(pud))
 			return -1;
 
 	pmd = pmd_offset(pud, start);
@@ -1678,7 +1678,7 @@ static int populate_pud(struct cpa_data *cpa, unsigned long start, p4d_t *p4d,
 		 * Need a PMD page?
 		 */
 		if (pud_none(*pud))
-			if (alloc_pmd_page(pud))
+			if (alloc_pmd(pud))
 				return -1;
 
 		cur_pages = populate_pmd(cpa, start, pre_end, cur_pages,
@@ -1715,7 +1715,7 @@ static int populate_pud(struct cpa_data *cpa, unsigned long start, p4d_t *p4d,
 	pud = pud_offset(p4d, start);
 	if (pud_none(*pud))
-		if (alloc_pmd_page(pud))
+		if (alloc_pmd(pud))
 			return -1;
 
 	tmp = populate_pmd(cpa, start, end, cpa->numpages - cur_pages,
@@ -1743,7 +1743,7 @@ static int populate_pgd(struct cpa_data *cpa, unsigned long addr)
 	pgd_entry = cpa->pgd + pgd_index(addr);
 	if (pgd_none(*pgd_entry)) {
-		p4d = (p4d_t *)get_zeroed_page(GFP_KERNEL);
+		p4d = (p4d_t *)pgtable_alloc_addr(GFP_KERNEL, 0);
 		if (!p4d)
 			return -1;
 
@@ -1755,7 +1755,7 @@ static int populate_pgd(struct cpa_data *cpa, unsigned long addr)
 	 */
 	p4d = p4d_offset(pgd_entry, addr);
 	if (p4d_none(*p4d)) {
-		pud = (pud_t *)get_zeroed_page(GFP_KERNEL);
+		pud = (pud_t *)pgtable_alloc_addr(GFP_KERNEL, 0);
 		if (!pud)
 			return -1;
 
-- 
2.52.0

In order to separately allocate ptdescs from pages, we need all
allocation and free sites to use the appropriate functions.

split_large_page() allocates a page to be used as a page table. This
should be allocating a ptdesc, so convert it.

Signed-off-by: Vishal Moola (Oracle)
Acked-by: Mike Rapoport (Microsoft)
---
 arch/x86/mm/pat/set_memory.c | 13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)

diff --git a/arch/x86/mm/pat/set_memory.c b/arch/x86/mm/pat/set_memory.c
index dfe05cdf460c..dd17034a3c58 100644
--- a/arch/x86/mm/pat/set_memory.c
+++ b/arch/x86/mm/pat/set_memory.c
@@ -1119,9 +1119,10 @@ static void split_set_pte(struct cpa_data *cpa, pte_t *pte, unsigned long pfn,
 static int
 __split_large_page(struct cpa_data *cpa, pte_t *kpte, unsigned long address,
-		   struct page *base)
+		   struct ptdesc *ptdesc)
 {
 	unsigned long lpaddr, lpinc, ref_pfn, pfn, pfninc = 1;
+	struct page *base = ptdesc_page(ptdesc);
 	pte_t *pbase = (pte_t *)page_address(base);
 	unsigned int i, level;
 	pgprot_t ref_prot;
@@ -1226,18 +1227,18 @@ __split_large_page(struct cpa_data *cpa, pte_t *kpte, unsigned long address,
 static int split_large_page(struct cpa_data *cpa, pte_t *kpte,
 			    unsigned long address)
 {
-	struct page *base;
+	struct ptdesc *ptdesc;
 
 	if (!debug_pagealloc_enabled())
 		spin_unlock(&cpa_lock);
-	base = alloc_pages(GFP_KERNEL, 0);
+	ptdesc = pagetable_alloc(GFP_KERNEL, 0);
 	if (!debug_pagealloc_enabled())
 		spin_lock(&cpa_lock);
-	if (!base)
+	if (!ptdesc)
 		return -ENOMEM;
 
-	if (__split_large_page(cpa, kpte, address, base))
-		__free_page(base);
+	if (__split_large_page(cpa, kpte, address, ptdesc))
+		pagetable_free(ptdesc);
 
 	return 0;
 }
-- 
2.52.0
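
As a rough summary of the conversion pattern used throughout this series
(illustrative fragments distilled from the hunks above, not additional
changes):

	/* Before: page tables allocated and freed as bare pages. */
	struct page *base = alloc_pages(GFP_KERNEL, 0);
	...
	__free_page(base);

	/* After: allocated and freed as ptdescs ... */
	struct ptdesc *ptdesc = pagetable_alloc(GFP_KERNEL, 0);
	...
	pagetable_free(ptdesc);

	/* ... or, where callers only track an address, via the new wrappers. */
	unsigned long addr = pgtable_alloc_addr(GFP_KERNEL, 0);
	...
	pgtable_free_addr((void *)addr);
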