Simplify alloc_fresh_hugetlb_folio() and convert more functions to use it,
which lets us remove prep_new_hugetlb_folio() and __prep_new_hugetlb_folio().

Reviewed-by: Sidhartha Kumar
Signed-off-by: Kefeng Wang
---
 mm/hugetlb.c | 44 +++++++++++++-------------------------------
 1 file changed, 13 insertions(+), 31 deletions(-)

diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 572b6f777284..93aa32c51265 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -1906,20 +1906,6 @@ static void init_new_hugetlb_folio(struct hstate *h, struct folio *folio)
 	set_hugetlb_cgroup_rsvd(folio, NULL);
 }
 
-static void __prep_new_hugetlb_folio(struct hstate *h, struct folio *folio)
-{
-	init_new_hugetlb_folio(h, folio);
-	hugetlb_vmemmap_optimize_folio(h, folio);
-}
-
-static void prep_new_hugetlb_folio(struct hstate *h, struct folio *folio, int nid)
-{
-	__prep_new_hugetlb_folio(h, folio);
-	spin_lock_irq(&hugetlb_lock);
-	__prep_account_new_huge_page(h, nid);
-	spin_unlock_irq(&hugetlb_lock);
-}
-
 /*
  * Find and lock address space (mapping) in write mode.
  *
@@ -2005,10 +1991,10 @@ static struct folio *only_alloc_fresh_hugetlb_folio(struct hstate *h,
 }
 
 /*
- * Common helper to allocate a fresh hugetlb page. All specific allocators
- * should use this function to get new hugetlb pages
+ * Common helper to allocate a fresh hugetlb folio. All specific allocators
+ * should use this function to get new hugetlb folio
  *
- * Note that returned page is 'frozen': ref count of head page and all tail
+ * Note that returned folio is 'frozen': ref count of head page and all tail
  * pages is zero.
  */
 static struct folio *alloc_fresh_hugetlb_folio(struct hstate *h,
@@ -2016,14 +2002,9 @@ static struct folio *alloc_fresh_hugetlb_folio(struct hstate *h,
 {
 	struct folio *folio;
 
-	if (hstate_is_gigantic(h))
-		folio = alloc_gigantic_folio(h, gfp_mask, nid, nmask);
-	else
-		folio = alloc_buddy_hugetlb_folio(h, gfp_mask, nid, nmask, NULL);
-	if (!folio)
-		return NULL;
-
-	prep_new_hugetlb_folio(h, folio, folio_nid(folio));
+	folio = only_alloc_fresh_hugetlb_folio(h, gfp_mask, nid, nmask, NULL);
+	if (folio)
+		hugetlb_vmemmap_optimize_folio(h, folio);
 	return folio;
 }
 
@@ -2241,12 +2222,10 @@ static struct folio *alloc_surplus_hugetlb_folio(struct hstate *h,
 		goto out_unlock;
 	spin_unlock_irq(&hugetlb_lock);
 
-	folio = only_alloc_fresh_hugetlb_folio(h, gfp_mask, nid, nmask, NULL);
+	folio = alloc_fresh_hugetlb_folio(h, gfp_mask, nid, nmask);
 	if (!folio)
 		return NULL;
 
-	hugetlb_vmemmap_optimize_folio(h, folio);
-
 	spin_lock_irq(&hugetlb_lock);
 	/*
 	 * nr_huge_pages needs to be adjusted within the same lock cycle
@@ -2290,6 +2269,10 @@ static struct folio *alloc_migrate_hugetlb_folio(struct hstate *h, gfp_t gfp_mas
 	if (!folio)
 		return NULL;
 
+	spin_lock_irq(&hugetlb_lock);
+	__prep_account_new_huge_page(h, folio_nid(folio));
+	spin_unlock_irq(&hugetlb_lock);
+
 	/* fresh huge pages are frozen */
 	folio_ref_unfreeze(folio, 1);
 	/*
@@ -2836,11 +2819,10 @@ static int alloc_and_dissolve_hugetlb_folio(struct folio *old_folio,
 		if (!new_folio) {
 			spin_unlock_irq(&hugetlb_lock);
 			gfp_mask = htlb_alloc_mask(h) | __GFP_THISNODE;
-			new_folio = alloc_buddy_hugetlb_folio(h, gfp_mask, nid,
-							      NULL, NULL);
+			new_folio = alloc_fresh_hugetlb_folio(h, gfp_mask,
+							      nid, NULL);
 			if (!new_folio)
 				return -ENOMEM;
-			__prep_new_hugetlb_folio(h, new_folio);
 			goto retry;
 		}
 
-- 
2.27.0

In order to avoid the wrong nid being passed into the accounting, move the
folio_nid() call into the helper itself: rename __prep_account_new_huge_page()
to account_new_hugetlb_folio() and make it take the folio directly.
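
For illustration, the call-site shape changes roughly as follows (hypothetical
caller, shown only to make the intent concrete):

	/* before: the caller has to supply the right node id */
	__prep_account_new_huge_page(h, folio_nid(folio));

	/* after: the helper derives the nid from the folio itself, so a
	 * mismatched nid can no longer be passed by accident */
	account_new_hugetlb_folio(h, folio);
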
Reviewed-by: Sidhartha Kumar
Signed-off-by: Kefeng Wang
---
 mm/hugetlb.c | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 93aa32c51265..4131467fc1cd 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -1890,11 +1890,11 @@ void free_huge_folio(struct folio *folio)
 /*
  * Must be called with the hugetlb lock held
  */
-static void __prep_account_new_huge_page(struct hstate *h, int nid)
+static void account_new_hugetlb_folio(struct hstate *h, struct folio *folio)
 {
 	lockdep_assert_held(&hugetlb_lock);
 	h->nr_huge_pages++;
-	h->nr_huge_pages_node[nid]++;
+	h->nr_huge_pages_node[folio_nid(folio)]++;
 }
 
 static void init_new_hugetlb_folio(struct hstate *h, struct folio *folio)
@@ -2020,7 +2020,7 @@ static void prep_and_add_allocated_folios(struct hstate *h,
 	/* Add all new pool pages to free lists in one lock cycle */
 	spin_lock_irqsave(&hugetlb_lock, flags);
 	list_for_each_entry_safe(folio, tmp_f, folio_list, lru) {
-		__prep_account_new_huge_page(h, folio_nid(folio));
+		account_new_hugetlb_folio(h, folio);
 		enqueue_hugetlb_folio(h, folio);
 	}
 	spin_unlock_irqrestore(&hugetlb_lock, flags);
@@ -2232,7 +2232,7 @@ static struct folio *alloc_surplus_hugetlb_folio(struct hstate *h,
 	 * as surplus_pages, otherwise it might confuse
 	 * persistent_huge_pages() momentarily.
 	 */
-	__prep_account_new_huge_page(h, folio_nid(folio));
+	account_new_hugetlb_folio(h, folio);
 
 	/*
 	 * We could have raced with the pool size change.
@@ -2270,7 +2270,7 @@ static struct folio *alloc_migrate_hugetlb_folio(struct hstate *h, gfp_t gfp_mas
 		return NULL;
 
 	spin_lock_irq(&hugetlb_lock);
-	__prep_account_new_huge_page(h, folio_nid(folio));
+	account_new_hugetlb_folio(h, folio);
 	spin_unlock_irq(&hugetlb_lock);
 
 	/* fresh huge pages are frozen */
@@ -2829,7 +2829,7 @@ static int alloc_and_dissolve_hugetlb_folio(struct folio *old_folio,
 	/*
 	 * Ok, old_folio is still a genuine free hugepage. Remove it from
 	 * the freelist and decrease the counters. These will be
-	 * incremented again when calling __prep_account_new_huge_page()
+	 * incremented again when calling account_new_hugetlb_folio()
	 * and enqueue_hugetlb_folio() for new_folio. The counters will
 	 * remain stable since this happens under the lock.
 	 */
@@ -2839,7 +2839,7 @@ static int alloc_and_dissolve_hugetlb_folio(struct folio *old_folio,
 	 * Ref count on new_folio is already zero as it was dropped
 	 * earlier. It can be directly added to the pool free list.
 	 */
-	__prep_account_new_huge_page(h, nid);
+	account_new_hugetlb_folio(h, new_folio);
 	enqueue_hugetlb_folio(h, new_folio);
 
 	/*
@@ -3309,7 +3309,7 @@ static void __init prep_and_add_bootmem_folios(struct hstate *h,
 		hugetlb_bootmem_init_migratetype(folio, h);
 		/* Subdivide locks to achieve better parallel performance */
 		spin_lock_irqsave(&hugetlb_lock, flags);
-		__prep_account_new_huge_page(h, folio_nid(folio));
+		account_new_hugetlb_folio(h, folio);
 		enqueue_hugetlb_folio(h, folio);
 		spin_unlock_irqrestore(&hugetlb_lock, flags);
 	}
-- 
2.27.0

Pass the order instead of the struct hstate, which removes the
huge_page_order() calls from the low-level hugetlb folio allocators.
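
This works because hstate_is_gigantic() is already defined in terms of the
order anyway (roughly, from include/linux/hugetlb.h in current kernels):

	static inline bool hstate_is_gigantic(struct hstate *h)
	{
		return huge_page_order(h) > MAX_PAGE_ORDER;
	}

so only_alloc_fresh_hugetlb_folio() can compute the order once and branch on
"order > MAX_PAGE_ORDER" directly, passing the plain order down to the buddy
and gigantic allocators.
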
Reviewed-by: Sidhartha Kumar
Reviewed-by: Jane Chu
Signed-off-by: Kefeng Wang
---
 mm/hugetlb.c     | 27 +++++++++++++--------------
 mm/hugetlb_cma.c |  3 +--
 mm/hugetlb_cma.h |  6 +++---
 3 files changed, 17 insertions(+), 19 deletions(-)

diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 4131467fc1cd..5c93faf82674 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -1473,17 +1473,16 @@ static int hstate_next_node_to_free(struct hstate *h, nodemask_t *nodes_allowed)
 
 #ifdef CONFIG_ARCH_HAS_GIGANTIC_PAGE
 #ifdef CONFIG_CONTIG_ALLOC
-static struct folio *alloc_gigantic_folio(struct hstate *h, gfp_t gfp_mask,
+static struct folio *alloc_gigantic_folio(int order, gfp_t gfp_mask,
 		int nid, nodemask_t *nodemask)
 {
 	struct folio *folio;
-	int order = huge_page_order(h);
 	bool retried = false;
 
 	if (nid == NUMA_NO_NODE)
 		nid = numa_mem_id();
 retry:
-	folio = hugetlb_cma_alloc_folio(h, gfp_mask, nid, nodemask);
+	folio = hugetlb_cma_alloc_folio(order, gfp_mask, nid, nodemask);
 	if (!folio) {
 		if (hugetlb_cma_exclusive_alloc())
 			return NULL;
@@ -1506,16 +1505,16 @@ static struct folio *alloc_gigantic_folio(struct hstate *h, gfp_t gfp_mask,
 }
 
 #else /* !CONFIG_CONTIG_ALLOC */
-static struct folio *alloc_gigantic_folio(struct hstate *h, gfp_t gfp_mask,
-					  int nid, nodemask_t *nodemask)
+static struct folio *alloc_gigantic_folio(int order, gfp_t gfp_mask, int nid,
+					  nodemask_t *nodemask)
 {
 	return NULL;
 }
 #endif /* CONFIG_CONTIG_ALLOC */
 
 #else /* !CONFIG_ARCH_HAS_GIGANTIC_PAGE */
-static struct folio *alloc_gigantic_folio(struct hstate *h, gfp_t gfp_mask,
-					  int nid, nodemask_t *nodemask)
+static struct folio *alloc_gigantic_folio(int order, gfp_t gfp_mask, int nid,
+					  nodemask_t *nodemask)
 {
 	return NULL;
 }
@@ -1926,11 +1925,9 @@ struct address_space *hugetlb_folio_mapping_lock_write(struct folio *folio)
 	return NULL;
 }
 
-static struct folio *alloc_buddy_hugetlb_folio(struct hstate *h,
-		gfp_t gfp_mask, int nid, nodemask_t *nmask,
-		nodemask_t *node_alloc_noretry)
+static struct folio *alloc_buddy_hugetlb_folio(int order, gfp_t gfp_mask,
+		int nid, nodemask_t *nmask, nodemask_t *node_alloc_noretry)
 {
-	int order = huge_page_order(h);
 	struct folio *folio;
 	bool alloc_try_hard = true;
 
@@ -1980,11 +1977,13 @@ static struct folio *only_alloc_fresh_hugetlb_folio(struct hstate *h,
 		nodemask_t *node_alloc_noretry)
 {
 	struct folio *folio;
+	int order = huge_page_order(h);
 
-	if (hstate_is_gigantic(h))
-		folio = alloc_gigantic_folio(h, gfp_mask, nid, nmask);
+	if (order > MAX_PAGE_ORDER)
+		folio = alloc_gigantic_folio(order, gfp_mask, nid, nmask);
 	else
-		folio = alloc_buddy_hugetlb_folio(h, gfp_mask, nid, nmask, node_alloc_noretry);
+		folio = alloc_buddy_hugetlb_folio(order, gfp_mask, nid, nmask,
+						  node_alloc_noretry);
 	if (folio)
 		init_new_hugetlb_folio(h, folio);
 	return folio;
diff --git a/mm/hugetlb_cma.c b/mm/hugetlb_cma.c
index f58ef4969e7a..e8e4dc7182d5 100644
--- a/mm/hugetlb_cma.c
+++ b/mm/hugetlb_cma.c
@@ -26,11 +26,10 @@ void hugetlb_cma_free_folio(struct folio *folio)
 }
 
 
-struct folio *hugetlb_cma_alloc_folio(struct hstate *h, gfp_t gfp_mask,
+struct folio *hugetlb_cma_alloc_folio(int order, gfp_t gfp_mask,
 		int nid, nodemask_t *nodemask)
 {
 	int node;
-	int order = huge_page_order(h);
 	struct folio *folio = NULL;
 
 	if (hugetlb_cma[nid])
diff --git a/mm/hugetlb_cma.h b/mm/hugetlb_cma.h
index f7d7fb9880a2..2c2ec8a7e134 100644
--- a/mm/hugetlb_cma.h
+++ b/mm/hugetlb_cma.h
@@ -4,7 +4,7 @@
 
 #ifdef CONFIG_CMA
 void hugetlb_cma_free_folio(struct folio *folio);
-struct folio *hugetlb_cma_alloc_folio(struct hstate *h, gfp_t gfp_mask,
+struct folio *hugetlb_cma_alloc_folio(int order, gfp_t gfp_mask,
 		int nid, nodemask_t *nodemask);
 struct huge_bootmem_page *hugetlb_cma_alloc_bootmem(struct hstate *h, int *nid,
 		bool node_exact);
@@ -18,8 +18,8 @@ static inline void hugetlb_cma_free_folio(struct folio *folio)
 {
 }
 
-static inline struct folio *hugetlb_cma_alloc_folio(struct hstate *h,
-		gfp_t gfp_mask, int nid, nodemask_t *nodemask)
+static inline struct folio *hugetlb_cma_alloc_folio(int order, gfp_t gfp_mask,
+		int nid, nodemask_t *nodemask)
 {
 	return NULL;
 }
-- 
2.27.0

The struct hstate argument of init_new_hugetlb_folio() is never used, so
remove it.

Reviewed-by: Sidhartha Kumar
Reviewed-by: Jane Chu
Signed-off-by: Kefeng Wang
---
 mm/hugetlb.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 5c93faf82674..ab748964d219 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -1896,7 +1896,7 @@ static void account_new_hugetlb_folio(struct hstate *h, struct folio *folio)
 	h->nr_huge_pages_node[folio_nid(folio)]++;
 }
 
-static void init_new_hugetlb_folio(struct hstate *h, struct folio *folio)
+static void init_new_hugetlb_folio(struct folio *folio)
 {
 	__folio_set_hugetlb(folio);
 	INIT_LIST_HEAD(&folio->lru);
@@ -1985,7 +1985,7 @@ static struct folio *only_alloc_fresh_hugetlb_folio(struct hstate *h,
 		folio = alloc_buddy_hugetlb_folio(order, gfp_mask, nid, nmask,
 						  node_alloc_noretry);
 	if (folio)
-		init_new_hugetlb_folio(h, folio);
+		init_new_hugetlb_folio(folio);
 	return folio;
 }
 
@@ -3404,7 +3404,7 @@ static void __init gather_bootmem_prealloc_node(unsigned long nid)
 		hugetlb_folio_init_vmemmap(folio, h,
 					   HUGETLB_VMEMMAP_RESERVE_PAGES);
-		init_new_hugetlb_folio(h, folio);
+		init_new_hugetlb_folio(folio);
 
 		if (hugetlb_bootmem_page_prehvo(m))
 			/*
@@ -4019,7 +4019,7 @@ static long demote_free_hugetlb_folios(struct hstate *src, struct hstate *dst,
 		prep_compound_page(page, dst->order);
 
 		new_folio->mapping = NULL;
-		init_new_hugetlb_folio(dst, new_folio);
+		init_new_hugetlb_folio(new_folio);
 		/* Copy the CMA flag so that it is freed correctly */
 		if (cma)
 			folio_set_hugetlb_cma(new_folio);
-- 
2.27.0

Move the NUMA_NO_NODE check out of the buddy and gigantic folio allocators to
clean up the code a bit; this also avoids NUMA_NO_NODE being passed as 'nid'
to node_isset() in alloc_buddy_hugetlb_folio().
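
The node_isset() concern is that NUMA_NO_NODE is -1, so using it as a bit
index in the per-node "no retry" mask would test a bogus bit. Normalising the
nid once in the common caller keeps both low-level allocators free of that
check, roughly:

	if (nid == NUMA_NO_NODE)
		nid = numa_mem_id();

done in only_alloc_fresh_hugetlb_folio() before either alloc_gigantic_folio()
or alloc_buddy_hugetlb_folio() is called.
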
Reviewed-by: Sidhartha Kumar
Reviewed-by: Jane Chu
Signed-off-by: Kefeng Wang
---
 mm/hugetlb.c | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index ab748964d219..4e8709d7deee 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -1479,8 +1479,6 @@ static struct folio *alloc_gigantic_folio(int order, gfp_t gfp_mask,
 	struct folio *folio;
 	bool retried = false;
 
-	if (nid == NUMA_NO_NODE)
-		nid = numa_mem_id();
 retry:
 	folio = hugetlb_cma_alloc_folio(order, gfp_mask, nid, nodemask);
 	if (!folio) {
@@ -1942,8 +1940,6 @@ static struct folio *alloc_buddy_hugetlb_folio(int order, gfp_t gfp_mask,
 		alloc_try_hard = false;
 	if (alloc_try_hard)
 		gfp_mask |= __GFP_RETRY_MAYFAIL;
-	if (nid == NUMA_NO_NODE)
-		nid = numa_mem_id();
 
 	folio = (struct folio *)__alloc_frozen_pages(gfp_mask, order, nid, nmask);
 
@@ -1979,6 +1975,9 @@ static struct folio *only_alloc_fresh_hugetlb_folio(struct hstate *h,
 	struct folio *folio;
 	int order = huge_page_order(h);
 
+	if (nid == NUMA_NO_NODE)
+		nid = numa_mem_id();
+
 	if (order > MAX_PAGE_ORDER)
 		folio = alloc_gigantic_folio(order, gfp_mask, nid, nmask);
 	else
-- 
2.27.0

Introduce an ACR_FLAGS_FROZEN flag to indicate that alloc_contig_range()
should allocate frozen compound pages, and provide
alloc_contig_frozen_pages() to allocate pages without incrementing their
refcount, which may be beneficial to some users (e.g. hugetlb).
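
A minimal usage sketch (hypothetical caller, not part of this patch):

	struct page *page;

	/* __GFP_COMP is added internally; the returned compound page has a
	 * frozen (zero) refcount */
	page = alloc_contig_frozen_pages(1 << order, GFP_KERNEL, nid, NULL);
	if (page)
		/* pair with free_frozen_pages(), not free_contig_range() */
		free_frozen_pages(page, order);
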
Signed-off-by: Kefeng Wang
---
 include/linux/gfp.h |  6 ++++
 mm/page_alloc.c     | 85 +++++++++++++++++++++++++--------------------
 2 files changed, 54 insertions(+), 37 deletions(-)

diff --git a/include/linux/gfp.h b/include/linux/gfp.h
index 5ebf26fcdcfa..d0047b85fe34 100644
--- a/include/linux/gfp.h
+++ b/include/linux/gfp.h
@@ -427,6 +427,7 @@ extern gfp_t vma_thp_gfp_mask(struct vm_area_struct *vma);
 typedef unsigned int __bitwise acr_flags_t;
 #define ACR_FLAGS_NONE ((__force acr_flags_t)0)	// ordinary allocation request
 #define ACR_FLAGS_CMA ((__force acr_flags_t)BIT(0))	// allocate for CMA
+#define ACR_FLAGS_FROZEN ((__force acr_flags_t)BIT(1))	// allocate for frozen compound pages
 
 /* The below functions must be run on a range from a single zone. */
 extern int alloc_contig_range_noprof(unsigned long start, unsigned long end,
@@ -437,6 +438,11 @@ extern struct page *alloc_contig_pages_noprof(unsigned long nr_pages, gfp_t gfp_
 				       int nid, nodemask_t *nodemask);
 #define alloc_contig_pages(...)	alloc_hooks(alloc_contig_pages_noprof(__VA_ARGS__))
 
+struct page *alloc_contig_frozen_pages_noprof(unsigned long nr_pages,
+		gfp_t gfp_mask, int nid, nodemask_t *nodemask);
+#define alloc_contig_frozen_pages(...) \
+	alloc_hooks(alloc_contig_frozen_pages_noprof(__VA_ARGS__))
+
 #endif
 
 void free_contig_range(unsigned long pfn, unsigned long nr_pages);
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index baead29b3e67..0677c49fdff1 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -6854,6 +6854,9 @@ int alloc_contig_range_noprof(unsigned long start, unsigned long end,
 	if (__alloc_contig_verify_gfp_mask(gfp_mask, (gfp_t *)&cc.gfp_mask))
 		return -EINVAL;
 
+	if ((alloc_flags & ACR_FLAGS_FROZEN) && !(gfp_mask & __GFP_COMP))
+		return -EINVAL;
+
 	/*
 	 * What we do here is we mark all pageblocks in range as
 	 * MIGRATE_ISOLATE. Because pageblock and max order pages may
@@ -6951,7 +6954,8 @@ int alloc_contig_range_noprof(unsigned long start, unsigned long end,
 		check_new_pages(head, order);
 		prep_new_page(head, order, gfp_mask, 0);
-		set_page_refcounted(head);
+		if (!(alloc_flags & ACR_FLAGS_FROZEN))
+			set_page_refcounted(head);
 	} else {
 		ret = -EINVAL;
 		WARN(true, "PFN range: requested [%lu, %lu), allocated [%lu, %lu)\n",
@@ -6963,15 +6967,6 @@ int alloc_contig_range_noprof(unsigned long start, unsigned long end,
 }
 EXPORT_SYMBOL(alloc_contig_range_noprof);
 
-static int __alloc_contig_pages(unsigned long start_pfn,
-				unsigned long nr_pages, gfp_t gfp_mask)
-{
-	unsigned long end_pfn = start_pfn + nr_pages;
-
-	return alloc_contig_range_noprof(start_pfn, end_pfn, ACR_FLAGS_NONE,
-					 gfp_mask);
-}
-
 static bool pfn_range_valid_contig(struct zone *z, unsigned long start_pfn,
 				   unsigned long nr_pages)
 {
@@ -7003,31 +6998,8 @@ static bool zone_spans_last_pfn(const struct zone *zone,
 	return zone_spans_pfn(zone, last_pfn);
 }
 
-/**
- * alloc_contig_pages() -- tries to find and allocate contiguous range of pages
- * @nr_pages:	Number of contiguous pages to allocate
- * @gfp_mask:	GFP mask. Node/zone/placement hints limit the search; only some
- *		action and reclaim modifiers are supported. Reclaim modifiers
- *		control allocation behavior during compaction/migration/reclaim.
- * @nid:	Target node
- * @nodemask:	Mask for other possible nodes
- *
- * This routine is a wrapper around alloc_contig_range(). It scans over zones
- * on an applicable zonelist to find a contiguous pfn range which can then be
- * tried for allocation with alloc_contig_range(). This routine is intended
- * for allocation requests which can not be fulfilled with the buddy allocator.
- *
- * The allocated memory is always aligned to a page boundary. If nr_pages is a
- * power of two, then allocated range is also guaranteed to be aligned to same
- * nr_pages (e.g. 1GB request would be aligned to 1GB).
- *
- * Allocated pages can be freed with free_contig_range() or by manually calling
- * __free_page() on each allocated page.
- *
- * Return: pointer to contiguous pages on success, or NULL if not successful.
- */
-struct page *alloc_contig_pages_noprof(unsigned long nr_pages, gfp_t gfp_mask,
-				       int nid, nodemask_t *nodemask)
+static struct page *__alloc_contig_pages(unsigned long nr_pages, gfp_t gfp_mask,
+		acr_flags_t alloc_flags, int nid, nodemask_t *nodemask)
 {
 	unsigned long ret, pfn, flags;
 	struct zonelist *zonelist;
@@ -7050,8 +7022,8 @@ struct page *alloc_contig_pages_noprof(unsigned long nr_pages, gfp_t gfp_mask,
 				 * and cause alloc_contig_range() to fail...
 				 */
 				spin_unlock_irqrestore(&zone->lock, flags);
-				ret = __alloc_contig_pages(pfn, nr_pages,
-							   gfp_mask);
+				ret = alloc_contig_range_noprof(pfn, pfn + nr_pages,
+								alloc_flags, gfp_mask);
 				if (!ret)
 					return pfn_to_page(pfn);
 				spin_lock_irqsave(&zone->lock, flags);
@@ -7062,6 +7034,45 @@ struct page *alloc_contig_pages_noprof(unsigned long nr_pages, gfp_t gfp_mask,
 	}
 	return NULL;
 }
+
+/**
+ * alloc_contig_pages() -- tries to find and allocate contiguous range of pages
+ * @nr_pages:	Number of contiguous pages to allocate
+ * @gfp_mask:	GFP mask. Node/zone/placement hints limit the search; only some
+ *		action and reclaim modifiers are supported. Reclaim modifiers
+ *		control allocation behavior during compaction/migration/reclaim.
+ * @nid:	Target node
+ * @nodemask:	Mask for other possible nodes
+ *
+ * This routine is a wrapper around alloc_contig_range(). It scans over zones
+ * on an applicable zonelist to find a contiguous pfn range which can then be
+ * tried for allocation with alloc_contig_range(). This routine is intended
+ * for allocation requests which can not be fulfilled with the buddy allocator.
+ *
+ * The allocated memory is always aligned to a page boundary. If nr_pages is a
+ * power of two, then allocated range is also guaranteed to be aligned to same
+ * nr_pages (e.g. 1GB request would be aligned to 1GB).
+ *
+ * Allocated pages can be freed with free_contig_range() or by manually calling
+ * __free_page() on each allocated page.
+ *
+ * Return: pointer to contiguous pages on success, or NULL if not successful.
+ */
+struct page *alloc_contig_pages_noprof(unsigned long nr_pages, gfp_t gfp_mask,
+				       int nid, nodemask_t *nodemask)
+{
+	return __alloc_contig_pages(nr_pages, gfp_mask, ACR_FLAGS_NONE,
+				    nid, nodemask);
+}
+
+struct page *alloc_contig_frozen_pages_noprof(unsigned long nr_pages,
+		gfp_t gfp_mask, int nid, nodemask_t *nodemask)
+{
+	/* always allocate compound pages without refcount increased */
+	return __alloc_contig_pages(nr_pages, gfp_mask | __GFP_COMP,
+				    ACR_FLAGS_FROZEN, nid, nodemask);
+}
+
 #endif /* CONFIG_CONTIG_ALLOC */
 
 void free_contig_range(unsigned long pfn, unsigned long nr_pages)
-- 
2.27.0

In order to support frozen page allocation in the following changes, add an
acr_flags_t argument to __cma_alloc().
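
Existing callers keep today's behaviour by passing ACR_FLAGS_CMA explicitly,
while a later patch can combine flags, roughly:

	/* current behaviour */
	page = __cma_alloc(cma, count, align, gfp, ACR_FLAGS_CMA);

	/* planned frozen-folio allocation */
	page = __cma_alloc(cma, 1 << order, order, gfp,
			   ACR_FLAGS_CMA | ACR_FLAGS_FROZEN);
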
Signed-off-by: Kefeng Wang
---
 mm/cma.c | 15 +++++++++------
 1 file changed, 9 insertions(+), 6 deletions(-)

diff --git a/mm/cma.c b/mm/cma.c
index e56ec64d0567..3f3c96be67f7 100644
--- a/mm/cma.c
+++ b/mm/cma.c
@@ -778,7 +778,8 @@ static void cma_debug_show_areas(struct cma *cma)
 
 static int cma_range_alloc(struct cma *cma, struct cma_memrange *cmr,
 			   unsigned long count, unsigned int align,
-			   struct page **pagep, gfp_t gfp)
+			   struct page **pagep, gfp_t gfp,
+			   acr_flags_t alloc_flags)
 {
 	unsigned long mask, offset;
 	unsigned long pfn = -1;
@@ -823,7 +824,7 @@ static int cma_range_alloc(struct cma *cma, struct cma_memrange *cmr,
 
 		pfn = cmr->base_pfn + (bitmap_no << cma->order_per_bit);
 		mutex_lock(&cma->alloc_mutex);
-		ret = alloc_contig_range(pfn, pfn + count, ACR_FLAGS_CMA, gfp);
+		ret = alloc_contig_range(pfn, pfn + count, alloc_flags, gfp);
 		mutex_unlock(&cma->alloc_mutex);
 		if (ret == 0) {
 			page = pfn_to_page(pfn);
@@ -848,7 +849,7 @@ static int cma_range_alloc(struct cma *cma, struct cma_memrange *cmr,
 }
 
 static struct page *__cma_alloc(struct cma *cma, unsigned long count,
-				unsigned int align, gfp_t gfp)
+				unsigned int align, gfp_t gfp, acr_flags_t alloc_flags)
 {
 	struct page *page = NULL;
 	int ret = -ENOMEM, r;
@@ -870,7 +871,7 @@ static struct page *__cma_alloc(struct cma *cma, unsigned long count,
 
 		page = NULL;
 		ret = cma_range_alloc(cma, &cma->ranges[r], count, align,
-				      &page, gfp);
+				      &page, gfp, alloc_flags);
 		if (ret != -EBUSY || page)
 			break;
 	}
@@ -918,7 +919,9 @@ static struct page *__cma_alloc(struct cma *cma, unsigned long count,
 struct page *cma_alloc(struct cma *cma, unsigned long count,
 		       unsigned int align, bool no_warn)
 {
-	return __cma_alloc(cma, count, align, GFP_KERNEL | (no_warn ? __GFP_NOWARN : 0));
+	return __cma_alloc(cma, count, align,
+			   GFP_KERNEL | (no_warn ? __GFP_NOWARN : 0),
+			   ACR_FLAGS_CMA);
 }
 
 struct folio *cma_alloc_folio(struct cma *cma, int order, gfp_t gfp)
@@ -928,7 +931,7 @@ struct folio *cma_alloc_folio(struct cma *cma, int order, gfp_t gfp)
 	if (WARN_ON(!order || !(gfp & __GFP_COMP)))
 		return NULL;
 
-	page = __cma_alloc(cma, 1 << order, order, gfp);
+	page = __cma_alloc(cma, 1 << order, order, gfp, ACR_FLAGS_CMA);
 
 	return page ? page_folio(page) : NULL;
 }
-- 
2.27.0

Kill cma_pages_valid(), which is only used in cma_release(), and clean up the
code duplication between the page validity check and the memrange lookup. Add
a __cma_release() helper to prepare for the upcoming frozen page release.
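
After this, both public release paths reduce to the same helper (a sketch of
the intended shape; the details are in the diff below):

	bool cma_release(struct cma *cma, const struct page *pages,
			 unsigned long count)
	{
		return __cma_release(cma, pages, count);
	}

	bool cma_free_folio(struct cma *cma, const struct folio *folio)
	{
		return __cma_release(cma, &folio->page, folio_nr_pages(folio));
	}

so the next patch only has to thread a single "frozen" argument through one
place instead of two.
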
Signed-off-by: Kefeng Wang
---
 include/linux/cma.h |  1 -
 mm/cma.c            | 57 ++++++++++++---------------------------------
 2 files changed, 15 insertions(+), 43 deletions(-)

diff --git a/include/linux/cma.h b/include/linux/cma.h
index 62d9c1cf6326..e5745d2aec55 100644
--- a/include/linux/cma.h
+++ b/include/linux/cma.h
@@ -49,7 +49,6 @@ extern int cma_init_reserved_mem(phys_addr_t base, phys_addr_t size,
 					struct cma **res_cma);
 extern struct page *cma_alloc(struct cma *cma, unsigned long count, unsigned int align,
 			      bool no_warn);
-extern bool cma_pages_valid(struct cma *cma, const struct page *pages, unsigned long count);
 extern bool cma_release(struct cma *cma, const struct page *pages, unsigned long count);
 
 extern int cma_for_each_area(int (*it)(struct cma *cma, void *data), void *data);
diff --git a/mm/cma.c b/mm/cma.c
index 3f3c96be67f7..b4413e382d5d 100644
--- a/mm/cma.c
+++ b/mm/cma.c
@@ -936,34 +936,36 @@ struct folio *cma_alloc_folio(struct cma *cma, int order, gfp_t gfp)
 	return page ? page_folio(page) : NULL;
 }
 
-bool cma_pages_valid(struct cma *cma, const struct page *pages,
-		     unsigned long count)
+static bool __cma_release(struct cma *cma, const struct page *pages,
+			  unsigned long count)
 {
 	unsigned long pfn, end;
 	int r;
 	struct cma_memrange *cmr;
-	bool ret;
+
+	pr_debug("%s(page %p, count %lu)\n", __func__, (void *)pages, count);
 
 	if (!cma || !pages || count > cma->count)
 		return false;
 
 	pfn = page_to_pfn(pages);
-	ret = false;
 
 	for (r = 0; r < cma->nranges; r++) {
 		cmr = &cma->ranges[r];
 		end = cmr->base_pfn + cmr->count;
-		if (pfn >= cmr->base_pfn && pfn < end) {
-			ret = pfn + count <= end;
+		if (pfn >= cmr->base_pfn && pfn < end && pfn + count <= end)
 			break;
-		}
 	}
 
-	if (!ret)
-		pr_debug("%s(page %p, count %lu)\n",
-				__func__, (void *)pages, count);
+	if (r == cma->nranges)
+		return false;
 
-	return ret;
+	free_contig_range(pfn, count);
+	cma_clear_bitmap(cma, cmr, pfn, count);
+	cma_sysfs_account_release_pages(cma, count);
+	trace_cma_release(cma->name, pfn, pages, count);
+
+	return true;
 }
 
 /**
@@ -979,36 +981,7 @@ bool cma_pages_valid(struct cma *cma, const struct page *pages,
 bool cma_release(struct cma *cma, const struct page *pages,
 		 unsigned long count)
 {
-	struct cma_memrange *cmr;
-	unsigned long pfn, end_pfn;
-	int r;
-
-	pr_debug("%s(page %p, count %lu)\n", __func__, (void *)pages, count);
-
-	if (!cma_pages_valid(cma, pages, count))
-		return false;
-
-	pfn = page_to_pfn(pages);
-	end_pfn = pfn + count;
-
-	for (r = 0; r < cma->nranges; r++) {
-		cmr = &cma->ranges[r];
-		if (pfn >= cmr->base_pfn &&
-		    pfn < (cmr->base_pfn + cmr->count)) {
-			VM_BUG_ON(end_pfn > cmr->base_pfn + cmr->count);
-			break;
-		}
-	}
-
-	if (r == cma->nranges)
-		return false;
-
-	free_contig_range(pfn, count);
-	cma_clear_bitmap(cma, cmr, pfn, count);
-	cma_sysfs_account_release_pages(cma, count);
-	trace_cma_release(cma->name, pfn, pages, count);
-
-	return true;
+	return __cma_release(cma, pages, count);
 }
 
 bool cma_free_folio(struct cma *cma, const struct folio *folio)
@@ -1016,7 +989,7 @@ bool cma_free_folio(struct cma *cma, const struct folio *folio)
 	if (WARN_ON(!folio_test_large(folio)))
 		return false;
 
-	return cma_release(cma, &folio->page, folio_nr_pages(folio));
+	return __cma_release(cma, &folio->page, folio_nr_pages(folio));
 }
 
 int cma_for_each_area(int (*it)(struct cma *cma, void *data), void *data)
-- 
2.27.0

alloc_gigantic_folio() currently allocates a folio with its refcount raised
via alloc_contig_range() and then freezes it. Convert it to allocate a frozen
folio directly, which removes the atomic refcount operations on allocation
and also saves the atomic unfreeze in __update_and_free_hugetlb_folio().

Rename some functions to make them more self-explanatory:

  folio_alloc_gigantic         -> folio_alloc_frozen_gigantic
  cma_alloc/free_folio         -> cma_alloc/free_frozen_folio
  hugetlb_cma_alloc/free_folio -> hugetlb_cma_alloc/free_frozen_folio
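
The win is that the allocator no longer has to freeze a live refcount after
the fact; the pattern this removes from alloc_gigantic_folio() is roughly:

	if (!folio_ref_freeze(folio, 1)) {
		/* a speculative reference raced with us */
		pr_warn("HugeTLB: unexpected refcount on PFN %lu\n",
			folio_pfn(folio));
		hugetlb_free_folio(folio);
		/* retry once, then give up */
	}

With a frozen allocation the refcount is never visible as non-zero, so the
race and the retry disappear, and the free side can assert a zero refcount
instead of unfreezing it.
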
Signed-off-by: Kefeng Wang
---
 include/linux/cma.h |  8 ++++----
 include/linux/gfp.h | 14 ++++++-------
 mm/cma.c            | 22 +++++++++++++-------
 mm/hugetlb.c        | 50 +++++++++------------------------------------
 mm/hugetlb_cma.c    | 13 ++++++------
 mm/hugetlb_cma.h    | 10 ++++-----
 6 files changed, 47 insertions(+), 70 deletions(-)

diff --git a/include/linux/cma.h b/include/linux/cma.h
index e5745d2aec55..cceec7b25bae 100644
--- a/include/linux/cma.h
+++ b/include/linux/cma.h
@@ -57,16 +57,16 @@ extern bool cma_intersects(struct cma *cma, unsigned long start, unsigned long e
 extern void cma_reserve_pages_on_error(struct cma *cma);
 
 #ifdef CONFIG_CMA
-struct folio *cma_alloc_folio(struct cma *cma, int order, gfp_t gfp);
-bool cma_free_folio(struct cma *cma, const struct folio *folio);
+struct folio *cma_alloc_frozen_folio(struct cma *cma, int order, gfp_t gfp);
+bool cma_free_frozen_folio(struct cma *cma, const struct folio *folio);
 bool cma_validate_zones(struct cma *cma);
 #else
-static inline struct folio *cma_alloc_folio(struct cma *cma, int order, gfp_t gfp)
+static inline struct folio *cma_alloc_frozen_folio(struct cma *cma, int order, gfp_t gfp)
 {
 	return NULL;
 }
-static inline bool cma_free_folio(struct cma *cma, const struct folio *folio)
+static inline bool cma_free_frozen_folio(struct cma *cma, const struct folio *folio)
 {
 	return false;
 }
diff --git a/include/linux/gfp.h b/include/linux/gfp.h
index d0047b85fe34..ceda042be704 100644
--- a/include/linux/gfp.h
+++ b/include/linux/gfp.h
@@ -447,26 +447,26 @@ struct page *alloc_contig_frozen_pages_noprof(unsigned long nr_pages,
 void free_contig_range(unsigned long pfn, unsigned long nr_pages);
 
 #ifdef CONFIG_CONTIG_ALLOC
-static inline struct folio *folio_alloc_gigantic_noprof(int order, gfp_t gfp,
-		int nid, nodemask_t *node)
+static inline struct folio *folio_alloc_frozen_gigantic_noprof(int order,
+		gfp_t gfp, int nid, nodemask_t *node)
 {
 	struct page *page;
 
 	if (WARN_ON(!order || !(gfp & __GFP_COMP)))
 		return NULL;
 
-	page = alloc_contig_pages_noprof(1 << order, gfp, nid, node);
+	page = alloc_contig_frozen_pages_noprof(1 << order, gfp, nid, node);
 
 	return page ? page_folio(page) : NULL;
 }
 #else
-static inline struct folio *folio_alloc_gigantic_noprof(int order, gfp_t gfp,
-		int nid, nodemask_t *node)
+static inline struct folio *folio_alloc_frozen_gigantic_noprof(int order,
+		gfp_t gfp, int nid, nodemask_t *node)
 {
 	return NULL;
 }
 #endif
 
-/* This should be paired with folio_put() rather than free_contig_range(). */
-#define folio_alloc_gigantic(...)	alloc_hooks(folio_alloc_gigantic_noprof(__VA_ARGS__))
+/* This must be paired with free_frozen_pages() rather than free_contig_range(). */
+#define folio_alloc_frozen_gigantic(...)	alloc_hooks(folio_alloc_frozen_gigantic_noprof(__VA_ARGS__))
 
 #endif /* __LINUX_GFP_H */
diff --git a/mm/cma.c b/mm/cma.c
index b4413e382d5d..49d53879c1d0 100644
--- a/mm/cma.c
+++ b/mm/cma.c
@@ -924,20 +924,21 @@ struct page *cma_alloc(struct cma *cma, unsigned long count,
 			   ACR_FLAGS_CMA);
 }
 
-struct folio *cma_alloc_folio(struct cma *cma, int order, gfp_t gfp)
+struct folio *cma_alloc_frozen_folio(struct cma *cma, int order, gfp_t gfp)
 {
 	struct page *page;
 
 	if (WARN_ON(!order || !(gfp & __GFP_COMP)))
 		return NULL;
 
-	page = __cma_alloc(cma, 1 << order, order, gfp, ACR_FLAGS_CMA);
+	page = __cma_alloc(cma, 1 << order, order, gfp,
+			   ACR_FLAGS_CMA | ACR_FLAGS_FROZEN);
 
 	return page ? page_folio(page) : NULL;
 }
 
 static bool __cma_release(struct cma *cma, const struct page *pages,
-			  unsigned long count)
+			  unsigned long count, bool frozen_page)
 {
 	unsigned long pfn, end;
 	int r;
@@ -960,7 +961,14 @@ static bool __cma_release(struct cma *cma, const struct page *pages,
 	if (r == cma->nranges)
 		return false;
 
-	free_contig_range(pfn, count);
+	if (frozen_page) {
+		struct page *head_page = compound_head((struct page *)pages);
+
+		free_frozen_pages(head_page, compound_order(head_page));
+	} else {
+		free_contig_range(pfn, count);
+	}
+
 	cma_clear_bitmap(cma, cmr, pfn, count);
 	cma_sysfs_account_release_pages(cma, count);
 	trace_cma_release(cma->name, pfn, pages, count);
@@ -981,15 +989,15 @@ static bool __cma_release(struct cma *cma, const struct page *pages,
 bool cma_release(struct cma *cma, const struct page *pages,
 		 unsigned long count)
 {
-	return __cma_release(cma, pages, count);
+	return __cma_release(cma, pages, count, false);
 }
 
-bool cma_free_folio(struct cma *cma, const struct folio *folio)
+bool cma_free_frozen_folio(struct cma *cma, const struct folio *folio)
 {
 	if (WARN_ON(!folio_test_large(folio)))
 		return false;
 
-	return __cma_release(cma, &folio->page, folio_nr_pages(folio));
+	return __cma_release(cma, &folio->page, folio_nr_pages(folio), true);
 }
 
 int cma_for_each_area(int (*it)(struct cma *cma, void *data), void *data)
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 4e8709d7deee..1d801bf65916 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -125,16 +125,6 @@ static void hugetlb_unshare_pmds(struct vm_area_struct *vma,
 		unsigned long start, unsigned long end, bool take_locks);
 static struct resv_map *vma_resv_map(struct vm_area_struct *vma);
 
-static void hugetlb_free_folio(struct folio *folio)
-{
-	if (folio_test_hugetlb_cma(folio)) {
-		hugetlb_cma_free_folio(folio);
-		return;
-	}
-
-	folio_put(folio);
-}
-
 static inline bool subpool_is_free(struct hugepage_subpool *spool)
 {
 	if (spool->count)
@@ -1472,43 +1462,20 @@ static int hstate_next_node_to_free(struct hstate *h, nodemask_t *nodes_allowed)
 		nr_nodes--)
 
 #ifdef CONFIG_ARCH_HAS_GIGANTIC_PAGE
-#ifdef CONFIG_CONTIG_ALLOC
 static struct folio *alloc_gigantic_folio(int order, gfp_t gfp_mask,
 		int nid, nodemask_t *nodemask)
 {
 	struct folio *folio;
-	bool retried = false;
 
-retry:
-	folio = hugetlb_cma_alloc_folio(order, gfp_mask, nid, nodemask);
-	if (!folio) {
-		if (hugetlb_cma_exclusive_alloc())
-			return NULL;
-
-		folio = folio_alloc_gigantic(order, gfp_mask, nid, nodemask);
-		if (!folio)
-			return NULL;
-	}
-
-	if (folio_ref_freeze(folio, 1))
+	folio = hugetlb_cma_alloc_frozen_folio(order, gfp_mask, nid, nodemask);
+	if (folio)
 		return folio;
 
-	pr_warn("HugeTLB: unexpected refcount on PFN %lu\n", folio_pfn(folio));
-	hugetlb_free_folio(folio);
-	if (!retried) {
-		retried = true;
-		goto retry;
-	}
-	return NULL;
-}
+	if (hugetlb_cma_exclusive_alloc())
+		return NULL;
 
-#else /* !CONFIG_CONTIG_ALLOC */
-static struct folio *alloc_gigantic_folio(int order, gfp_t gfp_mask, int nid,
-					  nodemask_t *nodemask)
-{
-	return NULL;
+	return folio_alloc_frozen_gigantic(order, gfp_mask, nid, nodemask);
 }
-#endif /* CONFIG_CONTIG_ALLOC */
 
 #else /* !CONFIG_ARCH_HAS_GIGANTIC_PAGE */
 static struct folio *alloc_gigantic_folio(int order, gfp_t gfp_mask, int nid,
@@ -1641,9 +1608,12 @@ static void __update_and_free_hugetlb_folio(struct hstate *h,
 	if (unlikely(folio_test_hwpoison(folio)))
 		folio_clear_hugetlb_hwpoison(folio);
 
-	folio_ref_unfreeze(folio, 1);
+	VM_BUG_ON_FOLIO(folio_ref_count(folio), folio);
 
-	hugetlb_free_folio(folio);
+	if (folio_test_hugetlb_cma(folio))
+		hugetlb_cma_free_frozen_folio(folio);
+	else
+		free_frozen_pages(&folio->page, folio_order(folio));
 }
 
 /*
diff --git a/mm/hugetlb_cma.c b/mm/hugetlb_cma.c
index e8e4dc7182d5..24738a1d6098 100644
--- a/mm/hugetlb_cma.c
+++ b/mm/hugetlb_cma.c
@@ -18,29 +18,28 @@ static unsigned long hugetlb_cma_size_in_node[MAX_NUMNODES] __initdata;
 static bool hugetlb_cma_only;
 static unsigned long hugetlb_cma_size __initdata;
 
-void hugetlb_cma_free_folio(struct folio *folio)
+void hugetlb_cma_free_frozen_folio(struct folio *folio)
 {
 	int nid = folio_nid(folio);
 
-	WARN_ON_ONCE(!cma_free_folio(hugetlb_cma[nid], folio));
+	WARN_ON_ONCE(!cma_free_frozen_folio(hugetlb_cma[nid], folio));
 }
 
-
-struct folio *hugetlb_cma_alloc_folio(int order, gfp_t gfp_mask,
-		int nid, nodemask_t *nodemask)
+struct folio *hugetlb_cma_alloc_frozen_folio(int order, gfp_t gfp_mask,
+		int nid, nodemask_t *nodemask)
 {
 	int node;
 	struct folio *folio = NULL;
 
 	if (hugetlb_cma[nid])
-		folio = cma_alloc_folio(hugetlb_cma[nid], order, gfp_mask);
+		folio = cma_alloc_frozen_folio(hugetlb_cma[nid], order, gfp_mask);
 
 	if (!folio && !(gfp_mask & __GFP_THISNODE)) {
 		for_each_node_mask(node, *nodemask) {
 			if (node == nid || !hugetlb_cma[node])
 				continue;
 
-			folio = cma_alloc_folio(hugetlb_cma[node], order, gfp_mask);
+			folio = cma_alloc_frozen_folio(hugetlb_cma[node], order, gfp_mask);
 			if (folio)
 				break;
 		}
diff --git a/mm/hugetlb_cma.h b/mm/hugetlb_cma.h
index 2c2ec8a7e134..71db3544816e 100644
--- a/mm/hugetlb_cma.h
+++ b/mm/hugetlb_cma.h
@@ -3,8 +3,8 @@
 #define _LINUX_HUGETLB_CMA_H
 
 #ifdef CONFIG_CMA
-void hugetlb_cma_free_folio(struct folio *folio);
-struct folio *hugetlb_cma_alloc_folio(int order, gfp_t gfp_mask,
+void hugetlb_cma_free_frozen_folio(struct folio *folio);
+struct folio *hugetlb_cma_alloc_frozen_folio(int order, gfp_t gfp_mask,
 		int nid, nodemask_t *nodemask);
 struct huge_bootmem_page *hugetlb_cma_alloc_bootmem(struct hstate *h, int *nid,
 		bool node_exact);
@@ -14,12 +14,12 @@ unsigned long hugetlb_cma_total_size(void);
 void hugetlb_cma_validate_params(void);
 bool hugetlb_early_cma(struct hstate *h);
 #else
-static inline void hugetlb_cma_free_folio(struct folio *folio)
+static inline void hugetlb_cma_free_frozen_folio(struct folio *folio)
 {
 }
 
-static inline struct folio *hugetlb_cma_alloc_folio(int order, gfp_t gfp_mask,
-		int nid, nodemask_t *nodemask)
+static inline struct folio *hugetlb_cma_alloc_frozen_folio(int order,
+		gfp_t gfp_mask, int nid, nodemask_t *nodemask)
 {
 	return NULL;
 }
-- 
2.27.0
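
Taken together, allocation and free of a gigantic hugetlb folio end up
symmetric and refcount-free; a condensed view of the resulting code paths
(paraphrased from the diffs above, not a literal copy):

	/* alloc: a frozen (refcount == 0) folio from CMA or the contig allocator */
	folio = hugetlb_cma_alloc_frozen_folio(order, gfp_mask, nid, nodemask);
	if (!folio && !hugetlb_cma_exclusive_alloc())
		folio = folio_alloc_frozen_gigantic(order, gfp_mask, nid, nodemask);

	/* free: still frozen, so no folio_ref_unfreeze()/folio_put() */
	if (folio_test_hugetlb_cma(folio))
		hugetlb_cma_free_frozen_folio(folio);
	else
		free_frozen_pages(&folio->page, folio_order(folio));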