Add pghint_t, a bitwise type for communicating page allocation hints between the allocator and callers. Define PGHINT_ZEROED to indicate that the allocated page contents are known to be zero. Add _hints variants of the allocation functions that accept a pghint_t *hints output parameter: vma_alloc_folio_hints() -> alloc_pages_mpol_hints() (internal) -> __alloc_frozen_pages_hints() The existing APIs are unchanged and continue to work without hints. For now, hints is always initialized to 0. A subsequent patch will set PGHINT_ZEROED when the page was pre-zeroed by the host. Signed-off-by: Michael S. Tsirkin Assisted-by: Claude:claude-opus-4-6 Assisted-by: cursor-agent:GPT-5.4-xhigh --- include/linux/gfp.h | 15 ++++++++ mm/internal.h | 4 +++ mm/mempolicy.c | 85 +++++++++++++++++++++++++++++++++++++++++++++ mm/page_alloc.c | 15 ++++++-- 4 files changed, 117 insertions(+), 2 deletions(-) diff --git a/include/linux/gfp.h b/include/linux/gfp.h index 51ef13ed756e..14433a20e60c 100644 --- a/include/linux/gfp.h +++ b/include/linux/gfp.h @@ -226,6 +226,9 @@ static inline void arch_free_page(struct page *page, int order) { } static inline void arch_alloc_page(struct page *page, int order) { } #endif +typedef unsigned int __bitwise pghint_t; +#define PGHINT_ZEROED ((__force pghint_t)BIT(0)) + struct page *__alloc_pages_noprof(gfp_t gfp, unsigned int order, int preferred_nid, nodemask_t *nodemask); #define __alloc_pages(...) 
alloc_hooks(__alloc_pages_noprof(__VA_ARGS__)) @@ -325,6 +328,9 @@ struct folio *folio_alloc_mpol_noprof(gfp_t gfp, unsigned int order, struct mempolicy *mpol, pgoff_t ilx, int nid); struct folio *vma_alloc_folio_noprof(gfp_t gfp, int order, struct vm_area_struct *vma, unsigned long addr); +struct folio *vma_alloc_folio_hints_noprof(gfp_t gfp, int order, + struct vm_area_struct *vma, unsigned long addr, + pghint_t *hints); #else static inline struct page *alloc_pages_noprof(gfp_t gfp_mask, unsigned int order) { @@ -344,12 +350,21 @@ static inline struct folio *vma_alloc_folio_noprof(gfp_t gfp, int order, { return folio_alloc_noprof(gfp, order); } +static inline struct folio *vma_alloc_folio_hints_noprof(gfp_t gfp, int order, + struct vm_area_struct *vma, unsigned long addr, + pghint_t *hints) +{ + if (hints) + *hints = 0; + return folio_alloc_noprof(gfp, order); +} #endif #define alloc_pages(...) alloc_hooks(alloc_pages_noprof(__VA_ARGS__)) #define folio_alloc(...) alloc_hooks(folio_alloc_noprof(__VA_ARGS__)) #define folio_alloc_mpol(...) alloc_hooks(folio_alloc_mpol_noprof(__VA_ARGS__)) #define vma_alloc_folio(...) alloc_hooks(vma_alloc_folio_noprof(__VA_ARGS__)) +#define vma_alloc_folio_hints(...) alloc_hooks(vma_alloc_folio_hints_noprof(__VA_ARGS__)) #define alloc_page(gfp_mask) alloc_pages(gfp_mask, 0) diff --git a/mm/internal.h b/mm/internal.h index cb0af847d7d9..686667b956c0 100644 --- a/mm/internal.h +++ b/mm/internal.h @@ -894,8 +894,12 @@ extern int user_min_free_kbytes; struct page *__alloc_frozen_pages_noprof(gfp_t, unsigned int order, int nid, nodemask_t *); +struct page *__alloc_frozen_pages_hints_noprof(gfp_t, unsigned int order, + int nid, nodemask_t *, pghint_t *hints); #define __alloc_frozen_pages(...) \ alloc_hooks(__alloc_frozen_pages_noprof(__VA_ARGS__)) +#define __alloc_frozen_pages_hints(...) 
\ + alloc_hooks(__alloc_frozen_pages_hints_noprof(__VA_ARGS__)) void free_frozen_pages(struct page *page, unsigned int order); void free_unref_folios(struct folio_batch *fbatch); diff --git a/mm/mempolicy.c b/mm/mempolicy.c index cf92bd6a8226..b918639eef71 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c @@ -2547,6 +2547,91 @@ struct folio *vma_alloc_folio_noprof(gfp_t gfp, int order, struct vm_area_struct } EXPORT_SYMBOL(vma_alloc_folio_noprof); +static struct page *alloc_pages_preferred_many_hints(gfp_t gfp, + unsigned int order, int nid, nodemask_t *nodemask, + pghint_t *hints) +{ + struct page *page; + gfp_t preferred_gfp; + + preferred_gfp = gfp | __GFP_NOWARN; + preferred_gfp &= ~(__GFP_DIRECT_RECLAIM | __GFP_NOFAIL); + page = __alloc_frozen_pages_hints_noprof(preferred_gfp, order, nid, + nodemask, hints); + if (!page) + page = __alloc_frozen_pages_hints_noprof(gfp, order, nid, NULL, + hints); + + return page; +} + +static struct page *alloc_pages_mpol_hints(gfp_t gfp, unsigned int order, + struct mempolicy *pol, pgoff_t ilx, int nid, + pghint_t *hints) +{ + nodemask_t *nodemask; + struct page *page; + + nodemask = policy_nodemask(gfp, pol, ilx, &nid); + + if (pol->mode == MPOL_PREFERRED_MANY) + return alloc_pages_preferred_many_hints(gfp, order, nid, + nodemask, hints); + + if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE) && + order == HPAGE_PMD_ORDER && ilx != NO_INTERLEAVE_INDEX) { + if (pol->mode != MPOL_INTERLEAVE && + pol->mode != MPOL_WEIGHTED_INTERLEAVE && + (!nodemask || node_isset(nid, *nodemask))) { + page = __alloc_frozen_pages_hints_noprof( + gfp | __GFP_THISNODE | __GFP_NORETRY, order, + nid, NULL, hints); + if (page || !(gfp & __GFP_DIRECT_RECLAIM)) + return page; + } + } + + page = __alloc_frozen_pages_hints_noprof(gfp, order, nid, nodemask, + hints); + + if (unlikely(pol->mode == MPOL_INTERLEAVE || + pol->mode == MPOL_WEIGHTED_INTERLEAVE) && page) { + if (static_branch_likely(&vm_numa_stat_key) && + page_to_nid(page) == nid) { + preempt_disable(); 
+ __count_numa_event(page_zone(page), NUMA_INTERLEAVE_HIT); + preempt_enable(); + } + } + + return page; +} + +struct folio *vma_alloc_folio_hints_noprof(gfp_t gfp, int order, + struct vm_area_struct *vma, unsigned long addr, + pghint_t *hints) +{ + struct mempolicy *pol; + pgoff_t ilx; + struct folio *folio; + struct page *page; + + if (vma->vm_flags & VM_DROPPABLE) + gfp |= __GFP_NOWARN; + + pol = get_vma_policy(vma, addr, order, &ilx); + page = alloc_pages_mpol_hints(gfp | __GFP_COMP, order, pol, ilx, + numa_node_id(), hints); + mpol_cond_put(pol); + if (!page) + return NULL; + + set_page_refcounted(page); + folio = page_rmappable_folio(page); + return folio; +} +EXPORT_SYMBOL(vma_alloc_folio_hints_noprof); + struct page *alloc_frozen_pages_noprof(gfp_t gfp, unsigned order) { struct mempolicy *pol = &default_policy; diff --git a/mm/page_alloc.c b/mm/page_alloc.c index edbb1edf463d..f7abbc46e725 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -5222,14 +5222,17 @@ EXPORT_SYMBOL_GPL(alloc_pages_bulk_noprof); /* * This is the 'heart' of the zoned buddy allocator. */ -struct page *__alloc_frozen_pages_noprof(gfp_t gfp, unsigned int order, - int preferred_nid, nodemask_t *nodemask) +struct page *__alloc_frozen_pages_hints_noprof(gfp_t gfp, unsigned int order, + int preferred_nid, nodemask_t *nodemask, pghint_t *hints) { struct page *page; unsigned int alloc_flags = ALLOC_WMARK_LOW; gfp_t alloc_gfp; /* The gfp_t that was actually used for allocation */ struct alloc_context ac = { }; + if (hints) + *hints = (pghint_t)0; + /* * There are several places where we assume that the order value is sane * so bail out early if the request is out of bound. 
@@ -5285,6 +5288,14 @@ struct page *__alloc_frozen_pages_noprof(gfp_t gfp, unsigned int order, return page; } +EXPORT_SYMBOL(__alloc_frozen_pages_hints_noprof); + +struct page *__alloc_frozen_pages_noprof(gfp_t gfp, unsigned int order, + int preferred_nid, nodemask_t *nodemask) +{ + return __alloc_frozen_pages_hints_noprof(gfp, order, preferred_nid, + nodemask, NULL); +} EXPORT_SYMBOL(__alloc_frozen_pages_noprof); struct page *__alloc_pages_noprof(gfp_t gfp, unsigned int order, -- MST