From: Ackerley Tng

Move memory policy interpretation out of
alloc_buddy_hugetlb_folio_with_mpol() and into alloc_hugetlb_folio() to
separate reading and interpretation of memory policy from actual
allocation.

This will later allow memory policy to be interpreted outside of the
process of allocating a hugetlb folio entirely. This opens doors for
other callers of the HugeTLB folio allocation function, such as
guest_memfd, where memory may not always be mapped and hence may not
have an associated vma.

Introduce struct mempolicy_interpreted to hold all the components of an
interpreted memory policy.

Read the policy mode through a small helper, mpol_interpreted_mode(),
rather than dereferencing the policy directly: without CONFIG_NUMA,
huge_node() returns a NULL policy and struct mempolicy has no members.

Rename alloc_buddy_hugetlb_folio_with_mpol() to
alloc_buddy_hugetlb_folio() since the function no longer interprets
memory policy.

No functional change intended.

Reviewed-by: James Houghton
Acked-by: Oscar Salvador
Signed-off-by: Ackerley Tng
---
Review changes relative to the posted version:
 - read the mode via mpol_interpreted_mode() so that !CONFIG_NUMA builds
   neither fail to compile nor dereference the NULL policy returned by
   the huge_node() stub
 - add comments on struct mempolicy_interpreted, the helper, and
   alloc_buddy_hugetlb_folio()
 - the blob ids on the mm/hugetlb.c index line are carried over from the
   posted version and no longer match the post-image

 include/uapi/linux/mempolicy.h |  2 +-
 mm/hugetlb.c                   | 67 ++++++++++++++++++++++++++++++++----------
 2 files changed, 52 insertions(+), 17 deletions(-)

diff --git a/include/uapi/linux/mempolicy.h b/include/uapi/linux/mempolicy.h
index 6c962d866e864..7f6fc9599693b 100644
--- a/include/uapi/linux/mempolicy.h
+++ b/include/uapi/linux/mempolicy.h
@@ -16,7 +16,7 @@
  */
 
 /* Policies */
-enum {
+enum mempolicy_mode {
 	MPOL_DEFAULT,
 	MPOL_PREFERRED,
 	MPOL_BIND,
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 190ab539a97d4..6a5f69b3b1cb4 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -1334,6 +1334,29 @@ static unsigned long available_huge_pages(struct hstate *h)
 	return h->free_huge_pages - h->resv_huge_pages;
 }
 
+/*
+ * An interpreted memory policy: the node id and nodemask as returned by
+ * huge_node(), together with the mode of the policy they came from.
+ */
+struct mempolicy_interpreted {
+	int nid;
+	nodemask_t *nodemask;
+	enum mempolicy_mode mode;
+};
+
+/*
+ * With !CONFIG_NUMA, huge_node() sets the policy to NULL and struct
+ * mempolicy has no ->mode, so report MPOL_DEFAULT in that configuration.
+ */
+static enum mempolicy_mode mpol_interpreted_mode(struct mempolicy *mpol)
+{
+#ifdef CONFIG_NUMA
+	return mpol->mode;
+#else
+	return MPOL_DEFAULT;
+#endif
+}
+
 static struct folio *dequeue_hugetlb_folio_vma(struct hstate *h,
 				struct vm_area_struct *vma,
 				unsigned long address)
@@ -2155,32 +2178,32 @@ static struct folio *alloc_migrate_hugetlb_folio(struct hstate *h, gfp_t gfp_mas
 	return folio;
 }
 
 /*
- * Use the VMA's mpolicy to allocate a huge page from the buddy.
+ * Allocate a huge page from the buddy according to an already
+ * interpreted memory policy; the caller owns any policy reference.
  */
 static
-struct folio *alloc_buddy_hugetlb_folio_with_mpol(struct hstate *h,
-		struct vm_area_struct *vma, unsigned long addr)
+struct folio *alloc_buddy_hugetlb_folio(struct hstate *h,
+		gfp_t gfp_mask, struct mempolicy_interpreted *mpoli)
 {
 	struct folio *folio = NULL;
-	struct mempolicy *mpol;
-	gfp_t gfp_mask = htlb_alloc_mask(h);
-	int nid;
-	nodemask_t *nodemask;
+	nodemask_t *nodemask = mpoli->nodemask;
 
-	nid = huge_node(vma, addr, gfp_mask, &mpol, &nodemask);
-	if (mpol_is_preferred_many(mpol)) {
+	if (mpoli->mode == MPOL_PREFERRED_MANY) {
 		gfp_t gfp = gfp_mask & ~(__GFP_DIRECT_RECLAIM | __GFP_NOFAIL);
 
-		folio = alloc_surplus_hugetlb_folio(h, gfp, nid, nodemask);
+		folio = alloc_surplus_hugetlb_folio(h, gfp, mpoli->nid,
+						    nodemask);
 
 		/* Fallback to all nodes if page==NULL */
 		nodemask = NULL;
 	}
 
-	if (!folio)
-		folio = alloc_surplus_hugetlb_folio(h, gfp_mask, nid, nodemask);
-	mpol_cond_put(mpol);
+	if (!folio) {
+		folio = alloc_surplus_hugetlb_folio(h, gfp_mask, mpoli->nid,
+						    nodemask);
+	}
+
 	return folio;
 }
 
@@ -2869,7 +2892,7 @@ struct folio *alloc_hugetlb_folio(struct vm_area_struct *vma,
 	map_chg_state map_chg;
 	int ret, idx;
 	struct hugetlb_cgroup *h_cg = NULL;
-	gfp_t gfp = htlb_alloc_mask(h) | __GFP_RETRY_MAYFAIL;
+	gfp_t gfp = htlb_alloc_mask(h);
 
 	idx = hstate_index(h);
 
@@ -2941,8 +2964,20 @@ struct folio *alloc_hugetlb_folio(struct vm_area_struct *vma,
 
 	folio = dequeue_hugetlb_folio_vma(h, vma, addr);
 	if (!folio) {
+		struct mempolicy_interpreted mpoli;
+		struct mempolicy *mpol;
+		nodemask_t *nodemask;
+		int nid;
+
 		spin_unlock_irq(&hugetlb_lock);
-		folio = alloc_buddy_hugetlb_folio_with_mpol(h, vma, addr);
+		nid = huge_node(vma, addr, gfp, &mpol, &nodemask);
+		mpoli = (struct mempolicy_interpreted){
+			.nid = nid,
+			.mode = mpol_interpreted_mode(mpol),
+			.nodemask = nodemask,
+		};
+		folio = alloc_buddy_hugetlb_folio(h, gfp, &mpoli);
+		mpol_cond_put(mpol);
 		if (!folio)
 			goto out_uncharge_cgroup;
 		spin_lock_irq(&hugetlb_lock);
@@ -2998,7 +3033,7 @@ struct folio *alloc_hugetlb_folio(struct vm_area_struct *vma,
 		}
 	}
 
-	ret = mem_cgroup_charge_hugetlb(folio, gfp);
+	ret = mem_cgroup_charge_hugetlb(folio, gfp | __GFP_RETRY_MAYFAIL);
 	/*
 	 * Unconditionally increment NR_HUGETLB here. If it turns out that
 	 * mem_cgroup_charge_hugetlb failed, then immediately free the page and
-- 
2.54.0.563.g4f69b47b94-goog