From: Alexei Starovoitov Change alloc_pages_nolock() to default to __GFP_COMP when allocating pages, since upcoming reentrant alloc_slab_page() needs __GFP_COMP. Also allow __GFP_ACCOUNT flag to be specified, since BPF infra needs __GFP_ACCOUNT. Reviewed-by: Vlastimil Babka Signed-off-by: Alexei Starovoitov --- include/linux/gfp.h | 2 +- kernel/bpf/syscall.c | 2 +- mm/page_alloc.c | 10 ++++++---- 3 files changed, 8 insertions(+), 6 deletions(-) diff --git a/include/linux/gfp.h b/include/linux/gfp.h index 5ebf26fcdcfa..0ceb4e09306c 100644 --- a/include/linux/gfp.h +++ b/include/linux/gfp.h @@ -354,7 +354,7 @@ static inline struct page *alloc_page_vma_noprof(gfp_t gfp, } #define alloc_page_vma(...) alloc_hooks(alloc_page_vma_noprof(__VA_ARGS__)) -struct page *alloc_pages_nolock_noprof(int nid, unsigned int order); +struct page *alloc_pages_nolock_noprof(gfp_t gfp_flags, int nid, unsigned int order); #define alloc_pages_nolock(...) alloc_hooks(alloc_pages_nolock_noprof(__VA_ARGS__)) extern unsigned long get_free_pages_noprof(gfp_t gfp_mask, unsigned int order); diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index dd5304c6ac3c..eb9b6c4c10e9 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -579,7 +579,7 @@ static bool can_alloc_pages(void) static struct page *__bpf_alloc_page(int nid) { if (!can_alloc_pages()) - return alloc_pages_nolock(nid, 0); + return alloc_pages_nolock(__GFP_ACCOUNT, nid, 0); return alloc_pages_node(nid, GFP_KERNEL | __GFP_ZERO | __GFP_ACCOUNT diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 78ddf1d43c6c..148945f0b667 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -7506,6 +7506,7 @@ static bool __free_unaccepted(struct page *page) /** * alloc_pages_nolock - opportunistic reentrant allocation from any context + * @gfp_flags: GFP flags. Only __GFP_ACCOUNT allowed. * @nid: node to allocate from * @order: allocation order size * @@ -7519,7 +7520,7 @@ static bool __free_unaccepted(struct page *page) * Return: allocated page or NULL on failure. NULL does not mean EBUSY or EAGAIN. * It means ENOMEM. There is no reason to call it again and expect !NULL. */ -struct page *alloc_pages_nolock_noprof(int nid, unsigned int order) +struct page *alloc_pages_nolock_noprof(gfp_t gfp_flags, int nid, unsigned int order) { /* * Do not specify __GFP_DIRECT_RECLAIM, since direct claim is not allowed. @@ -7541,12 +7542,13 @@ struct page *alloc_pages_nolock_noprof(int nid, unsigned int order) * specify it here to highlight that alloc_pages_nolock() * doesn't want to deplete reserves. */ - gfp_t alloc_gfp = __GFP_NOWARN | __GFP_ZERO | __GFP_NOMEMALLOC - | __GFP_ACCOUNT; + gfp_t alloc_gfp = __GFP_NOWARN | __GFP_ZERO | __GFP_NOMEMALLOC | __GFP_COMP + | gfp_flags; unsigned int alloc_flags = ALLOC_TRYLOCK; struct alloc_context ac = { }; struct page *page; + VM_WARN_ON_ONCE(gfp_flags & ~__GFP_ACCOUNT); /* * In PREEMPT_RT spin_trylock() will call raw_spin_lock() which is * unsafe in NMI. If spin_trylock() is called from hard IRQ the current @@ -7584,7 +7586,7 @@ struct page *alloc_pages_nolock_noprof(int nid, unsigned int order) if (page) set_page_refcounted(page); - if (memcg_kmem_online() && page && + if (memcg_kmem_online() && page && (gfp_flags & __GFP_ACCOUNT) && unlikely(__memcg_kmem_charge_page(page, alloc_gfp, order) != 0)) { free_pages_nolock(page, order); page = NULL; -- 2.47.1