When CONFIG_DEBUG_OBJECTS_FREE is enabled, debug_check_no_{obj,locks}_freed() functions are called. Since both of them spin on a lock, they are not safe to be called if the FPI_TRYLOCK flag is specified. This leads to a lockdep splat: ================================ WARNING: inconsistent lock state 6.19.0-rc5-slab-for-next+ #326 Tainted: G N -------------------------------- inconsistent {INITIAL USE} -> {IN-NMI} usage. kunit_try_catch/9046 [HC2[2]:SC0[0]:HE0:SE1] takes: ffffffff84ed6bf8 (&obj_hash[i].lock){-.-.}-{2:2}, at: __debug_check_no_obj_freed+0xe0/0x300 {INITIAL USE} state was registered at: lock_acquire+0xd9/0x2f0 _raw_spin_lock_irqsave+0x4c/0x80 __debug_object_init+0x9d/0x1f0 debug_object_init+0x34/0x50 __init_work+0x28/0x40 init_cgroup_housekeeping+0x151/0x210 init_cgroup_root+0x3d/0x140 cgroup_init_early+0x30/0x240 start_kernel+0x3e/0xcd0 x86_64_start_reservations+0x18/0x30 x86_64_start_kernel+0xf3/0x140 common_startup_64+0x13e/0x148 irq event stamp: 2998 hardirqs last enabled at (2997): [] exc_nmi+0x11a/0x240 hardirqs last disabled at (2998): [] sysvec_irq_work+0x11/0x110 softirqs last enabled at (1416): [] __irq_exit_rcu+0x132/0x1c0 softirqs last disabled at (1303): [] __irq_exit_rcu+0x132/0x1c0 other info that might help us debug this: Possible unsafe locking scenario: CPU0 ---- lock(&obj_hash[i].lock); lock(&obj_hash[i].lock); *** DEADLOCK *** Fix this by adding an fpi_t parameter to free_pages_prepare() and skipping those checks if FPI_TRYLOCK is set. Since mm/compaction.c calls free_pages_prepare(), move the fpi_t definition to mm/internal.h. 
Fixes: 8c57b687e833 ("mm, bpf: Introduce free_pages_nolock()") Cc: <stable@vger.kernel.org> Signed-off-by: Harry Yoo <harry.yoo@oracle.com> --- mm/compaction.c | 2 +- mm/internal.h | 35 ++++++++++++++++++++++++++++++++++- mm/page_alloc.c | 42 ++++++------------------------------------ 3 files changed, 41 insertions(+), 38 deletions(-) diff --git a/mm/compaction.c b/mm/compaction.c index 1e8f8eca318c..9ffeb7c6d2b0 100644 --- a/mm/compaction.c +++ b/mm/compaction.c @@ -1859,7 +1859,7 @@ static void compaction_free(struct folio *dst, unsigned long data) struct page *page = &dst->page; if (folio_put_testzero(dst)) { - free_pages_prepare(page, order); + free_pages_prepare(page, order, FPI_NONE); list_add(&dst->lru, &cc->freepages[order]); cc->nr_freepages += 1 << order; } diff --git a/mm/internal.h b/mm/internal.h index 1f44ccb4badf..8fdffbe5cf1f 100644 --- a/mm/internal.h +++ b/mm/internal.h @@ -820,7 +820,40 @@ static inline void prep_compound_tail(struct page *head, int tail_idx) } void post_alloc_hook(struct page *page, unsigned int order, gfp_t gfp_flags); -extern bool free_pages_prepare(struct page *page, unsigned int order); + +/* Free Page Internal flags: for internal, non-pcp variants of free_pages(). */ +typedef int __bitwise fpi_t; + +/* No special request */ +#define FPI_NONE ((__force fpi_t)0) + +/* + * Skip free page reporting notification for the (possibly merged) page. + * This does not hinder free page reporting from grabbing the page, + * reporting it and marking it "reported" - it only skips notifying + * the free page reporting infrastructure about a newly freed page. For + * example, used when temporarily pulling a page from a freelist and + * putting it back unmodified. + */ +#define FPI_SKIP_REPORT_NOTIFY ((__force fpi_t)BIT(0)) + +/* + * Place the (possibly merged) page to the tail of the freelist. Will ignore + * page shuffling (relevant code - e.g., memory onlining - is expected to + * shuffle the whole zone). 
+ * + * Note: No code should rely on this flag for correctness - it's purely + * to allow for optimizations when handing back either fresh pages + * (memory onlining) or untouched pages (page isolation, free page + * reporting). + */ +#define FPI_TO_TAIL ((__force fpi_t)BIT(1)) + +/* Free the page without taking locks. Rely on trylock only. */ +#define FPI_TRYLOCK ((__force fpi_t)BIT(2)) + +extern bool free_pages_prepare(struct page *page, unsigned int order, + fpi_t fpi_flags); extern int user_min_free_kbytes; diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 0127e9d661ad..328c4d996ae8 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -60,37 +60,6 @@ #include "shuffle.h" #include "page_reporting.h" -/* Free Page Internal flags: for internal, non-pcp variants of free_pages(). */ -typedef int __bitwise fpi_t; - -/* No special request */ -#define FPI_NONE ((__force fpi_t)0) - -/* - * Skip free page reporting notification for the (possibly merged) page. - * This does not hinder free page reporting from grabbing the page, - * reporting it and marking it "reported" - it only skips notifying - * the free page reporting infrastructure about a newly freed page. For - * example, used when temporarily pulling a page from a freelist and - * putting it back unmodified. - */ -#define FPI_SKIP_REPORT_NOTIFY ((__force fpi_t)BIT(0)) - -/* - * Place the (possibly merged) page to the tail of the freelist. Will ignore - * page shuffling (relevant code - e.g., memory onlining - is expected to - * shuffle the whole zone). - * - * Note: No code should rely on this flag for correctness - it's purely - * to allow for optimizations when handing back either fresh pages - * (memory onlining) or untouched pages (page isolation, free page - * reporting). - */ -#define FPI_TO_TAIL ((__force fpi_t)BIT(1)) - -/* Free the page without taking locks. Rely on trylock only. 
*/ -#define FPI_TRYLOCK ((__force fpi_t)BIT(2)) - /* prevent >1 _updater_ of zone percpu pageset ->high and ->batch fields */ static DEFINE_MUTEX(pcp_batch_high_lock); #define MIN_PERCPU_PAGELIST_HIGH_FRACTION (8) @@ -1314,7 +1283,8 @@ static inline void pgalloc_tag_sub_pages(struct alloc_tag *tag, unsigned int nr) #endif /* CONFIG_MEM_ALLOC_PROFILING */ __always_inline bool free_pages_prepare(struct page *page, - unsigned int order) + unsigned int order, + fpi_t fpi_flags) { int bad = 0; bool skip_kasan_poison = should_skip_kasan_poison(page); @@ -1407,7 +1377,7 @@ __always_inline bool free_pages_prepare(struct page *page, page_table_check_free(page, order); pgalloc_tag_sub(page, 1 << order); - if (!PageHighMem(page)) { + if (!PageHighMem(page) && !(fpi_flags & FPI_TRYLOCK)) { debug_check_no_locks_freed(page_address(page), PAGE_SIZE << order); debug_check_no_obj_freed(page_address(page), @@ -1579,7 +1549,7 @@ static void __free_pages_ok(struct page *page, unsigned int order, unsigned long pfn = page_to_pfn(page); struct zone *zone = page_zone(page); - if (free_pages_prepare(page, order)) + if (free_pages_prepare(page, order, fpi_flags)) free_one_page(zone, page, pfn, order, fpi_flags); } @@ -2940,7 +2910,7 @@ static void __free_frozen_pages(struct page *page, unsigned int order, return; } - if (!free_pages_prepare(page, order)) + if (!free_pages_prepare(page, order, fpi_flags)) return; /* @@ -3002,7 +2972,7 @@ void free_unref_folios(struct folio_batch *folios) unsigned long pfn = folio_pfn(folio); unsigned int order = folio_order(folio); - if (!free_pages_prepare(&folio->page, order)) + if (!free_pages_prepare(&folio->page, order, FPI_NONE)) continue; /* * Free orders not handled on the PCP directly to the -- 2.43.0