setup_superpageblocks() is __init-only and uses memblock_alloc_node(), so hotplugged memory that extends a zone's span has no superpageblock coverage. Pages in those regions would bypass superpageblock steering entirely. Add resize_zone_superpageblocks(), which is called from move_pfn_range_to_zone() after the zone span has been updated. It allocates a new superpageblock array with kvmalloc_node() covering the full zone span, copies existing superpageblocks (fixing up list head pointers), and initializes new superpageblocks for the added range. Use round-up division for partial pageblock counting to match init_one_superpageblock(). ZONE_DEVICE is excluded since device pages should not participate in anti-fragmentation steering. Signed-off-by: Rik van Riel Assisted-by: Claude:claude-opus-4.7 syzkaller --- include/linux/mmzone.h | 1 + mm/internal.h | 4 ++ mm/memory_hotplug.c | 4 ++ mm/mm_init.c | 138 +++++++++++++++++++++++++++++++++++++++++ 4 files changed, 147 insertions(+) diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index e3eac971a76a..19190328e0c7 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -1057,6 +1057,7 @@ struct zone { struct superpageblock *superpageblocks; unsigned long nr_superpageblocks; unsigned long superpageblock_base_pfn; /* 1GB-aligned base */ + bool spb_kvmalloced; /* true if from kvmalloc (hotplug) */ /* zone_start_pfn == zone_start_paddr >> PAGE_SHIFT */ unsigned long zone_start_pfn; diff --git a/mm/internal.h b/mm/internal.h index c8404cb00b08..6a089bc4aa09 100644 --- a/mm/internal.h +++ b/mm/internal.h @@ -1101,6 +1101,10 @@ void init_cma_reserved_pageblock(struct page *page); #endif /* CONFIG_COMPACTION || CONFIG_CMA */ +#ifdef CONFIG_MEMORY_HOTPLUG +void resize_zone_superpageblocks(struct zone *zone); +#endif + struct cma; #ifdef CONFIG_CMA diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index 2a943ec57c85..b7c30dfdce8e 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -752,6 +752,10 
@@ void move_pfn_range_to_zone(struct zone *zone, unsigned long start_pfn,
 	resize_zone_range(zone, start_pfn, nr_pages);
 	resize_pgdat_range(pgdat, start_pfn, nr_pages);
 
+	/* Grow superpageblock array to cover the new zone span */
+	if (!zone_is_zone_device(zone))
+		resize_zone_superpageblocks(zone);
+
 	/*
 	 * Subsection population requires care in pfn_to_online_page().
 	 * Set the taint to enable the slow path detection of
diff --git a/mm/mm_init.c b/mm/mm_init.c
index de02a6087c21..ad1cbc2b4498 100644
--- a/mm/mm_init.c
+++ b/mm/mm_init.c
@@ -1592,6 +1592,144 @@ static void __init setup_superpageblocks(struct zone *zone)
 					zone_start, zone_end);
 }
 
+#ifdef CONFIG_MEMORY_HOTPLUG
+/**
+ * resize_zone_superpageblocks - grow superpageblock array for memory hotplug
+ * @zone: zone whose span has been extended by hotplug
+ *
+ * Called from move_pfn_range_to_zone() after resize_zone_range() has
+ * updated the zone's span. Allocates a new superpageblock array covering
+ * the old array's range plus the current zone span, copies the existing
+ * superpageblocks (re-linking their list heads), and initializes the
+ * superpageblocks for the added range. If the allocation fails, a warning
+ * is printed and the old array is left in place.
+ *
+ * Must be called under mem_hotplug_lock (write).
+ * NOTE(review): pages already online in @zone can presumably still be
+ * allocated and freed while the array is copied and swapped; confirm that
+ * no additional locking (e.g. zone->lock) is needed here.
+ */
+void __meminit resize_zone_superpageblocks(struct zone *zone)
+{
+	unsigned long zone_start = zone->zone_start_pfn;
+	unsigned long zone_end = zone_start + zone->spanned_pages;
+	struct superpageblock *old_sbs = zone->superpageblocks;
+	unsigned long old_nr = old_sbs ? zone->nr_superpageblocks : 0;
+	unsigned long new_sb_base, new_sb_end, new_nr_sbs;
+	unsigned long old_offset = 0;
+	struct superpageblock *new_sbs;
+	size_t alloc_size;
+	unsigned long i;
+	int nid = zone_to_nid(zone);
+
+	if (!zone->spanned_pages)
+		return;
+
+	new_sb_base = ALIGN_DOWN(zone_start, SUPERPAGEBLOCK_NR_PAGES);
+	new_sb_end = ALIGN(zone_end, SUPERPAGEBLOCK_NR_PAGES);
+
+	/*
+	 * Never drop slots the old array already covers. If the zone span
+	 * is not a superset of the old range (e.g. it shrank after the old
+	 * array was sized), old_base - new_sb_base would wrap around and
+	 * the memcpy() below would write far outside the new array.
+	 */
+	if (old_nr) {
+		unsigned long old_base = zone->superpageblock_base_pfn;
+		unsigned long old_end = old_base +
+					(old_nr << SUPERPAGEBLOCK_ORDER);
+
+		new_sb_base = min(new_sb_base, old_base);
+		new_sb_end = max(new_sb_end, old_end);
+		old_offset = (old_base - new_sb_base) >> SUPERPAGEBLOCK_ORDER;
+	}
+	new_nr_sbs = (new_sb_end - new_sb_base) >> SUPERPAGEBLOCK_ORDER;
+
+	/* Already covered? (the new range always contains the old one) */
+	if (old_nr && new_nr_sbs == old_nr)
+		return;
+
+	alloc_size = new_nr_sbs * sizeof(struct superpageblock);
+	new_sbs = kvmalloc_node(alloc_size, GFP_KERNEL | __GFP_ZERO, nid);
+	if (!new_sbs) {
+		pr_warn("Failed to allocate %zu bytes for zone %s superpageblocks\n",
+			alloc_size, zone->name);
+		return;
+	}
+
+	/*
+	 * Copy existing superpageblocks to their new position.
+	 * The old array covers [old_base, old_base + old_nr * SB_SIZE)
+	 * and lands at slot old_offset of the new array.
+	 */
+	if (old_nr)
+		memcpy(&new_sbs[old_offset], old_sbs,
+		       old_nr * sizeof(struct superpageblock));
+
+	/*
+	 * Re-link the copied list heads. Emptiness must be tested on the old
+	 * entry: the copy still holds the old head's address, so list_empty()
+	 * on the copy is never true.
+	 */
+	for (i = 0; i < old_nr; i++) {
+		struct superpageblock *old = &old_sbs[i];
+		struct superpageblock *sb = &new_sbs[old_offset + i];
+
+		if (list_empty(&old->list))
+			INIT_LIST_HEAD(&sb->list);
+		else
+			list_replace(&old->list, &sb->list);
+	}
+
+	/* Initialize new superpageblocks (slots not covered by old array) */
+	for (i = 0; i < new_nr_sbs; i++) {
+		if (i >= old_offset && i < old_offset + old_nr)
+			continue;
+
+		init_one_superpageblock(&new_sbs[i], zone,
+					new_sb_base + (i << SUPERPAGEBLOCK_ORDER),
+					zone_start, zone_end);
+	}
+
+	/*
+	 * Update existing superpageblocks whose nr_reserved may have
+	 * increased due to the zone span growing into them. Partial
+	 * pageblocks are rounded up, as in init_one_superpageblock().
+	 */
+	for (i = old_offset; i < old_offset + old_nr; i++) {
+		struct superpageblock *sb = &new_sbs[i];
+		unsigned long sb_start = sb->start_pfn;
+		unsigned long sb_end = sb_start + SUPERPAGEBLOCK_NR_PAGES;
+		unsigned long pb_start = max(sb_start, zone_start);
+		unsigned long pb_end = min(sb_end, zone_end);
+		u16 new_pbs = (pb_end > pb_start) ?
+			((pb_end - pb_start + pageblock_nr_pages - 1) >>
+			 pageblock_order) : 0;
+		u16 old_pbs = sb->nr_free + sb->nr_unmovable +
+			      sb->nr_reclaimable + sb->nr_movable +
+			      sb->nr_reserved;
+
+		/* Not yet accounted pageblocks count as reserved. */
+		if (new_pbs > old_pbs)
+			sb->nr_reserved += new_pbs - old_pbs;
+	}
+
+	/* Swap in the new array */
+	zone->superpageblocks = new_sbs;
+	zone->nr_superpageblocks = new_nr_sbs;
+	zone->superpageblock_base_pfn = new_sb_base;
+
+	/*
+	 * The boot-time array was allocated with memblock_alloc, which
+	 * is not individually freeable after boot. Only kvfree arrays
+	 * from previous hotplug resizes.
+	 */
+	if (zone->spb_kvmalloced)
+		kvfree(old_sbs);
+	zone->spb_kvmalloced = true;
+}
+#endif /* CONFIG_MEMORY_HOTPLUG */
+
 #ifdef CONFIG_HUGETLB_PAGE_SIZE_VARIABLE
 
 /* Initialise the number of pages represented by NR_PAGEBLOCK_BITS */
-- 
2.54.0