The alloc_contig_pages() function spends a significant amount of time in
pfn_range_valid_contig(), as the following profile shows:

  - set_max_huge_pages
     - 99.98% alloc_pool_huge_folio
          only_alloc_fresh_hugetlb_folio.isra.0
        - alloc_contig_frozen_pages_noprof
           - 87.00% pfn_range_valid_contig
                pfn_to_online_page
           - 12.91% alloc_contig_frozen_range_noprof
                4.51% replace_free_hugepage_folios
              - 4.02% prep_new_page
                   prep_compound_page
              - 2.98% undo_isolate_page_range
                 - 2.79% unset_migratetype_isolate
                    - 2.75% __move_freepages_block_isolate
                         2.71% __move_freepages_block
              - 0.98% start_isolate_page_range
                   0.66% set_migratetype_isolate

To optimize this, implement a new helper page_is_unmovable(), which reuses
the logic from has_unmovable_pages(). It avoids unnecessary per-pfn
iteration over compound pages, such as THP, and over non-compound
high-order buddy pages, which significantly improves the efficiency of
contiguous memory allocation.

A simple test on a machine with 114G of free memory, allocating 120 * 1G
HugeTLB folios (104 successfully returned):

  time echo 120 > /sys/kernel/mm/hugepages/hugepages-1048576kB/nr_hugepages

Before: 0m3.605s
After:  0m0.602s

Signed-off-by: Kefeng Wang
---
 include/linux/page-isolation.h |   2 +
 mm/page_alloc.c                |  25 ++---
 mm/page_isolation.c            | 187 +++++++++++++++++----------------
 3 files changed, 109 insertions(+), 105 deletions(-)

diff --git a/include/linux/page-isolation.h b/include/linux/page-isolation.h
index 3e2f960e166c..6f8638c9904f 100644
--- a/include/linux/page-isolation.h
+++ b/include/linux/page-isolation.h
@@ -67,4 +67,6 @@ void undo_isolate_page_range(unsigned long start_pfn, unsigned long end_pfn);
 int test_pages_isolated(unsigned long start_pfn, unsigned long end_pfn,
                        enum pb_isolate_mode mode);
+bool page_is_unmovable(struct zone *zone, struct page *page,
+               enum pb_isolate_mode mode, unsigned long *step);
 #endif
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index d8d5379c44dc..813c5f57883f 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -7157,18 +7157,20 @@ static bool pfn_range_valid_contig(struct zone *z, unsigned long start_pfn,
                                   unsigned long nr_pages, bool skip_hugetlb,
                                   bool *skipped_hugetlb)
 {
-        unsigned long i, end_pfn = start_pfn + nr_pages;
+        unsigned long end_pfn = start_pfn + nr_pages;
         struct page *page;
 
-        for (i = start_pfn; i < end_pfn; i++) {
-                page = pfn_to_online_page(i);
+        while (start_pfn < end_pfn) {
+                unsigned long step = 1;
+
+                page = pfn_to_online_page(start_pfn);
                 if (!page)
                         return false;
 
                 if (page_zone(page) != z)
                         return false;
 
-                if (PageReserved(page))
+                if (page_is_unmovable(z, page, PB_ISOLATE_MODE_OTHER, &step))
                         return false;
 
                 /*
@@ -7183,9 +7185,6 @@ static bool pfn_range_valid_contig(struct zone *z, unsigned long start_pfn,
                 if (PageHuge(page)) {
                         unsigned int order;
 
-                        if (!IS_ENABLED(CONFIG_ARCH_ENABLE_HUGEPAGE_MIGRATION))
-                                return false;
-
                         if (skip_hugetlb) {
                                 *skipped_hugetlb = true;
                                 return false;
@@ -7196,17 +7195,9 @@
                         if ((order >= MAX_FOLIO_ORDER) ||
                             (nr_pages <= (1 << order)))
                                 return false;
-
-                        /*
-                         * Reaching this point means we've encounted a huge page
-                         * smaller than nr_pages, skip all pfn's for that page.
-                         *
-                         * We can't get here from a tail-PageHuge, as it implies
-                         * we started a scan in the middle of a hugepage larger
-                         * than nr_pages - which the prior check filters for.
-                         */
-                        i += (1 << order) - 1;
                 }
+
+                start_pfn += step;
         }
         return true;
 }
diff --git a/mm/page_isolation.c b/mm/page_isolation.c
index b5924eff4f8b..c48ff5c00244 100644
--- a/mm/page_isolation.c
+++ b/mm/page_isolation.c
@@ -15,6 +15,100 @@
 #define CREATE_TRACE_POINTS
 #include <trace/events/page_isolation.h>
 
+bool page_is_unmovable(struct zone *zone, struct page *page,
+               enum pb_isolate_mode mode, unsigned long *step)
+{
+        /*
+         * Both, bootmem allocations and memory holes are marked
+         * PG_reserved and are unmovable. We can even have unmovable
+         * allocations inside ZONE_MOVABLE, for example when
+         * specifying "movablecore".
+         */
+        if (PageReserved(page))
+                return true;
+
+        /*
+         * If the zone is movable and we have ruled out all reserved
+         * pages then it should be reasonably safe to assume the rest
+         * is movable.
+         */
+        if (zone_idx(zone) == ZONE_MOVABLE)
+                return false;
+
+        /*
+         * Hugepages are not in LRU lists, but they're movable.
+         * THPs are on the LRU, but need to be counted as #small pages.
+         * We need not scan over tail pages because we don't
+         * handle each tail page individually in migration.
+         */
+        if (PageHuge(page) || PageCompound(page)) {
+                struct folio *folio = page_folio(page);
+
+                if (folio_test_hugetlb(folio)) {
+                        struct hstate *h;
+
+                        if (!IS_ENABLED(CONFIG_ARCH_ENABLE_HUGEPAGE_MIGRATION))
+                                return true;
+
+                        /*
+                         * The huge page may be freed so can not
+                         * use folio_hstate() directly.
+                         */
+                        h = size_to_hstate(folio_size(folio));
+                        if (h && !hugepage_migration_supported(h))
+                                return true;
+
+                } else if (!folio_test_lru(folio)) {
+                        return true;
+                }
+
+                *step = folio_nr_pages(folio) - folio_page_idx(folio, page);
+                return false;
+        }
+
+        /*
+         * We can't use page_count without pin a page
+         * because another CPU can free compound page.
+         * This check already skips compound tails of THP
+         * because their page->_refcount is zero at all time.
+         */
+        if (!page_ref_count(page)) {
+                if (PageBuddy(page))
+                        *step = (1 << buddy_order(page));
+                return false;
+        }
+
+        /*
+         * The HWPoisoned page may be not in buddy system, and
+         * page_count() is not 0.
+         */
+        if ((mode == PB_ISOLATE_MODE_MEM_OFFLINE) && PageHWPoison(page))
+                return false;
+
+        /*
+         * We treat all PageOffline() pages as movable when offlining
+         * to give drivers a chance to decrement their reference count
+         * in MEM_GOING_OFFLINE in order to indicate that these pages
+         * can be offlined as there are no direct references anymore.
+         * For actually unmovable PageOffline() where the driver does
+         * not support this, we will fail later when trying to actually
+         * move these pages that still have a reference count > 0.
+         * (false negatives in this function only)
+         */
+        if ((mode == PB_ISOLATE_MODE_MEM_OFFLINE) && PageOffline(page))
+                return false;
+
+        if (PageLRU(page) || page_has_movable_ops(page))
+                return false;
+
+        /*
+         * If there are RECLAIMABLE pages, we need to check
+         * it. But now, memory offline itself doesn't call
+         * shrink_node_slabs() and it still to be fixed.
+         */
+        return true;
+}
+
 /*
  * This function checks whether the range [start_pfn, end_pfn) includes
  * unmovable pages or not. The range must fall into a single pageblock and
@@ -35,7 +129,6 @@ static struct page *has_unmovable_pages(unsigned long start_pfn, unsigned long e
 {
         struct page *page = pfn_to_page(start_pfn);
         struct zone *zone = page_zone(page);
-        unsigned long pfn;
 
         VM_BUG_ON(pageblock_start_pfn(start_pfn) !=
                   pageblock_start_pfn(end_pfn - 1));
@@ -52,96 +145,14 @@ static struct page *has_unmovable_pages(unsigned long start_pfn, unsigned long e
                 return page;
         }
 
-        for (pfn = start_pfn; pfn < end_pfn; pfn++) {
-                page = pfn_to_page(pfn);
+        while (start_pfn < end_pfn) {
+                unsigned long step = 1;
 
-                /*
-                 * Both, bootmem allocations and memory holes are marked
-                 * PG_reserved and are unmovable. We can even have unmovable
-                 * allocations inside ZONE_MOVABLE, for example when
-                 * specifying "movablecore".
-                 */
-                if (PageReserved(page))
+                page = pfn_to_page(start_pfn);
+                if (page_is_unmovable(zone, page, mode, &step))
                         return page;
 
-                /*
-                 * If the zone is movable and we have ruled out all reserved
-                 * pages then it should be reasonably safe to assume the rest
-                 * is movable.
-                 */
-                if (zone_idx(zone) == ZONE_MOVABLE)
-                        continue;
-
-                /*
-                 * Hugepages are not in LRU lists, but they're movable.
-                 * THPs are on the LRU, but need to be counted as #small pages.
-                 * We need not scan over tail pages because we don't
-                 * handle each tail page individually in migration.
-                 */
-                if (PageHuge(page) || PageTransCompound(page)) {
-                        struct folio *folio = page_folio(page);
-                        unsigned int skip_pages;
-
-                        if (PageHuge(page)) {
-                                struct hstate *h;
-
-                                /*
-                                 * The huge page may be freed so can not
-                                 * use folio_hstate() directly.
-                                 */
-                                h = size_to_hstate(folio_size(folio));
-                                if (h && !hugepage_migration_supported(h))
-                                        return page;
-                        } else if (!folio_test_lru(folio)) {
-                                return page;
-                        }
-
-                        skip_pages = folio_nr_pages(folio) - folio_page_idx(folio, page);
-                        pfn += skip_pages - 1;
-                        continue;
-                }
-
-                /*
-                 * We can't use page_count without pin a page
-                 * because another CPU can free compound page.
-                 * This check already skips compound tails of THP
-                 * because their page->_refcount is zero at all time.
-                 */
-                if (!page_ref_count(page)) {
-                        if (PageBuddy(page))
-                                pfn += (1 << buddy_order(page)) - 1;
-                        continue;
-                }
-
-                /*
-                 * The HWPoisoned page may be not in buddy system, and
-                 * page_count() is not 0.
-                 */
-                if ((mode == PB_ISOLATE_MODE_MEM_OFFLINE) && PageHWPoison(page))
-                        continue;
-
-                /*
-                 * We treat all PageOffline() pages as movable when offlining
-                 * to give drivers a chance to decrement their reference count
-                 * in MEM_GOING_OFFLINE in order to indicate that these pages
-                 * can be offlined as there are no direct references anymore.
-                 * For actually unmovable PageOffline() where the driver does
-                 * not support this, we will fail later when trying to actually
-                 * move these pages that still have a reference count > 0.
-                 * (false negatives in this function only)
-                 */
-                if ((mode == PB_ISOLATE_MODE_MEM_OFFLINE) && PageOffline(page))
-                        continue;
-
-                if (PageLRU(page) || page_has_movable_ops(page))
-                        continue;
-
-                /*
-                 * If there are RECLAIMABLE pages, we need to check
-                 * it. But now, memory offline itself doesn't call
-                 * shrink_node_slabs() and it still to be fixed.
-                 */
-                return page;
+                start_pfn += step;
         }
         return NULL;
 }
-- 
2.27.0
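
Not part of the patch itself: below is a minimal, self-contained userspace
sketch of the step-based scan the new helper enables, for readers who want
to see the pattern in isolation. All names in it (fake_page,
fake_page_is_unmovable, range_is_movable) are invented for illustration and
are not kernel APIs; the real logic lives in the hunks above. The idea is
that the helper reports both whether the current page is unmovable and how
many entries its block covers, so the caller advances by that step instead
of visiting every pfn of a THP, HugeTLB folio, or free buddy block.

#include <stdbool.h>
#include <stdio.h>

struct fake_page {
        bool unmovable;
        unsigned int order;     /* 0 for a normal page, >0 for a high-order block head */
};

/* Mirrors page_is_unmovable(): reports movability and how far to skip. */
static bool fake_page_is_unmovable(const struct fake_page *page, unsigned long *step)
{
        *step = 1UL << page->order;
        return page->unmovable;
}

/* Mirrors the reworked scan loops: walk [start, end) with variable steps. */
static bool range_is_movable(const struct fake_page *pages,
                             unsigned long start, unsigned long end)
{
        while (start < end) {
                unsigned long step = 1;

                if (fake_page_is_unmovable(&pages[start], &step))
                        return false;
                start += step;  /* skip the whole block in one jump */
        }
        return true;
}

int main(void)
{
        /* entries 0..3 form one order-2 block, entry 4 is a single movable page */
        struct fake_page pages[5] = {
                { .unmovable = false, .order = 2 }, {0}, {0}, {0},
                { .unmovable = false, .order = 0 },
        };

        printf("movable: %d\n", range_is_movable(pages, 0, 5));
        return 0;
}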