If the folio (even a non-CoW folio) is DMA-pinned, it can't be
migrated, so always skip pinned folios to avoid wasting cycles on
folio migration.

Reviewed-by: Sidhartha Kumar
Acked-by: David Hildenbrand
Reviewed-by: Lance Yang
Signed-off-by: Kefeng Wang
---
 mm/mprotect.c | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/mm/mprotect.c b/mm/mprotect.c
index 113b48985834..bb59a42809b8 100644
--- a/mm/mprotect.c
+++ b/mm/mprotect.c
@@ -136,9 +136,12 @@ static bool prot_numa_skip(struct vm_area_struct *vma, unsigned long addr,
 	if (folio_is_zone_device(folio) || folio_test_ksm(folio))
 		goto skip;
 
-	/* Also skip shared copy-on-write pages */
-	if (is_cow_mapping(vma->vm_flags) &&
-	    (folio_maybe_dma_pinned(folio) || folio_maybe_mapped_shared(folio)))
+	/* Also skip shared copy-on-write folios */
+	if (is_cow_mapping(vma->vm_flags) && folio_maybe_mapped_shared(folio))
+		goto skip;
+
+	/* Folios are pinned and can't be migrated */
+	if (folio_maybe_dma_pinned(folio))
 		goto skip;
 
 	/*
-- 
2.27.0

If pte_protnone() is already true, we can avoid unnecessary struct page
accesses and reduce the cache footprint when scanning page tables for
prot_numa. The pmbench memory-access benchmark should benefit from
this; see commit a818f5363a0e ("autonuma: reduce cache footprint when
scanning page tables") for more details.

Reviewed-by: Sidhartha Kumar
Signed-off-by: Kefeng Wang
---
 mm/mprotect.c | 29 ++++++++++++-----------------
 1 file changed, 12 insertions(+), 17 deletions(-)

diff --git a/mm/mprotect.c b/mm/mprotect.c
index bb59a42809b8..7affa88a6de7 100644
--- a/mm/mprotect.c
+++ b/mm/mprotect.c
@@ -118,18 +118,13 @@ static int mprotect_folio_pte_batch(struct folio *folio, pte_t *ptep,
 	return folio_pte_batch_flags(folio, NULL, ptep, &pte, max_nr_ptes, flags);
 }
 
-static bool prot_numa_skip(struct vm_area_struct *vma, unsigned long addr,
-			   pte_t oldpte, pte_t *pte, int target_node,
-			   struct folio *folio)
+static bool prot_numa_skip(struct vm_area_struct *vma, int target_node,
+			   struct folio *folio)
 {
 	bool ret = true;
 	bool toptier;
 	int nid;
 
-	/* Avoid TLB flush if possible */
-	if (pte_protnone(oldpte))
-		goto skip;
-
 	if (!folio)
 		goto skip;
 
@@ -307,23 +302,23 @@ static long change_pte_range(struct mmu_gather *tlb,
 		struct page *page;
 		pte_t ptent;
 
+		/* Already in the desired state. */
+		if (prot_numa && pte_protnone(oldpte))
+			continue;
+
 		page = vm_normal_page(vma, addr, oldpte);
 		if (page)
 			folio = page_folio(page);
+
 		/*
 		 * Avoid trapping faults against the zero or KSM
 		 * pages. See similar comment in change_huge_pmd.
 		 */
-		if (prot_numa) {
-			int ret = prot_numa_skip(vma, addr, oldpte, pte,
-						 target_node, folio);
-			if (ret) {
-
-				/* determine batch to skip */
-				nr_ptes = mprotect_folio_pte_batch(folio,
-					  pte, oldpte, max_nr_ptes, /* flags = */ 0);
-				continue;
-			}
+		if (prot_numa && prot_numa_skip(vma, target_node, folio)) {
+			/* determine batch to skip */
+			nr_ptes = mprotect_folio_pte_batch(folio,
+				  pte, oldpte, max_nr_ptes, /* flags = */ 0);
+			continue;
 		}
 
 		nr_ptes = mprotect_folio_pte_batch(folio, pte, oldpte, max_nr_ptes, flags);
-- 
2.27.0
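The two patches above boil down to a check-ordering rule: test the bit
that is already in the PTE before pulling any per-page metadata into
the cache, and bail out immediately on memory that cannot be migrated
anyway. The stand-alone C sketch below only models that ordering; it
is not kernel code, and fake_pte, fake_meta, lookup_meta() and scan()
are invented stand-ins for pte_protnone(), the folio metadata,
vm_normal_page()/page_folio() and the change_pte_range() loop.

/*
 * Stand-alone illustration only -- not kernel code. "fake_pte" and
 * "fake_meta" are invented stand-ins for a page table entry and for
 * the per-page metadata that is expensive to pull into the cache.
 */
#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

struct fake_pte {
	bool protnone;     /* cheap: the bit is in the entry we already loaded */
	size_t meta_index; /* which metadata slot backs this entry */
};

struct fake_meta {
	bool dma_pinned;   /* stand-in for folio_maybe_dma_pinned() */
	int nid;           /* stand-in for folio_nid() */
};

/* The expensive step: touches a second cache line per entry. */
static const struct fake_meta *lookup_meta(const struct fake_meta *table,
					   const struct fake_pte *pte)
{
	return &table[pte->meta_index];
}

static int scan(const struct fake_pte *ptes, size_t n,
		const struct fake_meta *table, int target_nid)
{
	int changed = 0;

	for (size_t i = 0; i < n; i++) {
		/* Cheap check first: already PROT_NONE, nothing to do. */
		if (ptes[i].protnone)
			continue;

		const struct fake_meta *m = lookup_meta(table, &ptes[i]);

		/* Pinned memory cannot be migrated, skip it outright. */
		if (m->dma_pinned)
			continue;
		/* Already on the node of interest, leave the entry alone. */
		if (m->nid == target_nid)
			continue;

		changed++; /* the real code would make the PTE PROT_NONE here */
	}
	return changed;
}

int main(void)
{
	const struct fake_meta meta[] = { { false, 0 }, { true, 1 } };
	const struct fake_pte ptes[] = {
		{ true, 0 },  /* skipped before any metadata access */
		{ false, 1 }, /* metadata says pinned: skipped */
		{ false, 0 }, /* already on target node 0: skipped */
	};

	printf("would change %d entries\n", scan(ptes, 3, meta, 0));
	return 0;
}

Built as an ordinary userspace program it prints "would change 0
entries": every entry hits one of the skip conditions before any
protection change would be made.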
Rename prot_numa_skip() to folio_skip_prot_numa(), and drop the local
ret variable by returning directly instead of using the goto style.
The folio skip checks for prot_numa are suitable for PMD-mapped folios
too, which helps to avoid unnecessary PMD changes and folio migration
attempts.

Reviewed-by: Sidhartha Kumar
Signed-off-by: Kefeng Wang
---
 mm/huge_memory.c | 21 +++++++--------------
 mm/internal.h    |  2 ++
 mm/mprotect.c    | 35 ++++++++++++++---------------------
 3 files changed, 23 insertions(+), 35 deletions(-)

diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 1d1b74950332..8ae17e0aacb9 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -2395,8 +2395,7 @@ int change_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma,
 #endif
 
 	if (prot_numa) {
-		struct folio *folio;
-		bool toptier;
+		int target_node = NUMA_NO_NODE;
 		/*
 		 * Avoid trapping faults against the zero page. The read-only
 		 * data is likely to be read-cached on the local CPU and
@@ -2408,19 +2407,13 @@ int change_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma,
 		if (pmd_protnone(*pmd))
 			goto unlock;
 
-		folio = pmd_folio(*pmd);
-		toptier = node_is_toptier(folio_nid(folio));
-		/*
-		 * Skip scanning top tier node if normal numa
-		 * balancing is disabled
-		 */
-		if (!(sysctl_numa_balancing_mode & NUMA_BALANCING_NORMAL) &&
-		    toptier)
-			goto unlock;
+		/* Get target node for single threaded private VMAs */
+		if (!(vma->vm_flags & VM_SHARED) &&
+		    atomic_read(&vma->vm_mm->mm_users) == 1)
+			target_node = numa_node_id();
 
-		if (folio_use_access_time(folio))
-			folio_xchg_access_time(folio,
-					       jiffies_to_msecs(jiffies));
+		if (folio_skip_prot_numa(pmd_folio(*pmd), vma, target_node))
+			goto unlock;
 	}
 	/*
 	 * In case prot_numa, we are under mmap_read_lock(mm). It's critical
diff --git a/mm/internal.h b/mm/internal.h
index 1561fc2ff5b8..55daceab3682 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -1378,6 +1378,8 @@ void vunmap_range_noflush(unsigned long start, unsigned long end);
 
 void __vunmap_range_noflush(unsigned long start, unsigned long end);
 
+bool folio_skip_prot_numa(struct folio *folio, struct vm_area_struct *vma,
+			  int target_node);
 int numa_migrate_check(struct folio *folio, struct vm_fault *vmf,
 		      unsigned long addr, int *flags, bool writable,
 		      int *last_cpupid);
diff --git a/mm/mprotect.c b/mm/mprotect.c
index 7affa88a6de7..cec4c80eb46d 100644
--- a/mm/mprotect.c
+++ b/mm/mprotect.c
@@ -118,26 +118,21 @@ static int mprotect_folio_pte_batch(struct folio *folio, pte_t *ptep,
 	return folio_pte_batch_flags(folio, NULL, ptep, &pte, max_nr_ptes, flags);
 }
 
-static bool prot_numa_skip(struct vm_area_struct *vma, int target_node,
-			   struct folio *folio)
+bool folio_skip_prot_numa(struct folio *folio, struct vm_area_struct *vma,
+			  int target_node)
 {
-	bool ret = true;
-	bool toptier;
 	int nid;
 
-	if (!folio)
-		goto skip;
-
-	if (folio_is_zone_device(folio) || folio_test_ksm(folio))
-		goto skip;
+	if (!folio || folio_is_zone_device(folio) || folio_test_ksm(folio))
+		return true;
 
 	/* Also skip shared copy-on-write folios */
 	if (is_cow_mapping(vma->vm_flags) && folio_maybe_mapped_shared(folio))
-		goto skip;
+		return true;
 
 	/* Folios are pinned and can't be migrated */
 	if (folio_maybe_dma_pinned(folio))
-		goto skip;
+		return true;
 
 	/*
 	 * While migration can move some dirty pages,
@@ -145,7 +140,7 @@ static bool prot_numa_skip(struct vm_area_struct *vma, int target_node,
 	 * context.
 	 */
 	if (folio_is_file_lru(folio) && folio_test_dirty(folio))
-		goto skip;
+		return true;
 
 	/*
 	 * Don't mess with PTEs if page is already on the node
@@ -153,23 +148,20 @@ static bool prot_numa_skip(struct vm_area_struct *vma, int target_node,
 	 */
 	nid = folio_nid(folio);
 	if (target_node == nid)
-		goto skip;
-
-	toptier = node_is_toptier(nid);
+		return true;
 
 	/*
 	 * Skip scanning top tier node if normal numa
 	 * balancing is disabled
 	 */
-	if (!(sysctl_numa_balancing_mode & NUMA_BALANCING_NORMAL) && toptier)
-		goto skip;
+	if (!(sysctl_numa_balancing_mode & NUMA_BALANCING_NORMAL) &&
+	    node_is_toptier(nid))
+		return true;
 
-	ret = false;
 	if (folio_use_access_time(folio))
 		folio_xchg_access_time(folio, jiffies_to_msecs(jiffies));
-skip:
-	return ret;
+	return false;
 }
 
 /* Set nr_ptes number of ptes, starting from idx */
@@ -314,7 +306,8 @@ static long change_pte_range(struct mmu_gather *tlb,
 		 * Avoid trapping faults against the zero or KSM
 		 * pages. See similar comment in change_huge_pmd.
 		 */
-		if (prot_numa && prot_numa_skip(vma, target_node, folio)) {
+		if (prot_numa && folio_skip_prot_numa(folio, vma,
+				target_node)) {
 			/* determine batch to skip */
 			nr_ptes = mprotect_folio_pte_batch(folio,
 				  pte, oldpte, max_nr_ptes, /* flags = */ 0);
-- 
2.27.0
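A design note on the final patch: once the skip logic is a plain
boolean predicate over (folio, vma, target_node), the PMD path in
change_huge_pmd() can reuse it exactly as the PTE path does, instead
of open-coding the top-tier and access-time handling. The stand-alone
sketch below only illustrates that shape and is not kernel code;
fake_folio, skip_for_numa(), pte_path() and pmd_path() are invented
names.

/*
 * Stand-alone illustration only -- not kernel code. A minimal sketch
 * of one boolean helper with early returns (instead of "ret = true;
 * goto skip") that both the PTE-level and the PMD-level paths share.
 */
#include <stdbool.h>
#include <stdio.h>

struct fake_folio {
	bool ksm;        /* stand-in for folio_test_ksm() */
	bool dma_pinned; /* stand-in for folio_maybe_dma_pinned() */
	bool dirty_file; /* dirty file-backed memory: migration may fail */
	int nid;         /* stand-in for folio_nid() */
};

/* Shared predicate: should NUMA-hinting protection changes skip this? */
static bool skip_for_numa(const struct fake_folio *folio, int target_nid)
{
	if (!folio || folio->ksm)
		return true;
	if (folio->dma_pinned)
		return true;
	if (folio->dirty_file)
		return true;
	if (folio->nid == target_nid)
		return true;
	return false;
}

/* Both mapping levels reuse the same predicate. */
static void pte_path(const struct fake_folio *folio, int target_nid)
{
	if (skip_for_numa(folio, target_nid))
		return;
	puts("pte: make entry PROT_NONE");
}

static void pmd_path(const struct fake_folio *folio, int target_nid)
{
	if (skip_for_numa(folio, target_nid))
		return;
	puts("pmd: make entry PROT_NONE");
}

int main(void)
{
	const struct fake_folio pinned = { .dma_pinned = true, .nid = 1 };
	const struct fake_folio movable = { .nid = 1 };

	pte_path(&pinned, 0);  /* silently skipped */
	pmd_path(&movable, 0); /* prints */
	return 0;
}

The early-return form also drops the old "ret = false;" bookkeeping:
each condition either returns true on its own line or falls through to
the single return false at the end.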