If a folio is DMA-pinned (even if it is not a CoW folio), it cannot be
migrated due to its elevated reference count. So always skip pinned folios
to avoid wasting cycles on migration attempts that are bound to fail.

Acked-by: Zi Yan
Acked-by: David Hildenbrand
Reviewed-by: Dev Jain
Reviewed-by: Lance Yang
Reviewed-by: Sidhartha Kumar
Reviewed-by: Lorenzo Stoakes
Signed-off-by: Kefeng Wang
---
 mm/mprotect.c | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/mm/mprotect.c b/mm/mprotect.c
index 988c366137d5..056986d9076a 100644
--- a/mm/mprotect.c
+++ b/mm/mprotect.c
@@ -136,9 +136,12 @@ static bool prot_numa_skip(struct vm_area_struct *vma, unsigned long addr,
 	if (folio_is_zone_device(folio) || folio_test_ksm(folio))
 		goto skip;
 
-	/* Also skip shared copy-on-write pages */
-	if (is_cow_mapping(vma->vm_flags) &&
-	    (folio_maybe_dma_pinned(folio) || folio_maybe_mapped_shared(folio)))
+	/* Also skip shared copy-on-write folios */
+	if (is_cow_mapping(vma->vm_flags) && folio_maybe_mapped_shared(folio))
+		goto skip;
+
+	/* Folios are pinned and can't be migrated */
+	if (folio_maybe_dma_pinned(folio))
 		goto skip;
 
 	/*
--
2.27.0

If pte_protnone() is already true, we can avoid unnecessary struct page
accesses and reduce the cache footprint when scanning page tables for prot
numa; a similar change was made before, see commit a818f5363a0e ("autonuma:
reduce cache footprint when scanning page tables").

Acked-by: Zi Yan
Reviewed-by: Dev Jain
Reviewed-by: Sidhartha Kumar
Signed-off-by: Kefeng Wang
---
 mm/mprotect.c | 29 +++++++++++++----------------
 1 file changed, 13 insertions(+), 16 deletions(-)

diff --git a/mm/mprotect.c b/mm/mprotect.c
index 056986d9076a..6236d120c8e6 100644
--- a/mm/mprotect.c
+++ b/mm/mprotect.c
@@ -118,18 +118,13 @@ static int mprotect_folio_pte_batch(struct folio *folio, pte_t *ptep,
 	return folio_pte_batch_flags(folio, NULL, ptep, &pte, max_nr_ptes, flags);
 }
 
-static bool prot_numa_skip(struct vm_area_struct *vma, unsigned long addr,
-			   pte_t oldpte, pte_t *pte, int target_node,
-			   struct folio *folio)
+static bool prot_numa_skip(struct vm_area_struct *vma, int target_node,
+			   struct folio *folio)
 {
 	bool ret = true;
 	bool toptier;
 	int nid;
 
-	/* Avoid TLB flush if possible */
-	if (pte_protnone(oldpte))
-		goto skip;
-
 	if (!folio)
 		goto skip;
 
@@ -307,23 +302,25 @@ static long change_pte_range(struct mmu_gather *tlb,
 			struct page *page;
 			pte_t ptent;
 
+			/* Already in the desired state. */
+			if (prot_numa && pte_protnone(oldpte))
+				continue;
+
 			page = vm_normal_page(vma, addr, oldpte);
 			if (page)
 				folio = page_folio(page);
+
 			/*
 			 * Avoid trapping faults against the zero or KSM
 			 * pages. See similar comment in change_huge_pmd.
 			 */
-			if (prot_numa) {
-				int ret = prot_numa_skip(vma, addr, oldpte, pte,
-							 target_node, folio);
-				if (ret) {
-
-					/* determine batch to skip */
-					nr_ptes = mprotect_folio_pte_batch(folio,
-							pte, oldpte, max_nr_ptes, /* flags = */ 0);
-					continue;
-				}
+			if (prot_numa &&
+			    prot_numa_skip(vma, target_node, folio)) {
+
+				/* determine batch to skip */
+				nr_ptes = mprotect_folio_pte_batch(folio,
+						pte, oldpte, max_nr_ptes, /* flags = */ 0);
+				continue;
 			}
 
 			nr_ptes = mprotect_folio_pte_batch(folio, pte, oldpte, max_nr_ptes, flags);
--
2.27.0
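For context on the pinned-folio check above: long-term pins taken via
pin_user_pages_fast() leave the folio reference count elevated (by
GUP_PIN_COUNTING_BIAS for small folios), which is what
folio_maybe_dma_pinned() detects and what makes migration fail. The sketch
below is a hypothetical driver-style example, not part of this series,
showing how such a pin is typically taken and released.

/*
 * Hypothetical example (not from this series): a driver taking a
 * long-term pin on a user buffer.  While pinned, the folios backing
 * @uaddr report folio_maybe_dma_pinned() == true, so the prot-NUMA
 * scan above now skips them instead of uselessly making them
 * PROT_NONE and later failing to migrate them.
 */
static int example_pin_user_buffer(unsigned long uaddr, int nr_pages,
				   struct page **pages)
{
	int pinned;

	/* FOLL_LONGTERM: the pages may stay pinned for DMA indefinitely */
	pinned = pin_user_pages_fast(uaddr, nr_pages,
				     FOLL_WRITE | FOLL_LONGTERM, pages);
	if (pinned < 0)
		return pinned;

	/* ... map @pages for DMA and run the I/O here ... */

	unpin_user_pages(pages, pinned);
	return 0;
}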
The prot_numa_skip() name is misleading: besides checking whether to skip
prot NUMA, the function also updates the folio access time. Rename it to
folio_needs_prot_numa() and clean it up a bit: drop the local ret variable
and return directly instead of using the goto style, and make it non-static
so that it can be reused.

Signed-off-by: Kefeng Wang
---
 mm/internal.h |  3 +++
 mm/mprotect.c | 43 ++++++++++++++++++++++---------------------
 2 files changed, 25 insertions(+), 21 deletions(-)

diff --git a/mm/internal.h b/mm/internal.h
index 6691d3ea55af..b521b5177d3c 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -1403,6 +1403,9 @@ int numa_migrate_check(struct folio *folio, struct vm_fault *vmf,
 			unsigned long addr, int *flags, bool writable,
 			int *last_cpupid);
 
+bool folio_needs_prot_numa(struct folio *folio, struct vm_area_struct *vma,
+			   int target_node);
+
 void free_zone_device_folio(struct folio *folio);
 int migrate_device_coherent_folio(struct folio *folio);
 
diff --git a/mm/mprotect.c b/mm/mprotect.c
index 6236d120c8e6..1369ba6f6294 100644
--- a/mm/mprotect.c
+++ b/mm/mprotect.c
@@ -118,26 +118,30 @@ static int mprotect_folio_pte_batch(struct folio *folio, pte_t *ptep,
 	return folio_pte_batch_flags(folio, NULL, ptep, &pte, max_nr_ptes, flags);
 }
 
-static bool prot_numa_skip(struct vm_area_struct *vma, int target_node,
-			   struct folio *folio)
+/**
+ * folio_needs_prot_numa() - Whether the folio needs prot numa
+ * @folio: The folio.
+ * @vma: The VMA mapping.
+ * @target_node: The numa node being accessed.
+ *
+ * Return: True if folio needs prot numa and the access time of
+ * folio is adjusted. False otherwise.
+ */
+bool folio_needs_prot_numa(struct folio *folio, struct vm_area_struct *vma,
+			   int target_node)
 {
-	bool ret = true;
-	bool toptier;
 	int nid;
 
-	if (!folio)
-		goto skip;
-
-	if (folio_is_zone_device(folio) || folio_test_ksm(folio))
-		goto skip;
+	if (!folio || folio_is_zone_device(folio) || folio_test_ksm(folio))
+		return false;
 
 	/* Also skip shared copy-on-write folios */
 	if (is_cow_mapping(vma->vm_flags) && folio_maybe_mapped_shared(folio))
-		goto skip;
+		return false;
 
 	/* Folios are pinned and can't be migrated */
 	if (folio_maybe_dma_pinned(folio))
-		goto skip;
+		return false;
 
 	/*
 	 * While migration can move some dirty pages,
@@ -145,7 +149,7 @@ static bool prot_numa_skip(struct vm_area_struct *vma, int target_node,
 	 * context.
 	 */
 	if (folio_is_file_lru(folio) && folio_test_dirty(folio))
-		goto skip;
+		return false;
 
 	/*
 	 * Don't mess with PTEs if page is already on the node
@@ -153,23 +157,20 @@ static bool prot_numa_skip(struct vm_area_struct *vma, int target_node,
 	 */
 	nid = folio_nid(folio);
 	if (target_node == nid)
-		goto skip;
-
-	toptier = node_is_toptier(nid);
+		return false;
 
 	/*
 	 * Skip scanning top tier node if normal numa
 	 * balancing is disabled
 	 */
-	if (!(sysctl_numa_balancing_mode & NUMA_BALANCING_NORMAL) && toptier)
-		goto skip;
+	if (!(sysctl_numa_balancing_mode & NUMA_BALANCING_NORMAL) &&
+	    node_is_toptier(nid))
+		return false;
 
-	ret = false;
 	if (folio_use_access_time(folio))
 		folio_xchg_access_time(folio, jiffies_to_msecs(jiffies));
 
-skip:
-	return ret;
+	return true;
 }
 
 /* Set nr_ptes number of ptes, starting from idx */
@@ -315,7 +316,7 @@ static long change_pte_range(struct mmu_gather *tlb,
 			 * pages. See similar comment in change_huge_pmd.
 			 */
 			if (prot_numa &&
-			    prot_numa_skip(vma, target_node, folio)) {
+			    !folio_needs_prot_numa(folio, vma, target_node)) {
 
 				/* determine batch to skip */
 				nr_ptes = mprotect_folio_pte_batch(folio,
--
2.27.0
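As a usage note for the newly exported helper, the sketch below shows the
calling convention it expects from a hypothetical prot-NUMA caller
(illustrative only, not part of this series): compute a target node first,
and only call the helper when the caller really intends to make the mapping
PROT_NONE, since a true return also refreshes the folio access time.

/*
 * Hypothetical caller, for illustration only: how a prot-NUMA scanner
 * is expected to use folio_needs_prot_numa().  The target node is only
 * meaningful for single-threaded private VMAs, mirroring the existing
 * logic in change_pte_range() and change_huge_pmd().
 */
static bool example_should_make_protnone(struct vm_area_struct *vma,
					 struct folio *folio)
{
	int target_node = NUMA_NO_NODE;

	if (!(vma->vm_flags & VM_SHARED) &&
	    atomic_read(&vma->vm_mm->mm_users) == 1)
		target_node = numa_node_id();

	/* A true return also means the folio access time was refreshed */
	return folio_needs_prot_numa(folio, vma, target_node);
}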
folio_needs_prot_numa() checks whether a folio needs prot NUMA and skips
unsuitable folios, i.e. zone device folios, shared folios (KSM, CoW),
non-migratable DMA-pinned folios, dirty file folios and folios already on
the target node. The same policy should be applied to PMD-mapped folios
too, which helps to avoid unnecessary PMD changes and folio migration
attempts.

Reviewed-by: Sidhartha Kumar
Signed-off-by: Kefeng Wang
---
 mm/huge_memory.c | 22 ++++++++--------------
 1 file changed, 8 insertions(+), 14 deletions(-)

diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 2764613a9b3d..121c92f5c486 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -2477,8 +2477,8 @@ int change_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma,
 #endif
 
 	if (prot_numa) {
-		struct folio *folio;
-		bool toptier;
+		int target_node = NUMA_NO_NODE;
+
 		/*
 		 * Avoid trapping faults against the zero page. The read-only
 		 * data is likely to be read-cached on the local CPU and
@@ -2490,19 +2490,13 @@ int change_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma,
 		if (pmd_protnone(*pmd))
 			goto unlock;
 
-		folio = pmd_folio(*pmd);
-		toptier = node_is_toptier(folio_nid(folio));
-		/*
-		 * Skip scanning top tier node if normal numa
-		 * balancing is disabled
-		 */
-		if (!(sysctl_numa_balancing_mode & NUMA_BALANCING_NORMAL) &&
-		    toptier)
-			goto unlock;
+		/* Get target node for single threaded private VMAs */
+		if (!(vma->vm_flags & VM_SHARED) &&
+		    atomic_read(&vma->vm_mm->mm_users) == 1)
+			target_node = numa_node_id();
 
-		if (folio_use_access_time(folio))
-			folio_xchg_access_time(folio,
-					       jiffies_to_msecs(jiffies));
+		if (!folio_needs_prot_numa(pmd_folio(*pmd), vma, target_node))
+			goto unlock;
 	}
 	/*
 	 * In case prot_numa, we are under mmap_read_lock(mm). It's critical
--
2.27.0
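Not strictly part of the series, but one way to sanity-check the behaviour
is to watch the NUMA balancing counters in /proc/vmstat while running a
workload (assuming CONFIG_NUMA_BALANCING is enabled). With the new skip
logic, pinned or otherwise unsuitable folios should no longer contribute to
numa_pte_updates. A minimal userspace sketch:

/* Minimal sketch: dump the NUMA balancing counters from /proc/vmstat. */
#include <stdio.h>
#include <string.h>

int main(void)
{
	char line[256];
	FILE *f = fopen("/proc/vmstat", "r");

	if (!f) {
		perror("/proc/vmstat");
		return 1;
	}

	while (fgets(line, sizeof(line), f)) {
		if (!strncmp(line, "numa_pte_updates", 16) ||
		    !strncmp(line, "numa_hint_faults", 16) ||
		    !strncmp(line, "numa_pages_migrated", 19))
			fputs(line, stdout);
	}

	fclose(f);
	return 0;
}

Comparing the counters before and after the workload gives a rough view of
how many PTEs the prot-NUMA scanner still touches.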