The prot_numa_skip() name is misleading: besides checking whether to
skip prot NUMA, it also updates the folio access time. Rename it to
folio_can_map_prot_numa() and clean it up a bit, dropping the local
ret variable and the goto-style exits in favour of direct returns.

Add a new helper, vma_is_single_threaded_private(), to check whether a
VMA is a single-threaded private VMA, and make folio_can_map_prot_numa()
non-static so that both helpers can be reused in change_huge_pmd().
Since folio_can_map_prot_numa() will be shared between different paths,
move it next to change_prot_numa() in mempolicy.c.

Acked-by: David Hildenbrand
Reviewed-by: Lorenzo Stoakes
Signed-off-by: Kefeng Wang
---
 mm/internal.h  | 20 ++++++++++++++++
 mm/mempolicy.c | 61 ++++++++++++++++++++++++++++++++++++++++++++++
 mm/mprotect.c  | 67 +++++---------------------------------------------
 3 files changed, 86 insertions(+), 62 deletions(-)

diff --git a/mm/internal.h b/mm/internal.h
index 6691d3ea55af..69a8442ed2d4 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -1399,6 +1399,26 @@ void vunmap_range_noflush(unsigned long start, unsigned long end);
 
 void __vunmap_range_noflush(unsigned long start, unsigned long end);
 
+static inline bool vma_is_single_threaded_private(struct vm_area_struct *vma)
+{
+        if (vma->vm_flags & VM_SHARED)
+                return false;
+
+        return atomic_read(&vma->vm_mm->mm_users) == 1;
+}
+
+#ifdef CONFIG_NUMA_BALANCING
+bool folio_can_map_prot_numa(struct folio *folio, struct vm_area_struct *vma,
+                             bool is_private_single_threaded);
+
+#else
+static inline bool folio_can_map_prot_numa(struct folio *folio,
+                struct vm_area_struct *vma, bool is_private_single_threaded)
+{
+        return false;
+}
+#endif
+
 int numa_migrate_check(struct folio *folio, struct vm_fault *vmf,
                       unsigned long addr, int *flags, bool writable,
                       int *last_cpupid);
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index 3d797d47a040..b633b3342dea 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -85,6 +85,7 @@
 #include
 #include
 #include
+#include
 #include
 #include
 #include
@@ -99,6 +100,7 @@
 #include
 #include
 #include
+#include
 #include
 #include
 #include
@@ -805,6 +807,65 @@ static int queue_folios_hugetlb(pte_t *pte, unsigned long hmask,
 }
 
 #ifdef CONFIG_NUMA_BALANCING
+/**
+ * folio_can_map_prot_numa() - check whether the folio can map prot numa
+ * @folio: The folio whose mapping is considered for being made NUMA hintable
+ * @vma: The VMA that the folio belongs to.
+ * @is_private_single_threaded: Is this a single-threaded private VMA or not
+ *
+ * This function checks to see if the folio actually indicates that
+ * we need to make the mapping one which causes a NUMA hinting fault,
+ * as there are cases where it's simply unnecessary, and the folio's
+ * access time is adjusted for memory tiering if prot numa is needed.
+ *
+ * Return: True if the mapping of the folio needs to be changed, false otherwise.
+ */
+bool folio_can_map_prot_numa(struct folio *folio, struct vm_area_struct *vma,
+                             bool is_private_single_threaded)
+{
+        int nid;
+
+        if (!folio || folio_is_zone_device(folio) || folio_test_ksm(folio))
+                return false;
+
+        /* Also skip shared copy-on-write folios */
+        if (is_cow_mapping(vma->vm_flags) && folio_maybe_mapped_shared(folio))
+                return false;
+
+        /* Folios are pinned and can't be migrated */
+        if (folio_maybe_dma_pinned(folio))
+                return false;
+
+        /*
+         * While migration can move some dirty folios,
+         * it cannot move them all from MIGRATE_ASYNC
+         * context.
+         */
+        if (folio_is_file_lru(folio) && folio_test_dirty(folio))
+                return false;
+
+        /*
+         * Don't mess with PTEs if folio is already on the node
+         * a single-threaded process is running on.
+         */
+        nid = folio_nid(folio);
+        if (is_private_single_threaded && (nid == numa_node_id()))
+                return false;
+
+        /*
+         * Skip scanning top tier node if normal numa
+         * balancing is disabled
+         */
+        if (!(sysctl_numa_balancing_mode & NUMA_BALANCING_NORMAL) &&
+            node_is_toptier(nid))
+                return false;
+
+        if (folio_use_access_time(folio))
+                folio_xchg_access_time(folio, jiffies_to_msecs(jiffies));
+
+        return true;
+}
+
 /*
  * This is used to mark a range of virtual addresses to be inaccessible.
  * These are later cleared by a NUMA hinting fault. Depending on these
diff --git a/mm/mprotect.c b/mm/mprotect.c
index 6236d120c8e6..ab4e06cd9a69 100644
--- a/mm/mprotect.c
+++ b/mm/mprotect.c
@@ -29,9 +29,7 @@
 #include
 #include
 #include
-#include
 #include
-#include
 #include
 #include
 #include
@@ -118,60 +116,6 @@ static int mprotect_folio_pte_batch(struct folio *folio, pte_t *ptep,
         return folio_pte_batch_flags(folio, NULL, ptep, &pte, max_nr_ptes, flags);
 }
 
-static bool prot_numa_skip(struct vm_area_struct *vma, int target_node,
-                           struct folio *folio)
-{
-        bool ret = true;
-        bool toptier;
-        int nid;
-
-        if (!folio)
-                goto skip;
-
-        if (folio_is_zone_device(folio) || folio_test_ksm(folio))
-                goto skip;
-
-        /* Also skip shared copy-on-write folios */
-        if (is_cow_mapping(vma->vm_flags) && folio_maybe_mapped_shared(folio))
-                goto skip;
-
-        /* Folios are pinned and can't be migrated */
-        if (folio_maybe_dma_pinned(folio))
-                goto skip;
-
-        /*
-         * While migration can move some dirty pages,
-         * it cannot move them all from MIGRATE_ASYNC
-         * context.
-         */
-        if (folio_is_file_lru(folio) && folio_test_dirty(folio))
-                goto skip;
-
-        /*
-         * Don't mess with PTEs if page is already on the node
-         * a single-threaded process is running on.
-         */
-        nid = folio_nid(folio);
-        if (target_node == nid)
-                goto skip;
-
-        toptier = node_is_toptier(nid);
-
-        /*
-         * Skip scanning top tier node if normal numa
-         * balancing is disabled
-         */
-        if (!(sysctl_numa_balancing_mode & NUMA_BALANCING_NORMAL) && toptier)
-                goto skip;
-
-        ret = false;
-        if (folio_use_access_time(folio))
-                folio_xchg_access_time(folio, jiffies_to_msecs(jiffies));
-
-skip:
-        return ret;
-}
-
 /* Set nr_ptes number of ptes, starting from idx */
 static void prot_commit_flush_ptes(struct vm_area_struct *vma, unsigned long addr,
                 pte_t *ptep, pte_t oldpte, pte_t ptent, int nr_ptes,
@@ -274,7 +218,7 @@ static long change_pte_range(struct mmu_gather *tlb,
         pte_t *pte, oldpte;
         spinlock_t *ptl;
         long pages = 0;
-        int target_node = NUMA_NO_NODE;
+        bool is_private_single_threaded;
         bool prot_numa = cp_flags & MM_CP_PROT_NUMA;
         bool uffd_wp = cp_flags & MM_CP_UFFD_WP;
         bool uffd_wp_resolve = cp_flags & MM_CP_UFFD_WP_RESOLVE;
@@ -285,10 +229,8 @@ static long change_pte_range(struct mmu_gather *tlb,
         if (!pte)
                 return -EAGAIN;
 
-        /* Get target node for single threaded private VMAs */
-        if (prot_numa && !(vma->vm_flags & VM_SHARED) &&
-            atomic_read(&vma->vm_mm->mm_users) == 1)
-                target_node = numa_node_id();
+        if (prot_numa)
+                is_private_single_threaded = vma_is_single_threaded_private(vma);
 
         flush_tlb_batched_pending(vma->vm_mm);
         arch_enter_lazy_mmu_mode();
@@ -315,7 +257,8 @@ static long change_pte_range(struct mmu_gather *tlb,
                  * Avoid trapping faults against the zero or KSM
                  * pages. See similar comment in change_huge_pmd.
                  */
                 if (prot_numa &&
-                    prot_numa_skip(vma, target_node, folio)) {
+                    !folio_can_map_prot_numa(folio, vma,
+                                             is_private_single_threaded)) {
                         /* determine batch to skip */
                         nr_ptes = mprotect_folio_pte_batch(folio,
-- 
2.27.0
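
[Editor's note, not part of the patch.] The commit message says the
non-static folio_can_map_prot_numa() and vma_is_single_threaded_private()
are exported so a later change can reuse them in change_huge_pmd(). As a
rough sketch of that intended reuse, the huge PMD path could wrap the same
checks like the snippet below. The helper name huge_pmd_can_map_prot_numa()
and its placement are hypothetical illustrations only, not taken from this
patch or from any follow-up; pmd_protnone(), is_huge_zero_pmd() and
pmd_folio() are existing kernel helpers used here purely for illustration.

/*
 * Hypothetical sketch only, not part of this patch: how the huge PMD
 * path could reuse the helpers introduced above instead of open-coding
 * the prot NUMA skip checks.
 */
static bool huge_pmd_can_map_prot_numa(struct vm_area_struct *vma, pmd_t pmd)
{
        struct folio *folio;

        /* Already mapped prot NUMA (PROT_NONE), nothing left to do. */
        if (pmd_protnone(pmd))
                return false;

        /* Leave the huge zero folio alone, as the PMD path does today. */
        if (is_huge_zero_pmd(pmd))
                return false;

        folio = pmd_folio(pmd);

        /*
         * Same policy as the PTE path in change_pte_range(): skip folios
         * that cannot (or should not) be migrated, and skip folios that
         * are already local to a single-threaded private mapping.
         */
        return folio_can_map_prot_numa(folio, vma,
                                       vma_is_single_threaded_private(vma));
}

change_huge_pmd() could then bail out of its prot_numa handling whenever
this returns false, mirroring what change_pte_range() does after this patch.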