The 'swap' PMD case is confusing, as only non-present entries (i.e.
non-swap 'swap' entries) are valid at PMD level - currently migration
entries and device private entries.

We repeatedly perform checks of the form is_swap_pmd() ||
pmd_trans_huge(), which is itself confusing - it implies that swap
entries at PMD level exist and are different from huge entries.

Address this confusion by introducing is_huge_pmd(), which checks for
either case.

Sadly, due to header dependency issues (huge_mm.h is included very early
on in headers and cannot really rely on much else), we cannot assert
here that is_pmd_non_present_folio_entry() is true for the input PMD if
non-present. However, since these are the only valid, handled cases, the
function still achieves what it intends to do.

We then replace all instances of is_swap_pmd() || pmd_trans_huge() with
is_huge_pmd() invocations and adjust the logic accordingly.

Signed-off-by: Lorenzo Stoakes
---
 include/linux/huge_mm.h | 40 ++++++++++++++++++++++++++++++++++++----
 mm/huge_memory.c        |  3 ++-
 mm/memory.c             |  4 ++--
 mm/mprotect.c           |  2 +-
 mm/mremap.c             |  2 +-
 5 files changed, 42 insertions(+), 9 deletions(-)

diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
index 7698b3542c4f..892cb825dfc7 100644
--- a/include/linux/huge_mm.h
+++ b/include/linux/huge_mm.h
@@ -416,10 +416,37 @@ void reparent_deferred_split_queue(struct mem_cgroup *memcg);
 void __split_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
 		unsigned long address, bool freeze);
 
+/**
+ * is_huge_pmd() - Is this PMD either a huge PMD entry or a huge non-present
+ * entry?
+ * @pmd: The PMD to check.
+ *
+ * A huge PMD entry is a non-empty entry which is present and marked huge or a
+ * huge non-present entry. This check can be performed without the appropriate
+ * locks held, in which case the condition should be rechecked after they are
+ * acquired.
+ *
+ * Returns: true if this PMD is huge, false otherwise.
+ */
+static inline bool is_huge_pmd(pmd_t pmd)
+{
+	if (pmd_present(pmd)) {
+		return pmd_trans_huge(pmd);
+	} else if (!pmd_none(pmd)) {
+		/*
+		 * Non-present PMDs must be valid huge non-present entries. We
+		 * cannot assert that here due to header dependency issues.
+		 */
+		return true;
+	}
+
+	return false;
+}
+
 #define split_huge_pmd(__vma, __pmd, __address)				\
 	do {								\
 		pmd_t *____pmd = (__pmd);				\
-		if (is_swap_pmd(*____pmd) || pmd_trans_huge(*____pmd))	\
+		if (is_huge_pmd(*____pmd))				\
 			__split_huge_pmd(__vma, __pmd, __address,	\
 					false);				\
 	} while (0)
@@ -466,10 +493,10 @@ static inline int is_swap_pmd(pmd_t pmd)
 static inline spinlock_t *pmd_trans_huge_lock(pmd_t *pmd,
 		struct vm_area_struct *vma)
 {
-	if (is_swap_pmd(*pmd) || pmd_trans_huge(*pmd))
+	if (is_huge_pmd(*pmd))
 		return __pmd_trans_huge_lock(pmd, vma);
-	else
-		return NULL;
+
+	return NULL;
 }
 static inline spinlock_t *pud_trans_huge_lock(pud_t *pud,
 		struct vm_area_struct *vma)
@@ -734,6 +761,11 @@ static inline struct folio *get_persistent_huge_zero_folio(void)
 {
 	return NULL;
 }
+
+static inline bool is_huge_pmd(pmd_t pmd)
+{
+	return false;
+}
 #endif /* CONFIG_TRANSPARENT_HUGEPAGE */
 
 static inline int split_folio_to_list_to_order(struct folio *folio,
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index a59718f85ec3..e0fe1c3e01c9 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -2799,8 +2799,9 @@ int move_pages_huge_pmd(struct mm_struct *mm, pmd_t *dst_pmd, pmd_t *src_pmd, pm
 spinlock_t *__pmd_trans_huge_lock(pmd_t *pmd, struct vm_area_struct *vma)
 {
 	spinlock_t *ptl;
+
 	ptl = pmd_lock(vma->vm_mm, pmd);
-	if (likely(is_swap_pmd(*pmd) || pmd_trans_huge(*pmd)))
+	if (likely(is_huge_pmd(*pmd)))
 		return ptl;
 	spin_unlock(ptl);
 	return NULL;
diff --git a/mm/memory.c b/mm/memory.c
index 83828548ef5f..cc163060933f 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -1373,7 +1373,7 @@ copy_pmd_range(struct vm_area_struct *dst_vma, struct vm_area_struct *src_vma,
 	src_pmd = pmd_offset(src_pud, addr);
 	do {
 		next = pmd_addr_end(addr, end);
-		if (is_swap_pmd(*src_pmd) || pmd_trans_huge(*src_pmd)) {
+		if (is_huge_pmd(*src_pmd)) {
 			int err;
 
 			VM_BUG_ON_VMA(next-addr != HPAGE_PMD_SIZE, src_vma);
@@ -1921,7 +1921,7 @@ static inline unsigned long zap_pmd_range(struct mmu_gather *tlb,
 	pmd = pmd_offset(pud, addr);
 	do {
 		next = pmd_addr_end(addr, end);
-		if (is_swap_pmd(*pmd) || pmd_trans_huge(*pmd)) {
+		if (is_huge_pmd(*pmd)) {
 			if (next - addr != HPAGE_PMD_SIZE)
 				__split_huge_pmd(vma, pmd, addr, false);
 			else if (zap_huge_pmd(tlb, vma, pmd, addr)) {
diff --git a/mm/mprotect.c b/mm/mprotect.c
index e25ac9835cc2..c1d640758b60 100644
--- a/mm/mprotect.c
+++ b/mm/mprotect.c
@@ -522,7 +522,7 @@ static inline long change_pmd_range(struct mmu_gather *tlb,
 			goto next;
 
 		_pmd = pmdp_get_lockless(pmd);
-		if (is_swap_pmd(_pmd) || pmd_trans_huge(_pmd)) {
+		if (is_huge_pmd(_pmd)) {
 			if ((next - addr != HPAGE_PMD_SIZE) ||
 			    pgtable_split_needed(vma, cp_flags)) {
 				__split_huge_pmd(vma, pmd, addr, false);
diff --git a/mm/mremap.c b/mm/mremap.c
index f01c74add990..070ba5a85824 100644
--- a/mm/mremap.c
+++ b/mm/mremap.c
@@ -850,7 +850,7 @@ unsigned long move_page_tables(struct pagetable_move_control *pmc)
 		if (!new_pmd)
 			break;
again:
-		if (is_swap_pmd(*old_pmd) || pmd_trans_huge(*old_pmd)) {
+		if (is_huge_pmd(*old_pmd)) {
 			if (extent == HPAGE_PMD_SIZE &&
 			    move_pgt_entry(pmc, HPAGE_PMD, old_pmd, new_pmd))
 				continue;
-- 
2.51.0
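P.S. For anyone who wants to sanity-check the helper's truth table
outside the kernel tree, below is a minimal userspace sketch. It is
illustrative only: mock_pmd and mock_is_huge_pmd() are hypothetical
stand-ins, with pmd_present(), pmd_none() and pmd_trans_huge() mocked
as plain booleans so the file compiles standalone with any C99
compiler.

/*
 * Minimal userspace sketch of the is_huge_pmd() truth table. Not the
 * kernel implementation - the predicates are mocked as plain booleans.
 */
#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

struct mock_pmd {
	bool present;		/* stands in for pmd_present() */
	bool none;		/* stands in for pmd_none() */
	bool trans_huge;	/* pmd_trans_huge(); meaningful if present */
};

static bool mock_is_huge_pmd(struct mock_pmd pmd)
{
	if (pmd.present)
		return pmd.trans_huge;	/* present: huge only if marked so */
	else if (!pmd.none)
		return true;		/* non-present, non-empty: migration
					 * or device private entry */
	return false;			/* empty entry */
}

int main(void)
{
	const struct mock_pmd cases[] = {
		{ .present = true,  .trans_huge = true  },	/* huge PMD */
		{ .present = true,  .trans_huge = false },	/* page table */
		{ .present = false, .none = false },	/* non-present entry */
		{ .present = false, .none = true  },	/* pmd_none() */
	};

	for (size_t i = 0; i < sizeof(cases) / sizeof(cases[0]); i++)
		printf("case %zu: huge = %d\n", i,
		       mock_is_huge_pmd(cases[i]));
	return 0;
}

This prints huge = 1 for the first and third cases (a present huge PMD
and a non-present, non-empty entry) and huge = 0 otherwise, matching the
semantics described in the kernel-doc above.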